1 /*
   2  * Copyright 2002-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package java.lang;
  27 import java.util.Map;
  28 import java.util.HashMap;
  29 import java.util.Locale;
  30 
  31 /**
  32  * The <code>Character</code> class wraps a value of the primitive
  33  * type <code>char</code> in an object. An object of type
  34  * <code>Character</code> contains a single field whose type is
  35  * <code>char</code>.
  36  * <p>
  37  * In addition, this class provides several methods for determining
  38  * a character's category (lowercase letter, digit, etc.) and for converting
  39  * characters from uppercase to lowercase and vice versa.
  40  * <p>
  41  * Character information is based on the Unicode Standard, version 5.1.0.
  42  * <p>
  43  * The methods and data of class <code>Character</code> are defined by
  44  * the information in the <i>UnicodeData</i> file that is part of the
  45  * Unicode Character Database maintained by the Unicode
  46  * Consortium. This file specifies various properties including name
  47  * and general category for every defined Unicode code point or
  48  * character range.
  49  * <p>
  50  * The file and its description are available from the Unicode Consortium at:
  51  * <ul>
  52  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  53  * </ul>
  54  *
  55  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  56  *
  57  * <p>The <code>char</code> data type (and therefore the value that a
  58  * <code>Character</code> object encapsulates) is based on the
  59  * original Unicode specification, which defined characters as
  60  * fixed-width 16-bit entities. The Unicode standard has since been
  61  * changed to allow for characters whose representation requires more
  62  * than 16 bits.  The range of legal <em>code point</em>s is now
  63  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  64  * (Refer to the <a
  65  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  66  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  67  * standard.)
  68  *
  69  * <p>The set of characters from U+0000 to U+FFFF is sometimes
  70  * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
  71  * name="supplementary">Characters</a> whose code points are greater
  72  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  73  * 2 platform uses the UTF-16 representation in <code>char</code>
  74  * arrays and in the <code>String</code> and <code>StringBuffer</code>
  75  * classes. In this representation, supplementary characters are
  76  * represented as a pair of <code>char</code> values, the first from
  77  * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
  78  * second from the <em>low-surrogates</em> range
  79  * (&#92;uDC00-&#92;uDFFF).
  80  *
  81  * <p>A <code>char</code> value, therefore, represents Basic
  82  * Multilingual Plane (BMP) code points, including the surrogate
  83  * code points, or code units of the UTF-16 encoding. An
  84  * <code>int</code> value represents all Unicode code points,
  85  * including supplementary code points. The lower (least significant)
  86  * 21 bits of <code>int</code> are used to represent Unicode code
  87  * points and the upper (most significant) 11 bits must be zero.
  88  * Unless otherwise specified, the behavior with respect to
  89  * supplementary characters and surrogate <code>char</code> values is
  90  * as follows:
  91  *
  92  * <ul>
  93  * <li>The methods that only accept a <code>char</code> value cannot support
  94  * supplementary characters. They treat <code>char</code> values from the
  95  * surrogate ranges as undefined characters. For example,
  96  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
  97  * this specific value if followed by any low-surrogate value in a string
  98  * would represent a letter.
  99  *
 100  * <li>The methods that accept an <code>int</code> value support all
 101  * Unicode characters, including supplementary characters. For
 102  * example, <code>Character.isLetter(0x2F81A)</code> returns
 103  * <code>true</code> because the code point value represents a letter
 104  * (a CJK ideograph).
 105  * </ul>
 106  *
 107  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 108  * used for character values in the range between U+0000 and U+10FFFF,
 109  * and <em>Unicode code unit</em> is used for 16-bit
 110  * <code>char</code> values that are code units of the <em>UTF-16</em>
 111  * encoding. For more information on Unicode terminology, refer to the
 112  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 113  *
 114  * @author  Lee Boynton
 115  * @author  Guy Steele
 116  * @author  Akira Tanaka
 117  * @since   1.0
 118  */
 119 public final
 120 class Character extends Object implements java.io.Serializable, Comparable<Character> {
 121     /**
 122      * The minimum radix (<code>2</code>) available for conversion to and from strings.
 123      * The constant value of this field is the smallest value permitted
 124      * for the radix argument in radix-conversion methods such as the
 125      * <code>digit</code> method, the <code>forDigit</code>
 126      * method, and the <code>toString</code> method of class
 127      * <code>Integer</code>.
 128      *
 129      * @see     java.lang.Character#digit(char, int)
 130      * @see     java.lang.Character#forDigit(int, int)
 131      * @see     java.lang.Integer#toString(int, int)
 132      * @see     java.lang.Integer#valueOf(java.lang.String)
 133      */
 134     public static final int MIN_RADIX = 2;
 135 
 136     /**
 137      * The maximum radix (<code>36</code>) available for conversion to and from strings.
 138      * The constant value of this field is the largest value permitted
 139      * for the radix argument in radix-conversion methods such as the
 140      * <code>digit</code> method, the <code>forDigit</code>
 141      * method, and the <code>toString</code> method of class
 142      * <code>Integer</code>.
 143      *
 144      * @see     java.lang.Character#digit(char, int)
 145      * @see     java.lang.Character#forDigit(int, int)
 146      * @see     java.lang.Integer#toString(int, int)
 147      * @see     java.lang.Integer#valueOf(java.lang.String)
 148      */
 149     public static final int MAX_RADIX = 36;
 150 
 151     /**
 152      * The constant value of this field is the smallest value of type
 153      * <code>char</code>, <code>'&#92;u0000'</code>.
 154      *
 155      * @since   1.0.2
 156      */
 157     public static final char   MIN_VALUE = '\u0000';
 158 
 159     /**
 160      * The constant value of this field is the largest value of type
 161      * <code>char</code>, <code>'&#92;uFFFF'</code>.
 162      *
 163      * @since   1.0.2
 164      */
 165     public static final char   MAX_VALUE = '\uFFFF';
 166 
 167     /**
 168      * The <code>Class</code> instance representing the primitive type
 169      * <code>char</code>, obtained via <code>Class.getPrimitiveClass("char")</code>.
 170      *
 171      * @since   1.1
 172      */
 173     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 174 
 175    /*
 176     * Normative general types
 177     */
 178 
 179    /*
 180     * General character types
         *
         * One distinct byte constant is declared per Unicode general category.
         * The values run from 0 to 30, but 17 is not assigned to any constant
         * below (FORMAT is 16 and PRIVATE_USE is 18). These are public
         * constants, so the literal values must not be renumbered.
 181     */
 182 
 183    /**
 184     * General category "Cn" (unassigned) in the Unicode specification.
 185     * @since   1.1
 186     */
 187     public static final byte
 188         UNASSIGNED                  = 0;
 189 
 190    /**
 191     * General category "Lu" (uppercase letter) in the Unicode specification.
 192     * @since   1.1
 193     */
 194     public static final byte
 195         UPPERCASE_LETTER            = 1;
 196 
 197    /**
 198     * General category "Ll" (lowercase letter) in the Unicode specification.
 199     * @since   1.1
 200     */
 201     public static final byte
 202         LOWERCASE_LETTER            = 2;
 203 
 204    /**
 205     * General category "Lt" (titlecase letter) in the Unicode specification.
 206     * @since   1.1
 207     */
 208     public static final byte
 209         TITLECASE_LETTER            = 3;
 210 
 211    /**
 212     * General category "Lm" (modifier letter) in the Unicode specification.
 213     * @since   1.1
 214     */
 215     public static final byte
 216         MODIFIER_LETTER             = 4;
 217 
 218    /**
 219     * General category "Lo" (other letter) in the Unicode specification.
 220     * @since   1.1
 221     */
 222     public static final byte
 223         OTHER_LETTER                = 5;
 224 
 225    /**
 226     * General category "Mn" (non-spacing mark) in the Unicode specification.
 227     * @since   1.1
 228     */
 229     public static final byte
 230         NON_SPACING_MARK            = 6;
 231 
 232    /**
 233     * General category "Me" (enclosing mark) in the Unicode specification.
 234     * @since   1.1
 235     */
 236     public static final byte
 237         ENCLOSING_MARK              = 7;
 238 
 239    /**
 240     * General category "Mc" (combining spacing mark) in the Unicode specification.
 241     * @since   1.1
 242     */
 243     public static final byte
 244         COMBINING_SPACING_MARK      = 8;
 245 
 246    /**
 247     * General category "Nd" (decimal digit number) in the Unicode specification.
 248     * @since   1.1
 249     */
 250     public static final byte
 251         DECIMAL_DIGIT_NUMBER        = 9;
 252 
 253    /**
 254     * General category "Nl" (letter number) in the Unicode specification.
 255     * @since   1.1
 256     */
 257     public static final byte
 258         LETTER_NUMBER               = 10;
 259 
 260    /**
 261     * General category "No" (other number) in the Unicode specification.
 262     * @since   1.1
 263     */
 264     public static final byte
 265         OTHER_NUMBER                = 11;
 266 
 267    /**
 268     * General category "Zs" (space separator) in the Unicode specification.
 269     * @since   1.1
 270     */
 271     public static final byte
 272         SPACE_SEPARATOR             = 12;
 273 
 274    /**
 275     * General category "Zl" (line separator) in the Unicode specification.
 276     * @since   1.1
 277     */
 278     public static final byte
 279         LINE_SEPARATOR              = 13;
 280 
 281    /**
 282     * General category "Zp" (paragraph separator) in the Unicode specification.
 283     * @since   1.1
 284     */
 285     public static final byte
 286         PARAGRAPH_SEPARATOR         = 14;
 287 
 288    /**
 289     * General category "Cc" (control) in the Unicode specification.
 290     * @since   1.1
 291     */
 292     public static final byte
 293         CONTROL                     = 15;
 294 
 295    /**
 296     * General category "Cf" (format) in the Unicode specification.
 297     * @since   1.1
 298     */
 299     public static final byte
 300         FORMAT                      = 16;
 301 
 302    /**
 303     * General category "Co" (private use) in the Unicode specification.
         * Its value is 18; the value 17 is skipped in this numbering.
 304     * @since   1.1
 305     */
 306     public static final byte
 307         PRIVATE_USE                 = 18;
 308 
 309    /**
 310     * General category "Cs" (surrogate) in the Unicode specification.
 311     * @since   1.1
 312     */
 313     public static final byte
 314         SURROGATE                   = 19;
 315 
 316    /**
 317     * General category "Pd" (dash punctuation) in the Unicode specification.
 318     * @since   1.1
 319     */
 320     public static final byte
 321         DASH_PUNCTUATION            = 20;
 322 
 323    /**
 324     * General category "Ps" (start punctuation) in the Unicode specification.
 325     * @since   1.1
 326     */
 327     public static final byte
 328         START_PUNCTUATION           = 21;
 329 
 330    /**
 331     * General category "Pe" (end punctuation) in the Unicode specification.
 332     * @since   1.1
 333     */
 334     public static final byte
 335         END_PUNCTUATION             = 22;
 336 
 337    /**
 338     * General category "Pc" (connector punctuation) in the Unicode specification.
 339     * @since   1.1
 340     */
 341     public static final byte
 342         CONNECTOR_PUNCTUATION       = 23;
 343 
 344    /**
 345     * General category "Po" (other punctuation) in the Unicode specification.
 346     * @since   1.1
 347     */
 348     public static final byte
 349         OTHER_PUNCTUATION           = 24;
 350 
 351    /**
 352     * General category "Sm" (math symbol) in the Unicode specification.
 353     * @since   1.1
 354     */
 355     public static final byte
 356         MATH_SYMBOL                 = 25;
 357 
 358    /**
 359     * General category "Sc" (currency symbol) in the Unicode specification.
 360     * @since   1.1
 361     */
 362     public static final byte
 363         CURRENCY_SYMBOL             = 26;
 364 
 365    /**
 366     * General category "Sk" (modifier symbol) in the Unicode specification.
 367     * @since   1.1
 368     */
 369     public static final byte
 370         MODIFIER_SYMBOL             = 27;
 371 
 372    /**
 373     * General category "So" (other symbol) in the Unicode specification.
 374     * @since   1.1
 375     */
 376     public static final byte
 377         OTHER_SYMBOL                = 28;
 378 
 379    /**
 380     * General category "Pi" (initial quote punctuation) in the Unicode specification.
 381     * @since   1.4
 382     */
 383     public static final byte
 384         INITIAL_QUOTE_PUNCTUATION   = 29;
 385 
 386    /**
 387     * General category "Pf" (final quote punctuation) in the Unicode specification.
 388     * @since   1.4
 389     */
 390     public static final byte
 391         FINAL_QUOTE_PUNCTUATION     = 30;
 392 
 393     /**
 394      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
          * <p>
          * The literal <code>0xFFFFFFFF</code> is the <code>int</code> value -1, which
          * lies outside the range of <code>char</code> values and therefore cannot
          * collide with a real 16-bit character. The field has no access modifier,
          * so it is visible only within this package.
 395      */
 396      static final int ERROR = 0xFFFFFFFF;
 397 
 398 
 399     /**
 400      * Undefined bidirectional character type. Undefined <code>char</code>
 401      * values have undefined directionality in the Unicode specification.
          * This is the only directionality constant declared here with a negative
          * value (-1); the defined types below are numbered from 0 to 18.
 402      * @since 1.4
 403      */
 404      public static final byte DIRECTIONALITY_UNDEFINED = -1;
 405 
 406     /**
 407      * Strong bidirectional character type "L" (left-to-right) in the Unicode specification.
 408      * @since 1.4
 409      */
 410     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 411 
 412     /**
 413      * Strong bidirectional character type "R" (right-to-left) in the Unicode specification.
 414      * @since 1.4
 415      */
 416     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 417 
 418     /**
 419      * Strong bidirectional character type "AL" (right-to-left Arabic) in the Unicode specification.
 420      * @since 1.4
 421      */
 422     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 423 
 424     /**
 425      * Weak bidirectional character type "EN" (European number) in the Unicode specification.
 426      * @since 1.4
 427      */
 428     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 429 
 430     /**
 431      * Weak bidirectional character type "ES" (European number separator) in the Unicode specification.
 432      * @since 1.4
 433      */
 434     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 435 
 436     /**
 437      * Weak bidirectional character type "ET" (European number terminator) in the Unicode specification.
 438      * @since 1.4
 439      */
 440     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 441 
 442     /**
 443      * Weak bidirectional character type "AN" (Arabic number) in the Unicode specification.
 444      * @since 1.4
 445      */
 446     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 447 
 448     /**
 449      * Weak bidirectional character type "CS" (common number separator) in the Unicode specification.
 450      * @since 1.4
 451      */
 452     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 453 
 454     /**
 455      * Weak bidirectional character type "NSM" (nonspacing mark) in the Unicode specification.
 456      * @since 1.4
 457      */
 458     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 459 
 460     /**
 461      * Weak bidirectional character type "BN" (boundary neutral) in the Unicode specification.
 462      * @since 1.4
 463      */
 464     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 465 
 466     /**
 467      * Neutral bidirectional character type "B" (paragraph separator) in the Unicode specification.
 468      * @since 1.4
 469      */
 470     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 471 
 472     /**
 473      * Neutral bidirectional character type "S" (segment separator) in the Unicode specification.
 474      * @since 1.4
 475      */
 476     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 477 
 478     /**
 479      * Neutral bidirectional character type "WS" (whitespace) in the Unicode specification.
 480      * @since 1.4
 481      */
 482     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 483 
 484     /**
 485      * Neutral bidirectional character type "ON" (other neutrals) in the Unicode specification.
 486      * @since 1.4
 487      */
 488     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 489 
 490     /**
 491      * Strong bidirectional character type "LRE" (left-to-right embedding) in the Unicode specification.
 492      * @since 1.4
 493      */
 494     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 495 
 496     /**
 497      * Strong bidirectional character type "LRO" (left-to-right override) in the Unicode specification.
 498      * @since 1.4
 499      */
 500     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 501 
 502     /**
 503      * Strong bidirectional character type "RLE" (right-to-left embedding) in the Unicode specification.
 504      * @since 1.4
 505      */
 506     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 507 
 508     /**
 509      * Strong bidirectional character type "RLO" (right-to-left override) in the Unicode specification.
 510      * @since 1.4
 511      */
 512     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 513 
 514     /**
 515      * Weak bidirectional character type "PDF" (pop directional format) in the Unicode specification.
 516      * @since 1.4
 517      */
 518     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 519 
 520     /**
 521      * The minimum value of a
 522      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 523      * Unicode high-surrogate code unit</a>
 524      * in the UTF-16 encoding, constant <code>'&#92;uD800'</code>.
 525      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 526      *
 527      * @since 1.5
 528      */
 529     public static final char MIN_HIGH_SURROGATE = '\uD800';
 530 
 531     /**
 532      * The maximum value of a
 533      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 534      * Unicode high-surrogate code unit</a>
 535      * in the UTF-16 encoding, constant <code>'&#92;uDBFF'</code>.
 536      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 537      *
 538      * @since 1.5
 539      */
 540     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 541 
 542     /**
 543      * The minimum value of a
 544      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 545      * Unicode low-surrogate code unit</a>
 546      * in the UTF-16 encoding, constant <code>'&#92;uDC00'</code>.
 547      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 548      *
 549      * @since 1.5
 550      */
 551     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 552 
 553     /**
 554      * The maximum value of a
 555      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 556      * Unicode low-surrogate code unit</a>
 557      * in the UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
 558      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 559      *
 560      * @since 1.5
 561      */
 562     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 563 
 564     /**
 565      * The minimum value of a Unicode surrogate code unit in the
 566      * UTF-16 encoding, constant <code>'&#92;uD800'</code>.
          * Defined as an alias of {@link #MIN_HIGH_SURROGATE}, because the
          * high-surrogate range is the lower of the two surrogate ranges.
 567      *
 568      * @since 1.5
 569      */
 570     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 571 
 572     /**
 573      * The maximum value of a Unicode surrogate code unit in the
 574      * UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
          * Defined as an alias of {@link #MAX_LOW_SURROGATE}, because the
          * low-surrogate range is the upper of the two surrogate ranges.
 575      *
 576      * @since 1.5
 577      */
 578     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 579 
 580     /**
 581      * The minimum value of a
 582      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 583      * Unicode supplementary code point</a>, constant {@code U+10000}.
          * This is one greater than the largest <code>char</code> value
          * (<code>0xFFFF</code>), so it does not fit in a single <code>char</code>.
 584      *
 585      * @since 1.5
 586      */
 587     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 588 
 589     /**
 590      * The minimum value of a
 591      * <a href="http://www.unicode.org/glossary/#code_point">
 592      * Unicode code point</a>, constant {@code U+0000}.
 593      *
 594      * @since 1.5
 595      */
 596     public static final int MIN_CODE_POINT = 0x000000;
 597 
 598     /**
 599      * The maximum value of a
 600      * <a href="http://www.unicode.org/glossary/#code_point">
 601      * Unicode code point</a>, constant {@code U+10FFFF}.
 602      *
 603      * @since 1.5
 604      */
 605     public static final int MAX_CODE_POINT = 0X10FFFF;
 606 
 607 
 608     /**
 609      * Instances of this class represent particular subsets of the Unicode
 610      * character set.  The only family of subsets defined in the
 611      * <code>Character</code> class is <code>{@link Character.UnicodeBlock
 612      * UnicodeBlock}</code>.  Other portions of the Java API may define other
 613      * subsets for their own purposes.
 614      *
 615      * @since 1.2
 616      */
 617     public static class Subset  {
 618 
 619         private String name;
 620 
 621         /**
 622          * Constructs a new <code>Subset</code> instance.
 623          *
 624          * @exception NullPointerException if name is <code>null</code>
 625          * @param  name  The name of this subset
 626          */
 627         protected Subset(String name) {
 628             if (name == null) {
 629                 throw new NullPointerException("name");
 630             }
 631             this.name = name;
 632         }
 633 
 634         /**
 635          * Compares two <code>Subset</code> objects for equality.
 636          * This method returns <code>true</code> if and only if
 637          * <code>this</code> and the argument refer to the same
 638          * object; since this method is <code>final</code>, this
 639          * guarantee holds for all subclasses.
 640          */
 641         public final boolean equals(Object obj) {
 642             return (this == obj);
 643         }
 644 
 645         /**
 646          * Returns the standard hash code as defined by the
 647          * <code>{@link Object#hashCode}</code> method.  This method
 648          * is <code>final</code> in order to ensure that the
 649          * <code>equals</code> and <code>hashCode</code> methods will
 650          * be consistent in all subclasses.
 651          */
 652         public final int hashCode() {
 653             return super.hashCode();
 654         }
 655 
 656         /**
 657          * Returns the name of this subset.
 658          */
 659         public final String toString() {
 660             return name;
 661         }
 662     }
 663 
 664     /**
 665      * A family of character subsets representing the character blocks in the
 666      * Unicode specification. Character blocks generally define characters
 667      * used for a specific script or purpose. A character is contained by
 668      * at most one Unicode block.
 669      *
 670      * @since 1.2
 671      */
 672     public static final class UnicodeBlock extends Subset {
 673 
 674         private static Map map = new HashMap();
 675 
 676         /**
 677          * Create a UnicodeBlock with the given identifier name.
 678          * This name must be the same as the block identifier.
 679          */
 680         private UnicodeBlock(String idName) {
 681             super(idName);
 682             map.put(idName.toUpperCase(Locale.US), this);
 683         }
 684 
 685         /**
 686          * Create a UnicodeBlock with the given identifier name and
 687          * alias name.
 688          */
 689         private UnicodeBlock(String idName, String alias) {
 690             this(idName);
 691             map.put(alias.toUpperCase(Locale.US), this);
 692         }
 693 
 694         /**
 695          * Create a UnicodeBlock with the given identifier name and
 696          * alias names.
 697          */
 698         private UnicodeBlock(String idName, String[] aliasName) {
 699             this(idName);
 700             if (aliasName != null) {
 701                 for(int x=0; x<aliasName.length; ++x) {
 702                     map.put(aliasName[x].toUpperCase(Locale.US), this);
 703                 }
 704             }
 705         }
 706 
 707         /**
 708          * Constant for the "Basic Latin" Unicode character block.
 709          * @since 1.2
 710          */
 711         public static final UnicodeBlock  BASIC_LATIN =
 712             new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
 713 
 714         /**
 715          * Constant for the "Latin-1 Supplement" Unicode character block.
 716          * @since 1.2
 717          */
 718         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 719             new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
 720 
 721         /**
 722          * Constant for the "Latin Extended-A" Unicode character block.
 723          * @since 1.2
 724          */
 725         public static final UnicodeBlock LATIN_EXTENDED_A =
 726             new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
 727 
 728         /**
 729          * Constant for the "Latin Extended-B" Unicode character block.
 730          * @since 1.2
 731          */
 732         public static final UnicodeBlock LATIN_EXTENDED_B =
 733             new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
 734 
 735         /**
 736          * Constant for the "IPA Extensions" Unicode character block.
 737          * @since 1.2
 738          */
 739         public static final UnicodeBlock IPA_EXTENSIONS =
 740             new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
 741 
 742         /**
 743          * Constant for the "Spacing Modifier Letters" Unicode character block.
 744          * @since 1.2
 745          */
 746         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 747             new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
 748                                                                         "SpacingModifierLetters"});
 749 
 750         /**
 751          * Constant for the "Combining Diacritical Marks" Unicode character block.
 752          * @since 1.2
 753          */
 754         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 755             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
 756                                                                           "CombiningDiacriticalMarks" });
 757 
 758         /**
 759          * Constant for the "Greek and Coptic" Unicode character block.
 760          * <p>
 761          * This block was previously known as the "Greek" block.
 762          *
 763          * @since 1.2
 764          */
 765         public static final UnicodeBlock GREEK
 766             = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
 767 
 768         /**
 769          * Constant for the "Cyrillic" Unicode character block.
 770          * @since 1.2
 771          */
 772         public static final UnicodeBlock CYRILLIC =
 773             new UnicodeBlock("CYRILLIC");
 774 
 775         /**
 776          * Constant for the "Armenian" Unicode character block.
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock ARMENIAN =
 780             new UnicodeBlock("ARMENIAN");
 781 
 782         /**
 783          * Constant for the "Hebrew" Unicode character block.
 784          * @since 1.2
 785          */
 786         public static final UnicodeBlock HEBREW =
 787             new UnicodeBlock("HEBREW");
 788 
 789         /**
 790          * Constant for the "Arabic" Unicode character block.
 791          * @since 1.2
 792          */
 793         public static final UnicodeBlock ARABIC =
 794             new UnicodeBlock("ARABIC");
 795 
 796         /**
 797          * Constant for the "Devanagari" Unicode character block.
 798          * @since 1.2
 799          */
 800         public static final UnicodeBlock DEVANAGARI =
 801             new UnicodeBlock("DEVANAGARI");
 802 
 803         /**
 804          * Constant for the "Bengali" Unicode character block.
 805          * @since 1.2
 806          */
 807         public static final UnicodeBlock BENGALI =
 808             new UnicodeBlock("BENGALI");
 809 
 810         /**
 811          * Constant for the "Gurmukhi" Unicode character block.
 812          * @since 1.2
 813          */
 814         public static final UnicodeBlock GURMUKHI =
 815             new UnicodeBlock("GURMUKHI");
 816 
 817         /**
 818          * Constant for the "Gujarati" Unicode character block.
 819          * @since 1.2
 820          */
 821         public static final UnicodeBlock GUJARATI =
 822             new UnicodeBlock("GUJARATI");
 823 
 824         /**
 825          * Constant for the "Oriya" Unicode character block.
 826          * @since 1.2
 827          */
 828         public static final UnicodeBlock ORIYA =
 829             new UnicodeBlock("ORIYA");
 830 
 831         /**
 832          * Constant for the "Tamil" Unicode character block.
 833          * @since 1.2
 834          */
 835         public static final UnicodeBlock TAMIL =
 836             new UnicodeBlock("TAMIL");
 837 
 838         /**
 839          * Constant for the "Telugu" Unicode character block.
 840          * @since 1.2
 841          */
 842         public static final UnicodeBlock TELUGU =
 843             new UnicodeBlock("TELUGU");
 844 
 845         /**
 846          * Constant for the "Kannada" Unicode character block.
 847          * @since 1.2
 848          */
 849         public static final UnicodeBlock KANNADA =
 850             new UnicodeBlock("KANNADA");
 851 
 852         /**
 853          * Constant for the "Malayalam" Unicode character block.
 854          * @since 1.2
 855          */
 856         public static final UnicodeBlock MALAYALAM =
 857             new UnicodeBlock("MALAYALAM");
 858 
 859         /**
 860          * Constant for the "Thai" Unicode character block.
 861          * @since 1.2
 862          */
 863         public static final UnicodeBlock THAI =
 864             new UnicodeBlock("THAI");
 865 
 866         /**
 867          * Constant for the "Lao" Unicode character block.
 868          * @since 1.2
 869          */
 870         public static final UnicodeBlock LAO =
 871             new UnicodeBlock("LAO");
 872 
 873         /**
 874          * Constant for the "Tibetan" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock TIBETAN =
 878             new UnicodeBlock("TIBETAN");
 879 
 880         /**
 881          * Constant for the "Georgian" Unicode character block.
 882          * @since 1.2
 883          */
 884         public static final UnicodeBlock GEORGIAN =
 885             new UnicodeBlock("GEORGIAN");
 886 
 887         /**
 888          * Constant for the "Hangul Jamo" Unicode character block.
 889          * @since 1.2
 890          */
 891         public static final UnicodeBlock HANGUL_JAMO =
 892             new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
 893 
 894         /**
 895          * Constant for the "Latin Extended Additional" Unicode character block.
 896          * @since 1.2
 897          */
 898         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 899             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
 900                                                                         "LatinExtendedAdditional"});
 901 
 902         /**
 903          * Constant for the "Greek Extended" Unicode character block.
 904          * @since 1.2
 905          */
 906         public static final UnicodeBlock GREEK_EXTENDED =
 907             new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
 908 
 909         /**
 910          * Constant for the "General Punctuation" Unicode character block.
 911          * @since 1.2
 912          */
 913         public static final UnicodeBlock GENERAL_PUNCTUATION =
 914             new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
 915 
 916         /**
 917          * Constant for the "Superscripts and Subscripts" Unicode character block.
 918          * @since 1.2
 919          */
 920         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 921             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
 922                                                                           "SuperscriptsandSubscripts" });
 923 
 924         /**
 925          * Constant for the "Currency Symbols" Unicode character block.
 926          * @since 1.2
 927          */
 928         public static final UnicodeBlock CURRENCY_SYMBOLS =
 929             new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
 930 
 931         /**
 932          * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
 933          * <p>
 934          * This block was previously known as "Combining Marks for Symbols".
 935          * @since 1.2
 936          */
 937         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 938             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
 939                                                                                                                                                    "CombiningDiacriticalMarksforSymbols",
 940                                                                            "Combining Marks for Symbols",
 941                                                                            "CombiningMarksforSymbols" });
 942 
 943         /**
 944          * Constant for the "Letterlike Symbols" Unicode character block.
 945          * @since 1.2
 946          */
 947         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 948             new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
 949 
 950         /**
 951          * Constant for the "Number Forms" Unicode character block.
 952          * @since 1.2
 953          */
 954         public static final UnicodeBlock NUMBER_FORMS =
 955             new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
 956 
 957         /**
 958          * Constant for the "Arrows" Unicode character block.
 959          * @since 1.2
 960          */
 961         public static final UnicodeBlock ARROWS =
 962             new UnicodeBlock("ARROWS");
 963 
 964         /**
 965          * Constant for the "Mathematical Operators" Unicode character block.
 966          * @since 1.2
 967          */
 968         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 969             new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
 970                                                                      "MathematicalOperators"});
 971 
 972         /**
 973          * Constant for the "Miscellaneous Technical" Unicode character block.
 974          * @since 1.2
 975          */
 976         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 977             new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
 978                                                                       "MiscellaneousTechnical"});
 979 
 980         /**
 981          * Constant for the "Control Pictures" Unicode character block.
 982          * @since 1.2
 983          */
 984         public static final UnicodeBlock CONTROL_PICTURES =
 985             new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
 986 
 987         /**
 988          * Constant for the "Optical Character Recognition" Unicode character block.
 989          * @since 1.2
 990          */
 991         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 992             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
 993                                                                             "OpticalCharacterRecognition"});
 994 
 995         /**
 996          * Constant for the "Enclosed Alphanumerics" Unicode character block.
 997          * @since 1.2
 998          */
 999         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1000             new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
1001                                                                      "EnclosedAlphanumerics"});
1002 
1003         /**
1004          * Constant for the "Box Drawing" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock BOX_DRAWING =
1008             new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
1009 
1010         /**
1011          * Constant for the "Block Elements" Unicode character block.
1012          * @since 1.2
1013          */
1014         public static final UnicodeBlock BLOCK_ELEMENTS =
1015             new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
1016 
1017         /**
1018          * Constant for the "Geometric Shapes" Unicode character block.
1019          * @since 1.2
1020          */
1021         public static final UnicodeBlock GEOMETRIC_SHAPES =
1022             new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
1023 
1024         /**
1025          * Constant for the "Miscellaneous Symbols" Unicode character block.
1026          * @since 1.2
1027          */
1028         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1029             new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
1030                                                                     "MiscellaneousSymbols"});
1031 
1032         /**
1033          * Constant for the "Dingbats" Unicode character block.
1034          * @since 1.2
1035          */
1036         public static final UnicodeBlock DINGBATS =
1037             new UnicodeBlock("DINGBATS");
1038 
1039         /**
1040          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1041          * @since 1.2
1042          */
1043         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1044             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
1045                                                                           "CJKSymbolsandPunctuation"});
1046 
1047         /**
1048          * Constant for the "Hiragana" Unicode character block.
1049          * @since 1.2
1050          */
1051         public static final UnicodeBlock HIRAGANA =
1052             new UnicodeBlock("HIRAGANA");
1053 
1054         /**
1055          * Constant for the "Katakana" Unicode character block.
1056          * @since 1.2
1057          */
1058         public static final UnicodeBlock KATAKANA =
1059             new UnicodeBlock("KATAKANA");
1060 
1061         /**
1062          * Constant for the "Bopomofo" Unicode character block.
1063          * @since 1.2
1064          */
1065         public static final UnicodeBlock BOPOMOFO =
1066             new UnicodeBlock("BOPOMOFO");
1067 
1068         /**
1069          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1070          * @since 1.2
1071          */
1072         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1073             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
1074                                                                         "HangulCompatibilityJamo"});
1075 
1076         /**
1077          * Constant for the "Kanbun" Unicode character block.
1078          * @since 1.2
1079          */
1080         public static final UnicodeBlock KANBUN =
1081             new UnicodeBlock("KANBUN");
1082 
1083         /**
1084          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1085          * @since 1.2
1086          */
1087         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1088             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
1089                                                                               "EnclosedCJKLettersandMonths"});
1090 
1091         /**
1092          * Constant for the "CJK Compatibility" Unicode character block.
1093          * @since 1.2
1094          */
1095         public static final UnicodeBlock CJK_COMPATIBILITY =
1096             new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
1097 
1098         /**
1099          * Constant for the "CJK Unified Ideographs" Unicode character block.
1100          * @since 1.2
1101          */
1102         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1103             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
1104                                                                      "CJKUnifiedIdeographs"});
1105 
1106         /**
1107          * Constant for the "Hangul Syllables" Unicode character block.
1108          * @since 1.2
1109          */
1110         public static final UnicodeBlock HANGUL_SYLLABLES =
1111             new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
1112 
1113         /**
1114          * Constant for the "Private Use Area" Unicode character block.
1115          * @since 1.2
1116          */
1117         public static final UnicodeBlock PRIVATE_USE_AREA =
1118             new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
1119 
1120         /**
1121          * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1122          * @since 1.2
1123          */
1124         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1125             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1126                              new String[] {"CJK Compatibility Ideographs",
1127                                            "CJKCompatibilityIdeographs"});
1128 
1129         /**
1130          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1134             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
1135                                                                             "AlphabeticPresentationForms"});
1136 
1137         /**
1138          * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1142             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
1143                                                                           "ArabicPresentationForms-A"});
1144 
1145         /**
1146          * Constant for the "Combining Half Marks" Unicode character block.
1147          * @since 1.2
1148          */
1149         public static final UnicodeBlock COMBINING_HALF_MARKS =
1150             new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
1151                                                                    "CombiningHalfMarks"});
1152 
1153         /**
1154          * Constant for the "CJK Compatibility Forms" Unicode character block.
1155          * @since 1.2
1156          */
1157         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1158             new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
1159                                                                       "CJKCompatibilityForms"});
1160 
1161         /**
1162          * Constant for the "Small Form Variants" Unicode character block.
1163          * @since 1.2
1164          */
1165         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1166             new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
1167                                                                   "SmallFormVariants"});
1168 
1169         /**
1170          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1171          * @since 1.2
1172          */
1173         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1174             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
1175                                                                           "ArabicPresentationForms-B"});
1176 
1177         /**
1178          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1179          * @since 1.2
1180          */
1181         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1182             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1183                              new String[] {"Halfwidth and Fullwidth Forms",
1184                                            "HalfwidthandFullwidthForms"});
1185 
1186         /**
1187          * Constant for the "Specials" Unicode character block.
1188          * @since 1.2
1189          */
1190         public static final UnicodeBlock SPECIALS =
1191             new UnicodeBlock("SPECIALS");
1192 
1193         /**
1194          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1195          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1196          *             {@link #LOW_SURROGATES}. These new constants match
1197          *             the block definitions of the Unicode Standard.
1198          *             The {@link #of(char)} and {@link #of(int)} methods
1199          *             return the new constants, not SURROGATES_AREA.
1200          */
1201         @Deprecated
1202         public static final UnicodeBlock SURROGATES_AREA =
1203             new UnicodeBlock("SURROGATES_AREA");
1204 
1205         /**
1206          * Constant for the "Syriac" Unicode character block.
1207          * @since 1.4
1208          */
1209         public static final UnicodeBlock SYRIAC =
1210             new UnicodeBlock("SYRIAC");
1211 
1212         /**
1213          * Constant for the "Thaana" Unicode character block.
1214          * @since 1.4
1215          */
1216         public static final UnicodeBlock THAANA =
1217             new UnicodeBlock("THAANA");
1218 
1219         /**
1220          * Constant for the "Sinhala" Unicode character block.
1221          * @since 1.4
1222          */
1223         public static final UnicodeBlock SINHALA =
1224             new UnicodeBlock("SINHALA");
1225 
1226         /**
1227          * Constant for the "Myanmar" Unicode character block.
1228          * @since 1.4
1229          */
1230         public static final UnicodeBlock MYANMAR =
1231             new UnicodeBlock("MYANMAR");
1232 
1233         /**
1234          * Constant for the "Ethiopic" Unicode character block.
1235          * @since 1.4
1236          */
1237         public static final UnicodeBlock ETHIOPIC =
1238             new UnicodeBlock("ETHIOPIC");
1239 
1240         /**
1241          * Constant for the "Cherokee" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock CHEROKEE =
1245             new UnicodeBlock("CHEROKEE");
1246 
1247         /**
1248          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1252             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1253                              new String[] {"Unified Canadian Aboriginal Syllabics",
1254                                            "UnifiedCanadianAboriginalSyllabics"});
1255 
1256         /**
1257          * Constant for the "Ogham" Unicode character block.
1258          * @since 1.4
1259          */
1260         public static final UnicodeBlock OGHAM =
1261                              new UnicodeBlock("OGHAM");
1262 
1263         /**
1264          * Constant for the "Runic" Unicode character block.
1265          * @since 1.4
1266          */
1267         public static final UnicodeBlock RUNIC =
1268                              new UnicodeBlock("RUNIC");
1269 
1270         /**
1271          * Constant for the "Khmer" Unicode character block.
1272          * @since 1.4
1273          */
1274         public static final UnicodeBlock KHMER =
1275                              new UnicodeBlock("KHMER");
1276 
1277         /**
1278          * Constant for the "Mongolian" Unicode character block.
1279          * @since 1.4
1280          */
1281         public static final UnicodeBlock MONGOLIAN =
1282                              new UnicodeBlock("MONGOLIAN");
1283 
1284         /**
1285          * Constant for the "Braille Patterns" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock BRAILLE_PATTERNS =
1289             new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
1290                                                                "BraillePatterns"});
1291 
1292         /**
1293          * Constant for the "CJK Radicals Supplement" Unicode character block.
1294          * @since 1.4
1295          */
1296         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1297              new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
1298                                                                        "CJKRadicalsSupplement"});
1299 
1300         /**
1301          * Constant for the "Kangxi Radicals" Unicode character block.
1302          * @since 1.4
1303          */
1304         public static final UnicodeBlock KANGXI_RADICALS =
1305             new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
1306 
1307         /**
1308          * Constant for the "Ideographic Description Characters" Unicode character block.
1309          * @since 1.4
1310          */
1311         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1312             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
1313                                                                                  "IdeographicDescriptionCharacters"});
1314 
1315         /**
1316          * Constant for the "Bopomofo Extended" Unicode character block.
1317          * @since 1.4
1318          */
1319         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1320             new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
1321                                                                 "BopomofoExtended"});
1322 
1323         /**
1324          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1325          * @since 1.4
1326          */
1327         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1328             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
1329                                                                                  "CJKUnifiedIdeographsExtensionA"});
1330 
1331         /**
1332          * Constant for the "Yi Syllables" Unicode character block.
1333          * @since 1.4
1334          */
1335         public static final UnicodeBlock YI_SYLLABLES =
1336             new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
1337 
1338         /**
1339          * Constant for the "Yi Radicals" Unicode character block.
1340          * @since 1.4
1341          */
1342         public static final UnicodeBlock YI_RADICALS =
1343             new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
1344 
1345 
1346         /**
1347          * Constant for the "Cyrillic Supplementary" Unicode character block.
1348          * @since 1.5
1349          */
1350         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1351             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1352                              new String[] {"Cyrillic Supplementary",
1353                                            "CyrillicSupplementary",
1354                                            "Cyrillic Supplement",
1355                                            "CyrillicSupplement"});
1356 
1357         /**
1358          * Constant for the "Tagalog" Unicode character block.
1359          * @since 1.5
1360          */
1361         public static final UnicodeBlock TAGALOG =
1362             new UnicodeBlock("TAGALOG");
1363 
1364         /**
1365          * Constant for the "Hanunoo" Unicode character block.
1366          * @since 1.5
1367          */
1368         public static final UnicodeBlock HANUNOO =
1369             new UnicodeBlock("HANUNOO");
1370 
1371         /**
1372          * Constant for the "Buhid" Unicode character block.
1373          * @since 1.5
1374          */
1375         public static final UnicodeBlock BUHID =
1376             new UnicodeBlock("BUHID");
1377 
1378         /**
1379          * Constant for the "Tagbanwa" Unicode character block.
1380          * @since 1.5
1381          */
1382         public static final UnicodeBlock TAGBANWA =
1383             new UnicodeBlock("TAGBANWA");
1384 
1385         /**
1386          * Constant for the "Limbu" Unicode character block.
1387          * @since 1.5
1388          */
1389         public static final UnicodeBlock LIMBU =
1390             new UnicodeBlock("LIMBU");
1391 
1392         /**
1393          * Constant for the "Tai Le" Unicode character block.
1394          * @since 1.5
1395          */
1396         public static final UnicodeBlock TAI_LE =
1397             new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
1398 
1399         /**
1400          * Constant for the "Khmer Symbols" Unicode character block.
1401          * @since 1.5
1402          */
1403         public static final UnicodeBlock KHMER_SYMBOLS =
1404             new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
1405 
1406         /**
1407          * Constant for the "Phonetic Extensions" Unicode character block.
1408          * @since 1.5
1409          */
1410         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1411             new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
1412 
1413         /**
1414          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1415          * @since 1.5
1416          */
1417         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1418             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1419                              new String[]{"Miscellaneous Mathematical Symbols-A",
1420                                           "MiscellaneousMathematicalSymbols-A"});
1421 
1422         /**
1423          * Constant for the "Supplemental Arrows-A" Unicode character block.
1424          * @since 1.5
1425          */
1426         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1427             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
1428                                                                     "SupplementalArrows-A"});
1429 
1430         /**
1431          * Constant for the "Supplemental Arrows-B" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1435             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
1436                                                                     "SupplementalArrows-B"});
1437 
1438         /**
1439          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1440          * @since 1.5
1441          */
1442         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1443                 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1444                                    new String[] {"Miscellaneous Mathematical Symbols-B",
1445                                                  "MiscellaneousMathematicalSymbols-B"});
1446 
1447         /**
1448          * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1449          * @since 1.5
1450          */
1451         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1452             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1453                              new String[]{"Supplemental Mathematical Operators",
1454                                           "SupplementalMathematicalOperators"} );
1455 
1456         /**
1457          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1458          * @since 1.5
1459          */
1460         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1461             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
1462                                                                                "MiscellaneousSymbolsandArrows"});
1463 
1464         /**
1465          * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1469             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
1470                                                                            "KatakanaPhoneticExtensions"});
1471 
1472         /**
1473          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1474          * @since 1.5
1475          */
1476         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1477             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
1478                                                                       "YijingHexagramSymbols"});
1479 
1480         /**
1481          * Constant for the "Variation Selectors" Unicode character block.
1482          * @since 1.5
1483          */
1484         public static final UnicodeBlock VARIATION_SELECTORS =
1485             new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
1486 
1487         /**
1488          * Constant for the "Linear B Syllabary" Unicode character block.
1489          * @since 1.5
1490          */
1491         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1492             new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
1493 
1494         /**
1495          * Constant for the "Linear B Ideograms" Unicode character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1499             new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
1500 
1501         /**
1502          * Constant for the "Aegean Numbers" Unicode character block.
1503          * @since 1.5
1504          */
1505         public static final UnicodeBlock AEGEAN_NUMBERS =
1506             new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
1507 
1508         /**
1509          * Constant for the "Old Italic" Unicode character block.
1510          * @since 1.5
1511          */
1512         public static final UnicodeBlock OLD_ITALIC =
1513             new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
1514 
1515         /**
1516          * Constant for the "Gothic" Unicode character block.
1517          * @since 1.5
1518          */
1519         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1520 
1521         /**
1522          * Constant for the "Ugaritic" Unicode character block.
1523          * @since 1.5
1524          */
1525         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1526 
1527         /**
1528          * Constant for the "Deseret" Unicode character block.
1529          * @since 1.5
1530          */
1531         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1532 
1533         /**
1534          * Constant for the "Shavian" Unicode character block.
1535          * @since 1.5
1536          */
1537         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1538 
1539         /**
1540          * Constant for the "Osmanya" Unicode character block.
1541          * @since 1.5
1542          */
1543         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1544 
1545         /**
1546          * Constant for the "Cypriot Syllabary" Unicode character block.
1547          * @since 1.5
1548          */
1549         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1550             new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
1551 
1552         /**
1553          * Constant for the "Byzantine Musical Symbols" Unicode character block (U+1D000..U+1D0FF).
1554          * @since 1.5
1555          */
1556         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1557             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
1558                                                                         "ByzantineMusicalSymbols"});
1559 
1560         /**
1561          * Constant for the "Musical Symbols" Unicode character block (U+1D100..U+1D1FF).
1562          * @since 1.5
1563          */
1564         public static final UnicodeBlock MUSICAL_SYMBOLS =
1565             new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
1566 
1567         /**
1568          * Constant for the "Tai Xuan Jing Symbols" Unicode character block (U+1D300..U+1D35F).
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1572             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
1573                                                                      "TaiXuanJingSymbols"});
1574 
1575         /**
1576          * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block (U+1D400..U+1D7FF).
1577          * @since 1.5
1578          */
1579         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1580             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1581                              new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
1582 
1583         /**
1584          * Constant for the "CJK Unified Ideographs Extension B" Unicode character block (U+20000..U+2A6DF).
1585          * @since 1.5
1586          */
1587         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1588             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1589                              new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
1590 
1591         /**
1592          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block (U+2F800..U+2FA1F).
1593          * @since 1.5
1594          */
1595         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1596             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1597                              new String[]{"CJK Compatibility Ideographs Supplement",
1598                                           "CJKCompatibilityIdeographsSupplement"});
1599 
1600         /**
1601          * Constant for the "Tags" Unicode character block (U+E0000..U+E007F).
1602          * @since 1.5
1603          */
1604         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1605 
1606         /**
1607          * Constant for the "Variation Selectors Supplement" Unicode character block (U+E0100..U+E01EF).
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1611             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
1612                                                                              "VariationSelectorsSupplement"});
1613 
1614         /**
1615          * Constant for the "Supplementary Private Use Area-A" Unicode character block (U+F0000..U+FFFFF).
1616          * @since 1.5
1617          */
1618         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1619             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1620                              new String[] {"Supplementary Private Use Area-A",
1621                                            "SupplementaryPrivateUseArea-A"});
1622 
1623         /**
1624          * Constant for the "Supplementary Private Use Area-B" Unicode character block (U+100000..U+10FFFF).
1625          * @since 1.5
1626          */
1627         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1628             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1629                              new String[] {"Supplementary Private Use Area-B",
1630                                            "SupplementaryPrivateUseArea-B"});
1631 
1632         /**
1633          * Constant for the "High Surrogates" Unicode character block.
1634          * This block represents code point values in the high surrogate
1635          * range: 0xD800 through 0xDB7F.
1636          *
1637          * @since 1.5
1638          */
1639         public static final UnicodeBlock HIGH_SURROGATES =
1640             new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
1641 
1642         /**
1643          * Constant for the "High Private Use Surrogates" Unicode character block.
1644          * This block represents code point values in the high surrogate
1645          * range: 0xDB80 through 0xDBFF.
1646          *
1647          * @since 1.5
1648          */
1649         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1650             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
1651                                                                            "HighPrivateUseSurrogates"});
1652 
1653         /**
1654          * Constant for the "Low Surrogates" Unicode character block.
1655          * This block represents code point values in the low surrogate
1656          * range: 0xDC00 through 0xDFFF.
1657          *
1658          * @since 1.5
1659          */
1660         public static final UnicodeBlock LOW_SURROGATES =
1661             new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
1662 
1663         /**
1664          * Constant for the "Arabic Supplement" Unicode character block (U+0750..U+077F).
1665          * @since 1.7
1666          */
1667         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1668             new UnicodeBlock("ARABIC_SUPPLEMENT",
1669                              new String[] { "Arabic Supplement",
1670                                             "ArabicSupplement"});
1671 
1672         /**
1673          * Constant for the "NKo" Unicode character block (U+07C0..U+07FF).
1674          * @since 1.7
1675          */
1676         public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
1677 
1678         /**
1679          * Constant for the "Ethiopic Supplement" Unicode character block (U+1380..U+139F).
1680          * @since 1.7
1681          */
1682         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1683             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1684                              new String[] { "Ethiopic Supplement",
1685                                             "EthiopicSupplement"});
1686 
1687         /**
1688          * Constant for the "New Tai Lue" Unicode character block (U+1980..U+19DF).
1689          * @since 1.7
1690          */
1691         public static final UnicodeBlock NEW_TAI_LUE =
1692             new UnicodeBlock("NEW_TAI_LUE",
1693                              new String[] { "New Tai Lue",
1694                                             "NewTaiLue"});
1695 
1696         /**
1697          * Constant for the "Buginese" Unicode character block (U+1A00..U+1A1F).
1698          * @since 1.7
1699          */
1700         public static final UnicodeBlock BUGINESE =
1701             new UnicodeBlock("BUGINESE");
1702 
1703         /**
1704          * Constant for the "Balinese" Unicode character block (U+1B00..U+1B7F).
1705          * @since 1.7
1706          */
1707         public static final UnicodeBlock BALINESE =
1708             new UnicodeBlock("BALINESE");
1709 
1710         /**
1711          * Constant for the "Sundanese" Unicode character block (U+1B80..U+1BBF).
1712          * @since 1.7
1713          */
1714         public static final UnicodeBlock SUNDANESE =
1715             new UnicodeBlock("SUNDANESE");
1716 
1717         /**
1718          * Constant for the "Lepcha" Unicode character block (U+1C00..U+1C4F).
1719          * @since 1.7
1720          */
1721         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
1722 
1723         /**
1724          * Constant for the "Ol Chiki" Unicode character block (U+1C50..U+1C7F).
1725          * @since 1.7
1726          */
1727         public static final UnicodeBlock OL_CHIKI =
1728             new UnicodeBlock("OL_CHIKI",
1729                              new String[] { "Ol Chiki",
1730                                             "OlChiki"});
1731 
1732         /**
1733          * Constant for the "Phonetic Extensions Supplement" Unicode character
1734          * block (U+1D80..U+1DBF).
1735          * @since 1.7
1736          */
1737         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1738             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1739                              new String[] { "Phonetic Extensions Supplement",
1740                                             "PhoneticExtensionsSupplement"});
1741 
1742         /**
1743          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1744          * character block (U+1DC0..U+1DFF).
1745          * @since 1.7
1746          */
1747         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1748             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1749                              new String[] { "Combining Diacritical Marks Supplement",
1750                                             "CombiningDiacriticalMarksSupplement"});
1751 
1752         /**
1753          * Constant for the "Glagolitic" Unicode character block (U+2C00..U+2C5F).
1754          * @since 1.7
1755          */
1756         public static final UnicodeBlock GLAGOLITIC =
1757             new UnicodeBlock("GLAGOLITIC");
1758 
1759         /**
1760          * Constant for the "Latin Extended-C" Unicode character block (U+2C60..U+2C7F).
1761          * @since 1.7
1762          */
1763         public static final UnicodeBlock LATIN_EXTENDED_C =
1764             new UnicodeBlock("LATIN_EXTENDED_C",
1765                              new String[] { "Latin Extended-C",
1766                                             "LatinExtended-C"});
1767 
1768         /**
1769          * Constant for the "Coptic" Unicode character block (U+2C80..U+2CFF).
1770          * @since 1.7
1771          */
1772         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
1773 
1774         /**
1775          * Constant for the "Georgian Supplement" Unicode character block (U+2D00..U+2D2F).
1776          * @since 1.7
1777          */
1778         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1779             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1780                              new String[] { "Georgian Supplement",
1781                                             "GeorgianSupplement"});
1782 
1783         /**
1784          * Constant for the "Tifinagh" Unicode character block (U+2D30..U+2D7F).
1785          * @since 1.7
1786          */
1787         public static final UnicodeBlock TIFINAGH =
1788             new UnicodeBlock("TIFINAGH");
1789 
1790         /**
1791          * Constant for the "Ethiopic Extended" Unicode character block (U+2D80..U+2DDF).
1792          * @since 1.7
1793          */
1794         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1795             new UnicodeBlock("ETHIOPIC_EXTENDED",
1796                              new String[] { "Ethiopic Extended",
1797                                             "EthiopicExtended"});
1798 
1799         /**
1800          * Constant for the "Cyrillic Extended-A" Unicode character block (U+2DE0..U+2DFF).
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1804             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1805                              new String[] { "Cyrillic Extended-A",
1806                                             "CyrillicExtended-A"});
1807 
1808         /**
1809          * Constant for the "Supplemental Punctuation" Unicode character block (U+2E00..U+2E7F).
1810          * @since 1.7
1811          */
1812         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1813             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1814                              new String[] { "Supplemental Punctuation",
1815                                             "SupplementalPunctuation"});
1816 
1817         /**
1818          * Constant for the "CJK Strokes" Unicode character block (U+31C0..U+31EF).
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock CJK_STROKES =
1822             new UnicodeBlock("CJK_STROKES",
1823                              new String[] { "CJK Strokes",
1824                                             "CJKStrokes"});
1825 
1826         /**
1827          * Constant for the "Vai" Unicode character block (U+A500..U+A63F).
1828          * @since 1.7
1829          */
1830         public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
1831 
1832         /**
1833          * Constant for the "Cyrillic Extended-B" Unicode character block (U+A640..U+A69F).
1834          * @since 1.7
1835          */
1836         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1837             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1838                              new String[] { "Cyrillic Extended-B",
1839                                             "CyrillicExtended-B"});
1840 
1841         /**
1842          * Constant for the "Modifier Tone Letters" Unicode character block (U+A700..U+A71F).
1843          * @since 1.7
1844          */
1845         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1846             new UnicodeBlock("MODIFIER_TONE_LETTERS",
1847                              new String[] { "Modifier Tone Letters",
1848                                             "ModifierToneLetters"});
1849 
1850         /**
1851          * Constant for the "Latin Extended-D" Unicode character block (U+A720..U+A7FF).
1852          * @since 1.7
1853          */
1854         public static final UnicodeBlock LATIN_EXTENDED_D =
1855             new UnicodeBlock("LATIN_EXTENDED_D",
1856                              new String[] { "Latin Extended-D",
1857                                             "LatinExtended-D"});
1858 
1859         /**
1860          * Constant for the "Syloti Nagri" Unicode character block (U+A800..U+A82F).
1861          * @since 1.7
1862          */
1863         public static final UnicodeBlock SYLOTI_NAGRI =
1864             new UnicodeBlock("SYLOTI_NAGRI",
1865                              new String[] { "Syloti Nagri",
1866                                             "SylotiNagri"});
1867 
1868         /**
1869          * Constant for the "Phags-pa" Unicode character block (U+A840..U+A87F).
1870          * @since 1.7
1871          */
1872         public static final UnicodeBlock PHAGS_PA =
1873             new UnicodeBlock("PHAGS_PA", new String[] { "Phags-pa"});
1874 
1875         /**
1876          * Constant for the "Saurashtra" Unicode character block (U+A880..U+A8DF).
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock SAURASHTRA =
1880             new UnicodeBlock("SAURASHTRA");
1881 
1882         /**
1883          * Constant for the "Kayah Li" Unicode character block (U+A900..U+A92F).
1884          * @since 1.7
1885          */
1886         public static final UnicodeBlock KAYAH_LI =
1887             new UnicodeBlock("KAYAH_LI",
1888                              new String[] { "Kayah Li",
1889                                             "KayahLi"});
1890 
1891         /**
1892          * Constant for the "Rejang" Unicode character block (U+A930..U+A95F).
1893          * @since 1.7
1894          */
1895         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
1896 
1897         /**
1898          * Constant for the "Cham" Unicode character block (U+AA00..U+AA5F).
1899          * @since 1.7
1900          */
1901         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
1902 
1903         /**
1904          * Constant for the "Vertical Forms" Unicode character block (U+FE10..U+FE1F).
1905          * @since 1.7
1906          */
1907         public static final UnicodeBlock VERTICAL_FORMS =
1908             new UnicodeBlock("VERTICAL_FORMS",
1909                              new String[] { "Vertical Forms",
1910                                             "VerticalForms"});
1911 
1912         /**
1913          * Constant for the "Ancient Greek Numbers" Unicode character block (U+10140..U+1018F).
1914          * @since 1.7
1915          */
1916         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1917             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
1918                              new String[] { "Ancient Greek Numbers",
1919                                             "AncientGreekNumbers"});
1920 
1921         /**
1922          * Constant for the "Ancient Symbols" Unicode character block (U+10190..U+101CF).
1923          * @since 1.7
1924          */
1925         public static final UnicodeBlock ANCIENT_SYMBOLS =
1926             new UnicodeBlock("ANCIENT_SYMBOLS",
1927                              new String[] { "Ancient Symbols",
1928                                             "AncientSymbols"});
1929 
1930         /**
1931          * Constant for the "Phaistos Disc" Unicode character block (U+101D0..U+101FF).
1932          * @since 1.7
1933          */
1934         public static final UnicodeBlock PHAISTOS_DISC =
1935             new UnicodeBlock("PHAISTOS_DISC",
1936                              new String[] { "Phaistos Disc",
1937                                             "PhaistosDisc"});
1938 
1939         /**
1940          * Constant for the "Lycian" Unicode character block (U+10280..U+1029F).
1941          * @since 1.7
1942          */
1943         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
1944 
1945         /**
1946          * Constant for the "Carian" Unicode character block (U+102A0..U+102DF).
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
1950 
1951         /**
1952          * Constant for the "Old Persian" Unicode character block (U+103A0..U+103DF).
1953          * @since 1.7
1954          */
1955         public static final UnicodeBlock OLD_PERSIAN =
1956             new UnicodeBlock("OLD_PERSIAN",
1957                              new String[] { "Old Persian",
1958                                             "OldPersian"});
1959 
1960         /**
1961          * Constant for the "Phoenician" Unicode character block (U+10900..U+1091F).
1962          * @since 1.7
1963          */
1964         public static final UnicodeBlock PHOENICIAN =
1965             new UnicodeBlock("PHOENICIAN");
1966 
1967         /**
1968          * Constant for the "Lydian" Unicode character block (U+10920..U+1093F).
1969          * @since 1.7
1970          */
1971         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
1972 
1973         /**
1974          * Constant for the "Kharoshthi" Unicode character block (U+10A00..U+10A5F).
1975          * @since 1.7
1976          */
1977         public static final UnicodeBlock KHAROSHTHI =
1978             new UnicodeBlock("KHAROSHTHI");
1979 
1980         /**
1981          * Constant for the "Cuneiform" Unicode character block (U+12000..U+123FF).
1982          * @since 1.7
1983          */
1984         public static final UnicodeBlock CUNEIFORM =
1985             new UnicodeBlock("CUNEIFORM");
1986 
1987         /**
1988          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
1989          * character block (U+12400..U+1247F).
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1993             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1994                              new String[] { "Cuneiform Numbers and Punctuation",
1995                                             "CuneiformNumbersandPunctuation"});
1996 
1997         /**
1998          * Constant for the "Ancient Greek Musical Notation" Unicode character
1999          * block (U+1D200..U+1D24F).
2000          * @since 1.7
2001          */
2002         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2003             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2004                              new String[] { "Ancient Greek Musical Notation",
2005                                             "AncientGreekMusicalNotation"});
2006 
2007         /**
2008          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
2009          * @since 1.7
2010          */
2011         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2012             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2013                              new String[] { "Counting Rod Numerals",
2014                                             "CountingRodNumerals"});
2015 
2016         /**
2017          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
2018          * @since 1.7
2019          */
2020         public static final UnicodeBlock MAHJONG_TILES =
2021             new UnicodeBlock("MAHJONG_TILES",
2022                              new String[] { "Mahjong Tiles",
2023                                             "MahjongTiles"});
2024 
2025         /**
2026          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
2027          * @since 1.7
2028          */
2029         public static final UnicodeBlock DOMINO_TILES =
2030             new UnicodeBlock("DOMINO_TILES",
2031                              new String[] { "Domino Tiles",
2032                                             "DominoTiles"});
2033 
2034         private static final int blockStarts[] = { // ascending first code point of each range; "unassigned" gaps get their own entry. NOTE(review): appears index-aligned with blocks[] (null = unassigned) - confirm
2035             0x0000,   // 0000..007F; Basic Latin
2036             0x0080,   // 0080..00FF; Latin-1 Supplement
2037             0x0100,   // 0100..017F; Latin Extended-A
2038             0x0180,   // 0180..024F; Latin Extended-B
2039             0x0250,   // 0250..02AF; IPA Extensions
2040             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2041             0x0300,   // 0300..036F; Combining Diacritical Marks
2042             0x0370,   // 0370..03FF; Greek and Coptic
2043             0x0400,   // 0400..04FF; Cyrillic
2044             0x0500,   // 0500..052F; Cyrillic Supplement
2045             0x0530,   // 0530..058F; Armenian
2046             0x0590,   // 0590..05FF; Hebrew
2047             0x0600,   // 0600..06FF; Arabic
2048             0x0700,   // 0700..074F; Syriac
2049             0x0750,   // 0750..077F; Arabic Supplement
2050             0x0780,   // 0780..07BF; Thaana
2051             0x07C0,   // 07C0..07FF; NKo
2052             0x0800,   //             unassigned
2053             0x0900,   // 0900..097F; Devanagari
2054             0x0980,   // 0980..09FF; Bengali
2055             0x0A00,   // 0A00..0A7F; Gurmukhi
2056             0x0A80,   // 0A80..0AFF; Gujarati
2057             0x0B00,   // 0B00..0B7F; Oriya
2058             0x0B80,   // 0B80..0BFF; Tamil
2059             0x0C00,   // 0C00..0C7F; Telugu
2060             0x0C80,   // 0C80..0CFF; Kannada
2061             0x0D00,   // 0D00..0D7F; Malayalam
2062             0x0D80,   // 0D80..0DFF; Sinhala
2063             0x0E00,   // 0E00..0E7F; Thai
2064             0x0E80,   // 0E80..0EFF; Lao
2065             0x0F00,   // 0F00..0FFF; Tibetan
2066             0x1000,   // 1000..109F; Myanmar
2067             0x10A0,   // 10A0..10FF; Georgian
2068             0x1100,   // 1100..11FF; Hangul Jamo
2069             0x1200,   // 1200..137F; Ethiopic
2070             0x1380,   // 1380..139F; Ethiopic Supplement
2071             0x13A0,   // 13A0..13FF; Cherokee
2072             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2073             0x1680,   // 1680..169F; Ogham
2074             0x16A0,   // 16A0..16FF; Runic
2075             0x1700,   // 1700..171F; Tagalog
2076             0x1720,   // 1720..173F; Hanunoo
2077             0x1740,   // 1740..175F; Buhid
2078             0x1760,   // 1760..177F; Tagbanwa
2079             0x1780,   // 1780..17FF; Khmer
2080             0x1800,   // 1800..18AF; Mongolian
2081             0x18B0,   //             unassigned
2082             0x1900,   // 1900..194F; Limbu
2083             0x1950,   // 1950..197F; Tai Le
2084             0x1980,   // 1980..19DF; New Tai Lue
2085             0x19E0,   // 19E0..19FF; Khmer Symbols
2086             0x1A00,   // 1A00..1A1F; Buginese
2087             0x1A20,   //             unassigned
2088             0x1B00,   // 1B00..1B7F; Balinese
2089             0x1B80,   // 1B80..1BBF; Sundanese
2090             0x1BC0,   //             unassigned
2091             0x1C00,   // 1C00..1C4F; Lepcha
2092             0x1C50,   // 1C50..1C7F; Ol Chiki
2093             0x1C80,   //             unassigned
2094             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2095             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2096             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2097             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2098             0x1F00,   // 1F00..1FFF; Greek Extended
2099             0x2000,   // 2000..206F; General Punctuation
2100             0x2070,   // 2070..209F; Superscripts and Subscripts
2101             0x20A0,   // 20A0..20CF; Currency Symbols
2102             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2103             0x2100,   // 2100..214F; Letterlike Symbols
2104             0x2150,   // 2150..218F; Number Forms
2105             0x2190,   // 2190..21FF; Arrows
2106             0x2200,   // 2200..22FF; Mathematical Operators
2107             0x2300,   // 2300..23FF; Miscellaneous Technical
2108             0x2400,   // 2400..243F; Control Pictures
2109             0x2440,   // 2440..245F; Optical Character Recognition
2110             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2111             0x2500,   // 2500..257F; Box Drawing
2112             0x2580,   // 2580..259F; Block Elements
2113             0x25A0,   // 25A0..25FF; Geometric Shapes
2114             0x2600,   // 2600..26FF; Miscellaneous Symbols
2115             0x2700,   // 2700..27BF; Dingbats
2116             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2117             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2118             0x2800,   // 2800..28FF; Braille Patterns
2119             0x2900,   // 2900..297F; Supplemental Arrows-B
2120             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2121             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2122             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2123             0x2C00,   // 2C00..2C5F; Glagolitic
2124             0x2C60,   // 2C60..2C7F; Latin Extended-C
2125             0x2C80,   // 2C80..2CFF; Coptic
2126             0x2D00,   // 2D00..2D2F; Georgian Supplement
2127             0x2D30,   // 2D30..2D7F; Tifinagh
2128             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2129             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2130             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2131             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2132             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2133             0x2FE0,   //             unassigned
2134             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2135             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2136             0x3040,   // 3040..309F; Hiragana
2137             0x30A0,   // 30A0..30FF; Katakana
2138             0x3100,   // 3100..312F; Bopomofo
2139             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2140             0x3190,   // 3190..319F; Kanbun
2141             0x31A0,   // 31A0..31BF; Bopomofo Extended
2142             0x31C0,   // 31C0..31EF; CJK Strokes
2143             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2144             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2145             0x3300,   // 3300..33FF; CJK Compatibility
2146             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2147             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2148             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2149             0xA000,   // A000..A48F; Yi Syllables
2150             0xA490,   // A490..A4CF; Yi Radicals
2151             0xA4D0,   //             unassigned
2152             0xA500,   // A500..A63F; Vai
2153             0xA640,   // A640..A69F; Cyrillic Extended-B
2154             0xA6A0,   //             unassigned
2155             0xA700,   // A700..A71F; Modifier Tone Letters
2156             0xA720,   // A720..A7FF; Latin Extended-D
2157             0xA800,   // A800..A82F; Syloti Nagri
2158             0xA830,   //             unassigned
2159             0xA840,   // A840..A87F; Phags-pa
2160             0xA880,   // A880..A8DF; Saurashtra
2161             0xA8E0,   //             unassigned
2162             0xA900,   // A900..A92F; Kayah Li
2163             0xA930,   // A930..A95F; Rejang
2164             0xA960,   //             unassigned
2165             0xAA00,   // AA00..AA5F; Cham
2166             0xAA60,   //             unassigned
2167             0xAC00,   // AC00..D7AF; Hangul Syllables
2168             0xD7B0,   //             unassigned
2169             0xD800,   // D800..DB7F; High Surrogates
2170             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2171             0xDC00,   // DC00..DFFF; Low Surrogates
2172             0xE000,   // E000..F8FF; Private Use Area
2173             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2174             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2175             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2176             0xFE00,   // FE00..FE0F; Variation Selectors
2177             0xFE10,   // FE10..FE1F; Vertical Forms
2178             0xFE20,   // FE20..FE2F; Combining Half Marks
2179             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2180             0xFE50,   // FE50..FE6F; Small Form Variants
2181             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2182             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2183             0xFFF0,   // FFF0..FFFF; Specials
2184             0x10000,  // 10000..1007F; Linear B Syllabary
2185             0x10080,  // 10080..100FF; Linear B Ideograms
2186             0x10100,  // 10100..1013F; Aegean Numbers
2187             0x10140,  // 10140..1018F; Ancient Greek Numbers
2188             0x10190,  // 10190..101CF; Ancient Symbols
2189             0x101D0,  // 101D0..101FF; Phaistos Disc
2190             0x10200,  //               unassigned
2191             0x10280,  // 10280..1029F; Lycian
2192             0x102A0,  // 102A0..102DF; Carian
2193             0x102E0,  //               unassigned
2194             0x10300,  // 10300..1032F; Old Italic
2195             0x10330,  // 10330..1034F; Gothic
2196             0x10350,  //               unassigned
2197             0x10380,  // 10380..1039F; Ugaritic
2198             0x103A0,  // 103A0..103DF; Old Persian
2199             0x103E0,  //               unassigned
2200             0x10400,  // 10400..1044F; Deseret
2201             0x10450,  // 10450..1047F; Shavian
2202             0x10480,  // 10480..104AF; Osmanya
2203             0x104B0,  //               unassigned
2204             0x10800,  // 10800..1083F; Cypriot Syllabary
2205             0x10840,  //               unassigned
2206             0x10900,  // 10900..1091F; Phoenician
2207             0x10920,  // 10920..1093F; Lydian
2208             0x10940,  //               unassigned
2209             0x10A00,  // 10A00..10A5F; Kharoshthi
2210             0x10A60,  //               unassigned
2211             0x12000,  // 12000..123FF; Cuneiform
2212             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2213             0x12480,  //               unassigned
2214             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2215             0x1D100,  // 1D100..1D1FF; Musical Symbols
2216             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2217             0x1D250,  //               unassigned
2218             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2219             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2220             0x1D380,  //               unassigned
2221             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2222             0x1D800,  //               unassigned
2223             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2224             0x1F030,  // 1F030..1F09F; Domino Tiles
2225             0x1F0A0,  //               unassigned
2226             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2227             0x2A6E0,  //               unassigned
2228             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2229             0x2FA20,  //               unassigned
2230             0xE0000,  // E0000..E007F; Tags
2231             0xE0080,  //               unassigned
2232             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2233             0xE01F0,  //               unassigned
2234             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2235             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
2236         };
2237 
2238         private static final UnicodeBlock[] blocks = {
                 // Indexed in parallel with blockStarts: of(int) binary-searches
                 // blockStarts and returns the entry of this array at the same index.
                 // A null entry lines up with a range that blockStarts labels
                 // "unassigned", which is how of(int) comes to return null there.
2239             BASIC_LATIN,
2240             LATIN_1_SUPPLEMENT,
2241             LATIN_EXTENDED_A,
2242             LATIN_EXTENDED_B,
2243             IPA_EXTENSIONS,
2244             SPACING_MODIFIER_LETTERS,
2245             COMBINING_DIACRITICAL_MARKS,
2246             GREEK,
2247             CYRILLIC,
2248             CYRILLIC_SUPPLEMENTARY,
2249             ARMENIAN,
2250             HEBREW,
2251             ARABIC,
2252             SYRIAC,
2253             ARABIC_SUPPLEMENT,
2254             THAANA,
2255             NKO,
2256             null,
2257             DEVANAGARI,
2258             BENGALI,
2259             GURMUKHI,
2260             GUJARATI,
2261             ORIYA,
2262             TAMIL,
2263             TELUGU,
2264             KANNADA,
2265             MALAYALAM,
2266             SINHALA,
2267             THAI,
2268             LAO,
2269             TIBETAN,
2270             MYANMAR,
2271             GEORGIAN,
2272             HANGUL_JAMO,
2273             ETHIOPIC,
2274             ETHIOPIC_SUPPLEMENT,
2275             CHEROKEE,
2276             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2277             OGHAM,
2278             RUNIC,
2279             TAGALOG,
2280             HANUNOO,
2281             BUHID,
2282             TAGBANWA,
2283             KHMER,
2284             MONGOLIAN,
2285             null,
2286             LIMBU,
2287             TAI_LE,
2288             NEW_TAI_LUE,
2289             KHMER_SYMBOLS,
2290             BUGINESE,
2291             null,
2292             BALINESE,
2293             SUNDANESE,
2294             null,
2295             LEPCHA,
2296             OL_CHIKI,
2297             null,
2298             PHONETIC_EXTENSIONS,
2299             PHONETIC_EXTENSIONS_SUPPLEMENT,
2300             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2301             LATIN_EXTENDED_ADDITIONAL,
2302             GREEK_EXTENDED,
2303             GENERAL_PUNCTUATION,
2304             SUPERSCRIPTS_AND_SUBSCRIPTS,
2305             CURRENCY_SYMBOLS,
2306             COMBINING_MARKS_FOR_SYMBOLS,
2307             LETTERLIKE_SYMBOLS,
2308             NUMBER_FORMS,
2309             ARROWS,
2310             MATHEMATICAL_OPERATORS,
2311             MISCELLANEOUS_TECHNICAL,
2312             CONTROL_PICTURES,
2313             OPTICAL_CHARACTER_RECOGNITION,
2314             ENCLOSED_ALPHANUMERICS,
2315             BOX_DRAWING,
2316             BLOCK_ELEMENTS,
2317             GEOMETRIC_SHAPES,
2318             MISCELLANEOUS_SYMBOLS,
2319             DINGBATS,
2320             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2321             SUPPLEMENTAL_ARROWS_A,
2322             BRAILLE_PATTERNS,
2323             SUPPLEMENTAL_ARROWS_B,
2324             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2325             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2326             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2327             GLAGOLITIC,
2328             LATIN_EXTENDED_C,
2329             COPTIC,
2330             GEORGIAN_SUPPLEMENT,
2331             TIFINAGH,
2332             ETHIOPIC_EXTENDED,
2333             CYRILLIC_EXTENDED_A,
2334             SUPPLEMENTAL_PUNCTUATION,
2335             CJK_RADICALS_SUPPLEMENT,
2336             KANGXI_RADICALS,
2337             null,
2338             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2339             CJK_SYMBOLS_AND_PUNCTUATION,
2340             HIRAGANA,
2341             KATAKANA,
2342             BOPOMOFO,
2343             HANGUL_COMPATIBILITY_JAMO,
2344             KANBUN,
2345             BOPOMOFO_EXTENDED,
2346             CJK_STROKES,
2347             KATAKANA_PHONETIC_EXTENSIONS,
2348             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2349             CJK_COMPATIBILITY,
2350             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2351             YIJING_HEXAGRAM_SYMBOLS,
2352             CJK_UNIFIED_IDEOGRAPHS,
2353             YI_SYLLABLES,
2354             YI_RADICALS,
2355             null,
2356             VAI,
2357             CYRILLIC_EXTENDED_B,
2358             null,
2359             MODIFIER_TONE_LETTERS,
2360             LATIN_EXTENDED_D,
2361             SYLOTI_NAGRI,
2362             null,
2363             PHAGS_PA,
2364             SAURASHTRA,
2365             null,
2366             KAYAH_LI,
2367             REJANG,
2368             null,
2369             CHAM,
2370             null,
2371             HANGUL_SYLLABLES,
2372             null,
2373             HIGH_SURROGATES,
2374             HIGH_PRIVATE_USE_SURROGATES,
2375             LOW_SURROGATES,
2376             PRIVATE_USE_AREA,
2377             CJK_COMPATIBILITY_IDEOGRAPHS,
2378             ALPHABETIC_PRESENTATION_FORMS,
2379             ARABIC_PRESENTATION_FORMS_A,
2380             VARIATION_SELECTORS,
2381             VERTICAL_FORMS,
2382             COMBINING_HALF_MARKS,
2383             CJK_COMPATIBILITY_FORMS,
2384             SMALL_FORM_VARIANTS,
2385             ARABIC_PRESENTATION_FORMS_B,
2386             HALFWIDTH_AND_FULLWIDTH_FORMS,
2387             SPECIALS,
2388             LINEAR_B_SYLLABARY,
2389             LINEAR_B_IDEOGRAMS,
2390             AEGEAN_NUMBERS,
2391             ANCIENT_GREEK_NUMBERS,
2392             ANCIENT_SYMBOLS,
2393             PHAISTOS_DISC,
2394             null,
2395             LYCIAN,
2396             CARIAN,
2397             null,
2398             OLD_ITALIC,
2399             GOTHIC,
2400             null,
2401             UGARITIC,
2402             OLD_PERSIAN,
2403             null,
2404             DESERET,
2405             SHAVIAN,
2406             OSMANYA,
2407             null,
2408             CYPRIOT_SYLLABARY,
2409             null,
2410             PHOENICIAN,
2411             LYDIAN,
2412             null,
2413             KHAROSHTHI,
2414             null,
2415             CUNEIFORM,
2416             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2417             null,
2418             BYZANTINE_MUSICAL_SYMBOLS,
2419             MUSICAL_SYMBOLS,
2420             ANCIENT_GREEK_MUSICAL_NOTATION,
2421             null,
2422             TAI_XUAN_JING_SYMBOLS,
2423             COUNTING_ROD_NUMERALS,
2424             null,
2425             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2426             null,
2427             MAHJONG_TILES,
2428             DOMINO_TILES,
2429             null,
2430             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2431             null,
2432             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2433             null,
2434             TAGS,
2435             null,
2436             VARIATION_SELECTORS_SUPPLEMENT,
2437             null,
2438             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2439             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2440         };
2441 
2442 
2443         /**
2444          * Returns the object representing the Unicode block containing the
2445          * given character, or <code>null</code> if the character is not a
2446          * member of a defined block.
2447          *
2448                  * <p><b>Note:</b> This method cannot handle <a
2449                  * href="Character.html#supplementary"> supplementary
2450                  * characters</a>. To support all Unicode characters,
2451                  * including supplementary characters, use the {@link
2452                  * #of(int)} method.
2453          *
2454          * @param   c  The character in question
2455          * @return  The <code>UnicodeBlock</code> instance representing the
2456          *          Unicode block of which this character is a member, or
2457          *          <code>null</code> if the character is not a member of any
2458          *          Unicode block
2459          */
2460         public static UnicodeBlock of(char c) {
2461             return of((int)c);
2462         }
2463 
2464 
2465         /**
2466          * Returns the object representing the Unicode block
2467          * containing the given character (Unicode code point), or
2468          * <code>null</code> if the character is not a member of a
2469          * defined block.
2470          *
2471                  * @param   codePoint the character (Unicode code point) in question.
2472          * @return  The <code>UnicodeBlock</code> instance representing the
2473          *          Unicode block of which this character is a member, or
2474          *          <code>null</code> if the character is not a member of any
2475          *          Unicode block
2476                  * @exception IllegalArgumentException if the specified
2477                  * <code>codePoint</code> is an invalid Unicode code point.
2478                  * @see Character#isValidCodePoint(int)
2479                  * @since   1.5
2480          */
2481         public static UnicodeBlock of(int codePoint) {
2482             if (!isValidCodePoint(codePoint)) {
2483                 throw new IllegalArgumentException();
2484             }
2485 
2486             int top, bottom, current;
2487             bottom = 0;
2488             top = blockStarts.length;
2489             current = top/2;
2490 
2491             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
2492             while (top - bottom > 1) {
2493                 if (codePoint >= blockStarts[current]) {
2494                     bottom = current;
2495                 } else {
2496                     top = current;
2497                 }
2498                 current = (top + bottom) / 2;
2499             }
2500             return blocks[current];
2501         }
2502 
2503         /**
2504          * Returns the UnicodeBlock with the given name. Block
2505          * names are determined by The Unicode Standard. The file
2506          * Blocks-&lt;version&gt;.txt defines blocks for a particular
2507          * version of the standard. The {@link Character} class specifies
2508          * the version of the standard that it supports.
2509          * <p>
2510          * This method accepts block names in the following forms:
2511          * <ol>
2512          * <li> Canonical block names as defined by the Unicode Standard.
2513          * For example, the standard defines a "Basic Latin" block. Therefore, this
2514          * method accepts "Basic Latin" as a valid block name. The documentation of
2515          * each UnicodeBlock provides the canonical name.
2516          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2517          * is a valid block name for the "Basic Latin" block.
2518          * <li>The text representation of each constant UnicodeBlock identifier.
2519          * For example, this method will return the {@link #BASIC_LATIN} block if
2520          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2521          *  hyphens in the canonical name with underscores.
2522          * </ol>
2523          * Finally, character case is ignored for all of the valid block name forms.
2524          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2525          * The en_US locale's case mapping rules are used to provide case-insensitive
2526          * string comparisons for block name validation.
2527          * <p>
2528          * If the Unicode Standard changes block names, both the previous and
2529          * current names will be accepted.
2530          *
2531          * @param blockName A <code>UnicodeBlock</code> name.
2532          * @return The <code>UnicodeBlock</code> instance identified
2533          *         by <code>blockName</code>
2534          * @throws IllegalArgumentException if <code>blockName</code> is an
2535          *         invalid name
2536          * @throws NullPointerException if <code>blockName</code> is null
2537          * @since 1.5
2538          */
2539         public static final UnicodeBlock forName(String blockName) {
2540             UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2541             if (block == null) {
2542                 throw new IllegalArgumentException();
2543             }
2544             return block;
2545         }
2546     }
2547 
2548 
2549     /**
2550      * The primitive <code>char</code> value wrapped by this <code>Character</code>.
2551      *
2552      * @serial
2553      */
2554     private final char value;
2555 
2556     /** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
2557     private static final long serialVersionUID = 3786198910865385080L;
2558 
2559     /**
2560      * Constructs a newly allocated <code>Character</code> object that
2561      * represents the specified <code>char</code> value.
2562      *
2563      * @param  value   the value to be represented by the
2564      *                  <code>Character</code> object.
2565      */
2566     public Character(char value) {
2567         this.value = value;
2568     }
2569 
2570     private static class CharacterCache {
2571         private CharacterCache(){}
2572 
2573         static final Character cache[] = new Character[127 + 1];
2574 
2575         static {
2576             for(int i = 0; i < cache.length; i++)
2577                 cache[i] = new Character((char)i);
2578         }
2579     }
2580 
2581     /**
2582      * Returns a <tt>Character</tt> instance representing the specified
2583      * <tt>char</tt> value.
2584      * If a new <tt>Character</tt> instance is not required, this method
2585      * should generally be used in preference to the constructor
2586      * {@link #Character(char)}, as this method is likely to yield
2587      * significantly better space and time performance by caching
2588      * frequently requested values.
2589      *
2590      * This method will always cache values in the range {@code
2591      * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may
2592      * cache other values outside of this range.
2593      *
2594      * @param  c a char value.
2595      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2596      * @since  1.5
2597      */
2598     public static Character valueOf(char c) {
2599         if(c <= 127) { // must cache
2600             return CharacterCache.cache[(int)c];
2601         }
2602         return new Character(c);
2603     }
2604 
2605     /**
2606      * Returns the value of this <code>Character</code> object.
2607      * @return  the primitive <code>char</code> value represented by
2608      *          this object.
2609      */
2610     public char charValue() {
2611         return value;
2612     }
2613 
2614     /**
2615      * Returns a hash code for this {@code Character}; equal to the result
2616      * of invoking {@code charValue()}.
2617      *
2618      * @return a hash code value for this {@code Character}
2619      */
2620     public int hashCode() {
2621         return (int)value;
2622     }
2623 
2624     /**
2625      * Compares this object against the specified object.
2626      * The result is <code>true</code> if and only if the argument is not
2627      * <code>null</code> and is a <code>Character</code> object that
2628      * represents the same <code>char</code> value as this object.
2629      *
2630      * @param   obj   the object to compare with.
2631      * @return  <code>true</code> if the objects are the same;
2632      *          <code>false</code> otherwise.
2633      */
2634     public boolean equals(Object obj) {
2635         if (obj instanceof Character) {
2636             return value == ((Character)obj).charValue();
2637         }
2638         return false;
2639     }
2640 
2641     /**
2642      * Returns a <code>String</code> object representing this
2643      * <code>Character</code>'s value.  The result is a string of
2644      * length 1 whose sole component is the primitive
2645      * <code>char</code> value represented by this
2646      * <code>Character</code> object.
2647      *
2648      * @return  a string representation of this object.
2649      */
2650     public String toString() {
2651         char buf[] = {value};
2652         return String.valueOf(buf);
2653     }
2654 
2655     /**
2656      * Returns a <code>String</code> object representing the
2657      * specified <code>char</code>.  The result is a string of length
2658      * 1 consisting solely of the specified <code>char</code>.
2659      *
2660      * @param c the <code>char</code> to be converted
2661      * @return the string representation of the specified <code>char</code>
2662      * @since 1.4
2663      */
2664     public static String toString(char c) {
2665         return String.valueOf(c);
2666     }
2667 
2668     /**
2669      * Determines whether the specified code point is a valid
2670      * <a href="http://www.unicode.org/glossary/#code_point">
2671      * Unicode code point value</a>.
2672      *
2673      * @param  codePoint the Unicode code point to be tested
2674      * @return {@code true} if the specified code point value is between
2675      *         {@link #MIN_CODE_POINT} and
2676      *         {@link #MAX_CODE_POINT} inclusive;
2677      *         {@code false} otherwise.
2678      * @since  1.5
2679      */
2680     public static boolean isValidCodePoint(int codePoint) {
2681         return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2682     }
2683 
2684     /**
2685      * Determines whether the specified character (Unicode code point)
2686      * is in the <a href="#supplementary">supplementary character</a> range.
2687      *
2688      * @param  codePoint the character (Unicode code point) to be tested
2689      * @return {@code true} if the specified code point is between
2690      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
2691      *         {@link #MAX_CODE_POINT} inclusive;
2692      *         {@code false} otherwise.
2693      * @since  1.5
2694      */
2695     public static boolean isSupplementaryCodePoint(int codePoint) {
2696         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2697             && codePoint <= MAX_CODE_POINT;
2698     }
2699 
2700     /**
2701      * Determines if the given {@code char} value is a
2702      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
2703      * Unicode high-surrogate code unit</a>
2704      * (also known as <i>leading-surrogate code unit</i>).
2705      *
2706      * <p>Such values do not represent characters by themselves,
2707      * but are used in the representation of
2708      * <a href="#supplementary">supplementary characters</a>
2709      * in the UTF-16 encoding.
2710      *
2711      * @param  ch the {@code char} value to be tested.
2712      * @return {@code true} if the {@code char} value is between
2713      *         {@link #MIN_HIGH_SURROGATE} and
2714      *         {@link #MAX_HIGH_SURROGATE} inclusive;
2715      *         {@code false} otherwise.
2716      * @see    #isLowSurrogate(char)
2717      * @see    Character.UnicodeBlock#of(int)
2718      * @since  1.5
2719      */
2720     public static boolean isHighSurrogate(char ch) {
2721         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2722     }
2723 
2724     /**
2725      * Determines if the given {@code char} value is a
2726      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
2727      * Unicode low-surrogate code unit</a>
2728      * (also known as <i>trailing-surrogate code unit</i>).
2729      *
2730      * <p>Such values do not represent characters by themselves,
2731      * but are used in the representation of
2732      * <a href="#supplementary">supplementary characters</a>
2733      * in the UTF-16 encoding.
2734      *
2735      * @param  ch the {@code char} value to be tested.
2736      * @return {@code true} if the {@code char} value is between
2737      *         {@link #MIN_LOW_SURROGATE} and
2738      *         {@link #MAX_LOW_SURROGATE} inclusive;
2739      *         {@code false} otherwise.
2740      * @see    #isHighSurrogate(char)
2741      * @since  1.5
2742      */
2743     public static boolean isLowSurrogate(char ch) {
2744         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2745     }
2746 
2747     /**
2748      * Determines if the given {@code char} value is a Unicode
2749      * <i>surrogate code unit</i>.
2750      *
2751      * <p>Such values do not represent characters by themselves,
2752      * but are used in the representation of
2753      * <a href="#supplementary">supplementary characters</a>
2754      * in the UTF-16 encoding.
2755      *
2756      * <p>A char value is a surrogate code unit if and only if it is either
2757      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
2758      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
2759      *
2760      * @param  ch the {@code char} value to be tested.
2761      * @return {@code true} if the {@code char} value is between
2762      *         {@link #MIN_SURROGATE} and
2763      *         {@link #MAX_SURROGATE} inclusive;
2764      *         {@code false} otherwise.
2765      * @since  1.7
2766      */
2767     public static boolean isSurrogate(char ch) {
2768         return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
2769     }
2770 
2771     /**
2772      * Determines whether the specified pair of <code>char</code>
2773      * values is a valid
2774      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
2775      * Unicode surrogate pair</a>.
2776 
2777      * <p>This method is equivalent to the expression:
2778      * <blockquote><pre>
2779      * isHighSurrogate(high) && isLowSurrogate(low)
2780      * </pre></blockquote>
2781      *
2782      * @param  high the high-surrogate code value to be tested
2783      * @param  low the low-surrogate code value to be tested
2784      * @return <code>true</code> if the specified high and
2785      * low-surrogate code values represent a valid surrogate pair;
2786      * <code>false</code> otherwise.
2787      * @since  1.5
2788      */
2789     public static boolean isSurrogatePair(char high, char low) {
2790         return isHighSurrogate(high) && isLowSurrogate(low);
2791     }
2792 
2793     /**
2794      * Determines the number of <code>char</code> values needed to
2795      * represent the specified character (Unicode code point). If the
2796      * specified character is equal to or greater than 0x10000, then
2797      * the method returns 2. Otherwise, the method returns 1.
2798      *
2799      * <p>This method doesn't validate the specified character to be a
2800      * valid Unicode code point. The caller must validate the
2801      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2802      * if necessary.
2803      *
2804      * @param   codePoint the character (Unicode code point) to be tested.
2805      * @return  2 if the character is a valid supplementary character; 1 otherwise.
2806      * @see     #isSupplementaryCodePoint(int)
2807      * @since   1.5
2808      */
2809     public static int charCount(int codePoint) {
2810         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2811     }
2812 
2813     /**
2814      * Converts the specified surrogate pair to its supplementary code
2815      * point value. This method does not validate the specified
2816      * surrogate pair. The caller must validate it using {@link
2817      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2818      *
2819      * @param  high the high-surrogate code unit
2820      * @param  low the low-surrogate code unit
2821      * @return the supplementary code point composed from the
2822      *         specified surrogate pair.
2823      * @since  1.5
2824      */
2825     public static int toCodePoint(char high, char low) {
2826         // Optimized form of:
2827         // return ((high - MIN_HIGH_SURROGATE) << 10)
2828         //         + (low - MIN_LOW_SURROGATE)
2829         //         + MIN_SUPPLEMENTARY_CODE_POINT;
2830         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
2831                                        - (MIN_HIGH_SURROGATE << 10)
2832                                        - MIN_LOW_SURROGATE);
2833     }
2834 
2835     /**
2836      * Returns the code point at the given index of the
2837      * <code>CharSequence</code>. If the <code>char</code> value at
2838      * the given index in the <code>CharSequence</code> is in the
2839      * high-surrogate range, the following index is less than the
2840      * length of the <code>CharSequence</code>, and the
2841      * <code>char</code> value at the following index is in the
2842      * low-surrogate range, then the supplementary code point
2843      * corresponding to this surrogate pair is returned. Otherwise,
2844      * the <code>char</code> value at the given index is returned.
2845      *
2846      * @param seq a sequence of <code>char</code> values (Unicode code
2847      * units)
2848      * @param index the index to the <code>char</code> values (Unicode
2849      * code units) in <code>seq</code> to be converted
2850      * @return the Unicode code point at the given index
2851      * @exception NullPointerException if <code>seq</code> is null.
2852      * @exception IndexOutOfBoundsException if the value
2853      * <code>index</code> is negative or not less than
2854      * {@link CharSequence#length() seq.length()}.
2855      * @since  1.5
2856      */
2857     public static int codePointAt(CharSequence seq, int index) {
2858         char c1 = seq.charAt(index++);
2859         if (isHighSurrogate(c1)) {
2860             if (index < seq.length()) {
2861                 char c2 = seq.charAt(index);
2862                 if (isLowSurrogate(c2)) {
2863                     return toCodePoint(c1, c2);
2864                 }
2865             }
2866         }
2867         return c1;
2868     }
2869 
2870     /**
2871      * Returns the code point at the given index of the
2872      * <code>char</code> array. If the <code>char</code> value at
2873      * the given index in the <code>char</code> array is in the
2874      * high-surrogate range, the following index is less than the
2875      * length of the <code>char</code> array, and the
2876      * <code>char</code> value at the following index is in the
2877      * low-surrogate range, then the supplementary code point
2878      * corresponding to this surrogate pair is returned. Otherwise,
2879      * the <code>char</code> value at the given index is returned.
2880      *
2881      * @param a the <code>char</code> array
2882      * @param index the index to the <code>char</code> values (Unicode
2883      * code units) in the <code>char</code> array to be converted
2884      * @return the Unicode code point at the given index
2885      * @exception NullPointerException if <code>a</code> is null.
2886      * @exception IndexOutOfBoundsException if the value
2887      * <code>index</code> is negative or not less than
2888      * the length of the <code>char</code> array.
2889      * @since  1.5
2890      */
2891     public static int codePointAt(char[] a, int index) {
2892         return codePointAtImpl(a, index, a.length);
2893     }
2894 
2895     /**
2896      * Returns the code point at the given index of the
2897      * <code>char</code> array, where only array elements with
2898      * <code>index</code> less than <code>limit</code> can be used. If
2899      * the <code>char</code> value at the given index in the
2900      * <code>char</code> array is in the high-surrogate range, the
2901      * following index is less than the <code>limit</code>, and the
2902      * <code>char</code> value at the following index is in the
2903      * low-surrogate range, then the supplementary code point
2904      * corresponding to this surrogate pair is returned. Otherwise,
2905      * the <code>char</code> value at the given index is returned.
2906      *
2907      * @param a the <code>char</code> array
2908      * @param index the index to the <code>char</code> values (Unicode
2909      * code units) in the <code>char</code> array to be converted
2910      * @param limit the index after the last array element that can be used in the
2911      * <code>char</code> array
2912      * @return the Unicode code point at the given index
2913      * @exception NullPointerException if <code>a</code> is null.
2914      * @exception IndexOutOfBoundsException if the <code>index</code>
2915      * argument is negative or not less than the <code>limit</code>
2916      * argument, or if the <code>limit</code> argument is negative or
2917      * greater than the length of the <code>char</code> array.
2918      * @since  1.5
2919      */
2920     public static int codePointAt(char[] a, int index, int limit) {
2921         if (index >= limit || limit < 0 || limit > a.length) {
2922             throw new IndexOutOfBoundsException();
2923         }
2924         return codePointAtImpl(a, index, limit);
2925     }
2926 
2927     static int codePointAtImpl(char[] a, int index, int limit) {
2928         char c1 = a[index++];
2929         if (isHighSurrogate(c1)) {
2930             if (index < limit) {
2931                 char c2 = a[index];
2932                 if (isLowSurrogate(c2)) {
2933                     return toCodePoint(c1, c2);
2934                 }
2935             }
2936         }
2937         return c1;
2938     }
2939 
2940     /**
2941      * Returns the code point preceding the given index of the
2942      * <code>CharSequence</code>. If the <code>char</code> value at
2943      * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2944      * the low-surrogate range, <code>(index - 2)</code> is not
2945      * negative, and the <code>char</code> value at <code>(index -
2946      * 2)</code> in the <code>CharSequence</code> is in the
2947      * high-surrogate range, then the supplementary code point
2948      * corresponding to this surrogate pair is returned. Otherwise,
2949      * the <code>char</code> value at <code>(index - 1)</code> is
2950      * returned.
2951      *
2952      * @param seq the <code>CharSequence</code> instance
2953      * @param index the index following the code point that should be returned
2954      * @return the Unicode code point value before the given index.
2955      * @exception NullPointerException if <code>seq</code> is null.
2956      * @exception IndexOutOfBoundsException if the <code>index</code>
2957      * argument is less than 1 or greater than {@link
2958      * CharSequence#length() seq.length()}.
2959      * @since  1.5
2960      */
2961     public static int codePointBefore(CharSequence seq, int index) {
2962         char c2 = seq.charAt(--index);
2963         if (isLowSurrogate(c2)) {
2964             if (index > 0) {
2965                 char c1 = seq.charAt(--index);
2966                 if (isHighSurrogate(c1)) {
2967                     return toCodePoint(c1, c2);
2968                 }
2969             }
2970         }
2971         return c2;
2972     }
2973 
2974     /**
2975      * Returns the code point preceding the given index of the
2976      * <code>char</code> array. If the <code>char</code> value at
2977      * <code>(index - 1)</code> in the <code>char</code> array is in
2978      * the low-surrogate range, <code>(index - 2)</code> is not
2979      * negative, and the <code>char</code> value at <code>(index -
2980      * 2)</code> in the <code>char</code> array is in the
2981      * high-surrogate range, then the supplementary code point
2982      * corresponding to this surrogate pair is returned. Otherwise,
2983      * the <code>char</code> value at <code>(index - 1)</code> is
2984      * returned.
2985      *
2986      * @param a the <code>char</code> array
2987      * @param index the index following the code point that should be returned
2988      * @return the Unicode code point value before the given index.
2989      * @exception NullPointerException if <code>a</code> is null.
2990      * @exception IndexOutOfBoundsException if the <code>index</code>
2991      * argument is less than 1 or greater than the length of the
2992      * <code>char</code> array
2993      * @since  1.5
2994      */
2995     public static int codePointBefore(char[] a, int index) {
2996         return codePointBeforeImpl(a, index, 0);
2997     }
2998 
2999     /**
3000      * Returns the code point preceding the given index of the
3001      * <code>char</code> array, where only array elements with
3002      * <code>index</code> greater than or equal to <code>start</code>
3003      * can be used. If the <code>char</code> value at <code>(index -
3004      * 1)</code> in the <code>char</code> array is in the
3005      * low-surrogate range, <code>(index - 2)</code> is not less than
3006      * <code>start</code>, and the <code>char</code> value at
3007      * <code>(index - 2)</code> in the <code>char</code> array is in
3008      * the high-surrogate range, then the supplementary code point
3009      * corresponding to this surrogate pair is returned. Otherwise,
3010      * the <code>char</code> value at <code>(index - 1)</code> is
3011      * returned.
3012      *
3013      * @param a the <code>char</code> array
3014      * @param index the index following the code point that should be returned
3015      * @param start the index of the first array element in the
3016      * <code>char</code> array
3017      * @return the Unicode code point value before the given index.
3018      * @exception NullPointerException if <code>a</code> is null.
3019      * @exception IndexOutOfBoundsException if the <code>index</code>
3020      * argument is not greater than the <code>start</code> argument or
3021      * is greater than the length of the <code>char</code> array, or
3022      * if the <code>start</code> argument is negative or not less than
3023      * the length of the <code>char</code> array.
3024      * @since  1.5
3025      */
3026     public static int codePointBefore(char[] a, int index, int start) {
3027         if (index <= start || start < 0 || start >= a.length) {
3028             throw new IndexOutOfBoundsException();
3029         }
3030         return codePointBeforeImpl(a, index, start);
3031     }
3032 
3033     static int codePointBeforeImpl(char[] a, int index, int start) {
3034         char c2 = a[--index];
3035         if (isLowSurrogate(c2)) {
3036             if (index > start) {
3037                 char c1 = a[--index];
3038                 if (isHighSurrogate(c1)) {
3039                     return toCodePoint(c1, c2);
3040                 }
3041             }
3042         }
3043         return c2;
3044     }
3045 
3046     /**
3047      * Converts the specified character (Unicode code point) to its
3048      * UTF-16 representation. If the specified code point is a BMP
3049      * (Basic Multilingual Plane or Plane 0) value, the same value is
3050      * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
3051      * specified code point is a supplementary character, its
3052      * surrogate values are stored in <code>dst[dstIndex]</code>
3053      * (high-surrogate) and <code>dst[dstIndex+1]</code>
3054      * (low-surrogate), and 2 is returned.
3055      *
3056      * @param  codePoint the character (Unicode code point) to be converted.
3057      * @param  dst an array of <code>char</code> in which the
3058      * <code>codePoint</code>'s UTF-16 value is stored.
3059      * @param dstIndex the start index into the <code>dst</code>
3060      * array where the converted value is stored.
3061      * @return 1 if the code point is a BMP code point, 2 if the
3062      * code point is a supplementary code point.
3063      * @exception IllegalArgumentException if the specified
3064      * <code>codePoint</code> is not a valid Unicode code point.
3065      * @exception NullPointerException if the specified <code>dst</code> is null.
3066      * @exception IndexOutOfBoundsException if <code>dstIndex</code>
3067      * is negative or not less than <code>dst.length</code>, or if
3068      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
3069      * array element(s) to store the resulting <code>char</code>
3070      * value(s). (If <code>dstIndex</code> is equal to
3071      * <code>dst.length-1</code> and the specified
3072      * <code>codePoint</code> is a supplementary character, the
3073      * high-surrogate value is not stored in
3074      * <code>dst[dstIndex]</code>.)
3075      * @since  1.5
3076      */
3077     public static int toChars(int codePoint, char[] dst, int dstIndex) {
3078         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3079             throw new IllegalArgumentException();
3080         }
3081         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3082             dst[dstIndex] = (char) codePoint;
3083             return 1;
3084         }
3085         toSurrogates(codePoint, dst, dstIndex);
3086         return 2;
3087     }
3088 
3089     /**
3090      * Converts the specified character (Unicode code point) to its
3091      * UTF-16 representation stored in a <code>char</code> array. If
3092      * the specified code point is a BMP (Basic Multilingual Plane or
3093      * Plane 0) value, the resulting <code>char</code> array has
3094      * the same value as <code>codePoint</code>. If the specified code
3095      * point is a supplementary code point, the resulting
3096      * <code>char</code> array has the corresponding surrogate pair.
3097      *
3098      * @param  codePoint a Unicode code point
3099      * @return a <code>char</code> array having
3100      *         <code>codePoint</code>'s UTF-16 representation.
3101      * @exception IllegalArgumentException if the specified
3102      * <code>codePoint</code> is not a valid Unicode code point.
3103      * @since  1.5
3104      */
3105     public static char[] toChars(int codePoint) {
3106         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3107             throw new IllegalArgumentException();
3108         }
3109         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3110                 return new char[] { (char) codePoint };
3111         }
3112         char[] result = new char[2];
3113         toSurrogates(codePoint, result, 0);
3114         return result;
3115     }
3116 
3117     static void toSurrogates(int codePoint, char[] dst, int index) {
3118         // We write elements "backwards" to guarantee all-or-nothing
3119         dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
3120         dst[index] = (char)((codePoint >>> 10)
3121             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
3122     }
3123 
3124     /**
3125      * Returns the number of Unicode code points in the text range of
3126      * the specified char sequence. The text range begins at the
3127      * specified <code>beginIndex</code> and extends to the
3128      * <code>char</code> at index <code>endIndex - 1</code>. Thus the
3129      * length (in <code>char</code>s) of the text range is
3130      * <code>endIndex-beginIndex</code>. Unpaired surrogates within
3131      * the text range count as one code point each.
3132      *
3133      * @param seq the char sequence
3134      * @param beginIndex the index to the first <code>char</code> of
3135      * the text range.
3136      * @param endIndex the index after the last <code>char</code> of
3137      * the text range.
3138      * @return the number of Unicode code points in the specified text
3139      * range
3140      * @exception NullPointerException if <code>seq</code> is null.
3141      * @exception IndexOutOfBoundsException if the
3142      * <code>beginIndex</code> is negative, or <code>endIndex</code>
3143      * is larger than the length of the given sequence, or
3144      * <code>beginIndex</code> is larger than <code>endIndex</code>.
3145      * @since  1.5
3146      */
3147     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
3148         int length = seq.length();
3149         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
3150             throw new IndexOutOfBoundsException();
3151         }
3152         int n = 0;
3153         for (int i = beginIndex; i < endIndex; ) {
3154             n++;
3155             if (isHighSurrogate(seq.charAt(i++))) {
3156                 if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
3157                     i++;
3158                 }
3159             }
3160         }
3161         return n;
3162     }
3163 
3164     /**
3165      * Returns the number of Unicode code points in a subarray of the
3166      * <code>char</code> array argument. The <code>offset</code>
3167      * argument is the index of the first <code>char</code> of the
3168      * subarray and the <code>count</code> argument specifies the
3169      * length of the subarray in <code>char</code>s. Unpaired
3170      * surrogates within the subarray count as one code point each.
3171      *
3172      * @param a the <code>char</code> array
3173      * @param offset the index of the first <code>char</code> in the
3174      * given <code>char</code> array
3175      * @param count the length of the subarray in <code>char</code>s
3176      * @return the number of Unicode code points in the specified subarray
3177      * @exception NullPointerException if <code>a</code> is null.
3178      * @exception IndexOutOfBoundsException if <code>offset</code> or
3179      * <code>count</code> is negative, or if <code>offset +
3180      * count</code> is larger than the length of the given array.
3181      * @since  1.5
3182      */
3183     public static int codePointCount(char[] a, int offset, int count) {
3184         if (count > a.length - offset || offset < 0 || count < 0) {
3185             throw new IndexOutOfBoundsException();
3186         }
3187         return codePointCountImpl(a, offset, count);
3188     }
3189 
3190     static int codePointCountImpl(char[] a, int offset, int count) {
3191         int endIndex = offset + count;
3192         int n = 0;
3193         for (int i = offset; i < endIndex; ) {
3194             n++;
3195             if (isHighSurrogate(a[i++])) {
3196                 if (i < endIndex && isLowSurrogate(a[i])) {
3197                     i++;
3198                 }
3199             }
3200         }
3201         return n;
3202     }
3203 
3204     /**
3205      * Returns the index within the given char sequence that is offset
3206      * from the given <code>index</code> by <code>codePointOffset</code>
3207      * code points. Unpaired surrogates within the text range given by
3208      * <code>index</code> and <code>codePointOffset</code> count as
3209      * one code point each.
3210      *
3211      * @param seq the char sequence
3212      * @param index the index to be offset
3213      * @param codePointOffset the offset in code points
3214      * @return the index within the char sequence
3215      * @exception NullPointerException if <code>seq</code> is null.
3216      * @exception IndexOutOfBoundsException if <code>index</code>
3217      *   is negative or larger than the length of the char sequence,
3218      *   or if <code>codePointOffset</code> is positive and the
3219      *   subsequence starting with <code>index</code> has fewer than
3220      *   <code>codePointOffset</code> code points, or if
3221      *   <code>codePointOffset</code> is negative and the subsequence
3222      *   before <code>index</code> has fewer than the absolute value
3223      *   of <code>codePointOffset</code> code points.
3224      * @since 1.5
3225      */
3226     public static int offsetByCodePoints(CharSequence seq, int index,
3227                                          int codePointOffset) {
3228         int length = seq.length();
3229         if (index < 0 || index > length) {
3230             throw new IndexOutOfBoundsException();
3231         }
3232 
3233         int x = index;
3234         if (codePointOffset >= 0) {
3235             int i;
3236             for (i = 0; x < length && i < codePointOffset; i++) {
3237                 if (isHighSurrogate(seq.charAt(x++))) {
3238                     if (x < length && isLowSurrogate(seq.charAt(x))) {
3239                         x++;
3240                     }
3241                 }
3242             }
3243             if (i < codePointOffset) {
3244                 throw new IndexOutOfBoundsException();
3245             }
3246         } else {
3247             int i;
3248             for (i = codePointOffset; x > 0 && i < 0; i++) {
3249                 if (isLowSurrogate(seq.charAt(--x))) {
3250                     if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
3251                         x--;
3252                     }
3253                 }
3254             }
3255             if (i < 0) {
3256                 throw new IndexOutOfBoundsException();
3257             }
3258         }
3259         return x;
3260     }
3261 
3262     /**
3263      * Returns the index within the given <code>char</code> subarray
3264      * that is offset from the given <code>index</code> by
3265      * <code>codePointOffset</code> code points. The
3266      * <code>start</code> and <code>count</code> arguments specify a
3267      * subarray of the <code>char</code> array. Unpaired surrogates
3268      * within the text range given by <code>index</code> and
3269      * <code>codePointOffset</code> count as one code point each.
3270      *
3271      * @param a the <code>char</code> array
3272      * @param start the index of the first <code>char</code> of the
3273      * subarray
3274      * @param count the length of the subarray in <code>char</code>s
3275      * @param index the index to be offset
3276      * @param codePointOffset the offset in code points
3277      * @return the index within the subarray
3278      * @exception NullPointerException if <code>a</code> is null.
3279      * @exception IndexOutOfBoundsException
3280      *   if <code>start</code> or <code>count</code> is negative,
3281      *   or if <code>start + count</code> is larger than the length of
3282      *   the given array,
3283      *   or if <code>index</code> is less than <code>start</code> or
3284      *   larger than <code>start + count</code>,
3285      *   or if <code>codePointOffset</code> is positive and the text range
3286      *   starting with <code>index</code> and ending with <code>start
3287      *   + count - 1</code> has fewer than <code>codePointOffset</code> code
3288      *   points,
3289      *   or if <code>codePointOffset</code> is negative and the text range
3290      *   starting with <code>start</code> and ending with <code>index
3291      *   - 1</code> has fewer than the absolute value of
3292      *   <code>codePointOffset</code> code points.
3293      * @since 1.5
3294      */
3295     public static int offsetByCodePoints(char[] a, int start, int count,
3296                                          int index, int codePointOffset) {
3297         if (count > a.length-start || start < 0 || count < 0
3298             || index < start || index > start+count) {
3299             throw new IndexOutOfBoundsException();
3300         }
3301         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
3302     }
3303 
3304     static int offsetByCodePointsImpl(char[]a, int start, int count,
3305                                       int index, int codePointOffset) {
3306         int x = index;
3307         if (codePointOffset >= 0) {
3308             int limit = start + count;
3309             int i;
3310             for (i = 0; x < limit && i < codePointOffset; i++) {
3311                 if (isHighSurrogate(a[x++])) {
3312                     if (x < limit && isLowSurrogate(a[x])) {
3313                         x++;
3314                     }
3315                 }
3316             }
3317             if (i < codePointOffset) {
3318                 throw new IndexOutOfBoundsException();
3319             }
3320         } else {
3321             int i;
3322             for (i = codePointOffset; x > start && i < 0; i++) {
3323                 if (isLowSurrogate(a[--x])) {
3324                     if (x > start && isHighSurrogate(a[x-1])) {
3325                         x--;
3326                     }
3327                 }
3328             }
3329             if (i < 0) {
3330                 throw new IndexOutOfBoundsException();
3331             }
3332         }
3333         return x;
3334     }
3335 
3336    /**
3337      * Determines if the specified character is a lowercase character.
3338      * <p>
3339      * A character is lowercase if its general category type, provided
3340      * by <code>Character.getType(ch)</code>, is
3341      * <code>LOWERCASE_LETTER</code>.
3342      * <p>
3343      * The following are examples of lowercase characters:
3344      * <p><blockquote><pre>
3345      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3346      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3347      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3348      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3349      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3350      * </pre></blockquote>
3351      * <p> Many other Unicode characters are lowercase too.
3352      *
3353      * <p><b>Note:</b> This method cannot handle <a
3354      * href="#supplementary"> supplementary characters</a>. To support
3355      * all Unicode characters, including supplementary characters, use
3356      * the {@link #isLowerCase(int)} method.
3357      *
3358      * @param   ch   the character to be tested.
3359      * @return  <code>true</code> if the character is lowercase;
3360      *          <code>false</code> otherwise.
3361      * @see     java.lang.Character#isUpperCase(char)
3362      * @see     java.lang.Character#isTitleCase(char)
3363      * @see     java.lang.Character#toLowerCase(char)
3364      * @see     java.lang.Character#getType(char)
3365      */
3366     public static boolean isLowerCase(char ch) {
3367         return isLowerCase((int)ch);
3368     }
3369 
3370     /**
3371      * Determines if the specified character (Unicode code point) is a
3372      * lowercase character.
3373      * <p>
3374      * A character is lowercase if its general category type, provided
3375      * by {@link Character#getType getType(codePoint)}, is
3376      * <code>LOWERCASE_LETTER</code>.
3377      * <p>
3378      * The following are examples of lowercase characters:
3379      * <p><blockquote><pre>
3380      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3381      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3382      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3383      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3384      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3385      * </pre></blockquote>
3386      * <p> Many other Unicode characters are lowercase too.
3387      *
3388      * @param   codePoint the character (Unicode code point) to be tested.
3389      * @return  <code>true</code> if the character is lowercase;
3390      *          <code>false</code> otherwise.
3391      * @see     java.lang.Character#isUpperCase(int)
3392      * @see     java.lang.Character#isTitleCase(int)
3393      * @see     java.lang.Character#toLowerCase(int)
3394      * @see     java.lang.Character#getType(int)
3395      * @since   1.5
3396      */
3397     public static boolean isLowerCase(int codePoint) {
3398         return getType(codePoint) == Character.LOWERCASE_LETTER;
3399     }
3400 
3401    /**
3402      * Determines if the specified character is an uppercase character.
3403      * <p>
3404      * A character is uppercase if its general category type, provided by
3405      * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
3406      * <p>
3407      * The following are examples of uppercase characters:
3408      * <p><blockquote><pre>
3409      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3410      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3411      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3412      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3413      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3414      * </pre></blockquote>
3415      * <p> Many other Unicode characters are uppercase too.
3416      *
3417      * <p><b>Note:</b> This method cannot handle <a
3418      * href="#supplementary"> supplementary characters</a>. To support
3419      * all Unicode characters, including supplementary characters, use
3420      * the {@link #isUpperCase(int)} method.
3421      *
3422      * @param   ch   the character to be tested.
3423      * @return  <code>true</code> if the character is uppercase;
3424      *          <code>false</code> otherwise.
3425      * @see     java.lang.Character#isLowerCase(char)
3426      * @see     java.lang.Character#isTitleCase(char)
3427      * @see     java.lang.Character#toUpperCase(char)
3428      * @see     java.lang.Character#getType(char)
3429      * @since   1.0
3430      */
3431     public static boolean isUpperCase(char ch) {
3432         return isUpperCase((int)ch);
3433     }
3434 
3435     /**
3436      * Determines if the specified character (Unicode code point) is an uppercase character.
3437      * <p>
3438      * A character is uppercase if its general category type, provided by
3439      * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
3440      * <p>
3441      * The following are examples of uppercase characters:
3442      * <p><blockquote><pre>
3443      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3444      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3445      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3446      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3447      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3448      * </pre></blockquote>
3449      * <p> Many other Unicode characters are uppercase too.
3450      *
3451      * @param   codePoint the character (Unicode code point) to be tested.
3452      * @return  <code>true</code> if the character is uppercase;
3453      *          <code>false</code> otherwise.
3454      * @see     java.lang.Character#isLowerCase(int)
3455      * @see     java.lang.Character#isTitleCase(int)
3456      * @see     java.lang.Character#toUpperCase(int)
3457      * @see     java.lang.Character#getType(int)
3458      * @since   1.5
3459      */
3460     public static boolean isUpperCase(int codePoint) {
3461         return getType(codePoint) == Character.UPPERCASE_LETTER;
3462     }
3463 
3464     /**
3465      * Determines if the specified character is a titlecase character.
3466      * <p>
3467      * A character is a titlecase character if its general
3468      * category type, provided by <code>Character.getType(ch)</code>,
3469      * is <code>TITLECASE_LETTER</code>.
3470      * <p>
3471      * Some characters look like pairs of Latin letters. For example, there
3472      * is an uppercase letter that looks like "LJ" and has a corresponding
3473      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3474      * is the appropriate form to use when rendering a word in lowercase
3475      * with initial capitals, as for a book title.
3476      * <p>
3477      * These are some of the Unicode characters for which this method returns
3478      * <code>true</code>:
3479      * <ul>
3480      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3481      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3482      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3483      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3484      * </ul>
3485      * <p> Many other Unicode characters are titlecase too.
3486      *
3487      * <p><b>Note:</b> This method cannot handle <a
3488      * href="#supplementary"> supplementary characters</a>. To support
3489      * all Unicode characters, including supplementary characters, use
3490      * the {@link #isTitleCase(int)} method.
3491      *
3492      * @param   ch   the character to be tested.
3493      * @return  <code>true</code> if the character is titlecase;
3494      *          <code>false</code> otherwise.
3495      * @see     java.lang.Character#isLowerCase(char)
3496      * @see     java.lang.Character#isUpperCase(char)
3497      * @see     java.lang.Character#toTitleCase(char)
3498      * @see     java.lang.Character#getType(char)
3499      * @since   1.0.2
3500      */
3501     public static boolean isTitleCase(char ch) {
3502         return isTitleCase((int)ch);
3503     }
3504 
3505     /**
3506      * Determines if the specified character (Unicode code point) is a titlecase character.
3507      * <p>
3508      * A character is a titlecase character if its general
3509      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3510      * is <code>TITLECASE_LETTER</code>.
3511      * <p>
3512      * Some characters look like pairs of Latin letters. For example, there
3513      * is an uppercase letter that looks like "LJ" and has a corresponding
3514      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3515      * is the appropriate form to use when rendering a word in lowercase
3516      * with initial capitals, as for a book title.
3517      * <p>
3518      * These are some of the Unicode characters for which this method returns
3519      * <code>true</code>:
3520      * <ul>
3521      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3522      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3523      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3524      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3525      * </ul>
3526      * <p> Many other Unicode characters are titlecase too.
3527      *
3528      * @param   codePoint the character (Unicode code point) to be tested.
3529      * @return  <code>true</code> if the character is titlecase;
3530      *          <code>false</code> otherwise.
3531      * @see     java.lang.Character#isLowerCase(int)
3532      * @see     java.lang.Character#isUpperCase(int)
3533      * @see     java.lang.Character#toTitleCase(int)
3534      * @see     java.lang.Character#getType(int)
3535      * @since   1.5
3536      */
3537     public static boolean isTitleCase(int codePoint) {
3538         return getType(codePoint) == Character.TITLECASE_LETTER;
3539     }
3540 
3541     /**
3542      * Determines if the specified character is a digit.
3543      * <p>
3544      * A character is a digit if its general category type, provided
3545      * by <code>Character.getType(ch)</code>, is
3546      * <code>DECIMAL_DIGIT_NUMBER</code>.
3547      * <p>
3548      * Some Unicode character ranges that contain digits:
3549      * <ul>
3550      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3551      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3552      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3553      *     Arabic-Indic digits
3554      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3555      *     Extended Arabic-Indic digits
3556      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3557      *     Devanagari digits
3558      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3559      *     Fullwidth digits
3560      * </ul>
3561      *
3562      * Many other character ranges contain digits as well.
3563      *
3564      * <p><b>Note:</b> This method cannot handle <a
3565      * href="#supplementary"> supplementary characters</a>. To support
3566      * all Unicode characters, including supplementary characters, use
3567      * the {@link #isDigit(int)} method.
3568      *
3569      * @param   ch   the character to be tested.
3570      * @return  <code>true</code> if the character is a digit;
3571      *          <code>false</code> otherwise.
3572      * @see     java.lang.Character#digit(char, int)
3573      * @see     java.lang.Character#forDigit(int, int)
3574      * @see     java.lang.Character#getType(char)
3575      */
3576     public static boolean isDigit(char ch) {
3577         return isDigit((int)ch);
3578     }
3579 
3580     /**
3581      * Determines if the specified character (Unicode code point) is a digit.
3582      * <p>
3583      * A character is a digit if its general category type, provided
3584      * by {@link Character#getType(int) getType(codePoint)}, is
3585      * <code>DECIMAL_DIGIT_NUMBER</code>.
3586      * <p>
3587      * Some Unicode character ranges that contain digits:
3588      * <ul>
3589      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3590      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3591      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3592      *     Arabic-Indic digits
3593      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3594      *     Extended Arabic-Indic digits
3595      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3596      *     Devanagari digits
3597      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3598      *     Fullwidth digits
3599      * </ul>
3600      *
3601      * Many other character ranges contain digits as well.
3602      *
3603      * @param   codePoint the character (Unicode code point) to be tested.
3604      * @return  <code>true</code> if the character is a digit;
3605      *          <code>false</code> otherwise.
3606      * @see     java.lang.Character#forDigit(int, int)
3607      * @see     java.lang.Character#getType(int)
3608      * @since   1.5
3609      */
3610     public static boolean isDigit(int codePoint) {
3611         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3612     }
3613 
3614     /**
3615      * Determines if a character is defined in Unicode.
3616      * <p>
3617      * A character is defined if at least one of the following is true:
3618      * <ul>
3619      * <li>It has an entry in the UnicodeData file.
3620      * <li>It has a value in a range defined by the UnicodeData file.
3621      * </ul>
3622      *
3623      * <p><b>Note:</b> This method cannot handle <a
3624      * href="#supplementary"> supplementary characters</a>. To support
3625      * all Unicode characters, including supplementary characters, use
3626      * the {@link #isDefined(int)} method.
3627      *
3628      * @param   ch   the character to be tested
3629      * @return  <code>true</code> if the character has a defined meaning
3630      *          in Unicode; <code>false</code> otherwise.
3631      * @see     java.lang.Character#isDigit(char)
3632      * @see     java.lang.Character#isLetter(char)
3633      * @see     java.lang.Character#isLetterOrDigit(char)
3634      * @see     java.lang.Character#isLowerCase(char)
3635      * @see     java.lang.Character#isTitleCase(char)
3636      * @see     java.lang.Character#isUpperCase(char)
3637      * @since   1.0.2
3638      */
3639     public static boolean isDefined(char ch) {
3640         return isDefined((int)ch);
3641     }
3642 
3643     /**
3644      * Determines if a character (Unicode code point) is defined in Unicode.
3645      * <p>
3646      * A character is defined if at least one of the following is true:
3647      * <ul>
3648      * <li>It has an entry in the UnicodeData file.
3649      * <li>It has a value in a range defined by the UnicodeData file.
3650      * </ul>
3651      *
3652      * @param   codePoint the character (Unicode code point) to be tested.
3653      * @return  <code>true</code> if the character has a defined meaning
3654      *          in Unicode; <code>false</code> otherwise.
3655      * @see     java.lang.Character#isDigit(int)
3656      * @see     java.lang.Character#isLetter(int)
3657      * @see     java.lang.Character#isLetterOrDigit(int)
3658      * @see     java.lang.Character#isLowerCase(int)
3659      * @see     java.lang.Character#isTitleCase(int)
3660      * @see     java.lang.Character#isUpperCase(int)
3661      * @since   1.5
3662      */
3663     public static boolean isDefined(int codePoint) {
3664         return getType(codePoint) != Character.UNASSIGNED;
3665     }
3666 
3667     /**
3668      * Determines if the specified character is a letter.
3669      * <p>
3670      * A character is considered to be a letter if its general
3671      * category type, provided by <code>Character.getType(ch)</code>,
3672      * is any of the following:
3673      * <ul>
3674      * <li> <code>UPPERCASE_LETTER</code>
3675      * <li> <code>LOWERCASE_LETTER</code>
3676      * <li> <code>TITLECASE_LETTER</code>
3677      * <li> <code>MODIFIER_LETTER</code>
3678      * <li> <code>OTHER_LETTER</code>
3679      * </ul>
3680      *
3681      * Not all letters have case. Many characters are
3682      * letters but are neither uppercase nor lowercase nor titlecase.
3683      *
3684      * <p><b>Note:</b> This method cannot handle <a
3685      * href="#supplementary"> supplementary characters</a>. To support
3686      * all Unicode characters, including supplementary characters, use
3687      * the {@link #isLetter(int)} method.
3688      *
3689      * @param   ch   the character to be tested.
3690      * @return  <code>true</code> if the character is a letter;
3691      *          <code>false</code> otherwise.
3692      * @see     java.lang.Character#isDigit(char)
3693      * @see     java.lang.Character#isJavaIdentifierStart(char)
3694      * @see     java.lang.Character#isJavaLetter(char)
3695      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3696      * @see     java.lang.Character#isLetterOrDigit(char)
3697      * @see     java.lang.Character#isLowerCase(char)
3698      * @see     java.lang.Character#isTitleCase(char)
3699      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3700      * @see     java.lang.Character#isUpperCase(char)
3701      */
3702     public static boolean isLetter(char ch) {
3703         return isLetter((int)ch);
3704     }
3705 
3706     /**
3707      * Determines if the specified character (Unicode code point) is a letter.
3708      * <p>
3709      * A character is considered to be a letter if its general
3710      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3711      * is any of the following:
3712      * <ul>
3713      * <li> <code>UPPERCASE_LETTER</code>
3714      * <li> <code>LOWERCASE_LETTER</code>
3715      * <li> <code>TITLECASE_LETTER</code>
3716      * <li> <code>MODIFIER_LETTER</code>
3717      * <li> <code>OTHER_LETTER</code>
3718      * </ul>
3719      *
3720      * Not all letters have case. Many characters are
3721      * letters but are neither uppercase nor lowercase nor titlecase.
3722      *
3723      * @param   codePoint the character (Unicode code point) to be tested.
3724      * @return  <code>true</code> if the character is a letter;
3725      *          <code>false</code> otherwise.
3726      * @see     java.lang.Character#isDigit(int)
3727      * @see     java.lang.Character#isJavaIdentifierStart(int)
3728      * @see     java.lang.Character#isLetterOrDigit(int)
3729      * @see     java.lang.Character#isLowerCase(int)
3730      * @see     java.lang.Character#isTitleCase(int)
3731      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3732      * @see     java.lang.Character#isUpperCase(int)
3733      * @since   1.5
3734      */
3735     public static boolean isLetter(int codePoint) {
3736         return ((((1 << Character.UPPERCASE_LETTER) |
3737             (1 << Character.LOWERCASE_LETTER) |
3738             (1 << Character.TITLECASE_LETTER) |
3739             (1 << Character.MODIFIER_LETTER) |
3740             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
3741             != 0;
3742     }
3743 
3744     /**
3745      * Determines if the specified character is a letter or digit.
3746      * <p>
3747      * A character is considered to be a letter or digit if either
3748      * <code>Character.isLetter(char ch)</code> or
3749      * <code>Character.isDigit(char ch)</code> returns
3750      * <code>true</code> for the character.
3751      *
3752      * <p><b>Note:</b> This method cannot handle <a
3753      * href="#supplementary"> supplementary characters</a>. To support
3754      * all Unicode characters, including supplementary characters, use
3755      * the {@link #isLetterOrDigit(int)} method.
3756      *
3757      * @param   ch   the character to be tested.
3758      * @return  <code>true</code> if the character is a letter or digit;
3759      *          <code>false</code> otherwise.
3760      * @see     java.lang.Character#isDigit(char)
3761      * @see     java.lang.Character#isJavaIdentifierPart(char)
3762      * @see     java.lang.Character#isJavaLetter(char)
3763      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3764      * @see     java.lang.Character#isLetter(char)
3765      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3766      * @since   1.0.2
3767      */
3768     public static boolean isLetterOrDigit(char ch) {
3769         return isLetterOrDigit((int)ch);
3770     }
3771 
3772     /**
3773      * Determines if the specified character (Unicode code point) is a letter or digit.
3774      * <p>
3775      * A character is considered to be a letter or digit if either
3776      * {@link #isLetter(int) isLetter(codePoint)} or
3777      * {@link #isDigit(int) isDigit(codePoint)} returns
3778      * <code>true</code> for the character.
3779      *
3780      * @param   codePoint the character (Unicode code point) to be tested.
3781      * @return  <code>true</code> if the character is a letter or digit;
3782      *          <code>false</code> otherwise.
3783      * @see     java.lang.Character#isDigit(int)
3784      * @see     java.lang.Character#isJavaIdentifierPart(int)
3785      * @see     java.lang.Character#isLetter(int)
3786      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3787      * @since   1.5
3788      */
3789     public static boolean isLetterOrDigit(int codePoint) {
3790         return ((((1 << Character.UPPERCASE_LETTER) |
3791             (1 << Character.LOWERCASE_LETTER) |
3792             (1 << Character.TITLECASE_LETTER) |
3793             (1 << Character.MODIFIER_LETTER) |
3794             (1 << Character.OTHER_LETTER) |
3795             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
3796             != 0;
3797     }
3798 
3799     /**
3800      * Determines if the specified character is permissible as the first
3801      * character in a Java identifier.
3802      * <p>
3803      * A character may start a Java identifier if and only if
3804      * one of the following is true:
3805      * <ul>
3806      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3807      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3808      * <li> ch is a currency symbol (such as "$")
3809      * <li> ch is a connecting punctuation character (such as "_").
3810      * </ul>
3811      *
3812      * @param   ch the character to be tested.
3813      * @return  <code>true</code> if the character may start a Java
3814      *          identifier; <code>false</code> otherwise.
3815      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3816      * @see     java.lang.Character#isJavaIdentifierStart(char)
3817      * @see     java.lang.Character#isJavaIdentifierPart(char)
3818      * @see     java.lang.Character#isLetter(char)
3819      * @see     java.lang.Character#isLetterOrDigit(char)
3820      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3821      * @since   1.0.2
3822      * @deprecated Replaced by isJavaIdentifierStart(char).
3823      */
3824     @Deprecated
3825     public static boolean isJavaLetter(char ch) {
3826         return isJavaIdentifierStart(ch);
3827     }
3828 
3829     /**
3830      * Determines if the specified character may be part of a Java
3831      * identifier as other than the first character.
3832      * <p>
3833      * A character may be part of a Java identifier if and only if any
3834      * of the following are true:
3835      * <ul>
3836      * <li>  it is a letter
3837      * <li>  it is a currency symbol (such as <code>'$'</code>)
3838      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3839      * <li>  it is a digit
3840      * <li>  it is a numeric letter (such as a Roman numeral character)
3841      * <li>  it is a combining mark
3842      * <li>  it is a non-spacing mark
3843      * <li> <code>isIdentifierIgnorable</code> returns
3844      * <code>true</code> for the character.
3845      * </ul>
3846      *
3847      * @param   ch the character to be tested.
3848      * @return  <code>true</code> if the character may be part of a
3849      *          Java identifier; <code>false</code> otherwise.
3850      * @see     java.lang.Character#isJavaLetter(char)
3851      * @see     java.lang.Character#isJavaIdentifierStart(char)
3852      * @see     java.lang.Character#isJavaIdentifierPart(char)
3853      * @see     java.lang.Character#isLetter(char)
3854      * @see     java.lang.Character#isLetterOrDigit(char)
3855      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3856      * @see     java.lang.Character#isIdentifierIgnorable(char)
3857      * @since   1.0.2
3858      * @deprecated Replaced by isJavaIdentifierPart(char).
3859      */
3860     @Deprecated
3861     public static boolean isJavaLetterOrDigit(char ch) {
3862         return isJavaIdentifierPart(ch);
3863     }
3864 
3865     /**
3866      * Determines if the specified character is
3867      * permissible as the first character in a Java identifier.
3868      * <p>
3869      * A character may start a Java identifier if and only if
3870      * one of the following conditions is true:
3871      * <ul>
3872      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3873      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3874      * <li> ch is a currency symbol (such as "$")
3875      * <li> ch is a connecting punctuation character (such as "_").
3876      * </ul>
3877      *
3878      * <p><b>Note:</b> This method cannot handle <a
3879      * href="#supplementary"> supplementary characters</a>. To support
3880      * all Unicode characters, including supplementary characters, use
3881      * the {@link #isJavaIdentifierStart(int)} method.
3882      *
3883      * @param   ch the character to be tested.
3884      * @return  <code>true</code> if the character may start a Java identifier;
3885      *          <code>false</code> otherwise.
3886      * @see     java.lang.Character#isJavaIdentifierPart(char)
3887      * @see     java.lang.Character#isLetter(char)
3888      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3889      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3890      * @since   1.1
3891      */
3892     public static boolean isJavaIdentifierStart(char ch) {
3893         return isJavaIdentifierStart((int)ch);
3894     }
3895 
3896     /**
3897      * Determines if the character (Unicode code point) is
3898      * permissible as the first character in a Java identifier.
3899      * <p>
3900      * A character may start a Java identifier if and only if
3901      * one of the following conditions is true:
3902      * <ul>
3903      * <li> {@link #isLetter(int) isLetter(codePoint)}
3904      *      returns <code>true</code>
3905      * <li> {@link #getType(int) getType(codePoint)}
3906      *      returns <code>LETTER_NUMBER</code>
3907      * <li> the referenced character is a currency symbol (such as "$")
3908      * <li> the referenced character is a connecting punctuation character
3909      *      (such as "_").
3910      * </ul>
3911      *
3912      * @param   codePoint the character (Unicode code point) to be tested.
3913      * @return  <code>true</code> if the character may start a Java identifier;
3914      *          <code>false</code> otherwise.
3915      * @see     java.lang.Character#isJavaIdentifierPart(int)
3916      * @see     java.lang.Character#isLetter(int)
3917      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3918      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3919      * @since   1.5
3920      */
3921     public static boolean isJavaIdentifierStart(int codePoint) {
3922         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
3923     }
3924 
3925     /**
3926      * Determines if the specified character may be part of a Java
3927      * identifier as other than the first character.
3928      * <p>
3929      * A character may be part of a Java identifier if any of the following
3930      * are true:
3931      * <ul>
3932      * <li>  it is a letter
3933      * <li>  it is a currency symbol (such as <code>'$'</code>)
3934      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3935      * <li>  it is a digit
3936      * <li>  it is a numeric letter (such as a Roman numeral character)
3937      * <li>  it is a combining mark
3938      * <li>  it is a non-spacing mark
3939      * <li> <code>isIdentifierIgnorable</code> returns
3940      * <code>true</code> for the character
3941      * </ul>
3942      *
3943      * <p><b>Note:</b> This method cannot handle <a
3944      * href="#supplementary"> supplementary characters</a>. To support
3945      * all Unicode characters, including supplementary characters, use
3946      * the {@link #isJavaIdentifierPart(int)} method.
3947      *
3948      * @param   ch      the character to be tested.
3949      * @return <code>true</code> if the character may be part of a
3950      *          Java identifier; <code>false</code> otherwise.
3951      * @see     java.lang.Character#isIdentifierIgnorable(char)
3952      * @see     java.lang.Character#isJavaIdentifierStart(char)
3953      * @see     java.lang.Character#isLetterOrDigit(char)
3954      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3955      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3956      * @since   1.1
3957      */
3958     public static boolean isJavaIdentifierPart(char ch) {
3959         return isJavaIdentifierPart((int)ch);
3960     }
3961 
3962     /**
3963      * Determines if the character (Unicode code point) may be part of a Java
3964      * identifier as other than the first character.
3965      * <p>
3966      * A character may be part of a Java identifier if any of the following
3967      * are true:
3968      * <ul>
3969      * <li>  it is a letter
3970      * <li>  it is a currency symbol (such as <code>'$'</code>)
3971      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3972      * <li>  it is a digit
3973      * <li>  it is a numeric letter (such as a Roman numeral character)
3974      * <li>  it is a combining mark
3975      * <li>  it is a non-spacing mark
3976      * <li> {@link #isIdentifierIgnorable(int)
3977      * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3978      * the character
3979      * </ul>
3980      *
3981      * @param   codePoint the character (Unicode code point) to be tested.
3982      * @return <code>true</code> if the character may be part of a
3983      *          Java identifier; <code>false</code> otherwise.
3984      * @see     java.lang.Character#isIdentifierIgnorable(int)
3985      * @see     java.lang.Character#isJavaIdentifierStart(int)
3986      * @see     java.lang.Character#isLetterOrDigit(int)
3987      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3988      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3989      * @since   1.5
3990      */
3991     public static boolean isJavaIdentifierPart(int codePoint) {
3992         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
3993     }
3994 
3995     /**
3996      * Determines if the specified character is permissible as the
3997      * first character in a Unicode identifier.
3998      * <p>
3999      * A character may start a Unicode identifier if and only if
4000      * one of the following conditions is true:
4001      * <ul>
4002      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
4003      * <li> {@link #getType(char) getType(ch)} returns
4004      *      <code>LETTER_NUMBER</code>.
4005      * </ul>
4006      *
4007      * <p><b>Note:</b> This method cannot handle <a
4008      * href="#supplementary"> supplementary characters</a>. To support
4009      * all Unicode characters, including supplementary characters, use
4010      * the {@link #isUnicodeIdentifierStart(int)} method.
4011      *
4012      * @param   ch      the character to be tested.
4013      * @return  <code>true</code> if the character may start a Unicode
4014      *          identifier; <code>false</code> otherwise.
4015      * @see     java.lang.Character#isJavaIdentifierStart(char)
4016      * @see     java.lang.Character#isLetter(char)
4017      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
4018      * @since   1.1
4019      */
4020     public static boolean isUnicodeIdentifierStart(char ch) {
4021         return isUnicodeIdentifierStart((int)ch);
4022     }
4023 
4024     /**
4025      * Determines if the specified character (Unicode code point) is permissible as the
4026      * first character in a Unicode identifier.
4027      * <p>
4028      * A character may start a Unicode identifier if and only if
4029      * one of the following conditions is true:
4030      * <ul>
4031      * <li> {@link #isLetter(int) isLetter(codePoint)}
4032      *      returns <code>true</code>
4033      * <li> {@link #getType(int) getType(codePoint)}
4034      *      returns <code>LETTER_NUMBER</code>.
4035      * </ul>
4036      * @param   codePoint the character (Unicode code point) to be tested.
4037      * @return  <code>true</code> if the character may start a Unicode
4038      *          identifier; <code>false</code> otherwise.
4039      * @see     java.lang.Character#isJavaIdentifierStart(int)
4040      * @see     java.lang.Character#isLetter(int)
4041      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
4042      * @since   1.5
4043      */
4044     public static boolean isUnicodeIdentifierStart(int codePoint) {
4045         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
4046     }
4047 
4048     /**
4049      * Determines if the specified character may be part of a Unicode
4050      * identifier as other than the first character.
4051      * <p>
4052      * A character may be part of a Unicode identifier if and only if
4053      * one of the following statements is true:
4054      * <ul>
4055      * <li>  it is a letter
4056      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4057      * <li>  it is a digit
4058      * <li>  it is a numeric letter (such as a Roman numeral character)
4059      * <li>  it is a combining mark
4060      * <li>  it is a non-spacing mark
4061      * <li> <code>isIdentifierIgnorable</code> returns
4062      * <code>true</code> for this character.
4063      * </ul>
4064      *
4065      * <p><b>Note:</b> This method cannot handle <a
4066      * href="#supplementary"> supplementary characters</a>. To support
4067      * all Unicode characters, including supplementary characters, use
4068      * the {@link #isUnicodeIdentifierPart(int)} method.
4069      *
4070      * @param   ch      the character to be tested.
4071      * @return  <code>true</code> if the character may be part of a
4072      *          Unicode identifier; <code>false</code> otherwise.
4073      * @see     java.lang.Character#isIdentifierIgnorable(char)
4074      * @see     java.lang.Character#isJavaIdentifierPart(char)
4075      * @see     java.lang.Character#isLetterOrDigit(char)
4076      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
4077      * @since   1.1
4078      */
4079     public static boolean isUnicodeIdentifierPart(char ch) {
4080         return isUnicodeIdentifierPart((int)ch);
4081     }
4082 
4083     /**
4084      * Determines if the specified character (Unicode code point) may be part of a Unicode
4085      * identifier as other than the first character.
4086      * <p>
4087      * A character may be part of a Unicode identifier if and only if
4088      * one of the following statements is true:
4089      * <ul>
4090      * <li>  it is a letter
4091      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4092      * <li>  it is a digit
4093      * <li>  it is a numeric letter (such as a Roman numeral character)
4094      * <li>  it is a combining mark
4095      * <li>  it is a non-spacing mark
4096      * <li> <code>isIdentifierIgnorable</code> returns
4097      * <code>true</code> for this character.
4098      * </ul>
4099      * @param   codePoint the character (Unicode code point) to be tested.
4100      * @return  <code>true</code> if the character may be part of a
4101      *          Unicode identifier; <code>false</code> otherwise.
4102      * @see     java.lang.Character#isIdentifierIgnorable(int)
4103      * @see     java.lang.Character#isJavaIdentifierPart(int)
4104      * @see     java.lang.Character#isLetterOrDigit(int)
4105      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
4106      * @since   1.5
4107      */
4108     public static boolean isUnicodeIdentifierPart(int codePoint) {
4109         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
4110     }
4111 
4112     /**
4113      * Determines if the specified character should be regarded as
4114      * an ignorable character in a Java identifier or a Unicode identifier.
4115      * <p>
4116      * The following Unicode characters are ignorable in a Java identifier
4117      * or a Unicode identifier:
4118      * <ul>
4119      * <li>ISO control characters that are not whitespace
4120      * <ul>
4121      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4122      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4123      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4124      * </ul>
4125      *
4126      * <li>all characters that have the <code>FORMAT</code> general
4127      * category value
4128      * </ul>
4129      *
4130      * <p><b>Note:</b> This method cannot handle <a
4131      * href="#supplementary"> supplementary characters</a>. To support
4132      * all Unicode characters, including supplementary characters, use
4133      * the {@link #isIdentifierIgnorable(int)} method.
4134      *
4135      * @param   ch      the character to be tested.
4136      * @return  <code>true</code> if the character is an ignorable control
4137      *          character that may be part of a Java or Unicode identifier;
4138      *           <code>false</code> otherwise.
4139      * @see     java.lang.Character#isJavaIdentifierPart(char)
4140      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
4141      * @since   1.1
4142      */
4143     public static boolean isIdentifierIgnorable(char ch) {
4144         return isIdentifierIgnorable((int)ch);
4145     }
4146 
4147     /**
4148      * Determines if the specified character (Unicode code point) should be regarded as
4149      * an ignorable character in a Java identifier or a Unicode identifier.
4150      * <p>
4151      * The following Unicode characters are ignorable in a Java identifier
4152      * or a Unicode identifier:
4153      * <ul>
4154      * <li>ISO control characters that are not whitespace
4155      * <ul>
4156      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4157      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4158      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4159      * </ul>
4160      *
4161      * <li>all characters that have the <code>FORMAT</code> general
4162      * category value
4163      * </ul>
4164      *
4165      * @param   codePoint the character (Unicode code point) to be tested.
4166      * @return  <code>true</code> if the character is an ignorable control
4167      *          character that may be part of a Java or Unicode identifier;
4168      *          <code>false</code> otherwise.
4169      * @see     java.lang.Character#isJavaIdentifierPart(int)
4170      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
4171      * @since   1.5
4172      */
4173     public static boolean isIdentifierIgnorable(int codePoint) {
4174         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
4175     }
4176 
4177     /**
4178      * Converts the character argument to lowercase using case
4179      * mapping information from the UnicodeData file.
4180      * <p>
4181      * Note that
4182      * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
4183      * does not always return <code>true</code> for some ranges of
4184      * characters, particularly those that are symbols or ideographs.
4185      *
4186      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4187      * characters to lowercase. <code>String</code> case mapping methods
4188      * have several benefits over <code>Character</code> case mapping methods.
4189      * <code>String</code> case mapping methods can perform locale-sensitive
4190      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4191      * the <code>Character</code> case mapping methods cannot.
4192      *
4193      * <p><b>Note:</b> This method cannot handle <a
4194      * href="#supplementary"> supplementary characters</a>. To support
4195      * all Unicode characters, including supplementary characters, use
4196      * the {@link #toLowerCase(int)} method.
4197      *
4198      * @param   ch   the character to be converted.
4199      * @return  the lowercase equivalent of the character, if any;
4200      *          otherwise, the character itself.
4201      * @see     java.lang.Character#isLowerCase(char)
4202      * @see     java.lang.String#toLowerCase()
4203      */
4204     public static char toLowerCase(char ch) {
4205         return (char)toLowerCase((int)ch);
4206     }
4207 
4208     /**
4209      * Converts the character (Unicode code point) argument to
4210      * lowercase using case mapping information from the UnicodeData
4211      * file.
4212      *
4213      * <p> Note that
4214      * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
4215      * does not always return <code>true</code> for some ranges of
4216      * characters, particularly those that are symbols or ideographs.
4217      *
4218      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4219      * characters to lowercase. <code>String</code> case mapping methods
4220      * have several benefits over <code>Character</code> case mapping methods.
4221      * <code>String</code> case mapping methods can perform locale-sensitive
4222      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4223      * the <code>Character</code> case mapping methods cannot.
4224      *
4225      * @param   codePoint   the character (Unicode code point) to be converted.
4226      * @return  the lowercase equivalent of the character (Unicode code
4227      *          point), if any; otherwise, the character itself.
4228      * @see     java.lang.Character#isLowerCase(int)
4229      * @see     java.lang.String#toLowerCase()
4230      *
4231      * @since   1.5
4232      */
4233     public static int toLowerCase(int codePoint) {
4234         return CharacterData.of(codePoint).toLowerCase(codePoint);
4235     }
4236 
4237     /**
4238      * Converts the character argument to uppercase using case mapping
4239      * information from the UnicodeData file.
4240      * <p>
4241      * Note that
4242      * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
4243      * does not always return <code>true</code> for some ranges of
4244      * characters, particularly those that are symbols or ideographs.
4245      *
4246      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4247      * characters to uppercase. <code>String</code> case mapping methods
4248      * have several benefits over <code>Character</code> case mapping methods.
4249      * <code>String</code> case mapping methods can perform locale-sensitive
4250      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4251      * the <code>Character</code> case mapping methods cannot.
4252      *
4253      * <p><b>Note:</b> This method cannot handle <a
4254      * href="#supplementary"> supplementary characters</a>. To support
4255      * all Unicode characters, including supplementary characters, use
4256      * the {@link #toUpperCase(int)} method.
4257      *
4258      * @param   ch   the character to be converted.
4259      * @return  the uppercase equivalent of the character, if any;
4260      *          otherwise, the character itself.
4261      * @see     java.lang.Character#isUpperCase(char)
4262      * @see     java.lang.String#toUpperCase()
4263      */
4264     public static char toUpperCase(char ch) {
4265         return (char)toUpperCase((int)ch);
4266     }
4267 
4268     /**
4269      * Converts the character (Unicode code point) argument to
4270      * uppercase using case mapping information from the UnicodeData
4271      * file.
4272      *
4273      * <p>Note that
4274      * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
4275      * does not always return <code>true</code> for some ranges of
4276      * characters, particularly those that are symbols or ideographs.
4277      *
4278      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4279      * characters to uppercase. <code>String</code> case mapping methods
4280      * have several benefits over <code>Character</code> case mapping methods.
4281      * <code>String</code> case mapping methods can perform locale-sensitive
4282      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4283      * the <code>Character</code> case mapping methods cannot.
4284      *
4285      * @param   codePoint   the character (Unicode code point) to be converted.
4286      * @return  the uppercase equivalent of the character, if any;
4287      *          otherwise, the character itself.
4288      * @see     java.lang.Character#isUpperCase(int)
4289      * @see     java.lang.String#toUpperCase()
4290      *
4291      * @since   1.5
4292      */
4293     public static int toUpperCase(int codePoint) {
4294         return CharacterData.of(codePoint).toUpperCase(codePoint);
4295     }
4296 
4297     /**
4298      * Converts the character argument to titlecase using case mapping
4299      * information from the UnicodeData file. If a character has no
4300      * explicit titlecase mapping and is not itself a titlecase char
4301      * according to UnicodeData, then the uppercase mapping is
4302      * returned as an equivalent titlecase mapping. If the
4303      * <code>char</code> argument is already a titlecase
4304      * <code>char</code>, the same <code>char</code> value will be
4305      * returned.
4306      * <p>
4307      * Note that
4308      * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
4309      * does not always return <code>true</code> for some ranges of
4310      * characters.
4311      *
4312      * <p><b>Note:</b> This method cannot handle <a
4313      * href="#supplementary"> supplementary characters</a>. To support
4314      * all Unicode characters, including supplementary characters, use
4315      * the {@link #toTitleCase(int)} method.
4316      *
4317      * @param   ch   the character to be converted.
4318      * @return  the titlecase equivalent of the character, if any;
4319      *          otherwise, the character itself.
4320      * @see     java.lang.Character#isTitleCase(char)
4321      * @see     java.lang.Character#toLowerCase(char)
4322      * @see     java.lang.Character#toUpperCase(char)
4323      * @since   1.0.2
4324      */
4325     public static char toTitleCase(char ch) {
4326         return (char)toTitleCase((int)ch);
4327     }
4328 
4329     /**
4330      * Converts the character (Unicode code point) argument to titlecase using case mapping
4331      * information from the UnicodeData file. If a character has no
4332      * explicit titlecase mapping and is not itself a titlecase char
4333      * according to UnicodeData, then the uppercase mapping is
4334      * returned as an equivalent titlecase mapping. If the
4335      * character argument is already a titlecase
4336      * character, the same character value will be
4337      * returned.
4338      *
4339      * <p>Note that
4340      * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
4341      * does not always return <code>true</code> for some ranges of
4342      * characters.
4343      *
4344      * @param   codePoint   the character (Unicode code point) to be converted.
4345      * @return  the titlecase equivalent of the character, if any;
4346      *          otherwise, the character itself.
4347      * @see     java.lang.Character#isTitleCase(int)
4348      * @see     java.lang.Character#toLowerCase(int)
4349      * @see     java.lang.Character#toUpperCase(int)
4350      * @since   1.5
4351      */
4352     public static int toTitleCase(int codePoint) {
4353         return CharacterData.of(codePoint).toTitleCase(codePoint);
4354     }
4355 
4356     /**
4357      * Returns the numeric value of the character <code>ch</code> in the
4358      * specified radix.
4359      * <p>
4360      * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4361      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4362      * value of <code>ch</code> is not a valid digit in the specified
4363      * radix, <code>-1</code> is returned. A character is a valid digit
4364      * if at least one of the following is true:
4365      * <ul>
4366      * <li>The method <code>isDigit</code> is <code>true</code> of the character
4367      *     and the Unicode decimal digit value of the character (or its
4368      *     single-character decomposition) is less than the specified radix.
4369      *     In this case the decimal digit value is returned.
4370      * <li>The character is one of the uppercase Latin letters
4371      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4372      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
4373      *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4374      *     is returned.
4375      * <li>The character is one of the lowercase Latin letters
4376      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4377      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
4378      *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4379      *     is returned.
4380      * </ul>
4381      *
4382      * <p><b>Note:</b> This method cannot handle <a
4383      * href="#supplementary"> supplementary characters</a>. To support
4384      * all Unicode characters, including supplementary characters, use
4385      * the {@link #digit(int, int)} method.
4386      *
4387      * @param   ch      the character to be converted.
4388      * @param   radix   the radix.
4389      * @return  the numeric value represented by the character in the
4390      *          specified radix.
4391      * @see     java.lang.Character#forDigit(int, int)
4392      * @see     java.lang.Character#isDigit(char)
4393      */
4394     public static int digit(char ch, int radix) {
4395         return digit((int)ch, radix);
4396     }
4397 
4398     /**
4399      * Returns the numeric value of the specified character (Unicode
4400      * code point) in the specified radix.
4401      *
4402      * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4403      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4404      * character is not a valid digit in the specified
4405      * radix, <code>-1</code> is returned. A character is a valid digit
4406      * if at least one of the following is true:
4407      * <ul>
4408      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
4409      *     and the Unicode decimal digit value of the character (or its
4410      *     single-character decomposition) is less than the specified radix.
4411      *     In this case the decimal digit value is returned.
4412      * <li>The character is one of the uppercase Latin letters
4413      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4414      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4416      *     is returned.
4417      * <li>The character is one of the lowercase Latin letters
4418      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4419      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4421      *     is returned.
4422      * </ul>
4423      *
4424      * @param   codePoint the character (Unicode code point) to be converted.
4425      * @param   radix   the radix.
4426      * @return  the numeric value represented by the character in the
4427      *          specified radix.
4428      * @see     java.lang.Character#forDigit(int, int)
4429      * @see     java.lang.Character#isDigit(int)
4430      * @since   1.5
4431      */
4432     public static int digit(int codePoint, int radix) {
4433         return CharacterData.of(codePoint).digit(codePoint, radix);
4434     }
4435 
4436     /**
4437      * Returns the <code>int</code> value that the specified Unicode
4438      * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
     * an <code>int</code> with a value of 50.
4441      * <p>
4442      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4443      * <code>'&#92;u005A'</code>), lowercase
4444      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4445      * full width variant (<code>'&#92;uFF21'</code> through
4446      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4447      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4448      * through 35. This is independent of the Unicode specification,
4449      * which does not assign numeric values to these <code>char</code>
4450      * values.
4451      * <p>
4452      * If the character does not have a numeric value, then -1 is returned.
4453      * If the character has a numeric value that cannot be represented as a
4454      * nonnegative integer (for example, a fractional value), then -2
4455      * is returned.
4456      *
4457      * <p><b>Note:</b> This method cannot handle <a
4458      * href="#supplementary"> supplementary characters</a>. To support
4459      * all Unicode characters, including supplementary characters, use
4460      * the {@link #getNumericValue(int)} method.
4461      *
4462      * @param   ch      the character to be converted.
4463      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4464      *           value; -2 if the character has a numeric value that is not a
4465      *          nonnegative integer; -1 if the character has no numeric value.
4466      * @see     java.lang.Character#forDigit(int, int)
4467      * @see     java.lang.Character#isDigit(char)
4468      * @since   1.1
4469      */
4470     public static int getNumericValue(char ch) {
4471         return getNumericValue((int)ch);
4472     }
4473 
4474     /**
4475      * Returns the <code>int</code> value that the specified
4476      * character (Unicode code point) represents. For example, the character
4477      * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4478      * an <code>int</code> with a value of 50.
4479      * <p>
4480      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4481      * <code>'&#92;u005A'</code>), lowercase
4482      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4483      * full width variant (<code>'&#92;uFF21'</code> through
4484      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4485      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4486      * through 35. This is independent of the Unicode specification,
4487      * which does not assign numeric values to these <code>char</code>
4488      * values.
4489      * <p>
4490      * If the character does not have a numeric value, then -1 is returned.
4491      * If the character has a numeric value that cannot be represented as a
4492      * nonnegative integer (for example, a fractional value), then -2
4493      * is returned.
4494      *
4495      * @param   codePoint the character (Unicode code point) to be converted.
4496      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4497      *          value; -2 if the character has a numeric value that is not a
4498      *          nonnegative integer; -1 if the character has no numeric value.
4499      * @see     java.lang.Character#forDigit(int, int)
4500      * @see     java.lang.Character#isDigit(int)
4501      * @since   1.5
4502      */
4503     public static int getNumericValue(int codePoint) {
4504         return CharacterData.of(codePoint).getNumericValue(codePoint);
4505     }
4506 
4507     /**
4508      * Determines if the specified character is ISO-LATIN-1 white space.
4509      * This method returns <code>true</code> for the following five
4510      * characters only:
4511      * <table>
4512      * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
4513      *     <td><code>HORIZONTAL TABULATION</code></td></tr>
4514      * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
4515      *     <td><code>NEW LINE</code></td></tr>
4516      * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
4517      *     <td><code>FORM FEED</code></td></tr>
4518      * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
4519      *     <td><code>CARRIAGE RETURN</code></td></tr>
4520      * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
4521      *     <td><code>SPACE</code></td></tr>
4522      * </table>
4523      *
4524      * @param      ch   the character to be tested.
4525      * @return     <code>true</code> if the character is ISO-LATIN-1 white
4526      *             space; <code>false</code> otherwise.
4527      * @see        java.lang.Character#isSpaceChar(char)
4528      * @see        java.lang.Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
4530      */
4531     @Deprecated
4532     public static boolean isSpace(char ch) {
4533         return (ch <= 0x0020) &&
4534             (((((1L << 0x0009) |
4535             (1L << 0x000A) |
4536             (1L << 0x000C) |
4537             (1L << 0x000D) |
4538             (1L << 0x0020)) >> ch) & 1L) != 0);
4539     }
4540 
4541 
4542     /**
4543      * Determines if the specified character is a Unicode space character.
4544      * A character is considered to be a space character if and only if
4545      * it is specified to be a space character by the Unicode standard. This
4546      * method returns true if the character's general category type is any of
4547      * the following:
4548      * <ul>
4549      * <li> <code>SPACE_SEPARATOR</code>
4550      * <li> <code>LINE_SEPARATOR</code>
4551      * <li> <code>PARAGRAPH_SEPARATOR</code>
4552      * </ul>
4553      *
4554      * <p><b>Note:</b> This method cannot handle <a
4555      * href="#supplementary"> supplementary characters</a>. To support
4556      * all Unicode characters, including supplementary characters, use
4557      * the {@link #isSpaceChar(int)} method.
4558      *
4559      * @param   ch      the character to be tested.
4560      * @return  <code>true</code> if the character is a space character;
4561      *          <code>false</code> otherwise.
4562      * @see     java.lang.Character#isWhitespace(char)
4563      * @since   1.1
4564      */
4565     public static boolean isSpaceChar(char ch) {
4566         return isSpaceChar((int)ch);
4567     }
4568 
4569     /**
4570      * Determines if the specified character (Unicode code point) is a
4571      * Unicode space character.  A character is considered to be a
4572      * space character if and only if it is specified to be a space
4573      * character by the Unicode standard. This method returns true if
4574      * the character's general category type is any of the following:
4575      *
4576      * <ul>
4577      * <li> {@link #SPACE_SEPARATOR}
4578      * <li> {@link #LINE_SEPARATOR}
4579      * <li> {@link #PARAGRAPH_SEPARATOR}
4580      * </ul>
4581      *
4582      * @param   codePoint the character (Unicode code point) to be tested.
4583      * @return  <code>true</code> if the character is a space character;
4584      *          <code>false</code> otherwise.
4585      * @see     java.lang.Character#isWhitespace(int)
4586      * @since   1.5
4587      */
4588     public static boolean isSpaceChar(int codePoint) {
4589         return ((((1 << Character.SPACE_SEPARATOR) |
4590                   (1 << Character.LINE_SEPARATOR) |
4591                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
4592             != 0;
4593     }
4594 
4595     /**
4596      * Determines if the specified character is white space according to Java.
4597      * A character is a Java whitespace character if and only if it satisfies
4598      * one of the following criteria:
4599      * <ul>
4600      * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4601      *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4602      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4603      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4604      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4605      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4606      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4607      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4608      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4609      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4610      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4611      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4612      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4613      * </ul>
4614      *
4615      * <p><b>Note:</b> This method cannot handle <a
4616      * href="#supplementary"> supplementary characters</a>. To support
4617      * all Unicode characters, including supplementary characters, use
4618      * the {@link #isWhitespace(int)} method.
4619      *
4620      * @param   ch the character to be tested.
4621      * @return  <code>true</code> if the character is a Java whitespace
4622      *          character; <code>false</code> otherwise.
4623      * @see     java.lang.Character#isSpaceChar(char)
4624      * @since   1.1
4625      */
4626     public static boolean isWhitespace(char ch) {
4627         return isWhitespace((int)ch);
4628     }
4629 
4630     /**
4631      * Determines if the specified character (Unicode code point) is
4632      * white space according to Java.  A character is a Java
4633      * whitespace character if and only if it satisfies one of the
4634      * following criteria:
4635      * <ul>
4636      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4637      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4638      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4639      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4640      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4641      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4642      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4643      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4644      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4645      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4646      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4647      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4648      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4649      * </ul>
     *
4652      * @param   codePoint the character (Unicode code point) to be tested.
4653      * @return  <code>true</code> if the character is a Java whitespace
4654      *          character; <code>false</code> otherwise.
4655      * @see     java.lang.Character#isSpaceChar(int)
4656      * @since   1.5
4657      */
4658     public static boolean isWhitespace(int codePoint) {
4659         return CharacterData.of(codePoint).isWhitespace(codePoint);
4660     }
4661 
4662     /**
4663      * Determines if the specified character is an ISO control
4664      * character.  A character is considered to be an ISO control
4665      * character if its code is in the range <code>'&#92;u0000'</code>
4666      * through <code>'&#92;u001F'</code> or in the range
4667      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4668      *
4669      * <p><b>Note:</b> This method cannot handle <a
4670      * href="#supplementary"> supplementary characters</a>. To support
4671      * all Unicode characters, including supplementary characters, use
4672      * the {@link #isISOControl(int)} method.
4673      *
4674      * @param   ch      the character to be tested.
4675      * @return  <code>true</code> if the character is an ISO control character;
4676      *          <code>false</code> otherwise.
4677      *
4678      * @see     java.lang.Character#isSpaceChar(char)
4679      * @see     java.lang.Character#isWhitespace(char)
4680      * @since   1.1
4681      */
4682     public static boolean isISOControl(char ch) {
4683         return isISOControl((int)ch);
4684     }
4685 
4686     /**
4687      * Determines if the referenced character (Unicode code point) is an ISO control
4688      * character.  A character is considered to be an ISO control
4689      * character if its code is in the range <code>'&#92;u0000'</code>
4690      * through <code>'&#92;u001F'</code> or in the range
4691      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4692      *
4693      * @param   codePoint the character (Unicode code point) to be tested.
4694      * @return  <code>true</code> if the character is an ISO control character;
4695      *          <code>false</code> otherwise.
4696      * @see     java.lang.Character#isSpaceChar(int)
4697      * @see     java.lang.Character#isWhitespace(int)
4698      * @since   1.5
4699      */
4700     public static boolean isISOControl(int codePoint) {
4701         return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
4702             (codePoint >= 0x007F && codePoint <= 0x009F);
4703     }
4704 
4705     /**
4706      * Returns a value indicating a character's general category.
4707      *
4708      * <p><b>Note:</b> This method cannot handle <a
4709      * href="#supplementary"> supplementary characters</a>. To support
4710      * all Unicode characters, including supplementary characters, use
4711      * the {@link #getType(int)} method.
4712      *
4713      * @param   ch      the character to be tested.
4714      * @return  a value of type <code>int</code> representing the
4715      *          character's general category.
4716      * @see     java.lang.Character#COMBINING_SPACING_MARK
4717      * @see     java.lang.Character#CONNECTOR_PUNCTUATION
4718      * @see     java.lang.Character#CONTROL
4719      * @see     java.lang.Character#CURRENCY_SYMBOL
4720      * @see     java.lang.Character#DASH_PUNCTUATION
4721      * @see     java.lang.Character#DECIMAL_DIGIT_NUMBER
4722      * @see     java.lang.Character#ENCLOSING_MARK
4723      * @see     java.lang.Character#END_PUNCTUATION
4724      * @see     java.lang.Character#FINAL_QUOTE_PUNCTUATION
4725      * @see     java.lang.Character#FORMAT
4726      * @see     java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4727      * @see     java.lang.Character#LETTER_NUMBER
4728      * @see     java.lang.Character#LINE_SEPARATOR
4729      * @see     java.lang.Character#LOWERCASE_LETTER
4730      * @see     java.lang.Character#MATH_SYMBOL
4731      * @see     java.lang.Character#MODIFIER_LETTER
4732      * @see     java.lang.Character#MODIFIER_SYMBOL
4733      * @see     java.lang.Character#NON_SPACING_MARK
4734      * @see     java.lang.Character#OTHER_LETTER
4735      * @see     java.lang.Character#OTHER_NUMBER
4736      * @see     java.lang.Character#OTHER_PUNCTUATION
4737      * @see     java.lang.Character#OTHER_SYMBOL
4738      * @see     java.lang.Character#PARAGRAPH_SEPARATOR
4739      * @see     java.lang.Character#PRIVATE_USE
4740      * @see     java.lang.Character#SPACE_SEPARATOR
4741      * @see     java.lang.Character#START_PUNCTUATION
4742      * @see     java.lang.Character#SURROGATE
4743      * @see     java.lang.Character#TITLECASE_LETTER
4744      * @see     java.lang.Character#UNASSIGNED
4745      * @see     java.lang.Character#UPPERCASE_LETTER
4746      * @since   1.1
4747      */
4748     public static int getType(char ch) {
4749         return getType((int)ch);
4750     }
4751 
4752     /**
4753      * Returns a value indicating a character's general category.
4754      *
4755      * @param   codePoint the character (Unicode code point) to be tested.
4756      * @return  a value of type <code>int</code> representing the
4757      *          character's general category.
4758      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4759      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4760      * @see     Character#CONTROL CONTROL
4761      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4762      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
4763      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4764      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
4765      * @see     Character#END_PUNCTUATION END_PUNCTUATION
4766      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4767      * @see     Character#FORMAT FORMAT
4768      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4769      * @see     Character#LETTER_NUMBER LETTER_NUMBER
4770      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
4771      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
4772      * @see     Character#MATH_SYMBOL MATH_SYMBOL
4773      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
4774      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4775      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
4776      * @see     Character#OTHER_LETTER OTHER_LETTER
4777      * @see     Character#OTHER_NUMBER OTHER_NUMBER
4778      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4779      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
4780      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4781      * @see     Character#PRIVATE_USE PRIVATE_USE
4782      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
4783      * @see     Character#START_PUNCTUATION START_PUNCTUATION
4784      * @see     Character#SURROGATE SURROGATE
4785      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
4786      * @see     Character#UNASSIGNED UNASSIGNED
4787      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
4788      * @since   1.5
4789      */
4790     public static int getType(int codePoint) {
4791         return CharacterData.of(codePoint).getType(codePoint);
4792     }
4793 
4794     /**
4795      * Determines the character representation for a specific digit in
4796      * the specified radix. If the value of <code>radix</code> is not a
4797      * valid radix, or the value of <code>digit</code> is not a valid
4798      * digit in the specified radix, the null character
4799      * (<code>'&#92;u0000'</code>) is returned.
4800      * <p>
4801      * The <code>radix</code> argument is valid if it is greater than or
4802      * equal to <code>MIN_RADIX</code> and less than or equal to
4803      * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
4804      * <code>0&nbsp;&lt;=digit&nbsp;&lt;&nbsp;radix</code>.
4805      * <p>
4806      * If the digit is less than 10, then
4807      * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
4808      * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
4809      *
4810      * @param   digit   the number to convert to a character.
4811      * @param   radix   the radix.
4812      * @return  the <code>char</code> representation of the specified digit
4813      *          in the specified radix.
4814      * @see     java.lang.Character#MIN_RADIX
4815      * @see     java.lang.Character#MAX_RADIX
4816      * @see     java.lang.Character#digit(char, int)
4817      */
4818     public static char forDigit(int digit, int radix) {
4819         if ((digit >= radix) || (digit < 0)) {
4820             return '\0';
4821         }
4822         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
4823             return '\0';
4824         }
4825         if (digit < 10) {
4826             return (char)('0' + digit);
4827         }
4828         return (char)('a' - 10 + digit);
4829     }
4830 
4831     /**
4832      * Returns the Unicode directionality property for the given
4833      * character.  Character directionality is used to calculate the
4834      * visual ordering of text. The directionality value of undefined
4835      * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4836      *
4837      * <p><b>Note:</b> This method cannot handle <a
4838      * href="#supplementary"> supplementary characters</a>. To support
4839      * all Unicode characters, including supplementary characters, use
4840      * the {@link #getDirectionality(int)} method.
4841      *
4842      * @param  ch <code>char</code> for which the directionality property
4843      *            is requested.
4844      * @return the directionality property of the <code>char</code> value.
4845      *
4846      * @see Character#DIRECTIONALITY_UNDEFINED
4847      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4848      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4849      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4850      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4851      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4852      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4853      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4854      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4855      * @see Character#DIRECTIONALITY_NONSPACING_MARK
4856      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4857      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4858      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4859      * @see Character#DIRECTIONALITY_WHITESPACE
4860      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4861      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4862      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4863      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4864      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4865      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4866      * @since 1.4
4867      */
4868     public static byte getDirectionality(char ch) {
4869         return getDirectionality((int)ch);
4870     }
4871 
4872     /**
4873      * Returns the Unicode directionality property for the given
4874      * character (Unicode code point).  Character directionality is
4875      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
4877      * #DIRECTIONALITY_UNDEFINED}.
4878      *
4879      * @param   codePoint the character (Unicode code point) for which
4880      *          the directionality property is requested.
4881      * @return the directionality property of the character.
4882      *
4883      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4884      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4885      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4886      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4887      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4888      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4889      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4890      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4891      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4892      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4893      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4894      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4895      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4896      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4897      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4898      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4899      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4900      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4901      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4902      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4903      * @since    1.5
4904      */
4905     public static byte getDirectionality(int codePoint) {
4906         return CharacterData.of(codePoint).getDirectionality(codePoint);
4907     }
4908 
4909     /**
4910      * Determines whether the character is mirrored according to the
4911      * Unicode specification.  Mirrored characters should have their
4912      * glyphs horizontally mirrored when displayed in text that is
4913      * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
4914      * PARENTHESIS is semantically defined to be an <i>opening
4915      * parenthesis</i>.  This will appear as a "(" in text that is
4916      * left-to-right but as a ")" in text that is right-to-left.
4917      *
4918      * <p><b>Note:</b> This method cannot handle <a
4919      * href="#supplementary"> supplementary characters</a>. To support
4920      * all Unicode characters, including supplementary characters, use
4921      * the {@link #isMirrored(int)} method.
4922      *
4923      * @param  ch <code>char</code> for which the mirrored property is requested
4924      * @return <code>true</code> if the char is mirrored, <code>false</code>
4925      *         if the <code>char</code> is not mirrored or is not defined.
4926      * @since 1.4
4927      */
4928     public static boolean isMirrored(char ch) {
4929         return isMirrored((int)ch);
4930     }
4931 
4932     /**
4933      * Determines whether the specified character (Unicode code point)
4934      * is mirrored according to the Unicode specification.  Mirrored
4935      * characters should have their glyphs horizontally mirrored when
4936      * displayed in text that is right-to-left.  For example,
4937      * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
4938      * defined to be an <i>opening parenthesis</i>.  This will appear
4939      * as a "(" in text that is left-to-right but as a ")" in text
4940      * that is right-to-left.
4941      *
4942      * @param   codePoint the character (Unicode code point) to be tested.
4943      * @return  <code>true</code> if the character is mirrored, <code>false</code>
4944      *          if the character is not mirrored or is not defined.
4945      * @since   1.5
4946      */
4947     public static boolean isMirrored(int codePoint) {
4948         return CharacterData.of(codePoint).isMirrored(codePoint);
4949     }
4950 
4951     /**
4952      * Compares two <code>Character</code> objects numerically.
4953      *
4954      * @param   anotherCharacter   the <code>Character</code> to be compared.
4955 
4956      * @return  the value <code>0</code> if the argument <code>Character</code>
4957      *          is equal to this <code>Character</code>; a value less than
4958      *          <code>0</code> if this <code>Character</code> is numerically less
4959      *          than the <code>Character</code> argument; and a value greater than
4960      *          <code>0</code> if this <code>Character</code> is numerically greater
4961      *          than the <code>Character</code> argument (unsigned comparison).
4962      *          Note that this is strictly a numerical comparison; it is not
4963      *          locale-dependent.
4964      * @since   1.2
4965      */
4966     public int compareTo(Character anotherCharacter) {
4967         return compare(this.value, anotherCharacter.value);
4968     }
4969 
4970     /**
4971      * Compares two {@code char} values numerically.
4972      * The value returned is identical to what would be returned by:
4973      * <pre>
4974      *    Character.valueOf(x).compareTo(Character.valueOf(y))
4975      * </pre>
4976      *
4977      * @param  x the first {@code char} to compare
4978      * @param  y the second {@code char} to compare
4979      * @return the value {@code 0} if {@code x == y};
4980      *         a value less than {@code 0} if {@code x < y}; and
4981      *         a value greater than {@code 0} if {@code x > y}
4982      * @since 1.7
4983      */
4984     public static int compare(char x, char y) {
4985         return x - y;
4986     }
4987 
4988     /**
4989      * Converts the character (Unicode code point) argument to uppercase using
4990      * information from the UnicodeData file.
4991      * <p>
4992      *
4993      * @param   codePoint   the character (Unicode code point) to be converted.
4994      * @return  either the uppercase equivalent of the character, if
4995      *          any, or an error flag (<code>Character.ERROR</code>)
4996      *          that indicates that a 1:M <code>char</code> mapping exists.
4997      * @see     java.lang.Character#isLowerCase(char)
4998      * @see     java.lang.Character#isUpperCase(char)
4999      * @see     java.lang.Character#toLowerCase(char)
5000      * @see     java.lang.Character#toTitleCase(char)
5001      * @since 1.4
5002      */
5003     static int toUpperCaseEx(int codePoint) {
5004         assert isValidCodePoint(codePoint);
5005         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
5006     }
5007 
5008     /**
5009      * Converts the character (Unicode code point) argument to uppercase using case
5010      * mapping information from the SpecialCasing file in the Unicode
5011      * specification. If a character has no explicit uppercase
5012      * mapping, then the <code>char</code> itself is returned in the
5013      * <code>char[]</code>.
5014      *
5015      * @param   codePoint   the character (Unicode code point) to be converted.
5016      * @return a <code>char[]</code> with the uppercased character.
5017      * @since 1.4
5018      */
5019     static char[] toUpperCaseCharArray(int codePoint) {
5020         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
5021         assert isValidCodePoint(codePoint) &&
5022                !isSupplementaryCodePoint(codePoint);
5023         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
5024     }
5025 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
5033 
5034     /**
5035      * Returns the value obtained by reversing the order of the bytes in the
5036      * specified <tt>char</tt> value.
5037      *
5038      * @return the value obtained by reversing (or, equivalently, swapping)
5039      *     the bytes in the specified <tt>char</tt> value.
5040      * @since 1.5
5041      */
5042     public static char reverseBytes(char ch) {
5043         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
5044     }
5045 }