1 /*
   2  * Copyright 2002-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package java.lang;
  27 import java.util.Map;
  28 import java.util.HashMap;
  29 import java.util.Locale;
  30 
  31 /**
  32  * The <code>Character</code> class wraps a value of the primitive
  33  * type <code>char</code> in an object. An object of type
  34  * <code>Character</code> contains a single field whose type is
  35  * <code>char</code>.
  36  * <p>
  37  * In addition, this class provides several methods for determining
  38  * a character's category (lowercase letter, digit, etc.) and for converting
  39  * characters from uppercase to lowercase and vice versa.
  40  * <p>
  41  * Character information is based on the Unicode Standard, version 4.0.
  42  * <p>
  43  * The methods and data of class <code>Character</code> are defined by
  44  * the information in the <i>UnicodeData</i> file that is part of the
  45  * Unicode Character Database maintained by the Unicode
  46  * Consortium. This file specifies various properties including name
  47  * and general category for every defined Unicode code point or
  48  * character range.
  49  * <p>
  50  * The file and its description are available from the Unicode Consortium at:
  51  * <ul>
  52  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  53  * </ul>
  54  *
  55  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  56  *
  57  * <p>The <code>char</code> data type (and therefore the value that a
 * <code>Character</code> object encapsulates) is based on the
  59  * original Unicode specification, which defined characters as
  60  * fixed-width 16-bit entities. The Unicode standard has since been
  61  * changed to allow for characters whose representation requires more
  62  * than 16 bits.  The range of legal <em>code point</em>s is now
  63  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  64  * (Refer to the <a
  65  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  66  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  67  * standard.)
  68  *
  69  * <p>The set of characters from U+0000 to U+FFFF is sometimes
  70  * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
  71  * name="supplementary">Characters</a> whose code points are greater
  72  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  73  * 2 platform uses the UTF-16 representation in <code>char</code>
  74  * arrays and in the <code>String</code> and <code>StringBuffer</code>
  75  * classes. In this representation, supplementary characters are
  76  * represented as a pair of <code>char</code> values, the first from
  77  * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
  78  * second from the <em>low-surrogates</em> range
  79  * (&#92;uDC00-&#92;uDFFF).
  80  *
  81  * <p>A <code>char</code> value, therefore, represents Basic
  82  * Multilingual Plane (BMP) code points, including the surrogate
  83  * code points, or code units of the UTF-16 encoding. An
  84  * <code>int</code> value represents all Unicode code points,
  85  * including supplementary code points. The lower (least significant)
  86  * 21 bits of <code>int</code> are used to represent Unicode code
  87  * points and the upper (most significant) 11 bits must be zero.
  88  * Unless otherwise specified, the behavior with respect to
  89  * supplementary characters and surrogate <code>char</code> values is
  90  * as follows:
  91  *
  92  * <ul>
  93  * <li>The methods that only accept a <code>char</code> value cannot support
  94  * supplementary characters. They treat <code>char</code> values from the
  95  * surrogate ranges as undefined characters. For example,
  96  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
  97  * this specific value if followed by any low-surrogate value in a string
  98  * would represent a letter.
  99  *
 100  * <li>The methods that accept an <code>int</code> value support all
 101  * Unicode characters, including supplementary characters. For
 102  * example, <code>Character.isLetter(0x2F81A)</code> returns
 103  * <code>true</code> because the code point value represents a letter
 104  * (a CJK ideograph).
 105  * </ul>
 106  *
 107  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 108  * used for character values in the range between U+0000 and U+10FFFF,
 109  * and <em>Unicode code unit</em> is used for 16-bit
 110  * <code>char</code> values that are code units of the <em>UTF-16</em>
 111  * encoding. For more information on Unicode terminology, refer to the
 112  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 113  *
 114  * @author  Lee Boynton
 115  * @author  Guy Steele
 116  * @author  Akira Tanaka
 117  * @since   1.0
 118  */
 119 public final
 120 class Character extends Object implements java.io.Serializable, Comparable<Character> {
 121     /**
 122      * The minimum radix available for conversion to and from strings.
 123      * The constant value of this field is the smallest value permitted
 124      * for the radix argument in radix-conversion methods such as the
 125      * <code>digit</code> method, the <code>forDigit</code>
 126      * method, and the <code>toString</code> method of class
 127      * <code>Integer</code>.
 128      *
 129      * @see     java.lang.Character#digit(char, int)
 130      * @see     java.lang.Character#forDigit(int, int)
 131      * @see     java.lang.Integer#toString(int, int)
 132      * @see     java.lang.Integer#valueOf(java.lang.String)
 133      */
 134     public static final int MIN_RADIX = 2;
 135 
 136     /**
 137      * The maximum radix available for conversion to and from strings.
 138      * The constant value of this field is the largest value permitted
 139      * for the radix argument in radix-conversion methods such as the
 140      * <code>digit</code> method, the <code>forDigit</code>
 141      * method, and the <code>toString</code> method of class
 142      * <code>Integer</code>.
 143      *
 144      * @see     java.lang.Character#digit(char, int)
 145      * @see     java.lang.Character#forDigit(int, int)
 146      * @see     java.lang.Integer#toString(int, int)
 147      * @see     java.lang.Integer#valueOf(java.lang.String)
 148      */
 149     public static final int MAX_RADIX = 36;
 150 
 151     /**
 152      * The constant value of this field is the smallest value of type
 153      * <code>char</code>, <code>'&#92;u0000'</code>.
 154      *
 155      * @since   1.0.2
 156      */
 157     public static final char   MIN_VALUE = '\u0000';
 158 
 159     /**
 160      * The constant value of this field is the largest value of type
 161      * <code>char</code>, <code>'&#92;uFFFF'</code>.
 162      *
 163      * @since   1.0.2
 164      */
 165     public static final char   MAX_VALUE = '\uffff';
 166 
 167     /**
 168      * The <code>Class</code> instance representing the primitive type
 169      * <code>char</code>.
 170      *
 171      * @since   1.1
 172      */
 173     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 174 
 175    /*
 176     * Normative general types
 177     */
 178 
 179    /*
 180     * General character types
 181     */
 182 
 183    /**
 184     * General category "Cn" in the Unicode specification.
 185     * @since   1.1
 186     */
 187     public static final byte
 188         UNASSIGNED                  = 0;
 189 
 190    /**
 191     * General category "Lu" in the Unicode specification.
 192     * @since   1.1
 193     */
 194     public static final byte
 195         UPPERCASE_LETTER            = 1;
 196 
 197    /**
 198     * General category "Ll" in the Unicode specification.
 199     * @since   1.1
 200     */
 201     public static final byte
 202         LOWERCASE_LETTER            = 2;
 203 
 204    /**
 205     * General category "Lt" in the Unicode specification.
 206     * @since   1.1
 207     */
 208     public static final byte
 209         TITLECASE_LETTER            = 3;
 210 
 211    /**
 212     * General category "Lm" in the Unicode specification.
 213     * @since   1.1
 214     */
 215     public static final byte
 216         MODIFIER_LETTER             = 4;
 217 
 218    /**
 219     * General category "Lo" in the Unicode specification.
 220     * @since   1.1
 221     */
 222     public static final byte
 223         OTHER_LETTER                = 5;
 224 
 225    /**
 226     * General category "Mn" in the Unicode specification.
 227     * @since   1.1
 228     */
 229     public static final byte
 230         NON_SPACING_MARK            = 6;
 231 
 232    /**
 233     * General category "Me" in the Unicode specification.
 234     * @since   1.1
 235     */
 236     public static final byte
 237         ENCLOSING_MARK              = 7;
 238 
 239    /**
 240     * General category "Mc" in the Unicode specification.
 241     * @since   1.1
 242     */
 243     public static final byte
 244         COMBINING_SPACING_MARK      = 8;
 245 
 246    /**
 247     * General category "Nd" in the Unicode specification.
 248     * @since   1.1
 249     */
 250     public static final byte
 251         DECIMAL_DIGIT_NUMBER        = 9;
 252 
 253    /**
 254     * General category "Nl" in the Unicode specification.
 255     * @since   1.1
 256     */
 257     public static final byte
 258         LETTER_NUMBER               = 10;
 259 
 260    /**
 261     * General category "No" in the Unicode specification.
 262     * @since   1.1
 263     */
 264     public static final byte
 265         OTHER_NUMBER                = 11;
 266 
 267    /**
 268     * General category "Zs" in the Unicode specification.
 269     * @since   1.1
 270     */
 271     public static final byte
 272         SPACE_SEPARATOR             = 12;
 273 
 274    /**
 275     * General category "Zl" in the Unicode specification.
 276     * @since   1.1
 277     */
 278     public static final byte
 279         LINE_SEPARATOR              = 13;
 280 
 281    /**
 282     * General category "Zp" in the Unicode specification.
 283     * @since   1.1
 284     */
 285     public static final byte
 286         PARAGRAPH_SEPARATOR         = 14;
 287 
 288    /**
 289     * General category "Cc" in the Unicode specification.
 290     * @since   1.1
 291     */
 292     public static final byte
 293         CONTROL                     = 15;
 294 
 295    /**
 296     * General category "Cf" in the Unicode specification.
 297     * @since   1.1
 298     */
 299     public static final byte
 300         FORMAT                      = 16;
 301 
 302    /**
 303     * General category "Co" in the Unicode specification.
 304     * @since   1.1
 305     */
 306     public static final byte
 307         PRIVATE_USE                 = 18;
 308 
 309    /**
 310     * General category "Cs" in the Unicode specification.
 311     * @since   1.1
 312     */
 313     public static final byte
 314         SURROGATE                   = 19;
 315 
 316    /**
 317     * General category "Pd" in the Unicode specification.
 318     * @since   1.1
 319     */
 320     public static final byte
 321         DASH_PUNCTUATION            = 20;
 322 
 323    /**
 324     * General category "Ps" in the Unicode specification.
 325     * @since   1.1
 326     */
 327     public static final byte
 328         START_PUNCTUATION           = 21;
 329 
 330    /**
 331     * General category "Pe" in the Unicode specification.
 332     * @since   1.1
 333     */
 334     public static final byte
 335         END_PUNCTUATION             = 22;
 336 
 337    /**
 338     * General category "Pc" in the Unicode specification.
 339     * @since   1.1
 340     */
 341     public static final byte
 342         CONNECTOR_PUNCTUATION       = 23;
 343 
 344    /**
 345     * General category "Po" in the Unicode specification.
 346     * @since   1.1
 347     */
 348     public static final byte
 349         OTHER_PUNCTUATION           = 24;
 350 
 351    /**
 352     * General category "Sm" in the Unicode specification.
 353     * @since   1.1
 354     */
 355     public static final byte
 356         MATH_SYMBOL                 = 25;
 357 
 358    /**
 359     * General category "Sc" in the Unicode specification.
 360     * @since   1.1
 361     */
 362     public static final byte
 363         CURRENCY_SYMBOL             = 26;
 364 
 365    /**
 366     * General category "Sk" in the Unicode specification.
 367     * @since   1.1
 368     */
 369     public static final byte
 370         MODIFIER_SYMBOL             = 27;
 371 
 372    /**
 373     * General category "So" in the Unicode specification.
 374     * @since   1.1
 375     */
 376     public static final byte
 377         OTHER_SYMBOL                = 28;
 378 
 379    /**
 380     * General category "Pi" in the Unicode specification.
 381     * @since   1.4
 382     */
 383     public static final byte
 384         INITIAL_QUOTE_PUNCTUATION   = 29;
 385 
 386    /**
 387     * General category "Pf" in the Unicode specification.
 388     * @since   1.4
 389     */
 390     public static final byte
 391         FINAL_QUOTE_PUNCTUATION     = 30;
 392 
 393     /**
 394      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 395      */
 396      static final int ERROR = 0xFFFFFFFF;
 397 
 398 
 399     /**
 400      * Undefined bidirectional character type. Undefined <code>char</code>
 401      * values have undefined directionality in the Unicode specification.
 402      * @since 1.4
 403      */
 404      public static final byte DIRECTIONALITY_UNDEFINED = -1;
 405 
 406     /**
 407      * Strong bidirectional character type "L" in the Unicode specification.
 408      * @since 1.4
 409      */
 410     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 411 
 412     /**
 413      * Strong bidirectional character type "R" in the Unicode specification.
 414      * @since 1.4
 415      */
 416     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 417 
 418     /**
 419     * Strong bidirectional character type "AL" in the Unicode specification.
 420      * @since 1.4
 421      */
 422     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 423 
 424     /**
 425      * Weak bidirectional character type "EN" in the Unicode specification.
 426      * @since 1.4
 427      */
 428     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 429 
 430     /**
 431      * Weak bidirectional character type "ES" in the Unicode specification.
 432      * @since 1.4
 433      */
 434     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 435 
 436     /**
 437      * Weak bidirectional character type "ET" in the Unicode specification.
 438      * @since 1.4
 439      */
 440     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 441 
 442     /**
 443      * Weak bidirectional character type "AN" in the Unicode specification.
 444      * @since 1.4
 445      */
 446     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 447 
 448     /**
 449      * Weak bidirectional character type "CS" in the Unicode specification.
 450      * @since 1.4
 451      */
 452     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 453 
 454     /**
 455      * Weak bidirectional character type "NSM" in the Unicode specification.
 456      * @since 1.4
 457      */
 458     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 459 
 460     /**
 461      * Weak bidirectional character type "BN" in the Unicode specification.
 462      * @since 1.4
 463      */
 464     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 465 
 466     /**
 467      * Neutral bidirectional character type "B" in the Unicode specification.
 468      * @since 1.4
 469      */
 470     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 471 
 472     /**
 473      * Neutral bidirectional character type "S" in the Unicode specification.
 474      * @since 1.4
 475      */
 476     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 477 
 478     /**
 479      * Neutral bidirectional character type "WS" in the Unicode specification.
 480      * @since 1.4
 481      */
 482     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 483 
 484     /**
 485      * Neutral bidirectional character type "ON" in the Unicode specification.
 486      * @since 1.4
 487      */
 488     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 489 
 490     /**
 491      * Strong bidirectional character type "LRE" in the Unicode specification.
 492      * @since 1.4
 493      */
 494     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 495 
 496     /**
 497      * Strong bidirectional character type "LRO" in the Unicode specification.
 498      * @since 1.4
 499      */
 500     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 501 
 502     /**
 503      * Strong bidirectional character type "RLE" in the Unicode specification.
 504      * @since 1.4
 505      */
 506     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 507 
 508     /**
 509      * Strong bidirectional character type "RLO" in the Unicode specification.
 510      * @since 1.4
 511      */
 512     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 513 
 514     /**
 515      * Weak bidirectional character type "PDF" in the Unicode specification.
 516      * @since 1.4
 517      */
 518     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 519 
 520     /**
 521      * The minimum value of a Unicode high-surrogate code unit in the
 522      * UTF-16 encoding. A high-surrogate is also known as a
 523      * <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a Unicode high-surrogate code unit in the
 531      * UTF-16 encoding. A high-surrogate is also known as a
 532      * <i>leading-surrogate</i>.
 533      *
 534      * @since 1.5
 535      */
 536     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 537 
 538     /**
 539      * The minimum value of a Unicode low-surrogate code unit in the
 540      * UTF-16 encoding. A low-surrogate is also known as a
 541      * <i>trailing-surrogate</i>.
 542      *
 543      * @since 1.5
 544      */
 545     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 546 
 547     /**
 548      * The maximum value of a Unicode low-surrogate code unit in the
 549      * UTF-16 encoding. A low-surrogate is also known as a
 550      * <i>trailing-surrogate</i>.
 551      *
 552      * @since 1.5
 553      */
 554     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 555 
 556     /**
 557      * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
 558      *
 559      * @since 1.5
 560      */
 561     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 562 
 563     /**
 564      * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 569 
 570     /**
 571      * The minimum value of a supplementary code point.
 572      *
 573      * @since 1.5
 574      */
 575     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 576 
 577     /**
 578      * The minimum value of a Unicode code point.
 579      *
 580      * @since 1.5
 581      */
 582     public static final int MIN_CODE_POINT = 0x000000;
 583 
 584     /**
 585      * The maximum value of a Unicode code point.
 586      *
 587      * @since 1.5
 588      */
 589     public static final int MAX_CODE_POINT = 0x10ffff;
 590 
 591 
 592     /**
 593      * Instances of this class represent particular subsets of the Unicode
 594      * character set.  The only family of subsets defined in the
 595      * <code>Character</code> class is <code>{@link Character.UnicodeBlock
 596      * UnicodeBlock}</code>.  Other portions of the Java API may define other
 597      * subsets for their own purposes.
 598      *
 599      * @since 1.2
 600      */
 601     public static class Subset  {
 602 
 603         private String name;
 604 
 605         /**
 606          * Constructs a new <code>Subset</code> instance.
 607          *
 608          * @exception NullPointerException if name is <code>null</code>
 609          * @param  name  The name of this subset
 610          */
 611         protected Subset(String name) {
 612             if (name == null) {
 613                 throw new NullPointerException("name");
 614             }
 615             this.name = name;
 616         }
 617 
 618         /**
 619          * Compares two <code>Subset</code> objects for equality.
 620          * This method returns <code>true</code> if and only if
 621          * <code>this</code> and the argument refer to the same
 622          * object; since this method is <code>final</code>, this
 623          * guarantee holds for all subclasses.
 624          */
 625         public final boolean equals(Object obj) {
 626             return (this == obj);
 627         }
 628 
 629         /**
 630          * Returns the standard hash code as defined by the
 631          * <code>{@link Object#hashCode}</code> method.  This method
 632          * is <code>final</code> in order to ensure that the
 633          * <code>equals</code> and <code>hashCode</code> methods will
 634          * be consistent in all subclasses.
 635          */
 636         public final int hashCode() {
 637             return super.hashCode();
 638         }
 639 
 640         /**
 641          * Returns the name of this subset.
 642          */
 643         public final String toString() {
 644             return name;
 645         }
 646     }
 647 
 648     /**
 649      * A family of character subsets representing the character blocks in the
 650      * Unicode specification. Character blocks generally define characters
 651      * used for a specific script or purpose. A character is contained by
 652      * at most one Unicode block.
 653      *
 654      * @since 1.2
 655      */
 656     public static final class UnicodeBlock extends Subset {
 657 
 658         private static Map map = new HashMap();
 659 
 660         /**
 661          * Create a UnicodeBlock with the given identifier name.
 662          * This name must be the same as the block identifier.
 663          */
 664         private UnicodeBlock(String idName) {
 665             super(idName);
 666             map.put(idName.toUpperCase(Locale.US), this);
 667         }
 668 
 669         /**
 670          * Create a UnicodeBlock with the given identifier name and
 671          * alias name.
 672          */
 673         private UnicodeBlock(String idName, String alias) {
 674             this(idName);
 675             map.put(alias.toUpperCase(Locale.US), this);
 676         }
 677 
 678         /**
 679          * Create a UnicodeBlock with the given identifier name and
 680          * alias names.
 681          */
 682         private UnicodeBlock(String idName, String[] aliasName) {
 683             this(idName);
 684             if (aliasName != null) {
 685                 for(int x=0; x<aliasName.length; ++x) {
 686                     map.put(aliasName[x].toUpperCase(Locale.US), this);
 687                 }
 688             }
 689         }
 690 
 691         /**
 692          * Constant for the "Basic Latin" Unicode character block.
 693          * @since 1.2
 694          */
 695         public static final UnicodeBlock  BASIC_LATIN =
 696             new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
 697 
 698         /**
 699          * Constant for the "Latin-1 Supplement" Unicode character block.
 700          * @since 1.2
 701          */
 702         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 703             new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
 704 
 705         /**
 706          * Constant for the "Latin Extended-A" Unicode character block.
 707          * @since 1.2
 708          */
 709         public static final UnicodeBlock LATIN_EXTENDED_A =
 710             new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
 711 
 712         /**
 713          * Constant for the "Latin Extended-B" Unicode character block.
 714          * @since 1.2
 715          */
 716         public static final UnicodeBlock LATIN_EXTENDED_B =
 717             new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
 718 
 719         /**
 720          * Constant for the "IPA Extensions" Unicode character block.
 721          * @since 1.2
 722          */
 723         public static final UnicodeBlock IPA_EXTENSIONS =
 724             new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
 725 
 726         /**
 727          * Constant for the "Spacing Modifier Letters" Unicode character block.
 728          * @since 1.2
 729          */
 730         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 731             new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
 732                                                                         "SpacingModifierLetters"});
 733 
 734         /**
 735          * Constant for the "Combining Diacritical Marks" Unicode character block.
 736          * @since 1.2
 737          */
 738         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 739             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
 740                                                                           "CombiningDiacriticalMarks" });
 741 
 742         /**
 743          * Constant for the "Greek and Coptic" Unicode character block.
 744          * <p>
 745          * This block was previously known as the "Greek" block.
 746          *
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock GREEK
 750             = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
 751 
 752         /**
 753          * Constant for the "Cyrillic" Unicode character block.
 754          * @since 1.2
 755          */
 756         public static final UnicodeBlock CYRILLIC =
 757             new UnicodeBlock("CYRILLIC");
 758 
 759         /**
 760          * Constant for the "Armenian" Unicode character block.
 761          * @since 1.2
 762          */
 763         public static final UnicodeBlock ARMENIAN =
 764             new UnicodeBlock("ARMENIAN");
 765 
 766         /**
 767          * Constant for the "Hebrew" Unicode character block.
 768          * @since 1.2
 769          */
 770         public static final UnicodeBlock HEBREW =
 771             new UnicodeBlock("HEBREW");
 772 
 773         /**
 774          * Constant for the "Arabic" Unicode character block.
 775          * @since 1.2
 776          */
 777         public static final UnicodeBlock ARABIC =
 778             new UnicodeBlock("ARABIC");
 779 
 780         /**
 781          * Constant for the "Devanagari" Unicode character block.
 782          * @since 1.2
 783          */
 784         public static final UnicodeBlock DEVANAGARI =
 785             new UnicodeBlock("DEVANAGARI");
 786 
 787         /**
 788          * Constant for the "Bengali" Unicode character block.
 789          * @since 1.2
 790          */
 791         public static final UnicodeBlock BENGALI =
 792             new UnicodeBlock("BENGALI");
 793 
 794         /**
 795          * Constant for the "Gurmukhi" Unicode character block.
 796          * @since 1.2
 797          */
 798         public static final UnicodeBlock GURMUKHI =
 799             new UnicodeBlock("GURMUKHI");
 800 
 801         /**
 802          * Constant for the "Gujarati" Unicode character block.
 803          * @since 1.2
 804          */
 805         public static final UnicodeBlock GUJARATI =
 806             new UnicodeBlock("GUJARATI");
 807 
 808         /**
 809          * Constant for the "Oriya" Unicode character block.
 810          * @since 1.2
 811          */
 812         public static final UnicodeBlock ORIYA =
 813             new UnicodeBlock("ORIYA");
 814 
 815         /**
 816          * Constant for the "Tamil" Unicode character block.
 817          * @since 1.2
 818          */
 819         public static final UnicodeBlock TAMIL =
 820             new UnicodeBlock("TAMIL");
 821 
 822         /**
 823          * Constant for the "Telugu" Unicode character block.
 824          * @since 1.2
 825          */
 826         public static final UnicodeBlock TELUGU =
 827             new UnicodeBlock("TELUGU");
 828 
 829         /**
 830          * Constant for the "Kannada" Unicode character block.
 831          * @since 1.2
 832          */
 833         public static final UnicodeBlock KANNADA =
 834             new UnicodeBlock("KANNADA");
 835 
 836         /**
 837          * Constant for the "Malayalam" Unicode character block.
 838          * @since 1.2
 839          */
 840         public static final UnicodeBlock MALAYALAM =
 841             new UnicodeBlock("MALAYALAM");
 842 
 843         /**
 844          * Constant for the "Thai" Unicode character block.
 845          * @since 1.2
 846          */
 847         public static final UnicodeBlock THAI =
 848             new UnicodeBlock("THAI");
 849 
 850         /**
 851          * Constant for the "Lao" Unicode character block.
 852          * @since 1.2
 853          */
 854         public static final UnicodeBlock LAO =
 855             new UnicodeBlock("LAO");
 856 
 857         /**
 858          * Constant for the "Tibetan" Unicode character block.
 859          * @since 1.2
 860          */
 861         public static final UnicodeBlock TIBETAN =
 862             new UnicodeBlock("TIBETAN");
 863 
 864         /**
 865          * Constant for the "Georgian" Unicode character block.
 866          * @since 1.2
 867          */
 868         public static final UnicodeBlock GEORGIAN =
 869             new UnicodeBlock("GEORGIAN");
 870 
 871         /**
 872          * Constant for the "Hangul Jamo" Unicode character block.
 873          * @since 1.2
 874          */
 875         public static final UnicodeBlock HANGUL_JAMO =
 876             new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
 877 
 878         /**
 879          * Constant for the "Latin Extended Additional" Unicode character block.
 880          * @since 1.2
 881          */
 882         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 883             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
 884                                                                         "LatinExtendedAdditional"});
 885 
 886         /**
 887          * Constant for the "Greek Extended" Unicode character block.
 888          * @since 1.2
 889          */
 890         public static final UnicodeBlock GREEK_EXTENDED =
 891             new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
 892 
 893         /**
 894          * Constant for the "General Punctuation" Unicode character block.
 895          * @since 1.2
 896          */
 897         public static final UnicodeBlock GENERAL_PUNCTUATION =
 898             new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
 899 
 900         /**
 901          * Constant for the "Superscripts and Subscripts" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 905             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
 906                                                                           "SuperscriptsandSubscripts" });
 907 
 908         /**
 909          * Constant for the "Currency Symbols" Unicode character block.
 910          * @since 1.2
 911          */
 912         public static final UnicodeBlock CURRENCY_SYMBOLS =
 913             new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
 914 
 915         /**
 916          * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
 917          * <p>
 918          * This block was previously known as "Combining Marks for Symbols".
 919          * @since 1.2
 920          */
 921         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 922             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
 923                                                                                                                                                    "CombiningDiacriticalMarksforSymbols",
 924                                                                            "Combining Marks for Symbols",
 925                                                                            "CombiningMarksforSymbols" });
 926 
 927         /**
 928          * Constant for the "Letterlike Symbols" Unicode character block.
 929          * @since 1.2
 930          */
 931         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 932             new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
 933 
 934         /**
 935          * Constant for the "Number Forms" Unicode character block.
 936          * @since 1.2
 937          */
 938         public static final UnicodeBlock NUMBER_FORMS =
 939             new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
 940 
 941         /**
 942          * Constant for the "Arrows" Unicode character block.
 943          * @since 1.2
 944          */
 945         public static final UnicodeBlock ARROWS =
 946             new UnicodeBlock("ARROWS");
 947 
 948         /**
 949          * Constant for the "Mathematical Operators" Unicode character block.
 950          * @since 1.2
 951          */
 952         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 953             new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
 954                                                                      "MathematicalOperators"});
 955 
 956         /**
 957          * Constant for the "Miscellaneous Technical" Unicode character block.
 958          * @since 1.2
 959          */
 960         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 961             new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
 962                                                                       "MiscellaneousTechnical"});
 963 
 964         /**
 965          * Constant for the "Control Pictures" Unicode character block.
 966          * @since 1.2
 967          */
 968         public static final UnicodeBlock CONTROL_PICTURES =
 969             new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
 970 
 971         /**
 972          * Constant for the "Optical Character Recognition" Unicode character block.
 973          * @since 1.2
 974          */
 975         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 976             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
 977                                                                             "OpticalCharacterRecognition"});
 978 
 979         /**
 980          * Constant for the "Enclosed Alphanumerics" Unicode character block.
 981          * @since 1.2
 982          */
 983         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
 984             new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
 985                                                                      "EnclosedAlphanumerics"});
 986 
 987         /**
 988          * Constant for the "Box Drawing" Unicode character block.
 989          * @since 1.2
 990          */
 991         public static final UnicodeBlock BOX_DRAWING =
 992             new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
 993 
 994         /**
 995          * Constant for the "Block Elements" Unicode character block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock BLOCK_ELEMENTS =
 999             new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
1000 
1001         /**
1002          * Constant for the "Geometric Shapes" Unicode character block.
1003          * @since 1.2
1004          */
1005         public static final UnicodeBlock GEOMETRIC_SHAPES =
1006             new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
1007 
1008         /**
1009          * Constant for the "Miscellaneous Symbols" Unicode character block.
1010          * @since 1.2
1011          */
1012         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1013             new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
1014                                                                     "MiscellaneousSymbols"});
1015 
1016         /**
1017          * Constant for the "Dingbats" Unicode character block.
1018          * @since 1.2
1019          */
1020         public static final UnicodeBlock DINGBATS =
1021             new UnicodeBlock("DINGBATS");
1022 
1023         /**
1024          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1025          * @since 1.2
1026          */
1027         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1028             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
1029                                                                           "CJKSymbolsandPunctuation"});
1030 
1031         /**
1032          * Constant for the "Hiragana" Unicode character block.
1033          * @since 1.2
1034          */
1035         public static final UnicodeBlock HIRAGANA =
1036             new UnicodeBlock("HIRAGANA");
1037 
1038         /**
1039          * Constant for the "Katakana" Unicode character block.
1040          * @since 1.2
1041          */
1042         public static final UnicodeBlock KATAKANA =
1043             new UnicodeBlock("KATAKANA");
1044 
1045         /**
1046          * Constant for the "Bopomofo" Unicode character block.
1047          * @since 1.2
1048          */
1049         public static final UnicodeBlock BOPOMOFO =
1050             new UnicodeBlock("BOPOMOFO");
1051 
1052         /**
1053          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1054          * @since 1.2
1055          */
1056         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1057             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
1058                                                                         "HangulCompatibilityJamo"});
1059 
1060         /**
1061          * Constant for the "Kanbun" Unicode character block.
1062          * @since 1.2
1063          */
1064         public static final UnicodeBlock KANBUN =
1065             new UnicodeBlock("KANBUN");
1066 
1067         /**
1068          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1069          * @since 1.2
1070          */
1071         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1072             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
1073                                                                               "EnclosedCJKLettersandMonths"});
1074 
1075         /**
1076          * Constant for the "CJK Compatibility" Unicode character block.
1077          * @since 1.2
1078          */
1079         public static final UnicodeBlock CJK_COMPATIBILITY =
1080             new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
1081 
1082         /**
1083          * Constant for the "CJK Unified Ideographs" Unicode character block.
1084          * @since 1.2
1085          */
1086         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1087             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
1088                                                                      "CJKUnifiedIdeographs"});
1089 
1090         /**
1091          * Constant for the "Hangul Syllables" Unicode character block.
1092          * @since 1.2
1093          */
1094         public static final UnicodeBlock HANGUL_SYLLABLES =
1095             new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
1096 
1097         /**
1098          * Constant for the "Private Use Area" Unicode character block.
1099          * @since 1.2
1100          */
1101         public static final UnicodeBlock PRIVATE_USE_AREA =
1102             new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
1103 
1104         /**
1105          * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1106          * @since 1.2
1107          */
1108         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1109             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1110                              new String[] {"CJK Compatibility Ideographs",
1111                                            "CJKCompatibilityIdeographs"});
1112 
1113         /**
1114          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1115          * @since 1.2
1116          */
1117         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1118             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
1119                                                                             "AlphabeticPresentationForms"});
1120 
1121         /**
1122          * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1123          * @since 1.2
1124          */
1125         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1126             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
1127                                                                           "ArabicPresentationForms-A"});
1128 
1129         /**
1130          * Constant for the "Combining Half Marks" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock COMBINING_HALF_MARKS =
1134             new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
1135                                                                    "CombiningHalfMarks"});
1136 
1137         /**
1138          * Constant for the "CJK Compatibility Forms" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1142             new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
1143                                                                       "CJKCompatibilityForms"});
1144 
1145         /**
1146          * Constant for the "Small Form Variants" Unicode character block.
1147          * @since 1.2
1148          */
1149         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1150             new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
1151                                                                   "SmallFormVariants"});
1152 
1153         /**
1154          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1155          * @since 1.2
1156          */
1157         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1158             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
1159                                                                           "ArabicPresentationForms-B"});
1160 
1161         /**
1162          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1163          * @since 1.2
1164          */
1165         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1166             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1167                              new String[] {"Halfwidth and Fullwidth Forms",
1168                                            "HalfwidthandFullwidthForms"});
1169 
1170         /**
1171          * Constant for the "Specials" Unicode character block.
1172          * @since 1.2
1173          */
1174         public static final UnicodeBlock SPECIALS =
1175             new UnicodeBlock("SPECIALS");
1176 
1177         /**
1178          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1179          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1180          *             {@link #LOW_SURROGATES}. These new constants match
1181          *             the block definitions of the Unicode Standard.
1182          *             The {@link #of(char)} and {@link #of(int)} methods
1183          *             return the new constants, not SURROGATES_AREA.
1184          */
1185         @Deprecated
1186         public static final UnicodeBlock SURROGATES_AREA =
1187             new UnicodeBlock("SURROGATES_AREA");
1188 
1189         /**
1190          * Constant for the "Syriac" Unicode character block.
1191          * @since 1.4
1192          */
1193         public static final UnicodeBlock SYRIAC =
1194             new UnicodeBlock("SYRIAC");
1195 
1196         /**
1197          * Constant for the "Thaana" Unicode character block.
1198          * @since 1.4
1199          */
1200         public static final UnicodeBlock THAANA =
1201             new UnicodeBlock("THAANA");
1202 
1203         /**
1204          * Constant for the "Sinhala" Unicode character block.
1205          * @since 1.4
1206          */
1207         public static final UnicodeBlock SINHALA =
1208             new UnicodeBlock("SINHALA");
1209 
1210         /**
1211          * Constant for the "Myanmar" Unicode character block.
1212          * @since 1.4
1213          */
1214         public static final UnicodeBlock MYANMAR =
1215             new UnicodeBlock("MYANMAR");
1216 
1217         /**
1218          * Constant for the "Ethiopic" Unicode character block.
1219          * @since 1.4
1220          */
1221         public static final UnicodeBlock ETHIOPIC =
1222             new UnicodeBlock("ETHIOPIC");
1223 
1224         /**
1225          * Constant for the "Cherokee" Unicode character block.
1226          * @since 1.4
1227          */
1228         public static final UnicodeBlock CHEROKEE =
1229             new UnicodeBlock("CHEROKEE");
1230 
1231         /**
1232          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1233          * @since 1.4
1234          */
1235         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1236             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1237                              new String[] {"Unified Canadian Aboriginal Syllabics",
1238                                            "UnifiedCanadianAboriginalSyllabics"});
1239 
1240         /**
1241          * Constant for the "Ogham" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock OGHAM =
1245                              new UnicodeBlock("OGHAM");
1246 
1247         /**
1248          * Constant for the "Runic" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock RUNIC =
1252                              new UnicodeBlock("RUNIC");
1253 
1254         /**
1255          * Constant for the "Khmer" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock KHMER =
1259                              new UnicodeBlock("KHMER");
1260 
1261         /**
1262          * Constant for the "Mongolian" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MONGOLIAN =
1266                              new UnicodeBlock("MONGOLIAN");
1267 
1268         /**
1269          * Constant for the "Braille Patterns" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock BRAILLE_PATTERNS =
1273             new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
1274                                                                "BraillePatterns"});
1275 
1276         /**
1277          * Constant for the "CJK Radicals Supplement" Unicode character block.
1278          * @since 1.4
1279          */
1280         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1281              new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
1282                                                                        "CJKRadicalsSupplement"});
1283 
1284         /**
1285          * Constant for the "Kangxi Radicals" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock KANGXI_RADICALS =
1289             new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
1290 
1291         /**
1292          * Constant for the "Ideographic Description Characters" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1296             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
1297                                                                                  "IdeographicDescriptionCharacters"});
1298 
1299         /**
1300          * Constant for the "Bopomofo Extended" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1304             new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
1305                                                                 "BopomofoExtended"});
1306 
1307         /**
1308          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1309          * @since 1.4
1310          */
1311         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1312             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
1313                                                                                  "CJKUnifiedIdeographsExtensionA"});
1314 
1315         /**
1316          * Constant for the "Yi Syllables" Unicode character block.
1317          * @since 1.4
1318          */
1319         public static final UnicodeBlock YI_SYLLABLES =
1320             new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
1321 
1322         /**
1323          * Constant for the "Yi Radicals" Unicode character block.
1324          * @since 1.4
1325          */
1326         public static final UnicodeBlock YI_RADICALS =
1327             new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
1328 
1329 
1330         /**
1331          * Constant for the "Cyrillic Supplementary" Unicode character block.
1332          * @since 1.5
1333          */
1334         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1335             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1336                              new String[] {"Cyrillic Supplementary",
1337                                            "CyrillicSupplementary",
1338                                            "Cyrillic Supplement",
1339                                            "CyrillicSupplement"});
1340 
1341         /**
1342          * Constant for the "Tagalog" Unicode character block.
1343          * @since 1.5
1344          */
1345         public static final UnicodeBlock TAGALOG =
1346             new UnicodeBlock("TAGALOG");
1347 
1348         /**
1349          * Constant for the "Hanunoo" Unicode character block.
1350          * @since 1.5
1351          */
1352         public static final UnicodeBlock HANUNOO =
1353             new UnicodeBlock("HANUNOO");
1354 
1355         /**
1356          * Constant for the "Buhid" Unicode character block.
1357          * @since 1.5
1358          */
1359         public static final UnicodeBlock BUHID =
1360             new UnicodeBlock("BUHID");
1361 
1362         /**
1363          * Constant for the "Tagbanwa" Unicode character block.
1364          * @since 1.5
1365          */
1366         public static final UnicodeBlock TAGBANWA =
1367             new UnicodeBlock("TAGBANWA");
1368 
1369         /**
1370          * Constant for the "Limbu" Unicode character block.
1371          * @since 1.5
1372          */
1373         public static final UnicodeBlock LIMBU =
1374             new UnicodeBlock("LIMBU");
1375 
1376         /**
1377          * Constant for the "Tai Le" Unicode character block.
1378          * @since 1.5
1379          */
1380         public static final UnicodeBlock TAI_LE =
1381             new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
1382 
1383         /**
1384          * Constant for the "Khmer Symbols" Unicode character block.
1385          * @since 1.5
1386          */
1387         public static final UnicodeBlock KHMER_SYMBOLS =
1388             new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
1389 
1390         /**
1391          * Constant for the "Phonetic Extensions" Unicode character block.
1392          * @since 1.5
1393          */
1394         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1395             new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
1396 
1397         /**
1398          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1399          * @since 1.5
1400          */
1401         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1402             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1403                              new String[]{"Miscellaneous Mathematical Symbols-A",
1404                                           "MiscellaneousMathematicalSymbols-A"});
1405 
1406         /**
1407          * Constant for the "Supplemental Arrows-A" Unicode character block.
1408          * @since 1.5
1409          */
1410         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1411             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
1412                                                                     "SupplementalArrows-A"});
1413 
1414         /**
1415          * Constant for the "Supplemental Arrows-B" Unicode character block.
1416          * @since 1.5
1417          */
1418         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1419             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
1420                                                                     "SupplementalArrows-B"});
1421 
1422         /**
1423          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1424          * @since 1.5
1425          */
1426         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1427                 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1428                                    new String[] {"Miscellaneous Mathematical Symbols-B",
1429                                                  "MiscellaneousMathematicalSymbols-B"});
1430 
1431         /**
1432          * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1433          * @since 1.5
1434          */
1435         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1436             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1437                              new String[]{"Supplemental Mathematical Operators",
1438                                           "SupplementalMathematicalOperators"} );
1439 
1440         /**
1441          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1442          * @since 1.5
1443          */
1444         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1445             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
1446                                                                                "MiscellaneousSymbolsandArrows"});
1447 
1448         /**
1449          * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1453             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
1454                                                                            "KatakanaPhoneticExtensions"});
1455 
1456         /**
1457          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1458          * @since 1.5
1459          */
1460         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1461             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
1462                                                                       "YijingHexagramSymbols"});
1463 
1464         /**
1465          * Constant for the "Variation Selectors" Unicode character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock VARIATION_SELECTORS =
1469             new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
1470 
1471         /**
1472          * Constant for the "Linear B Syllabary" Unicode character block.
1473          * @since 1.5
1474          */
1475         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1476             new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
1477 
1478         /**
1479          * Constant for the "Linear B Ideograms" Unicode character block.
1480          * @since 1.5
1481          */
1482         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1483             new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
1484 
1485         /**
1486          * Constant for the "Aegean Numbers" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock AEGEAN_NUMBERS =
1490             new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
1491 
1492         /**
1493          * Constant for the "Old Italic" Unicode character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock OLD_ITALIC =
1497             new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
1498 
1499         /**
1500          * Constant for the "Gothic" Unicode character block.
1501          * @since 1.5
1502          */
1503         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1504 
1505         /**
1506          * Constant for the "Ugaritic" Unicode character block.
1507          * @since 1.5
1508          */
1509         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1510 
1511         /**
1512          * Constant for the "Deseret" Unicode character block.
1513          * @since 1.5
1514          */
1515         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1516 
1517         /**
1518          * Constant for the "Shavian" Unicode character block.
1519          * @since 1.5
1520          */
1521         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1522 
1523         /**
1524          * Constant for the "Osmanya" Unicode character block.
1525          * @since 1.5
1526          */
1527         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1528 
1529         /**
1530          * Constant for the "Cypriot Syllabary" Unicode character block.
1531          * @since 1.5
1532          */
1533         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1534             new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
1535 
1536         /**
1537          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1538          * @since 1.5
1539          */
1540         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1541             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
1542                                                                         "ByzantineMusicalSymbols"});
1543 
1544         /**
1545          * Constant for the "Musical Symbols" Unicode character block.
1546          * @since 1.5
1547          */
1548         public static final UnicodeBlock MUSICAL_SYMBOLS =
1549             new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
1550 
1551         /**
1552          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1553          * @since 1.5
1554          */
1555         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1556             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
1557                                                                      "TaiXuanJingSymbols"});
1558 
1559         /**
1560          * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block (0x1D400 through 0x1D7FF).
1561          * @since 1.5
1562          */
1563         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1564             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1565                              new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
1566 
1567         /**
1568          * Constant for the "CJK Unified Ideographs Extension B" Unicode character block (0x20000 through 0x2A6DF).
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1572             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1573                              new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
1574 
1575         /**
1576          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block (0x2F800 through 0x2FA1F).
1577          * @since 1.5
1578          */
1579         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1580             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1581                              new String[]{"CJK Compatibility Ideographs Supplement",
1582                                           "CJKCompatibilityIdeographsSupplement"});
1583 
1584         /**
1585          * Constant for the "Tags" Unicode character block (0xE0000 through 0xE007F).
1586          * @since 1.5
1587          */
1588         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1589 
1590         /**
1591          * Constant for the "Variation Selectors Supplement" Unicode character block (0xE0100 through 0xE01EF).
1592          * @since 1.5
1593          */
1594         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1595             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
1596                                                                              "VariationSelectorsSupplement"});
1597 
1598         /**
1599          * Constant for the "Supplementary Private Use Area-A" Unicode character block (0xF0000 through 0xFFFFF).
1600          * @since 1.5
1601          */
1602         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1603             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1604                              new String[] {"Supplementary Private Use Area-A",
1605                                            "SupplementaryPrivateUseArea-A"});
1606 
1607         /**
1608          * Constant for the "Supplementary Private Use Area-B" Unicode character block (0x100000 through 0x10FFFF).
1609          * @since 1.5
1610          */
1611         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1612             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1613                              new String[] {"Supplementary Private Use Area-B",
1614                                            "SupplementaryPrivateUseArea-B"});
1615 
1616         /**
1617          * Constant for the "High Surrogates" Unicode character block.
1618          * This block represents code point values in the high surrogate
1619          * range: 0xD800 through 0xDB7F.
1620          *
1621          * @since 1.5
1622          */
1623         public static final UnicodeBlock HIGH_SURROGATES =
1624             new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
1625 
1626         /**
1627          * Constant for the "High Private Use Surrogates" Unicode character block.
1628          * This block represents code point values in the high surrogate
1629          * range: 0xDB80 through 0xDBFF.
1630          *
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1634             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
1635                                                                            "HighPrivateUseSurrogates"});
1636 
1637         /**
1638          * Constant for the "Low Surrogates" Unicode character block.
1639          * This block represents code point values in the low surrogate
1640          * range: 0xDC00 through 0xDFFF.
1641          *
1642          * @since 1.5
1643          */
1644         public static final UnicodeBlock LOW_SURROGATES =
1645             new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
1646 
1647         /**
1648          * Constant for the "Arabic Supplement" Unicode character block (0x0750 through 0x077F).
1649          * @since 1.7
1650          */
1651         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1652             new UnicodeBlock("ARABIC_SUPPLEMENT",
1653                              new String[] { "Arabic Supplement",
1654                                             "ArabicSupplement"});
1655 
1656         /**
1657          * Constant for the "NKo" Unicode character block (0x07C0 through 0x07FF).
1658          * @since 1.7
1659          */
1660         public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
1661 
1662         /**
1663          * Constant for the "Ethiopic Supplement" Unicode character block (0x1380 through 0x139F).
1664          * @since 1.7
1665          */
1666         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1667             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1668                              new String[] { "Ethiopic Supplement",
1669                                             "EthiopicSupplement"});
1670 
1671         /**
1672          * Constant for the "New Tai Lue" Unicode character block (0x1980 through 0x19DF).
1673          * @since 1.7
1674          */
1675         public static final UnicodeBlock NEW_TAI_LUE =
1676             new UnicodeBlock("NEW_TAI_LUE",
1677                              new String[] { "New Tai Lue",
1678                                             "NewTaiLue"});
1679 
1680         /**
1681          * Constant for the "Buginese" Unicode character block (0x1A00 through 0x1A1F).
1682          * @since 1.7
1683          */
1684         public static final UnicodeBlock BUGINESE =
1685             new UnicodeBlock("BUGINESE");
1686 
1687         /**
1688          * Constant for the "Balinese" Unicode character block (0x1B00 through 0x1B7F).
1689          * @since 1.7
1690          */
1691         public static final UnicodeBlock BALINESE =
1692             new UnicodeBlock("BALINESE");
1693 
1694         /**
1695          * Constant for the "Sundanese" Unicode character block (0x1B80 through 0x1BBF).
1696          * @since 1.7
1697          */
1698         public static final UnicodeBlock SUNDANESE =
1699             new UnicodeBlock("SUNDANESE");
1700 
1701         /**
1702          * Constant for the "Lepcha" Unicode character block (0x1C00 through 0x1C4F).
1703          * @since 1.7
1704          */
1705         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
1706 
1707         /**
1708          * Constant for the "Ol Chiki" Unicode character block (0x1C50 through 0x1C7F).
1709          * @since 1.7
1710          */
1711         public static final UnicodeBlock OL_CHIKI =
1712             new UnicodeBlock("OL_CHIKI",
1713                              new String[] { "Ol Chiki",
1714                                             "OlChiki"});
1715 
1716         /**
1717          * Constant for the "Phonetic Extensions Supplement" Unicode character
1718          * block (0x1D80 through 0x1DBF).
1719          * @since 1.7
1720          */
1721         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1722             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1723                              new String[] { "Phonetic Extensions Supplement",
1724                                             "PhoneticExtensionsSupplement"});
1725 
1726         /**
1727          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1728          * character block (0x1DC0 through 0x1DFF).
1729          * @since 1.7
1730          */
1731         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1732             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1733                              new String[] { "Combining Diacritical Marks Supplement",
1734                                             "CombiningDiacriticalMarksSupplement"});
1735 
1736         /**
1737          * Constant for the "Glagolitic" Unicode character block (0x2C00 through 0x2C5F).
1738          * @since 1.7
1739          */
1740         public static final UnicodeBlock GLAGOLITIC =
1741             new UnicodeBlock("GLAGOLITIC");
1742 
1743         /**
1744          * Constant for the "Latin Extended-C" Unicode character block (0x2C60 through 0x2C7F).
1745          * @since 1.7
1746          */
1747         public static final UnicodeBlock LATIN_EXTENDED_C =
1748             new UnicodeBlock("LATIN_EXTENDED_C",
1749                              new String[] { "Latin Extended-C",
1750                                             "LatinExtended-C"});
1751 
1752         /**
1753          * Constant for the "Coptic" Unicode character block (0x2C80 through 0x2CFF).
1754          * @since 1.7
1755          */
1756         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
1757 
1758         /**
1759          * Constant for the "Georgian Supplement" Unicode character block (0x2D00 through 0x2D2F).
1760          * @since 1.7
1761          */
1762         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1763             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1764                              new String[] { "Georgian Supplement",
1765                                             "GeorgianSupplement"});
1766 
1767         /**
1768          * Constant for the "Tifinagh" Unicode character block (0x2D30 through 0x2D7F).
1769          * @since 1.7
1770          */
1771         public static final UnicodeBlock TIFINAGH =
1772             new UnicodeBlock("TIFINAGH");
1773 
1774         /**
1775          * Constant for the "Ethiopic Extended" Unicode character block (0x2D80 through 0x2DDF).
1776          * @since 1.7
1777          */
1778         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1779             new UnicodeBlock("ETHIOPIC_EXTENDED",
1780                              new String[] { "Ethiopic Extended",
1781                                             "EthiopicExtended"});
1782 
1783         /**
1784          * Constant for the "Cyrillic Extended-A" Unicode character block (0x2DE0 through 0x2DFF).
1785          * @since 1.7
1786          */
1787         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1788             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1789                              new String[] { "Cyrillic Extended-A",
1790                                             "CyrillicExtended-A"});
1791 
1792         /**
1793          * Constant for the "Supplemental Punctuation" Unicode character block (0x2E00 through 0x2E7F).
1794          * @since 1.7
1795          */
1796         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1797             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1798                              new String[] { "Supplemental Punctuation",
1799                                             "SupplementalPunctuation"});
1800 
1801         /**
1802          * Constant for the "CJK Strokes" Unicode character block (0x31C0 through 0x31EF).
1803          * @since 1.7
1804          */
1805         public static final UnicodeBlock CJK_STROKES =
1806             new UnicodeBlock("CJK_STROKES",
1807                              new String[] { "CJK Strokes",
1808                                             "CJKStrokes"});
1809 
1810         /**
1811          * Constant for the "Vai" Unicode character block (0xA500 through 0xA63F).
1812          * @since 1.7
1813          */
1814         public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
1815 
1816         /**
1817          * Constant for the "Cyrillic Extended-B" Unicode character block (0xA640 through 0xA69F).
1818          * @since 1.7
1819          */
1820         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1821             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1822                              new String[] { "Cyrillic Extended-B",
1823                                             "CyrillicExtended-B"});
1824 
1825         /**
1826          * Constant for the "Modifier Tone Letters" Unicode character block (0xA700 through 0xA71F).
1827          * @since 1.7
1828          */
1829         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1830             new UnicodeBlock("MODIFIER_TONE_LETTERS",
1831                              new String[] { "Modifier Tone Letters",
1832                                             "ModifierToneLetters"});
1833 
1834         /**
1835          * Constant for the "Latin Extended-D" Unicode character block (0xA720 through 0xA7FF).
1836          * @since 1.7
1837          */
1838         public static final UnicodeBlock LATIN_EXTENDED_D =
1839             new UnicodeBlock("LATIN_EXTENDED_D",
1840                              new String[] { "Latin Extended-D",
1841                                             "LatinExtended-D"});
1842 
1843         /**
1844          * Constant for the "Syloti Nagri" Unicode character block (0xA800 through 0xA82F).
1845          * @since 1.7
1846          */
1847         public static final UnicodeBlock SYLOTI_NAGRI =
1848             new UnicodeBlock("SYLOTI_NAGRI",
1849                              new String[] { "Syloti Nagri",
1850                                             "SylotiNagri"});
1851 
1852         /**
1853          * Constant for the "Phags-pa" Unicode character block (0xA840 through 0xA87F).
1854          * @since 1.7
1855          */
1856         public static final UnicodeBlock PHAGS_PA =
1857             new UnicodeBlock("PHAGS_PA", new String[] { "Phags-pa"});
1858 
1859         /**
1860          * Constant for the "Saurashtra" Unicode character block (0xA880 through 0xA8DF).
1861          * @since 1.7
1862          */
1863         public static final UnicodeBlock SAURASHTRA =
1864             new UnicodeBlock("SAURASHTRA");
1865 
1866         /**
1867          * Constant for the "Kayah Li" Unicode character block (0xA900 through 0xA92F).
1868          * @since 1.7
1869          */
1870         public static final UnicodeBlock KAYAH_LI =
1871             new UnicodeBlock("KAYAH_LI",
1872                              new String[] { "Kayah Li",
1873                                             "KayahLi"});
1874 
1875         /**
1876          * Constant for the "Rejang" Unicode character block (0xA930 through 0xA95F).
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
1880 
1881         /**
1882          * Constant for the "Cham" Unicode character block (0xAA00 through 0xAA5F).
1883          * @since 1.7
1884          */
1885         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
1886 
1887         /**
1888          * Constant for the "Vertical Forms" Unicode character block (0xFE10 through 0xFE1F).
1889          * @since 1.7
1890          */
1891         public static final UnicodeBlock VERTICAL_FORMS =
1892             new UnicodeBlock("VERTICAL_FORMS",
1893                              new String[] { "Vertical Forms",
1894                                             "VerticalForms"});
1895 
1896         /**
1897          * Constant for the "Ancient Greek Numbers" Unicode character block (0x10140 through 0x1018F).
1898          * @since 1.7
1899          */
1900         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1901             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
1902                              new String[] { "Ancient Greek Numbers",
1903                                             "AncientGreekNumbers"});
1904 
1905         /**
1906          * Constant for the "Ancient Symbols" Unicode character block (0x10190 through 0x101CF).
1907          * @since 1.7
1908          */
1909         public static final UnicodeBlock ANCIENT_SYMBOLS =
1910             new UnicodeBlock("ANCIENT_SYMBOLS",
1911                              new String[] { "Ancient Symbols",
1912                                             "AncientSymbols"});
1913 
1914         /**
1915          * Constant for the "Phaistos Disc" Unicode character block (0x101D0 through 0x101FF).
1916          * @since 1.7
1917          */
1918         public static final UnicodeBlock PHAISTOS_DISC =
1919             new UnicodeBlock("PHAISTOS_DISC",
1920                              new String[] { "Phaistos Disc",
1921                                             "PhaistosDisc"});
1922 
1923         /**
1924          * Constant for the "Lycian" Unicode character block (0x10280 through 0x1029F).
1925          * @since 1.7
1926          */
1927         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
1928 
1929         /**
1930          * Constant for the "Carian" Unicode character block (0x102A0 through 0x102DF).
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
1934 
1935         /**
1936          * Constant for the "Old Persian" Unicode character block (0x103A0 through 0x103DF).
1937          * @since 1.7
1938          */
1939         public static final UnicodeBlock OLD_PERSIAN =
1940             new UnicodeBlock("OLD_PERSIAN",
1941                              new String[] { "Old Persian",
1942                                             "OldPersian"});
1943 
1944         /**
1945          * Constant for the "Phoenician" Unicode character block (0x10900 through 0x1091F).
1946          * @since 1.7
1947          */
1948         public static final UnicodeBlock PHOENICIAN =
1949             new UnicodeBlock("PHOENICIAN");
1950 
1951         /**
1952          * Constant for the "Lydian" Unicode character block (0x10920 through 0x1093F).
1953          * @since 1.7
1954          */
1955         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
1956 
1957         /**
1958          * Constant for the "Kharoshthi" Unicode character block (0x10A00 through 0x10A5F).
1959          * @since 1.7
1960          */
1961         public static final UnicodeBlock KHAROSHTHI =
1962             new UnicodeBlock("KHAROSHTHI");
1963 
1964         /**
1965          * Constant for the "Cuneiform" Unicode character block (0x12000 through 0x123FF).
1966          * @since 1.7
1967          */
1968         public static final UnicodeBlock CUNEIFORM =
1969             new UnicodeBlock("CUNEIFORM");
1970 
1971         /**
1972          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
1973          * character block (0x12400 through 0x1247F).
1974          * @since 1.7
1975          */
1976         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1977             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1978                              new String[] { "Cuneiform Numbers and Punctuation",
1979                                             "CuneiformNumbersandPunctuation"});
1980 
1981         /**
1982          * Constant for the "Ancient Greek Musical Notation" Unicode character
1983          * block (0x1D200 through 0x1D24F).
1984          * @since 1.7
1985          */
1986         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1987             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1988                              new String[] { "Ancient Greek Musical Notation",
1989                                             "AncientGreekMusicalNotation"});
1990 
1991         /**
1992          * Constant for the "Counting Rod Numerals" Unicode character block (0x1D360 through 0x1D37F).
1993          * @since 1.7
1994          */
1995         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1996             new UnicodeBlock("COUNTING_ROD_NUMERALS",
1997                              new String[] { "Counting Rod Numerals",
1998                                             "CountingRodNumerals"});
1999 
2000         /**
2001          * Constant for the "Mahjong Tiles" Unicode character block (0x1F000 through 0x1F02F).
2002          * @since 1.7
2003          */
2004         public static final UnicodeBlock MAHJONG_TILES =
2005             new UnicodeBlock("MAHJONG_TILES",
2006                              new String[] { "Mahjong Tiles",
2007                                             "MahjongTiles"});
2008 
2009         /**
2010          * Constant for the "Domino Tiles" Unicode character block (0x1F030 through 0x1F09F).
2011          * @since 1.7
2012          */
2013         public static final UnicodeBlock DOMINO_TILES =
2014             new UnicodeBlock("DOMINO_TILES",
2015                              new String[] { "Domino Tiles",
2016                                             "DominoTiles"});
2017 
2018         private static final int blockStarts[] = { // ascending first code points; entry i pairs with blocks[i], unassigned gaps pair with null
2019             0x0000,   // 0000..007F; Basic Latin
2020             0x0080,   // 0080..00FF; Latin-1 Supplement
2021             0x0100,   // 0100..017F; Latin Extended-A
2022             0x0180,   // 0180..024F; Latin Extended-B
2023             0x0250,   // 0250..02AF; IPA Extensions
2024             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2025             0x0300,   // 0300..036F; Combining Diacritical Marks
2026             0x0370,   // 0370..03FF; Greek and Coptic
2027             0x0400,   // 0400..04FF; Cyrillic
2028             0x0500,   // 0500..052F; Cyrillic Supplement
2029             0x0530,   // 0530..058F; Armenian
2030             0x0590,   // 0590..05FF; Hebrew
2031             0x0600,   // 0600..06FF; Arabic
2032             0x0700,   // 0700..074F; Syriac
2033             0x0750,   // 0750..077F; Arabic Supplement
2034             0x0780,   // 0780..07BF; Thaana
2035             0x07C0,   // 07C0..07FF; NKo
2036             0x0800,   //             unassigned
2037             0x0900,   // 0900..097F; Devanagari
2038             0x0980,   // 0980..09FF; Bengali
2039             0x0A00,   // 0A00..0A7F; Gurmukhi
2040             0x0A80,   // 0A80..0AFF; Gujarati
2041             0x0B00,   // 0B00..0B7F; Oriya
2042             0x0B80,   // 0B80..0BFF; Tamil
2043             0x0C00,   // 0C00..0C7F; Telugu
2044             0x0C80,   // 0C80..0CFF; Kannada
2045             0x0D00,   // 0D00..0D7F; Malayalam
2046             0x0D80,   // 0D80..0DFF; Sinhala
2047             0x0E00,   // 0E00..0E7F; Thai
2048             0x0E80,   // 0E80..0EFF; Lao
2049             0x0F00,   // 0F00..0FFF; Tibetan
2050             0x1000,   // 1000..109F; Myanmar
2051             0x10A0,   // 10A0..10FF; Georgian
2052             0x1100,   // 1100..11FF; Hangul Jamo
2053             0x1200,   // 1200..137F; Ethiopic
2054             0x1380,   // 1380..139F; Ethiopic Supplement
2055             0x13A0,   // 13A0..13FF; Cherokee
2056             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2057             0x1680,   // 1680..169F; Ogham
2058             0x16A0,   // 16A0..16FF; Runic
2059             0x1700,   // 1700..171F; Tagalog
2060             0x1720,   // 1720..173F; Hanunoo
2061             0x1740,   // 1740..175F; Buhid
2062             0x1760,   // 1760..177F; Tagbanwa
2063             0x1780,   // 1780..17FF; Khmer
2064             0x1800,   // 1800..18AF; Mongolian
2065             0x18B0,   //             unassigned
2066             0x1900,   // 1900..194F; Limbu
2067             0x1950,   // 1950..197F; Tai Le
2068             0x1980,   // 1980..19DF; New Tai Lue
2069             0x19E0,   // 19E0..19FF; Khmer Symbols
2070             0x1A00,   // 1A00..1A1F; Buginese
2071             0x1A20,   //             unassigned
2072             0x1B00,   // 1B00..1B7F; Balinese
2073             0x1B80,   // 1B80..1BBF; Sundanese
2074             0x1BC0,   //             unassigned
2075             0x1C00,   // 1C00..1C4F; Lepcha
2076             0x1C50,   // 1C50..1C7F; Ol Chiki
2077             0x1C80,   //             unassigned
2078             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2079             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2080             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2081             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2082             0x1F00,   // 1F00..1FFF; Greek Extended
2083             0x2000,   // 2000..206F; General Punctuation
2084             0x2070,   // 2070..209F; Superscripts and Subscripts
2085             0x20A0,   // 20A0..20CF; Currency Symbols
2086             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2087             0x2100,   // 2100..214F; Letterlike Symbols
2088             0x2150,   // 2150..218F; Number Forms
2089             0x2190,   // 2190..21FF; Arrows
2090             0x2200,   // 2200..22FF; Mathematical Operators
2091             0x2300,   // 2300..23FF; Miscellaneous Technical
2092             0x2400,   // 2400..243F; Control Pictures
2093             0x2440,   // 2440..245F; Optical Character Recognition
2094             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2095             0x2500,   // 2500..257F; Box Drawing
2096             0x2580,   // 2580..259F; Block Elements
2097             0x25A0,   // 25A0..25FF; Geometric Shapes
2098             0x2600,   // 2600..26FF; Miscellaneous Symbols
2099             0x2700,   // 2700..27BF; Dingbats
2100             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2101             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2102             0x2800,   // 2800..28FF; Braille Patterns
2103             0x2900,   // 2900..297F; Supplemental Arrows-B
2104             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2105             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2106             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2107             0x2C00,   // 2C00..2C5F; Glagolitic
2108             0x2C60,   // 2C60..2C7F; Latin Extended-C
2109             0x2C80,   // 2C80..2CFF; Coptic
2110             0x2D00,   // 2D00..2D2F; Georgian Supplement
2111             0x2D30,   // 2D30..2D7F; Tifinagh
2112             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2113             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2114             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2115             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2116             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2117             0x2FE0,   //             unassigned
2118             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2119             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2120             0x3040,   // 3040..309F; Hiragana
2121             0x30A0,   // 30A0..30FF; Katakana
2122             0x3100,   // 3100..312F; Bopomofo
2123             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2124             0x3190,   // 3190..319F; Kanbun
2125             0x31A0,   // 31A0..31BF; Bopomofo Extended
2126             0x31C0,   // 31C0..31EF; CJK Strokes
2127             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2128             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2129             0x3300,   // 3300..33FF; CJK Compatibility
2130             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2131             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2132             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2133             0xA000,   // A000..A48F; Yi Syllables
2134             0xA490,   // A490..A4CF; Yi Radicals
2135             0xA4D0,   //             unassigned
2136             0xA500,   // A500..A63F; Vai
2137             0xA640,   // A640..A69F; Cyrillic Extended-B
2138             0xA6A0,   //             unassigned
2139             0xA700,   // A700..A71F; Modifier Tone Letters
2140             0xA720,   // A720..A7FF; Latin Extended-D
2141             0xA800,   // A800..A82F; Syloti Nagri
2142             0xA830,   //             unassigned
2143             0xA840,   // A840..A87F; Phags-pa
2144             0xA880,   // A880..A8DF; Saurashtra
2145             0xA8E0,   //             unassigned
2146             0xA900,   // A900..A92F; Kayah Li
2147             0xA930,   // A930..A95F; Rejang
2148             0xA960,   //             unassigned
2149             0xAA00,   // AA00..AA5F; Cham
2150             0xAA60,   //             unassigned
2151             0xAC00,   // AC00..D7AF; Hangul Syllables
2152             0xD7B0,   //             unassigned
2153             0xD800,   // D800..DB7F; High Surrogates
2154             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2155             0xDC00,   // DC00..DFFF; Low Surrogates
2156             0xE000,   // E000..F8FF; Private Use Area
2157             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2158             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2159             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2160             0xFE00,   // FE00..FE0F; Variation Selectors
2161             0xFE10,   // FE10..FE1F; Vertical Forms
2162             0xFE20,   // FE20..FE2F; Combining Half Marks
2163             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2164             0xFE50,   // FE50..FE6F; Small Form Variants
2165             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2166             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2167             0xFFF0,   // FFF0..FFFF; Specials
2168             0x10000,  // 10000..1007F; Linear B Syllabary
2169             0x10080,  // 10080..100FF; Linear B Ideograms
2170             0x10100,  // 10100..1013F; Aegean Numbers
2171             0x10140,  // 10140..1018F; Ancient Greek Numbers
2172             0x10190,  // 10190..101CF; Ancient Symbols
2173             0x101D0,  // 101D0..101FF; Phaistos Disc
2174             0x10200,  //               unassigned
2175             0x10280,  // 10280..1029F; Lycian
2176             0x102A0,  // 102A0..102DF; Carian
2177             0x102E0,  //               unassigned
2178             0x10300,  // 10300..1032F; Old Italic
2179             0x10330,  // 10330..1034F; Gothic
2180             0x10350,  //               unassigned
2181             0x10380,  // 10380..1039F; Ugaritic
2182             0x103A0,  // 103A0..103DF; Old Persian
2183             0x103E0,  //               unassigned
2184             0x10400,  // 10400..1044F; Deseret
2185             0x10450,  // 10450..1047F; Shavian
2186             0x10480,  // 10480..104AF; Osmanya
2187             0x104B0,  //               unassigned
2188             0x10800,  // 10800..1083F; Cypriot Syllabary
2189             0x10840,  //               unassigned
2190             0x10900,  // 10900..1091F; Phoenician
2191             0x10920,  // 10920..1093F; Lydian
2192             0x10940,  //               unassigned
2193             0x10A00,  // 10A00..10A5F; Kharoshthi
2194             0x10A60,  //               unassigned
2195             0x12000,  // 12000..123FF; Cuneiform
2196             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2197             0x12480,  //               unassigned
2198             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2199             0x1D100,  // 1D100..1D1FF; Musical Symbols
2200             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2201             0x1D250,  //               unassigned
2202             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2203             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2204             0x1D380,  //               unassigned
2205             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2206             0x1D800,  //               unassigned
2207             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2208             0x1F030,  // 1F030..1F09F; Domino Tiles
2209             0x1F0A0,  //               unassigned
2210             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2211             0x2A6E0,  //               unassigned
2212             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2213             0x2FA20,  //               unassigned
2214             0xE0000,  // E0000..E007F; Tags
2215             0xE0080,  //               unassigned
2216             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2217             0xE01F0,  //               unassigned
2218             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2219             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
2220         };
2221 
        // Lookup table of block objects, index-aligned with blockStarts:
        // of(int) searches blockStarts for an index and returns the entry
        // of this array at that same index. A null entry stands for a range
        // of code points that is not assigned to any block, so of(int)
        // returns null for code points falling into such a range.
        // The two arrays must be kept in step: adding or removing an entry
        // in one requires the matching change in the other.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            null,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            null,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            null,
            BALINESE,
            SUNDANESE,
            null,
            LEPCHA,
            OL_CHIKI,
            null,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            null,
            VAI,
            CYRILLIC_EXTENDED_B,
            null,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            null,
            PHAGS_PA,
            SAURASHTRA,
            null,
            KAYAH_LI,
            REJANG,
            null,
            CHAM,
            null,
            HANGUL_SYLLABLES,
            null,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            KHAROSHTHI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
2425 
2426 
2427         /**
2428          * Returns the object representing the Unicode block containing the
2429          * given character, or <code>null</code> if the character is not a
2430          * member of a defined block.
2431          *
2432                  * <p><b>Note:</b> This method cannot handle <a
2433                  * href="Character.html#supplementary"> supplementary
2434                  * characters</a>. To support all Unicode characters,
2435                  * including supplementary characters, use the {@link
2436                  * #of(int)} method.
2437          *
2438          * @param   c  The character in question
2439          * @return  The <code>UnicodeBlock</code> instance representing the
2440          *          Unicode block of which this character is a member, or
2441          *          <code>null</code> if the character is not a member of any
2442          *          Unicode block
2443          */
2444         public static UnicodeBlock of(char c) {
2445             return of((int)c);
2446         }
2447 
2448 
2449         /**
2450          * Returns the object representing the Unicode block
2451          * containing the given character (Unicode code point), or
2452          * <code>null</code> if the character is not a member of a
2453          * defined block.
2454          *
2455                  * @param   codePoint the character (Unicode code point) in question.
2456          * @return  The <code>UnicodeBlock</code> instance representing the
2457          *          Unicode block of which this character is a member, or
2458          *          <code>null</code> if the character is not a member of any
2459          *          Unicode block
2460                  * @exception IllegalArgumentException if the specified
2461                  * <code>codePoint</code> is an invalid Unicode code point.
2462                  * @see Character#isValidCodePoint(int)
2463                  * @since   1.5
2464          */
2465         public static UnicodeBlock of(int codePoint) {
2466             if (!isValidCodePoint(codePoint)) {
2467                 throw new IllegalArgumentException();
2468             }
2469 
2470             int top, bottom, current;
2471             bottom = 0;
2472             top = blockStarts.length;
2473             current = top/2;
2474 
2475             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
2476             while (top - bottom > 1) {
2477                 if (codePoint >= blockStarts[current]) {
2478                     bottom = current;
2479                 } else {
2480                     top = current;
2481                 }
2482                 current = (top + bottom) / 2;
2483             }
2484             return blocks[current];
2485         }
2486 
2487         /**
2488          * Returns the UnicodeBlock with the given name. Block
2489          * names are determined by The Unicode Standard. The file
2490          * Blocks-&lt;version&gt;.txt defines blocks for a particular
2491          * version of the standard. The {@link Character} class specifies
2492          * the version of the standard that it supports.
2493          * <p>
2494          * This method accepts block names in the following forms:
2495          * <ol>
2496          * <li> Canonical block names as defined by the Unicode Standard.
2497          * For example, the standard defines a "Basic Latin" block. Therefore, this
2498          * method accepts "Basic Latin" as a valid block name. The documentation of
2499          * each UnicodeBlock provides the canonical name.
2500          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2501          * is a valid block name for the "Basic Latin" block.
2502          * <li>The text representation of each constant UnicodeBlock identifier.
2503          * For example, this method will return the {@link #BASIC_LATIN} block if
2504          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2505          *  hyphens in the canonical name with underscores.
2506          * </ol>
2507          * Finally, character case is ignored for all of the valid block name forms.
2508          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2509          * The en_US locale's case mapping rules are used to provide case-insensitive
2510          * string comparisons for block name validation.
2511          * <p>
2512          * If the Unicode Standard changes block names, both the previous and
2513          * current names will be accepted.
2514          *
2515          * @param blockName A <code>UnicodeBlock</code> name.
2516          * @return The <code>UnicodeBlock</code> instance identified
2517          *         by <code>blockName</code>
2518          * @throws IllegalArgumentException if <code>blockName</code> is an
2519          *         invalid name
2520          * @throws NullPointerException if <code>blockName</code> is null
2521          * @since 1.5
2522          */
2523         public static final UnicodeBlock forName(String blockName) {
2524             UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2525             if (block == null) {
2526                 throw new IllegalArgumentException();
2527             }
2528             return block;
2529         }
2530     }
2531 
2532 
    /**
     * The primitive <code>char</code> wrapped by this
     * <code>Character</code>. It is assigned once, in the constructor.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L;
2542 
    /**
     * Constructs a newly allocated <code>Character</code> object that
     * represents the specified <code>char</code> value. When a new
     * instance is not required, {@link #valueOf(char)} should generally
     * be used instead.
     *
     * @param  value   the value to be represented by the
     *                  <code>Character</code> object.
     */
    public Character(char value) {
        this.value = value;
    }
2553 
2554     private static class CharacterCache {
2555         private CharacterCache(){}
2556 
2557         static final Character cache[] = new Character[127 + 1];
2558 
2559         static {
2560             for(int i = 0; i < cache.length; i++)
2561                 cache[i] = new Character((char)i);
2562         }
2563     }
2564 
2565     /**
2566      * Returns a <tt>Character</tt> instance representing the specified
2567      * <tt>char</tt> value.
2568      * If a new <tt>Character</tt> instance is not required, this method
2569      * should generally be used in preference to the constructor
2570      * {@link #Character(char)}, as this method is likely to yield
2571      * significantly better space and time performance by caching
2572      * frequently requested values.
2573      *
2574      * @param  c a char value.
2575      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2576      * @since  1.5
2577      */
2578     public static Character valueOf(char c) {
2579         if(c <= 127) { // must cache
2580             return CharacterCache.cache[(int)c];
2581         }
2582         return new Character(c);
2583     }
2584 
2585     /**
2586      * Returns the value of this <code>Character</code> object.
2587      * @return  the primitive <code>char</code> value represented by
2588      *          this object.
2589      */
2590     public char charValue() {
2591         return value;
2592     }
2593 
2594     /**
2595      * Returns a hash code for this <code>Character</code>.
2596      * @return  a hash code value for this object.
2597      */
2598     public int hashCode() {
2599         return (int)value;
2600     }
2601 
2602     /**
2603      * Compares this object against the specified object.
2604      * The result is <code>true</code> if and only if the argument is not
2605      * <code>null</code> and is a <code>Character</code> object that
2606      * represents the same <code>char</code> value as this object.
2607      *
2608      * @param   obj   the object to compare with.
2609      * @return  <code>true</code> if the objects are the same;
2610      *          <code>false</code> otherwise.
2611      */
2612     public boolean equals(Object obj) {
2613         if (obj instanceof Character) {
2614             return value == ((Character)obj).charValue();
2615         }
2616         return false;
2617     }
2618 
2619     /**
2620      * Returns a <code>String</code> object representing this
2621      * <code>Character</code>'s value.  The result is a string of
2622      * length 1 whose sole component is the primitive
2623      * <code>char</code> value represented by this
2624      * <code>Character</code> object.
2625      *
2626      * @return  a string representation of this object.
2627      */
2628     public String toString() {
2629         char buf[] = {value};
2630         return String.valueOf(buf);
2631     }
2632 
2633     /**
2634      * Returns a <code>String</code> object representing the
2635      * specified <code>char</code>.  The result is a string of length
2636      * 1 consisting solely of the specified <code>char</code>.
2637      *
2638      * @param c the <code>char</code> to be converted
2639      * @return the string representation of the specified <code>char</code>
2640      * @since 1.4
2641      */
2642     public static String toString(char c) {
2643         return String.valueOf(c);
2644     }
2645 
2646     /**
2647      * Determines whether the specified code point is a valid Unicode
2648      * code point value in the range of <code>0x0000</code> to
2649      * <code>0x10FFFF</code> inclusive. This method is equivalent to
2650      * the expression:
2651      *
2652      * <blockquote><pre>
2653      * codePoint >= 0x0000 && codePoint <= 0x10FFFF
2654      * </pre></blockquote>
2655      *
2656      * @param  codePoint the Unicode code point to be tested
2657      * @return <code>true</code> if the specified code point value
2658      * is a valid code point value;
2659      * <code>false</code> otherwise.
2660      * @since  1.5
2661      */
2662     public static boolean isValidCodePoint(int codePoint) {
2663         return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2664     }
2665 
2666     /**
2667      * Determines whether the specified character (Unicode code point)
2668      * is in the supplementary character range. The method call is
2669      * equivalent to the expression:
2670      * <blockquote><pre>
2671      * codePoint >= 0x10000 && codePoint <= 0x10FFFF
2672      * </pre></blockquote>
2673      *
2674      * @param  codePoint the character (Unicode code point) to be tested
2675      * @return <code>true</code> if the specified character is in the Unicode
2676      *         supplementary character range; <code>false</code> otherwise.
2677      * @since  1.5
2678      */
2679     public static boolean isSupplementaryCodePoint(int codePoint) {
2680         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2681             && codePoint <= MAX_CODE_POINT;
2682     }
2683 
2684     /**
2685      * Determines if the given <code>char</code> value is a
2686      * high-surrogate code unit (also known as <i>leading-surrogate
2687      * code unit</i>). Such values do not represent characters by
2688      * themselves, but are used in the representation of <a
2689      * href="#supplementary">supplementary characters</a> in the
2690      * UTF-16 encoding.
2691      *
2692      * <p>This method returns <code>true</code> if and only if
2693      * <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF'
2694      * </pre></blockquote>
2695      * is <code>true</code>.
2696      *
2697      * @param   ch   the <code>char</code> value to be tested.
2698      * @return  <code>true</code> if the <code>char</code> value
2699      *          is between '&#92;uD800' and '&#92;uDBFF' inclusive;
2700      *          <code>false</code> otherwise.
2701      * @see     java.lang.Character#isLowSurrogate(char)
2702      * @see     Character.UnicodeBlock#of(int)
2703      * @since   1.5
2704      */
2705     public static boolean isHighSurrogate(char ch) {
2706         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2707     }
2708 
2709     /**
2710      * Determines if the given <code>char</code> value is a
2711      * low-surrogate code unit (also known as <i>trailing-surrogate code
2712      * unit</i>). Such values do not represent characters by themselves,
2713      * but are used in the representation of <a
2714      * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2715      *
2716      * <p> This method returns <code>true</code> if and only if
2717      * <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF'
2718      * </pre></blockquote> is <code>true</code>.
2719      *
2720      * @param   ch   the <code>char</code> value to be tested.
2721      * @return  <code>true</code> if the <code>char</code> value
2722      *          is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
2723      *          <code>false</code> otherwise.
2724      * @see java.lang.Character#isHighSurrogate(char)
2725      * @since   1.5
2726      */
2727     public static boolean isLowSurrogate(char ch) {
2728         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2729     }
2730 
2731     /**
2732      * Determines whether the specified pair of <code>char</code>
2733      * values is a valid surrogate pair. This method is equivalent to
2734      * the expression:
2735      * <blockquote><pre>
2736      * isHighSurrogate(high) && isLowSurrogate(low)
2737      * </pre></blockquote>
2738      *
2739      * @param  high the high-surrogate code value to be tested
2740      * @param  low the low-surrogate code value to be tested
2741      * @return <code>true</code> if the specified high and
2742      * low-surrogate code values represent a valid surrogate pair;
2743      * <code>false</code> otherwise.
2744      * @since  1.5
2745      */
2746     public static boolean isSurrogatePair(char high, char low) {
2747         return isHighSurrogate(high) && isLowSurrogate(low);
2748     }
2749 
2750     /**
2751      * Determines the number of <code>char</code> values needed to
2752      * represent the specified character (Unicode code point). If the
2753      * specified character is equal to or greater than 0x10000, then
2754      * the method returns 2. Otherwise, the method returns 1.
2755      *
2756      * <p>This method doesn't validate the specified character to be a
2757      * valid Unicode code point. The caller must validate the
2758      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2759      * if necessary.
2760      *
2761      * @param   codePoint the character (Unicode code point) to be tested.
2762      * @return  2 if the character is a valid supplementary character; 1 otherwise.
2763      * @see     #isSupplementaryCodePoint(int)
2764      * @since   1.5
2765      */
2766     public static int charCount(int codePoint) {
2767         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2768     }
2769 
2770     /**
2771      * Converts the specified surrogate pair to its supplementary code
2772      * point value. This method does not validate the specified
2773      * surrogate pair. The caller must validate it using {@link
2774      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2775      *
2776      * @param  high the high-surrogate code unit
2777      * @param  low the low-surrogate code unit
2778      * @return the supplementary code point composed from the
2779      *         specified surrogate pair.
2780      * @since  1.5
2781      */
2782     public static int toCodePoint(char high, char low) {
2783         return ((high - MIN_HIGH_SURROGATE) << 10)
2784             + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
2785     }
2786 
2787     /**
2788      * Returns the code point at the given index of the
2789      * <code>CharSequence</code>. If the <code>char</code> value at
2790      * the given index in the <code>CharSequence</code> is in the
2791      * high-surrogate range, the following index is less than the
2792      * length of the <code>CharSequence</code>, and the
2793      * <code>char</code> value at the following index is in the
2794      * low-surrogate range, then the supplementary code point
2795      * corresponding to this surrogate pair is returned. Otherwise,
2796      * the <code>char</code> value at the given index is returned.
2797      *
2798      * @param seq a sequence of <code>char</code> values (Unicode code
2799      * units)
2800      * @param index the index to the <code>char</code> values (Unicode
2801      * code units) in <code>seq</code> to be converted
2802      * @return the Unicode code point at the given index
2803      * @exception NullPointerException if <code>seq</code> is null.
2804      * @exception IndexOutOfBoundsException if the value
2805      * <code>index</code> is negative or not less than
2806      * {@link CharSequence#length() seq.length()}.
2807      * @since  1.5
2808      */
2809     public static int codePointAt(CharSequence seq, int index) {
2810         char c1 = seq.charAt(index++);
2811         if (isHighSurrogate(c1)) {
2812             if (index < seq.length()) {
2813                 char c2 = seq.charAt(index);
2814                 if (isLowSurrogate(c2)) {
2815                     return toCodePoint(c1, c2);
2816                 }
2817             }
2818         }
2819         return c1;
2820     }
2821 
2822     /**
2823      * Returns the code point at the given index of the
2824      * <code>char</code> array. If the <code>char</code> value at
2825      * the given index in the <code>char</code> array is in the
2826      * high-surrogate range, the following index is less than the
2827      * length of the <code>char</code> array, and the
2828      * <code>char</code> value at the following index is in the
2829      * low-surrogate range, then the supplementary code point
2830      * corresponding to this surrogate pair is returned. Otherwise,
2831      * the <code>char</code> value at the given index is returned.
2832      *
2833      * @param a the <code>char</code> array
2834      * @param index the index to the <code>char</code> values (Unicode
2835      * code units) in the <code>char</code> array to be converted
2836      * @return the Unicode code point at the given index
2837      * @exception NullPointerException if <code>a</code> is null.
2838      * @exception IndexOutOfBoundsException if the value
2839      * <code>index</code> is negative or not less than
2840      * the length of the <code>char</code> array.
2841      * @since  1.5
2842      */
2843     public static int codePointAt(char[] a, int index) {
2844         return codePointAtImpl(a, index, a.length);
2845     }
2846 
2847     /**
2848      * Returns the code point at the given index of the
2849      * <code>char</code> array, where only array elements with
2850      * <code>index</code> less than <code>limit</code> can be used. If
2851      * the <code>char</code> value at the given index in the
2852      * <code>char</code> array is in the high-surrogate range, the
2853      * following index is less than the <code>limit</code>, and the
2854      * <code>char</code> value at the following index is in the
2855      * low-surrogate range, then the supplementary code point
2856      * corresponding to this surrogate pair is returned. Otherwise,
2857      * the <code>char</code> value at the given index is returned.
2858      *
2859      * @param a the <code>char</code> array
2860      * @param index the index to the <code>char</code> values (Unicode
2861      * code units) in the <code>char</code> array to be converted
2862      * @param limit the index after the last array element that can be used in the
2863      * <code>char</code> array
2864      * @return the Unicode code point at the given index
2865      * @exception NullPointerException if <code>a</code> is null.
2866      * @exception IndexOutOfBoundsException if the <code>index</code>
2867      * argument is negative or not less than the <code>limit</code>
2868      * argument, or if the <code>limit</code> argument is negative or
2869      * greater than the length of the <code>char</code> array.
2870      * @since  1.5
2871      */
2872     public static int codePointAt(char[] a, int index, int limit) {
2873         if (index >= limit || limit < 0 || limit > a.length) {
2874             throw new IndexOutOfBoundsException();
2875         }
2876         return codePointAtImpl(a, index, limit);
2877     }
2878 
2879     static int codePointAtImpl(char[] a, int index, int limit) {
2880         char c1 = a[index++];
2881         if (isHighSurrogate(c1)) {
2882             if (index < limit) {
2883                 char c2 = a[index];
2884                 if (isLowSurrogate(c2)) {
2885                     return toCodePoint(c1, c2);
2886                 }
2887             }
2888         }
2889         return c1;
2890     }
2891 
2892     /**
2893      * Returns the code point preceding the given index of the
2894      * <code>CharSequence</code>. If the <code>char</code> value at
2895      * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2896      * the low-surrogate range, <code>(index - 2)</code> is not
2897      * negative, and the <code>char</code> value at <code>(index -
2898      * 2)</code> in the <code>CharSequence</code> is in the
2899      * high-surrogate range, then the supplementary code point
2900      * corresponding to this surrogate pair is returned. Otherwise,
2901      * the <code>char</code> value at <code>(index - 1)</code> is
2902      * returned.
2903      *
2904      * @param seq the <code>CharSequence</code> instance
2905      * @param index the index following the code point that should be returned
2906      * @return the Unicode code point value before the given index.
2907      * @exception NullPointerException if <code>seq</code> is null.
2908      * @exception IndexOutOfBoundsException if the <code>index</code>
2909      * argument is less than 1 or greater than {@link
2910      * CharSequence#length() seq.length()}.
2911      * @since  1.5
2912      */
2913     public static int codePointBefore(CharSequence seq, int index) {
2914         char c2 = seq.charAt(--index);
2915         if (isLowSurrogate(c2)) {
2916             if (index > 0) {
2917                 char c1 = seq.charAt(--index);
2918                 if (isHighSurrogate(c1)) {
2919                     return toCodePoint(c1, c2);
2920                 }
2921             }
2922         }
2923         return c2;
2924     }
2925 
2926     /**
2927      * Returns the code point preceding the given index of the
2928      * <code>char</code> array. If the <code>char</code> value at
2929      * <code>(index - 1)</code> in the <code>char</code> array is in
2930      * the low-surrogate range, <code>(index - 2)</code> is not
2931      * negative, and the <code>char</code> value at <code>(index -
2932      * 2)</code> in the <code>char</code> array is in the
2933      * high-surrogate range, then the supplementary code point
2934      * corresponding to this surrogate pair is returned. Otherwise,
2935      * the <code>char</code> value at <code>(index - 1)</code> is
2936      * returned.
2937      *
2938      * @param a the <code>char</code> array
2939      * @param index the index following the code point that should be returned
2940      * @return the Unicode code point value before the given index.
2941      * @exception NullPointerException if <code>a</code> is null.
2942      * @exception IndexOutOfBoundsException if the <code>index</code>
2943      * argument is less than 1 or greater than the length of the
2944      * <code>char</code> array
2945      * @since  1.5
2946      */
2947     public static int codePointBefore(char[] a, int index) {
2948         return codePointBeforeImpl(a, index, 0);
2949     }
2950 
2951     /**
2952      * Returns the code point preceding the given index of the
2953      * <code>char</code> array, where only array elements with
2954      * <code>index</code> greater than or equal to <code>start</code>
2955      * can be used. If the <code>char</code> value at <code>(index -
2956      * 1)</code> in the <code>char</code> array is in the
2957      * low-surrogate range, <code>(index - 2)</code> is not less than
2958      * <code>start</code>, and the <code>char</code> value at
2959      * <code>(index - 2)</code> in the <code>char</code> array is in
2960      * the high-surrogate range, then the supplementary code point
2961      * corresponding to this surrogate pair is returned. Otherwise,
2962      * the <code>char</code> value at <code>(index - 1)</code> is
2963      * returned.
2964      *
2965      * @param a the <code>char</code> array
2966      * @param index the index following the code point that should be returned
2967      * @param start the index of the first array element in the
2968      * <code>char</code> array
2969      * @return the Unicode code point value before the given index.
2970      * @exception NullPointerException if <code>a</code> is null.
2971      * @exception IndexOutOfBoundsException if the <code>index</code>
2972      * argument is not greater than the <code>start</code> argument or
2973      * is greater than the length of the <code>char</code> array, or
2974      * if the <code>start</code> argument is negative or not less than
2975      * the length of the <code>char</code> array.
2976      * @since  1.5
2977      */
2978     public static int codePointBefore(char[] a, int index, int start) {
2979         if (index <= start || start < 0 || start >= a.length) {
2980             throw new IndexOutOfBoundsException();
2981         }
2982         return codePointBeforeImpl(a, index, start);
2983     }
2984 
2985     static int codePointBeforeImpl(char[] a, int index, int start) {
2986         char c2 = a[--index];
2987         if (isLowSurrogate(c2)) {
2988             if (index > start) {
2989                 char c1 = a[--index];
2990                 if (isHighSurrogate(c1)) {
2991                     return toCodePoint(c1, c2);
2992                 }
2993             }
2994         }
2995         return c2;
2996     }
2997 
2998     /**
2999      * Converts the specified character (Unicode code point) to its
3000      * UTF-16 representation. If the specified code point is a BMP
3001      * (Basic Multilingual Plane or Plane 0) value, the same value is
3002      * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
3003      * specified code point is a supplementary character, its
3004      * surrogate values are stored in <code>dst[dstIndex]</code>
3005      * (high-surrogate) and <code>dst[dstIndex+1]</code>
3006      * (low-surrogate), and 2 is returned.
3007      *
3008      * @param  codePoint the character (Unicode code point) to be converted.
3009      * @param  dst an array of <code>char</code> in which the
3010      * <code>codePoint</code>'s UTF-16 value is stored.
3011      * @param dstIndex the start index into the <code>dst</code>
3012      * array where the converted value is stored.
3013      * @return 1 if the code point is a BMP code point, 2 if the
3014      * code point is a supplementary code point.
3015      * @exception IllegalArgumentException if the specified
3016      * <code>codePoint</code> is not a valid Unicode code point.
3017      * @exception NullPointerException if the specified <code>dst</code> is null.
3018      * @exception IndexOutOfBoundsException if <code>dstIndex</code>
3019      * is negative or not less than <code>dst.length</code>, or if
3020      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
3021      * array element(s) to store the resulting <code>char</code>
3022      * value(s). (If <code>dstIndex</code> is equal to
3023      * <code>dst.length-1</code> and the specified
3024      * <code>codePoint</code> is a supplementary character, the
3025      * high-surrogate value is not stored in
3026      * <code>dst[dstIndex]</code>.)
3027      * @since  1.5
3028      */
3029     public static int toChars(int codePoint, char[] dst, int dstIndex) {
3030         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3031             throw new IllegalArgumentException();
3032         }
3033         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3034             dst[dstIndex] = (char) codePoint;
3035             return 1;
3036         }
3037         toSurrogates(codePoint, dst, dstIndex);
3038         return 2;
3039     }
3040 
3041     /**
3042      * Converts the specified character (Unicode code point) to its
3043      * UTF-16 representation stored in a <code>char</code> array. If
3044      * the specified code point is a BMP (Basic Multilingual Plane or
3045      * Plane 0) value, the resulting <code>char</code> array has
3046      * the same value as <code>codePoint</code>. If the specified code
3047      * point is a supplementary code point, the resulting
3048      * <code>char</code> array has the corresponding surrogate pair.
3049      *
3050      * @param  codePoint a Unicode code point
3051      * @return a <code>char</code> array having
3052      *         <code>codePoint</code>'s UTF-16 representation.
3053      * @exception IllegalArgumentException if the specified
3054      * <code>codePoint</code> is not a valid Unicode code point.
3055      * @since  1.5
3056      */
3057     public static char[] toChars(int codePoint) {
3058         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3059             throw new IllegalArgumentException();
3060         }
3061         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3062                 return new char[] { (char) codePoint };
3063         }
3064         char[] result = new char[2];
3065         toSurrogates(codePoint, result, 0);
3066         return result;
3067     }
3068 
3069     static void toSurrogates(int codePoint, char[] dst, int index) {
3070         int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
3071         dst[index+1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE);
3072         dst[index] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE);
3073     }
3074 
3075     /**
3076      * Returns the number of Unicode code points in the text range of
3077      * the specified char sequence. The text range begins at the
3078      * specified <code>beginIndex</code> and extends to the
3079      * <code>char</code> at index <code>endIndex - 1</code>. Thus the
3080      * length (in <code>char</code>s) of the text range is
3081      * <code>endIndex-beginIndex</code>. Unpaired surrogates within
3082      * the text range count as one code point each.
3083      *
3084      * @param seq the char sequence
3085      * @param beginIndex the index to the first <code>char</code> of
3086      * the text range.
3087      * @param endIndex the index after the last <code>char</code> of
3088      * the text range.
3089      * @return the number of Unicode code points in the specified text
3090      * range
3091      * @exception NullPointerException if <code>seq</code> is null.
3092      * @exception IndexOutOfBoundsException if the
3093      * <code>beginIndex</code> is negative, or <code>endIndex</code>
3094      * is larger than the length of the given sequence, or
3095      * <code>beginIndex</code> is larger than <code>endIndex</code>.
3096      * @since  1.5
3097      */
3098     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
3099         int length = seq.length();
3100         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
3101             throw new IndexOutOfBoundsException();
3102         }
3103         int n = 0;
3104         for (int i = beginIndex; i < endIndex; ) {
3105             n++;
3106             if (isHighSurrogate(seq.charAt(i++))) {
3107                 if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
3108                     i++;
3109                 }
3110             }
3111         }
3112         return n;
3113     }
3114 
3115     /**
3116      * Returns the number of Unicode code points in a subarray of the
3117      * <code>char</code> array argument. The <code>offset</code>
3118      * argument is the index of the first <code>char</code> of the
3119      * subarray and the <code>count</code> argument specifies the
3120      * length of the subarray in <code>char</code>s. Unpaired
3121      * surrogates within the subarray count as one code point each.
3122      *
3123      * @param a the <code>char</code> array
3124      * @param offset the index of the first <code>char</code> in the
3125      * given <code>char</code> array
3126      * @param count the length of the subarray in <code>char</code>s
3127      * @return the number of Unicode code points in the specified subarray
3128      * @exception NullPointerException if <code>a</code> is null.
3129      * @exception IndexOutOfBoundsException if <code>offset</code> or
3130      * <code>count</code> is negative, or if <code>offset +
3131      * count</code> is larger than the length of the given array.
3132      * @since  1.5
3133      */
3134     public static int codePointCount(char[] a, int offset, int count) {
3135         if (count > a.length - offset || offset < 0 || count < 0) {
3136             throw new IndexOutOfBoundsException();
3137         }
3138         return codePointCountImpl(a, offset, count);
3139     }
3140 
3141     static int codePointCountImpl(char[] a, int offset, int count) {
3142         int endIndex = offset + count;
3143         int n = 0;
3144         for (int i = offset; i < endIndex; ) {
3145             n++;
3146             if (isHighSurrogate(a[i++])) {
3147                 if (i < endIndex && isLowSurrogate(a[i])) {
3148                     i++;
3149                 }
3150             }
3151         }
3152         return n;
3153     }
3154 
3155     /**
3156      * Returns the index within the given char sequence that is offset
3157      * from the given <code>index</code> by <code>codePointOffset</code>
3158      * code points. Unpaired surrogates within the text range given by
3159      * <code>index</code> and <code>codePointOffset</code> count as
3160      * one code point each.
3161      *
3162      * @param seq the char sequence
3163      * @param index the index to be offset
3164      * @param codePointOffset the offset in code points
3165      * @return the index within the char sequence
3166      * @exception NullPointerException if <code>seq</code> is null.
3167      * @exception IndexOutOfBoundsException if <code>index</code>
3168      *   is negative or larger than the length of the char sequence,
3169      *   or if <code>codePointOffset</code> is positive and the
3170      *   subsequence starting with <code>index</code> has fewer than
3171      *   <code>codePointOffset</code> code points, or if
3172      *   <code>codePointOffset</code> is negative and the subsequence
3173      *   before <code>index</code> has fewer than the absolute value
3174      *   of <code>codePointOffset</code> code points.
3175      * @since 1.5
3176      */
3177     public static int offsetByCodePoints(CharSequence seq, int index,
3178                                          int codePointOffset) {
3179         int length = seq.length();
3180         if (index < 0 || index > length) {
3181             throw new IndexOutOfBoundsException();
3182         }
3183 
3184         int x = index;
3185         if (codePointOffset >= 0) {
3186             int i;
3187             for (i = 0; x < length && i < codePointOffset; i++) {
3188                 if (isHighSurrogate(seq.charAt(x++))) {
3189                     if (x < length && isLowSurrogate(seq.charAt(x))) {
3190                         x++;
3191                     }
3192                 }
3193             }
3194             if (i < codePointOffset) {
3195                 throw new IndexOutOfBoundsException();
3196             }
3197         } else {
3198             int i;
3199             for (i = codePointOffset; x > 0 && i < 0; i++) {
3200                 if (isLowSurrogate(seq.charAt(--x))) {
3201                     if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
3202                         x--;
3203                     }
3204                 }
3205             }
3206             if (i < 0) {
3207                 throw new IndexOutOfBoundsException();
3208             }
3209         }
3210         return x;
3211     }
3212 
3213     /**
3214      * Returns the index within the given <code>char</code> subarray
3215      * that is offset from the given <code>index</code> by
3216      * <code>codePointOffset</code> code points. The
3217      * <code>start</code> and <code>count</code> arguments specify a
3218      * subarray of the <code>char</code> array. Unpaired surrogates
3219      * within the text range given by <code>index</code> and
3220      * <code>codePointOffset</code> count as one code point each.
3221      *
3222      * @param a the <code>char</code> array
3223      * @param start the index of the first <code>char</code> of the
3224      * subarray
3225      * @param count the length of the subarray in <code>char</code>s
3226      * @param index the index to be offset
3227      * @param codePointOffset the offset in code points
3228      * @return the index within the subarray
3229      * @exception NullPointerException if <code>a</code> is null.
3230      * @exception IndexOutOfBoundsException
3231      *   if <code>start</code> or <code>count</code> is negative,
3232      *   or if <code>start + count</code> is larger than the length of
3233      *   the given array,
3234      *   or if <code>index</code> is less than <code>start</code> or
3235      *   larger than <code>start + count</code>,
3236      *   or if <code>codePointOffset</code> is positive and the text range
3237      *   starting with <code>index</code> and ending with <code>start
3238      *   + count - 1</code> has fewer than <code>codePointOffset</code> code
3239      *   points,
3240      *   or if <code>codePointOffset</code> is negative and the text range
3241      *   starting with <code>start</code> and ending with <code>index
3242      *   - 1</code> has fewer than the absolute value of
3243      *   <code>codePointOffset</code> code points.
3244      * @since 1.5
3245      */
3246     public static int offsetByCodePoints(char[] a, int start, int count,
3247                                          int index, int codePointOffset) {
3248         if (count > a.length-start || start < 0 || count < 0
3249             || index < start || index > start+count) {
3250             throw new IndexOutOfBoundsException();
3251         }
3252         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
3253     }
3254 
3255     static int offsetByCodePointsImpl(char[]a, int start, int count,
3256                                       int index, int codePointOffset) {
3257         int x = index;
3258         if (codePointOffset >= 0) {
3259             int limit = start + count;
3260             int i;
3261             for (i = 0; x < limit && i < codePointOffset; i++) {
3262                 if (isHighSurrogate(a[x++])) {
3263                     if (x < limit && isLowSurrogate(a[x])) {
3264                         x++;
3265                     }
3266                 }
3267             }
3268             if (i < codePointOffset) {
3269                 throw new IndexOutOfBoundsException();
3270             }
3271         } else {
3272             int i;
3273             for (i = codePointOffset; x > start && i < 0; i++) {
3274                 if (isLowSurrogate(a[--x])) {
3275                     if (x > start && isHighSurrogate(a[x-1])) {
3276                         x--;
3277                     }
3278                 }
3279             }
3280             if (i < 0) {
3281                 throw new IndexOutOfBoundsException();
3282             }
3283         }
3284         return x;
3285     }
3286 
3287    /**
3288      * Determines if the specified character is a lowercase character.
3289      * <p>
3290      * A character is lowercase if its general category type, provided
3291      * by <code>Character.getType(ch)</code>, is
3292      * <code>LOWERCASE_LETTER</code>.
3293      * <p>
3294      * The following are examples of lowercase characters:
3295      * <p><blockquote><pre>
3296      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3297      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3298      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3299      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3300      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3301      * </pre></blockquote>
3302      * <p> Many other Unicode characters are lowercase too.
3303      *
3304      * <p><b>Note:</b> This method cannot handle <a
3305      * href="#supplementary"> supplementary characters</a>. To support
3306      * all Unicode characters, including supplementary characters, use
3307      * the {@link #isLowerCase(int)} method.
3308      *
3309      * @param   ch   the character to be tested.
3310      * @return  <code>true</code> if the character is lowercase;
3311      *          <code>false</code> otherwise.
3312      * @see     java.lang.Character#isUpperCase(char)
3313      * @see     java.lang.Character#isTitleCase(char)
3314      * @see     java.lang.Character#toLowerCase(char)
3315      * @see     java.lang.Character#getType(char)
3316      */
3317     public static boolean isLowerCase(char ch) {
3318         return isLowerCase((int)ch);
3319     }
3320 
3321     /**
3322      * Determines if the specified character (Unicode code point) is a
3323      * lowercase character.
3324      * <p>
3325      * A character is lowercase if its general category type, provided
3326      * by {@link Character#getType getType(codePoint)}, is
3327      * <code>LOWERCASE_LETTER</code>.
3328      * <p>
3329      * The following are examples of lowercase characters:
3330      * <p><blockquote><pre>
3331      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3332      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3333      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3334      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3335      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3336      * </pre></blockquote>
3337      * <p> Many other Unicode characters are lowercase too.
3338      *
3339      * @param   codePoint the character (Unicode code point) to be tested.
3340      * @return  <code>true</code> if the character is lowercase;
3341      *          <code>false</code> otherwise.
3342      * @see     java.lang.Character#isUpperCase(int)
3343      * @see     java.lang.Character#isTitleCase(int)
3344      * @see     java.lang.Character#toLowerCase(int)
3345      * @see     java.lang.Character#getType(int)
3346      * @since   1.5
3347      */
3348     public static boolean isLowerCase(int codePoint) {
3349         return getType(codePoint) == Character.LOWERCASE_LETTER;
3350     }
3351 
3352    /**
3353      * Determines if the specified character is an uppercase character.
3354      * <p>
3355      * A character is uppercase if its general category type, provided by
3356      * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
3357      * <p>
3358      * The following are examples of uppercase characters:
3359      * <p><blockquote><pre>
3360      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3361      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3362      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3363      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3364      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3365      * </pre></blockquote>
3366      * <p> Many other Unicode characters are uppercase too.
3367      *
3368      * <p><b>Note:</b> This method cannot handle <a
3369      * href="#supplementary"> supplementary characters</a>. To support
3370      * all Unicode characters, including supplementary characters, use
3371      * the {@link #isUpperCase(int)} method.
3372      *
3373      * @param   ch   the character to be tested.
3374      * @return  <code>true</code> if the character is uppercase;
3375      *          <code>false</code> otherwise.
3376      * @see     java.lang.Character#isLowerCase(char)
3377      * @see     java.lang.Character#isTitleCase(char)
3378      * @see     java.lang.Character#toUpperCase(char)
3379      * @see     java.lang.Character#getType(char)
3380      * @since   1.0
3381      */
3382     public static boolean isUpperCase(char ch) {
3383         return isUpperCase((int)ch);
3384     }
3385 
3386     /**
3387      * Determines if the specified character (Unicode code point) is an uppercase character.
3388      * <p>
3389      * A character is uppercase if its general category type, provided by
3390      * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
3391      * <p>
3392      * The following are examples of uppercase characters:
3393      * <p><blockquote><pre>
3394      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3395      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3396      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3397      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3398      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3399      * </pre></blockquote>
3400      * <p> Many other Unicode characters are uppercase too.
3401      *
3402      * @param   codePoint the character (Unicode code point) to be tested.
3403      * @return  <code>true</code> if the character is uppercase;
3404      *          <code>false</code> otherwise.
3405      * @see     java.lang.Character#isLowerCase(int)
3406      * @see     java.lang.Character#isTitleCase(int)
3407      * @see     java.lang.Character#toUpperCase(int)
3408      * @see     java.lang.Character#getType(int)
3409      * @since   1.5
3410      */
3411     public static boolean isUpperCase(int codePoint) {
3412         return getType(codePoint) == Character.UPPERCASE_LETTER;
3413     }
3414 
3415     /**
3416      * Determines if the specified character is a titlecase character.
3417      * <p>
3418      * A character is a titlecase character if its general
3419      * category type, provided by <code>Character.getType(ch)</code>,
3420      * is <code>TITLECASE_LETTER</code>.
3421      * <p>
3422      * Some characters look like pairs of Latin letters. For example, there
3423      * is an uppercase letter that looks like "LJ" and has a corresponding
3424      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3425      * is the appropriate form to use when rendering a word in lowercase
3426      * with initial capitals, as for a book title.
3427      * <p>
3428      * These are some of the Unicode characters for which this method returns
3429      * <code>true</code>:
3430      * <ul>
3431      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3432      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3433      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3434      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3435      * </ul>
3436      * <p> Many other Unicode characters are titlecase too.
3437      *
3438      * <p><b>Note:</b> This method cannot handle <a
3439      * href="#supplementary"> supplementary characters</a>. To support
3440      * all Unicode characters, including supplementary characters, use
3441      * the {@link #isTitleCase(int)} method.
3442      *
3443      * @param   ch   the character to be tested.
3444      * @return  <code>true</code> if the character is titlecase;
3445      *          <code>false</code> otherwise.
3446      * @see     java.lang.Character#isLowerCase(char)
3447      * @see     java.lang.Character#isUpperCase(char)
3448      * @see     java.lang.Character#toTitleCase(char)
3449      * @see     java.lang.Character#getType(char)
3450      * @since   1.0.2
3451      */
3452     public static boolean isTitleCase(char ch) {
3453         return isTitleCase((int)ch);
3454     }
3455 
3456     /**
3457      * Determines if the specified character (Unicode code point) is a titlecase character.
3458      * <p>
3459      * A character is a titlecase character if its general
3460      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3461      * is <code>TITLECASE_LETTER</code>.
3462      * <p>
3463      * Some characters look like pairs of Latin letters. For example, there
3464      * is an uppercase letter that looks like "LJ" and has a corresponding
3465      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3466      * is the appropriate form to use when rendering a word in lowercase
3467      * with initial capitals, as for a book title.
3468      * <p>
3469      * These are some of the Unicode characters for which this method returns
3470      * <code>true</code>:
3471      * <ul>
3472      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3473      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3474      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3475      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3476      * </ul>
3477      * <p> Many other Unicode characters are titlecase too.
3478      *
3479      * @param   codePoint the character (Unicode code point) to be tested.
3480      * @return  <code>true</code> if the character is titlecase;
3481      *          <code>false</code> otherwise.
3482      * @see     java.lang.Character#isLowerCase(int)
3483      * @see     java.lang.Character#isUpperCase(int)
3484      * @see     java.lang.Character#toTitleCase(int)
3485      * @see     java.lang.Character#getType(int)
3486      * @since   1.5
3487      */
3488     public static boolean isTitleCase(int codePoint) {
3489         return getType(codePoint) == Character.TITLECASE_LETTER;
3490     }
3491 
3492     /**
3493      * Determines if the specified character is a digit.
3494      * <p>
3495      * A character is a digit if its general category type, provided
3496      * by <code>Character.getType(ch)</code>, is
3497      * <code>DECIMAL_DIGIT_NUMBER</code>.
3498      * <p>
3499      * Some Unicode character ranges that contain digits:
3500      * <ul>
3501      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3502      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3503      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3504      *     Arabic-Indic digits
3505      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3506      *     Extended Arabic-Indic digits
3507      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3508      *     Devanagari digits
3509      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3510      *     Fullwidth digits
3511      * </ul>
3512      *
3513      * Many other character ranges contain digits as well.
3514      *
3515      * <p><b>Note:</b> This method cannot handle <a
3516      * href="#supplementary"> supplementary characters</a>. To support
3517      * all Unicode characters, including supplementary characters, use
3518      * the {@link #isDigit(int)} method.
3519      *
3520      * @param   ch   the character to be tested.
3521      * @return  <code>true</code> if the character is a digit;
3522      *          <code>false</code> otherwise.
3523      * @see     java.lang.Character#digit(char, int)
3524      * @see     java.lang.Character#forDigit(int, int)
3525      * @see     java.lang.Character#getType(char)
3526      */
3527     public static boolean isDigit(char ch) {
3528         return isDigit((int)ch);
3529     }
3530 
3531     /**
3532      * Determines if the specified character (Unicode code point) is a digit.
3533      * <p>
3534      * A character is a digit if its general category type, provided
3535      * by {@link Character#getType(int) getType(codePoint)}, is
3536      * <code>DECIMAL_DIGIT_NUMBER</code>.
3537      * <p>
3538      * Some Unicode character ranges that contain digits:
3539      * <ul>
3540      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3541      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3542      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3543      *     Arabic-Indic digits
3544      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3545      *     Extended Arabic-Indic digits
3546      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3547      *     Devanagari digits
3548      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3549      *     Fullwidth digits
3550      * </ul>
3551      *
3552      * Many other character ranges contain digits as well.
3553      *
3554      * @param   codePoint the character (Unicode code point) to be tested.
3555      * @return  <code>true</code> if the character is a digit;
3556      *          <code>false</code> otherwise.
3557      * @see     java.lang.Character#forDigit(int, int)
3558      * @see     java.lang.Character#getType(int)
3559      * @since   1.5
3560      */
3561     public static boolean isDigit(int codePoint) {
3562         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3563     }
3564 
3565     /**
3566      * Determines if a character is defined in Unicode.
3567      * <p>
3568      * A character is defined if at least one of the following is true:
3569      * <ul>
3570      * <li>It has an entry in the UnicodeData file.
3571      * <li>It has a value in a range defined by the UnicodeData file.
3572      * </ul>
3573      *
3574      * <p><b>Note:</b> This method cannot handle <a
3575      * href="#supplementary"> supplementary characters</a>. To support
3576      * all Unicode characters, including supplementary characters, use
3577      * the {@link #isDefined(int)} method.
3578      *
3579      * @param   ch   the character to be tested
3580      * @return  <code>true</code> if the character has a defined meaning
3581      *          in Unicode; <code>false</code> otherwise.
3582      * @see     java.lang.Character#isDigit(char)
3583      * @see     java.lang.Character#isLetter(char)
3584      * @see     java.lang.Character#isLetterOrDigit(char)
3585      * @see     java.lang.Character#isLowerCase(char)
3586      * @see     java.lang.Character#isTitleCase(char)
3587      * @see     java.lang.Character#isUpperCase(char)
3588      * @since   1.0.2
3589      */
3590     public static boolean isDefined(char ch) {
3591         return isDefined((int)ch);
3592     }
3593 
3594     /**
3595      * Determines if a character (Unicode code point) is defined in Unicode.
3596      * <p>
3597      * A character is defined if at least one of the following is true:
3598      * <ul>
3599      * <li>It has an entry in the UnicodeData file.
3600      * <li>It has a value in a range defined by the UnicodeData file.
3601      * </ul>
3602      *
3603      * @param   codePoint the character (Unicode code point) to be tested.
3604      * @return  <code>true</code> if the character has a defined meaning
3605      *          in Unicode; <code>false</code> otherwise.
3606      * @see     java.lang.Character#isDigit(int)
3607      * @see     java.lang.Character#isLetter(int)
3608      * @see     java.lang.Character#isLetterOrDigit(int)
3609      * @see     java.lang.Character#isLowerCase(int)
3610      * @see     java.lang.Character#isTitleCase(int)
3611      * @see     java.lang.Character#isUpperCase(int)
3612      * @since   1.5
3613      */
3614     public static boolean isDefined(int codePoint) {
3615         return getType(codePoint) != Character.UNASSIGNED;
3616     }
3617 
3618     /**
3619      * Determines if the specified character is a letter.
3620      * <p>
3621      * A character is considered to be a letter if its general
3622      * category type, provided by <code>Character.getType(ch)</code>,
3623      * is any of the following:
3624      * <ul>
3625      * <li> <code>UPPERCASE_LETTER</code>
3626      * <li> <code>LOWERCASE_LETTER</code>
3627      * <li> <code>TITLECASE_LETTER</code>
3628      * <li> <code>MODIFIER_LETTER</code>
3629      * <li> <code>OTHER_LETTER</code>
3630      * </ul>
3631      *
3632      * Not all letters have case. Many characters are
3633      * letters but are neither uppercase nor lowercase nor titlecase.
3634      *
3635      * <p><b>Note:</b> This method cannot handle <a
3636      * href="#supplementary"> supplementary characters</a>. To support
3637      * all Unicode characters, including supplementary characters, use
3638      * the {@link #isLetter(int)} method.
3639      *
3640      * @param   ch   the character to be tested.
3641      * @return  <code>true</code> if the character is a letter;
3642      *          <code>false</code> otherwise.
3643      * @see     java.lang.Character#isDigit(char)
3644      * @see     java.lang.Character#isJavaIdentifierStart(char)
3645      * @see     java.lang.Character#isJavaLetter(char)
3646      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3647      * @see     java.lang.Character#isLetterOrDigit(char)
3648      * @see     java.lang.Character#isLowerCase(char)
3649      * @see     java.lang.Character#isTitleCase(char)
3650      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3651      * @see     java.lang.Character#isUpperCase(char)
3652      */
3653     public static boolean isLetter(char ch) {
3654         return isLetter((int)ch);
3655     }
3656 
3657     /**
3658      * Determines if the specified character (Unicode code point) is a letter.
3659      * <p>
3660      * A character is considered to be a letter if its general
3661      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3662      * is any of the following:
3663      * <ul>
3664      * <li> <code>UPPERCASE_LETTER</code>
3665      * <li> <code>LOWERCASE_LETTER</code>
3666      * <li> <code>TITLECASE_LETTER</code>
3667      * <li> <code>MODIFIER_LETTER</code>
3668      * <li> <code>OTHER_LETTER</code>
3669      * </ul>
3670      *
3671      * Not all letters have case. Many characters are
3672      * letters but are neither uppercase nor lowercase nor titlecase.
3673      *
3674      * @param   codePoint the character (Unicode code point) to be tested.
3675      * @return  <code>true</code> if the character is a letter;
3676      *          <code>false</code> otherwise.
3677      * @see     java.lang.Character#isDigit(int)
3678      * @see     java.lang.Character#isJavaIdentifierStart(int)
3679      * @see     java.lang.Character#isLetterOrDigit(int)
3680      * @see     java.lang.Character#isLowerCase(int)
3681      * @see     java.lang.Character#isTitleCase(int)
3682      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3683      * @see     java.lang.Character#isUpperCase(int)
3684      * @since   1.5
3685      */
3686     public static boolean isLetter(int codePoint) {
3687         return ((((1 << Character.UPPERCASE_LETTER) |
3688             (1 << Character.LOWERCASE_LETTER) |
3689             (1 << Character.TITLECASE_LETTER) |
3690             (1 << Character.MODIFIER_LETTER) |
3691             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
3692             != 0;
3693     }
3694 
3695     /**
3696      * Determines if the specified character is a letter or digit.
3697      * <p>
3698      * A character is considered to be a letter or digit if either
3699      * <code>Character.isLetter(char ch)</code> or
3700      * <code>Character.isDigit(char ch)</code> returns
3701      * <code>true</code> for the character.
3702      *
3703      * <p><b>Note:</b> This method cannot handle <a
3704      * href="#supplementary"> supplementary characters</a>. To support
3705      * all Unicode characters, including supplementary characters, use
3706      * the {@link #isLetterOrDigit(int)} method.
3707      *
3708      * @param   ch   the character to be tested.
3709      * @return  <code>true</code> if the character is a letter or digit;
3710      *          <code>false</code> otherwise.
3711      * @see     java.lang.Character#isDigit(char)
3712      * @see     java.lang.Character#isJavaIdentifierPart(char)
3713      * @see     java.lang.Character#isJavaLetter(char)
3714      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3715      * @see     java.lang.Character#isLetter(char)
3716      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3717      * @since   1.0.2
3718      */
3719     public static boolean isLetterOrDigit(char ch) {
3720         return isLetterOrDigit((int)ch);
3721     }
3722 
3723     /**
3724      * Determines if the specified character (Unicode code point) is a letter or digit.
3725      * <p>
3726      * A character is considered to be a letter or digit if either
3727      * {@link #isLetter(int) isLetter(codePoint)} or
3728      * {@link #isDigit(int) isDigit(codePoint)} returns
3729      * <code>true</code> for the character.
3730      *
3731      * @param   codePoint the character (Unicode code point) to be tested.
3732      * @return  <code>true</code> if the character is a letter or digit;
3733      *          <code>false</code> otherwise.
3734      * @see     java.lang.Character#isDigit(int)
3735      * @see     java.lang.Character#isJavaIdentifierPart(int)
3736      * @see     java.lang.Character#isLetter(int)
3737      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3738      * @since   1.5
3739      */
3740     public static boolean isLetterOrDigit(int codePoint) {
3741         return ((((1 << Character.UPPERCASE_LETTER) |
3742             (1 << Character.LOWERCASE_LETTER) |
3743             (1 << Character.TITLECASE_LETTER) |
3744             (1 << Character.MODIFIER_LETTER) |
3745             (1 << Character.OTHER_LETTER) |
3746             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
3747             != 0;
3748     }
3749 
3750     /**
3751      * Determines if the specified character is permissible as the first
3752      * character in a Java identifier.
3753      * <p>
3754      * A character may start a Java identifier if and only if
3755      * one of the following is true:
3756      * <ul>
3757      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3758      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3759      * <li> ch is a currency symbol (such as "$")
3760      * <li> ch is a connecting punctuation character (such as "_").
3761      * </ul>
3762      *
3763      * @param   ch the character to be tested.
3764      * @return  <code>true</code> if the character may start a Java
3765      *          identifier; <code>false</code> otherwise.
3766      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3767      * @see     java.lang.Character#isJavaIdentifierStart(char)
3768      * @see     java.lang.Character#isJavaIdentifierPart(char)
3769      * @see     java.lang.Character#isLetter(char)
3770      * @see     java.lang.Character#isLetterOrDigit(char)
3771      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}.
3774      */
3775     @Deprecated
3776     public static boolean isJavaLetter(char ch) {
3777         return isJavaIdentifierStart(ch);
3778     }
3779 
3780     /**
3781      * Determines if the specified character may be part of a Java
3782      * identifier as other than the first character.
3783      * <p>
3784      * A character may be part of a Java identifier if and only if any
3785      * of the following are true:
3786      * <ul>
3787      * <li>  it is a letter
3788      * <li>  it is a currency symbol (such as <code>'$'</code>)
3789      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3790      * <li>  it is a digit
3791      * <li>  it is a numeric letter (such as a Roman numeral character)
3792      * <li>  it is a combining mark
3793      * <li>  it is a non-spacing mark
3794      * <li> <code>isIdentifierIgnorable</code> returns
3795      * <code>true</code> for the character.
3796      * </ul>
3797      *
3798      * @param   ch the character to be tested.
3799      * @return  <code>true</code> if the character may be part of a
3800      *          Java identifier; <code>false</code> otherwise.
3801      * @see     java.lang.Character#isJavaLetter(char)
3802      * @see     java.lang.Character#isJavaIdentifierStart(char)
3803      * @see     java.lang.Character#isJavaIdentifierPart(char)
3804      * @see     java.lang.Character#isLetter(char)
3805      * @see     java.lang.Character#isLetterOrDigit(char)
3806      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3807      * @see     java.lang.Character#isIdentifierIgnorable(char)
     * @since   1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}.
3810      */
3811     @Deprecated
3812     public static boolean isJavaLetterOrDigit(char ch) {
3813         return isJavaIdentifierPart(ch);
3814     }
3815 
3816     /**
3817      * Determines if the specified character is
3818      * permissible as the first character in a Java identifier.
3819      * <p>
3820      * A character may start a Java identifier if and only if
3821      * one of the following conditions is true:
3822      * <ul>
3823      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3824      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3825      * <li> ch is a currency symbol (such as "$")
3826      * <li> ch is a connecting punctuation character (such as "_").
3827      * </ul>
3828      *
3829      * <p><b>Note:</b> This method cannot handle <a
3830      * href="#supplementary"> supplementary characters</a>. To support
3831      * all Unicode characters, including supplementary characters, use
3832      * the {@link #isJavaIdentifierStart(int)} method.
3833      *
3834      * @param   ch the character to be tested.
3835      * @return  <code>true</code> if the character may start a Java identifier;
3836      *          <code>false</code> otherwise.
3837      * @see     java.lang.Character#isJavaIdentifierPart(char)
3838      * @see     java.lang.Character#isLetter(char)
3839      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3840      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3841      * @since   1.1
3842      */
3843     public static boolean isJavaIdentifierStart(char ch) {
3844         return isJavaIdentifierStart((int)ch);
3845     }
3846 
3847     /**
3848      * Determines if the character (Unicode code point) is
3849      * permissible as the first character in a Java identifier.
3850      * <p>
3851      * A character may start a Java identifier if and only if
3852      * one of the following conditions is true:
3853      * <ul>
3854      * <li> {@link #isLetter(int) isLetter(codePoint)}
3855      *      returns <code>true</code>
3856      * <li> {@link #getType(int) getType(codePoint)}
3857      *      returns <code>LETTER_NUMBER</code>
3858      * <li> the referenced character is a currency symbol (such as "$")
3859      * <li> the referenced character is a connecting punctuation character
3860      *      (such as "_").
3861      * </ul>
3862      *
3863      * @param   codePoint the character (Unicode code point) to be tested.
3864      * @return  <code>true</code> if the character may start a Java identifier;
3865      *          <code>false</code> otherwise.
3866      * @see     java.lang.Character#isJavaIdentifierPart(int)
3867      * @see     java.lang.Character#isLetter(int)
3868      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3869      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3870      * @since   1.5
3871      */
3872     public static boolean isJavaIdentifierStart(int codePoint) {
3873         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
3874     }
3875 
3876     /**
3877      * Determines if the specified character may be part of a Java
3878      * identifier as other than the first character.
3879      * <p>
3880      * A character may be part of a Java identifier if any of the following
3881      * are true:
3882      * <ul>
3883      * <li>  it is a letter
3884      * <li>  it is a currency symbol (such as <code>'$'</code>)
3885      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3886      * <li>  it is a digit
3887      * <li>  it is a numeric letter (such as a Roman numeral character)
3888      * <li>  it is a combining mark
3889      * <li>  it is a non-spacing mark
3890      * <li> <code>isIdentifierIgnorable</code> returns
3891      * <code>true</code> for the character
3892      * </ul>
3893      *
3894      * <p><b>Note:</b> This method cannot handle <a
3895      * href="#supplementary"> supplementary characters</a>. To support
3896      * all Unicode characters, including supplementary characters, use
3897      * the {@link #isJavaIdentifierPart(int)} method.
3898      *
3899      * @param   ch      the character to be tested.
3900      * @return <code>true</code> if the character may be part of a
3901      *          Java identifier; <code>false</code> otherwise.
3902      * @see     java.lang.Character#isIdentifierIgnorable(char)
3903      * @see     java.lang.Character#isJavaIdentifierStart(char)
3904      * @see     java.lang.Character#isLetterOrDigit(char)
3905      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3906      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3907      * @since   1.1
3908      */
3909     public static boolean isJavaIdentifierPart(char ch) {
3910         return isJavaIdentifierPart((int)ch);
3911     }
3912 
3913     /**
3914      * Determines if the character (Unicode code point) may be part of a Java
3915      * identifier as other than the first character.
3916      * <p>
3917      * A character may be part of a Java identifier if any of the following
3918      * are true:
3919      * <ul>
3920      * <li>  it is a letter
3921      * <li>  it is a currency symbol (such as <code>'$'</code>)
3922      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3923      * <li>  it is a digit
3924      * <li>  it is a numeric letter (such as a Roman numeral character)
3925      * <li>  it is a combining mark
3926      * <li>  it is a non-spacing mark
3927      * <li> {@link #isIdentifierIgnorable(int)
3928      * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3929      * the character
3930      * </ul>
3931      *
3932      * @param   codePoint the character (Unicode code point) to be tested.
3933      * @return <code>true</code> if the character may be part of a
3934      *          Java identifier; <code>false</code> otherwise.
3935      * @see     java.lang.Character#isIdentifierIgnorable(int)
3936      * @see     java.lang.Character#isJavaIdentifierStart(int)
3937      * @see     java.lang.Character#isLetterOrDigit(int)
3938      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3939      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3940      * @since   1.5
3941      */
3942     public static boolean isJavaIdentifierPart(int codePoint) {
3943         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
3944     }
3945 
3946     /**
3947      * Determines if the specified character is permissible as the
3948      * first character in a Unicode identifier.
3949      * <p>
3950      * A character may start a Unicode identifier if and only if
3951      * one of the following conditions is true:
3952      * <ul>
3953      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3954      * <li> {@link #getType(char) getType(ch)} returns
3955      *      <code>LETTER_NUMBER</code>.
3956      * </ul>
3957      *
3958      * <p><b>Note:</b> This method cannot handle <a
3959      * href="#supplementary"> supplementary characters</a>. To support
3960      * all Unicode characters, including supplementary characters, use
3961      * the {@link #isUnicodeIdentifierStart(int)} method.
3962      *
3963      * @param   ch      the character to be tested.
3964      * @return  <code>true</code> if the character may start a Unicode
3965      *          identifier; <code>false</code> otherwise.
3966      * @see     java.lang.Character#isJavaIdentifierStart(char)
3967      * @see     java.lang.Character#isLetter(char)
3968      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3969      * @since   1.1
3970      */
3971     public static boolean isUnicodeIdentifierStart(char ch) {
3972         return isUnicodeIdentifierStart((int)ch);
3973     }
3974 
3975     /**
3976      * Determines if the specified character (Unicode code point) is permissible as the
3977      * first character in a Unicode identifier.
3978      * <p>
3979      * A character may start a Unicode identifier if and only if
3980      * one of the following conditions is true:
3981      * <ul>
3982      * <li> {@link #isLetter(int) isLetter(codePoint)}
3983      *      returns <code>true</code>
3984      * <li> {@link #getType(int) getType(codePoint)}
3985      *      returns <code>LETTER_NUMBER</code>.
3986      * </ul>
3987      * @param   codePoint the character (Unicode code point) to be tested.
3988      * @return  <code>true</code> if the character may start a Unicode
3989      *          identifier; <code>false</code> otherwise.
3990      * @see     java.lang.Character#isJavaIdentifierStart(int)
3991      * @see     java.lang.Character#isLetter(int)
3992      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3993      * @since   1.5
3994      */
3995     public static boolean isUnicodeIdentifierStart(int codePoint) {
3996         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
3997     }
3998 
3999     /**
4000      * Determines if the specified character may be part of a Unicode
4001      * identifier as other than the first character.
4002      * <p>
4003      * A character may be part of a Unicode identifier if and only if
4004      * one of the following statements is true:
4005      * <ul>
4006      * <li>  it is a letter
4007      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4008      * <li>  it is a digit
4009      * <li>  it is a numeric letter (such as a Roman numeral character)
4010      * <li>  it is a combining mark
4011      * <li>  it is a non-spacing mark
4012      * <li> <code>isIdentifierIgnorable</code> returns
4013      * <code>true</code> for this character.
4014      * </ul>
4015      *
4016      * <p><b>Note:</b> This method cannot handle <a
4017      * href="#supplementary"> supplementary characters</a>. To support
4018      * all Unicode characters, including supplementary characters, use
4019      * the {@link #isUnicodeIdentifierPart(int)} method.
4020      *
4021      * @param   ch      the character to be tested.
4022      * @return  <code>true</code> if the character may be part of a
4023      *          Unicode identifier; <code>false</code> otherwise.
4024      * @see     java.lang.Character#isIdentifierIgnorable(char)
4025      * @see     java.lang.Character#isJavaIdentifierPart(char)
4026      * @see     java.lang.Character#isLetterOrDigit(char)
4027      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
4028      * @since   1.1
4029      */
4030     public static boolean isUnicodeIdentifierPart(char ch) {
4031         return isUnicodeIdentifierPart((int)ch);
4032     }
4033 
4034     /**
4035      * Determines if the specified character (Unicode code point) may be part of a Unicode
4036      * identifier as other than the first character.
4037      * <p>
4038      * A character may be part of a Unicode identifier if and only if
4039      * one of the following statements is true:
4040      * <ul>
4041      * <li>  it is a letter
4042      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4043      * <li>  it is a digit
4044      * <li>  it is a numeric letter (such as a Roman numeral character)
4045      * <li>  it is a combining mark
4046      * <li>  it is a non-spacing mark
4047      * <li> <code>isIdentifierIgnorable</code> returns
4048      * <code>true</code> for this character.
4049      * </ul>
4050      * @param   codePoint the character (Unicode code point) to be tested.
4051      * @return  <code>true</code> if the character may be part of a
4052      *          Unicode identifier; <code>false</code> otherwise.
4053      * @see     java.lang.Character#isIdentifierIgnorable(int)
4054      * @see     java.lang.Character#isJavaIdentifierPart(int)
4055      * @see     java.lang.Character#isLetterOrDigit(int)
4056      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
4057      * @since   1.5
4058      */
4059     public static boolean isUnicodeIdentifierPart(int codePoint) {
4060         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
4061     }
4062 
4063     /**
4064      * Determines if the specified character should be regarded as
4065      * an ignorable character in a Java identifier or a Unicode identifier.
4066      * <p>
4067      * The following Unicode characters are ignorable in a Java identifier
4068      * or a Unicode identifier:
4069      * <ul>
4070      * <li>ISO control characters that are not whitespace
4071      * <ul>
4072      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4073      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4074      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4075      * </ul>
4076      *
4077      * <li>all characters that have the <code>FORMAT</code> general
4078      * category value
4079      * </ul>
4080      *
4081      * <p><b>Note:</b> This method cannot handle <a
4082      * href="#supplementary"> supplementary characters</a>. To support
4083      * all Unicode characters, including supplementary characters, use
4084      * the {@link #isIdentifierIgnorable(int)} method.
4085      *
4086      * @param   ch      the character to be tested.
4087      * @return  <code>true</code> if the character is an ignorable control
4088      *          character that may be part of a Java or Unicode identifier;
4089      *           <code>false</code> otherwise.
4090      * @see     java.lang.Character#isJavaIdentifierPart(char)
4091      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
4092      * @since   1.1
4093      */
4094     public static boolean isIdentifierIgnorable(char ch) {
4095         return isIdentifierIgnorable((int)ch);
4096     }
4097 
4098     /**
4099      * Determines if the specified character (Unicode code point) should be regarded as
4100      * an ignorable character in a Java identifier or a Unicode identifier.
4101      * <p>
4102      * The following Unicode characters are ignorable in a Java identifier
4103      * or a Unicode identifier:
4104      * <ul>
4105      * <li>ISO control characters that are not whitespace
4106      * <ul>
4107      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4108      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4109      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4110      * </ul>
4111      *
4112      * <li>all characters that have the <code>FORMAT</code> general
4113      * category value
4114      * </ul>
4115      *
4116      * @param   codePoint the character (Unicode code point) to be tested.
4117      * @return  <code>true</code> if the character is an ignorable control
4118      *          character that may be part of a Java or Unicode identifier;
4119      *          <code>false</code> otherwise.
4120      * @see     java.lang.Character#isJavaIdentifierPart(int)
4121      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
4122      * @since   1.5
4123      */
4124     public static boolean isIdentifierIgnorable(int codePoint) {
4125         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
4126     }
4127 
4128     /**
4129      * Converts the character argument to lowercase using case
4130      * mapping information from the UnicodeData file.
4131      * <p>
4132      * Note that
4133      * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
4134      * does not always return <code>true</code> for some ranges of
4135      * characters, particularly those that are symbols or ideographs.
4136      *
4137      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4138      * characters to lowercase. <code>String</code> case mapping methods
4139      * have several benefits over <code>Character</code> case mapping methods.
4140      * <code>String</code> case mapping methods can perform locale-sensitive
4141      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4142      * the <code>Character</code> case mapping methods cannot.
4143      *
4144      * <p><b>Note:</b> This method cannot handle <a
4145      * href="#supplementary"> supplementary characters</a>. To support
4146      * all Unicode characters, including supplementary characters, use
4147      * the {@link #toLowerCase(int)} method.
4148      *
4149      * @param   ch   the character to be converted.
4150      * @return  the lowercase equivalent of the character, if any;
4151      *          otherwise, the character itself.
4152      * @see     java.lang.Character#isLowerCase(char)
4153      * @see     java.lang.String#toLowerCase()
4154      */
4155     public static char toLowerCase(char ch) {
4156         return (char)toLowerCase((int)ch);
4157     }
4158 
4159     /**
4160      * Converts the character (Unicode code point) argument to
4161      * lowercase using case mapping information from the UnicodeData
4162      * file.
4163      *
4164      * <p> Note that
4165      * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
4166      * does not always return <code>true</code> for some ranges of
4167      * characters, particularly those that are symbols or ideographs.
4168      *
4169      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4170      * characters to lowercase. <code>String</code> case mapping methods
4171      * have several benefits over <code>Character</code> case mapping methods.
4172      * <code>String</code> case mapping methods can perform locale-sensitive
4173      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4174      * the <code>Character</code> case mapping methods cannot.
4175      *
4176      * @param   codePoint   the character (Unicode code point) to be converted.
4177      * @return  the lowercase equivalent of the character (Unicode code
4178      *          point), if any; otherwise, the character itself.
4179      * @see     java.lang.Character#isLowerCase(int)
4180      * @see     java.lang.String#toLowerCase()
4181      *
4182      * @since   1.5
4183      */
4184     public static int toLowerCase(int codePoint) {
4185         return CharacterData.of(codePoint).toLowerCase(codePoint);
4186     }
4187 
4188     /**
4189      * Converts the character argument to uppercase using case mapping
4190      * information from the UnicodeData file.
4191      * <p>
4192      * Note that
4193      * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
4194      * does not always return <code>true</code> for some ranges of
4195      * characters, particularly those that are symbols or ideographs.
4196      *
4197      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4198      * characters to uppercase. <code>String</code> case mapping methods
4199      * have several benefits over <code>Character</code> case mapping methods.
4200      * <code>String</code> case mapping methods can perform locale-sensitive
4201      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4202      * the <code>Character</code> case mapping methods cannot.
4203      *
4204      * <p><b>Note:</b> This method cannot handle <a
4205      * href="#supplementary"> supplementary characters</a>. To support
4206      * all Unicode characters, including supplementary characters, use
4207      * the {@link #toUpperCase(int)} method.
4208      *
4209      * @param   ch   the character to be converted.
4210      * @return  the uppercase equivalent of the character, if any;
4211      *          otherwise, the character itself.
4212      * @see     java.lang.Character#isUpperCase(char)
4213      * @see     java.lang.String#toUpperCase()
4214      */
4215     public static char toUpperCase(char ch) {
4216         return (char)toUpperCase((int)ch);
4217     }
4218 
4219     /**
4220      * Converts the character (Unicode code point) argument to
4221      * uppercase using case mapping information from the UnicodeData
4222      * file.
4223      *
4224      * <p>Note that
4225      * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
4226      * does not always return <code>true</code> for some ranges of
4227      * characters, particularly those that are symbols or ideographs.
4228      *
4229      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4230      * characters to uppercase. <code>String</code> case mapping methods
4231      * have several benefits over <code>Character</code> case mapping methods.
4232      * <code>String</code> case mapping methods can perform locale-sensitive
4233      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4234      * the <code>Character</code> case mapping methods cannot.
4235      *
4236      * @param   codePoint   the character (Unicode code point) to be converted.
4237      * @return  the uppercase equivalent of the character, if any;
4238      *          otherwise, the character itself.
4239      * @see     java.lang.Character#isUpperCase(int)
4240      * @see     java.lang.String#toUpperCase()
4241      *
4242      * @since   1.5
4243      */
4244     public static int toUpperCase(int codePoint) {
4245         return CharacterData.of(codePoint).toUpperCase(codePoint);
4246     }
4247 
4248     /**
4249      * Converts the character argument to titlecase using case mapping
4250      * information from the UnicodeData file. If a character has no
4251      * explicit titlecase mapping and is not itself a titlecase char
4252      * according to UnicodeData, then the uppercase mapping is
4253      * returned as an equivalent titlecase mapping. If the
4254      * <code>char</code> argument is already a titlecase
4255      * <code>char</code>, the same <code>char</code> value will be
4256      * returned.
4257      * <p>
4258      * Note that
4259      * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
4260      * does not always return <code>true</code> for some ranges of
4261      * characters.
4262      *
4263      * <p><b>Note:</b> This method cannot handle <a
4264      * href="#supplementary"> supplementary characters</a>. To support
4265      * all Unicode characters, including supplementary characters, use
4266      * the {@link #toTitleCase(int)} method.
4267      *
4268      * @param   ch   the character to be converted.
4269      * @return  the titlecase equivalent of the character, if any;
4270      *          otherwise, the character itself.
4271      * @see     java.lang.Character#isTitleCase(char)
4272      * @see     java.lang.Character#toLowerCase(char)
4273      * @see     java.lang.Character#toUpperCase(char)
4274      * @since   1.0.2
4275      */
4276     public static char toTitleCase(char ch) {
4277         return (char)toTitleCase((int)ch);
4278     }
4279 
4280     /**
4281      * Converts the character (Unicode code point) argument to titlecase using case mapping
4282      * information from the UnicodeData file. If a character has no
4283      * explicit titlecase mapping and is not itself a titlecase char
4284      * according to UnicodeData, then the uppercase mapping is
4285      * returned as an equivalent titlecase mapping. If the
4286      * character argument is already a titlecase
4287      * character, the same character value will be
4288      * returned.
4289      *
4290      * <p>Note that
4291      * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
4292      * does not always return <code>true</code> for some ranges of
4293      * characters.
4294      *
4295      * @param   codePoint   the character (Unicode code point) to be converted.
4296      * @return  the titlecase equivalent of the character, if any;
4297      *          otherwise, the character itself.
4298      * @see     java.lang.Character#isTitleCase(int)
4299      * @see     java.lang.Character#toLowerCase(int)
4300      * @see     java.lang.Character#toUpperCase(int)
4301      * @since   1.5
4302      */
4303     public static int toTitleCase(int codePoint) {
4304         return CharacterData.of(codePoint).toTitleCase(codePoint);
4305     }
4306 
4307     /**
4308      * Returns the numeric value of the character <code>ch</code> in the
4309      * specified radix.
4310      * <p>
4311      * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4312      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4313      * value of <code>ch</code> is not a valid digit in the specified
4314      * radix, <code>-1</code> is returned. A character is a valid digit
4315      * if at least one of the following is true:
4316      * <ul>
4317      * <li>The method <code>isDigit</code> is <code>true</code> of the character
4318      *     and the Unicode decimal digit value of the character (or its
4319      *     single-character decomposition) is less than the specified radix.
4320      *     In this case the decimal digit value is returned.
4321      * <li>The character is one of the uppercase Latin letters
4322      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4323      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
4324      *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4325      *     is returned.
4326      * <li>The character is one of the lowercase Latin letters
4327      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4328      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
4329      *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4330      *     is returned.
4331      * </ul>
4332      *
4333      * <p><b>Note:</b> This method cannot handle <a
4334      * href="#supplementary"> supplementary characters</a>. To support
4335      * all Unicode characters, including supplementary characters, use
4336      * the {@link #digit(int, int)} method.
4337      *
4338      * @param   ch      the character to be converted.
4339      * @param   radix   the radix.
4340      * @return  the numeric value represented by the character in the
4341      *          specified radix.
4342      * @see     java.lang.Character#forDigit(int, int)
4343      * @see     java.lang.Character#isDigit(char)
4344      */
4345     public static int digit(char ch, int radix) {
4346         return digit((int)ch, radix);
4347     }
4348 
4349     /**
4350      * Returns the numeric value of the specified character (Unicode
4351      * code point) in the specified radix.
4352      *
4353      * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4354      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4355      * character is not a valid digit in the specified
4356      * radix, <code>-1</code> is returned. A character is a valid digit
4357      * if at least one of the following is true:
4358      * <ul>
4359      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
4360      *     and the Unicode decimal digit value of the character (or its
4361      *     single-character decomposition) is less than the specified radix.
4362      *     In this case the decimal digit value is returned.
4363      * <li>The character is one of the uppercase Latin letters
4364      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4365      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4367      *     is returned.
4368      * <li>The character is one of the lowercase Latin letters
4369      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4370      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4372      *     is returned.
4373      * </ul>
4374      *
4375      * @param   codePoint the character (Unicode code point) to be converted.
4376      * @param   radix   the radix.
4377      * @return  the numeric value represented by the character in the
4378      *          specified radix.
4379      * @see     java.lang.Character#forDigit(int, int)
4380      * @see     java.lang.Character#isDigit(int)
4381      * @since   1.5
4382      */
4383     public static int digit(int codePoint, int radix) {
4384         return CharacterData.of(codePoint).digit(codePoint, radix);
4385     }
4386 
4387     /**
4388      * Returns the <code>int</code> value that the specified Unicode
4389      * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4391      * an int with a value of 50.
4392      * <p>
4393      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4394      * <code>'&#92;u005A'</code>), lowercase
4395      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4396      * full width variant (<code>'&#92;uFF21'</code> through
4397      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4398      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4399      * through 35. This is independent of the Unicode specification,
4400      * which does not assign numeric values to these <code>char</code>
4401      * values.
4402      * <p>
4403      * If the character does not have a numeric value, then -1 is returned.
4404      * If the character has a numeric value that cannot be represented as a
4405      * nonnegative integer (for example, a fractional value), then -2
4406      * is returned.
4407      *
4408      * <p><b>Note:</b> This method cannot handle <a
4409      * href="#supplementary"> supplementary characters</a>. To support
4410      * all Unicode characters, including supplementary characters, use
4411      * the {@link #getNumericValue(int)} method.
4412      *
4413      * @param   ch      the character to be converted.
4414      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4415      *           value; -2 if the character has a numeric value that is not a
4416      *          nonnegative integer; -1 if the character has no numeric value.
4417      * @see     java.lang.Character#forDigit(int, int)
4418      * @see     java.lang.Character#isDigit(char)
4419      * @since   1.1
4420      */
4421     public static int getNumericValue(char ch) {
4422         return getNumericValue((int)ch);
4423     }
4424 
4425     /**
4426      * Returns the <code>int</code> value that the specified
4427      * character (Unicode code point) represents. For example, the character
4428      * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4429      * an <code>int</code> with a value of 50.
4430      * <p>
4431      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4432      * <code>'&#92;u005A'</code>), lowercase
4433      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4434      * full width variant (<code>'&#92;uFF21'</code> through
4435      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4436      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4437      * through 35. This is independent of the Unicode specification,
4438      * which does not assign numeric values to these <code>char</code>
4439      * values.
4440      * <p>
4441      * If the character does not have a numeric value, then -1 is returned.
4442      * If the character has a numeric value that cannot be represented as a
4443      * nonnegative integer (for example, a fractional value), then -2
4444      * is returned.
4445      *
4446      * @param   codePoint the character (Unicode code point) to be converted.
4447      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4448      *          value; -2 if the character has a numeric value that is not a
4449      *          nonnegative integer; -1 if the character has no numeric value.
4450      * @see     java.lang.Character#forDigit(int, int)
4451      * @see     java.lang.Character#isDigit(int)
4452      * @since   1.5
4453      */
4454     public static int getNumericValue(int codePoint) {
4455         return CharacterData.of(codePoint).getNumericValue(codePoint);
4456     }
4457 
4458     /**
4459      * Determines if the specified character is ISO-LATIN-1 white space.
4460      * This method returns <code>true</code> for the following five
4461      * characters only:
4462      * <table>
4463      * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
4464      *     <td><code>HORIZONTAL TABULATION</code></td></tr>
4465      * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
4466      *     <td><code>NEW LINE</code></td></tr>
4467      * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
4468      *     <td><code>FORM FEED</code></td></tr>
4469      * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
4470      *     <td><code>CARRIAGE RETURN</code></td></tr>
4471      * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
4472      *     <td><code>SPACE</code></td></tr>
4473      * </table>
4474      *
4475      * @param      ch   the character to be tested.
4476      * @return     <code>true</code> if the character is ISO-LATIN-1 white
4477      *             space; <code>false</code> otherwise.
4478      * @see        java.lang.Character#isSpaceChar(char)
4479      * @see        java.lang.Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
4481      */
4482     @Deprecated
4483     public static boolean isSpace(char ch) {
4484         return (ch <= 0x0020) &&
4485             (((((1L << 0x0009) |
4486             (1L << 0x000A) |
4487             (1L << 0x000C) |
4488             (1L << 0x000D) |
4489             (1L << 0x0020)) >> ch) & 1L) != 0);
4490     }
4491 
4492 
4493     /**
4494      * Determines if the specified character is a Unicode space character.
4495      * A character is considered to be a space character if and only if
4496      * it is specified to be a space character by the Unicode standard. This
4497      * method returns true if the character's general category type is any of
4498      * the following:
4499      * <ul>
4500      * <li> <code>SPACE_SEPARATOR</code>
4501      * <li> <code>LINE_SEPARATOR</code>
4502      * <li> <code>PARAGRAPH_SEPARATOR</code>
4503      * </ul>
4504      *
4505      * <p><b>Note:</b> This method cannot handle <a
4506      * href="#supplementary"> supplementary characters</a>. To support
4507      * all Unicode characters, including supplementary characters, use
4508      * the {@link #isSpaceChar(int)} method.
4509      *
4510      * @param   ch      the character to be tested.
4511      * @return  <code>true</code> if the character is a space character;
4512      *          <code>false</code> otherwise.
4513      * @see     java.lang.Character#isWhitespace(char)
4514      * @since   1.1
4515      */
4516     public static boolean isSpaceChar(char ch) {
4517         return isSpaceChar((int)ch);
4518     }
4519 
4520     /**
4521      * Determines if the specified character (Unicode code point) is a
4522      * Unicode space character.  A character is considered to be a
4523      * space character if and only if it is specified to be a space
4524      * character by the Unicode standard. This method returns true if
4525      * the character's general category type is any of the following:
4526      *
4527      * <ul>
4528      * <li> {@link #SPACE_SEPARATOR}
4529      * <li> {@link #LINE_SEPARATOR}
4530      * <li> {@link #PARAGRAPH_SEPARATOR}
4531      * </ul>
4532      *
4533      * @param   codePoint the character (Unicode code point) to be tested.
4534      * @return  <code>true</code> if the character is a space character;
4535      *          <code>false</code> otherwise.
4536      * @see     java.lang.Character#isWhitespace(int)
4537      * @since   1.5
4538      */
4539     public static boolean isSpaceChar(int codePoint) {
4540         return ((((1 << Character.SPACE_SEPARATOR) |
4541                   (1 << Character.LINE_SEPARATOR) |
4542                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
4543             != 0;
4544     }
4545 
4546     /**
4547      * Determines if the specified character is white space according to Java.
4548      * A character is a Java whitespace character if and only if it satisfies
4549      * one of the following criteria:
4550      * <ul>
4551      * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4552      *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4553      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4554      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4555      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4556      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4557      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4558      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4559      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4560      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4561      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4562      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4563      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4564      * </ul>
4565      *
4566      * <p><b>Note:</b> This method cannot handle <a
4567      * href="#supplementary"> supplementary characters</a>. To support
4568      * all Unicode characters, including supplementary characters, use
4569      * the {@link #isWhitespace(int)} method.
4570      *
4571      * @param   ch the character to be tested.
4572      * @return  <code>true</code> if the character is a Java whitespace
4573      *          character; <code>false</code> otherwise.
4574      * @see     java.lang.Character#isSpaceChar(char)
4575      * @since   1.1
4576      */
4577     public static boolean isWhitespace(char ch) {
4578         return isWhitespace((int)ch);
4579     }
4580 
4581     /**
4582      * Determines if the specified character (Unicode code point) is
4583      * white space according to Java.  A character is a Java
4584      * whitespace character if and only if it satisfies one of the
4585      * following criteria:
4586      * <ul>
4587      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4588      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4589      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4590      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4591      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4592      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4593      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4594      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4595      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4596      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4597      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4598      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4599      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4600      * </ul>
     *
4603      * @param   codePoint the character (Unicode code point) to be tested.
4604      * @return  <code>true</code> if the character is a Java whitespace
4605      *          character; <code>false</code> otherwise.
4606      * @see     java.lang.Character#isSpaceChar(int)
4607      * @since   1.5
4608      */
4609     public static boolean isWhitespace(int codePoint) {
4610         return CharacterData.of(codePoint).isWhitespace(codePoint);
4611     }
4612 
4613     /**
4614      * Determines if the specified character is an ISO control
4615      * character.  A character is considered to be an ISO control
4616      * character if its code is in the range <code>'&#92;u0000'</code>
4617      * through <code>'&#92;u001F'</code> or in the range
4618      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4619      *
4620      * <p><b>Note:</b> This method cannot handle <a
4621      * href="#supplementary"> supplementary characters</a>. To support
4622      * all Unicode characters, including supplementary characters, use
4623      * the {@link #isISOControl(int)} method.
4624      *
4625      * @param   ch      the character to be tested.
4626      * @return  <code>true</code> if the character is an ISO control character;
4627      *          <code>false</code> otherwise.
4628      *
4629      * @see     java.lang.Character#isSpaceChar(char)
4630      * @see     java.lang.Character#isWhitespace(char)
4631      * @since   1.1
4632      */
4633     public static boolean isISOControl(char ch) {
4634         return isISOControl((int)ch);
4635     }
4636 
4637     /**
4638      * Determines if the referenced character (Unicode code point) is an ISO control
4639      * character.  A character is considered to be an ISO control
4640      * character if its code is in the range <code>'&#92;u0000'</code>
4641      * through <code>'&#92;u001F'</code> or in the range
4642      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4643      *
4644      * @param   codePoint the character (Unicode code point) to be tested.
4645      * @return  <code>true</code> if the character is an ISO control character;
4646      *          <code>false</code> otherwise.
4647      * @see     java.lang.Character#isSpaceChar(int)
4648      * @see     java.lang.Character#isWhitespace(int)
4649      * @since   1.5
4650      */
4651     public static boolean isISOControl(int codePoint) {
4652         return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
4653             (codePoint >= 0x007F && codePoint <= 0x009F);
4654     }
4655 
4656     /**
4657      * Returns a value indicating a character's general category.
4658      *
4659      * <p><b>Note:</b> This method cannot handle <a
4660      * href="#supplementary"> supplementary characters</a>. To support
4661      * all Unicode characters, including supplementary characters, use
4662      * the {@link #getType(int)} method.
4663      *
4664      * @param   ch      the character to be tested.
4665      * @return  a value of type <code>int</code> representing the
4666      *          character's general category.
4667      * @see     java.lang.Character#COMBINING_SPACING_MARK
4668      * @see     java.lang.Character#CONNECTOR_PUNCTUATION
4669      * @see     java.lang.Character#CONTROL
4670      * @see     java.lang.Character#CURRENCY_SYMBOL
4671      * @see     java.lang.Character#DASH_PUNCTUATION
4672      * @see     java.lang.Character#DECIMAL_DIGIT_NUMBER
4673      * @see     java.lang.Character#ENCLOSING_MARK
4674      * @see     java.lang.Character#END_PUNCTUATION
4675      * @see     java.lang.Character#FINAL_QUOTE_PUNCTUATION
4676      * @see     java.lang.Character#FORMAT
4677      * @see     java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4678      * @see     java.lang.Character#LETTER_NUMBER
4679      * @see     java.lang.Character#LINE_SEPARATOR
4680      * @see     java.lang.Character#LOWERCASE_LETTER
4681      * @see     java.lang.Character#MATH_SYMBOL
4682      * @see     java.lang.Character#MODIFIER_LETTER
4683      * @see     java.lang.Character#MODIFIER_SYMBOL
4684      * @see     java.lang.Character#NON_SPACING_MARK
4685      * @see     java.lang.Character#OTHER_LETTER
4686      * @see     java.lang.Character#OTHER_NUMBER
4687      * @see     java.lang.Character#OTHER_PUNCTUATION
4688      * @see     java.lang.Character#OTHER_SYMBOL
4689      * @see     java.lang.Character#PARAGRAPH_SEPARATOR
4690      * @see     java.lang.Character#PRIVATE_USE
4691      * @see     java.lang.Character#SPACE_SEPARATOR
4692      * @see     java.lang.Character#START_PUNCTUATION
4693      * @see     java.lang.Character#SURROGATE
4694      * @see     java.lang.Character#TITLECASE_LETTER
4695      * @see     java.lang.Character#UNASSIGNED
4696      * @see     java.lang.Character#UPPERCASE_LETTER
4697      * @since   1.1
4698      */
4699     public static int getType(char ch) {
4700         return getType((int)ch);
4701     }
4702 
4703     /**
4704      * Returns a value indicating a character's general category.
4705      *
4706      * @param   codePoint the character (Unicode code point) to be tested.
4707      * @return  a value of type <code>int</code> representing the
4708      *          character's general category.
4709      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4710      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4711      * @see     Character#CONTROL CONTROL
4712      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4713      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
4714      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4715      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
4716      * @see     Character#END_PUNCTUATION END_PUNCTUATION
4717      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4718      * @see     Character#FORMAT FORMAT
4719      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4720      * @see     Character#LETTER_NUMBER LETTER_NUMBER
4721      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
4722      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
4723      * @see     Character#MATH_SYMBOL MATH_SYMBOL
4724      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
4725      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4726      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
4727      * @see     Character#OTHER_LETTER OTHER_LETTER
4728      * @see     Character#OTHER_NUMBER OTHER_NUMBER
4729      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4730      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
4731      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4732      * @see     Character#PRIVATE_USE PRIVATE_USE
4733      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
4734      * @see     Character#START_PUNCTUATION START_PUNCTUATION
4735      * @see     Character#SURROGATE SURROGATE
4736      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
4737      * @see     Character#UNASSIGNED UNASSIGNED
4738      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
4739      * @since   1.5
4740      */
4741     public static int getType(int codePoint) {
4742         return CharacterData.of(codePoint).getType(codePoint);
4743     }
4744 
4745     /**
4746      * Determines the character representation for a specific digit in
4747      * the specified radix. If the value of <code>radix</code> is not a
4748      * valid radix, or the value of <code>digit</code> is not a valid
4749      * digit in the specified radix, the null character
4750      * (<code>'&#92;u0000'</code>) is returned.
4751      * <p>
4752      * The <code>radix</code> argument is valid if it is greater than or
4753      * equal to <code>MIN_RADIX</code> and less than or equal to
4754      * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
     * <code>0&nbsp;&lt;=&nbsp;digit&nbsp;&lt;&nbsp;radix</code>.
4756      * <p>
4757      * If the digit is less than 10, then
4758      * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
4759      * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
4760      *
4761      * @param   digit   the number to convert to a character.
4762      * @param   radix   the radix.
4763      * @return  the <code>char</code> representation of the specified digit
4764      *          in the specified radix.
4765      * @see     java.lang.Character#MIN_RADIX
4766      * @see     java.lang.Character#MAX_RADIX
4767      * @see     java.lang.Character#digit(char, int)
4768      */
4769     public static char forDigit(int digit, int radix) {
4770         if ((digit >= radix) || (digit < 0)) {
4771             return '\0';
4772         }
4773         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
4774             return '\0';
4775         }
4776         if (digit < 10) {
4777             return (char)('0' + digit);
4778         }
4779         return (char)('a' - 10 + digit);
4780     }
4781 
4782     /**
4783      * Returns the Unicode directionality property for the given
4784      * character.  Character directionality is used to calculate the
4785      * visual ordering of text. The directionality value of undefined
4786      * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4787      *
4788      * <p><b>Note:</b> This method cannot handle <a
4789      * href="#supplementary"> supplementary characters</a>. To support
4790      * all Unicode characters, including supplementary characters, use
4791      * the {@link #getDirectionality(int)} method.
4792      *
4793      * @param  ch <code>char</code> for which the directionality property
4794      *            is requested.
4795      * @return the directionality property of the <code>char</code> value.
4796      *
4797      * @see Character#DIRECTIONALITY_UNDEFINED
4798      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4799      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4800      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4801      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4802      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4803      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4804      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4805      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4806      * @see Character#DIRECTIONALITY_NONSPACING_MARK
4807      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4808      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4809      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4810      * @see Character#DIRECTIONALITY_WHITESPACE
4811      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4812      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4813      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4814      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4815      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4816      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4817      * @since 1.4
4818      */
4819     public static byte getDirectionality(char ch) {
4820         return getDirectionality((int)ch);
4821     }
4822 
4823     /**
4824      * Returns the Unicode directionality property for the given
4825      * character (Unicode code point).  Character directionality is
4826      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
4828      * #DIRECTIONALITY_UNDEFINED}.
4829      *
4830      * @param   codePoint the character (Unicode code point) for which
4831      *          the directionality property is requested.
4832      * @return the directionality property of the character.
4833      *
4834      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4835      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4836      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4837      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4838      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4839      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4840      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4841      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4842      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4843      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4844      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4845      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4846      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4847      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4848      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4849      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4850      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4851      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4852      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4853      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4854      * @since    1.5
4855      */
4856     public static byte getDirectionality(int codePoint) {
4857         return CharacterData.of(codePoint).getDirectionality(codePoint);
4858     }
4859 
4860     /**
4861      * Determines whether the character is mirrored according to the
4862      * Unicode specification.  Mirrored characters should have their
4863      * glyphs horizontally mirrored when displayed in text that is
4864      * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
4865      * PARENTHESIS is semantically defined to be an <i>opening
4866      * parenthesis</i>.  This will appear as a "(" in text that is
4867      * left-to-right but as a ")" in text that is right-to-left.
4868      *
4869      * <p><b>Note:</b> This method cannot handle <a
4870      * href="#supplementary"> supplementary characters</a>. To support
4871      * all Unicode characters, including supplementary characters, use
4872      * the {@link #isMirrored(int)} method.
4873      *
4874      * @param  ch <code>char</code> for which the mirrored property is requested
4875      * @return <code>true</code> if the char is mirrored, <code>false</code>
4876      *         if the <code>char</code> is not mirrored or is not defined.
4877      * @since 1.4
4878      */
4879     public static boolean isMirrored(char ch) {
4880         return isMirrored((int)ch);
4881     }
4882 
4883     /**
4884      * Determines whether the specified character (Unicode code point)
4885      * is mirrored according to the Unicode specification.  Mirrored
4886      * characters should have their glyphs horizontally mirrored when
4887      * displayed in text that is right-to-left.  For example,
4888      * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
4889      * defined to be an <i>opening parenthesis</i>.  This will appear
4890      * as a "(" in text that is left-to-right but as a ")" in text
4891      * that is right-to-left.
4892      *
4893      * @param   codePoint the character (Unicode code point) to be tested.
4894      * @return  <code>true</code> if the character is mirrored, <code>false</code>
4895      *          if the character is not mirrored or is not defined.
4896      * @since   1.5
4897      */
4898     public static boolean isMirrored(int codePoint) {
4899         return CharacterData.of(codePoint).isMirrored(codePoint);
4900     }
4901 
4902     /**
4903      * Compares two <code>Character</code> objects numerically.
4904      *
4905      * @param   anotherCharacter   the <code>Character</code> to be compared.
4906 
4907      * @return  the value <code>0</code> if the argument <code>Character</code>
4908      *          is equal to this <code>Character</code>; a value less than
4909      *          <code>0</code> if this <code>Character</code> is numerically less
4910      *          than the <code>Character</code> argument; and a value greater than
4911      *          <code>0</code> if this <code>Character</code> is numerically greater
4912      *          than the <code>Character</code> argument (unsigned comparison).
4913      *          Note that this is strictly a numerical comparison; it is not
4914      *          locale-dependent.
4915      * @since   1.2
4916      */
4917     public int compareTo(Character anotherCharacter) {
4918         return this.value - anotherCharacter.value;
4919     }
4920 
4921     /**
4922      * Converts the character (Unicode code point) argument to uppercase using
4923      * information from the UnicodeData file.
4924      * <p>
4925      *
4926      * @param   codePoint   the character (Unicode code point) to be converted.
4927      * @return  either the uppercase equivalent of the character, if
4928      *          any, or an error flag (<code>Character.ERROR</code>)
4929      *          that indicates that a 1:M <code>char</code> mapping exists.
4930      * @see     java.lang.Character#isLowerCase(char)
4931      * @see     java.lang.Character#isUpperCase(char)
4932      * @see     java.lang.Character#toLowerCase(char)
4933      * @see     java.lang.Character#toTitleCase(char)
4934      * @since 1.4
4935      */
4936     static int toUpperCaseEx(int codePoint) {
4937         assert isValidCodePoint(codePoint);
4938         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
4939     }
4940 
4941     /**
4942      * Converts the character (Unicode code point) argument to uppercase using case
4943      * mapping information from the SpecialCasing file in the Unicode
4944      * specification. If a character has no explicit uppercase
4945      * mapping, then the <code>char</code> itself is returned in the
4946      * <code>char[]</code>.
4947      *
4948      * @param   codePoint   the character (Unicode code point) to be converted.
4949      * @return a <code>char[]</code> with the uppercased character.
4950      * @since 1.4
4951      */
4952     static char[] toUpperCaseCharArray(int codePoint) {
4953         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
4954         assert isValidCodePoint(codePoint) &&
4955                !isSupplementaryCodePoint(codePoint);
4956         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
4957     }
4958 
    /**
     * The number of bits used to represent a <tt>char</tt> value in unsigned
     * binary form; a <tt>char</tt> occupies 16 bits.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
4966 
4967     /**
4968      * Returns the value obtained by reversing the order of the bytes in the
4969      * specified <tt>char</tt> value.
4970      *
4971      * @return the value obtained by reversing (or, equivalently, swapping)
4972      *     the bytes in the specified <tt>char</tt> value.
4973      * @since 1.5
4974      */
4975     public static char reverseBytes(char ch) {
4976         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
4977     }
4978 }