1 /*
   2  * Copyright 2002-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 package java.lang;
  27 import java.util.Map;
  28 import java.util.HashMap;
  29 import java.util.Locale;
  30 
  31 /**
  32  * The <code>Character</code> class wraps a value of the primitive
  33  * type <code>char</code> in an object. An object of type
  34  * <code>Character</code> contains a single field whose type is
  35  * <code>char</code>.
  36  * <p>
  37  * In addition, this class provides several methods for determining
  38  * a character's category (lowercase letter, digit, etc.) and for converting
  39  * characters from uppercase to lowercase and vice versa.
  40  * <p>
  41  * Character information is based on the Unicode Standard, version 4.0.
  42  * <p>
  43  * The methods and data of class <code>Character</code> are defined by
  44  * the information in the <i>UnicodeData</i> file that is part of the
  45  * Unicode Character Database maintained by the Unicode
  46  * Consortium. This file specifies various properties including name
  47  * and general category for every defined Unicode code point or
  48  * character range.
  49  * <p>
  50  * The file and its description are available from the Unicode Consortium at:
  51  * <ul>
  52  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  53  * </ul>
  54  *
  55  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  56  *
  57  * <p>The <code>char</code> data type (and therefore the value that a
  58  * <code>Character</code> object encapsulates) is based on the
  59  * original Unicode specification, which defined characters as
  60  * fixed-width 16-bit entities. The Unicode standard has since been
  61  * changed to allow for characters whose representation requires more
  62  * than 16 bits.  The range of legal <em>code point</em>s is now
  63  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  64  * (Refer to the <a
  65  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  66  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  67  * standard.)
  68  *
  69  * <p>The set of characters from U+0000 to U+FFFF is sometimes
  70  * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
  71  * name="supplementary">Characters</a> whose code points are greater
  72  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  73  * 2 platform uses the UTF-16 representation in <code>char</code>
  74  * arrays and in the <code>String</code> and <code>StringBuffer</code>
  75  * classes. In this representation, supplementary characters are
  76  * represented as a pair of <code>char</code> values, the first from
  77  * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
  78  * second from the <em>low-surrogates</em> range
  79  * (&#92;uDC00-&#92;uDFFF).
  80  *
  81  * <p>A <code>char</code> value, therefore, represents Basic
  82  * Multilingual Plane (BMP) code points, including the surrogate
  83  * code points, or code units of the UTF-16 encoding. An
  84  * <code>int</code> value represents all Unicode code points,
  85  * including supplementary code points. The lower (least significant)
  86  * 21 bits of <code>int</code> are used to represent Unicode code
  87  * points and the upper (most significant) 11 bits must be zero.
  88  * Unless otherwise specified, the behavior with respect to
  89  * supplementary characters and surrogate <code>char</code> values is
  90  * as follows:
  91  *
  92  * <ul>
  93  * <li>The methods that only accept a <code>char</code> value cannot support
  94  * supplementary characters. They treat <code>char</code> values from the
  95  * surrogate ranges as undefined characters. For example,
  96  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
  97  * this specific value if followed by any low-surrogate value in a string
  98  * would represent a letter.
  99  *
 100  * <li>The methods that accept an <code>int</code> value support all
 101  * Unicode characters, including supplementary characters. For
 102  * example, <code>Character.isLetter(0x2F81A)</code> returns
 103  * <code>true</code> because the code point value represents a letter
 104  * (a CJK ideograph).
 105  * </ul>
 106  *
 107  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 108  * used for character values in the range between U+0000 and U+10FFFF,
 109  * and <em>Unicode code unit</em> is used for 16-bit
 110  * <code>char</code> values that are code units of the <em>UTF-16</em>
 111  * encoding. For more information on Unicode terminology, refer to the
 112  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 113  *
 114  * @author  Lee Boynton
 115  * @author  Guy Steele
 116  * @author  Akira Tanaka
 117  * @since   1.0
 118  */
 119 public final
 120 class Character extends Object implements java.io.Serializable, Comparable<Character> {
 121     /**
 122      * The minimum radix available for conversion to and from strings.
 123      * The constant value of this field is the smallest value permitted
 124      * for the radix argument in radix-conversion methods such as the
 125      * <code>digit</code> method, the <code>forDigit</code>
 126      * method, and the <code>toString</code> method of class
 127      * <code>Integer</code>.
 128      *
 129      * @see     java.lang.Character#digit(char, int)
 130      * @see     java.lang.Character#forDigit(int, int)
 131      * @see     java.lang.Integer#toString(int, int)
 132      * @see     java.lang.Integer#valueOf(java.lang.String)
 133      */
 134     public static final int MIN_RADIX = 2;
 135 
 136     /**
 137      * The maximum radix available for conversion to and from strings.
 138      * The constant value of this field is the largest value permitted
 139      * for the radix argument in radix-conversion methods such as the
 140      * <code>digit</code> method, the <code>forDigit</code>
 141      * method, and the <code>toString</code> method of class
 142      * <code>Integer</code>.
 143      *
 144      * @see     java.lang.Character#digit(char, int)
 145      * @see     java.lang.Character#forDigit(int, int)
 146      * @see     java.lang.Integer#toString(int, int)
 147      * @see     java.lang.Integer#valueOf(java.lang.String)
 148      */
 149     public static final int MAX_RADIX = 36;
 150 
 151     /**
 152      * The constant value of this field is the smallest value of type
 153      * <code>char</code>, <code>'&#92;u0000'</code>.
 154      *
 155      * @since   1.0.2
 156      */
 157     public static final char   MIN_VALUE = '\u0000';
 158 
 159     /**
 160      * The constant value of this field is the largest value of type
 161      * <code>char</code>, <code>'&#92;uFFFF'</code>.
 162      *
 163      * @since   1.0.2
 164      */
 165     public static final char   MAX_VALUE = '\uffff';
 166 
 167     /**
 168      * The <code>Class</code> instance representing the primitive type
 169      * <code>char</code>.
 170      *
 171      * @since   1.1
 172      */
 173     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 174 
 175    /*
 176     * Normative general types
 177     */
 178 
 179    /*
 180     * General character types
 181     */
 182 
 183    /**
 184     * General category "Cn" in the Unicode specification.
 185     * @since   1.1
 186     */
 187     public static final byte
 188         UNASSIGNED                  = 0;
 189 
 190    /**
 191     * General category "Lu" in the Unicode specification.
 192     * @since   1.1
 193     */
 194     public static final byte
 195         UPPERCASE_LETTER            = 1;
 196 
 197    /**
 198     * General category "Ll" in the Unicode specification.
 199     * @since   1.1
 200     */
 201     public static final byte
 202         LOWERCASE_LETTER            = 2;
 203 
 204    /**
 205     * General category "Lt" in the Unicode specification.
 206     * @since   1.1
 207     */
 208     public static final byte
 209         TITLECASE_LETTER            = 3;
 210 
 211    /**
 212     * General category "Lm" in the Unicode specification.
 213     * @since   1.1
 214     */
 215     public static final byte
 216         MODIFIER_LETTER             = 4;
 217 
 218    /**
 219     * General category "Lo" in the Unicode specification.
 220     * @since   1.1
 221     */
 222     public static final byte
 223         OTHER_LETTER                = 5;
 224 
 225    /**
 226     * General category "Mn" in the Unicode specification.
 227     * @since   1.1
 228     */
 229     public static final byte
 230         NON_SPACING_MARK            = 6;
 231 
 232    /**
 233     * General category "Me" in the Unicode specification.
 234     * @since   1.1
 235     */
 236     public static final byte
 237         ENCLOSING_MARK              = 7;
 238 
 239    /**
 240     * General category "Mc" in the Unicode specification.
 241     * @since   1.1
 242     */
 243     public static final byte
 244         COMBINING_SPACING_MARK      = 8;
 245 
 246    /**
 247     * General category "Nd" in the Unicode specification.
 248     * @since   1.1
 249     */
 250     public static final byte
 251         DECIMAL_DIGIT_NUMBER        = 9;
 252 
 253    /**
 254     * General category "Nl" in the Unicode specification.
 255     * @since   1.1
 256     */
 257     public static final byte
 258         LETTER_NUMBER               = 10;
 259 
 260    /**
 261     * General category "No" in the Unicode specification.
 262     * @since   1.1
 263     */
 264     public static final byte
 265         OTHER_NUMBER                = 11;
 266 
 267    /**
 268     * General category "Zs" in the Unicode specification.
 269     * @since   1.1
 270     */
 271     public static final byte
 272         SPACE_SEPARATOR             = 12;
 273 
 274    /**
 275     * General category "Zl" in the Unicode specification.
 276     * @since   1.1
 277     */
 278     public static final byte
 279         LINE_SEPARATOR              = 13;
 280 
 281    /**
 282     * General category "Zp" in the Unicode specification.
 283     * @since   1.1
 284     */
 285     public static final byte
 286         PARAGRAPH_SEPARATOR         = 14;
 287 
 288    /**
 289     * General category "Cc" in the Unicode specification.
 290     * @since   1.1
 291     */
 292     public static final byte
 293         CONTROL                     = 15;
 294 
 295    /**
 296     * General category "Cf" in the Unicode specification.
 297     * @since   1.1
 298     */
 299     public static final byte
 300         FORMAT                      = 16;
 301 
 302    /**
 303     * General category "Co" in the Unicode specification.
 304     * @since   1.1
 305     */
 306     public static final byte
 307         PRIVATE_USE                 = 18;
 308 
 309    /**
 310     * General category "Cs" in the Unicode specification.
 311     * @since   1.1
 312     */
 313     public static final byte
 314         SURROGATE                   = 19;
 315 
 316    /**
 317     * General category "Pd" in the Unicode specification.
 318     * @since   1.1
 319     */
 320     public static final byte
 321         DASH_PUNCTUATION            = 20;
 322 
 323    /**
 324     * General category "Ps" in the Unicode specification.
 325     * @since   1.1
 326     */
 327     public static final byte
 328         START_PUNCTUATION           = 21;
 329 
 330    /**
 331     * General category "Pe" in the Unicode specification.
 332     * @since   1.1
 333     */
 334     public static final byte
 335         END_PUNCTUATION             = 22;
 336 
 337    /**
 338     * General category "Pc" in the Unicode specification.
 339     * @since   1.1
 340     */
 341     public static final byte
 342         CONNECTOR_PUNCTUATION       = 23;
 343 
 344    /**
 345     * General category "Po" in the Unicode specification.
 346     * @since   1.1
 347     */
 348     public static final byte
 349         OTHER_PUNCTUATION           = 24;
 350 
 351    /**
 352     * General category "Sm" in the Unicode specification.
 353     * @since   1.1
 354     */
 355     public static final byte
 356         MATH_SYMBOL                 = 25;
 357 
 358    /**
 359     * General category "Sc" in the Unicode specification.
 360     * @since   1.1
 361     */
 362     public static final byte
 363         CURRENCY_SYMBOL             = 26;
 364 
 365    /**
 366     * General category "Sk" in the Unicode specification.
 367     * @since   1.1
 368     */
 369     public static final byte
 370         MODIFIER_SYMBOL             = 27;
 371 
 372    /**
 373     * General category "So" in the Unicode specification.
 374     * @since   1.1
 375     */
 376     public static final byte
 377         OTHER_SYMBOL                = 28;
 378 
 379    /**
 380     * General category "Pi" in the Unicode specification.
 381     * @since   1.4
 382     */
 383     public static final byte
 384         INITIAL_QUOTE_PUNCTUATION   = 29;
 385 
 386    /**
 387     * General category "Pf" in the Unicode specification.
 388     * @since   1.4
 389     */
 390     public static final byte
 391         FINAL_QUOTE_PUNCTUATION     = 30;
 392 
 393     /**
 394      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 395      */
 396      static final int ERROR = 0xFFFFFFFF;
 397 
 398 
 399     /**
 400      * Undefined bidirectional character type. Undefined <code>char</code>
 401      * values have undefined directionality in the Unicode specification.
 402      * @since 1.4
 403      */
 404      public static final byte DIRECTIONALITY_UNDEFINED = -1;
 405 
 406     /**
 407      * Strong bidirectional character type "L" in the Unicode specification.
 408      * @since 1.4
 409      */
 410     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 411 
 412     /**
 413      * Strong bidirectional character type "R" in the Unicode specification.
 414      * @since 1.4
 415      */
 416     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 417 
 418     /**
 419      * Strong bidirectional character type "AL" in the Unicode specification.
 420      * @since 1.4
 421      */
 422     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 423 
 424     /**
 425      * Weak bidirectional character type "EN" in the Unicode specification.
 426      * @since 1.4
 427      */
 428     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 429 
 430     /**
 431      * Weak bidirectional character type "ES" in the Unicode specification.
 432      * @since 1.4
 433      */
 434     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 435 
 436     /**
 437      * Weak bidirectional character type "ET" in the Unicode specification.
 438      * @since 1.4
 439      */
 440     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 441 
 442     /**
 443      * Weak bidirectional character type "AN" in the Unicode specification.
 444      * @since 1.4
 445      */
 446     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 447 
 448     /**
 449      * Weak bidirectional character type "CS" in the Unicode specification.
 450      * @since 1.4
 451      */
 452     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 453 
 454     /**
 455      * Weak bidirectional character type "NSM" in the Unicode specification.
 456      * @since 1.4
 457      */
 458     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 459 
 460     /**
 461      * Weak bidirectional character type "BN" in the Unicode specification.
 462      * @since 1.4
 463      */
 464     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 465 
 466     /**
 467      * Neutral bidirectional character type "B" in the Unicode specification.
 468      * @since 1.4
 469      */
 470     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 471 
 472     /**
 473      * Neutral bidirectional character type "S" in the Unicode specification.
 474      * @since 1.4
 475      */
 476     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 477 
 478     /**
 479      * Neutral bidirectional character type "WS" in the Unicode specification.
 480      * @since 1.4
 481      */
 482     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 483 
 484     /**
 485      * Neutral bidirectional character type "ON" in the Unicode specification.
 486      * @since 1.4
 487      */
 488     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 489 
 490     /**
 491      * Strong bidirectional character type "LRE" in the Unicode specification.
 492      * @since 1.4
 493      */
 494     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 495 
 496     /**
 497      * Strong bidirectional character type "LRO" in the Unicode specification.
 498      * @since 1.4
 499      */
 500     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 501 
 502     /**
 503      * Strong bidirectional character type "RLE" in the Unicode specification.
 504      * @since 1.4
 505      */
 506     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 507 
 508     /**
 509      * Strong bidirectional character type "RLO" in the Unicode specification.
 510      * @since 1.4
 511      */
 512     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 513 
 514     /**
 515      * Weak bidirectional character type "PDF" in the Unicode specification.
 516      * @since 1.4
 517      */
 518     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 519 
 520     /**
 521      * The minimum value of a Unicode high-surrogate code unit in the
 522      * UTF-16 encoding. A high-surrogate is also known as a
 523      * <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a Unicode high-surrogate code unit in the
 531      * UTF-16 encoding. A high-surrogate is also known as a
 532      * <i>leading-surrogate</i>.
 533      *
 534      * @since 1.5
 535      */
 536     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 537 
 538     /**
 539      * The minimum value of a Unicode low-surrogate code unit in the
 540      * UTF-16 encoding. A low-surrogate is also known as a
 541      * <i>trailing-surrogate</i>.
 542      *
 543      * @since 1.5
 544      */
 545     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 546 
 547     /**
 548      * The maximum value of a Unicode low-surrogate code unit in the
 549      * UTF-16 encoding. A low-surrogate is also known as a
 550      * <i>trailing-surrogate</i>.
 551      *
 552      * @since 1.5
 553      */
 554     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 555 
 556     /**
 557      * The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
 558      *
 559      * @since 1.5
 560      */
 561     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 562 
 563     /**
 564      * The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 569 
 570     /**
 571      * The minimum value of a supplementary code point.
 572      *
 573      * @since 1.5
 574      */
 575     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 576 
 577     /**
 578      * The minimum value of a Unicode code point.
 579      *
 580      * @since 1.5
 581      */
 582     public static final int MIN_CODE_POINT = 0x000000;
 583 
 584     /**
 585      * The maximum value of a Unicode code point.
 586      *
 587      * @since 1.5
 588      */
 589     public static final int MAX_CODE_POINT = 0x10ffff;
 590 
 591 
 592     /**
 593      * Instances of this class represent particular subsets of the Unicode
 594      * character set.  The only family of subsets defined in the
 595      * <code>Character</code> class is <code>{@link Character.UnicodeBlock
 596      * UnicodeBlock}</code>.  Other portions of the Java API may define other
 597      * subsets for their own purposes.
 598      *
 599      * @since 1.2
 600      */
 601     public static class Subset  {
 602 
 603         private String name;
 604 
 605         /**
 606          * Constructs a new <code>Subset</code> instance.
 607          *
 608          * @exception NullPointerException if name is <code>null</code>
 609          * @param  name  The name of this subset
 610          */
 611         protected Subset(String name) {
 612             if (name == null) {
 613                 throw new NullPointerException("name");
 614             }
 615             this.name = name;
 616         }
 617 
 618         /**
 619          * Compares two <code>Subset</code> objects for equality.
 620          * This method returns <code>true</code> if and only if
 621          * <code>this</code> and the argument refer to the same
 622          * object; since this method is <code>final</code>, this
 623          * guarantee holds for all subclasses.
 624          */
 625         public final boolean equals(Object obj) {
 626             return (this == obj);
 627         }
 628 
 629         /**
 630          * Returns the standard hash code as defined by the
 631          * <code>{@link Object#hashCode}</code> method.  This method
 632          * is <code>final</code> in order to ensure that the
 633          * <code>equals</code> and <code>hashCode</code> methods will
 634          * be consistent in all subclasses.
 635          */
 636         public final int hashCode() {
 637             return super.hashCode();
 638         }
 639 
 640         /**
 641          * Returns the name of this subset.
 642          */
 643         public final String toString() {
 644             return name;
 645         }
 646     }
 647 
 648     /**
 649      * A family of character subsets representing the character blocks in the
 650      * Unicode specification. Character blocks generally define characters
 651      * used for a specific script or purpose. A character is contained by
 652      * at most one Unicode block.
 653      *
 654      * @since 1.2
 655      */
 656     public static final class UnicodeBlock extends Subset {
 657 
 658         private static Map map = new HashMap();
 659 
 660         /**
 661          * Create a UnicodeBlock with the given identifier name.
 662          * This name must be the same as the block identifier.
 663          */
 664         private UnicodeBlock(String idName) {
 665             super(idName);
 666             map.put(idName.toUpperCase(Locale.US), this);
 667         }
 668 
 669         /**
 670          * Create a UnicodeBlock with the given identifier name and
 671          * alias name.
 672          */
 673         private UnicodeBlock(String idName, String alias) {
 674             this(idName);
 675             map.put(alias.toUpperCase(Locale.US), this);
 676         }
 677 
 678         /**
 679          * Create a UnicodeBlock with the given identifier name and
 680          * alias names.
 681          */
 682         private UnicodeBlock(String idName, String[] aliasName) {
 683             this(idName);
 684             if (aliasName != null) {
 685                 for(int x=0; x<aliasName.length; ++x) {
 686                     map.put(aliasName[x].toUpperCase(Locale.US), this);
 687                 }
 688             }
 689         }
 690 
 691         /**
 692          * Constant for the "Basic Latin" Unicode character block.
 693          * @since 1.2
 694          */
 695         public static final UnicodeBlock  BASIC_LATIN =
 696             new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" });
 697 
 698         /**
 699          * Constant for the "Latin-1 Supplement" Unicode character block.
 700          * @since 1.2
 701          */
 702         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 703             new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"});
 704 
 705         /**
 706          * Constant for the "Latin Extended-A" Unicode character block.
 707          * @since 1.2
 708          */
 709         public static final UnicodeBlock LATIN_EXTENDED_A =
 710             new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"});
 711 
 712         /**
 713          * Constant for the "Latin Extended-B" Unicode character block.
 714          * @since 1.2
 715          */
 716         public static final UnicodeBlock LATIN_EXTENDED_B =
 717             new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"});
 718 
 719         /**
 720          * Constant for the "IPA Extensions" Unicode character block.
 721          * @since 1.2
 722          */
 723         public static final UnicodeBlock IPA_EXTENSIONS =
 724             new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"});
 725 
 726         /**
 727          * Constant for the "Spacing Modifier Letters" Unicode character block.
 728          * @since 1.2
 729          */
 730         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 731             new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters",
 732                                                                         "SpacingModifierLetters"});
 733 
 734         /**
 735          * Constant for the "Combining Diacritical Marks" Unicode character block.
 736          * @since 1.2
 737          */
 738         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 739             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks",
 740                                                                           "CombiningDiacriticalMarks" });
 741 
 742         /**
 743          * Constant for the "Greek and Coptic" Unicode character block.
 744          * <p>
 745          * This block was previously known as the "Greek" block.
 746          *
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock GREEK
 750             = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"});
 751 
 752         /**
 753          * Constant for the "Cyrillic" Unicode character block.
 754          * @since 1.2
 755          */
 756         public static final UnicodeBlock CYRILLIC =
 757             new UnicodeBlock("CYRILLIC");
 758 
 759         /**
 760          * Constant for the "Armenian" Unicode character block.
 761          * @since 1.2
 762          */
 763         public static final UnicodeBlock ARMENIAN =
 764             new UnicodeBlock("ARMENIAN");
 765 
 766         /**
 767          * Constant for the "Hebrew" Unicode character block.
 768          * @since 1.2
 769          */
 770         public static final UnicodeBlock HEBREW =
 771             new UnicodeBlock("HEBREW");
 772 
 773         /**
 774          * Constant for the "Arabic" Unicode character block.
 775          * @since 1.2
 776          */
 777         public static final UnicodeBlock ARABIC =
 778             new UnicodeBlock("ARABIC");
 779 
 780         /**
 781          * Constant for the "Devanagari" Unicode character block.
 782          * @since 1.2
 783          */
 784         public static final UnicodeBlock DEVANAGARI =
 785             new UnicodeBlock("DEVANAGARI");
 786 
 787         /**
 788          * Constant for the "Bengali" Unicode character block.
 789          * @since 1.2
 790          */
 791         public static final UnicodeBlock BENGALI =
 792             new UnicodeBlock("BENGALI");
 793 
 794         /**
 795          * Constant for the "Gurmukhi" Unicode character block.
 796          * @since 1.2
 797          */
 798         public static final UnicodeBlock GURMUKHI =
 799             new UnicodeBlock("GURMUKHI");
 800 
 801         /**
 802          * Constant for the "Gujarati" Unicode character block.
 803          * @since 1.2
 804          */
 805         public static final UnicodeBlock GUJARATI =
 806             new UnicodeBlock("GUJARATI");
 807 
 808         /**
 809          * Constant for the "Oriya" Unicode character block.
 810          * @since 1.2
 811          */
 812         public static final UnicodeBlock ORIYA =
 813             new UnicodeBlock("ORIYA");
 814 
 815         /**
 816          * Constant for the "Tamil" Unicode character block.
 817          * @since 1.2
 818          */
 819         public static final UnicodeBlock TAMIL =
 820             new UnicodeBlock("TAMIL");
 821 
 822         /**
 823          * Constant for the "Telugu" Unicode character block.
 824          * @since 1.2
 825          */
 826         public static final UnicodeBlock TELUGU =
 827             new UnicodeBlock("TELUGU");
 828 
 829         /**
 830          * Constant for the "Kannada" Unicode character block.
 831          * @since 1.2
 832          */
 833         public static final UnicodeBlock KANNADA =
 834             new UnicodeBlock("KANNADA");
 835 
 836         /**
 837          * Constant for the "Malayalam" Unicode character block.
 838          * @since 1.2
 839          */
 840         public static final UnicodeBlock MALAYALAM =
 841             new UnicodeBlock("MALAYALAM");
 842 
 843         /**
 844          * Constant for the "Thai" Unicode character block.
 845          * @since 1.2
 846          */
 847         public static final UnicodeBlock THAI =
 848             new UnicodeBlock("THAI");
 849 
 850         /**
 851          * Constant for the "Lao" Unicode character block.
 852          * @since 1.2
 853          */
 854         public static final UnicodeBlock LAO =
 855             new UnicodeBlock("LAO");
 856 
 857         /**
 858          * Constant for the "Tibetan" Unicode character block.
 859          * @since 1.2
 860          */
 861         public static final UnicodeBlock TIBETAN =
 862             new UnicodeBlock("TIBETAN");
 863 
 864         /**
 865          * Constant for the "Georgian" Unicode character block.
 866          * @since 1.2
 867          */
 868         public static final UnicodeBlock GEORGIAN =
 869             new UnicodeBlock("GEORGIAN");
 870 
 871         /**
 872          * Constant for the "Hangul Jamo" Unicode character block.
 873          * @since 1.2
 874          */
 875         public static final UnicodeBlock HANGUL_JAMO =
 876             new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"});
 877 
 878         /**
 879          * Constant for the "Latin Extended Additional" Unicode character block.
 880          * @since 1.2
 881          */
 882         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 883             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional",
 884                                                                         "LatinExtendedAdditional"});
 885 
 886         /**
 887          * Constant for the "Greek Extended" Unicode character block.
 888          * @since 1.2
 889          */
 890         public static final UnicodeBlock GREEK_EXTENDED =
 891             new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"});
 892 
 893         /**
 894          * Constant for the "General Punctuation" Unicode character block.
 895          * @since 1.2
 896          */
 897         public static final UnicodeBlock GENERAL_PUNCTUATION =
 898             new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"});
 899 
 900         /**
 901          * Constant for the "Superscripts and Subscripts" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 905             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts",
 906                                                                           "SuperscriptsandSubscripts" });
 907 
 908         /**
 909          * Constant for the "Currency Symbols" Unicode character block.
 910          * @since 1.2
 911          */
 912         public static final UnicodeBlock CURRENCY_SYMBOLS =
 913             new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"});
 914 
 915         /**
 916          * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block.
 917          * <p>
 918          * This block was previously known as "Combining Marks for Symbols".
 919          * @since 1.2
 920          */
 921         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 922             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols",
 923                                                                                                                                                    "CombiningDiacriticalMarksforSymbols",
 924                                                                            "Combining Marks for Symbols",
 925                                                                            "CombiningMarksforSymbols" });
 926 
 927         /**
 928          * Constant for the "Letterlike Symbols" Unicode character block.
 929          * @since 1.2
 930          */
 931         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 932             new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"});
 933 
 934         /**
 935          * Constant for the "Number Forms" Unicode character block.
 936          * @since 1.2
 937          */
 938         public static final UnicodeBlock NUMBER_FORMS =
 939             new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"});
 940 
 941         /**
 942          * Constant for the "Arrows" Unicode character block.
 943          * @since 1.2
 944          */
 945         public static final UnicodeBlock ARROWS =
 946             new UnicodeBlock("ARROWS");
 947 
 948         /**
 949          * Constant for the "Mathematical Operators" Unicode character block.
 950          * @since 1.2
 951          */
 952         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 953             new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators",
 954                                                                      "MathematicalOperators"});
 955 
 956         /**
 957          * Constant for the "Miscellaneous Technical" Unicode character block.
 958          * @since 1.2
 959          */
 960         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 961             new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical",
 962                                                                       "MiscellaneousTechnical"});
 963 
 964         /**
 965          * Constant for the "Control Pictures" Unicode character block.
 966          * @since 1.2
 967          */
 968         public static final UnicodeBlock CONTROL_PICTURES =
 969             new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"});
 970 
 971         /**
 972          * Constant for the "Optical Character Recognition" Unicode character block.
 973          * @since 1.2
 974          */
 975         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 976             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition",
 977                                                                             "OpticalCharacterRecognition"});
 978 
 979         /**
 980          * Constant for the "Enclosed Alphanumerics" Unicode character block.
 981          * @since 1.2
 982          */
 983         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
 984             new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics",
 985                                                                      "EnclosedAlphanumerics"});
 986 
 987         /**
 988          * Constant for the "Box Drawing" Unicode character block.
 989          * @since 1.2
 990          */
 991         public static final UnicodeBlock BOX_DRAWING =
 992             new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"});
 993 
 994         /**
 995          * Constant for the "Block Elements" Unicode character block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock BLOCK_ELEMENTS =
 999             new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"});
1000 
1001         /**
1002          * Constant for the "Geometric Shapes" Unicode character block.
1003          * @since 1.2
1004          */
1005         public static final UnicodeBlock GEOMETRIC_SHAPES =
1006             new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"});
1007 
1008         /**
1009          * Constant for the "Miscellaneous Symbols" Unicode character block.
1010          * @since 1.2
1011          */
1012         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1013             new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols",
1014                                                                     "MiscellaneousSymbols"});
1015 
1016         /**
1017          * Constant for the "Dingbats" Unicode character block.
1018          * @since 1.2
1019          */
1020         public static final UnicodeBlock DINGBATS =
1021             new UnicodeBlock("DINGBATS");
1022 
1023         /**
1024          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1025          * @since 1.2
1026          */
1027         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1028             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation",
1029                                                                           "CJKSymbolsandPunctuation"});
1030 
1031         /**
1032          * Constant for the "Hiragana" Unicode character block.
1033          * @since 1.2
1034          */
1035         public static final UnicodeBlock HIRAGANA =
1036             new UnicodeBlock("HIRAGANA");
1037 
1038         /**
1039          * Constant for the "Katakana" Unicode character block.
1040          * @since 1.2
1041          */
1042         public static final UnicodeBlock KATAKANA =
1043             new UnicodeBlock("KATAKANA");
1044 
1045         /**
1046          * Constant for the "Bopomofo" Unicode character block.
1047          * @since 1.2
1048          */
1049         public static final UnicodeBlock BOPOMOFO =
1050             new UnicodeBlock("BOPOMOFO");
1051 
1052         /**
1053          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1054          * @since 1.2
1055          */
1056         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1057             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo",
1058                                                                         "HangulCompatibilityJamo"});
1059 
1060         /**
1061          * Constant for the "Kanbun" Unicode character block.
1062          * @since 1.2
1063          */
1064         public static final UnicodeBlock KANBUN =
1065             new UnicodeBlock("KANBUN");
1066 
1067         /**
1068          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1069          * @since 1.2
1070          */
1071         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1072             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months",
1073                                                                               "EnclosedCJKLettersandMonths"});
1074 
1075         /**
1076          * Constant for the "CJK Compatibility" Unicode character block.
1077          * @since 1.2
1078          */
1079         public static final UnicodeBlock CJK_COMPATIBILITY =
1080             new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"});
1081 
1082         /**
1083          * Constant for the "CJK Unified Ideographs" Unicode character block.
1084          * @since 1.2
1085          */
1086         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1087             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs",
1088                                                                      "CJKUnifiedIdeographs"});
1089 
1090         /**
1091          * Constant for the "Hangul Syllables" Unicode character block.
1092          * @since 1.2
1093          */
1094         public static final UnicodeBlock HANGUL_SYLLABLES =
1095             new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"});
1096 
1097         /**
1098          * Constant for the "Private Use Area" Unicode character block.
1099          * @since 1.2
1100          */
1101         public static final UnicodeBlock PRIVATE_USE_AREA =
1102             new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"});
1103 
1104         /**
1105          * Constant for the "CJK Compatibility Ideographs" Unicode character block.
1106          * @since 1.2
1107          */
1108         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1109             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1110                              new String[] {"CJK Compatibility Ideographs",
1111                                            "CJKCompatibilityIdeographs"});
1112 
1113         /**
1114          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1115          * @since 1.2
1116          */
1117         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1118             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms",
1119                                                                             "AlphabeticPresentationForms"});
1120 
1121         /**
1122          * Constant for the "Arabic Presentation Forms-A" Unicode character block.
1123          * @since 1.2
1124          */
1125         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1126             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A",
1127                                                                           "ArabicPresentationForms-A"});
1128 
1129         /**
1130          * Constant for the "Combining Half Marks" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock COMBINING_HALF_MARKS =
1134             new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks",
1135                                                                    "CombiningHalfMarks"});
1136 
1137         /**
1138          * Constant for the "CJK Compatibility Forms" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1142             new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms",
1143                                                                       "CJKCompatibilityForms"});
1144 
1145         /**
1146          * Constant for the "Small Form Variants" Unicode character block.
1147          * @since 1.2
1148          */
1149         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1150             new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants",
1151                                                                   "SmallFormVariants"});
1152 
1153         /**
1154          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1155          * @since 1.2
1156          */
1157         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1158             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B",
1159                                                                           "ArabicPresentationForms-B"});
1160 
1161         /**
1162          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block.
1163          * @since 1.2
1164          */
1165         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1166             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1167                              new String[] {"Halfwidth and Fullwidth Forms",
1168                                            "HalfwidthandFullwidthForms"});
1169 
1170         /**
1171          * Constant for the "Specials" Unicode character block.
1172          * @since 1.2
1173          */
1174         public static final UnicodeBlock SPECIALS =
1175             new UnicodeBlock("SPECIALS");
1176 
1177         /**
1178          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1179          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1180          *             {@link #LOW_SURROGATES}. These new constants match
1181          *             the block definitions of the Unicode Standard.
1182          *             The {@link #of(char)} and {@link #of(int)} methods
1183          *             return the new constants, not SURROGATES_AREA.
1184          */
1185         @Deprecated
1186         public static final UnicodeBlock SURROGATES_AREA =
1187             new UnicodeBlock("SURROGATES_AREA");
1188 
1189         /**
1190          * Constant for the "Syriac" Unicode character block.
1191          * @since 1.4
1192          */
1193         public static final UnicodeBlock SYRIAC =
1194             new UnicodeBlock("SYRIAC");
1195 
1196         /**
1197          * Constant for the "Thaana" Unicode character block.
1198          * @since 1.4
1199          */
1200         public static final UnicodeBlock THAANA =
1201             new UnicodeBlock("THAANA");
1202 
1203         /**
1204          * Constant for the "Sinhala" Unicode character block.
1205          * @since 1.4
1206          */
1207         public static final UnicodeBlock SINHALA =
1208             new UnicodeBlock("SINHALA");
1209 
1210         /**
1211          * Constant for the "Myanmar" Unicode character block.
1212          * @since 1.4
1213          */
1214         public static final UnicodeBlock MYANMAR =
1215             new UnicodeBlock("MYANMAR");
1216 
1217         /**
1218          * Constant for the "Ethiopic" Unicode character block.
1219          * @since 1.4
1220          */
1221         public static final UnicodeBlock ETHIOPIC =
1222             new UnicodeBlock("ETHIOPIC");
1223 
1224         /**
1225          * Constant for the "Cherokee" Unicode character block.
1226          * @since 1.4
1227          */
1228         public static final UnicodeBlock CHEROKEE =
1229             new UnicodeBlock("CHEROKEE");
1230 
1231         /**
1232          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1233          * @since 1.4
1234          */
1235         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1236             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1237                              new String[] {"Unified Canadian Aboriginal Syllabics",
1238                                            "UnifiedCanadianAboriginalSyllabics"});
1239 
1240         /**
1241          * Constant for the "Ogham" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock OGHAM =
1245                              new UnicodeBlock("OGHAM");
1246 
1247         /**
1248          * Constant for the "Runic" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock RUNIC =
1252                              new UnicodeBlock("RUNIC");
1253 
1254         /**
1255          * Constant for the "Khmer" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock KHMER =
1259                              new UnicodeBlock("KHMER");
1260 
1261         /**
1262          * Constant for the "Mongolian" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MONGOLIAN =
1266                              new UnicodeBlock("MONGOLIAN");
1267 
1268         /**
1269          * Constant for the "Braille Patterns" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock BRAILLE_PATTERNS =
1273             new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns",
1274                                                                "BraillePatterns"});
1275 
1276         /**
1277          * Constant for the "CJK Radicals Supplement" Unicode character block.
1278          * @since 1.4
1279          */
1280         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1281              new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement",
1282                                                                        "CJKRadicalsSupplement"});
1283 
1284         /**
1285          * Constant for the "Kangxi Radicals" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock KANGXI_RADICALS =
1289             new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"});
1290 
1291         /**
1292          * Constant for the "Ideographic Description Characters" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1296             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters",
1297                                                                                  "IdeographicDescriptionCharacters"});
1298 
1299         /**
1300          * Constant for the "Bopomofo Extended" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1304             new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended",
1305                                                                 "BopomofoExtended"});
1306 
1307         /**
1308          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1309          * @since 1.4
1310          */
1311         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1312             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A",
1313                                                                                  "CJKUnifiedIdeographsExtensionA"});
1314 
1315         /**
1316          * Constant for the "Yi Syllables" Unicode character block.
1317          * @since 1.4
1318          */
1319         public static final UnicodeBlock YI_SYLLABLES =
1320             new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"});
1321 
1322         /**
1323          * Constant for the "Yi Radicals" Unicode character block.
1324          * @since 1.4
1325          */
1326         public static final UnicodeBlock YI_RADICALS =
1327             new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"});
1328 
1329 
1330         /**
1331          * Constant for the "Cyrillic Supplementary" Unicode character block.
1332          * @since 1.5
1333          */
1334         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1335             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1336                              new String[] {"Cyrillic Supplementary",
1337                                            "CyrillicSupplementary",
1338                                            "Cyrillic Supplement",
1339                                            "CyrillicSupplement"});
1340 
1341         /**
1342          * Constant for the "Tagalog" Unicode character block.
1343          * @since 1.5
1344          */
1345         public static final UnicodeBlock TAGALOG =
1346             new UnicodeBlock("TAGALOG");
1347 
1348         /**
1349          * Constant for the "Hanunoo" Unicode character block.
1350          * @since 1.5
1351          */
1352         public static final UnicodeBlock HANUNOO =
1353             new UnicodeBlock("HANUNOO");
1354 
1355         /**
1356          * Constant for the "Buhid" Unicode character block.
1357          * @since 1.5
1358          */
1359         public static final UnicodeBlock BUHID =
1360             new UnicodeBlock("BUHID");
1361 
1362         /**
1363          * Constant for the "Tagbanwa" Unicode character block.
1364          * @since 1.5
1365          */
1366         public static final UnicodeBlock TAGBANWA =
1367             new UnicodeBlock("TAGBANWA");
1368 
1369         /**
1370          * Constant for the "Limbu" Unicode character block.
1371          * @since 1.5
1372          */
1373         public static final UnicodeBlock LIMBU =
1374             new UnicodeBlock("LIMBU");
1375 
1376         /**
1377          * Constant for the "Tai Le" Unicode character block.
1378          * @since 1.5
1379          */
1380         public static final UnicodeBlock TAI_LE =
1381             new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"});
1382 
1383         /**
1384          * Constant for the "Khmer Symbols" Unicode character block.
1385          * @since 1.5
1386          */
1387         public static final UnicodeBlock KHMER_SYMBOLS =
1388             new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"});
1389 
1390         /**
1391          * Constant for the "Phonetic Extensions" Unicode character block.
1392          * @since 1.5
1393          */
1394         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1395             new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"});
1396 
1397         /**
1398          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1399          * @since 1.5
1400          */
1401         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1402             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1403                              new String[]{"Miscellaneous Mathematical Symbols-A",
1404                                           "MiscellaneousMathematicalSymbols-A"});
1405 
1406         /**
1407          * Constant for the "Supplemental Arrows-A" Unicode character block.
1408          * @since 1.5
1409          */
1410         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1411             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A",
1412                                                                     "SupplementalArrows-A"});
1413 
1414         /**
1415          * Constant for the "Supplemental Arrows-B" Unicode character block.
1416          * @since 1.5
1417          */
1418         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1419             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B",
1420                                                                     "SupplementalArrows-B"});
1421 
1422         /**
1423          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block.
1424          * @since 1.5
1425          */
1426         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1427                 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1428                                    new String[] {"Miscellaneous Mathematical Symbols-B",
1429                                                  "MiscellaneousMathematicalSymbols-B"});
1430 
1431         /**
1432          * Constant for the "Supplemental Mathematical Operators" Unicode character block.
1433          * @since 1.5
1434          */
1435         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1436             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1437                              new String[]{"Supplemental Mathematical Operators",
1438                                           "SupplementalMathematicalOperators"} );
1439 
1440         /**
1441          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block.
1442          * @since 1.5
1443          */
1444         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1445             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows",
1446                                                                                "MiscellaneousSymbolsandArrows"});
1447 
1448         /**
1449          * Constant for the "Katakana Phonetic Extensions" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1453             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions",
1454                                                                            "KatakanaPhoneticExtensions"});
1455 
1456         /**
1457          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1458          * @since 1.5
1459          */
1460         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1461             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols",
1462                                                                       "YijingHexagramSymbols"});
1463 
1464         /**
1465          * Constant for the "Variation Selectors" Unicode character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock VARIATION_SELECTORS =
1469             new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"});
1470 
1471         /**
1472          * Constant for the "Linear B Syllabary" Unicode character block.
1473          * @since 1.5
1474          */
1475         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1476             new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"});
1477 
1478         /**
1479          * Constant for the "Linear B Ideograms" Unicode character block.
1480          * @since 1.5
1481          */
1482         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1483             new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"});
1484 
1485         /**
1486          * Constant for the "Aegean Numbers" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock AEGEAN_NUMBERS =
1490             new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"});
1491 
1492         /**
1493          * Constant for the "Old Italic" Unicode character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock OLD_ITALIC =
1497             new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"});
1498 
1499         /**
1500          * Constant for the "Gothic" Unicode character block.
1501          * @since 1.5
1502          */
1503         public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");
1504 
1505         /**
1506          * Constant for the "Ugaritic" Unicode character block.
1507          * @since 1.5
1508          */
1509         public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");
1510 
1511         /**
1512          * Constant for the "Deseret" Unicode character block.
1513          * @since 1.5
1514          */
1515         public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");
1516 
1517         /**
1518          * Constant for the "Shavian" Unicode character block.
1519          * @since 1.5
1520          */
1521         public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");
1522 
1523         /**
1524          * Constant for the "Osmanya" Unicode character block.
1525          * @since 1.5
1526          */
1527         public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");
1528 
1529         /**
1530          * Constant for the "Cypriot Syllabary" Unicode character block.
1531          * @since 1.5
1532          */
1533         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1534             new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"});
1535 
1536         /**
1537          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1538          * @since 1.5
1539          */
1540         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1541             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols",
1542                                                                         "ByzantineMusicalSymbols"});
1543 
1544         /**
1545          * Constant for the "Musical Symbols" Unicode character block.
1546          * @since 1.5
1547          */
1548         public static final UnicodeBlock MUSICAL_SYMBOLS =
1549             new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"});
1550 
1551         /**
1552          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1553          * @since 1.5
1554          */
1555         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1556             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols",
1557                                                                      "TaiXuanJingSymbols"});
1558 
1559         /**
1560          * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block (U+1D400..U+1D7FF).
1561          * @since 1.5
1562          */
1563         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1564             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1565                              new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"});
1566 
1567         /**
1568          * Constant for the "CJK Unified Ideographs Extension B" Unicode character block (U+20000..U+2A6DF).
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1572             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1573                              new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"});
1574 
1575         /**
1576          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block (U+2F800..U+2FA1F).
1577          * @since 1.5
1578          */
1579         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1580             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1581                              new String[]{"CJK Compatibility Ideographs Supplement",
1582                                           "CJKCompatibilityIdeographsSupplement"});
1583 
1584         /**
1585          * Constant for the "Tags" Unicode character block (U+E0000..U+E007F).
1586          * @since 1.5
1587          */
1588         public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");
1589 
1590         /**
1591          * Constant for the "Variation Selectors Supplement" Unicode character block (U+E0100..U+E01EF).
1592          * @since 1.5
1593          */
1594         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1595             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement",
1596                                                                              "VariationSelectorsSupplement"});
1597 
1598         /**
1599          * Constant for the "Supplementary Private Use Area-A" Unicode character block (U+F0000..U+FFFFF).
1600          * @since 1.5
1601          */
1602         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1603             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1604                              new String[] {"Supplementary Private Use Area-A",
1605                                            "SupplementaryPrivateUseArea-A"});
1606 
1607         /**
1608          * Constant for the "Supplementary Private Use Area-B" Unicode character block (U+100000..U+10FFFF).
1609          * @since 1.5
1610          */
1611         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1612             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1613                              new String[] {"Supplementary Private Use Area-B",
1614                                            "SupplementaryPrivateUseArea-B"});
1615 
1616         /**
1617          * Constant for the "High Surrogates" Unicode character block.
1618          * This block represents code point values in the high surrogate
1619          * range: 0xD800 through 0xDB7F.
1620          *
1621          * @since 1.5
1622          */
1623         public static final UnicodeBlock HIGH_SURROGATES =
1624             new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"});
1625 
1626         /**
1627          * Constant for the "High Private Use Surrogates" Unicode character block.
1628          * This block represents code point values in the private-use part of the
1629          * high surrogate range: 0xDB80 through 0xDBFF.
1630          *
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1634             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates",
1635                                                                            "HighPrivateUseSurrogates"});
1636 
1637         /**
1638          * Constant for the "Low Surrogates" Unicode character block.
1639          * This block represents code point values in the low surrogate
1640          * range: 0xDC00 through 0xDFFF.
1641          *
1642          * @since 1.5
1643          */
1644         public static final UnicodeBlock LOW_SURROGATES =
1645             new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"});
1646 
1647         /**
1648          * Constant for the "Arabic Supplement" Unicode character block (U+0750..U+077F).
1649          * @since 1.7
1650          */
1651         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1652             new UnicodeBlock("ARABIC_SUPPLEMENT",
1653                              new String[] { "Arabic Supplement",
1654                                             "ArabicSupplement"});
1655 
1656         /**
1657          * Constant for the "NKo" Unicode character block (U+07C0..U+07FF).
1658          * @since 1.7
1659          */
1660         public static final UnicodeBlock NKO = new UnicodeBlock("NKO");
1661 
1662         /**
1663          * Constant for the "Ethiopic Supplement" Unicode character block (U+1380..U+139F).
1664          * @since 1.7
1665          */
1666         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1667             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1668                              new String[] { "Ethiopic Supplement",
1669                                             "EthiopicSupplement"});
1670 
1671         /**
1672          * Constant for the "New Tai Lue" Unicode character block (U+1980..U+19DF).
1673          * @since 1.7
1674          */
1675         public static final UnicodeBlock NEW_TAI_LUE =
1676             new UnicodeBlock("NEW_TAI_LUE",
1677                              new String[] { "New Tai Lue",
1678                                             "NewTaiLue"});
1679 
1680         /**
1681          * Constant for the "Buginese" Unicode character block (U+1A00..U+1A1F).
1682          * @since 1.7
1683          */
1684         public static final UnicodeBlock BUGINESE =
1685             new UnicodeBlock("BUGINESE");
1686 
1687         /**
1688          * Constant for the "Balinese" Unicode character block (U+1B00..U+1B7F).
1689          * @since 1.7
1690          */
1691         public static final UnicodeBlock BALINESE =
1692             new UnicodeBlock("BALINESE");
1693 
1694         /**
1695          * Constant for the "Sundanese" Unicode character block (U+1B80..U+1BBF).
1696          * @since 1.7
1697          */
1698         public static final UnicodeBlock SUNDANESE =
1699             new UnicodeBlock("SUNDANESE");
1700 
1701         /**
1702          * Constant for the "Lepcha" Unicode character block (U+1C00..U+1C4F).
1703          * @since 1.7
1704          */
1705         public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");
1706 
1707         /**
1708          * Constant for the "Ol Chiki" Unicode character block (U+1C50..U+1C7F).
1709          * @since 1.7
1710          */
1711         public static final UnicodeBlock OL_CHIKI =
1712             new UnicodeBlock("OL_CHIKI",
1713                              new String[] { "Ol Chiki",
1714                                             "OlChiki"});
1715 
1716         /**
1717          * Constant for the "Phonetic Extensions Supplement" Unicode character
1718          * block (U+1D80..U+1DBF).
1719          * @since 1.7
1720          */
1721         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1722             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1723                              new String[] { "Phonetic Extensions Supplement",
1724                                             "PhoneticExtensionsSupplement"});
1725 
1726         /**
1727          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1728          * character block (U+1DC0..U+1DFF).
1729          * @since 1.7
1730          */
1731         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1732             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1733                              new String[] { "Combining Diacritical Marks Supplement",
1734                                             "CombiningDiacriticalMarksSupplement"});
1735 
1736         /**
1737          * Constant for the "Glagolitic" Unicode character block (U+2C00..U+2C5F).
1738          * @since 1.7
1739          */
1740         public static final UnicodeBlock GLAGOLITIC =
1741             new UnicodeBlock("GLAGOLITIC");
1742 
1743         /**
1744          * Constant for the "Latin Extended-C" Unicode character block (U+2C60..U+2C7F).
1745          * @since 1.7
1746          */
1747         public static final UnicodeBlock LATIN_EXTENDED_C =
1748             new UnicodeBlock("LATIN_EXTENDED_C",
1749                              new String[] { "Latin Extended-C",
1750                                             "LatinExtended-C"});
1751 
1752         /**
1753          * Constant for the "Coptic" Unicode character block (U+2C80..U+2CFF).
1754          * @since 1.7
1755          */
1756         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");
1757 
1758         /**
1759          * Constant for the "Georgian Supplement" Unicode character block (U+2D00..U+2D2F).
1760          * @since 1.7
1761          */
1762         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1763             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1764                              new String[] { "Georgian Supplement",
1765                                             "GeorgianSupplement"});
1766 
1767         /**
1768          * Constant for the "Tifinagh" Unicode character block (U+2D30..U+2D7F).
1769          * @since 1.7
1770          */
1771         public static final UnicodeBlock TIFINAGH =
1772             new UnicodeBlock("TIFINAGH");
1773 
1774         /**
1775          * Constant for the "Ethiopic Extended" Unicode character block (U+2D80..U+2DDF).
1776          * @since 1.7
1777          */
1778         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1779             new UnicodeBlock("ETHIOPIC_EXTENDED",
1780                              new String[] { "Ethiopic Extended",
1781                                             "EthiopicExtended"});
1782 
1783         /**
1784          * Constant for the "Cyrillic Extended-A" Unicode character block (U+2DE0..U+2DFF).
1785          * @since 1.7
1786          */
1787         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1788             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1789                              new String[] { "Cyrillic Extended-A",
1790                                             "CyrillicExtended-A"});
1791 
1792         /**
1793          * Constant for the "Supplemental Punctuation" Unicode character block (U+2E00..U+2E7F).
1794          * @since 1.7
1795          */
1796         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1797             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1798                              new String[] { "Supplemental Punctuation",
1799                                             "SupplementalPunctuation"});
1800 
1801         /**
1802          * Constant for the "CJK Strokes" Unicode character block (U+31C0..U+31EF).
1803          * @since 1.7
1804          */
1805         public static final UnicodeBlock CJK_STROKES =
1806             new UnicodeBlock("CJK_STROKES",
1807                              new String[] { "CJK Strokes",
1808                                             "CJKStrokes"});
1809 
1810         /**
1811          * Constant for the "Vai" Unicode character block (U+A500..U+A63F).
1812          * @since 1.7
1813          */
1814         public static final UnicodeBlock VAI = new UnicodeBlock("VAI");
1815 
1816         /**
1817          * Constant for the "Cyrillic Extended-B" Unicode character block (U+A640..U+A69F).
1818          * @since 1.7
1819          */
1820         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1821             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1822                              new String[] { "Cyrillic Extended-B",
1823                                             "CyrillicExtended-B"});
1824 
1825         /**
1826          * Constant for the "Modifier Tone Letters" Unicode character block (U+A700..U+A71F).
1827          * @since 1.7
1828          */
1829         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1830             new UnicodeBlock("MODIFIER_TONE_LETTERS",
1831                              new String[] { "Modifier Tone Letters",
1832                                             "ModifierToneLetters"});
1833 
1834         /**
1835          * Constant for the "Latin Extended-D" Unicode character block (U+A720..U+A7FF).
1836          * @since 1.7
1837          */
1838         public static final UnicodeBlock LATIN_EXTENDED_D =
1839             new UnicodeBlock("LATIN_EXTENDED_D",
1840                              new String[] { "Latin Extended-D",
1841                                             "LatinExtended-D"});
1842 
1843         /**
1844          * Constant for the "Syloti Nagri" Unicode character block (U+A800..U+A82F).
1845          * @since 1.7
1846          */
1847         public static final UnicodeBlock SYLOTI_NAGRI =
1848             new UnicodeBlock("SYLOTI_NAGRI",
1849                              new String[] { "Syloti Nagri",
1850                                             "SylotiNagri"});
1851 
1852         /**
1853          * Constant for the "Phags-pa" Unicode character block (U+A840..U+A87F).
1854          * @since 1.7
1855          */
1856         public static final UnicodeBlock PHAGS_PA =
1857             new UnicodeBlock("PHAGS_PA", new String[] { "Phags-pa"});
1858 
1859         /**
1860          * Constant for the "Saurashtra" Unicode character block (U+A880..U+A8DF).
1861          * @since 1.7
1862          */
1863         public static final UnicodeBlock SAURASHTRA =
1864             new UnicodeBlock("SAURASHTRA");
1865 
1866         /**
1867          * Constant for the "Kayah Li" Unicode character block (U+A900..U+A92F).
1868          * @since 1.7
1869          */
1870         public static final UnicodeBlock KAYAH_LI =
1871             new UnicodeBlock("KAYAH_LI",
1872                              new String[] { "Kayah Li",
1873                                             "KayahLi"});
1874 
1875         /**
1876          * Constant for the "Rejang" Unicode character block (U+A930..U+A95F).
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");
1880 
1881         /**
1882          * Constant for the "Cham" Unicode character block (U+AA00..U+AA5F).
1883          * @since 1.7
1884          */
1885         public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");
1886 
1887         /**
1888          * Constant for the "Vertical Forms" Unicode character block (U+FE10..U+FE1F).
1889          * @since 1.7
1890          */
1891         public static final UnicodeBlock VERTICAL_FORMS =
1892             new UnicodeBlock("VERTICAL_FORMS",
1893                              new String[] { "Vertical Forms",
1894                                             "VerticalForms"});
1895 
1896         /**
1897          * Constant for the "Ancient Greek Numbers" Unicode character block (U+10140..U+1018F).
1898          * @since 1.7
1899          */
1900         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1901             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
1902                              new String[] { "Ancient Greek Numbers",
1903                                             "AncientGreekNumbers"});
1904 
1905         /**
1906          * Constant for the "Ancient Symbols" Unicode character block (U+10190..U+101CF).
1907          * @since 1.7
1908          */
1909         public static final UnicodeBlock ANCIENT_SYMBOLS =
1910             new UnicodeBlock("ANCIENT_SYMBOLS",
1911                              new String[] { "Ancient Symbols",
1912                                             "AncientSymbols"});
1913 
1914         /**
1915          * Constant for the "Phaistos Disc" Unicode character block (U+101D0..U+101FF).
1916          * @since 1.7
1917          */
1918         public static final UnicodeBlock PHAISTOS_DISC =
1919             new UnicodeBlock("PHAISTOS_DISC",
1920                              new String[] { "Phaistos Disc",
1921                                             "PhaistosDisc"});
1922 
1923         /**
1924          * Constant for the "Lycian" Unicode character block (U+10280..U+1029F).
1925          * @since 1.7
1926          */
1927         public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");
1928 
1929         /**
1930          * Constant for the "Carian" Unicode character block (U+102A0..U+102DF).
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");
1934 
1935         /**
1936          * Constant for the "Old Persian" Unicode character block (U+103A0..U+103DF).
1937          * @since 1.7
1938          */
1939         public static final UnicodeBlock OLD_PERSIAN =
1940             new UnicodeBlock("OLD_PERSIAN",
1941                              new String[] { "Old Persian",
1942                                             "OldPersian"});
1943 
1944         /**
1945          * Constant for the "Phoenician" Unicode character block (U+10900..U+1091F).
1946          * @since 1.7
1947          */
1948         public static final UnicodeBlock PHOENICIAN =
1949             new UnicodeBlock("PHOENICIAN");
1950 
1951         /**
1952          * Constant for the "Lydian" Unicode character block (U+10920..U+1093F).
1953          * @since 1.7
1954          */
1955         public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");
1956 
1957         /**
1958          * Constant for the "Kharoshthi" Unicode character block (U+10A00..U+10A5F).
1959          * @since 1.7
1960          */
1961         public static final UnicodeBlock KHAROSHTHI =
1962             new UnicodeBlock("KHAROSHTHI");
1963 
1964         /**
1965          * Constant for the "Cuneiform" Unicode character block (U+12000..U+123FF).
1966          * @since 1.7
1967          */
1968         public static final UnicodeBlock CUNEIFORM =
1969             new UnicodeBlock("CUNEIFORM");
1970 
1971         /**
1972          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
1973          * character block (U+12400..U+1247F).
1974          * @since 1.7
1975          */
1976         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1977             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1978                              new String[] { "Cuneiform Numbers and Punctuation",
1979                                             "CuneiformNumbersandPunctuation"});
1980 
1981         /**
1982          * Constant for the "Ancient Greek Musical Notation" Unicode character
1983          * block (U+1D200..U+1D24F).
1984          * @since 1.7
1985          */
1986         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1987             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1988                              new String[] { "Ancient Greek Musical Notation",
1989                                             "AncientGreekMusicalNotation"});
1990 
1991         /**
1992          * Constant for the "Counting Rod Numerals" Unicode character block (U+1D360..U+1D37F).
1993          * @since 1.7
1994          */
1995         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1996             new UnicodeBlock("COUNTING_ROD_NUMERALS",
1997                              new String[] { "Counting Rod Numerals",
1998                                             "CountingRodNumerals"});
1999 
2000         /**
2001          * Constant for the "Mahjong Tiles" Unicode character block (U+1F000..U+1F02F).
2002          * @since 1.7
2003          */
2004         public static final UnicodeBlock MAHJONG_TILES =
2005             new UnicodeBlock("MAHJONG_TILES",
2006                              new String[] { "Mahjong Tiles",
2007                                             "MahjongTiles"});
2008 
2009         /**
2010          * Constant for the "Domino Tiles" Unicode character block (U+1F030..U+1F09F).
2011          * @since 1.7
2012          */
2013         public static final UnicodeBlock DOMINO_TILES =
2014             new UnicodeBlock("DOMINO_TILES",
2015                              new String[] { "Domino Tiles",
2016                                             "DominoTiles"});
2017 
2018         private static final int blockStarts[] = {
2019             0x0000,   // 0000..007F; Basic Latin
2020             0x0080,   // 0080..00FF; Latin-1 Supplement
2021             0x0100,   // 0100..017F; Latin Extended-A
2022             0x0180,   // 0180..024F; Latin Extended-B
2023             0x0250,   // 0250..02AF; IPA Extensions
2024             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2025             0x0300,   // 0300..036F; Combining Diacritical Marks
2026             0x0370,   // 0370..03FF; Greek and Coptic
2027             0x0400,   // 0400..04FF; Cyrillic
2028             0x0500,   // 0500..052F; Cyrillic Supplement
2029             0x0530,   // 0530..058F; Armenian
2030             0x0590,   // 0590..05FF; Hebrew
2031             0x0600,   // 0600..06FF; Arabic
2032             0x0700,   // 0700..074F; Syria
2033             0x0750,   // 0750..077F; Arabic Supplement
2034             0x0780,   // 0780..07BF; Thaana
2035             0x07C0,   // 07C0..07FF; NKo
2036             0x0800,   //             unassigned
2037             0x0900,   // 0900..097F; Devanagari
2038             0x0980,   // 0980..09FF; Bengali
2039             0x0A00,   // 0A00..0A7F; Gurmukhi
2040             0x0A80,   // 0A80..0AFF; Gujarati
2041             0x0B00,   // 0B00..0B7F; Oriya
2042             0x0B80,   // 0B80..0BFF; Tamil
2043             0x0C00,   // 0C00..0C7F; Telugu
2044             0x0C80,   // 0C80..0CFF; Kannada
2045             0x0D00,   // 0D00..0D7F; Malayalam
2046             0x0D80,   // 0D80..0DFF; Sinhala
2047             0x0E00,   // 0E00..0E7F; Thai
2048             0x0E80,   // 0E80..0EFF; Lao
2049             0x0F00,   // 0F00..0FFF; Tibetan
2050             0x1000,   // 1000..109F; Myanmar
2051             0x10A0,   // 10A0..10FF; Georgian
2052             0x1100,   // 1100..11FF; Hangul Jamo
2053             0x1200,   // 1200..137F; Ethiopic
2054             0x1380,   // 1380..139F; Ethiopic Supplement
2055             0x13A0,   // 13A0..13FF; Cherokee
2056             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2057             0x1680,   // 1680..169F; Ogham
2058             0x16A0,   // 16A0..16FF; Runic
2059             0x1700,   // 1700..171F; Tagalog
2060             0x1720,   // 1720..173F; Hanunoo
2061             0x1740,   // 1740..175F; Buhid
2062             0x1760,   // 1760..177F; Tagbanwa
2063             0x1780,   // 1780..17FF; Khmer
2064             0x1800,   // 1800..18AF; Mongolian
2065             0x18B0,   //             unassigned
2066             0x1900,   // 1900..194F; Limbu
2067             0x1950,   // 1950..197F; Tai Le
2068             0x1980,   // 1980..19DF; New Tai Lue
2069             0x19E0,   // 19E0..19FF; Khmer Symbols
2070             0x1A00,   // 1A00..1A1F; Buginese
2071             0x1A20,   //             unassigned
2072             0x1B00,   // 1B00..1B7F; Balinese
2073             0x1B80,   // 1B80..1BBF; Sundanese
2074             0x1BC0,   //             unassigned
2075             0x1C00,   // 1C00..1C4F; Lepcha
2076             0x1C50,   // 1C50..1C7F; Ol Chiki
2077             0x1C80,   //             unassigned
2078             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2079             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2080             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2081             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2082             0x1F00,   // 1F00..1FFF; Greek Extended
2083             0x2000,   // 2000..206F; General Punctuation
2084             0x2070,   // 2070..209F; Superscripts and Subscripts
2085             0x20A0,   // 20A0..20CF; Currency Symbols
2086             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2087             0x2100,   // 2100..214F; Letterlike Symbols
2088             0x2150,   // 2150..218F; Number Forms
2089             0x2190,   // 2190..21FF; Arrows
2090             0x2200,   // 2200..22FF; Mathematical Operators
2091             0x2300,   // 2300..23FF; Miscellaneous Technical
2092             0x2400,   // 2400..243F; Control Pictures
2093             0x2440,   // 2440..245F; Optical Character Recognition
2094             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2095             0x2500,   // 2500..257F; Box Drawing
2096             0x2580,   // 2580..259F; Block Elements
2097             0x25A0,   // 25A0..25FF; Geometric Shapes
2098             0x2600,   // 2600..26FF; Miscellaneous Symbols
2099             0x2700,   // 2700..27BF; Dingbats
2100             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2101             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2102             0x2800,   // 2800..28FF; Braille Patterns
2103             0x2900,   // 2900..297F; Supplemental Arrows-B
2104             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2105             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2106             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2107             0x2C00,   // 2C00..2C5F; Glagolitic
2108             0x2C60,   // 2C60..2C7F; Latin Extended-C
2109             0x2C80,   // 2C80..2CFF; Coptic
2110             0x2D00,   // 2D00..2D2F; Georgian Supplement
2111             0x2D30,   // 2D30..2D7F; Tifinagh
2112             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2113             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2114             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2115             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2116             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2117             0x2FE0,   //             unassigned
2118             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2119             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2120             0x3040,   // 3040..309F; Hiragana
2121             0x30A0,   // 30A0..30FF; Katakana
2122             0x3100,   // 3100..312F; Bopomofo
2123             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2124             0x3190,   // 3190..319F; Kanbun
2125             0x31A0,   // 31A0..31BF; Bopomofo Extended
2126             0x31C0,   // 31C0..31EF; CJK Strokes
2127             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2128             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2129             0x3300,   // 3300..33FF; CJK Compatibility
2130             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2131             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2132             0x4E00,   // 4E00..9FFF; CJK Unified Ideograph
2133             0xA000,   // A000..A48F; Yi Syllables
2134             0xA490,   // A490..A4CF; Yi Radicals
2135             0xA4D0,   //             unassigned
2136             0xA500,   // A500..A63F; Vai
2137             0xA640,   // A640..A69F; Cyrillic Extended-B
2138             0xA6A0,   //             unassigned
2139             0xA700,   // A700..A71F; Modifier Tone Letters
2140             0xA720,   // A720..A7FF; Latin Extended-D
2141             0xA800,   // A800..A82F; Syloti Nagri
2142             0xA830,   //             unassigned
2143             0xA840,   // A840..A87F; Phags-pa
2144             0xA880,   // A880..A8DF; Saurashtra
2145             0xA8E0,   //             unassigned
2146             0xA900,   // A900..A92F; Kayah Li
2147             0xA930,   // A930..A95F; Rejang
2148             0xA960,   //             unassigned
2149             0xAA00,   // AA00..AA5F; Cham
2150             0xAA60,   //             unassigned
2151             0xAC00,   // AC00..D7AF; Hangul Syllables
2152             0xD7B0,   //             unassigned
2153             0xD800,   // D800..DB7F; High Surrogates
2154             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2155             0xDC00,   // DC00..DFFF; Low Surrogates
2156             0xE000,   // E000..F8FF; Private Use Area
2157             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2158             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2159             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2160             0xFE00,   // FE00..FE0F; Variation Selectors
2161             0xFE10,   // FE10..FE1F; Vertical Forms
2162             0xFE20,   // FE20..FE2F; Combining Half Marks
2163             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2164             0xFE50,   // FE50..FE6F; Small Form Variants
2165             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2166             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2167             0xFFF0,   // FFF0..FFFF; Specials
2168             0x10000,  // 10000..1007F; Linear B Syllabary
2169             0x10080,  // 10080..100FF; Linear B Ideograms
2170             0x10100,  // 10100..1013F; Aegean Numbers
2171             0x10140,  // 10140..1018F; Ancient Greek Numbers
2172             0x10190,  // 10190..101CF; Ancient Symbols
2173             0x101D0,  // 101D0..101FF; Phaistos Disc
2174             0x10200,  //               unassigned
2175             0x10280,  // 10280..1029F; Lycian
2176             0x102A0,  // 102A0..102DF; Carian
2177             0x102E0,  //               unassigned
2178             0x10300,  // 10300..1032F; Old Italic
2179             0x10330,  // 10330..1034F; Gothic
2180             0x10350,  //               unassigned
2181             0x10380,  // 10380..1039F; Ugaritic
2182             0x103A0,  // 103A0..103DF; Old Persian
2183             0x103E0,  //               unassigned
2184             0x10400,  // 10400..1044F; Desere
2185             0x10450,  // 10450..1047F; Shavian
2186             0x10480,  // 10480..104AF; Osmanya
2187             0x104B0,  //               unassigned
2188             0x10800,  // 10800..1083F; Cypriot Syllabary
2189             0x10840,  //               unassigned
2190             0x10900,  // 10900..1091F; Phoenician
2191             0x10920,  // 10920..1093F; Lydian
2192             0x10940,  //               unassigned
2193             0x10A00,  // 10A00..10A5F; Kharoshthi
2194             0x10A60,  //               unassigned
2195             0x12000,  // 12000..123FF; Cuneiform
2196             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2197             0x12480,  //               unassigned
2198             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2199             0x1D100,  // 1D100..1D1FF; Musical Symbols
2200             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2201             0x1D250,  //               unassigned
2202             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2203             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2204             0x1D380,  //               unassigned
2205             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2206             0x1D800,  //               unassigned
2207             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2208             0x1F030,  // 1F030..1F09F; Domino Tiles
2209             0x1F0A0,  //               unassigned
2210             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2211             0x2A6E0,  //               unassigned
2212             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2213             0x2FA20,  //               unassigned
2214             0xE0000,  // E0000..E007F; Tags
2215             0xE0080,  //               unassigned
2216             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2217             0xE01F0,  //               unassigned
2218             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2219             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
2220         };
2221 
2222         private static final UnicodeBlock[] blocks = {
            // Lookup table read by of(int): after its binary search over
            // blockStarts, of(int) returns the entry of this array at the
            // matching index, so this array must stay index-aligned with
            // blockStarts. A null entry stands for a range that has no block
            // (marked "unassigned" in blockStarts); of(int) returns null for
            // code points that fall into such a range.
2223             BASIC_LATIN,
2224             LATIN_1_SUPPLEMENT,
2225             LATIN_EXTENDED_A,
2226             LATIN_EXTENDED_B,
2227             IPA_EXTENSIONS,
2228             SPACING_MODIFIER_LETTERS,
2229             COMBINING_DIACRITICAL_MARKS,
2230             GREEK,
2231             CYRILLIC,
2232             CYRILLIC_SUPPLEMENTARY,
2233             ARMENIAN,
2234             HEBREW,
2235             ARABIC,
2236             SYRIAC,
2237             ARABIC_SUPPLEMENT,
2238             THAANA,
2239             NKO,
2240             null,
2241             DEVANAGARI,
2242             BENGALI,
2243             GURMUKHI,
2244             GUJARATI,
2245             ORIYA,
2246             TAMIL,
2247             TELUGU,
2248             KANNADA,
2249             MALAYALAM,
2250             SINHALA,
2251             THAI,
2252             LAO,
2253             TIBETAN,
2254             MYANMAR,
2255             GEORGIAN,
2256             HANGUL_JAMO,
2257             ETHIOPIC,
2258             ETHIOPIC_SUPPLEMENT,
2259             CHEROKEE,
2260             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2261             OGHAM,
2262             RUNIC,
2263             TAGALOG,
2264             HANUNOO,
2265             BUHID,
2266             TAGBANWA,
2267             KHMER,
2268             MONGOLIAN,
2269             null,
2270             LIMBU,
2271             TAI_LE,
2272             NEW_TAI_LUE,
2273             KHMER_SYMBOLS,
2274             BUGINESE,
2275             null,
2276             BALINESE,
2277             SUNDANESE,
2278             null,
2279             LEPCHA,
2280             OL_CHIKI,
2281             null,
2282             PHONETIC_EXTENSIONS,
2283             PHONETIC_EXTENSIONS_SUPPLEMENT,
2284             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2285             LATIN_EXTENDED_ADDITIONAL,
2286             GREEK_EXTENDED,
2287             GENERAL_PUNCTUATION,
2288             SUPERSCRIPTS_AND_SUBSCRIPTS,
2289             CURRENCY_SYMBOLS,
2290             COMBINING_MARKS_FOR_SYMBOLS,
2291             LETTERLIKE_SYMBOLS,
2292             NUMBER_FORMS,
2293             ARROWS,
2294             MATHEMATICAL_OPERATORS,
2295             MISCELLANEOUS_TECHNICAL,
2296             CONTROL_PICTURES,
2297             OPTICAL_CHARACTER_RECOGNITION,
2298             ENCLOSED_ALPHANUMERICS,
2299             BOX_DRAWING,
2300             BLOCK_ELEMENTS,
2301             GEOMETRIC_SHAPES,
2302             MISCELLANEOUS_SYMBOLS,
2303             DINGBATS,
2304             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2305             SUPPLEMENTAL_ARROWS_A,
2306             BRAILLE_PATTERNS,
2307             SUPPLEMENTAL_ARROWS_B,
2308             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2309             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2310             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2311             GLAGOLITIC,
2312             LATIN_EXTENDED_C,
2313             COPTIC,
2314             GEORGIAN_SUPPLEMENT,
2315             TIFINAGH,
2316             ETHIOPIC_EXTENDED,
2317             CYRILLIC_EXTENDED_A,
2318             SUPPLEMENTAL_PUNCTUATION,
2319             CJK_RADICALS_SUPPLEMENT,
2320             KANGXI_RADICALS,
2321             null,
2322             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2323             CJK_SYMBOLS_AND_PUNCTUATION,
2324             HIRAGANA,
2325             KATAKANA,
2326             BOPOMOFO,
2327             HANGUL_COMPATIBILITY_JAMO,
2328             KANBUN,
2329             BOPOMOFO_EXTENDED,
2330             CJK_STROKES,
2331             KATAKANA_PHONETIC_EXTENSIONS,
2332             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2333             CJK_COMPATIBILITY,
2334             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2335             YIJING_HEXAGRAM_SYMBOLS,
2336             CJK_UNIFIED_IDEOGRAPHS,
2337             YI_SYLLABLES,
2338             YI_RADICALS,
2339             null,
2340             VAI,
2341             CYRILLIC_EXTENDED_B,
2342             null,
2343             MODIFIER_TONE_LETTERS,
2344             LATIN_EXTENDED_D,
2345             SYLOTI_NAGRI,
2346             null,
2347             PHAGS_PA,
2348             SAURASHTRA,
2349             null,
2350             KAYAH_LI,
2351             REJANG,
2352             null,
2353             CHAM,
2354             null,
2355             HANGUL_SYLLABLES,
2356             null,
2357             HIGH_SURROGATES,
2358             HIGH_PRIVATE_USE_SURROGATES,
2359             LOW_SURROGATES,
2360             PRIVATE_USE_AREA,
2361             CJK_COMPATIBILITY_IDEOGRAPHS,
2362             ALPHABETIC_PRESENTATION_FORMS,
2363             ARABIC_PRESENTATION_FORMS_A,
2364             VARIATION_SELECTORS,
2365             VERTICAL_FORMS,
2366             COMBINING_HALF_MARKS,
2367             CJK_COMPATIBILITY_FORMS,
2368             SMALL_FORM_VARIANTS,
2369             ARABIC_PRESENTATION_FORMS_B,
2370             HALFWIDTH_AND_FULLWIDTH_FORMS,
2371             SPECIALS,
2372             LINEAR_B_SYLLABARY,
2373             LINEAR_B_IDEOGRAMS,
2374             AEGEAN_NUMBERS,
2375             ANCIENT_GREEK_NUMBERS,
2376             ANCIENT_SYMBOLS,
2377             PHAISTOS_DISC,
2378             null,
2379             LYCIAN,
2380             CARIAN,
2381             null,
2382             OLD_ITALIC,
2383             GOTHIC,
2384             null,
2385             UGARITIC,
2386             OLD_PERSIAN,
2387             null,
2388             DESERET,
2389             SHAVIAN,
2390             OSMANYA,
2391             null,
2392             CYPRIOT_SYLLABARY,
2393             null,
2394             PHOENICIAN,
2395             LYDIAN,
2396             null,
2397             KHAROSHTHI,
2398             null,
2399             CUNEIFORM,
2400             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2401             null,
2402             BYZANTINE_MUSICAL_SYMBOLS,
2403             MUSICAL_SYMBOLS,
2404             ANCIENT_GREEK_MUSICAL_NOTATION,
2405             null,
2406             TAI_XUAN_JING_SYMBOLS,
2407             COUNTING_ROD_NUMERALS,
2408             null,
2409             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2410             null,
2411             MAHJONG_TILES,
2412             DOMINO_TILES,
2413             null,
2414             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2415             null,
2416             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2417             null,
2418             TAGS,
2419             null,
2420             VARIATION_SELECTORS_SUPPLEMENT,
2421             null,
2422             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2423             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2424         };
2425 
2426 
2427         /**
2428          * Returns the object representing the Unicode block containing the
2429          * given character, or <code>null</code> if the character is not a
2430          * member of a defined block.
2431          *
2432                  * <p><b>Note:</b> This method cannot handle <a
2433                  * href="Character.html#supplementary"> supplementary
2434                  * characters</a>. To support all Unicode characters,
2435                  * including supplementary characters, use the {@link
2436                  * #of(int)} method.
2437          *
2438          * @param   c  The character in question
2439          * @return  The <code>UnicodeBlock</code> instance representing the
2440          *          Unicode block of which this character is a member, or
2441          *          <code>null</code> if the character is not a member of any
2442          *          Unicode block
2443          */
2444         public static UnicodeBlock of(char c) {
2445             return of((int)c);
2446         }
2447 
2448 
2449         /**
2450          * Returns the object representing the Unicode block
2451          * containing the given character (Unicode code point), or
2452          * <code>null</code> if the character is not a member of a
2453          * defined block.
2454          *
2455                  * @param   codePoint the character (Unicode code point) in question.
2456          * @return  The <code>UnicodeBlock</code> instance representing the
2457          *          Unicode block of which this character is a member, or
2458          *          <code>null</code> if the character is not a member of any
2459          *          Unicode block
2460                  * @exception IllegalArgumentException if the specified
2461                  * <code>codePoint</code> is an invalid Unicode code point.
2462                  * @see Character#isValidCodePoint(int)
2463                  * @since   1.5
2464          */
2465         public static UnicodeBlock of(int codePoint) {
2466             if (!isValidCodePoint(codePoint)) {
2467                 throw new IllegalArgumentException();
2468             }
2469 
2470             int top, bottom, current;
2471             bottom = 0;
2472             top = blockStarts.length;
2473             current = top/2;
2474 
2475             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
2476             while (top - bottom > 1) {
2477                 if (codePoint >= blockStarts[current]) {
2478                     bottom = current;
2479                 } else {
2480                     top = current;
2481                 }
2482                 current = (top + bottom) / 2;
2483             }
2484             return blocks[current];
2485         }
2486 
2487         /**
2488          * Returns the UnicodeBlock with the given name. Block
2489          * names are determined by The Unicode Standard. The file
2490          * Blocks-&lt;version&gt;.txt defines blocks for a particular
2491          * version of the standard. The {@link Character} class specifies
2492          * the version of the standard that it supports.
2493          * <p>
2494          * This method accepts block names in the following forms:
2495          * <ol>
2496          * <li> Canonical block names as defined by the Unicode Standard.
2497          * For example, the standard defines a "Basic Latin" block. Therefore, this
2498          * method accepts "Basic Latin" as a valid block name. The documentation of
2499          * each UnicodeBlock provides the canonical name.
2500          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
2501          * is a valid block name for the "Basic Latin" block.
2502          * <li>The text representation of each constant UnicodeBlock identifier.
2503          * For example, this method will return the {@link #BASIC_LATIN} block if
2504          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
2505          *  hyphens in the canonical name with underscores.
2506          * </ol>
2507          * Finally, character case is ignored for all of the valid block name forms.
2508          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
2509          * The en_US locale's case mapping rules are used to provide case-insensitive
2510          * string comparisons for block name validation.
2511          * <p>
2512          * If the Unicode Standard changes block names, both the previous and
2513          * current names will be accepted.
2514          *
2515          * @param blockName A <code>UnicodeBlock</code> name.
2516          * @return The <code>UnicodeBlock</code> instance identified
2517          *         by <code>blockName</code>
2518          * @throws IllegalArgumentException if <code>blockName</code> is an
2519          *         invalid name
2520          * @throws NullPointerException if <code>blockName</code> is null
2521          * @since 1.5
2522          */
2523         public static final UnicodeBlock forName(String blockName) {
2524             UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US));
2525             if (block == null) {
2526                 throw new IllegalArgumentException();
2527             }
2528             return block;
2529         }
2530     }
2531 
2532 
2533     /**
2534      * The primitive <code>char</code> value wrapped by this <code>Character</code>.
2535      *
2536      * @serial
2537      */
2538     private final char value;
2539 
2540     /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
2541     private static final long serialVersionUID = 3786198910865385080L;
2542 
2543     /**
2544      * Constructs a newly allocated <code>Character</code> object that
2545      * represents the specified <code>char</code> value.
2546      *
2547      * @param  value   the value to be represented by the
2548      *                  <code>Character</code> object.
2549      */
2550     public Character(char value) {
2551         this.value = value;
2552     }
2553 
2554     private static class CharacterCache {
2555         private CharacterCache(){}
2556 
2557         static final Character cache[] = new Character[127 + 1];
2558 
2559         static {
2560             for(int i = 0; i < cache.length; i++)
2561                 cache[i] = new Character((char)i);
2562         }
2563     }
2564 
2565     /**
2566      * Returns a <tt>Character</tt> instance representing the specified
2567      * <tt>char</tt> value.
2568      * If a new <tt>Character</tt> instance is not required, this method
2569      * should generally be used in preference to the constructor
2570      * {@link #Character(char)}, as this method is likely to yield
2571      * significantly better space and time performance by caching
2572      * frequently requested values.
2573      *
2574      * This method will always cache values in the range '&#92;u0000'
2575      * to '&#92;u007f'", inclusive, and may cache other values outside
2576      * of this range.
2577      *
2578      * @param  c a char value.
2579      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
2580      * @since  1.5
2581      */
2582     public static Character valueOf(char c) {
2583         if(c <= 127) { // must cache
2584             return CharacterCache.cache[(int)c];
2585         }
2586         return new Character(c);
2587     }
2588 
2589     /**
2590      * Returns the value of this <code>Character</code> object.
2591      * @return  the primitive <code>char</code> value represented by
2592      *          this object.
2593      */
2594     public char charValue() {
2595         return value;
2596     }
2597 
2598     /**
2599      * Returns a hash code for this <code>Character</code>.
2600      * @return  a hash code value for this object.
2601      */
2602     public int hashCode() {
2603         return (int)value;
2604     }
2605 
2606     /**
2607      * Compares this object against the specified object.
2608      * The result is <code>true</code> if and only if the argument is not
2609      * <code>null</code> and is a <code>Character</code> object that
2610      * represents the same <code>char</code> value as this object.
2611      *
2612      * @param   obj   the object to compare with.
2613      * @return  <code>true</code> if the objects are the same;
2614      *          <code>false</code> otherwise.
2615      */
2616     public boolean equals(Object obj) {
2617         if (obj instanceof Character) {
2618             return value == ((Character)obj).charValue();
2619         }
2620         return false;
2621     }
2622 
2623     /**
2624      * Returns a <code>String</code> object representing this
2625      * <code>Character</code>'s value.  The result is a string of
2626      * length 1 whose sole component is the primitive
2627      * <code>char</code> value represented by this
2628      * <code>Character</code> object.
2629      *
2630      * @return  a string representation of this object.
2631      */
2632     public String toString() {
2633         char buf[] = {value};
2634         return String.valueOf(buf);
2635     }
2636 
2637     /**
2638      * Returns a <code>String</code> object representing the
2639      * specified <code>char</code>.  The result is a string of length
2640      * 1 consisting solely of the specified <code>char</code>.
2641      *
2642      * @param c the <code>char</code> to be converted
2643      * @return the string representation of the specified <code>char</code>
2644      * @since 1.4
2645      */
2646     public static String toString(char c) {
2647         return String.valueOf(c);
2648     }
2649 
2650     /**
2651      * Determines whether the specified code point is a valid Unicode
2652      * code point value in the range of <code>0x0000</code> to
2653      * <code>0x10FFFF</code> inclusive. This method is equivalent to
2654      * the expression:
2655      *
2656      * <blockquote><pre>
2657      * codePoint >= 0x0000 && codePoint <= 0x10FFFF
2658      * </pre></blockquote>
2659      *
2660      * @param  codePoint the Unicode code point to be tested
2661      * @return <code>true</code> if the specified code point value
2662      * is a valid code point value;
2663      * <code>false</code> otherwise.
2664      * @since  1.5
2665      */
2666     public static boolean isValidCodePoint(int codePoint) {
2667         return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2668     }
2669 
2670     /**
2671      * Determines whether the specified character (Unicode code point)
2672      * is in the supplementary character range. The method call is
2673      * equivalent to the expression:
2674      * <blockquote><pre>
2675      * codePoint >= 0x10000 && codePoint <= 0x10FFFF
2676      * </pre></blockquote>
2677      *
2678      * @param  codePoint the character (Unicode code point) to be tested
2679      * @return <code>true</code> if the specified character is in the Unicode
2680      *         supplementary character range; <code>false</code> otherwise.
2681      * @since  1.5
2682      */
2683     public static boolean isSupplementaryCodePoint(int codePoint) {
2684         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2685             && codePoint <= MAX_CODE_POINT;
2686     }
2687 
2688     /**
2689      * Determines if the given <code>char</code> value is a
2690      * high-surrogate code unit (also known as <i>leading-surrogate
2691      * code unit</i>). Such values do not represent characters by
2692      * themselves, but are used in the representation of <a
2693      * href="#supplementary">supplementary characters</a> in the
2694      * UTF-16 encoding.
2695      *
2696      * <p>This method returns <code>true</code> if and only if
2697      * <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF'
2698      * </pre></blockquote>
2699      * is <code>true</code>.
2700      *
2701      * @param   ch   the <code>char</code> value to be tested.
2702      * @return  <code>true</code> if the <code>char</code> value
2703      *          is between '&#92;uD800' and '&#92;uDBFF' inclusive;
2704      *          <code>false</code> otherwise.
2705      * @see     java.lang.Character#isLowSurrogate(char)
2706      * @see     Character.UnicodeBlock#of(int)
2707      * @since   1.5
2708      */
2709     public static boolean isHighSurrogate(char ch) {
2710         return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2711     }
2712 
2713     /**
2714      * Determines if the given <code>char</code> value is a
2715      * low-surrogate code unit (also known as <i>trailing-surrogate code
2716      * unit</i>). Such values do not represent characters by themselves,
2717      * but are used in the representation of <a
2718      * href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
2719      *
2720      * <p> This method returns <code>true</code> if and only if
2721      * <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF'
2722      * </pre></blockquote> is <code>true</code>.
2723      *
2724      * @param   ch   the <code>char</code> value to be tested.
2725      * @return  <code>true</code> if the <code>char</code> value
2726      *          is between '&#92;uDC00' and '&#92;uDFFF' inclusive;
2727      *          <code>false</code> otherwise.
2728      * @see java.lang.Character#isHighSurrogate(char)
2729      * @since   1.5
2730      */
2731     public static boolean isLowSurrogate(char ch) {
2732         return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2733     }
2734 
2735     /**
2736      * Determines whether the specified pair of <code>char</code>
2737      * values is a valid surrogate pair. This method is equivalent to
2738      * the expression:
2739      * <blockquote><pre>
2740      * isHighSurrogate(high) && isLowSurrogate(low)
2741      * </pre></blockquote>
2742      *
2743      * @param  high the high-surrogate code value to be tested
2744      * @param  low the low-surrogate code value to be tested
2745      * @return <code>true</code> if the specified high and
2746      * low-surrogate code values represent a valid surrogate pair;
2747      * <code>false</code> otherwise.
2748      * @since  1.5
2749      */
2750     public static boolean isSurrogatePair(char high, char low) {
2751         return isHighSurrogate(high) && isLowSurrogate(low);
2752     }
2753 
2754     /**
2755      * Determines the number of <code>char</code> values needed to
2756      * represent the specified character (Unicode code point). If the
2757      * specified character is equal to or greater than 0x10000, then
2758      * the method returns 2. Otherwise, the method returns 1.
2759      *
2760      * <p>This method doesn't validate the specified character to be a
2761      * valid Unicode code point. The caller must validate the
2762      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
2763      * if necessary.
2764      *
2765      * @param   codePoint the character (Unicode code point) to be tested.
2766      * @return  2 if the character is a valid supplementary character; 1 otherwise.
2767      * @see     #isSupplementaryCodePoint(int)
2768      * @since   1.5
2769      */
2770     public static int charCount(int codePoint) {
2771         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
2772     }
2773 
2774     /**
2775      * Converts the specified surrogate pair to its supplementary code
2776      * point value. This method does not validate the specified
2777      * surrogate pair. The caller must validate it using {@link
2778      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
2779      *
2780      * @param  high the high-surrogate code unit
2781      * @param  low the low-surrogate code unit
2782      * @return the supplementary code point composed from the
2783      *         specified surrogate pair.
2784      * @since  1.5
2785      */
2786     public static int toCodePoint(char high, char low) {
2787         return ((high - MIN_HIGH_SURROGATE) << 10)
2788             + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
2789     }
2790 
2791     /**
2792      * Returns the code point at the given index of the
2793      * <code>CharSequence</code>. If the <code>char</code> value at
2794      * the given index in the <code>CharSequence</code> is in the
2795      * high-surrogate range, the following index is less than the
2796      * length of the <code>CharSequence</code>, and the
2797      * <code>char</code> value at the following index is in the
2798      * low-surrogate range, then the supplementary code point
2799      * corresponding to this surrogate pair is returned. Otherwise,
2800      * the <code>char</code> value at the given index is returned.
2801      *
2802      * @param seq a sequence of <code>char</code> values (Unicode code
2803      * units)
2804      * @param index the index to the <code>char</code> values (Unicode
2805      * code units) in <code>seq</code> to be converted
2806      * @return the Unicode code point at the given index
2807      * @exception NullPointerException if <code>seq</code> is null.
2808      * @exception IndexOutOfBoundsException if the value
2809      * <code>index</code> is negative or not less than
2810      * {@link CharSequence#length() seq.length()}.
2811      * @since  1.5
2812      */
2813     public static int codePointAt(CharSequence seq, int index) {
2814         char c1 = seq.charAt(index++);
2815         if (isHighSurrogate(c1)) {
2816             if (index < seq.length()) {
2817                 char c2 = seq.charAt(index);
2818                 if (isLowSurrogate(c2)) {
2819                     return toCodePoint(c1, c2);
2820                 }
2821             }
2822         }
2823         return c1;
2824     }
2825 
2826     /**
2827      * Returns the code point at the given index of the
2828      * <code>char</code> array. If the <code>char</code> value at
2829      * the given index in the <code>char</code> array is in the
2830      * high-surrogate range, the following index is less than the
2831      * length of the <code>char</code> array, and the
2832      * <code>char</code> value at the following index is in the
2833      * low-surrogate range, then the supplementary code point
2834      * corresponding to this surrogate pair is returned. Otherwise,
2835      * the <code>char</code> value at the given index is returned.
2836      *
2837      * @param a the <code>char</code> array
2838      * @param index the index to the <code>char</code> values (Unicode
2839      * code units) in the <code>char</code> array to be converted
2840      * @return the Unicode code point at the given index
2841      * @exception NullPointerException if <code>a</code> is null.
2842      * @exception IndexOutOfBoundsException if the value
2843      * <code>index</code> is negative or not less than
2844      * the length of the <code>char</code> array.
2845      * @since  1.5
2846      */
2847     public static int codePointAt(char[] a, int index) {
2848         return codePointAtImpl(a, index, a.length);
2849     }
2850 
2851     /**
2852      * Returns the code point at the given index of the
2853      * <code>char</code> array, where only array elements with
2854      * <code>index</code> less than <code>limit</code> can be used. If
2855      * the <code>char</code> value at the given index in the
2856      * <code>char</code> array is in the high-surrogate range, the
2857      * following index is less than the <code>limit</code>, and the
2858      * <code>char</code> value at the following index is in the
2859      * low-surrogate range, then the supplementary code point
2860      * corresponding to this surrogate pair is returned. Otherwise,
2861      * the <code>char</code> value at the given index is returned.
2862      *
2863      * @param a the <code>char</code> array
2864      * @param index the index to the <code>char</code> values (Unicode
2865      * code units) in the <code>char</code> array to be converted
2866      * @param limit the index after the last array element that can be used in the
2867      * <code>char</code> array
2868      * @return the Unicode code point at the given index
2869      * @exception NullPointerException if <code>a</code> is null.
2870      * @exception IndexOutOfBoundsException if the <code>index</code>
2871      * argument is negative or not less than the <code>limit</code>
2872      * argument, or if the <code>limit</code> argument is negative or
2873      * greater than the length of the <code>char</code> array.
2874      * @since  1.5
2875      */
2876     public static int codePointAt(char[] a, int index, int limit) {
2877         if (index >= limit || limit < 0 || limit > a.length) {
2878             throw new IndexOutOfBoundsException();
2879         }
2880         return codePointAtImpl(a, index, limit);
2881     }
2882 
2883     static int codePointAtImpl(char[] a, int index, int limit) {
2884         char c1 = a[index++];
2885         if (isHighSurrogate(c1)) {
2886             if (index < limit) {
2887                 char c2 = a[index];
2888                 if (isLowSurrogate(c2)) {
2889                     return toCodePoint(c1, c2);
2890                 }
2891             }
2892         }
2893         return c1;
2894     }
2895 
2896     /**
2897      * Returns the code point preceding the given index of the
2898      * <code>CharSequence</code>. If the <code>char</code> value at
2899      * <code>(index - 1)</code> in the <code>CharSequence</code> is in
2900      * the low-surrogate range, <code>(index - 2)</code> is not
2901      * negative, and the <code>char</code> value at <code>(index -
2902      * 2)</code> in the <code>CharSequence</code> is in the
2903      * high-surrogate range, then the supplementary code point
2904      * corresponding to this surrogate pair is returned. Otherwise,
2905      * the <code>char</code> value at <code>(index - 1)</code> is
2906      * returned.
2907      *
2908      * @param seq the <code>CharSequence</code> instance
2909      * @param index the index following the code point that should be returned
2910      * @return the Unicode code point value before the given index.
2911      * @exception NullPointerException if <code>seq</code> is null.
2912      * @exception IndexOutOfBoundsException if the <code>index</code>
2913      * argument is less than 1 or greater than {@link
2914      * CharSequence#length() seq.length()}.
2915      * @since  1.5
2916      */
2917     public static int codePointBefore(CharSequence seq, int index) {
2918         char c2 = seq.charAt(--index);
2919         if (isLowSurrogate(c2)) {
2920             if (index > 0) {
2921                 char c1 = seq.charAt(--index);
2922                 if (isHighSurrogate(c1)) {
2923                     return toCodePoint(c1, c2);
2924                 }
2925             }
2926         }
2927         return c2;
2928     }
2929 
2930     /**
2931      * Returns the code point preceding the given index of the
2932      * <code>char</code> array. If the <code>char</code> value at
2933      * <code>(index - 1)</code> in the <code>char</code> array is in
2934      * the low-surrogate range, <code>(index - 2)</code> is not
2935      * negative, and the <code>char</code> value at <code>(index -
2936      * 2)</code> in the <code>char</code> array is in the
2937      * high-surrogate range, then the supplementary code point
2938      * corresponding to this surrogate pair is returned. Otherwise,
2939      * the <code>char</code> value at <code>(index - 1)</code> is
2940      * returned.
2941      *
2942      * @param a the <code>char</code> array
2943      * @param index the index following the code point that should be returned
2944      * @return the Unicode code point value before the given index.
2945      * @exception NullPointerException if <code>a</code> is null.
2946      * @exception IndexOutOfBoundsException if the <code>index</code>
2947      * argument is less than 1 or greater than the length of the
2948      * <code>char</code> array
2949      * @since  1.5
2950      */
2951     public static int codePointBefore(char[] a, int index) {
2952         return codePointBeforeImpl(a, index, 0);
2953     }
2954 
2955     /**
2956      * Returns the code point preceding the given index of the
2957      * <code>char</code> array, where only array elements with
2958      * <code>index</code> greater than or equal to <code>start</code>
2959      * can be used. If the <code>char</code> value at <code>(index -
2960      * 1)</code> in the <code>char</code> array is in the
2961      * low-surrogate range, <code>(index - 2)</code> is not less than
2962      * <code>start</code>, and the <code>char</code> value at
2963      * <code>(index - 2)</code> in the <code>char</code> array is in
2964      * the high-surrogate range, then the supplementary code point
2965      * corresponding to this surrogate pair is returned. Otherwise,
2966      * the <code>char</code> value at <code>(index - 1)</code> is
2967      * returned.
2968      *
2969      * @param a the <code>char</code> array
2970      * @param index the index following the code point that should be returned
2971      * @param start the index of the first array element in the
2972      * <code>char</code> array
2973      * @return the Unicode code point value before the given index.
2974      * @exception NullPointerException if <code>a</code> is null.
2975      * @exception IndexOutOfBoundsException if the <code>index</code>
2976      * argument is not greater than the <code>start</code> argument or
2977      * is greater than the length of the <code>char</code> array, or
2978      * if the <code>start</code> argument is negative or not less than
2979      * the length of the <code>char</code> array.
2980      * @since  1.5
2981      */
2982     public static int codePointBefore(char[] a, int index, int start) {
2983         if (index <= start || start < 0 || start >= a.length) {
2984             throw new IndexOutOfBoundsException();
2985         }
2986         return codePointBeforeImpl(a, index, start);
2987     }
2988 
2989     static int codePointBeforeImpl(char[] a, int index, int start) {
2990         char c2 = a[--index];
2991         if (isLowSurrogate(c2)) {
2992             if (index > start) {
2993                 char c1 = a[--index];
2994                 if (isHighSurrogate(c1)) {
2995                     return toCodePoint(c1, c2);
2996                 }
2997             }
2998         }
2999         return c2;
3000     }
3001 
3002     /**
3003      * Converts the specified character (Unicode code point) to its
3004      * UTF-16 representation. If the specified code point is a BMP
3005      * (Basic Multilingual Plane or Plane 0) value, the same value is
3006      * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
3007      * specified code point is a supplementary character, its
3008      * surrogate values are stored in <code>dst[dstIndex]</code>
3009      * (high-surrogate) and <code>dst[dstIndex+1]</code>
3010      * (low-surrogate), and 2 is returned.
3011      *
3012      * @param  codePoint the character (Unicode code point) to be converted.
3013      * @param  dst an array of <code>char</code> in which the
3014      * <code>codePoint</code>'s UTF-16 value is stored.
3015      * @param dstIndex the start index into the <code>dst</code>
3016      * array where the converted value is stored.
3017      * @return 1 if the code point is a BMP code point, 2 if the
3018      * code point is a supplementary code point.
3019      * @exception IllegalArgumentException if the specified
3020      * <code>codePoint</code> is not a valid Unicode code point.
3021      * @exception NullPointerException if the specified <code>dst</code> is null.
3022      * @exception IndexOutOfBoundsException if <code>dstIndex</code>
3023      * is negative or not less than <code>dst.length</code>, or if
3024      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
3025      * array element(s) to store the resulting <code>char</code>
3026      * value(s). (If <code>dstIndex</code> is equal to
3027      * <code>dst.length-1</code> and the specified
3028      * <code>codePoint</code> is a supplementary character, the
3029      * high-surrogate value is not stored in
3030      * <code>dst[dstIndex]</code>.)
3031      * @since  1.5
3032      */
3033     public static int toChars(int codePoint, char[] dst, int dstIndex) {
3034         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3035             throw new IllegalArgumentException();
3036         }
3037         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3038             dst[dstIndex] = (char) codePoint;
3039             return 1;
3040         }
3041         toSurrogates(codePoint, dst, dstIndex);
3042         return 2;
3043     }
3044 
3045     /**
3046      * Converts the specified character (Unicode code point) to its
3047      * UTF-16 representation stored in a <code>char</code> array. If
3048      * the specified code point is a BMP (Basic Multilingual Plane or
3049      * Plane 0) value, the resulting <code>char</code> array has
3050      * the same value as <code>codePoint</code>. If the specified code
3051      * point is a supplementary code point, the resulting
3052      * <code>char</code> array has the corresponding surrogate pair.
3053      *
3054      * @param  codePoint a Unicode code point
3055      * @return a <code>char</code> array having
3056      *         <code>codePoint</code>'s UTF-16 representation.
3057      * @exception IllegalArgumentException if the specified
3058      * <code>codePoint</code> is not a valid Unicode code point.
3059      * @since  1.5
3060      */
3061     public static char[] toChars(int codePoint) {
3062         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
3063             throw new IllegalArgumentException();
3064         }
3065         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
3066                 return new char[] { (char) codePoint };
3067         }
3068         char[] result = new char[2];
3069         toSurrogates(codePoint, result, 0);
3070         return result;
3071     }
3072 
3073     static void toSurrogates(int codePoint, char[] dst, int index) {
3074         int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
3075         dst[index+1] = (char)((offset & 0x3ff) + MIN_LOW_SURROGATE);
3076         dst[index] = (char)((offset >>> 10) + MIN_HIGH_SURROGATE);
3077     }
3078 
3079     /**
3080      * Returns the number of Unicode code points in the text range of
3081      * the specified char sequence. The text range begins at the
3082      * specified <code>beginIndex</code> and extends to the
3083      * <code>char</code> at index <code>endIndex - 1</code>. Thus the
3084      * length (in <code>char</code>s) of the text range is
3085      * <code>endIndex-beginIndex</code>. Unpaired surrogates within
3086      * the text range count as one code point each.
3087      *
3088      * @param seq the char sequence
3089      * @param beginIndex the index to the first <code>char</code> of
3090      * the text range.
3091      * @param endIndex the index after the last <code>char</code> of
3092      * the text range.
3093      * @return the number of Unicode code points in the specified text
3094      * range
3095      * @exception NullPointerException if <code>seq</code> is null.
3096      * @exception IndexOutOfBoundsException if the
3097      * <code>beginIndex</code> is negative, or <code>endIndex</code>
3098      * is larger than the length of the given sequence, or
3099      * <code>beginIndex</code> is larger than <code>endIndex</code>.
3100      * @since  1.5
3101      */
3102     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
3103         int length = seq.length();
3104         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
3105             throw new IndexOutOfBoundsException();
3106         }
3107         int n = 0;
3108         for (int i = beginIndex; i < endIndex; ) {
3109             n++;
3110             if (isHighSurrogate(seq.charAt(i++))) {
3111                 if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
3112                     i++;
3113                 }
3114             }
3115         }
3116         return n;
3117     }
3118 
3119     /**
3120      * Returns the number of Unicode code points in a subarray of the
3121      * <code>char</code> array argument. The <code>offset</code>
3122      * argument is the index of the first <code>char</code> of the
3123      * subarray and the <code>count</code> argument specifies the
3124      * length of the subarray in <code>char</code>s. Unpaired
3125      * surrogates within the subarray count as one code point each.
3126      *
3127      * @param a the <code>char</code> array
3128      * @param offset the index of the first <code>char</code> in the
3129      * given <code>char</code> array
3130      * @param count the length of the subarray in <code>char</code>s
3131      * @return the number of Unicode code points in the specified subarray
3132      * @exception NullPointerException if <code>a</code> is null.
3133      * @exception IndexOutOfBoundsException if <code>offset</code> or
3134      * <code>count</code> is negative, or if <code>offset +
3135      * count</code> is larger than the length of the given array.
3136      * @since  1.5
3137      */
3138     public static int codePointCount(char[] a, int offset, int count) {
3139         if (count > a.length - offset || offset < 0 || count < 0) {
3140             throw new IndexOutOfBoundsException();
3141         }
3142         return codePointCountImpl(a, offset, count);
3143     }
3144 
3145     static int codePointCountImpl(char[] a, int offset, int count) {
3146         int endIndex = offset + count;
3147         int n = 0;
3148         for (int i = offset; i < endIndex; ) {
3149             n++;
3150             if (isHighSurrogate(a[i++])) {
3151                 if (i < endIndex && isLowSurrogate(a[i])) {
3152                     i++;
3153                 }
3154             }
3155         }
3156         return n;
3157     }
3158 
3159     /**
3160      * Returns the index within the given char sequence that is offset
3161      * from the given <code>index</code> by <code>codePointOffset</code>
3162      * code points. Unpaired surrogates within the text range given by
3163      * <code>index</code> and <code>codePointOffset</code> count as
3164      * one code point each.
3165      *
3166      * @param seq the char sequence
3167      * @param index the index to be offset
3168      * @param codePointOffset the offset in code points
3169      * @return the index within the char sequence
3170      * @exception NullPointerException if <code>seq</code> is null.
3171      * @exception IndexOutOfBoundsException if <code>index</code>
     *   is negative or larger than the length of the char sequence,
3173      *   or if <code>codePointOffset</code> is positive and the
3174      *   subsequence starting with <code>index</code> has fewer than
3175      *   <code>codePointOffset</code> code points, or if
3176      *   <code>codePointOffset</code> is negative and the subsequence
3177      *   before <code>index</code> has fewer than the absolute value
3178      *   of <code>codePointOffset</code> code points.
3179      * @since 1.5
3180      */
3181     public static int offsetByCodePoints(CharSequence seq, int index,
3182                                          int codePointOffset) {
3183         int length = seq.length();
3184         if (index < 0 || index > length) {
3185             throw new IndexOutOfBoundsException();
3186         }
3187 
3188         int x = index;
3189         if (codePointOffset >= 0) {
3190             int i;
3191             for (i = 0; x < length && i < codePointOffset; i++) {
3192                 if (isHighSurrogate(seq.charAt(x++))) {
3193                     if (x < length && isLowSurrogate(seq.charAt(x))) {
3194                         x++;
3195                     }
3196                 }
3197             }
3198             if (i < codePointOffset) {
3199                 throw new IndexOutOfBoundsException();
3200             }
3201         } else {
3202             int i;
3203             for (i = codePointOffset; x > 0 && i < 0; i++) {
3204                 if (isLowSurrogate(seq.charAt(--x))) {
3205                     if (x > 0 && isHighSurrogate(seq.charAt(x-1))) {
3206                         x--;
3207                     }
3208                 }
3209             }
3210             if (i < 0) {
3211                 throw new IndexOutOfBoundsException();
3212             }
3213         }
3214         return x;
3215     }
3216 
3217     /**
3218      * Returns the index within the given <code>char</code> subarray
3219      * that is offset from the given <code>index</code> by
3220      * <code>codePointOffset</code> code points. The
3221      * <code>start</code> and <code>count</code> arguments specify a
3222      * subarray of the <code>char</code> array. Unpaired surrogates
3223      * within the text range given by <code>index</code> and
3224      * <code>codePointOffset</code> count as one code point each.
3225      *
3226      * @param a the <code>char</code> array
3227      * @param start the index of the first <code>char</code> of the
3228      * subarray
3229      * @param count the length of the subarray in <code>char</code>s
3230      * @param index the index to be offset
3231      * @param codePointOffset the offset in code points
3232      * @return the index within the subarray
3233      * @exception NullPointerException if <code>a</code> is null.
3234      * @exception IndexOutOfBoundsException
3235      *   if <code>start</code> or <code>count</code> is negative,
3236      *   or if <code>start + count</code> is larger than the length of
3237      *   the given array,
3238      *   or if <code>index</code> is less than <code>start</code> or
     *   larger than <code>start + count</code>,
3240      *   or if <code>codePointOffset</code> is positive and the text range
3241      *   starting with <code>index</code> and ending with <code>start
3242      *   + count - 1</code> has fewer than <code>codePointOffset</code> code
3243      *   points,
3244      *   or if <code>codePointOffset</code> is negative and the text range
3245      *   starting with <code>start</code> and ending with <code>index
3246      *   - 1</code> has fewer than the absolute value of
3247      *   <code>codePointOffset</code> code points.
3248      * @since 1.5
3249      */
3250     public static int offsetByCodePoints(char[] a, int start, int count,
3251                                          int index, int codePointOffset) {
3252         if (count > a.length-start || start < 0 || count < 0
3253             || index < start || index > start+count) {
3254             throw new IndexOutOfBoundsException();
3255         }
3256         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
3257     }
3258 
3259     static int offsetByCodePointsImpl(char[]a, int start, int count,
3260                                       int index, int codePointOffset) {
3261         int x = index;
3262         if (codePointOffset >= 0) {
3263             int limit = start + count;
3264             int i;
3265             for (i = 0; x < limit && i < codePointOffset; i++) {
3266                 if (isHighSurrogate(a[x++])) {
3267                     if (x < limit && isLowSurrogate(a[x])) {
3268                         x++;
3269                     }
3270                 }
3271             }
3272             if (i < codePointOffset) {
3273                 throw new IndexOutOfBoundsException();
3274             }
3275         } else {
3276             int i;
3277             for (i = codePointOffset; x > start && i < 0; i++) {
3278                 if (isLowSurrogate(a[--x])) {
3279                     if (x > start && isHighSurrogate(a[x-1])) {
3280                         x--;
3281                     }
3282                 }
3283             }
3284             if (i < 0) {
3285                 throw new IndexOutOfBoundsException();
3286             }
3287         }
3288         return x;
3289     }
3290 
    /**
3292      * Determines if the specified character is a lowercase character.
3293      * <p>
3294      * A character is lowercase if its general category type, provided
3295      * by <code>Character.getType(ch)</code>, is
3296      * <code>LOWERCASE_LETTER</code>.
3297      * <p>
3298      * The following are examples of lowercase characters:
3299      * <p><blockquote><pre>
3300      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3301      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3302      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3303      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3304      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3305      * </pre></blockquote>
3306      * <p> Many other Unicode characters are lowercase too.
3307      *
3308      * <p><b>Note:</b> This method cannot handle <a
3309      * href="#supplementary"> supplementary characters</a>. To support
3310      * all Unicode characters, including supplementary characters, use
3311      * the {@link #isLowerCase(int)} method.
3312      *
3313      * @param   ch   the character to be tested.
3314      * @return  <code>true</code> if the character is lowercase;
3315      *          <code>false</code> otherwise.
     * @see     java.lang.Character#isUpperCase(char)
3317      * @see     java.lang.Character#isTitleCase(char)
3318      * @see     java.lang.Character#toLowerCase(char)
3319      * @see     java.lang.Character#getType(char)
3320      */
3321     public static boolean isLowerCase(char ch) {
3322         return isLowerCase((int)ch);
3323     }
3324 
3325     /**
3326      * Determines if the specified character (Unicode code point) is a
3327      * lowercase character.
3328      * <p>
3329      * A character is lowercase if its general category type, provided
3330      * by {@link Character#getType getType(codePoint)}, is
3331      * <code>LOWERCASE_LETTER</code>.
3332      * <p>
3333      * The following are examples of lowercase characters:
3334      * <p><blockquote><pre>
3335      * a b c d e f g h i j k l m n o p q r s t u v w x y z
3336      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
3337      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
3338      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
3339      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
3340      * </pre></blockquote>
3341      * <p> Many other Unicode characters are lowercase too.
3342      *
3343      * @param   codePoint the character (Unicode code point) to be tested.
3344      * @return  <code>true</code> if the character is lowercase;
3345      *          <code>false</code> otherwise.
     * @see     java.lang.Character#isUpperCase(int)
3347      * @see     java.lang.Character#isTitleCase(int)
3348      * @see     java.lang.Character#toLowerCase(int)
3349      * @see     java.lang.Character#getType(int)
3350      * @since   1.5
3351      */
3352     public static boolean isLowerCase(int codePoint) {
3353         return getType(codePoint) == Character.LOWERCASE_LETTER;
3354     }
3355 
    /**
3357      * Determines if the specified character is an uppercase character.
3358      * <p>
3359      * A character is uppercase if its general category type, provided by
3360      * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
3361      * <p>
3362      * The following are examples of uppercase characters:
3363      * <p><blockquote><pre>
3364      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3365      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3366      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3367      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3368      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3369      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
3371      *
3372      * <p><b>Note:</b> This method cannot handle <a
3373      * href="#supplementary"> supplementary characters</a>. To support
3374      * all Unicode characters, including supplementary characters, use
3375      * the {@link #isUpperCase(int)} method.
3376      *
3377      * @param   ch   the character to be tested.
3378      * @return  <code>true</code> if the character is uppercase;
3379      *          <code>false</code> otherwise.
3380      * @see     java.lang.Character#isLowerCase(char)
3381      * @see     java.lang.Character#isTitleCase(char)
3382      * @see     java.lang.Character#toUpperCase(char)
3383      * @see     java.lang.Character#getType(char)
3384      * @since   1.0
3385      */
3386     public static boolean isUpperCase(char ch) {
3387         return isUpperCase((int)ch);
3388     }
3389 
3390     /**
3391      * Determines if the specified character (Unicode code point) is an uppercase character.
3392      * <p>
3393      * A character is uppercase if its general category type, provided by
3394      * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
3395      * <p>
3396      * The following are examples of uppercase characters:
3397      * <p><blockquote><pre>
3398      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
3399      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
3400      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
3401      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
3402      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
3403      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
3405      *
3406      * @param   codePoint the character (Unicode code point) to be tested.
3407      * @return  <code>true</code> if the character is uppercase;
3408      *          <code>false</code> otherwise.
3409      * @see     java.lang.Character#isLowerCase(int)
3410      * @see     java.lang.Character#isTitleCase(int)
3411      * @see     java.lang.Character#toUpperCase(int)
3412      * @see     java.lang.Character#getType(int)
3413      * @since   1.5
3414      */
3415     public static boolean isUpperCase(int codePoint) {
3416         return getType(codePoint) == Character.UPPERCASE_LETTER;
3417     }
3418 
3419     /**
3420      * Determines if the specified character is a titlecase character.
3421      * <p>
3422      * A character is a titlecase character if its general
3423      * category type, provided by <code>Character.getType(ch)</code>,
3424      * is <code>TITLECASE_LETTER</code>.
3425      * <p>
3426      * Some characters look like pairs of Latin letters. For example, there
3427      * is an uppercase letter that looks like "LJ" and has a corresponding
3428      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3429      * is the appropriate form to use when rendering a word in lowercase
3430      * with initial capitals, as for a book title.
3431      * <p>
3432      * These are some of the Unicode characters for which this method returns
3433      * <code>true</code>:
3434      * <ul>
3435      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3436      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3437      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3438      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3439      * </ul>
     * <p> Many other Unicode characters are titlecase too.
3441      *
3442      * <p><b>Note:</b> This method cannot handle <a
3443      * href="#supplementary"> supplementary characters</a>. To support
3444      * all Unicode characters, including supplementary characters, use
3445      * the {@link #isTitleCase(int)} method.
3446      *
3447      * @param   ch   the character to be tested.
3448      * @return  <code>true</code> if the character is titlecase;
3449      *          <code>false</code> otherwise.
3450      * @see     java.lang.Character#isLowerCase(char)
3451      * @see     java.lang.Character#isUpperCase(char)
3452      * @see     java.lang.Character#toTitleCase(char)
3453      * @see     java.lang.Character#getType(char)
3454      * @since   1.0.2
3455      */
3456     public static boolean isTitleCase(char ch) {
3457         return isTitleCase((int)ch);
3458     }
3459 
3460     /**
3461      * Determines if the specified character (Unicode code point) is a titlecase character.
3462      * <p>
3463      * A character is a titlecase character if its general
3464      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3465      * is <code>TITLECASE_LETTER</code>.
3466      * <p>
3467      * Some characters look like pairs of Latin letters. For example, there
3468      * is an uppercase letter that looks like "LJ" and has a corresponding
3469      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
3470      * is the appropriate form to use when rendering a word in lowercase
3471      * with initial capitals, as for a book title.
3472      * <p>
3473      * These are some of the Unicode characters for which this method returns
3474      * <code>true</code>:
3475      * <ul>
3476      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
3477      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
3478      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
3479      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
3480      * </ul>
     * <p> Many other Unicode characters are titlecase too.
3482      *
3483      * @param   codePoint the character (Unicode code point) to be tested.
3484      * @return  <code>true</code> if the character is titlecase;
3485      *          <code>false</code> otherwise.
3486      * @see     java.lang.Character#isLowerCase(int)
3487      * @see     java.lang.Character#isUpperCase(int)
3488      * @see     java.lang.Character#toTitleCase(int)
3489      * @see     java.lang.Character#getType(int)
3490      * @since   1.5
3491      */
3492     public static boolean isTitleCase(int codePoint) {
3493         return getType(codePoint) == Character.TITLECASE_LETTER;
3494     }
3495 
3496     /**
3497      * Determines if the specified character is a digit.
3498      * <p>
3499      * A character is a digit if its general category type, provided
3500      * by <code>Character.getType(ch)</code>, is
3501      * <code>DECIMAL_DIGIT_NUMBER</code>.
3502      * <p>
3503      * Some Unicode character ranges that contain digits:
3504      * <ul>
3505      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3506      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3507      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3508      *     Arabic-Indic digits
3509      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3510      *     Extended Arabic-Indic digits
3511      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3512      *     Devanagari digits
3513      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3514      *     Fullwidth digits
3515      * </ul>
3516      *
3517      * Many other character ranges contain digits as well.
3518      *
3519      * <p><b>Note:</b> This method cannot handle <a
3520      * href="#supplementary"> supplementary characters</a>. To support
3521      * all Unicode characters, including supplementary characters, use
3522      * the {@link #isDigit(int)} method.
3523      *
3524      * @param   ch   the character to be tested.
3525      * @return  <code>true</code> if the character is a digit;
3526      *          <code>false</code> otherwise.
3527      * @see     java.lang.Character#digit(char, int)
3528      * @see     java.lang.Character#forDigit(int, int)
3529      * @see     java.lang.Character#getType(char)
3530      */
3531     public static boolean isDigit(char ch) {
3532         return isDigit((int)ch);
3533     }
3534 
3535     /**
3536      * Determines if the specified character (Unicode code point) is a digit.
3537      * <p>
3538      * A character is a digit if its general category type, provided
3539      * by {@link Character#getType(int) getType(codePoint)}, is
3540      * <code>DECIMAL_DIGIT_NUMBER</code>.
3541      * <p>
3542      * Some Unicode character ranges that contain digits:
3543      * <ul>
3544      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
3545      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
3546      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
3547      *     Arabic-Indic digits
3548      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
3549      *     Extended Arabic-Indic digits
3550      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
3551      *     Devanagari digits
3552      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
3553      *     Fullwidth digits
3554      * </ul>
3555      *
3556      * Many other character ranges contain digits as well.
3557      *
3558      * @param   codePoint the character (Unicode code point) to be tested.
3559      * @return  <code>true</code> if the character is a digit;
3560      *          <code>false</code> otherwise.
3561      * @see     java.lang.Character#forDigit(int, int)
3562      * @see     java.lang.Character#getType(int)
3563      * @since   1.5
3564      */
3565     public static boolean isDigit(int codePoint) {
3566         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
3567     }
3568 
3569     /**
3570      * Determines if a character is defined in Unicode.
3571      * <p>
3572      * A character is defined if at least one of the following is true:
3573      * <ul>
3574      * <li>It has an entry in the UnicodeData file.
3575      * <li>It has a value in a range defined by the UnicodeData file.
3576      * </ul>
3577      *
3578      * <p><b>Note:</b> This method cannot handle <a
3579      * href="#supplementary"> supplementary characters</a>. To support
3580      * all Unicode characters, including supplementary characters, use
3581      * the {@link #isDefined(int)} method.
3582      *
3583      * @param   ch   the character to be tested
3584      * @return  <code>true</code> if the character has a defined meaning
3585      *          in Unicode; <code>false</code> otherwise.
3586      * @see     java.lang.Character#isDigit(char)
3587      * @see     java.lang.Character#isLetter(char)
3588      * @see     java.lang.Character#isLetterOrDigit(char)
3589      * @see     java.lang.Character#isLowerCase(char)
3590      * @see     java.lang.Character#isTitleCase(char)
3591      * @see     java.lang.Character#isUpperCase(char)
3592      * @since   1.0.2
3593      */
3594     public static boolean isDefined(char ch) {
3595         return isDefined((int)ch);
3596     }
3597 
3598     /**
3599      * Determines if a character (Unicode code point) is defined in Unicode.
3600      * <p>
3601      * A character is defined if at least one of the following is true:
3602      * <ul>
3603      * <li>It has an entry in the UnicodeData file.
3604      * <li>It has a value in a range defined by the UnicodeData file.
3605      * </ul>
3606      *
3607      * @param   codePoint the character (Unicode code point) to be tested.
3608      * @return  <code>true</code> if the character has a defined meaning
3609      *          in Unicode; <code>false</code> otherwise.
3610      * @see     java.lang.Character#isDigit(int)
3611      * @see     java.lang.Character#isLetter(int)
3612      * @see     java.lang.Character#isLetterOrDigit(int)
3613      * @see     java.lang.Character#isLowerCase(int)
3614      * @see     java.lang.Character#isTitleCase(int)
3615      * @see     java.lang.Character#isUpperCase(int)
3616      * @since   1.5
3617      */
3618     public static boolean isDefined(int codePoint) {
3619         return getType(codePoint) != Character.UNASSIGNED;
3620     }
3621 
3622     /**
3623      * Determines if the specified character is a letter.
3624      * <p>
3625      * A character is considered to be a letter if its general
3626      * category type, provided by <code>Character.getType(ch)</code>,
3627      * is any of the following:
3628      * <ul>
3629      * <li> <code>UPPERCASE_LETTER</code>
3630      * <li> <code>LOWERCASE_LETTER</code>
3631      * <li> <code>TITLECASE_LETTER</code>
3632      * <li> <code>MODIFIER_LETTER</code>
3633      * <li> <code>OTHER_LETTER</code>
3634      * </ul>
3635      *
3636      * Not all letters have case. Many characters are
3637      * letters but are neither uppercase nor lowercase nor titlecase.
3638      *
3639      * <p><b>Note:</b> This method cannot handle <a
3640      * href="#supplementary"> supplementary characters</a>. To support
3641      * all Unicode characters, including supplementary characters, use
3642      * the {@link #isLetter(int)} method.
3643      *
3644      * @param   ch   the character to be tested.
3645      * @return  <code>true</code> if the character is a letter;
3646      *          <code>false</code> otherwise.
3647      * @see     java.lang.Character#isDigit(char)
3648      * @see     java.lang.Character#isJavaIdentifierStart(char)
3649      * @see     java.lang.Character#isJavaLetter(char)
3650      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3651      * @see     java.lang.Character#isLetterOrDigit(char)
3652      * @see     java.lang.Character#isLowerCase(char)
3653      * @see     java.lang.Character#isTitleCase(char)
3654      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3655      * @see     java.lang.Character#isUpperCase(char)
3656      */
3657     public static boolean isLetter(char ch) {
3658         return isLetter((int)ch);
3659     }
3660 
3661     /**
3662      * Determines if the specified character (Unicode code point) is a letter.
3663      * <p>
3664      * A character is considered to be a letter if its general
3665      * category type, provided by {@link Character#getType(int) getType(codePoint)},
3666      * is any of the following:
3667      * <ul>
3668      * <li> <code>UPPERCASE_LETTER</code>
3669      * <li> <code>LOWERCASE_LETTER</code>
3670      * <li> <code>TITLECASE_LETTER</code>
3671      * <li> <code>MODIFIER_LETTER</code>
3672      * <li> <code>OTHER_LETTER</code>
3673      * </ul>
3674      *
3675      * Not all letters have case. Many characters are
3676      * letters but are neither uppercase nor lowercase nor titlecase.
3677      *
3678      * @param   codePoint the character (Unicode code point) to be tested.
3679      * @return  <code>true</code> if the character is a letter;
3680      *          <code>false</code> otherwise.
3681      * @see     java.lang.Character#isDigit(int)
3682      * @see     java.lang.Character#isJavaIdentifierStart(int)
3683      * @see     java.lang.Character#isLetterOrDigit(int)
3684      * @see     java.lang.Character#isLowerCase(int)
3685      * @see     java.lang.Character#isTitleCase(int)
3686      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3687      * @see     java.lang.Character#isUpperCase(int)
3688      * @since   1.5
3689      */
3690     public static boolean isLetter(int codePoint) {
3691         return ((((1 << Character.UPPERCASE_LETTER) |
3692             (1 << Character.LOWERCASE_LETTER) |
3693             (1 << Character.TITLECASE_LETTER) |
3694             (1 << Character.MODIFIER_LETTER) |
3695             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
3696             != 0;
3697     }
3698 
3699     /**
3700      * Determines if the specified character is a letter or digit.
3701      * <p>
3702      * A character is considered to be a letter or digit if either
3703      * <code>Character.isLetter(char ch)</code> or
3704      * <code>Character.isDigit(char ch)</code> returns
3705      * <code>true</code> for the character.
3706      *
3707      * <p><b>Note:</b> This method cannot handle <a
3708      * href="#supplementary"> supplementary characters</a>. To support
3709      * all Unicode characters, including supplementary characters, use
3710      * the {@link #isLetterOrDigit(int)} method.
3711      *
3712      * @param   ch   the character to be tested.
3713      * @return  <code>true</code> if the character is a letter or digit;
3714      *          <code>false</code> otherwise.
3715      * @see     java.lang.Character#isDigit(char)
3716      * @see     java.lang.Character#isJavaIdentifierPart(char)
3717      * @see     java.lang.Character#isJavaLetter(char)
3718      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3719      * @see     java.lang.Character#isLetter(char)
3720      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3721      * @since   1.0.2
3722      */
3723     public static boolean isLetterOrDigit(char ch) {
3724         return isLetterOrDigit((int)ch);
3725     }
3726 
3727     /**
3728      * Determines if the specified character (Unicode code point) is a letter or digit.
3729      * <p>
3730      * A character is considered to be a letter or digit if either
3731      * {@link #isLetter(int) isLetter(codePoint)} or
3732      * {@link #isDigit(int) isDigit(codePoint)} returns
3733      * <code>true</code> for the character.
3734      *
3735      * @param   codePoint the character (Unicode code point) to be tested.
3736      * @return  <code>true</code> if the character is a letter or digit;
3737      *          <code>false</code> otherwise.
3738      * @see     java.lang.Character#isDigit(int)
3739      * @see     java.lang.Character#isJavaIdentifierPart(int)
3740      * @see     java.lang.Character#isLetter(int)
3741      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3742      * @since   1.5
3743      */
3744     public static boolean isLetterOrDigit(int codePoint) {
3745         return ((((1 << Character.UPPERCASE_LETTER) |
3746             (1 << Character.LOWERCASE_LETTER) |
3747             (1 << Character.TITLECASE_LETTER) |
3748             (1 << Character.MODIFIER_LETTER) |
3749             (1 << Character.OTHER_LETTER) |
3750             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
3751             != 0;
3752     }
3753 
3754     /**
3755      * Determines if the specified character is permissible as the first
3756      * character in a Java identifier.
3757      * <p>
3758      * A character may start a Java identifier if and only if
3759      * one of the following is true:
3760      * <ul>
3761      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3762      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3763      * <li> ch is a currency symbol (such as "$")
3764      * <li> ch is a connecting punctuation character (such as "_").
3765      * </ul>
3766      *
3767      * @param   ch the character to be tested.
3768      * @return  <code>true</code> if the character may start a Java
3769      *          identifier; <code>false</code> otherwise.
3770      * @see     java.lang.Character#isJavaLetterOrDigit(char)
3771      * @see     java.lang.Character#isJavaIdentifierStart(char)
3772      * @see     java.lang.Character#isJavaIdentifierPart(char)
3773      * @see     java.lang.Character#isLetter(char)
3774      * @see     java.lang.Character#isLetterOrDigit(char)
3775      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
3777      * @deprecated Replaced by isJavaIdentifierStart(char).
3778      */
3779     @Deprecated
3780     public static boolean isJavaLetter(char ch) {
3781         return isJavaIdentifierStart(ch);
3782     }
3783 
3784     /**
3785      * Determines if the specified character may be part of a Java
3786      * identifier as other than the first character.
3787      * <p>
3788      * A character may be part of a Java identifier if and only if any
3789      * of the following are true:
3790      * <ul>
3791      * <li>  it is a letter
3792      * <li>  it is a currency symbol (such as <code>'$'</code>)
3793      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3794      * <li>  it is a digit
3795      * <li>  it is a numeric letter (such as a Roman numeral character)
3796      * <li>  it is a combining mark
3797      * <li>  it is a non-spacing mark
3798      * <li> <code>isIdentifierIgnorable</code> returns
3799      * <code>true</code> for the character.
3800      * </ul>
3801      *
3802      * @param   ch the character to be tested.
3803      * @return  <code>true</code> if the character may be part of a
3804      *          Java identifier; <code>false</code> otherwise.
3805      * @see     java.lang.Character#isJavaLetter(char)
3806      * @see     java.lang.Character#isJavaIdentifierStart(char)
3807      * @see     java.lang.Character#isJavaIdentifierPart(char)
3808      * @see     java.lang.Character#isLetter(char)
3809      * @see     java.lang.Character#isLetterOrDigit(char)
3810      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3811      * @see     java.lang.Character#isIdentifierIgnorable(char)
     * @since   1.0.2
3813      * @deprecated Replaced by isJavaIdentifierPart(char).
3814      */
3815     @Deprecated
3816     public static boolean isJavaLetterOrDigit(char ch) {
3817         return isJavaIdentifierPart(ch);
3818     }
3819 
3820     /**
3821      * Determines if the specified character is
3822      * permissible as the first character in a Java identifier.
3823      * <p>
3824      * A character may start a Java identifier if and only if
3825      * one of the following conditions is true:
3826      * <ul>
3827      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3828      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
3829      * <li> ch is a currency symbol (such as "$")
3830      * <li> ch is a connecting punctuation character (such as "_").
3831      * </ul>
3832      *
3833      * <p><b>Note:</b> This method cannot handle <a
3834      * href="#supplementary"> supplementary characters</a>. To support
3835      * all Unicode characters, including supplementary characters, use
3836      * the {@link #isJavaIdentifierStart(int)} method.
3837      *
3838      * @param   ch the character to be tested.
3839      * @return  <code>true</code> if the character may start a Java identifier;
3840      *          <code>false</code> otherwise.
3841      * @see     java.lang.Character#isJavaIdentifierPart(char)
3842      * @see     java.lang.Character#isLetter(char)
3843      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
3844      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3845      * @since   1.1
3846      */
3847     public static boolean isJavaIdentifierStart(char ch) {
3848         return isJavaIdentifierStart((int)ch);
3849     }
3850 
3851     /**
3852      * Determines if the character (Unicode code point) is
3853      * permissible as the first character in a Java identifier.
3854      * <p>
3855      * A character may start a Java identifier if and only if
3856      * one of the following conditions is true:
3857      * <ul>
3858      * <li> {@link #isLetter(int) isLetter(codePoint)}
3859      *      returns <code>true</code>
3860      * <li> {@link #getType(int) getType(codePoint)}
3861      *      returns <code>LETTER_NUMBER</code>
3862      * <li> the referenced character is a currency symbol (such as "$")
3863      * <li> the referenced character is a connecting punctuation character
3864      *      (such as "_").
3865      * </ul>
3866      *
3867      * @param   codePoint the character (Unicode code point) to be tested.
3868      * @return  <code>true</code> if the character may start a Java identifier;
3869      *          <code>false</code> otherwise.
3870      * @see     java.lang.Character#isJavaIdentifierPart(int)
3871      * @see     java.lang.Character#isLetter(int)
3872      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
3873      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3874      * @since   1.5
3875      */
3876     public static boolean isJavaIdentifierStart(int codePoint) {
3877         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
3878     }
3879 
3880     /**
3881      * Determines if the specified character may be part of a Java
3882      * identifier as other than the first character.
3883      * <p>
3884      * A character may be part of a Java identifier if any of the following
3885      * are true:
3886      * <ul>
3887      * <li>  it is a letter
3888      * <li>  it is a currency symbol (such as <code>'$'</code>)
3889      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3890      * <li>  it is a digit
3891      * <li>  it is a numeric letter (such as a Roman numeral character)
3892      * <li>  it is a combining mark
3893      * <li>  it is a non-spacing mark
3894      * <li> <code>isIdentifierIgnorable</code> returns
3895      * <code>true</code> for the character
3896      * </ul>
3897      *
3898      * <p><b>Note:</b> This method cannot handle <a
3899      * href="#supplementary"> supplementary characters</a>. To support
3900      * all Unicode characters, including supplementary characters, use
3901      * the {@link #isJavaIdentifierPart(int)} method.
3902      *
3903      * @param   ch      the character to be tested.
3904      * @return <code>true</code> if the character may be part of a
3905      *          Java identifier; <code>false</code> otherwise.
3906      * @see     java.lang.Character#isIdentifierIgnorable(char)
3907      * @see     java.lang.Character#isJavaIdentifierStart(char)
3908      * @see     java.lang.Character#isLetterOrDigit(char)
3909      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3910      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3911      * @since   1.1
3912      */
3913     public static boolean isJavaIdentifierPart(char ch) {
3914         return isJavaIdentifierPart((int)ch);
3915     }
3916 
3917     /**
3918      * Determines if the character (Unicode code point) may be part of a Java
3919      * identifier as other than the first character.
3920      * <p>
3921      * A character may be part of a Java identifier if any of the following
3922      * are true:
3923      * <ul>
3924      * <li>  it is a letter
3925      * <li>  it is a currency symbol (such as <code>'$'</code>)
3926      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
3927      * <li>  it is a digit
3928      * <li>  it is a numeric letter (such as a Roman numeral character)
3929      * <li>  it is a combining mark
3930      * <li>  it is a non-spacing mark
3931      * <li> {@link #isIdentifierIgnorable(int)
3932      * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
3933      * the character
3934      * </ul>
3935      *
3936      * @param   codePoint the character (Unicode code point) to be tested.
3937      * @return <code>true</code> if the character may be part of a
3938      *          Java identifier; <code>false</code> otherwise.
3939      * @see     java.lang.Character#isIdentifierIgnorable(int)
3940      * @see     java.lang.Character#isJavaIdentifierStart(int)
3941      * @see     java.lang.Character#isLetterOrDigit(int)
3942      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3943      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
3944      * @since   1.5
3945      */
3946     public static boolean isJavaIdentifierPart(int codePoint) {
3947         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
3948     }
3949 
3950     /**
3951      * Determines if the specified character is permissible as the
3952      * first character in a Unicode identifier.
3953      * <p>
3954      * A character may start a Unicode identifier if and only if
3955      * one of the following conditions is true:
3956      * <ul>
3957      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
3958      * <li> {@link #getType(char) getType(ch)} returns
3959      *      <code>LETTER_NUMBER</code>.
3960      * </ul>
3961      *
3962      * <p><b>Note:</b> This method cannot handle <a
3963      * href="#supplementary"> supplementary characters</a>. To support
3964      * all Unicode characters, including supplementary characters, use
3965      * the {@link #isUnicodeIdentifierStart(int)} method.
3966      *
3967      * @param   ch      the character to be tested.
3968      * @return  <code>true</code> if the character may start a Unicode
3969      *          identifier; <code>false</code> otherwise.
3970      * @see     java.lang.Character#isJavaIdentifierStart(char)
3971      * @see     java.lang.Character#isLetter(char)
3972      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
3973      * @since   1.1
3974      */
3975     public static boolean isUnicodeIdentifierStart(char ch) {
3976         return isUnicodeIdentifierStart((int)ch);
3977     }
3978 
3979     /**
3980      * Determines if the specified character (Unicode code point) is permissible as the
3981      * first character in a Unicode identifier.
3982      * <p>
3983      * A character may start a Unicode identifier if and only if
3984      * one of the following conditions is true:
3985      * <ul>
3986      * <li> {@link #isLetter(int) isLetter(codePoint)}
3987      *      returns <code>true</code>
3988      * <li> {@link #getType(int) getType(codePoint)}
3989      *      returns <code>LETTER_NUMBER</code>.
3990      * </ul>
3991      * @param   codePoint the character (Unicode code point) to be tested.
3992      * @return  <code>true</code> if the character may start a Unicode
3993      *          identifier; <code>false</code> otherwise.
3994      * @see     java.lang.Character#isJavaIdentifierStart(int)
3995      * @see     java.lang.Character#isLetter(int)
3996      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
3997      * @since   1.5
3998      */
3999     public static boolean isUnicodeIdentifierStart(int codePoint) {
4000         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
4001     }
4002 
4003     /**
4004      * Determines if the specified character may be part of a Unicode
4005      * identifier as other than the first character.
4006      * <p>
4007      * A character may be part of a Unicode identifier if and only if
4008      * one of the following statements is true:
4009      * <ul>
4010      * <li>  it is a letter
4011      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4012      * <li>  it is a digit
4013      * <li>  it is a numeric letter (such as a Roman numeral character)
4014      * <li>  it is a combining mark
4015      * <li>  it is a non-spacing mark
4016      * <li> <code>isIdentifierIgnorable</code> returns
4017      * <code>true</code> for this character.
4018      * </ul>
4019      *
4020      * <p><b>Note:</b> This method cannot handle <a
4021      * href="#supplementary"> supplementary characters</a>. To support
4022      * all Unicode characters, including supplementary characters, use
4023      * the {@link #isUnicodeIdentifierPart(int)} method.
4024      *
4025      * @param   ch      the character to be tested.
4026      * @return  <code>true</code> if the character may be part of a
4027      *          Unicode identifier; <code>false</code> otherwise.
4028      * @see     java.lang.Character#isIdentifierIgnorable(char)
4029      * @see     java.lang.Character#isJavaIdentifierPart(char)
4030      * @see     java.lang.Character#isLetterOrDigit(char)
4031      * @see     java.lang.Character#isUnicodeIdentifierStart(char)
4032      * @since   1.1
4033      */
4034     public static boolean isUnicodeIdentifierPart(char ch) {
4035         return isUnicodeIdentifierPart((int)ch);
4036     }
4037 
4038     /**
4039      * Determines if the specified character (Unicode code point) may be part of a Unicode
4040      * identifier as other than the first character.
4041      * <p>
4042      * A character may be part of a Unicode identifier if and only if
4043      * one of the following statements is true:
4044      * <ul>
4045      * <li>  it is a letter
4046      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
4047      * <li>  it is a digit
4048      * <li>  it is a numeric letter (such as a Roman numeral character)
4049      * <li>  it is a combining mark
4050      * <li>  it is a non-spacing mark
4051      * <li> <code>isIdentifierIgnorable</code> returns
4052      * <code>true</code> for this character.
4053      * </ul>
4054      * @param   codePoint the character (Unicode code point) to be tested.
4055      * @return  <code>true</code> if the character may be part of a
4056      *          Unicode identifier; <code>false</code> otherwise.
4057      * @see     java.lang.Character#isIdentifierIgnorable(int)
4058      * @see     java.lang.Character#isJavaIdentifierPart(int)
4059      * @see     java.lang.Character#isLetterOrDigit(int)
4060      * @see     java.lang.Character#isUnicodeIdentifierStart(int)
4061      * @since   1.5
4062      */
4063     public static boolean isUnicodeIdentifierPart(int codePoint) {
4064         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
4065     }
4066 
4067     /**
4068      * Determines if the specified character should be regarded as
4069      * an ignorable character in a Java identifier or a Unicode identifier.
4070      * <p>
4071      * The following Unicode characters are ignorable in a Java identifier
4072      * or a Unicode identifier:
4073      * <ul>
4074      * <li>ISO control characters that are not whitespace
4075      * <ul>
4076      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4077      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4078      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4079      * </ul>
4080      *
4081      * <li>all characters that have the <code>FORMAT</code> general
4082      * category value
4083      * </ul>
4084      *
4085      * <p><b>Note:</b> This method cannot handle <a
4086      * href="#supplementary"> supplementary characters</a>. To support
4087      * all Unicode characters, including supplementary characters, use
4088      * the {@link #isIdentifierIgnorable(int)} method.
4089      *
4090      * @param   ch      the character to be tested.
4091      * @return  <code>true</code> if the character is an ignorable control
4092      *          character that may be part of a Java or Unicode identifier;
4093      *           <code>false</code> otherwise.
4094      * @see     java.lang.Character#isJavaIdentifierPart(char)
4095      * @see     java.lang.Character#isUnicodeIdentifierPart(char)
4096      * @since   1.1
4097      */
4098     public static boolean isIdentifierIgnorable(char ch) {
4099         return isIdentifierIgnorable((int)ch);
4100     }
4101 
4102     /**
4103      * Determines if the specified character (Unicode code point) should be regarded as
4104      * an ignorable character in a Java identifier or a Unicode identifier.
4105      * <p>
4106      * The following Unicode characters are ignorable in a Java identifier
4107      * or a Unicode identifier:
4108      * <ul>
4109      * <li>ISO control characters that are not whitespace
4110      * <ul>
4111      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
4112      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
4113      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
4114      * </ul>
4115      *
4116      * <li>all characters that have the <code>FORMAT</code> general
4117      * category value
4118      * </ul>
4119      *
4120      * @param   codePoint the character (Unicode code point) to be tested.
4121      * @return  <code>true</code> if the character is an ignorable control
4122      *          character that may be part of a Java or Unicode identifier;
4123      *          <code>false</code> otherwise.
4124      * @see     java.lang.Character#isJavaIdentifierPart(int)
4125      * @see     java.lang.Character#isUnicodeIdentifierPart(int)
4126      * @since   1.5
4127      */
4128     public static boolean isIdentifierIgnorable(int codePoint) {
4129         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
4130     }
4131 
4132     /**
4133      * Converts the character argument to lowercase using case
4134      * mapping information from the UnicodeData file.
4135      * <p>
4136      * Note that
4137      * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
4138      * does not always return <code>true</code> for some ranges of
4139      * characters, particularly those that are symbols or ideographs.
4140      *
4141      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4142      * characters to lowercase. <code>String</code> case mapping methods
4143      * have several benefits over <code>Character</code> case mapping methods.
4144      * <code>String</code> case mapping methods can perform locale-sensitive
4145      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4146      * the <code>Character</code> case mapping methods cannot.
4147      *
4148      * <p><b>Note:</b> This method cannot handle <a
4149      * href="#supplementary"> supplementary characters</a>. To support
4150      * all Unicode characters, including supplementary characters, use
4151      * the {@link #toLowerCase(int)} method.
4152      *
4153      * @param   ch   the character to be converted.
4154      * @return  the lowercase equivalent of the character, if any;
4155      *          otherwise, the character itself.
4156      * @see     java.lang.Character#isLowerCase(char)
4157      * @see     java.lang.String#toLowerCase()
4158      */
4159     public static char toLowerCase(char ch) {
4160         return (char)toLowerCase((int)ch);
4161     }
4162 
4163     /**
4164      * Converts the character (Unicode code point) argument to
4165      * lowercase using case mapping information from the UnicodeData
4166      * file.
4167      *
4168      * <p> Note that
4169      * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
4170      * does not always return <code>true</code> for some ranges of
4171      * characters, particularly those that are symbols or ideographs.
4172      *
4173      * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map
4174      * characters to lowercase. <code>String</code> case mapping methods
4175      * have several benefits over <code>Character</code> case mapping methods.
4176      * <code>String</code> case mapping methods can perform locale-sensitive
4177      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4178      * the <code>Character</code> case mapping methods cannot.
4179      *
4180      * @param   codePoint   the character (Unicode code point) to be converted.
4181      * @return  the lowercase equivalent of the character (Unicode code
4182      *          point), if any; otherwise, the character itself.
4183      * @see     java.lang.Character#isLowerCase(int)
4184      * @see     java.lang.String#toLowerCase()
4185      *
4186      * @since   1.5
4187      */
4188     public static int toLowerCase(int codePoint) {
4189         return CharacterData.of(codePoint).toLowerCase(codePoint);
4190     }
4191 
4192     /**
4193      * Converts the character argument to uppercase using case mapping
4194      * information from the UnicodeData file.
4195      * <p>
4196      * Note that
4197      * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
4198      * does not always return <code>true</code> for some ranges of
4199      * characters, particularly those that are symbols or ideographs.
4200      *
4201      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4202      * characters to uppercase. <code>String</code> case mapping methods
4203      * have several benefits over <code>Character</code> case mapping methods.
4204      * <code>String</code> case mapping methods can perform locale-sensitive
4205      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4206      * the <code>Character</code> case mapping methods cannot.
4207      *
4208      * <p><b>Note:</b> This method cannot handle <a
4209      * href="#supplementary"> supplementary characters</a>. To support
4210      * all Unicode characters, including supplementary characters, use
4211      * the {@link #toUpperCase(int)} method.
4212      *
4213      * @param   ch   the character to be converted.
4214      * @return  the uppercase equivalent of the character, if any;
4215      *          otherwise, the character itself.
4216      * @see     java.lang.Character#isUpperCase(char)
4217      * @see     java.lang.String#toUpperCase()
4218      */
4219     public static char toUpperCase(char ch) {
4220         return (char)toUpperCase((int)ch);
4221     }
4222 
4223     /**
4224      * Converts the character (Unicode code point) argument to
4225      * uppercase using case mapping information from the UnicodeData
4226      * file.
4227      *
4228      * <p>Note that
4229      * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
4230      * does not always return <code>true</code> for some ranges of
4231      * characters, particularly those that are symbols or ideographs.
4232      *
4233      * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map
4234      * characters to uppercase. <code>String</code> case mapping methods
4235      * have several benefits over <code>Character</code> case mapping methods.
4236      * <code>String</code> case mapping methods can perform locale-sensitive
4237      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
4238      * the <code>Character</code> case mapping methods cannot.
4239      *
4240      * @param   codePoint   the character (Unicode code point) to be converted.
4241      * @return  the uppercase equivalent of the character, if any;
4242      *          otherwise, the character itself.
4243      * @see     java.lang.Character#isUpperCase(int)
4244      * @see     java.lang.String#toUpperCase()
4245      *
4246      * @since   1.5
4247      */
4248     public static int toUpperCase(int codePoint) {
4249         return CharacterData.of(codePoint).toUpperCase(codePoint);
4250     }
4251 
4252     /**
4253      * Converts the character argument to titlecase using case mapping
4254      * information from the UnicodeData file. If a character has no
4255      * explicit titlecase mapping and is not itself a titlecase char
4256      * according to UnicodeData, then the uppercase mapping is
4257      * returned as an equivalent titlecase mapping. If the
4258      * <code>char</code> argument is already a titlecase
4259      * <code>char</code>, the same <code>char</code> value will be
4260      * returned.
4261      * <p>
4262      * Note that
4263      * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
4264      * does not always return <code>true</code> for some ranges of
4265      * characters.
4266      *
4267      * <p><b>Note:</b> This method cannot handle <a
4268      * href="#supplementary"> supplementary characters</a>. To support
4269      * all Unicode characters, including supplementary characters, use
4270      * the {@link #toTitleCase(int)} method.
4271      *
4272      * @param   ch   the character to be converted.
4273      * @return  the titlecase equivalent of the character, if any;
4274      *          otherwise, the character itself.
4275      * @see     java.lang.Character#isTitleCase(char)
4276      * @see     java.lang.Character#toLowerCase(char)
4277      * @see     java.lang.Character#toUpperCase(char)
4278      * @since   1.0.2
4279      */
4280     public static char toTitleCase(char ch) {
4281         return (char)toTitleCase((int)ch);
4282     }
4283 
4284     /**
4285      * Converts the character (Unicode code point) argument to titlecase using case mapping
4286      * information from the UnicodeData file. If a character has no
4287      * explicit titlecase mapping and is not itself a titlecase char
4288      * according to UnicodeData, then the uppercase mapping is
4289      * returned as an equivalent titlecase mapping. If the
4290      * character argument is already a titlecase
4291      * character, the same character value will be
4292      * returned.
4293      *
4294      * <p>Note that
4295      * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
4296      * does not always return <code>true</code> for some ranges of
4297      * characters.
4298      *
4299      * @param   codePoint   the character (Unicode code point) to be converted.
4300      * @return  the titlecase equivalent of the character, if any;
4301      *          otherwise, the character itself.
4302      * @see     java.lang.Character#isTitleCase(int)
4303      * @see     java.lang.Character#toLowerCase(int)
4304      * @see     java.lang.Character#toUpperCase(int)
4305      * @since   1.5
4306      */
4307     public static int toTitleCase(int codePoint) {
4308         return CharacterData.of(codePoint).toTitleCase(codePoint);
4309     }
4310 
4311     /**
4312      * Returns the numeric value of the character <code>ch</code> in the
4313      * specified radix.
4314      * <p>
4315      * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4316      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4317      * value of <code>ch</code> is not a valid digit in the specified
4318      * radix, <code>-1</code> is returned. A character is a valid digit
4319      * if at least one of the following is true:
4320      * <ul>
4321      * <li>The method <code>isDigit</code> is <code>true</code> of the character
4322      *     and the Unicode decimal digit value of the character (or its
4323      *     single-character decomposition) is less than the specified radix.
4324      *     In this case the decimal digit value is returned.
4325      * <li>The character is one of the uppercase Latin letters
4326      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4327      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
4328      *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4329      *     is returned.
4330      * <li>The character is one of the lowercase Latin letters
4331      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4332      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
4333      *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4334      *     is returned.
4335      * </ul>
4336      *
4337      * <p><b>Note:</b> This method cannot handle <a
4338      * href="#supplementary"> supplementary characters</a>. To support
4339      * all Unicode characters, including supplementary characters, use
4340      * the {@link #digit(int, int)} method.
4341      *
4342      * @param   ch      the character to be converted.
4343      * @param   radix   the radix.
4344      * @return  the numeric value represented by the character in the
4345      *          specified radix.
4346      * @see     java.lang.Character#forDigit(int, int)
4347      * @see     java.lang.Character#isDigit(char)
4348      */
4349     public static int digit(char ch, int radix) {
4350         return digit((int)ch, radix);
4351     }
4352 
4353     /**
4354      * Returns the numeric value of the specified character (Unicode
4355      * code point) in the specified radix.
4356      *
4357      * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
4358      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
4359      * character is not a valid digit in the specified
4360      * radix, <code>-1</code> is returned. A character is a valid digit
4361      * if at least one of the following is true:
4362      * <ul>
4363      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
4364      *     and the Unicode decimal digit value of the character (or its
4365      *     single-character decomposition) is less than the specified radix.
4366      *     In this case the decimal digit value is returned.
4367      * <li>The character is one of the uppercase Latin letters
4368      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
4369      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'A'&nbsp;+&nbsp;10</code>
4371      *     is returned.
4372      * <li>The character is one of the lowercase Latin letters
4373      *     <code>'a'</code> through <code>'z'</code> and its code is less than
4374      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>codePoint&nbsp;- 'a'&nbsp;+&nbsp;10</code>
4376      *     is returned.
4377      * </ul>
4378      *
4379      * @param   codePoint the character (Unicode code point) to be converted.
4380      * @param   radix   the radix.
4381      * @return  the numeric value represented by the character in the
4382      *          specified radix.
4383      * @see     java.lang.Character#forDigit(int, int)
4384      * @see     java.lang.Character#isDigit(int)
4385      * @since   1.5
4386      */
4387     public static int digit(int codePoint, int radix) {
4388         return CharacterData.of(codePoint).digit(codePoint, radix);
4389     }
4390 
4391     /**
4392      * Returns the <code>int</code> value that the specified Unicode
4393      * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4395      * an int with a value of 50.
4396      * <p>
4397      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4398      * <code>'&#92;u005A'</code>), lowercase
4399      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4400      * full width variant (<code>'&#92;uFF21'</code> through
4401      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4402      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4403      * through 35. This is independent of the Unicode specification,
4404      * which does not assign numeric values to these <code>char</code>
4405      * values.
4406      * <p>
4407      * If the character does not have a numeric value, then -1 is returned.
4408      * If the character has a numeric value that cannot be represented as a
4409      * nonnegative integer (for example, a fractional value), then -2
4410      * is returned.
4411      *
4412      * <p><b>Note:</b> This method cannot handle <a
4413      * href="#supplementary"> supplementary characters</a>. To support
4414      * all Unicode characters, including supplementary characters, use
4415      * the {@link #getNumericValue(int)} method.
4416      *
4417      * @param   ch      the character to be converted.
4418      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4419      *           value; -2 if the character has a numeric value that is not a
4420      *          nonnegative integer; -1 if the character has no numeric value.
4421      * @see     java.lang.Character#forDigit(int, int)
4422      * @see     java.lang.Character#isDigit(char)
4423      * @since   1.1
4424      */
4425     public static int getNumericValue(char ch) {
4426         return getNumericValue((int)ch);
4427     }
4428 
4429     /**
4430      * Returns the <code>int</code> value that the specified
4431      * character (Unicode code point) represents. For example, the character
4432      * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
4433      * an <code>int</code> with a value of 50.
4434      * <p>
4435      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
4436      * <code>'&#92;u005A'</code>), lowercase
4437      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
4438      * full width variant (<code>'&#92;uFF21'</code> through
4439      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
4440      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
4441      * through 35. This is independent of the Unicode specification,
4442      * which does not assign numeric values to these <code>char</code>
4443      * values.
4444      * <p>
4445      * If the character does not have a numeric value, then -1 is returned.
4446      * If the character has a numeric value that cannot be represented as a
4447      * nonnegative integer (for example, a fractional value), then -2
4448      * is returned.
4449      *
4450      * @param   codePoint the character (Unicode code point) to be converted.
4451      * @return  the numeric value of the character, as a nonnegative <code>int</code>
4452      *          value; -2 if the character has a numeric value that is not a
4453      *          nonnegative integer; -1 if the character has no numeric value.
4454      * @see     java.lang.Character#forDigit(int, int)
4455      * @see     java.lang.Character#isDigit(int)
4456      * @since   1.5
4457      */
4458     public static int getNumericValue(int codePoint) {
4459         return CharacterData.of(codePoint).getNumericValue(codePoint);
4460     }
4461 
4462     /**
4463      * Determines if the specified character is ISO-LATIN-1 white space.
4464      * This method returns <code>true</code> for the following five
4465      * characters only:
4466      * <table>
4467      * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
4468      *     <td><code>HORIZONTAL TABULATION</code></td></tr>
4469      * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
4470      *     <td><code>NEW LINE</code></td></tr>
4471      * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
4472      *     <td><code>FORM FEED</code></td></tr>
4473      * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
4474      *     <td><code>CARRIAGE RETURN</code></td></tr>
4475      * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
4476      *     <td><code>SPACE</code></td></tr>
4477      * </table>
4478      *
4479      * @param      ch   the character to be tested.
4480      * @return     <code>true</code> if the character is ISO-LATIN-1 white
4481      *             space; <code>false</code> otherwise.
4482      * @see        java.lang.Character#isSpaceChar(char)
4483      * @see        java.lang.Character#isWhitespace(char)
4484      * @deprecated Replaced by isWhitespace(char).
4485      */
4486     @Deprecated
4487     public static boolean isSpace(char ch) {
4488         return (ch <= 0x0020) &&
4489             (((((1L << 0x0009) |
4490             (1L << 0x000A) |
4491             (1L << 0x000C) |
4492             (1L << 0x000D) |
4493             (1L << 0x0020)) >> ch) & 1L) != 0);
4494     }
4495 
4496 
4497     /**
4498      * Determines if the specified character is a Unicode space character.
4499      * A character is considered to be a space character if and only if
4500      * it is specified to be a space character by the Unicode standard. This
4501      * method returns true if the character's general category type is any of
4502      * the following:
4503      * <ul>
4504      * <li> <code>SPACE_SEPARATOR</code>
4505      * <li> <code>LINE_SEPARATOR</code>
4506      * <li> <code>PARAGRAPH_SEPARATOR</code>
4507      * </ul>
4508      *
4509      * <p><b>Note:</b> This method cannot handle <a
4510      * href="#supplementary"> supplementary characters</a>. To support
4511      * all Unicode characters, including supplementary characters, use
4512      * the {@link #isSpaceChar(int)} method.
4513      *
4514      * @param   ch      the character to be tested.
4515      * @return  <code>true</code> if the character is a space character;
4516      *          <code>false</code> otherwise.
4517      * @see     java.lang.Character#isWhitespace(char)
4518      * @since   1.1
4519      */
4520     public static boolean isSpaceChar(char ch) {
4521         return isSpaceChar((int)ch);
4522     }
4523 
4524     /**
4525      * Determines if the specified character (Unicode code point) is a
4526      * Unicode space character.  A character is considered to be a
4527      * space character if and only if it is specified to be a space
4528      * character by the Unicode standard. This method returns true if
4529      * the character's general category type is any of the following:
4530      *
4531      * <ul>
4532      * <li> {@link #SPACE_SEPARATOR}
4533      * <li> {@link #LINE_SEPARATOR}
4534      * <li> {@link #PARAGRAPH_SEPARATOR}
4535      * </ul>
4536      *
4537      * @param   codePoint the character (Unicode code point) to be tested.
4538      * @return  <code>true</code> if the character is a space character;
4539      *          <code>false</code> otherwise.
4540      * @see     java.lang.Character#isWhitespace(int)
4541      * @since   1.5
4542      */
4543     public static boolean isSpaceChar(int codePoint) {
4544         return ((((1 << Character.SPACE_SEPARATOR) |
4545                   (1 << Character.LINE_SEPARATOR) |
4546                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
4547             != 0;
4548     }
4549 
4550     /**
4551      * Determines if the specified character is white space according to Java.
4552      * A character is a Java whitespace character if and only if it satisfies
4553      * one of the following criteria:
4554      * <ul>
4555      * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
4556      *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
4557      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4558      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4559      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4560      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4561      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4562      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4563      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4564      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4565      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4566      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4567      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4568      * </ul>
4569      *
4570      * <p><b>Note:</b> This method cannot handle <a
4571      * href="#supplementary"> supplementary characters</a>. To support
4572      * all Unicode characters, including supplementary characters, use
4573      * the {@link #isWhitespace(int)} method.
4574      *
4575      * @param   ch the character to be tested.
4576      * @return  <code>true</code> if the character is a Java whitespace
4577      *          character; <code>false</code> otherwise.
4578      * @see     java.lang.Character#isSpaceChar(char)
4579      * @since   1.1
4580      */
4581     public static boolean isWhitespace(char ch) {
4582         return isWhitespace((int)ch);
4583     }
4584 
4585     /**
4586      * Determines if the specified character (Unicode code point) is
4587      * white space according to Java.  A character is a Java
4588      * whitespace character if and only if it satisfies one of the
4589      * following criteria:
4590      * <ul>
4591      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
4592      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
4593      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
4594      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
4595      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
4596      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
4597      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
4598      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
4599      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
4600      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
4601      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
4602      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
4603      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
4604      * </ul>
     *
4607      * @param   codePoint the character (Unicode code point) to be tested.
4608      * @return  <code>true</code> if the character is a Java whitespace
4609      *          character; <code>false</code> otherwise.
4610      * @see     java.lang.Character#isSpaceChar(int)
4611      * @since   1.5
4612      */
4613     public static boolean isWhitespace(int codePoint) {
4614         return CharacterData.of(codePoint).isWhitespace(codePoint);
4615     }
4616 
4617     /**
4618      * Determines if the specified character is an ISO control
4619      * character.  A character is considered to be an ISO control
4620      * character if its code is in the range <code>'&#92;u0000'</code>
4621      * through <code>'&#92;u001F'</code> or in the range
4622      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4623      *
4624      * <p><b>Note:</b> This method cannot handle <a
4625      * href="#supplementary"> supplementary characters</a>. To support
4626      * all Unicode characters, including supplementary characters, use
4627      * the {@link #isISOControl(int)} method.
4628      *
4629      * @param   ch      the character to be tested.
4630      * @return  <code>true</code> if the character is an ISO control character;
4631      *          <code>false</code> otherwise.
4632      *
4633      * @see     java.lang.Character#isSpaceChar(char)
4634      * @see     java.lang.Character#isWhitespace(char)
4635      * @since   1.1
4636      */
4637     public static boolean isISOControl(char ch) {
4638         return isISOControl((int)ch);
4639     }
4640 
4641     /**
4642      * Determines if the referenced character (Unicode code point) is an ISO control
4643      * character.  A character is considered to be an ISO control
4644      * character if its code is in the range <code>'&#92;u0000'</code>
4645      * through <code>'&#92;u001F'</code> or in the range
4646      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
4647      *
4648      * @param   codePoint the character (Unicode code point) to be tested.
4649      * @return  <code>true</code> if the character is an ISO control character;
4650      *          <code>false</code> otherwise.
4651      * @see     java.lang.Character#isSpaceChar(int)
4652      * @see     java.lang.Character#isWhitespace(int)
4653      * @since   1.5
4654      */
4655     public static boolean isISOControl(int codePoint) {
4656         return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
4657             (codePoint >= 0x007F && codePoint <= 0x009F);
4658     }
4659 
4660     /**
4661      * Returns a value indicating a character's general category.
4662      *
4663      * <p><b>Note:</b> This method cannot handle <a
4664      * href="#supplementary"> supplementary characters</a>. To support
4665      * all Unicode characters, including supplementary characters, use
4666      * the {@link #getType(int)} method.
4667      *
4668      * @param   ch      the character to be tested.
4669      * @return  a value of type <code>int</code> representing the
4670      *          character's general category.
4671      * @see     java.lang.Character#COMBINING_SPACING_MARK
4672      * @see     java.lang.Character#CONNECTOR_PUNCTUATION
4673      * @see     java.lang.Character#CONTROL
4674      * @see     java.lang.Character#CURRENCY_SYMBOL
4675      * @see     java.lang.Character#DASH_PUNCTUATION
4676      * @see     java.lang.Character#DECIMAL_DIGIT_NUMBER
4677      * @see     java.lang.Character#ENCLOSING_MARK
4678      * @see     java.lang.Character#END_PUNCTUATION
4679      * @see     java.lang.Character#FINAL_QUOTE_PUNCTUATION
4680      * @see     java.lang.Character#FORMAT
4681      * @see     java.lang.Character#INITIAL_QUOTE_PUNCTUATION
4682      * @see     java.lang.Character#LETTER_NUMBER
4683      * @see     java.lang.Character#LINE_SEPARATOR
4684      * @see     java.lang.Character#LOWERCASE_LETTER
4685      * @see     java.lang.Character#MATH_SYMBOL
4686      * @see     java.lang.Character#MODIFIER_LETTER
4687      * @see     java.lang.Character#MODIFIER_SYMBOL
4688      * @see     java.lang.Character#NON_SPACING_MARK
4689      * @see     java.lang.Character#OTHER_LETTER
4690      * @see     java.lang.Character#OTHER_NUMBER
4691      * @see     java.lang.Character#OTHER_PUNCTUATION
4692      * @see     java.lang.Character#OTHER_SYMBOL
4693      * @see     java.lang.Character#PARAGRAPH_SEPARATOR
4694      * @see     java.lang.Character#PRIVATE_USE
4695      * @see     java.lang.Character#SPACE_SEPARATOR
4696      * @see     java.lang.Character#START_PUNCTUATION
4697      * @see     java.lang.Character#SURROGATE
4698      * @see     java.lang.Character#TITLECASE_LETTER
4699      * @see     java.lang.Character#UNASSIGNED
4700      * @see     java.lang.Character#UPPERCASE_LETTER
4701      * @since   1.1
4702      */
4703     public static int getType(char ch) {
4704         return getType((int)ch);
4705     }
4706 
4707     /**
4708      * Returns a value indicating a character's general category.
4709      *
4710      * @param   codePoint the character (Unicode code point) to be tested.
4711      * @return  a value of type <code>int</code> representing the
4712      *          character's general category.
4713      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
4714      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
4715      * @see     Character#CONTROL CONTROL
4716      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
4717      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
4718      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
4719      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
4720      * @see     Character#END_PUNCTUATION END_PUNCTUATION
4721      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
4722      * @see     Character#FORMAT FORMAT
4723      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
4724      * @see     Character#LETTER_NUMBER LETTER_NUMBER
4725      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
4726      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
4727      * @see     Character#MATH_SYMBOL MATH_SYMBOL
4728      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
4729      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
4730      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
4731      * @see     Character#OTHER_LETTER OTHER_LETTER
4732      * @see     Character#OTHER_NUMBER OTHER_NUMBER
4733      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
4734      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
4735      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
4736      * @see     Character#PRIVATE_USE PRIVATE_USE
4737      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
4738      * @see     Character#START_PUNCTUATION START_PUNCTUATION
4739      * @see     Character#SURROGATE SURROGATE
4740      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
4741      * @see     Character#UNASSIGNED UNASSIGNED
4742      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
4743      * @since   1.5
4744      */
4745     public static int getType(int codePoint) {
4746         return CharacterData.of(codePoint).getType(codePoint);
4747     }
4748 
4749     /**
4750      * Determines the character representation for a specific digit in
4751      * the specified radix. If the value of <code>radix</code> is not a
4752      * valid radix, or the value of <code>digit</code> is not a valid
4753      * digit in the specified radix, the null character
4754      * (<code>'&#92;u0000'</code>) is returned.
4755      * <p>
4756      * The <code>radix</code> argument is valid if it is greater than or
4757      * equal to <code>MIN_RADIX</code> and less than or equal to
4758      * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
4759      * <code>0&nbsp;&lt;=digit&nbsp;&lt;&nbsp;radix</code>.
4760      * <p>
4761      * If the digit is less than 10, then
4762      * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
4763      * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
4764      *
4765      * @param   digit   the number to convert to a character.
4766      * @param   radix   the radix.
4767      * @return  the <code>char</code> representation of the specified digit
4768      *          in the specified radix.
4769      * @see     java.lang.Character#MIN_RADIX
4770      * @see     java.lang.Character#MAX_RADIX
4771      * @see     java.lang.Character#digit(char, int)
4772      */
4773     public static char forDigit(int digit, int radix) {
4774         if ((digit >= radix) || (digit < 0)) {
4775             return '\0';
4776         }
4777         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
4778             return '\0';
4779         }
4780         if (digit < 10) {
4781             return (char)('0' + digit);
4782         }
4783         return (char)('a' - 10 + digit);
4784     }
4785 
4786     /**
4787      * Returns the Unicode directionality property for the given
4788      * character.  Character directionality is used to calculate the
4789      * visual ordering of text. The directionality value of undefined
4790      * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
4791      *
4792      * <p><b>Note:</b> This method cannot handle <a
4793      * href="#supplementary"> supplementary characters</a>. To support
4794      * all Unicode characters, including supplementary characters, use
4795      * the {@link #getDirectionality(int)} method.
4796      *
4797      * @param  ch <code>char</code> for which the directionality property
4798      *            is requested.
4799      * @return the directionality property of the <code>char</code> value.
4800      *
4801      * @see Character#DIRECTIONALITY_UNDEFINED
4802      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
4803      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
4804      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4805      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
4806      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4807      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4808      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
4809      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4810      * @see Character#DIRECTIONALITY_NONSPACING_MARK
4811      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
4812      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
4813      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
4814      * @see Character#DIRECTIONALITY_WHITESPACE
4815      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
4816      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4817      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4818      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4819      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4820      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4821      * @since 1.4
4822      */
4823     public static byte getDirectionality(char ch) {
4824         return getDirectionality((int)ch);
4825     }
4826 
4827     /**
4828      * Returns the Unicode directionality property for the given
4829      * character (Unicode code point).  Character directionality is
4830      * used to calculate the visual ordering of text. The
4831      * directionality value of undefined character is {@link
4832      * #DIRECTIONALITY_UNDEFINED}.
4833      *
4834      * @param   codePoint the character (Unicode code point) for which
4835      *          the directionality property is requested.
4836      * @return the directionality property of the character.
4837      *
4838      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
4839      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
4840      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
4841      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4842      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
4843      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4844      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4845      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
4846      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4847      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
4848      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
4849      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
4850      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
4851      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
4852      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
4853      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4854      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4855      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4856      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4857      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4858      * @since    1.5
4859      */
4860     public static byte getDirectionality(int codePoint) {
4861         return CharacterData.of(codePoint).getDirectionality(codePoint);
4862     }
4863 
4864     /**
4865      * Determines whether the character is mirrored according to the
4866      * Unicode specification.  Mirrored characters should have their
4867      * glyphs horizontally mirrored when displayed in text that is
4868      * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
4869      * PARENTHESIS is semantically defined to be an <i>opening
4870      * parenthesis</i>.  This will appear as a "(" in text that is
4871      * left-to-right but as a ")" in text that is right-to-left.
4872      *
4873      * <p><b>Note:</b> This method cannot handle <a
4874      * href="#supplementary"> supplementary characters</a>. To support
4875      * all Unicode characters, including supplementary characters, use
4876      * the {@link #isMirrored(int)} method.
4877      *
4878      * @param  ch <code>char</code> for which the mirrored property is requested
4879      * @return <code>true</code> if the char is mirrored, <code>false</code>
4880      *         if the <code>char</code> is not mirrored or is not defined.
4881      * @since 1.4
4882      */
4883     public static boolean isMirrored(char ch) {
4884         return isMirrored((int)ch);
4885     }
4886 
4887     /**
4888      * Determines whether the specified character (Unicode code point)
4889      * is mirrored according to the Unicode specification.  Mirrored
4890      * characters should have their glyphs horizontally mirrored when
4891      * displayed in text that is right-to-left.  For example,
4892      * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
4893      * defined to be an <i>opening parenthesis</i>.  This will appear
4894      * as a "(" in text that is left-to-right but as a ")" in text
4895      * that is right-to-left.
4896      *
4897      * @param   codePoint the character (Unicode code point) to be tested.
4898      * @return  <code>true</code> if the character is mirrored, <code>false</code>
4899      *          if the character is not mirrored or is not defined.
4900      * @since   1.5
4901      */
4902     public static boolean isMirrored(int codePoint) {
4903         return CharacterData.of(codePoint).isMirrored(codePoint);
4904     }
4905 
4906     /**
4907      * Compares two <code>Character</code> objects numerically.
4908      *
4909      * @param   anotherCharacter   the <code>Character</code> to be compared.
4910 
4911      * @return  the value <code>0</code> if the argument <code>Character</code>
4912      *          is equal to this <code>Character</code>; a value less than
4913      *          <code>0</code> if this <code>Character</code> is numerically less
4914      *          than the <code>Character</code> argument; and a value greater than
4915      *          <code>0</code> if this <code>Character</code> is numerically greater
4916      *          than the <code>Character</code> argument (unsigned comparison).
4917      *          Note that this is strictly a numerical comparison; it is not
4918      *          locale-dependent.
4919      * @since   1.2
4920      */
4921     public int compareTo(Character anotherCharacter) {
4922         return this.value - anotherCharacter.value;
4923     }
4924 
4925     /**
4926      * Converts the character (Unicode code point) argument to uppercase using
4927      * information from the UnicodeData file.
4928      * <p>
4929      *
4930      * @param   codePoint   the character (Unicode code point) to be converted.
4931      * @return  either the uppercase equivalent of the character, if
4932      *          any, or an error flag (<code>Character.ERROR</code>)
4933      *          that indicates that a 1:M <code>char</code> mapping exists.
4934      * @see     java.lang.Character#isLowerCase(char)
4935      * @see     java.lang.Character#isUpperCase(char)
4936      * @see     java.lang.Character#toLowerCase(char)
4937      * @see     java.lang.Character#toTitleCase(char)
4938      * @since 1.4
4939      */
4940     static int toUpperCaseEx(int codePoint) {
4941         assert isValidCodePoint(codePoint);
4942         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
4943     }
4944 
4945     /**
4946      * Converts the character (Unicode code point) argument to uppercase using case
4947      * mapping information from the SpecialCasing file in the Unicode
4948      * specification. If a character has no explicit uppercase
4949      * mapping, then the <code>char</code> itself is returned in the
4950      * <code>char[]</code>.
4951      *
4952      * @param   codePoint   the character (Unicode code point) to be converted.
4953      * @return a <code>char[]</code> with the uppercased character.
4954      * @since 1.4
4955      */
4956     static char[] toUpperCaseCharArray(int codePoint) {
4957         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
4958         assert isValidCodePoint(codePoint) &&
4959                !isSupplementaryCodePoint(codePoint);
4960         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
4961     }
4962 
    /**
     * The number of bits used to represent a <tt>char</tt> value in unsigned
     * binary form, constant <code>16</code>.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
4970 
4971     /**
4972      * Returns the value obtained by reversing the order of the bytes in the
4973      * specified <tt>char</tt> value.
4974      *
4975      * @return the value obtained by reversing (or, equivalently, swapping)
4976      *     the bytes in the specified <tt>char</tt> value.
4977      * @since 1.5
4978      */
4979     public static char reverseBytes(char ch) {
4980         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
4981     }
4982 }