1 /*
   2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The <code>Character</code> class wraps a value of the primitive
  35  * type <code>char</code> in an object. An object of type
  36  * <code>Character</code> contains a single field whose type is
  37  * <code>char</code>.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.0.0.
  44  * <p>
  45  * The methods and data of class <code>Character</code> are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  58  *
  59  * <p>The <code>char</code> data type (and therefore the value that a
  60  * <code>Character</code> object encapsulates) are based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now U+0000 to
  65  * U+10FFFF; those outside U+D800 to U+DFFF are <em>Unicode scalar value</em>s.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in <code>char</code> arrays and
  76  * in the <code>String</code> and <code>StringBuffer</code> classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of <code>char</code> values, the first from the <em>high-surrogates</em>
  79  * range, (&#92;uD800-&#92;uDBFF), the second from the
  80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  81  *
  82  * <p>A <code>char</code> value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * <code>int</code> value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of <code>int</code> are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate <code>char</code> values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a <code>char</code> value cannot support
  95  * supplementary characters. They treat <code>char</code> values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
  98  * this specific value if followed by any low-surrogate value in a string
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an <code>int</code> value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, <code>Character.isLetter(0x2F81A)</code> returns
 104  * <code>true</code> because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * <code>char</code> values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * <code>digit</code> method, the <code>forDigit</code>
 129      * method, and the <code>toString</code> method of class
 130      * <code>Integer</code>.
 131      *
 132      * @see     Character#digit(char, int)
 133      * @see     Character#forDigit(int, int)
 134      * @see     Integer#toString(int, int)
 135      * @see     Integer#valueOf(String)
 136      */
 137     public static final int MIN_RADIX = 2;
 138 
 139     /**
 140      * The maximum radix available for conversion to and from strings.
 141      * The constant value of this field is the largest value permitted
 142      * for the radix argument in radix-conversion methods such as the
 143      * <code>digit</code> method, the <code>forDigit</code>
 144      * method, and the <code>toString</code> method of class
 145      * <code>Integer</code>.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * <code>char</code>, <code>'&#92;u0000'</code>.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * <code>char</code>, <code>'&#92;uFFFF'</code>.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The <code>Class</code> instance representing the primitive type
 172      * <code>char</code>.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
 289     /**
 290      * General category "Co" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined <code>char</code>
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
 393      * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * The minimum value of a
 496      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 497      * Unicode high-surrogate code unit</a>
 498      * in the UTF-16 encoding, constant <code>'&#92;uD800'</code>.
 499      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 500      *
 501      * @since 1.5
 502      */
 503     public static final char MIN_HIGH_SURROGATE = '\uD800';
 504 
 505     /**
 506      * The maximum value of a
 507      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 508      * Unicode high-surrogate code unit</a>
 509      * in the UTF-16 encoding, constant <code>'&#92;uDBFF'</code>.
 510      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 511      *
 512      * @since 1.5
 513      */
 514     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 515 
 516     /**
 517      * The minimum value of a
 518      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 519      * Unicode low-surrogate code unit</a>
 520      * in the UTF-16 encoding, constant <code>'&#92;uDC00'</code>.
 521      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 522      *
 523      * @since 1.5
 524      */
 525     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 526 
 527     /**
 528      * The maximum value of a
 529      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 530      * Unicode low-surrogate code unit</a>
 531      * in the UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
 532      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 533      *
 534      * @since 1.5
 535      */
 536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 537 
 538     /**
 539      * The minimum value of a Unicode surrogate code unit in the
 540      * UTF-16 encoding, constant <code>'&#92;uD800'</code>.
 541      *
 542      * @since 1.5
 543      */
 544     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 545 
 546     /**
 547      * The maximum value of a Unicode surrogate code unit in the
 548      * UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
 549      *
 550      * @since 1.5
 551      */
 552     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 553 
 554     /**
 555      * The minimum value of a
 556      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 557      * Unicode supplementary code point</a>, constant {@code U+10000}.
 558      *
 559      * @since 1.5
 560      */
 561     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 562 
 563     /**
 564      * The minimum value of a
 565      * <a href="http://www.unicode.org/glossary/#code_point">
 566      * Unicode code point</a>, constant {@code U+0000}.
 567      *
 568      * @since 1.5
 569      */
 570     public static final int MIN_CODE_POINT = 0x000000;
 571 
 572     /**
 573      * The maximum value of a
 574      * <a href="http://www.unicode.org/glossary/#code_point">
 575      * Unicode code point</a>, constant {@code U+10FFFF}.
 576      *
 577      * @since 1.5
 578      */
 579     public static final int MAX_CODE_POINT = 0X10FFFF;
 580 
 581 
 582     /**
 583      * Instances of this class represent particular subsets of the Unicode
 584      * character set.  The only family of subsets defined in the
 585      * <code>Character</code> class is {@link Character.UnicodeBlock}.
 586      * Other portions of the Java API may define other subsets for their
 587      * own purposes.
 588      *
 589      * @since 1.2
 590      */
 591     public static class Subset  {
 592 
 593         /** Name given at construction; never {@code null} and never reassigned. */
 594         private final String name;
 595 
 596         /**
 597          * Constructs a new <code>Subset</code> instance.
 598          *
 599          * @param  name  The name of this subset
 600          * @exception NullPointerException if name is <code>null</code>
 601          */
 602         protected Subset(String name) {
 603             if (name == null) {
 604                 throw new NullPointerException("name");
 605             }
 606             this.name = name;
 607         }
 608 
 609         /**
 610          * Compares two <code>Subset</code> objects for equality; returns
 611          * <code>true</code> if and only if <code>this</code> and
 612          * <code>obj</code> are the same object. Being <code>final</code>,
 613          * this guarantee holds for all subclasses.
 614          */
 615         @Override
 616         public final boolean equals(Object obj) {
 617             return (this == obj);
 618         }
 619 
 620         /**
 621          * Returns the standard hash code as defined by
 622          * <code>{@link Object#hashCode}</code>. This method is
 623          * <code>final</code> so that <code>equals</code> and
 624          * <code>hashCode</code> stay consistent in all subclasses.
 625          */
 626         @Override
 627         public final int hashCode() {
 628             return super.hashCode();
 629         }
 630 
 631         /** Returns the name of this subset. */
 632         @Override
 633         public final String toString() {
 634             return name;
 635         }
 636     }
 637 
 638     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 639     // for the latest specification of Unicode Blocks.
 640 
 641     /**
 642      * A family of character subsets representing the character blocks in the
 643      * Unicode specification. Character blocks generally define characters
 644      * used for a specific script or purpose. A character is contained by
 645      * at most one Unicode block.
 646      *
 647      * @since 1.2
 648      */
 649     public static final class UnicodeBlock extends Subset {
 650         // Lookup table from identifier names and aliases to blocks; filled by the constructors.
 651         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 652 
 653         /**
 654          * Creates a UnicodeBlock with the given identifier name, which must
 655          * be the same as the block identifier, and records it in {@code map}.
 656          */
 657         private UnicodeBlock(String idName) {
 658             super(idName);
 659             map.put(idName, this);
 660         }
 661 
 662         /**
 663          * Creates a UnicodeBlock with the given identifier name and also
 664          * records it in {@code map} under the given alias name.
 665          */
 666         private UnicodeBlock(String idName, String alias) {
 667             this(idName);
 668             map.put(alias, this);
 669         }
 670 
 671         /**
 672          * Creates a UnicodeBlock with the given identifier name and alias names.
 673          */
 674         private UnicodeBlock(String idName, String... aliases) {
 675             this(idName);
 676             for (String aliasName : aliases) {
 677                 map.put(aliasName, this);
 678             }
 679         }
 680 
 681         /**
 682          * Constant for the "Basic Latin" Unicode character block.
 683          * @since 1.2
 684          */
 685         public static final UnicodeBlock  BASIC_LATIN =
 686             new UnicodeBlock("BASIC_LATIN",
 687                              "BASIC LATIN",
 688                              "BASICLATIN");
 689 
 690         /**
 691          * Constant for the "Latin-1 Supplement" Unicode character block.
 692          * @since 1.2
 693          */
 694         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 695             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 696                              "LATIN-1 SUPPLEMENT",
 697                              "LATIN-1SUPPLEMENT");
 698 
 699         /**
 700          * Constant for the "Latin Extended-A" Unicode character block.
 701          * @since 1.2
 702          */
 703         public static final UnicodeBlock LATIN_EXTENDED_A =
 704             new UnicodeBlock("LATIN_EXTENDED_A",
 705                              "LATIN EXTENDED-A",
 706                              "LATINEXTENDED-A");
 707 
 708         /**
 709          * Constant for the "Latin Extended-B" Unicode character block.
 710          * @since 1.2
 711          */
 712         public static final UnicodeBlock LATIN_EXTENDED_B =
 713             new UnicodeBlock("LATIN_EXTENDED_B",
 714                              "LATIN EXTENDED-B",
 715                              "LATINEXTENDED-B");
 716 
 717         /**
 718          * Constant for the "IPA Extensions" Unicode character block.
 719          * @since 1.2
 720          */
 721         public static final UnicodeBlock IPA_EXTENSIONS =
 722             new UnicodeBlock("IPA_EXTENSIONS",
 723                              "IPA EXTENSIONS",
 724                              "IPAEXTENSIONS");
 725 
 726         /**
 727          * Constant for the "Spacing Modifier Letters" Unicode character block.
 728          * @since 1.2
 729          */
 730         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 731             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 732                              "SPACING MODIFIER LETTERS",
 733                              "SPACINGMODIFIERLETTERS");
 734 
 735         /**
 736          * Constant for the "Combining Diacritical Marks" Unicode character block.
 737          * @since 1.2
 738          */
 739         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 740             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 741                              "COMBINING DIACRITICAL MARKS",
 742                              "COMBININGDIACRITICALMARKS");
 743 
 744         /**
 745          * Constant for the "Greek and Coptic" Unicode character block.
 746          * <p>
 747          * This block was previously known as the "Greek" block.
 748          *
 749          * @since 1.2
 750          */
 751         public static final UnicodeBlock GREEK =
 752             new UnicodeBlock("GREEK",
 753                              "GREEK AND COPTIC",
 754                              "GREEKANDCOPTIC");
 755 
 756         /**
 757          * Constant for the "Cyrillic" Unicode character block.
 758          * @since 1.2
 759          */
 760         public static final UnicodeBlock CYRILLIC =
 761             new UnicodeBlock("CYRILLIC");
 762 
 763         /**
 764          * Constant for the "Armenian" Unicode character block.
 765          * @since 1.2
 766          */
 767         public static final UnicodeBlock ARMENIAN =
 768             new UnicodeBlock("ARMENIAN");
 769 
 770         /**
 771          * Constant for the "Hebrew" Unicode character block.
 772          * @since 1.2
 773          */
 774         public static final UnicodeBlock HEBREW =
 775             new UnicodeBlock("HEBREW");
 776 
 777         /**
 778          * Constant for the "Arabic" Unicode character block.
 779          * @since 1.2
 780          */
 781         public static final UnicodeBlock ARABIC =
 782             new UnicodeBlock("ARABIC");
 783 
 784         /**
 785          * Constant for the "Devanagari" Unicode character block.
 786          * @since 1.2
 787          */
 788         public static final UnicodeBlock DEVANAGARI =
 789             new UnicodeBlock("DEVANAGARI");
 790 
 791         /**
 792          * Constant for the "Bengali" Unicode character block.
 793          * @since 1.2
 794          */
 795         public static final UnicodeBlock BENGALI =
 796             new UnicodeBlock("BENGALI");
 797 
 798         /**
 799          * Constant for the "Gurmukhi" Unicode character block.
 800          * @since 1.2
 801          */
 802         public static final UnicodeBlock GURMUKHI =
 803             new UnicodeBlock("GURMUKHI");
 804 
 805         /**
 806          * Constant for the "Gujarati" Unicode character block.
 807          * @since 1.2
 808          */
 809         public static final UnicodeBlock GUJARATI =
 810             new UnicodeBlock("GUJARATI");
 811 
 812         /**
 813          * Constant for the "Oriya" Unicode character block.
 814          * @since 1.2
 815          */
 816         public static final UnicodeBlock ORIYA =
 817             new UnicodeBlock("ORIYA");
 818 
 819         /**
 820          * Constant for the "Tamil" Unicode character block.
 821          * @since 1.2
 822          */
 823         public static final UnicodeBlock TAMIL =
 824             new UnicodeBlock("TAMIL");
 825 
 826         /**
 827          * Constant for the "Telugu" Unicode character block.
 828          * @since 1.2
 829          */
 830         public static final UnicodeBlock TELUGU =
 831             new UnicodeBlock("TELUGU");
 832 
 833         /**
 834          * Constant for the "Kannada" Unicode character block.
 835          * @since 1.2
 836          */
 837         public static final UnicodeBlock KANNADA =
 838             new UnicodeBlock("KANNADA");
 839 
 840         /**
 841          * Constant for the "Malayalam" Unicode character block.
 842          * @since 1.2
 843          */
 844         public static final UnicodeBlock MALAYALAM =
 845             new UnicodeBlock("MALAYALAM");
 846 
 847         /**
 848          * Constant for the "Thai" Unicode character block.
 849          * @since 1.2
 850          */
 851         public static final UnicodeBlock THAI =
 852             new UnicodeBlock("THAI");
 853 
 854         /**
 855          * Constant for the "Lao" Unicode character block.
 856          * @since 1.2
 857          */
 858         public static final UnicodeBlock LAO =
 859             new UnicodeBlock("LAO");
 860 
 861         /**
 862          * Constant for the "Tibetan" Unicode character block.
 863          * @since 1.2
 864          */
 865         public static final UnicodeBlock TIBETAN =
 866             new UnicodeBlock("TIBETAN");
 867 
 868         /**
 869          * Constant for the "Georgian" Unicode character block.
 870          * @since 1.2
 871          */
 872         public static final UnicodeBlock GEORGIAN =
 873             new UnicodeBlock("GEORGIAN");
 874 
 875         /**
 876          * Constant for the "Hangul Jamo" Unicode character block.
 877          * @since 1.2
 878          */
 879         public static final UnicodeBlock HANGUL_JAMO =
 880             new UnicodeBlock("HANGUL_JAMO",
 881                              "HANGUL JAMO",
 882                              "HANGULJAMO");
 883 
 884         /**
 885          * Constant for the "Latin Extended Additional" Unicode character block.
 886          * @since 1.2
 887          */
 888         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 889             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 890                              "LATIN EXTENDED ADDITIONAL",
 891                              "LATINEXTENDEDADDITIONAL");
 892 
 893         /**
 894          * Constant for the "Greek Extended" Unicode character block.
 895          * @since 1.2
 896          */
 897         public static final UnicodeBlock GREEK_EXTENDED =
 898             new UnicodeBlock("GREEK_EXTENDED",
 899                              "GREEK EXTENDED",
 900                              "GREEKEXTENDED");
 901 
 902         /**
 903          * Constant for the "General Punctuation" Unicode character block.
 904          * @since 1.2
 905          */
 906         public static final UnicodeBlock GENERAL_PUNCTUATION =
 907             new UnicodeBlock("GENERAL_PUNCTUATION",
 908                              "GENERAL PUNCTUATION",
 909                              "GENERALPUNCTUATION");
 910 
 911         /**
 912          * Constant for the "Superscripts and Subscripts" Unicode character
 913          * block.
 914          * @since 1.2
 915          */
 916         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 917             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 918                              "SUPERSCRIPTS AND SUBSCRIPTS",
 919                              "SUPERSCRIPTSANDSUBSCRIPTS");
 920 
 921         /**
 922          * Constant for the "Currency Symbols" Unicode character block.
 923          * @since 1.2
 924          */
 925         public static final UnicodeBlock CURRENCY_SYMBOLS =
 926             new UnicodeBlock("CURRENCY_SYMBOLS",
 927                              "CURRENCY SYMBOLS",
 928                              "CURRENCYSYMBOLS");
 929 
 930         /**
 931          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 932          * character block.
 933          * <p>
 934          * This block was previously known as "Combining Marks for Symbols".
 935          * @since 1.2
 936          */
 937         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 938             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 939                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 940                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 941                              "COMBINING MARKS FOR SYMBOLS",
 942                              "COMBININGMARKSFORSYMBOLS");
 943 
 944         /**
 945          * Constant for the "Letterlike Symbols" Unicode character block.
 946          * @since 1.2
 947          */
 948         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 949             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 950                              "LETTERLIKE SYMBOLS",
 951                              "LETTERLIKESYMBOLS");
 952 
 953         /**
 954          * Constant for the "Number Forms" Unicode character block.
 955          * @since 1.2
 956          */
 957         public static final UnicodeBlock NUMBER_FORMS =
 958             new UnicodeBlock("NUMBER_FORMS",
 959                              "NUMBER FORMS",
 960                              "NUMBERFORMS");
 961 
 962         /**
 963          * Constant for the "Arrows" Unicode character block.
 964          * @since 1.2
 965          */
 966         public static final UnicodeBlock ARROWS =
 967             new UnicodeBlock("ARROWS");
 968 
 969         /**
 970          * Constant for the "Mathematical Operators" Unicode character block.
 971          * @since 1.2
 972          */
 973         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 974             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 975                              "MATHEMATICAL OPERATORS",
 976                              "MATHEMATICALOPERATORS");
 977 
 978         /**
 979          * Constant for the "Miscellaneous Technical" Unicode character block.
 980          * @since 1.2
 981          */
 982         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 983             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 984                              "MISCELLANEOUS TECHNICAL",
 985                              "MISCELLANEOUSTECHNICAL");
 986 
 987         /**
 988          * Constant for the "Control Pictures" Unicode character block.
 989          * @since 1.2
 990          */
 991         public static final UnicodeBlock CONTROL_PICTURES =
 992             new UnicodeBlock("CONTROL_PICTURES",
 993                              "CONTROL PICTURES",
 994                              "CONTROLPICTURES");
 995 
 996         /**
 997          * Constant for the "Optical Character Recognition" Unicode character
              * block.
 998          * @since 1.2
 999          */
1000         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1001             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1002                              "OPTICAL CHARACTER RECOGNITION",
1003                              "OPTICALCHARACTERRECOGNITION");
1004 
1005         /**
1006          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1007          * @since 1.2
1008          */
1009         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1010             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1011                              "ENCLOSED ALPHANUMERICS",
1012                              "ENCLOSEDALPHANUMERICS");
1013 
1014         /**
1015          * Constant for the "Box Drawing" Unicode character block.
1016          * @since 1.2
1017          */
1018         public static final UnicodeBlock BOX_DRAWING =
1019             new UnicodeBlock("BOX_DRAWING",
1020                              "BOX DRAWING",
1021                              "BOXDRAWING");
1022 
1023         /**
1024          * Constant for the "Block Elements" Unicode character block.
1025          * @since 1.2
1026          */
1027         public static final UnicodeBlock BLOCK_ELEMENTS =
1028             new UnicodeBlock("BLOCK_ELEMENTS",
1029                              "BLOCK ELEMENTS",
1030                              "BLOCKELEMENTS");
1031 
1032         /**
1033          * Constant for the "Geometric Shapes" Unicode character block.
1034          * @since 1.2
1035          */
1036         public static final UnicodeBlock GEOMETRIC_SHAPES =
1037             new UnicodeBlock("GEOMETRIC_SHAPES",
1038                              "GEOMETRIC SHAPES",
1039                              "GEOMETRICSHAPES");
1040 
1041         /**
1042          * Constant for the "Miscellaneous Symbols" Unicode character block.
1043          * @since 1.2
1044          */
1045         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1046             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1047                              "MISCELLANEOUS SYMBOLS",
1048                              "MISCELLANEOUSSYMBOLS");
1049 
1050         /**
1051          * Constant for the "Dingbats" Unicode character block.
1052          * @since 1.2
1053          */
1054         public static final UnicodeBlock DINGBATS =
1055             new UnicodeBlock("DINGBATS");
1056 
1057         /**
1058          * Constant for the "CJK Symbols and Punctuation" Unicode character
              * block.
1059          * @since 1.2
1060          */
1061         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1062             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1063                              "CJK SYMBOLS AND PUNCTUATION",
1064                              "CJKSYMBOLSANDPUNCTUATION");
1065 
1066         /**
1067          * Constant for the "Hiragana" Unicode character block.
1068          * @since 1.2
1069          */
1070         public static final UnicodeBlock HIRAGANA =
1071             new UnicodeBlock("HIRAGANA");
1072 
1073         /**
1074          * Constant for the "Katakana" Unicode character block.
1075          * @since 1.2
1076          */
1077         public static final UnicodeBlock KATAKANA =
1078             new UnicodeBlock("KATAKANA");
1079 
1080         /**
1081          * Constant for the "Bopomofo" Unicode character block.
1082          * @since 1.2
1083          */
1084         public static final UnicodeBlock BOPOMOFO =
1085             new UnicodeBlock("BOPOMOFO");
1086 
1087         /**
1088          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1089          * @since 1.2
1090          */
1091         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1092             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1093                              "HANGUL COMPATIBILITY JAMO",
1094                              "HANGULCOMPATIBILITYJAMO");
1095 
1096         /**
1097          * Constant for the "Kanbun" Unicode character block.
1098          * @since 1.2
1099          */
1100         public static final UnicodeBlock KANBUN =
1101             new UnicodeBlock("KANBUN");
1102 
1103         /**
1104          * Constant for the "Enclosed CJK Letters and Months" Unicode character
              * block.
1105          * @since 1.2
1106          */
1107         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1108             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1109                              "ENCLOSED CJK LETTERS AND MONTHS",
1110                              "ENCLOSEDCJKLETTERSANDMONTHS");
1111 
1112         /**
1113          * Constant for the "CJK Compatibility" Unicode character block.
1114          * @since 1.2
1115          */
1116         public static final UnicodeBlock CJK_COMPATIBILITY =
1117             new UnicodeBlock("CJK_COMPATIBILITY",
1118                              "CJK COMPATIBILITY",
1119                              "CJKCOMPATIBILITY");
1120 
1121         /**
1122          * Constant for the "CJK Unified Ideographs" Unicode character block.
1123          * @since 1.2
1124          */
1125         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1126             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1127                              "CJK UNIFIED IDEOGRAPHS",
1128                              "CJKUNIFIEDIDEOGRAPHS");
1129 
1130         /**
1131          * Constant for the "Hangul Syllables" Unicode character block.
1132          * @since 1.2
1133          */
1134         public static final UnicodeBlock HANGUL_SYLLABLES =
1135             new UnicodeBlock("HANGUL_SYLLABLES",
1136                              "HANGUL SYLLABLES",
1137                              "HANGULSYLLABLES");
1138 
1139         /**
1140          * Constant for the "Private Use Area" Unicode character block.
1141          * @since 1.2
1142          */
1143         public static final UnicodeBlock PRIVATE_USE_AREA =
1144             new UnicodeBlock("PRIVATE_USE_AREA",
1145                              "PRIVATE USE AREA",
1146                              "PRIVATEUSEAREA");
1147 
1148         /**
1149          * Constant for the "CJK Compatibility Ideographs" Unicode character
1150          * block.
1151          * @since 1.2
1152          */
1153         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1154             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1155                              "CJK COMPATIBILITY IDEOGRAPHS",
1156                              "CJKCOMPATIBILITYIDEOGRAPHS");
1157 
1158         /**
1159          * Constant for the "Alphabetic Presentation Forms" Unicode character
              * block.
1160          * @since 1.2
1161          */
1162         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1163             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1164                              "ALPHABETIC PRESENTATION FORMS",
1165                              "ALPHABETICPRESENTATIONFORMS");
1166 
1167         /**
1168          * Constant for the "Arabic Presentation Forms-A" Unicode character
1169          * block.
1170          * @since 1.2
1171          */
1172         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1173             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1174                              "ARABIC PRESENTATION FORMS-A",
1175                              "ARABICPRESENTATIONFORMS-A");
1176 
1177         /**
1178          * Constant for the "Combining Half Marks" Unicode character block.
1179          * @since 1.2
1180          */
1181         public static final UnicodeBlock COMBINING_HALF_MARKS =
1182             new UnicodeBlock("COMBINING_HALF_MARKS",
1183                              "COMBINING HALF MARKS",
1184                              "COMBININGHALFMARKS");
1185 
1186         /**
1187          * Constant for the "CJK Compatibility Forms" Unicode character block.
1188          * @since 1.2
1189          */
1190         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1191             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1192                              "CJK COMPATIBILITY FORMS",
1193                              "CJKCOMPATIBILITYFORMS");
1194 
1195         /**
1196          * Constant for the "Small Form Variants" Unicode character block.
1197          * @since 1.2
1198          */
1199         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1200             new UnicodeBlock("SMALL_FORM_VARIANTS",
1201                              "SMALL FORM VARIANTS",
1202                              "SMALLFORMVARIANTS");
1203 
1204         /**
1205          * Constant for the "Arabic Presentation Forms-B" Unicode character
              * block.
1206          * @since 1.2
1207          */
1208         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1209             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1210                              "ARABIC PRESENTATION FORMS-B",
1211                              "ARABICPRESENTATIONFORMS-B");
1212 
1213         /**
1214          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1215          * block.
1216          * @since 1.2
1217          */
1218         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1219             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1220                              "HALFWIDTH AND FULLWIDTH FORMS",
1221                              "HALFWIDTHANDFULLWIDTHFORMS");
1222 
1223         /**
1224          * Constant for the "Specials" Unicode character block.
1225          * @since 1.2
1226          */
1227         public static final UnicodeBlock SPECIALS =
1228             new UnicodeBlock("SPECIALS");
1229 
1230         /**
              * Legacy constant for the surrogates area, superseded by the three
              * surrogate block constants named in the deprecation note below.
              *
1231          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1232          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1233          *             {@link #LOW_SURROGATES}. These new constants match
1234          *             the block definitions of the Unicode Standard.
1235          *             The {@link #of(char)} and {@link #of(int)} methods
1236          *             return the new constants, not SURROGATES_AREA.
1237          */
1238         @Deprecated
1239         public static final UnicodeBlock SURROGATES_AREA =
1240             new UnicodeBlock("SURROGATES_AREA");
1241 
1242         /**
1243          * Constant for the "Syriac" Unicode character block.
1244          * @since 1.4
1245          */
1246         public static final UnicodeBlock SYRIAC =
1247             new UnicodeBlock("SYRIAC");
1248 
1249         /**
1250          * Constant for the "Thaana" Unicode character block.
1251          * @since 1.4
1252          */
1253         public static final UnicodeBlock THAANA =
1254             new UnicodeBlock("THAANA");
1255 
1256         /**
1257          * Constant for the "Sinhala" Unicode character block.
1258          * @since 1.4
1259          */
1260         public static final UnicodeBlock SINHALA =
1261             new UnicodeBlock("SINHALA");
1262 
1263         /**
1264          * Constant for the "Myanmar" Unicode character block.
1265          * @since 1.4
1266          */
1267         public static final UnicodeBlock MYANMAR =
1268             new UnicodeBlock("MYANMAR");
1269 
1270         /**
1271          * Constant for the "Ethiopic" Unicode character block.
1272          * @since 1.4
1273          */
1274         public static final UnicodeBlock ETHIOPIC =
1275             new UnicodeBlock("ETHIOPIC");
1276 
1277         /**
1278          * Constant for the "Cherokee" Unicode character block.
1279          * @since 1.4
1280          */
1281         public static final UnicodeBlock CHEROKEE =
1282             new UnicodeBlock("CHEROKEE");
1283 
1284         /**
1285          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode
              * character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1289             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1290                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1291                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1292 
1293         /**
1294          * Constant for the "Ogham" Unicode character block.
1295          * @since 1.4
1296          */
1297         public static final UnicodeBlock OGHAM =
1298             new UnicodeBlock("OGHAM");
1299 
1300         /**
1301          * Constant for the "Runic" Unicode character block.
1302          * @since 1.4
1303          */
1304         public static final UnicodeBlock RUNIC =
1305             new UnicodeBlock("RUNIC");
1306 
1307         /**
1308          * Constant for the "Khmer" Unicode character block.
1309          * @since 1.4
1310          */
1311         public static final UnicodeBlock KHMER =
1312             new UnicodeBlock("KHMER");
1313 
1314         /**
1315          * Constant for the "Mongolian" Unicode character block.
1316          * @since 1.4
1317          */
1318         public static final UnicodeBlock MONGOLIAN =
1319             new UnicodeBlock("MONGOLIAN");
1320 
1321         /**
1322          * Constant for the "Braille Patterns" Unicode character block.
1323          * @since 1.4
1324          */
1325         public static final UnicodeBlock BRAILLE_PATTERNS =
1326             new UnicodeBlock("BRAILLE_PATTERNS",
1327                              "BRAILLE PATTERNS",
1328                              "BRAILLEPATTERNS");
1329 
1330         /**
1331          * Constant for the "CJK Radicals Supplement" Unicode character block.
1332          * @since 1.4
1333          */
1334         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1335             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1336                              "CJK RADICALS SUPPLEMENT",
1337                              "CJKRADICALSSUPPLEMENT");
1338 
1339         /**
1340          * Constant for the "Kangxi Radicals" Unicode character block.
1341          * @since 1.4
1342          */
1343         public static final UnicodeBlock KANGXI_RADICALS =
1344             new UnicodeBlock("KANGXI_RADICALS",
1345                              "KANGXI RADICALS",
1346                              "KANGXIRADICALS");
1347 
1348         /**
1349          * Constant for the "Ideographic Description Characters" Unicode
              * character block.
1350          * @since 1.4
1351          */
1352         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1353             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1354                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1355                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1356 
1357         /**
1358          * Constant for the "Bopomofo Extended" Unicode character block.
1359          * @since 1.4
1360          */
1361         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1362             new UnicodeBlock("BOPOMOFO_EXTENDED",
1363                              "BOPOMOFO EXTENDED",
1364                              "BOPOMOFOEXTENDED");
1365 
1366         /**
1367          * Constant for the "CJK Unified Ideographs Extension A" Unicode
              * character block.
1368          * @since 1.4
1369          */
1370         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1371             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1372                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1373                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1374 
1375         /**
1376          * Constant for the "Yi Syllables" Unicode character block.
1377          * @since 1.4
1378          */
1379         public static final UnicodeBlock YI_SYLLABLES =
1380             new UnicodeBlock("YI_SYLLABLES",
1381                              "YI SYLLABLES",
1382                              "YISYLLABLES");
1383 
1384         /**
1385          * Constant for the "Yi Radicals" Unicode character block.
1386          * @since 1.4
1387          */
1388         public static final UnicodeBlock YI_RADICALS =
1389             new UnicodeBlock("YI_RADICALS",
1390                              "YI RADICALS",
1391                              "YIRADICALS");
1392 
1393         /**
1394          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * This block is also known as "Cyrillic Supplement".
1395          * @since 1.5
1396          */
1397         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1398             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1399                              "CYRILLIC SUPPLEMENTARY",
1400                              "CYRILLICSUPPLEMENTARY",
1401                              "CYRILLIC SUPPLEMENT",
1402                              "CYRILLICSUPPLEMENT");
1403 
1404         /**
1405          * Constant for the "Tagalog" Unicode character block.
1406          * @since 1.5
1407          */
1408         public static final UnicodeBlock TAGALOG =
1409             new UnicodeBlock("TAGALOG");
1410 
1411         /**
1412          * Constant for the "Hanunoo" Unicode character block.
1413          * @since 1.5
1414          */
1415         public static final UnicodeBlock HANUNOO =
1416             new UnicodeBlock("HANUNOO");
1417 
1418         /**
1419          * Constant for the "Buhid" Unicode character block.
1420          * @since 1.5
1421          */
1422         public static final UnicodeBlock BUHID =
1423             new UnicodeBlock("BUHID");
1424 
1425         /**
1426          * Constant for the "Tagbanwa" Unicode character block.
1427          * @since 1.5
1428          */
1429         public static final UnicodeBlock TAGBANWA =
1430             new UnicodeBlock("TAGBANWA");
1431 
1432         /**
1433          * Constant for the "Limbu" Unicode character block.
1434          * @since 1.5
1435          */
1436         public static final UnicodeBlock LIMBU =
1437             new UnicodeBlock("LIMBU");
1438 
1439         /**
1440          * Constant for the "Tai Le" Unicode character block.
1441          * @since 1.5
1442          */
1443         public static final UnicodeBlock TAI_LE =
1444             new UnicodeBlock("TAI_LE",
1445                              "TAI LE",
1446                              "TAILE");
1447 
1448         /**
1449          * Constant for the "Khmer Symbols" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock KHMER_SYMBOLS =
1453             new UnicodeBlock("KHMER_SYMBOLS",
1454                              "KHMER SYMBOLS",
1455                              "KHMERSYMBOLS");
1456 
1457         /**
1458          * Constant for the "Phonetic Extensions" Unicode character block.
1459          * @since 1.5
1460          */
1461         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1462             new UnicodeBlock("PHONETIC_EXTENSIONS",
1463                              "PHONETIC EXTENSIONS",
1464                              "PHONETICEXTENSIONS");
1465 
1466         /**
1467          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode
              * character block.
1468          * @since 1.5
1469          */
1470         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1471             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1472                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1473                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1474 
1475         /**
1476          * Constant for the "Supplemental Arrows-A" Unicode character block.
1477          * @since 1.5
1478          */
1479         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1480             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1481                              "SUPPLEMENTAL ARROWS-A",
1482                              "SUPPLEMENTALARROWS-A");
1483 
1484         /**
1485          * Constant for the "Supplemental Arrows-B" Unicode character block.
1486          * @since 1.5
1487          */
1488         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1489             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1490                              "SUPPLEMENTAL ARROWS-B",
1491                              "SUPPLEMENTALARROWS-B");
1492 
1493         /**
1494          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1495          * character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1499             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1500                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1501                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1502 
1503         /**
1504          * Constant for the "Supplemental Mathematical Operators" Unicode
1505          * character block.
1506          * @since 1.5
1507          */
1508         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1509             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1510                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1511                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1512 
1513         /**
1514          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1515          * block.
1516          * @since 1.5
1517          */
1518         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1519             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1520                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1521                              "MISCELLANEOUSSYMBOLSANDARROWS");
1522 
1523         /**
1524          * Constant for the "Katakana Phonetic Extensions" Unicode character
1525          * block.
1526          * @since 1.5
1527          */
1528         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1529             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1530                              "KATAKANA PHONETIC EXTENSIONS",
1531                              "KATAKANAPHONETICEXTENSIONS");
1532 
1533         /**
1534          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1535          * @since 1.5
1536          */
1537         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1538             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1539                              "YIJING HEXAGRAM SYMBOLS",
1540                              "YIJINGHEXAGRAMSYMBOLS");
1541 
1542         /**
1543          * Constant for the "Variation Selectors" Unicode character block.
1544          * @since 1.5
1545          */
1546         public static final UnicodeBlock VARIATION_SELECTORS =
1547             new UnicodeBlock("VARIATION_SELECTORS",
1548                              "VARIATION SELECTORS",
1549                              "VARIATIONSELECTORS");
1550 
1551         /**
1552          * Constant for the "Linear B Syllabary" Unicode character block.
1553          * @since 1.5
1554          */
1555         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1556             new UnicodeBlock("LINEAR_B_SYLLABARY",
1557                              "LINEAR B SYLLABARY",
1558                              "LINEARBSYLLABARY");
1559 
1560         /**
1561          * Constant for the "Linear B Ideograms" Unicode character block.
1562          * @since 1.5
1563          */
1564         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1565             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1566                              "LINEAR B IDEOGRAMS",
1567                              "LINEARBIDEOGRAMS");
1568 
1569         /**
1570          * Constant for the "Aegean Numbers" Unicode character block.
1571          * @since 1.5
1572          */
1573         public static final UnicodeBlock AEGEAN_NUMBERS =
1574             new UnicodeBlock("AEGEAN_NUMBERS",
1575                              "AEGEAN NUMBERS",
1576                              "AEGEANNUMBERS");
1577 
1578         /**
1579          * Constant for the "Old Italic" Unicode character block.
1580          * @since 1.5
1581          */
1582         public static final UnicodeBlock OLD_ITALIC =
1583             new UnicodeBlock("OLD_ITALIC",
1584                              "OLD ITALIC",
1585                              "OLDITALIC");
1586 
1587         /**
1588          * Constant for the "Gothic" Unicode character block.
1589          * @since 1.5
1590          */
1591         public static final UnicodeBlock GOTHIC =
1592             new UnicodeBlock("GOTHIC");
1593 
1594         /**
1595          * Constant for the "Ugaritic" Unicode character block.
1596          * @since 1.5
1597          */
1598         public static final UnicodeBlock UGARITIC =
1599             new UnicodeBlock("UGARITIC");
1600 
1601         /**
1602          * Constant for the "Deseret" Unicode character block.
1603          * @since 1.5
1604          */
1605         public static final UnicodeBlock DESERET =
1606             new UnicodeBlock("DESERET");
1607 
1608         /**
1609          * Constant for the "Shavian" Unicode character block.
1610          * @since 1.5
1611          */
1612         public static final UnicodeBlock SHAVIAN =
1613             new UnicodeBlock("SHAVIAN");
1614 
1615         /**
1616          * Constant for the "Osmanya" Unicode character block.
1617          * @since 1.5
1618          */
1619         public static final UnicodeBlock OSMANYA =
1620             new UnicodeBlock("OSMANYA");
1621 
1622         /**
1623          * Constant for the "Cypriot Syllabary" Unicode character block.
1624          * @since 1.5
1625          */
1626         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1627             new UnicodeBlock("CYPRIOT_SYLLABARY",
1628                              "CYPRIOT SYLLABARY",
1629                              "CYPRIOTSYLLABARY");
1630 
1631         /**
1632          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1633          * @since 1.5
1634          */
1635         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1636             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1637                              "BYZANTINE MUSICAL SYMBOLS",
1638                              "BYZANTINEMUSICALSYMBOLS");
1639 
1640         /**
1641          * Constant for the "Musical Symbols" Unicode character block.
1642          * @since 1.5
1643          */
1644         public static final UnicodeBlock MUSICAL_SYMBOLS =
1645             new UnicodeBlock("MUSICAL_SYMBOLS",
1646                              "MUSICAL SYMBOLS",
1647                              "MUSICALSYMBOLS");
1648 
1649         /**
1650          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1651          * @since 1.5
1652          */
1653         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1654             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1655                              "TAI XUAN JING SYMBOLS",
1656                              "TAIXUANJINGSYMBOLS");
1657 
1658         /**
1659          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1660          * character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1664             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1665                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1666                              "MATHEMATICALALPHANUMERICSYMBOLS");
1667 
1668         /**
1669          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1670          * character block.
1671          * @since 1.5
1672          */
1673         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1674             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1675                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1676                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1677 
1678         /**
1679          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1680          * @since 1.5
1681          */
1682         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1683             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1684                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1685                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1686 
1687         /**
1688          * Constant for the "Tags" Unicode character block.
1689          * @since 1.5
1690          */
1691         public static final UnicodeBlock TAGS =
1692             new UnicodeBlock("TAGS");
1693 
1694         /**
1695          * Constant for the "Variation Selectors Supplement" Unicode character
1696          * block.
1697          * @since 1.5
1698          */
1699         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1700             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1701                              "VARIATION SELECTORS SUPPLEMENT",
1702                              "VARIATIONSELECTORSSUPPLEMENT");
1703 
1704         /**
1705          * Constant for the "Supplementary Private Use Area-A" Unicode character
1706          * block.
1707          * @since 1.5
1708          */
1709         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1710             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1711                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1712                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // spaces removed, hyphen kept
1713 
1714         /**
1715          * Constant for the "Supplementary Private Use Area-B" Unicode character
1716          * block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1720             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1721                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1722                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // spaces removed, hyphen kept
1723 
1724         /**
1725          * Constant for the "High Surrogates" Unicode character block.
1726          * This block represents code point values in the high surrogate
1727          * range: U+D800 through U+DB7F.
1728          *
1729          * @since 1.5
1730          */
1731         public static final UnicodeBlock HIGH_SURROGATES =
1732             new UnicodeBlock("HIGH_SURROGATES",
1733                              "HIGH SURROGATES",
1734                              "HIGHSURROGATES");
1735 
1736         /**
1737          * Constant for the "High Private Use Surrogates" Unicode character
1738          * block.
1739          * This block represents code point values in the private use high
1740          * surrogate range: U+DB80 through U+DBFF.
1741          *
1742          * @since 1.5
1743          */
1744         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1745             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1746                              "HIGH PRIVATE USE SURROGATES",
1747                              "HIGHPRIVATEUSESURROGATES");
1748 
1749         /**
1750          * Constant for the "Low Surrogates" Unicode character block.
1751          * This block represents code point values in the low surrogate
1752          * range: U+DC00 through U+DFFF.
1753          *
1754          * @since 1.5
1755          */
1756         public static final UnicodeBlock LOW_SURROGATES =
1757             new UnicodeBlock("LOW_SURROGATES",
1758                              "LOW SURROGATES",
1759                              "LOWSURROGATES");
1760 
1761         /**
1762          * Constant for the "Arabic Supplement" Unicode character block.
1763          * @since 1.7
1764          */
1765         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1766             new UnicodeBlock("ARABIC_SUPPLEMENT",
1767                              "ARABIC SUPPLEMENT",
1768                              "ARABICSUPPLEMENT");
1769 
1770         /**
1771          * Constant for the "NKo" Unicode character block.
1772          * @since 1.7
1773          */
1774         public static final UnicodeBlock NKO =
1775             new UnicodeBlock("NKO");
1776 
1777         /**
1778          * Constant for the "Samaritan" Unicode character block.
1779          * @since 1.7
1780          */
1781         public static final UnicodeBlock SAMARITAN =
1782             new UnicodeBlock("SAMARITAN");
1783 
1784         /**
1785          * Constant for the "Mandaic" Unicode character block.
1786          * @since 1.7
1787          */
1788         public static final UnicodeBlock MANDAIC =
1789             new UnicodeBlock("MANDAIC");
1790 
1791         /**
1792          * Constant for the "Ethiopic Supplement" Unicode character block.
1793          * @since 1.7
1794          */
1795         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1796             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1797                              "ETHIOPIC SUPPLEMENT",
1798                              "ETHIOPICSUPPLEMENT");
1799 
1800         /**
1801          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1802          * Unicode character block.
1803          * @since 1.7
1804          */
1805         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1806             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1807                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1808                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1809 
1810         /**
1811          * Constant for the "New Tai Lue" Unicode character block.
1812          * @since 1.7
1813          */
1814         public static final UnicodeBlock NEW_TAI_LUE =
1815             new UnicodeBlock("NEW_TAI_LUE",
1816                              "NEW TAI LUE",
1817                              "NEWTAILUE");
1818 
1819         /**
1820          * Constant for the "Buginese" Unicode character block.
1821          * @since 1.7
1822          */
1823         public static final UnicodeBlock BUGINESE =
1824             new UnicodeBlock("BUGINESE");
1825 
1826         /**
1827          * Constant for the "Tai Tham" Unicode character block.
1828          * @since 1.7
1829          */
1830         public static final UnicodeBlock TAI_THAM =
1831             new UnicodeBlock("TAI_THAM",
1832                              "TAI THAM",
1833                              "TAITHAM");
1834 
1835         /**
1836          * Constant for the "Balinese" Unicode character block.
1837          * @since 1.7
1838          */
1839         public static final UnicodeBlock BALINESE =
1840             new UnicodeBlock("BALINESE");
1841 
1842         /**
1843          * Constant for the "Sundanese" Unicode character block.
1844          * @since 1.7
1845          */
1846         public static final UnicodeBlock SUNDANESE =
1847             new UnicodeBlock("SUNDANESE");
1848 
1849         /**
1850          * Constant for the "Batak" Unicode character block.
1851          * @since 1.7
1852          */
1853         public static final UnicodeBlock BATAK =
1854             new UnicodeBlock("BATAK");
1855 
1856         /**
1857          * Constant for the "Lepcha" Unicode character block.
1858          * @since 1.7
1859          */
1860         public static final UnicodeBlock LEPCHA =
1861             new UnicodeBlock("LEPCHA");
1862 
1863         /**
1864          * Constant for the "Ol Chiki" Unicode character block.
1865          * @since 1.7
1866          */
1867         public static final UnicodeBlock OL_CHIKI =
1868             new UnicodeBlock("OL_CHIKI",
1869                              "OL CHIKI",
1870                              "OLCHIKI");
1871 
1872         /**
1873          * Constant for the "Vedic Extensions" Unicode character block.
1874          * @since 1.7
1875          */
1876         public static final UnicodeBlock VEDIC_EXTENSIONS =
1877             new UnicodeBlock("VEDIC_EXTENSIONS",
1878                              "VEDIC EXTENSIONS",
1879                              "VEDICEXTENSIONS");
1880 
1881         /**
1882          * Constant for the "Phonetic Extensions Supplement" Unicode character
1883          * block.
1884          * @since 1.7
1885          */
1886         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1887             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1888                              "PHONETIC EXTENSIONS SUPPLEMENT",
1889                              "PHONETICEXTENSIONSSUPPLEMENT");
1890 
1891         /**
1892          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1893          * character block.
1894          * @since 1.7
1895          */
1896         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1897             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1898                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1899                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1900 
1901         /**
1902          * Constant for the "Glagolitic" Unicode character block.
1903          * @since 1.7
1904          */
1905         public static final UnicodeBlock GLAGOLITIC =
1906             new UnicodeBlock("GLAGOLITIC");
1907 
1908         /**
1909          * Constant for the "Latin Extended-C" Unicode character block.
1910          * @since 1.7
1911          */
1912         public static final UnicodeBlock LATIN_EXTENDED_C =
1913             new UnicodeBlock("LATIN_EXTENDED_C",
1914                              "LATIN EXTENDED-C",
1915                              "LATINEXTENDED-C");  // spaces removed, hyphen kept
1916 
1917         /**
1918          * Constant for the "Coptic" Unicode character block.
1919          * @since 1.7
1920          */
1921         public static final UnicodeBlock COPTIC =
1922             new UnicodeBlock("COPTIC");
1923 
1924         /**
1925          * Constant for the "Georgian Supplement" Unicode character block.
1926          * @since 1.7
1927          */
1928         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1929             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1930                              "GEORGIAN SUPPLEMENT",
1931                              "GEORGIANSUPPLEMENT");
1932 
1933         /**
1934          * Constant for the "Tifinagh" Unicode character block.
1935          * @since 1.7
1936          */
1937         public static final UnicodeBlock TIFINAGH =
1938             new UnicodeBlock("TIFINAGH");
1939 
1940         /**
1941          * Constant for the "Ethiopic Extended" Unicode character block.
1942          * @since 1.7
1943          */
1944         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1945             new UnicodeBlock("ETHIOPIC_EXTENDED",
1946                              "ETHIOPIC EXTENDED",
1947                              "ETHIOPICEXTENDED");
1948 
1949         /**
1950          * Constant for the "Cyrillic Extended-A" Unicode character block.
1951          * @since 1.7
1952          */
1953         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1954             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1955                              "CYRILLIC EXTENDED-A",
1956                              "CYRILLICEXTENDED-A");  // spaces removed, hyphen kept
1957 
1958         /**
1959          * Constant for the "Supplemental Punctuation" Unicode character block.
1960          * @since 1.7
1961          */
1962         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1963             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1964                              "SUPPLEMENTAL PUNCTUATION",
1965                              "SUPPLEMENTALPUNCTUATION");
1966 
1967         /**
1968          * Constant for the "CJK Strokes" Unicode character block.
1969          * @since 1.7
1970          */
1971         public static final UnicodeBlock CJK_STROKES =
1972             new UnicodeBlock("CJK_STROKES",
1973                              "CJK STROKES",
1974                              "CJKSTROKES");
1975 
1976         /**
1977          * Constant for the "Lisu" Unicode character block.
1978          * @since 1.7
1979          */
1980         public static final UnicodeBlock LISU =
1981             new UnicodeBlock("LISU");
1982 
1983         /**
1984          * Constant for the "Vai" Unicode character block.
1985          * @since 1.7
1986          */
1987         public static final UnicodeBlock VAI =
1988             new UnicodeBlock("VAI");
1989 
1990         /**
1991          * Constant for the "Cyrillic Extended-B" Unicode character block.
1992          * @since 1.7
1993          */
1994         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1995             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1996                              "CYRILLIC EXTENDED-B",
1997                              "CYRILLICEXTENDED-B");  // spaces removed, hyphen kept
1998 
1999         /**
2000          * Constant for the "Bamum" Unicode character block.
2001          * @since 1.7
2002          */
2003         public static final UnicodeBlock BAMUM =
2004             new UnicodeBlock("BAMUM");
2005 
2006         /**
2007          * Constant for the "Modifier Tone Letters" Unicode character block.
2008          * @since 1.7
2009          */
2010         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2011             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2012                              "MODIFIER TONE LETTERS",
2013                              "MODIFIERTONELETTERS");
2014 
2015         /**
2016          * Constant for the "Latin Extended-D" Unicode character block.
2017          * @since 1.7
2018          */
2019         public static final UnicodeBlock LATIN_EXTENDED_D =
2020             new UnicodeBlock("LATIN_EXTENDED_D",
2021                              "LATIN EXTENDED-D",
2022                              "LATINEXTENDED-D");  // spaces removed, hyphen kept
2023 
2024         /**
2025          * Constant for the "Syloti Nagri" Unicode character block.
2026          * @since 1.7
2027          */
2028         public static final UnicodeBlock SYLOTI_NAGRI =
2029             new UnicodeBlock("SYLOTI_NAGRI",
2030                              "SYLOTI NAGRI",
2031                              "SYLOTINAGRI");
2032 
2033         /**
2034          * Constant for the "Common Indic Number Forms" Unicode character block.
2035          * @since 1.7
2036          */
2037         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2038             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2039                              "COMMON INDIC NUMBER FORMS",
2040                              "COMMONINDICNUMBERFORMS");
2041 
2042         /**
2043          * Constant for the "Phags-pa" Unicode character block.
2044          * @since 1.7
2045          */
2046         public static final UnicodeBlock PHAGS_PA =
2047             new UnicodeBlock("PHAGS_PA",  // only two strings: the name contains no spaces,
2048                              "PHAGS-PA"); // so a space-stripped form would presumably duplicate this one
2049 
2050         /**
2051          * Constant for the "Saurashtra" Unicode character block.
2052          * @since 1.7
2053          */
2054         public static final UnicodeBlock SAURASHTRA =
2055             new UnicodeBlock("SAURASHTRA");
2056 
2057         /**
2058          * Constant for the "Devanagari Extended" Unicode character block.
2059          * @since 1.7
2060          */
2061         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2062             new UnicodeBlock("DEVANAGARI_EXTENDED",
2063                              "DEVANAGARI EXTENDED",
2064                              "DEVANAGARIEXTENDED");
2065 
2066         /**
2067          * Constant for the "Kayah Li" Unicode character block.
2068          * @since 1.7
2069          */
2070         public static final UnicodeBlock KAYAH_LI =
2071             new UnicodeBlock("KAYAH_LI",
2072                              "KAYAH LI",
2073                              "KAYAHLI");
2074 
2075         /**
2076          * Constant for the "Rejang" Unicode character block.
2077          * @since 1.7
2078          */
2079         public static final UnicodeBlock REJANG =
2080             new UnicodeBlock("REJANG");
2081 
2082         /**
2083          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2084          * @since 1.7
2085          */
2086         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2087             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2088                              "HANGUL JAMO EXTENDED-A",
2089                              "HANGULJAMOEXTENDED-A");  // spaces removed, hyphen kept
2090 
2091         /**
2092          * Constant for the "Javanese" Unicode character block.
2093          * @since 1.7
2094          */
2095         public static final UnicodeBlock JAVANESE =
2096             new UnicodeBlock("JAVANESE");
2097 
2098         /**
2099          * Constant for the "Cham" Unicode character block.
2100          * @since 1.7
2101          */
2102         public static final UnicodeBlock CHAM =
2103             new UnicodeBlock("CHAM");
2104 
2105         /**
2106          * Constant for the "Myanmar Extended-A" Unicode character block.
2107          * @since 1.7
2108          */
2109         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2110             new UnicodeBlock("MYANMAR_EXTENDED_A",
2111                              "MYANMAR EXTENDED-A",
2112                              "MYANMAREXTENDED-A");  // spaces removed, hyphen kept
2113 
2114         /**
2115          * Constant for the "Tai Viet" Unicode character block.
2116          * @since 1.7
2117          */
2118         public static final UnicodeBlock TAI_VIET =
2119             new UnicodeBlock("TAI_VIET",
2120                              "TAI VIET",
2121                              "TAIVIET");
2122 
2123         /**
2124          * Constant for the "Ethiopic Extended-A" Unicode character block.
2125          * @since 1.7
2126          */
2127         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2128             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2129                              "ETHIOPIC EXTENDED-A",
2130                              "ETHIOPICEXTENDED-A");  // spaces removed, hyphen kept
2131 
2132         /**
2133          * Constant for the "Meetei Mayek" Unicode character block.
2134          * @since 1.7
2135          */
2136         public static final UnicodeBlock MEETEI_MAYEK =
2137             new UnicodeBlock("MEETEI_MAYEK",
2138                              "MEETEI MAYEK",
2139                              "MEETEIMAYEK");
2140 
2141         /**
2142          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2143          * @since 1.7
2144          */
2145         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2146             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2147                              "HANGUL JAMO EXTENDED-B",
2148                              "HANGULJAMOEXTENDED-B");  // spaces removed, hyphen kept
2149 
2150         /**
2151          * Constant for the "Vertical Forms" Unicode character block.
2152          * @since 1.7
2153          */
2154         public static final UnicodeBlock VERTICAL_FORMS =
2155             new UnicodeBlock("VERTICAL_FORMS",
2156                              "VERTICAL FORMS",
2157                              "VERTICALFORMS");
2158 
2159         /**
2160          * Constant for the "Ancient Greek Numbers" Unicode character block.
2161          * @since 1.7
2162          */
2163         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2164             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // same text as the field name
2165                              "ANCIENT GREEK NUMBERS",  // block name in upper case
2166                              "ANCIENTGREEKNUMBERS");   // block name with spaces removed
2167 
2168         /**
2169          * Constant for the "Ancient Symbols" Unicode character block.
2170          * @since 1.7
2171          */
2172         public static final UnicodeBlock ANCIENT_SYMBOLS =
2173             new UnicodeBlock("ANCIENT_SYMBOLS",
2174                              "ANCIENT SYMBOLS",
2175                              "ANCIENTSYMBOLS");
2176 
2177         /**
2178          * Constant for the "Phaistos Disc" Unicode character block.
2179          * @since 1.7
2180          */
2181         public static final UnicodeBlock PHAISTOS_DISC =
2182             new UnicodeBlock("PHAISTOS_DISC",
2183                              "PHAISTOS DISC",
2184                              "PHAISTOSDISC");
2185 
2186         /**
2187          * Constant for the "Lycian" Unicode character block.
2188          * @since 1.7
2189          */
2190         public static final UnicodeBlock LYCIAN =
2191             new UnicodeBlock("LYCIAN");
2192 
2193         /**
2194          * Constant for the "Carian" Unicode character block.
2195          * @since 1.7
2196          */
2197         public static final UnicodeBlock CARIAN =
2198             new UnicodeBlock("CARIAN");
2199 
2200         /**
2201          * Constant for the "Old Persian" Unicode character block.
2202          * @since 1.7
2203          */
2204         public static final UnicodeBlock OLD_PERSIAN =
2205             new UnicodeBlock("OLD_PERSIAN",
2206                              "OLD PERSIAN",
2207                              "OLDPERSIAN");
2208 
2209         /**
2210          * Constant for the "Imperial Aramaic" Unicode character block.
2211          * @since 1.7
2212          */
2213         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2214             new UnicodeBlock("IMPERIAL_ARAMAIC",
2215                              "IMPERIAL ARAMAIC",
2216                              "IMPERIALARAMAIC");
2217 
2218         /**
2219          * Constant for the "Phoenician" Unicode character block.
2220          * @since 1.7
2221          */
2222         public static final UnicodeBlock PHOENICIAN =
2223             new UnicodeBlock("PHOENICIAN");
2224 
2225         /**
2226          * Constant for the "Lydian" Unicode character block.
2227          * @since 1.7
2228          */
2229         public static final UnicodeBlock LYDIAN =
2230             new UnicodeBlock("LYDIAN");
2231 
2232         /**
2233          * Constant for the "Kharoshthi" Unicode character block.
2234          * @since 1.7
2235          */
2236         public static final UnicodeBlock KHAROSHTHI =
2237             new UnicodeBlock("KHAROSHTHI");
2238 
2239         /**
2240          * Constant for the "Old South Arabian" Unicode character block.
2241          * @since 1.7
2242          */
2243         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2244             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2245                              "OLD SOUTH ARABIAN",
2246                              "OLDSOUTHARABIAN");
2247 
2248         /**
2249          * Constant for the "Avestan" Unicode character block.
2250          * @since 1.7
2251          */
2252         public static final UnicodeBlock AVESTAN =
2253             new UnicodeBlock("AVESTAN");
2254 
2255         /**
2256          * Constant for the "Inscriptional Parthian" Unicode character block.
2257          * @since 1.7
2258          */
2259         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2260             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2261                              "INSCRIPTIONAL PARTHIAN",
2262                              "INSCRIPTIONALPARTHIAN");
2263 
2264         /**
2265          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2266          * @since 1.7
2267          */
2268         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2269             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2270                              "INSCRIPTIONAL PAHLAVI",
2271                              "INSCRIPTIONALPAHLAVI");
2272 
2273         /**
2274          * Constant for the "Old Turkic" Unicode character block.
2275          * @since 1.7
2276          */
2277         public static final UnicodeBlock OLD_TURKIC =
2278             new UnicodeBlock("OLD_TURKIC",
2279                              "OLD TURKIC",
2280                              "OLDTURKIC");
2281 
2282         /**
2283          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2284          * @since 1.7
2285          */
2286         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2287             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2288                              "RUMI NUMERAL SYMBOLS",
2289                              "RUMINUMERALSYMBOLS");
2290 
2291         /**
2292          * Constant for the "Brahmi" Unicode character block.
2293          * @since 1.7
2294          */
2295         public static final UnicodeBlock BRAHMI =
2296             new UnicodeBlock("BRAHMI");
2297 
2298         /**
2299          * Constant for the "Kaithi" Unicode character block.
2300          * @since 1.7
2301          */
2302         public static final UnicodeBlock KAITHI =
2303             new UnicodeBlock("KAITHI");
2304 
2305         /**
2306          * Constant for the "Cuneiform" Unicode character block.
2307          * @since 1.7
2308          */
2309         public static final UnicodeBlock CUNEIFORM =
2310             new UnicodeBlock("CUNEIFORM");
2311 
2312         /**
2313          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2314          * character block.
2315          * @since 1.7
2316          */
2317         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2318             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2319                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2320                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2321 
2322         /**
2323          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2324          * @since 1.7
2325          */
2326         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2327             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2328                              "EGYPTIAN HIEROGLYPHS",
2329                              "EGYPTIANHIEROGLYPHS");
2330 
2331         /**
2332          * Constant for the "Bamum Supplement" Unicode character block.
2333          * @since 1.7
2334          */
2335         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2336             new UnicodeBlock("BAMUM_SUPPLEMENT",
2337                              "BAMUM SUPPLEMENT",
2338                              "BAMUMSUPPLEMENT");
2339 
2340         /**
2341          * Constant for the "Kana Supplement" Unicode character block.
2342          * @since 1.7
2343          */
2344         public static final UnicodeBlock KANA_SUPPLEMENT =
2345             new UnicodeBlock("KANA_SUPPLEMENT",
2346                              "KANA SUPPLEMENT",
2347                              "KANASUPPLEMENT");
2348 
2349         /**
2350          * Constant for the "Ancient Greek Musical Notation" Unicode character
2351          * block.
2352          * @since 1.7
2353          */
2354         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2355             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2356                              "ANCIENT GREEK MUSICAL NOTATION",
2357                              "ANCIENTGREEKMUSICALNOTATION");
2358 
2359         /**
2360          * Constant for the "Counting Rod Numerals" Unicode character block.
2361          * @since 1.7
2362          */
2363         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2364             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2365                              "COUNTING ROD NUMERALS",
2366                              "COUNTINGRODNUMERALS");
2367 
2368         /**
2369          * Constant for the "Mahjong Tiles" Unicode character block.
2370          * @since 1.7
2371          */
2372         public static final UnicodeBlock MAHJONG_TILES =
2373             new UnicodeBlock("MAHJONG_TILES",
2374                              "MAHJONG TILES",
2375                              "MAHJONGTILES");
2376 
2377         /**
2378          * Constant for the "Domino Tiles" Unicode character block.
2379          * @since 1.7
2380          */
2381         public static final UnicodeBlock DOMINO_TILES =
2382             new UnicodeBlock("DOMINO_TILES",
2383                              "DOMINO TILES",
2384                              "DOMINOTILES");
2385 
2386         /**
2387          * Constant for the "Playing Cards" Unicode character block.
2388          * @since 1.7
2389          */
2390         public static final UnicodeBlock PLAYING_CARDS =
2391             new UnicodeBlock("PLAYING_CARDS",
2392                              "PLAYING CARDS",
2393                              "PLAYINGCARDS");
2394 
2395         /**
2396          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2397          * block.
2398          * @since 1.7
2399          */
2400         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2401             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2402                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2403                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2404 
2405         /**
2406          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2407          * block.
2408          * @since 1.7
2409          */
2410         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2411             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2412                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2413                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2414 
2415         /**
2416          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2417          * character block.
2418          * @since 1.7
2419          */
2420         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2421             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2422                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2423                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2424 
2425         /**
2426          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
2427          * @since 1.7
2428          */
2429         public static final UnicodeBlock EMOTICONS =
2430             new UnicodeBlock("EMOTICONS");
2431 
2432         /**
2433          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
2434          * @since 1.7
2435          */
2436         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2437             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2438                              "TRANSPORT AND MAP SYMBOLS",
2439                              "TRANSPORTANDMAPSYMBOLS");
2440 
2441         /**
2442          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
2443          * @since 1.7
2444          */
2445         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2446             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2447                              "ALCHEMICAL SYMBOLS",
2448                              "ALCHEMICALSYMBOLS");
2449 
2450         /**
2451          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2452          * character block (U+2A700..U+2B73F).
2453          * @since 1.7
2454          */
2455         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2456             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2457                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2458                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2459 
2460         /**
2461          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2462          * character block (U+2B740..U+2B81F).
2463          * @since 1.7
2464          */
2465         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2466             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2467                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2468                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2469 
2470         private static final int blockStarts[] = {
2471             0x0000,   // 0000..007F; Basic Latin
2472             0x0080,   // 0080..00FF; Latin-1 Supplement
2473             0x0100,   // 0100..017F; Latin Extended-A
2474             0x0180,   // 0180..024F; Latin Extended-B
2475             0x0250,   // 0250..02AF; IPA Extensions
2476             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2477             0x0300,   // 0300..036F; Combining Diacritical Marks
2478             0x0370,   // 0370..03FF; Greek and Coptic
2479             0x0400,   // 0400..04FF; Cyrillic
2480             0x0500,   // 0500..052F; Cyrillic Supplement
2481             0x0530,   // 0530..058F; Armenian
2482             0x0590,   // 0590..05FF; Hebrew
2483             0x0600,   // 0600..06FF; Arabic
2484             0x0700,   // 0700..074F; Syriac
2485             0x0750,   // 0750..077F; Arabic Supplement
2486             0x0780,   // 0780..07BF; Thaana
2487             0x07C0,   // 07C0..07FF; NKo
2488             0x0800,   // 0800..083F; Samaritan
2489             0x0840,   // 0840..085F; Mandaic
2490             0x0860,   //             unassigned
2491             0x0900,   // 0900..097F; Devanagari
2492             0x0980,   // 0980..09FF; Bengali
2493             0x0A00,   // 0A00..0A7F; Gurmukhi
2494             0x0A80,   // 0A80..0AFF; Gujarati
2495             0x0B00,   // 0B00..0B7F; Oriya
2496             0x0B80,   // 0B80..0BFF; Tamil
2497             0x0C00,   // 0C00..0C7F; Telugu
2498             0x0C80,   // 0C80..0CFF; Kannada
2499             0x0D00,   // 0D00..0D7F; Malayalam
2500             0x0D80,   // 0D80..0DFF; Sinhala
2501             0x0E00,   // 0E00..0E7F; Thai
2502             0x0E80,   // 0E80..0EFF; Lao
2503             0x0F00,   // 0F00..0FFF; Tibetan
2504             0x1000,   // 1000..109F; Myanmar
2505             0x10A0,   // 10A0..10FF; Georgian
2506             0x1100,   // 1100..11FF; Hangul Jamo
2507             0x1200,   // 1200..137F; Ethiopic
2508             0x1380,   // 1380..139F; Ethiopic Supplement
2509             0x13A0,   // 13A0..13FF; Cherokee
2510             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2511             0x1680,   // 1680..169F; Ogham
2512             0x16A0,   // 16A0..16FF; Runic
2513             0x1700,   // 1700..171F; Tagalog
2514             0x1720,   // 1720..173F; Hanunoo
2515             0x1740,   // 1740..175F; Buhid
2516             0x1760,   // 1760..177F; Tagbanwa
2517             0x1780,   // 1780..17FF; Khmer
2518             0x1800,   // 1800..18AF; Mongolian
2519             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2520             0x1900,   // 1900..194F; Limbu
2521             0x1950,   // 1950..197F; Tai Le
2522             0x1980,   // 1980..19DF; New Tai Lue
2523             0x19E0,   // 19E0..19FF; Khmer Symbols
2524             0x1A00,   // 1A00..1A1F; Buginese
2525             0x1A20,   // 1A20..1AAF; Tai Tham
2526             0x1AB0,   //             unassigned
2527             0x1B00,   // 1B00..1B7F; Balinese
2528             0x1B80,   // 1B80..1BBF; Sundanese
2529             0x1BC0,   // 1BC0..1BFF; Batak
2530             0x1C00,   // 1C00..1C4F; Lepcha
2531             0x1C50,   // 1C50..1C7F; Ol Chiki
2532             0x1C80,   //             unassigned
2533             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2534             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2535             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2536             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2537             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2538             0x1F00,   // 1F00..1FFF; Greek Extended
2539             0x2000,   // 2000..206F; General Punctuation
2540             0x2070,   // 2070..209F; Superscripts and Subscripts
2541             0x20A0,   // 20A0..20CF; Currency Symbols
2542             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2543             0x2100,   // 2100..214F; Letterlike Symbols
2544             0x2150,   // 2150..218F; Number Forms
2545             0x2190,   // 2190..21FF; Arrows
2546             0x2200,   // 2200..22FF; Mathematical Operators
2547             0x2300,   // 2300..23FF; Miscellaneous Technical
2548             0x2400,   // 2400..243F; Control Pictures
2549             0x2440,   // 2440..245F; Optical Character Recognition
2550             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2551             0x2500,   // 2500..257F; Box Drawing
2552             0x2580,   // 2580..259F; Block Elements
2553             0x25A0,   // 25A0..25FF; Geometric Shapes
2554             0x2600,   // 2600..26FF; Miscellaneous Symbols
2555             0x2700,   // 2700..27BF; Dingbats
2556             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2557             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2558             0x2800,   // 2800..28FF; Braille Patterns
2559             0x2900,   // 2900..297F; Supplemental Arrows-B
2560             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2561             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2562             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2563             0x2C00,   // 2C00..2C5F; Glagolitic
2564             0x2C60,   // 2C60..2C7F; Latin Extended-C
2565             0x2C80,   // 2C80..2CFF; Coptic
2566             0x2D00,   // 2D00..2D2F; Georgian Supplement
2567             0x2D30,   // 2D30..2D7F; Tifinagh
2568             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2569             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2570             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2571             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2572             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2573             0x2FE0,   //             unassigned
2574             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2575             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2576             0x3040,   // 3040..309F; Hiragana
2577             0x30A0,   // 30A0..30FF; Katakana
2578             0x3100,   // 3100..312F; Bopomofo
2579             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2580             0x3190,   // 3190..319F; Kanbun
2581             0x31A0,   // 31A0..31BF; Bopomofo Extended
2582             0x31C0,   // 31C0..31EF; CJK Strokes
2583             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2584             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2585             0x3300,   // 3300..33FF; CJK Compatibility
2586             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2587             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2588             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2589             0xA000,   // A000..A48F; Yi Syllables
2590             0xA490,   // A490..A4CF; Yi Radicals
2591             0xA4D0,   // A4D0..A4FF; Lisu
2592             0xA500,   // A500..A63F; Vai
2593             0xA640,   // A640..A69F; Cyrillic Extended-B
2594             0xA6A0,   // A6A0..A6FF; Bamum
2595             0xA700,   // A700..A71F; Modifier Tone Letters
2596             0xA720,   // A720..A7FF; Latin Extended-D
2597             0xA800,   // A800..A82F; Syloti Nagri
2598             0xA830,   // A830..A83F; Common Indic Number Forms
2599             0xA840,   // A840..A87F; Phags-pa
2600             0xA880,   // A880..A8DF; Saurashtra
2601             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2602             0xA900,   // A900..A92F; Kayah Li
2603             0xA930,   // A930..A95F; Rejang
2604             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2605             0xA980,   // A980..A9DF; Javanese
2606             0xA9E0,   //             unassigned
2607             0xAA00,   // AA00..AA5F; Cham
2608             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2609             0xAA80,   // AA80..AADF; Tai Viet
2610             0xAAE0,   //             unassigned
2611             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2612             0xAB30,   //             unassigned
2613             0xABC0,   // ABC0..ABFF; Meetei Mayek
2614             0xAC00,   // AC00..D7AF; Hangul Syllables
2615             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2616             0xD800,   // D800..DB7F; High Surrogates
2617             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2618             0xDC00,   // DC00..DFFF; Low Surrogates
2619             0xE000,   // E000..F8FF; Private Use Area
2620             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2621             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2622             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2623             0xFE00,   // FE00..FE0F; Variation Selectors
2624             0xFE10,   // FE10..FE1F; Vertical Forms
2625             0xFE20,   // FE20..FE2F; Combining Half Marks
2626             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2627             0xFE50,   // FE50..FE6F; Small Form Variants
2628             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2629             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2630             0xFFF0,   // FFF0..FFFF; Specials
2631             0x10000,  // 10000..1007F; Linear B Syllabary
2632             0x10080,  // 10080..100FF; Linear B Ideograms
2633             0x10100,  // 10100..1013F; Aegean Numbers
2634             0x10140,  // 10140..1018F; Ancient Greek Numbers
2635             0x10190,  // 10190..101CF; Ancient Symbols
2636             0x101D0,  // 101D0..101FF; Phaistos Disc
2637             0x10200,  //               unassigned
2638             0x10280,  // 10280..1029F; Lycian
2639             0x102A0,  // 102A0..102DF; Carian
2640             0x102E0,  //               unassigned
2641             0x10300,  // 10300..1032F; Old Italic
2642             0x10330,  // 10330..1034F; Gothic
2643             0x10350,  //               unassigned
2644             0x10380,  // 10380..1039F; Ugaritic
2645             0x103A0,  // 103A0..103DF; Old Persian
2646             0x103E0,  //               unassigned
2647             0x10400,  // 10400..1044F; Deseret
2648             0x10450,  // 10450..1047F; Shavian
2649             0x10480,  // 10480..104AF; Osmanya
2650             0x104B0,  //               unassigned
2651             0x10800,  // 10800..1083F; Cypriot Syllabary
2652             0x10840,  // 10840..1085F; Imperial Aramaic
2653             0x10860,  //               unassigned
2654             0x10900,  // 10900..1091F; Phoenician
2655             0x10920,  // 10920..1093F; Lydian
2656             0x10940,  //               unassigned
2657             0x10A00,  // 10A00..10A5F; Kharoshthi
2658             0x10A60,  // 10A60..10A7F; Old South Arabian
2659             0x10A80,  //               unassigned
2660             0x10B00,  // 10B00..10B3F; Avestan
2661             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2662             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2663             0x10B80,  //               unassigned
2664             0x10C00,  // 10C00..10C4F; Old Turkic
2665             0x10C50,  //               unassigned
2666             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2667             0x10E80,  //               unassigned
2668             0x11000,  // 11000..1107F; Brahmi
2669             0x11080,  // 11080..110CF; Kaithi
2670             0x110D0,  //               unassigned
2671             0x12000,  // 12000..123FF; Cuneiform
2672             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2673             0x12480,  //               unassigned
2674             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2675             0x13430,  //               unassigned
2676             0x16800,  // 16800..16A3F; Bamum Supplement
2677             0x16A40,  //               unassigned
2678             0x1B000,  // 1B000..1B0FF; Kana Supplement
2679             0x1B100,  //               unassigned
2680             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2681             0x1D100,  // 1D100..1D1FF; Musical Symbols
2682             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2683             0x1D250,  //               unassigned
2684             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2685             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2686             0x1D380,  //               unassigned
2687             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2688             0x1D800,  //               unassigned
2689             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2690             0x1F030,  // 1F030..1F09F; Domino Tiles
2691             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2692             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2693             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2694             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2695             0x1F600,  // 1F600..1F64F; Emoticons
2696             0x1F650,  //               unassigned
2697             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2698             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2699             0x1F780,  //               unassigned
2700             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2701             0x2A6E0,  //               unassigned
2702             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2703             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2704             0x2B820,  //               unassigned
2705             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2706             0x2FA20,  //               unassigned
2707             0xE0000,  // E0000..E007F; Tags
2708             0xE0080,  //               unassigned
2709             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2710             0xE01F0,  //               unassigned
2711             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2712             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2713         };
2714 
2715         private static final UnicodeBlock[] blocks = {   // parallel to blockStarts; null = range has no block
2716             BASIC_LATIN,
2717             LATIN_1_SUPPLEMENT,
2718             LATIN_EXTENDED_A,
2719             LATIN_EXTENDED_B,
2720             IPA_EXTENSIONS,
2721             SPACING_MODIFIER_LETTERS,
2722             COMBINING_DIACRITICAL_MARKS,
2723             GREEK,   // block "Greek and Coptic"
2724             CYRILLIC,
2725             CYRILLIC_SUPPLEMENTARY,   // block "Cyrillic Supplement"
2726             ARMENIAN,
2727             HEBREW,
2728             ARABIC,
2729             SYRIAC,
2730             ARABIC_SUPPLEMENT,
2731             THAANA,
2732             NKO,
2733             SAMARITAN,
2734             MANDAIC,
2735             null,   // unassigned from 0x0860
2736             DEVANAGARI,
2737             BENGALI,
2738             GURMUKHI,
2739             GUJARATI,
2740             ORIYA,
2741             TAMIL,
2742             TELUGU,
2743             KANNADA,
2744             MALAYALAM,
2745             SINHALA,
2746             THAI,
2747             LAO,
2748             TIBETAN,
2749             MYANMAR,
2750             GEORGIAN,
2751             HANGUL_JAMO,
2752             ETHIOPIC,
2753             ETHIOPIC_SUPPLEMENT,
2754             CHEROKEE,
2755             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2756             OGHAM,
2757             RUNIC,
2758             TAGALOG,
2759             HANUNOO,
2760             BUHID,
2761             TAGBANWA,
2762             KHMER,
2763             MONGOLIAN,
2764             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2765             LIMBU,
2766             TAI_LE,
2767             NEW_TAI_LUE,
2768             KHMER_SYMBOLS,
2769             BUGINESE,
2770             TAI_THAM,
2771             null,   // unassigned from 0x1AB0
2772             BALINESE,
2773             SUNDANESE,
2774             BATAK,
2775             LEPCHA,
2776             OL_CHIKI,
2777             null,   // unassigned from 0x1C80
2778             VEDIC_EXTENSIONS,
2779             PHONETIC_EXTENSIONS,
2780             PHONETIC_EXTENSIONS_SUPPLEMENT,
2781             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2782             LATIN_EXTENDED_ADDITIONAL,
2783             GREEK_EXTENDED,
2784             GENERAL_PUNCTUATION,
2785             SUPERSCRIPTS_AND_SUBSCRIPTS,
2786             CURRENCY_SYMBOLS,
2787             COMBINING_MARKS_FOR_SYMBOLS,   // block "Combining Diacritical Marks for Symbols"
2788             LETTERLIKE_SYMBOLS,
2789             NUMBER_FORMS,
2790             ARROWS,
2791             MATHEMATICAL_OPERATORS,
2792             MISCELLANEOUS_TECHNICAL,
2793             CONTROL_PICTURES,
2794             OPTICAL_CHARACTER_RECOGNITION,
2795             ENCLOSED_ALPHANUMERICS,
2796             BOX_DRAWING,
2797             BLOCK_ELEMENTS,
2798             GEOMETRIC_SHAPES,
2799             MISCELLANEOUS_SYMBOLS,
2800             DINGBATS,
2801             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2802             SUPPLEMENTAL_ARROWS_A,
2803             BRAILLE_PATTERNS,
2804             SUPPLEMENTAL_ARROWS_B,
2805             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2806             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2807             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2808             GLAGOLITIC,
2809             LATIN_EXTENDED_C,
2810             COPTIC,
2811             GEORGIAN_SUPPLEMENT,
2812             TIFINAGH,
2813             ETHIOPIC_EXTENDED,
2814             CYRILLIC_EXTENDED_A,
2815             SUPPLEMENTAL_PUNCTUATION,
2816             CJK_RADICALS_SUPPLEMENT,
2817             KANGXI_RADICALS,
2818             null,   // unassigned from 0x2FE0
2819             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2820             CJK_SYMBOLS_AND_PUNCTUATION,
2821             HIRAGANA,
2822             KATAKANA,
2823             BOPOMOFO,
2824             HANGUL_COMPATIBILITY_JAMO,
2825             KANBUN,
2826             BOPOMOFO_EXTENDED,
2827             CJK_STROKES,
2828             KATAKANA_PHONETIC_EXTENSIONS,
2829             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2830             CJK_COMPATIBILITY,
2831             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2832             YIJING_HEXAGRAM_SYMBOLS,
2833             CJK_UNIFIED_IDEOGRAPHS,
2834             YI_SYLLABLES,
2835             YI_RADICALS,
2836             LISU,
2837             VAI,
2838             CYRILLIC_EXTENDED_B,
2839             BAMUM,
2840             MODIFIER_TONE_LETTERS,
2841             LATIN_EXTENDED_D,
2842             SYLOTI_NAGRI,
2843             COMMON_INDIC_NUMBER_FORMS,
2844             PHAGS_PA,
2845             SAURASHTRA,
2846             DEVANAGARI_EXTENDED,
2847             KAYAH_LI,
2848             REJANG,
2849             HANGUL_JAMO_EXTENDED_A,
2850             JAVANESE,
2851             null,   // unassigned from 0xA9E0
2852             CHAM,
2853             MYANMAR_EXTENDED_A,
2854             TAI_VIET,
2855             null,   // unassigned from 0xAAE0
2856             ETHIOPIC_EXTENDED_A,
2857             null,   // unassigned from 0xAB30
2858             MEETEI_MAYEK,
2859             HANGUL_SYLLABLES,
2860             HANGUL_JAMO_EXTENDED_B,
2861             HIGH_SURROGATES,
2862             HIGH_PRIVATE_USE_SURROGATES,
2863             LOW_SURROGATES,
2864             PRIVATE_USE_AREA,
2865             CJK_COMPATIBILITY_IDEOGRAPHS,
2866             ALPHABETIC_PRESENTATION_FORMS,
2867             ARABIC_PRESENTATION_FORMS_A,
2868             VARIATION_SELECTORS,
2869             VERTICAL_FORMS,
2870             COMBINING_HALF_MARKS,
2871             CJK_COMPATIBILITY_FORMS,
2872             SMALL_FORM_VARIANTS,
2873             ARABIC_PRESENTATION_FORMS_B,
2874             HALFWIDTH_AND_FULLWIDTH_FORMS,
2875             SPECIALS,
2876             LINEAR_B_SYLLABARY,
2877             LINEAR_B_IDEOGRAMS,
2878             AEGEAN_NUMBERS,
2879             ANCIENT_GREEK_NUMBERS,
2880             ANCIENT_SYMBOLS,
2881             PHAISTOS_DISC,
2882             null,   // unassigned from 0x10200
2883             LYCIAN,
2884             CARIAN,
2885             null,   // unassigned from 0x102E0
2886             OLD_ITALIC,
2887             GOTHIC,
2888             null,   // unassigned from 0x10350
2889             UGARITIC,
2890             OLD_PERSIAN,
2891             null,   // unassigned from 0x103E0
2892             DESERET,
2893             SHAVIAN,
2894             OSMANYA,
2895             null,   // unassigned from 0x104B0
2896             CYPRIOT_SYLLABARY,
2897             IMPERIAL_ARAMAIC,
2898             null,   // unassigned from 0x10860
2899             PHOENICIAN,
2900             LYDIAN,
2901             null,   // unassigned from 0x10940
2902             KHAROSHTHI,
2903             OLD_SOUTH_ARABIAN,
2904             null,   // unassigned from 0x10A80
2905             AVESTAN,
2906             INSCRIPTIONAL_PARTHIAN,
2907             INSCRIPTIONAL_PAHLAVI,
2908             null,   // unassigned from 0x10B80
2909             OLD_TURKIC,
2910             null,   // unassigned from 0x10C50
2911             RUMI_NUMERAL_SYMBOLS,
2912             null,   // unassigned from 0x10E80
2913             BRAHMI,
2914             KAITHI,
2915             null,   // unassigned from 0x110D0
2916             CUNEIFORM,
2917             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2918             null,   // unassigned from 0x12480
2919             EGYPTIAN_HIEROGLYPHS,
2920             null,   // unassigned from 0x13430
2921             BAMUM_SUPPLEMENT,
2922             null,   // unassigned from 0x16A40
2923             KANA_SUPPLEMENT,
2924             null,   // unassigned from 0x1B100
2925             BYZANTINE_MUSICAL_SYMBOLS,
2926             MUSICAL_SYMBOLS,
2927             ANCIENT_GREEK_MUSICAL_NOTATION,
2928             null,   // unassigned from 0x1D250
2929             TAI_XUAN_JING_SYMBOLS,
2930             COUNTING_ROD_NUMERALS,
2931             null,   // unassigned from 0x1D380
2932             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2933             null,   // unassigned from 0x1D800
2934             MAHJONG_TILES,
2935             DOMINO_TILES,
2936             PLAYING_CARDS,
2937             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2938             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2939             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2940             EMOTICONS,
2941             null,   // unassigned from 0x1F650
2942             TRANSPORT_AND_MAP_SYMBOLS,
2943             ALCHEMICAL_SYMBOLS,
2944             null,   // unassigned from 0x1F780
2945             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2946             null,   // unassigned from 0x2A6E0
2947             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2948             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2949             null,   // unassigned from 0x2B820
2950             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2951             null,   // unassigned from 0x2FA20
2952             TAGS,
2953             null,   // unassigned from 0xE0080
2954             VARIATION_SELECTORS_SUPPLEMENT,
2955             null,   // unassigned from 0xE01F0
2956             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2957             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2958         };
2959 
2960 
2961         /**
2962          * Looks up the Unicode block that contains the given <code>char</code>
2963          * value. The result is <code>null</code> when the character does not
2964          * belong to any defined block.
2965          *
2966          * <p><b>Note:</b> A <code>char</code> cannot represent
2967          * <a href="Character.html#supplementary"> supplementary
2968          * characters</a>.  Use the {@link #of(int)} method when all Unicode
2969          * characters, including supplementary ones, must be supported.
2970          *
2971          * @param   c  The character in question
2972          * @return  The <code>UnicodeBlock</code> instance containing
2973          *          <code>c</code>, or <code>null</code> if the character is
2974          *          not a member of any Unicode block
2975          */
2976         public static UnicodeBlock of(char c) {
2977             final int codePoint = c;   // implicit widening; same value as (int)c
2978             return of(codePoint);
2979         }
2980 
2981         /**
2982          * Returns the object representing the Unicode block
2983          * containing the given character (Unicode code point), or
2984          * <code>null</code> if the character is not a member of a
2985          * defined block.
2986          *
2987          * @param   codePoint the character (Unicode code point) in question.
2988          * @return  The <code>UnicodeBlock</code> instance representing the
2989          *          Unicode block of which this character is a member, or
2990          *          <code>null</code> if the character is not a member of any
2991          *          Unicode block
2992          * @exception IllegalArgumentException if the specified
2993          * <code>codePoint</code> is an invalid Unicode code point.
2994          * @see Character#isValidCodePoint(int)
2995          * @since   1.5
2996          */
2997         public static UnicodeBlock of(int codePoint) {
2998             if (!isValidCodePoint(codePoint)) {
2999                 throw new IllegalArgumentException();
3000             }
3001 
3002             int top, bottom, current;
3003             bottom = 0;
3004             top = blockStarts.length;
3005             current = top/2;
3006 
3007             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3008             while (top - bottom > 1) {
3009                 if (codePoint >= blockStarts[current]) {
3010                     bottom = current;
3011                 } else {
3012                     top = current;
3013                 }
3014                 current = (top + bottom) / 2;
3015             }
3016             return blocks[current];
3017         }
3018 
3019         /**
3020          * Returns the UnicodeBlock with the given name. Block
3021          * names are determined by The Unicode Standard. The file
3022          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3023          * version of the standard. The {@link Character} class specifies
3024          * the version of the standard that it supports.
3025          * <p>
3026          * This method accepts block names in the following forms:
3027          * <ol>
3028          * <li> Canonical block names as defined by the Unicode Standard.
3029          * For example, the standard defines a "Basic Latin" block. Therefore, this
3030          * method accepts "Basic Latin" as a valid block name. The documentation of
3031          * each UnicodeBlock provides the canonical name.
3032          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3033          * is a valid block name for the "Basic Latin" block.
3034          * <li>The text representation of each constant UnicodeBlock identifier.
3035          * For example, this method will return the {@link #BASIC_LATIN} block if
3036          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3037          * hyphens in the canonical name with underscores.
3038          * </ol>
3039          * Finally, character case is ignored for all of the valid block name forms.
3040          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3041          * The en_US locale's case mapping rules are used to provide case-insensitive
3042          * string comparisons for block name validation.
3043          * <p>
3044          * If the Unicode Standard changes block names, both the previous and
3045          * current names will be accepted.
3046          *
3047          * @param blockName A <code>UnicodeBlock</code> name.
3048          * @return The <code>UnicodeBlock</code> instance identified
3049          *         by <code>blockName</code>
3050          * @throws IllegalArgumentException if <code>blockName</code> is an
3051          *         invalid name; the exception message reports the rejected name
3052          * @throws NullPointerException if <code>blockName</code> is null
3053          * @since 1.5
3054          */
3055         public static final UnicodeBlock forName(String blockName) {
3056             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3057             if (block == null) {
3058                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
3059             }
3060             return block;
3061         }
3062     }
3063 
3064 
3065     /**
3066      * A family of character subsets representing the character scripts
3067      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3068      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3069      * character is assigned to a single Unicode script, either a specific
3070      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3071      * one of the following three special values,
3072      * {@link Character.UnicodeScript#INHERITED Inherited},
3073      * {@link Character.UnicodeScript#COMMON Common} or
3074      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3075      *
3076      * @since 1.7
3077      */
3078     public static enum UnicodeScript {
3079         /**
3080          * Unicode script "Common".
3081          */
3082         COMMON,
3083 
3084         /**
3085          * Unicode script "Latin".
3086          */
3087         LATIN,
3088 
3089         /**
3090          * Unicode script "Greek".
3091          */
3092         GREEK,
3093 
3094         /**
3095          * Unicode script "Cyrillic".
3096          */
3097         CYRILLIC,
3098 
3099         /**
3100          * Unicode script "Armenian".
3101          */
3102         ARMENIAN,
3103 
3104         /**
3105          * Unicode script "Hebrew".
3106          */
3107         HEBREW,
3108 
3109         /**
3110          * Unicode script "Arabic".
3111          */
3112         ARABIC,
3113 
3114         /**
3115          * Unicode script "Syriac".
3116          */
3117         SYRIAC,
3118 
3119         /**
3120          * Unicode script "Thaana".
3121          */
3122         THAANA,
3123 
3124         /**
3125          * Unicode script "Devanagari".
3126          */
3127         DEVANAGARI,
3128 
3129         /**
3130          * Unicode script "Bengali".
3131          */
3132         BENGALI,
3133 
3134         /**
3135          * Unicode script "Gurmukhi".
3136          */
3137         GURMUKHI,
3138 
3139         /**
3140          * Unicode script "Gujarati".
3141          */
3142         GUJARATI,
3143 
3144         /**
3145          * Unicode script "Oriya".
3146          */
3147         ORIYA,
3148 
3149         /**
3150          * Unicode script "Tamil".
3151          */
3152         TAMIL,
3153 
3154         /**
3155          * Unicode script "Telugu".
3156          */
3157         TELUGU,
3158 
3159         /**
3160          * Unicode script "Kannada".
3161          */
3162         KANNADA,
3163 
3164         /**
3165          * Unicode script "Malayalam".
3166          */
3167         MALAYALAM,
3168 
3169         /**
3170          * Unicode script "Sinhala".
3171          */
3172         SINHALA,
3173 
3174         /**
3175          * Unicode script "Thai".
3176          */
3177         THAI,
3178 
3179         /**
3180          * Unicode script "Lao".
3181          */
3182         LAO,
3183 
3184         /**
3185          * Unicode script "Tibetan".
3186          */
3187         TIBETAN,
3188 
3189         /**
3190          * Unicode script "Myanmar".
3191          */
3192         MYANMAR,
3193 
3194         /**
3195          * Unicode script "Georgian".
3196          */
3197         GEORGIAN,
3198 
3199         /**
3200          * Unicode script "Hangul".
3201          */
3202         HANGUL,
3203 
3204         /**
3205          * Unicode script "Ethiopic".
3206          */
3207         ETHIOPIC,
3208 
3209         /**
3210          * Unicode script "Cherokee".
3211          */
3212         CHEROKEE,
3213 
3214         /**
3215          * Unicode script "Canadian_Aboriginal".
3216          */
3217         CANADIAN_ABORIGINAL,
3218 
3219         /**
3220          * Unicode script "Ogham".
3221          */
3222         OGHAM,
3223 
3224         /**
3225          * Unicode script "Runic".
3226          */
3227         RUNIC,
3228 
3229         /**
3230          * Unicode script "Khmer".
3231          */
3232         KHMER,
3233 
3234         /**
3235          * Unicode script "Mongolian".
3236          */
3237         MONGOLIAN,
3238 
3239         /**
3240          * Unicode script "Hiragana".
3241          */
3242         HIRAGANA,
3243 
3244         /**
3245          * Unicode script "Katakana".
3246          */
3247         KATAKANA,
3248 
3249         /**
3250          * Unicode script "Bopomofo".
3251          */
3252         BOPOMOFO,
3253 
3254         /**
3255          * Unicode script "Han".
3256          */
3257         HAN,
3258 
3259         /**
3260          * Unicode script "Yi".
3261          */
3262         YI,
3263 
3264         /**
3265          * Unicode script "Old_Italic".
3266          */
3267         OLD_ITALIC,
3268 
3269         /**
3270          * Unicode script "Gothic".
3271          */
3272         GOTHIC,
3273 
3274         /**
3275          * Unicode script "Deseret".
3276          */
3277         DESERET,
3278 
3279         /**
3280          * Unicode script "Inherited".
3281          */
3282         INHERITED,
3283 
3284         /**
3285          * Unicode script "Tagalog".
3286          */
3287         TAGALOG,
3288 
3289         /**
3290          * Unicode script "Hanunoo".
3291          */
3292         HANUNOO,
3293 
3294         /**
3295          * Unicode script "Buhid".
3296          */
3297         BUHID,
3298 
3299         /**
3300          * Unicode script "Tagbanwa".
3301          */
3302         TAGBANWA,
3303 
3304         /**
3305          * Unicode script "Limbu".
3306          */
3307         LIMBU,
3308 
3309         /**
3310          * Unicode script "Tai_Le".
3311          */
3312         TAI_LE,
3313 
3314         /**
3315          * Unicode script "Linear_B".
3316          */
3317         LINEAR_B,
3318 
3319         /**
3320          * Unicode script "Ugaritic".
3321          */
3322         UGARITIC,
3323 
3324         /**
3325          * Unicode script "Shavian".
3326          */
3327         SHAVIAN,
3328 
3329         /**
3330          * Unicode script "Osmanya".
3331          */
3332         OSMANYA,
3333 
3334         /**
3335          * Unicode script "Cypriot".
3336          */
3337         CYPRIOT,
3338 
3339         /**
3340          * Unicode script "Braille".
3341          */
3342         BRAILLE,
3343 
3344         /**
3345          * Unicode script "Buginese".
3346          */
3347         BUGINESE,
3348 
3349         /**
3350          * Unicode script "Coptic".
3351          */
3352         COPTIC,
3353 
3354         /**
3355          * Unicode script "New_Tai_Lue".
3356          */
3357         NEW_TAI_LUE,
3358 
3359         /**
3360          * Unicode script "Glagolitic".
3361          */
3362         GLAGOLITIC,
3363 
3364         /**
3365          * Unicode script "Tifinagh".
3366          */
3367         TIFINAGH,
3368 
3369         /**
3370          * Unicode script "Syloti_Nagri".
3371          */
3372         SYLOTI_NAGRI,
3373 
3374         /**
3375          * Unicode script "Old_Persian".
3376          */
3377         OLD_PERSIAN,
3378 
3379         /**
3380          * Unicode script "Kharoshthi".
3381          */
3382         KHAROSHTHI,
3383 
3384         /**
3385          * Unicode script "Balinese".
3386          */
3387         BALINESE,
3388 
3389         /**
3390          * Unicode script "Cuneiform".
3391          */
3392         CUNEIFORM,
3393 
3394         /**
3395          * Unicode script "Phoenician".
3396          */
3397         PHOENICIAN,
3398 
3399         /**
3400          * Unicode script "Phags_Pa".
3401          */
3402         PHAGS_PA,
3403 
3404         /**
3405          * Unicode script "Nko".
3406          */
3407         NKO,
3408 
3409         /**
3410          * Unicode script "Sundanese".
3411          */
3412         SUNDANESE,
3413 
3414         /**
3415          * Unicode script "Batak".
3416          */
3417         BATAK,
3418 
3419         /**
3420          * Unicode script "Lepcha".
3421          */
3422         LEPCHA,
3423 
3424         /**
3425          * Unicode script "Ol_Chiki".
3426          */
3427         OL_CHIKI,
3428 
3429         /**
3430          * Unicode script "Vai".
3431          */
3432         VAI,
3433 
3434         /**
3435          * Unicode script "Saurashtra".
3436          */
3437         SAURASHTRA,
3438 
3439         /**
3440          * Unicode script "Kayah_Li".
3441          */
3442         KAYAH_LI,
3443 
3444         /**
3445          * Unicode script "Rejang".
3446          */
3447         REJANG,
3448 
3449         /**
3450          * Unicode script "Lycian".
3451          */
3452         LYCIAN,
3453 
3454         /**
3455          * Unicode script "Carian".
3456          */
3457         CARIAN,
3458 
3459         /**
3460          * Unicode script "Lydian".
3461          */
3462         LYDIAN,
3463 
3464         /**
3465          * Unicode script "Cham".
3466          */
3467         CHAM,
3468 
3469         /**
3470          * Unicode script "Tai_Tham".
3471          */
3472         TAI_THAM,
3473 
3474         /**
3475          * Unicode script "Tai_Viet".
3476          */
3477         TAI_VIET,
3478 
3479         /**
3480          * Unicode script "Avestan".
3481          */
3482         AVESTAN,
3483 
3484         /**
3485          * Unicode script "Egyptian_Hieroglyphs".
3486          */
3487         EGYPTIAN_HIEROGLYPHS,
3488 
3489         /**
3490          * Unicode script "Samaritan".
3491          */
3492         SAMARITAN,
3493 
3494         /**
3495          * Unicode script "Mandaic".
3496          */
3497         MANDAIC,
3498 
3499         /**
3500          * Unicode script "Lisu".
3501          */
3502         LISU,
3503 
3504         /**
3505          * Unicode script "Bamum".
3506          */
3507         BAMUM,
3508 
3509         /**
3510          * Unicode script "Javanese".
3511          */
3512         JAVANESE,
3513 
3514         /**
3515          * Unicode script "Meetei_Mayek".
3516          */
3517         MEETEI_MAYEK,
3518 
3519         /**
3520          * Unicode script "Imperial_Aramaic".
3521          */
3522         IMPERIAL_ARAMAIC,
3523 
3524         /**
3525          * Unicode script "Old_South_Arabian".
3526          */
3527         OLD_SOUTH_ARABIAN,
3528 
3529         /**
3530          * Unicode script "Inscriptional_Parthian".
3531          */
3532         INSCRIPTIONAL_PARTHIAN,
3533 
3534         /**
3535          * Unicode script "Inscriptional_Pahlavi".
3536          */
3537         INSCRIPTIONAL_PAHLAVI,
3538 
3539         /**
3540          * Unicode script "Old_Turkic".
3541          */
3542         OLD_TURKIC,
3543 
3544         /**
3545          * Unicode script "Brahmi".
3546          */
3547         BRAHMI,
3548 
3549         /**
3550          * Unicode script "Kaithi".
3551          */
3552         KAITHI,
3553 
3554         /**
3555          * Unicode script "Unknown".
3556          */
3557         UNKNOWN;
3558 
3559         private static final int[] scriptStarts = {
                 // Table of the code points at which the script assignment changes,
                 // listed in strictly ascending order. The trailing comment on each
                 // entry reads "first..last; SCRIPT": "first" is the entry's own value
                 // and SCRIPT is the constant stored at the same index of the scripts
                 // array declared right after this one.
                 // NOTE(review): the code that consults this table is outside this
                 // excerpt; presumably a code point is resolved to the last entry that
                 // is <= it, so the ascending order and the index correspondence with
                 // scripts must be preserved -- confirm against the lookup method.
3560             0x0000,   // 0000..0040; COMMON
3561             0x0041,   // 0041..005A; LATIN
3562             0x005B,   // 005B..0060; COMMON
3563             0x0061,   // 0061..007A; LATIN
3564             0x007B,   // 007B..00A9; COMMON
3565             0x00AA,   // 00AA..00AA; LATIN
3566             0x00AB,   // 00AB..00B9; COMMON
3567             0x00BA,   // 00BA..00BA; LATIN
3568             0x00BB,   // 00BB..00BF; COMMON
3569             0x00C0,   // 00C0..00D6; LATIN
3570             0x00D7,   // 00D7..00D7; COMMON
3571             0x00D8,   // 00D8..00F6; LATIN
3572             0x00F7,   // 00F7..00F7; COMMON
3573             0x00F8,   // 00F8..02B8; LATIN
3574             0x02B9,   // 02B9..02DF; COMMON
3575             0x02E0,   // 02E0..02E4; LATIN
3576             0x02E5,   // 02E5..02E9; COMMON
3577             0x02EA,   // 02EA..02EB; BOPOMOFO
3578             0x02EC,   // 02EC..02FF; COMMON
3579             0x0300,   // 0300..036F; INHERITED
3580             0x0370,   // 0370..0373; GREEK
3581             0x0374,   // 0374..0374; COMMON
3582             0x0375,   // 0375..037D; GREEK
3583             0x037E,   // 037E..0383; COMMON
3584             0x0384,   // 0384..0384; GREEK
3585             0x0385,   // 0385..0385; COMMON
3586             0x0386,   // 0386..0386; GREEK
3587             0x0387,   // 0387..0387; COMMON
3588             0x0388,   // 0388..03E1; GREEK
3589             0x03E2,   // 03E2..03EF; COPTIC
3590             0x03F0,   // 03F0..03FF; GREEK
3591             0x0400,   // 0400..0484; CYRILLIC
3592             0x0485,   // 0485..0486; INHERITED
3593             0x0487,   // 0487..0530; CYRILLIC
3594             0x0531,   // 0531..0588; ARMENIAN
3595             0x0589,   // 0589..0589; COMMON
3596             0x058A,   // 058A..0590; ARMENIAN
3597             0x0591,   // 0591..05FF; HEBREW
3598             0x0600,   // 0600..060B; ARABIC
3599             0x060C,   // 060C..060C; COMMON
3600             0x060D,   // 060D..061A; ARABIC
3601             0x061B,   // 061B..061D; COMMON
3602             0x061E,   // 061E..061E; ARABIC
3603             0x061F,   // 061F..061F; COMMON
3604             0x0620,   // 0620..063F; ARABIC
3605             0x0640,   // 0640..0640; COMMON
3606             0x0641,   // 0641..064A; ARABIC
3607             0x064B,   // 064B..0655; INHERITED
3608             0x0656,   // 0656..065E; ARABIC
3609             0x065F,   // 065F..065F; INHERITED
3610             0x0660,   // 0660..0669; COMMON
3611             0x066A,   // 066A..066F; ARABIC
3612             0x0670,   // 0670..0670; INHERITED
3613             0x0671,   // 0671..06DC; ARABIC
3614             0x06DD,   // 06DD..06DD; COMMON
3615             0x06DE,   // 06DE..06FF; ARABIC
3616             0x0700,   // 0700..074F; SYRIAC
3617             0x0750,   // 0750..077F; ARABIC
3618             0x0780,   // 0780..07BF; THAANA
3619             0x07C0,   // 07C0..07FF; NKO
3620             0x0800,   // 0800..083F; SAMARITAN
3621             0x0840,   // 0840..08FF; MANDAIC
3622             0x0900,   // 0900..0950; DEVANAGARI
3623             0x0951,   // 0951..0952; INHERITED
3624             0x0953,   // 0953..0963; DEVANAGARI
3625             0x0964,   // 0964..0965; COMMON
3626             0x0966,   // 0966..096F; DEVANAGARI
3627             0x0970,   // 0970..0970; COMMON
3628             0x0971,   // 0971..0980; DEVANAGARI
3629             0x0981,   // 0981..0A00; BENGALI
3630             0x0A01,   // 0A01..0A80; GURMUKHI
3631             0x0A81,   // 0A81..0B00; GUJARATI
3632             0x0B01,   // 0B01..0B81; ORIYA
3633             0x0B82,   // 0B82..0C00; TAMIL
3634             0x0C01,   // 0C01..0C81; TELUGU
3635             0x0C82,   // 0C82..0CF0; KANNADA
3636             0x0D02,   // 0D02..0D81; MALAYALAM
3637             0x0D82,   // 0D82..0E00; SINHALA
3638             0x0E01,   // 0E01..0E3E; THAI
3639             0x0E3F,   // 0E3F..0E3F; COMMON
3640             0x0E40,   // 0E40..0E80; THAI
3641             0x0E81,   // 0E81..0EFF; LAO
3642             0x0F00,   // 0F00..0FD4; TIBETAN
3643             0x0FD5,   // 0FD5..0FD8; COMMON
3644             0x0FD9,   // 0FD9..0FFF; TIBETAN
3645             0x1000,   // 1000..109F; MYANMAR
3646             0x10A0,   // 10A0..10FA; GEORGIAN
3647             0x10FB,   // 10FB..10FB; COMMON
3648             0x10FC,   // 10FC..10FF; GEORGIAN
3649             0x1100,   // 1100..11FF; HANGUL
3650             0x1200,   // 1200..139F; ETHIOPIC
3651             0x13A0,   // 13A0..13FF; CHEROKEE
3652             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3653             0x1680,   // 1680..169F; OGHAM
3654             0x16A0,   // 16A0..16EA; RUNIC
3655             0x16EB,   // 16EB..16ED; COMMON
3656             0x16EE,   // 16EE..16FF; RUNIC
3657             0x1700,   // 1700..171F; TAGALOG
3658             0x1720,   // 1720..1734; HANUNOO
3659             0x1735,   // 1735..173F; COMMON
3660             0x1740,   // 1740..175F; BUHID
3661             0x1760,   // 1760..177F; TAGBANWA
3662             0x1780,   // 1780..17FF; KHMER
3663             0x1800,   // 1800..1801; MONGOLIAN
3664             0x1802,   // 1802..1803; COMMON
3665             0x1804,   // 1804..1804; MONGOLIAN
3666             0x1805,   // 1805..1805; COMMON
3667             0x1806,   // 1806..18AF; MONGOLIAN
3668             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3669             0x1900,   // 1900..194F; LIMBU
3670             0x1950,   // 1950..197F; TAI_LE
3671             0x1980,   // 1980..19DF; NEW_TAI_LUE
3672             0x19E0,   // 19E0..19FF; KHMER
3673             0x1A00,   // 1A00..1A1F; BUGINESE
3674             0x1A20,   // 1A20..1AFF; TAI_THAM
3675             0x1B00,   // 1B00..1B7F; BALINESE
3676             0x1B80,   // 1B80..1BBF; SUNDANESE
3677             0x1BC0,   // 1BC0..1BFF; BATAK
3678             0x1C00,   // 1C00..1C4F; LEPCHA
3679             0x1C50,   // 1C50..1CCF; OL_CHIKI
3680             0x1CD0,   // 1CD0..1CD2; INHERITED
3681             0x1CD3,   // 1CD3..1CD3; COMMON
3682             0x1CD4,   // 1CD4..1CE0; INHERITED
3683             0x1CE1,   // 1CE1..1CE1; COMMON
3684             0x1CE2,   // 1CE2..1CE8; INHERITED
3685             0x1CE9,   // 1CE9..1CEC; COMMON
3686             0x1CED,   // 1CED..1CED; INHERITED
3687             0x1CEE,   // 1CEE..1CFF; COMMON
3688             0x1D00,   // 1D00..1D25; LATIN
3689             0x1D26,   // 1D26..1D2A; GREEK
3690             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3691             0x1D2C,   // 1D2C..1D5C; LATIN
3692             0x1D5D,   // 1D5D..1D61; GREEK
3693             0x1D62,   // 1D62..1D65; LATIN
3694             0x1D66,   // 1D66..1D6A; GREEK
3695             0x1D6B,   // 1D6B..1D77; LATIN
3696             0x1D78,   // 1D78..1D78; CYRILLIC
3697             0x1D79,   // 1D79..1DBE; LATIN
3698             0x1DBF,   // 1DBF..1DBF; GREEK
3699             0x1DC0,   // 1DC0..1DFF; INHERITED
3700             0x1E00,   // 1E00..1EFF; LATIN
3701             0x1F00,   // 1F00..1FFF; GREEK
3702             0x2000,   // 2000..200B; COMMON
3703             0x200C,   // 200C..200D; INHERITED
3704             0x200E,   // 200E..2070; COMMON
3705             0x2071,   // 2071..2073; LATIN
3706             0x2074,   // 2074..207E; COMMON
3707             0x207F,   // 207F..207F; LATIN
3708             0x2080,   // 2080..208F; COMMON
3709             0x2090,   // 2090..209F; LATIN
3710             0x20A0,   // 20A0..20CF; COMMON
3711             0x20D0,   // 20D0..20FF; INHERITED
3712             0x2100,   // 2100..2125; COMMON
3713             0x2126,   // 2126..2126; GREEK
3714             0x2127,   // 2127..2129; COMMON
3715             0x212A,   // 212A..212B; LATIN
3716             0x212C,   // 212C..2131; COMMON
3717             0x2132,   // 2132..2132; LATIN
3718             0x2133,   // 2133..214D; COMMON
3719             0x214E,   // 214E..214E; LATIN
3720             0x214F,   // 214F..215F; COMMON
3721             0x2160,   // 2160..2188; LATIN
3722             0x2189,   // 2189..27FF; COMMON
3723             0x2800,   // 2800..28FF; BRAILLE
3724             0x2900,   // 2900..2BFF; COMMON
3725             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3726             0x2C60,   // 2C60..2C7F; LATIN
3727             0x2C80,   // 2C80..2CFF; COPTIC
3728             0x2D00,   // 2D00..2D2F; GEORGIAN
3729             0x2D30,   // 2D30..2D7F; TIFINAGH
3730             0x2D80,   // 2D80..2DDF; ETHIOPIC
3731             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3732             0x2E00,   // 2E00..2E7F; COMMON
3733             0x2E80,   // 2E80..2FEF; HAN
3734             0x2FF0,   // 2FF0..3004; COMMON
3735             0x3005,   // 3005..3005; HAN
3736             0x3006,   // 3006..3006; COMMON
3737             0x3007,   // 3007..3007; HAN
3738             0x3008,   // 3008..3020; COMMON
3739             0x3021,   // 3021..3029; HAN
3740             0x302A,   // 302A..302D; INHERITED
3741             0x302E,   // 302E..302F; HANGUL
3742             0x3030,   // 3030..3037; COMMON
3743             0x3038,   // 3038..303B; HAN
3744             0x303C,   // 303C..3040; COMMON
3745             0x3041,   // 3041..3098; HIRAGANA
3746             0x3099,   // 3099..309A; INHERITED
3747             0x309B,   // 309B..309C; COMMON
3748             0x309D,   // 309D..309F; HIRAGANA
3749             0x30A0,   // 30A0..30A0; COMMON
3750             0x30A1,   // 30A1..30FA; KATAKANA
3751             0x30FB,   // 30FB..30FC; COMMON
3752             0x30FD,   // 30FD..3104; KATAKANA
3753             0x3105,   // 3105..3130; BOPOMOFO
3754             0x3131,   // 3131..318F; HANGUL
3755             0x3190,   // 3190..319F; COMMON
3756             0x31A0,   // 31A0..31BF; BOPOMOFO
3757             0x31C0,   // 31C0..31EF; COMMON
3758             0x31F0,   // 31F0..31FF; KATAKANA
3759             0x3200,   // 3200..321F; HANGUL
3760             0x3220,   // 3220..325F; COMMON
3761             0x3260,   // 3260..327E; HANGUL
3762             0x327F,   // 327F..32CF; COMMON
3763             0x32D0,   // 32D0..3357; KATAKANA
3764             0x3358,   // 3358..33FF; COMMON
3765             0x3400,   // 3400..4DBF; HAN
3766             0x4DC0,   // 4DC0..4DFF; COMMON
3767             0x4E00,   // 4E00..9FFF; HAN
3768             0xA000,   // A000..A4CF; YI
3769             0xA4D0,   // A4D0..A4FF; LISU
3770             0xA500,   // A500..A63F; VAI
3771             0xA640,   // A640..A69F; CYRILLIC
3772             0xA6A0,   // A6A0..A6FF; BAMUM
3773             0xA700,   // A700..A721; COMMON
3774             0xA722,   // A722..A787; LATIN
3775             0xA788,   // A788..A78A; COMMON
3776             0xA78B,   // A78B..A7FF; LATIN
3777             0xA800,   // A800..A82F; SYLOTI_NAGRI
3778             0xA830,   // A830..A83F; COMMON
3779             0xA840,   // A840..A87F; PHAGS_PA
3780             0xA880,   // A880..A8DF; SAURASHTRA
3781             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3782             0xA900,   // A900..A92F; KAYAH_LI
3783             0xA930,   // A930..A95F; REJANG
3784             0xA960,   // A960..A97F; HANGUL
3785             0xA980,   // A980..A9FF; JAVANESE
3786             0xAA00,   // AA00..AA5F; CHAM
3787             0xAA60,   // AA60..AA7F; MYANMAR
3788             0xAA80,   // AA80..AB00; TAI_VIET
3789             0xAB01,   // AB01..ABBF; ETHIOPIC
3790             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3791             0xAC00,   // AC00..D7FB; HANGUL
3792             0xD7FC,   // D7FC..F8FF; UNKNOWN
3793             0xF900,   // F900..FAFF; HAN
3794             0xFB00,   // FB00..FB12; LATIN
3795             0xFB13,   // FB13..FB1C; ARMENIAN
3796             0xFB1D,   // FB1D..FB4F; HEBREW
3797             0xFB50,   // FB50..FD3D; ARABIC
3798             0xFD3E,   // FD3E..FD4F; COMMON
3799             0xFD50,   // FD50..FDFC; ARABIC
3800             0xFDFD,   // FDFD..FDFF; COMMON
3801             0xFE00,   // FE00..FE0F; INHERITED
3802             0xFE10,   // FE10..FE1F; COMMON
3803             0xFE20,   // FE20..FE2F; INHERITED
3804             0xFE30,   // FE30..FE6F; COMMON
3805             0xFE70,   // FE70..FEFE; ARABIC
3806             0xFEFF,   // FEFF..FF20; COMMON
3807             0xFF21,   // FF21..FF3A; LATIN
3808             0xFF3B,   // FF3B..FF40; COMMON
3809             0xFF41,   // FF41..FF5A; LATIN
3810             0xFF5B,   // FF5B..FF65; COMMON
3811             0xFF66,   // FF66..FF6F; KATAKANA
3812             0xFF70,   // FF70..FF70; COMMON
3813             0xFF71,   // FF71..FF9D; KATAKANA
3814             0xFF9E,   // FF9E..FF9F; COMMON
3815             0xFFA0,   // FFA0..FFDF; HANGUL
3816             0xFFE0,   // FFE0..FFFF; COMMON
3817             0x10000,  // 10000..100FF; LINEAR_B
3818             0x10100,  // 10100..1013F; COMMON
3819             0x10140,  // 10140..1018F; GREEK
3820             0x10190,  // 10190..101FC; COMMON
3821             0x101FD,  // 101FD..1027F; INHERITED
3822             0x10280,  // 10280..1029F; LYCIAN
3823             0x102A0,  // 102A0..102FF; CARIAN
3824             0x10300,  // 10300..1032F; OLD_ITALIC
3825             0x10330,  // 10330..1037F; GOTHIC
3826             0x10380,  // 10380..1039F; UGARITIC
3827             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3828             0x10400,  // 10400..1044F; DESERET
3829             0x10450,  // 10450..1047F; SHAVIAN
3830             0x10480,  // 10480..107FF; OSMANYA
3831             0x10800,  // 10800..1083F; CYPRIOT
3832             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3833             0x10900,  // 10900..1091F; PHOENICIAN
3834             0x10920,  // 10920..109FF; LYDIAN
3835             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3836             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3837             0x10B00,  // 10B00..10B3F; AVESTAN
3838             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3839             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3840             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3841             0x10E60,  // 10E60..10FFF; ARABIC
3842             0x11000,  // 11000..1107F; BRAHMI
3843             0x11080,  // 11080..11FFF; KAITHI
3844             0x12000,  // 12000..12FFF; CUNEIFORM
3845             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3846             0x16800,  // 16800..16A38; BAMUM
3847             0x1B000,  // 1B000..1B000; KATAKANA
3848             0x1B001,  // 1B001..1CFFF; HIRAGANA
3849             0x1D000,  // 1D000..1D166; COMMON
3850             0x1D167,  // 1D167..1D169; INHERITED
3851             0x1D16A,  // 1D16A..1D17A; COMMON
3852             0x1D17B,  // 1D17B..1D182; INHERITED
3853             0x1D183,  // 1D183..1D184; COMMON
3854             0x1D185,  // 1D185..1D18B; INHERITED
3855             0x1D18C,  // 1D18C..1D1A9; COMMON
3856             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
3857             0x1D1AE,  // 1D1AE..1D1FF; COMMON
3858             0x1D200,  // 1D200..1D2FF; GREEK
3859             0x1D300,  // 1D300..1F1FF; COMMON
3860             0x1F200,  // 1F200..1F200; HIRAGANA
3861             0x1F201,  // 1F201..1FFFF; COMMON
3862             0x20000,  // 20000..E0000; HAN
3863             0xE0001,  // E0001..E00FF; COMMON
3864             0xE0100,  // E0100..E01EF; INHERITED
3865             0xE01F0   // E01F0..10FFFF; UNKNOWN
3866 
3867         };
3868 
3869         private static final UnicodeScript[] scripts = {
                 // Script constant for each entry of the scriptStarts table declared
                 // just before this array. Both arrays have the same number of entries
                 // in the same order: scripts[i] is the script named in the trailing
                 // comment of scriptStarts[i]. Keep the two tables in step when
                 // editing either one.
3870             COMMON,
3871             LATIN,
3872             COMMON,
3873             LATIN,
3874             COMMON,
3875             LATIN,
3876             COMMON,
3877             LATIN,
3878             COMMON,
3879             LATIN,
3880             COMMON,
3881             LATIN,
3882             COMMON,
3883             LATIN,
3884             COMMON,
3885             LATIN,
3886             COMMON,
3887             BOPOMOFO,
3888             COMMON,
3889             INHERITED,
3890             GREEK,
3891             COMMON,
3892             GREEK,
3893             COMMON,
3894             GREEK,
3895             COMMON,
3896             GREEK,
3897             COMMON,
3898             GREEK,
3899             COPTIC,
3900             GREEK,
3901             CYRILLIC,
3902             INHERITED,
3903             CYRILLIC,
3904             ARMENIAN,
3905             COMMON,
3906             ARMENIAN,
3907             HEBREW,
3908             ARABIC,
3909             COMMON,
3910             ARABIC,
3911             COMMON,
3912             ARABIC,
3913             COMMON,
3914             ARABIC,
3915             COMMON,
3916             ARABIC,
3917             INHERITED,
3918             ARABIC,
3919             INHERITED,
3920             COMMON,
3921             ARABIC,
3922             INHERITED,
3923             ARABIC,
3924             COMMON,
3925             ARABIC,
3926             SYRIAC,
3927             ARABIC,
3928             THAANA,
3929             NKO,
3930             SAMARITAN,
3931             MANDAIC,
3932             DEVANAGARI,
3933             INHERITED,
3934             DEVANAGARI,
3935             COMMON,
3936             DEVANAGARI,
3937             COMMON,
3938             DEVANAGARI,
3939             BENGALI,
3940             GURMUKHI,
3941             GUJARATI,
3942             ORIYA,
3943             TAMIL,
3944             TELUGU,
3945             KANNADA,
3946             MALAYALAM,
3947             SINHALA,
3948             THAI,
3949             COMMON,
3950             THAI,
3951             LAO,
3952             TIBETAN,
3953             COMMON,
3954             TIBETAN,
3955             MYANMAR,
3956             GEORGIAN,
3957             COMMON,
3958             GEORGIAN,
3959             HANGUL,
3960             ETHIOPIC,
3961             CHEROKEE,
3962             CANADIAN_ABORIGINAL,
3963             OGHAM,
3964             RUNIC,
3965             COMMON,
3966             RUNIC,
3967             TAGALOG,
3968             HANUNOO,
3969             COMMON,
3970             BUHID,
3971             TAGBANWA,
3972             KHMER,
3973             MONGOLIAN,
3974             COMMON,
3975             MONGOLIAN,
3976             COMMON,
3977             MONGOLIAN,
3978             CANADIAN_ABORIGINAL,
3979             LIMBU,
3980             TAI_LE,
3981             NEW_TAI_LUE,
3982             KHMER,
3983             BUGINESE,
3984             TAI_THAM,
3985             BALINESE,
3986             SUNDANESE,
3987             BATAK,
3988             LEPCHA,
3989             OL_CHIKI,
3990             INHERITED,
3991             COMMON,
3992             INHERITED,
3993             COMMON,
3994             INHERITED,
3995             COMMON,
3996             INHERITED,
3997             COMMON,
3998             LATIN,
3999             GREEK,
4000             CYRILLIC,
4001             LATIN,
4002             GREEK,
4003             LATIN,
4004             GREEK,
4005             LATIN,
4006             CYRILLIC,
4007             LATIN,
4008             GREEK,
4009             INHERITED,
4010             LATIN,
4011             GREEK,
4012             COMMON,
4013             INHERITED,
4014             COMMON,
4015             LATIN,
4016             COMMON,
4017             LATIN,
4018             COMMON,
4019             LATIN,
4020             COMMON,
4021             INHERITED,
4022             COMMON,
4023             GREEK,
4024             COMMON,
4025             LATIN,
4026             COMMON,
4027             LATIN,
4028             COMMON,
4029             LATIN,
4030             COMMON,
4031             LATIN,
4032             COMMON,
4033             BRAILLE,
4034             COMMON,
4035             GLAGOLITIC,
4036             LATIN,
4037             COPTIC,
4038             GEORGIAN,
4039             TIFINAGH,
4040             ETHIOPIC,
4041             CYRILLIC,
4042             COMMON,
4043             HAN,
4044             COMMON,
4045             HAN,
4046             COMMON,
4047             HAN,
4048             COMMON,
4049             HAN,
4050             INHERITED,
4051             HANGUL,
4052             COMMON,
4053             HAN,
4054             COMMON,
4055             HIRAGANA,
4056             INHERITED,
4057             COMMON,
4058             HIRAGANA,
4059             COMMON,
4060             KATAKANA,
4061             COMMON,
4062             KATAKANA,
4063             BOPOMOFO,
4064             HANGUL,
4065             COMMON,
4066             BOPOMOFO,
4067             COMMON,
4068             KATAKANA,
4069             HANGUL,
4070             COMMON,
4071             HANGUL,
4072             COMMON,
4073             KATAKANA,
4074             COMMON,
4075             HAN,
4076             COMMON,
4077             HAN,
4078             YI,
4079             LISU,
4080             VAI,
4081             CYRILLIC,
4082             BAMUM,
4083             COMMON,
4084             LATIN,
4085             COMMON,
4086             LATIN,
4087             SYLOTI_NAGRI,
4088             COMMON,
4089             PHAGS_PA,
4090             SAURASHTRA,
4091             DEVANAGARI,
4092             KAYAH_LI,
4093             REJANG,
4094             HANGUL,
4095             JAVANESE,
4096             CHAM,
4097             MYANMAR,
4098             TAI_VIET,
4099             ETHIOPIC,
4100             MEETEI_MAYEK,
4101             HANGUL,
4102             UNKNOWN,
4103             HAN,
4104             LATIN,
4105             ARMENIAN,
4106             HEBREW,
4107             ARABIC,
4108             COMMON,
4109             ARABIC,
4110             COMMON,
4111             INHERITED,
4112             COMMON,
4113             INHERITED,
4114             COMMON,
4115             ARABIC,
4116             COMMON,
4117             LATIN,
4118             COMMON,
4119             LATIN,
4120             COMMON,
4121             KATAKANA,
4122             COMMON,
4123             KATAKANA,
4124             COMMON,
4125             HANGUL,
4126             COMMON,
4127             LINEAR_B,
4128             COMMON,
4129             GREEK,
4130             COMMON,
4131             INHERITED,
4132             LYCIAN,
4133             CARIAN,
4134             OLD_ITALIC,
4135             GOTHIC,
4136             UGARITIC,
4137             OLD_PERSIAN,
4138             DESERET,
4139             SHAVIAN,
4140             OSMANYA,
4141             CYPRIOT,
4142             IMPERIAL_ARAMAIC,
4143             PHOENICIAN,
4144             LYDIAN,
4145             KHAROSHTHI,
4146             OLD_SOUTH_ARABIAN,
4147             AVESTAN,
4148             INSCRIPTIONAL_PARTHIAN,
4149             INSCRIPTIONAL_PAHLAVI,
4150             OLD_TURKIC,
4151             ARABIC,
4152             BRAHMI,
4153             KAITHI,
4154             CUNEIFORM,
4155             EGYPTIAN_HIEROGLYPHS,
4156             BAMUM,
4157             KATAKANA,
4158             HIRAGANA,
4159             COMMON,
4160             INHERITED,
4161             COMMON,
4162             INHERITED,
4163             COMMON,
4164             INHERITED,
4165             COMMON,
4166             INHERITED,
4167             COMMON,
4168             GREEK,
4169             COMMON,
4170             HIRAGANA,
4171             COMMON,
4172             HAN,
4173             COMMON,
4174             INHERITED,
4175             UNKNOWN
4176         };
4177 
4178         private static HashMap<String, Character.UnicodeScript> aliases;
4179         static {
4180             aliases = new HashMap<>(128);
4181             aliases.put("ARAB", ARABIC);
4182             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4183             aliases.put("ARMN", ARMENIAN);
4184             aliases.put("AVST", AVESTAN);
4185             aliases.put("BALI", BALINESE);
4186             aliases.put("BAMU", BAMUM);
4187             aliases.put("BENG", BENGALI);
4188             aliases.put("BOPO", BOPOMOFO);
4189             aliases.put("BRAI", BRAILLE);
4190             aliases.put("BUGI", BUGINESE);
4191             aliases.put("BUHD", BUHID);
4192             aliases.put("CANS", CANADIAN_ABORIGINAL);
4193             aliases.put("CARI", CARIAN);
4194             aliases.put("CHAM", CHAM);
4195             aliases.put("CHER", CHEROKEE);
4196             aliases.put("COPT", COPTIC);
4197             aliases.put("CPRT", CYPRIOT);
4198             aliases.put("CYRL", CYRILLIC);
4199             aliases.put("DEVA", DEVANAGARI);
4200             aliases.put("DSRT", DESERET);
4201             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4202             aliases.put("ETHI", ETHIOPIC);
4203             aliases.put("GEOR", GEORGIAN);
4204             aliases.put("GLAG", GLAGOLITIC);
4205             aliases.put("GOTH", GOTHIC);
4206             aliases.put("GREK", GREEK);
4207             aliases.put("GUJR", GUJARATI);
4208             aliases.put("GURU", GURMUKHI);
4209             aliases.put("HANG", HANGUL);
4210             aliases.put("HANI", HAN);
4211             aliases.put("HANO", HANUNOO);
4212             aliases.put("HEBR", HEBREW);
4213             aliases.put("HIRA", HIRAGANA);
4214             // it appears we don't have the KATAKANA_OR_HIRAGANA
4215             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4216             aliases.put("ITAL", OLD_ITALIC);
4217             aliases.put("JAVA", JAVANESE);
4218             aliases.put("KALI", KAYAH_LI);
4219             aliases.put("KANA", KATAKANA);
4220             aliases.put("KHAR", KHAROSHTHI);
4221             aliases.put("KHMR", KHMER);
4222             aliases.put("KNDA", KANNADA);
4223             aliases.put("KTHI", KAITHI);
4224             aliases.put("LANA", TAI_THAM);
4225             aliases.put("LAOO", LAO);
4226             aliases.put("LATN", LATIN);
4227             aliases.put("LEPC", LEPCHA);
4228             aliases.put("LIMB", LIMBU);
4229             aliases.put("LINB", LINEAR_B);
4230             aliases.put("LISU", LISU);
4231             aliases.put("LYCI", LYCIAN);
4232             aliases.put("LYDI", LYDIAN);
4233             aliases.put("MLYM", MALAYALAM);
4234             aliases.put("MONG", MONGOLIAN);
4235             aliases.put("MTEI", MEETEI_MAYEK);
4236             aliases.put("MYMR", MYANMAR);
4237             aliases.put("NKOO", NKO);
4238             aliases.put("OGAM", OGHAM);
4239             aliases.put("OLCK", OL_CHIKI);
4240             aliases.put("ORKH", OLD_TURKIC);
4241             aliases.put("ORYA", ORIYA);
4242             aliases.put("OSMA", OSMANYA);
4243             aliases.put("PHAG", PHAGS_PA);
4244             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4245             aliases.put("PHNX", PHOENICIAN);
4246             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4247             aliases.put("RJNG", REJANG);
4248             aliases.put("RUNR", RUNIC);
4249             aliases.put("SAMR", SAMARITAN);
4250             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4251             aliases.put("SAUR", SAURASHTRA);
4252             aliases.put("SHAW", SHAVIAN);
4253             aliases.put("SINH", SINHALA);
4254             aliases.put("SUND", SUNDANESE);
4255             aliases.put("SYLO", SYLOTI_NAGRI);
4256             aliases.put("SYRC", SYRIAC);
4257             aliases.put("TAGB", TAGBANWA);
4258             aliases.put("TALE", TAI_LE);
4259             aliases.put("TALU", NEW_TAI_LUE);
4260             aliases.put("TAML", TAMIL);
4261             aliases.put("TAVT", TAI_VIET);
4262             aliases.put("TELU", TELUGU);
4263             aliases.put("TFNG", TIFINAGH);
4264             aliases.put("TGLG", TAGALOG);
4265             aliases.put("THAA", THAANA);
4266             aliases.put("THAI", THAI);
4267             aliases.put("TIBT", TIBETAN);
4268             aliases.put("UGAR", UGARITIC);
4269             aliases.put("VAII", VAI);
4270             aliases.put("XPEO", OLD_PERSIAN);
4271             aliases.put("XSUX", CUNEIFORM);
4272             aliases.put("YIII", YI);
4273             aliases.put("ZINH", INHERITED);
4274             aliases.put("ZYYY", COMMON);
4275             aliases.put("ZZZZ", UNKNOWN);
4276         }
4277 
4278         /**
4279          * Returns the enum constant representing the Unicode script of which
4280          * the given character (Unicode code point) is assigned to.
4281          *
4282          * @param   codePoint the character (Unicode code point) in question.
4283          * @return  The <code>UnicodeScript</code> constant representing the
4284          *          Unicode script of which this character is assigned to.
4285          *
4286          * @exception IllegalArgumentException if the specified
4287          * <code>codePoint</code> is an invalid Unicode code point.
4288          * @see Character#isValidCodePoint(int)
4289          *
4290          */
4291         public static UnicodeScript of(int codePoint) {
4292             if (!isValidCodePoint(codePoint))
4293                 throw new IllegalArgumentException();
4294             int type = getType(codePoint);
4295             // leave SURROGATE and PRIVATE_USE for table lookup
4296             if (type == UNASSIGNED)
4297                 return UNKNOWN;
4298             int index = Arrays.binarySearch(scriptStarts, codePoint);
4299             if (index < 0)
4300                 index = -index - 2;
4301             return scripts[index];
4302         }
4303 
4304         /**
4305          * Returns the UnicodeScript constant with the given Unicode script
4306          * name or the script name alias. Script names and their aliases are
4307          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4308          * and PropertyValueAliases&lt;version&gt;.txt define script names
4309          * and the script name aliases for a particular version of the
4310          * standard. The {@link Character} class specifies the version of
4311          * the standard that it supports.
4312          * <p>
4313          * Character case is ignored for all of the valid script names.
4314          * The en_US locale's case mapping rules are used to provide
4315          * case-insensitive string comparisons for script name validation.
4316          * <p>
4317          *
4318          * @param scriptName A <code>UnicodeScript</code> name.
4319          * @return The <code>UnicodeScript</code> constant identified
4320          *         by <code>scriptName</code>
4321          * @throws IllegalArgumentException if <code>scriptName</code> is an
4322          *         invalid name
4323          * @throws NullPointerException if <code>scriptName</code> is null
4324          */
4325         public static final UnicodeScript forName(String scriptName) {
4326             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4327                                  //.replace(' ', '_'));
4328             UnicodeScript sc = aliases.get(scriptName);
4329             if (sc != null)
4330                 return sc;
4331             return valueOf(scriptName);
4332         }
4333     }
4334 
    /**
     * The primitive {@code char} wrapped by this {@code Character}.
     * It is assigned once by the constructor and never changes.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier. The value is the one used by
     * JDK 1.0.2 and is kept for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4344 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     *
     * <p>When a new instance is not required, {@link #valueOf(char)} is
     * generally the better choice because it can return cached instances.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4355 
4356     private static class CharacterCache {
4357         private CharacterCache(){}
4358 
4359         static final Character cache[] = new Character[127 + 1];
4360 
4361         static {
4362             for (int i = 0; i < cache.length; i++)
4363                 cache[i] = new Character((char)i);
4364         }
4365     }
4366 
4367     /**
4368      * Returns a <tt>Character</tt> instance representing the specified
4369      * <tt>char</tt> value.
4370      * If a new <tt>Character</tt> instance is not required, this method
4371      * should generally be used in preference to the constructor
4372      * {@link #Character(char)}, as this method is likely to yield
4373      * significantly better space and time performance by caching
4374      * frequently requested values.
4375      *
4376      * This method will always cache values in the range {@code
4377      * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may
4378      * cache other values outside of this range.
4379      *
4380      * @param  c a char value.
4381      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4382      * @since  1.5
4383      */
4384     public static Character valueOf(char c) {
4385         if (c <= 127) { // must cache
4386             return CharacterCache.cache[(int)c];
4387         }
4388         return new Character(c);
4389     }
4390 
4391     /**
4392      * Returns the value of this <code>Character</code> object.
4393      * @return  the primitive <code>char</code> value represented by
4394      *          this object.
4395      */
4396     public char charValue() {
4397         return value;
4398     }
4399 
4400     /**
4401      * Returns a hash code for this {@code Character}; equal to the result
4402      * of invoking {@code charValue()}.
4403      *
4404      * @return a hash code value for this {@code Character}
4405      */
4406     public int hashCode() {
4407         return (int)value;
4408     }
4409 
4410     /**
4411      * Compares this object against the specified object.
4412      * The result is <code>true</code> if and only if the argument is not
4413      * <code>null</code> and is a <code>Character</code> object that
4414      * represents the same <code>char</code> value as this object.
4415      *
4416      * @param   obj   the object to compare with.
4417      * @return  <code>true</code> if the objects are the same;
4418      *          <code>false</code> otherwise.
4419      */
4420     public boolean equals(Object obj) {
4421         if (obj instanceof Character) {
4422             return value == ((Character)obj).charValue();
4423         }
4424         return false;
4425     }
4426 
4427     /**
4428      * Returns a <code>String</code> object representing this
4429      * <code>Character</code>'s value.  The result is a string of
4430      * length 1 whose sole component is the primitive
4431      * <code>char</code> value represented by this
4432      * <code>Character</code> object.
4433      *
4434      * @return  a string representation of this object.
4435      */
4436     public String toString() {
4437         char buf[] = {value};
4438         return String.valueOf(buf);
4439     }
4440 
4441     /**
4442      * Returns a <code>String</code> object representing the
4443      * specified <code>char</code>.  The result is a string of length
4444      * 1 consisting solely of the specified <code>char</code>.
4445      *
4446      * @param c the <code>char</code> to be converted
4447      * @return the string representation of the specified <code>char</code>
4448      * @since 1.4
4449      */
4450     public static String toString(char c) {
4451         return String.valueOf(c);
4452     }
4453 
4454     /**
4455      * Determines whether the specified code point is a valid
4456      * <a href="http://www.unicode.org/glossary/#code_point">
4457      * Unicode code point value</a>.
4458      *
4459      * @param  codePoint the Unicode code point to be tested
4460      * @return {@code true} if the specified code point value is between
4461      *         {@link #MIN_CODE_POINT} and
4462      *         {@link #MAX_CODE_POINT} inclusive;
4463      *         {@code false} otherwise.
4464      * @since  1.5
4465      */
4466     public static boolean isValidCodePoint(int codePoint) {
4467         // Optimized form of:
4468         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4469         int plane = codePoint >>> 16;
4470         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4471     }
4472 
4473     /**
4474      * Determines whether the specified character (Unicode code point)
4475      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4476      * Such code points can be represented using a single {@code char}.
4477      *
4478      * @param  codePoint the character (Unicode code point) to be tested
4479      * @return {@code true} if the specified code point is between
4480      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4481      *         {@code false} otherwise.
4482      * @since  1.7
4483      */
4484     public static boolean isBmpCodePoint(int codePoint) {
4485         return codePoint >>> 16 == 0;
4486         // Optimized form of:
4487         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4488         // We consistently use logical shift (>>>) to facilitate
4489         // additional runtime optimizations.
4490     }
4491 
4492     /**
4493      * Determines whether the specified character (Unicode code point)
4494      * is in the <a href="#supplementary">supplementary character</a> range.
4495      *
4496      * @param  codePoint the character (Unicode code point) to be tested
4497      * @return {@code true} if the specified code point is between
4498      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4499      *         {@link #MAX_CODE_POINT} inclusive;
4500      *         {@code false} otherwise.
4501      * @since  1.5
4502      */
4503     public static boolean isSupplementaryCodePoint(int codePoint) {
4504         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4505             && codePoint <  MAX_CODE_POINT + 1;
4506     }
4507 
4508     /**
4509      * Determines if the given {@code char} value is a
4510      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4511      * Unicode high-surrogate code unit</a>
4512      * (also known as <i>leading-surrogate code unit</i>).
4513      *
4514      * <p>Such values do not represent characters by themselves,
4515      * but are used in the representation of
4516      * <a href="#supplementary">supplementary characters</a>
4517      * in the UTF-16 encoding.
4518      *
4519      * @param  ch the {@code char} value to be tested.
4520      * @return {@code true} if the {@code char} value is between
4521      *         {@link #MIN_HIGH_SURROGATE} and
4522      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4523      *         {@code false} otherwise.
4524      * @see    Character#isLowSurrogate(char)
4525      * @see    Character.UnicodeBlock#of(int)
4526      * @since  1.5
4527      */
4528     public static boolean isHighSurrogate(char ch) {
4529         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4530         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4531     }
4532 
4533     /**
4534      * Determines if the given {@code char} value is a
4535      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4536      * Unicode low-surrogate code unit</a>
4537      * (also known as <i>trailing-surrogate code unit</i>).
4538      *
4539      * <p>Such values do not represent characters by themselves,
4540      * but are used in the representation of
4541      * <a href="#supplementary">supplementary characters</a>
4542      * in the UTF-16 encoding.
4543      *
4544      * @param  ch the {@code char} value to be tested.
4545      * @return {@code true} if the {@code char} value is between
4546      *         {@link #MIN_LOW_SURROGATE} and
4547      *         {@link #MAX_LOW_SURROGATE} inclusive;
4548      *         {@code false} otherwise.
4549      * @see    Character#isHighSurrogate(char)
4550      * @since  1.5
4551      */
4552     public static boolean isLowSurrogate(char ch) {
4553         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4554     }
4555 
4556     /**
4557      * Determines if the given {@code char} value is a Unicode
4558      * <i>surrogate code unit</i>.
4559      *
4560      * <p>Such values do not represent characters by themselves,
4561      * but are used in the representation of
4562      * <a href="#supplementary">supplementary characters</a>
4563      * in the UTF-16 encoding.
4564      *
4565      * <p>A char value is a surrogate code unit if and only if it is either
4566      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4567      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4568      *
4569      * @param  ch the {@code char} value to be tested.
4570      * @return {@code true} if the {@code char} value is between
4571      *         {@link #MIN_SURROGATE} and
4572      *         {@link #MAX_SURROGATE} inclusive;
4573      *         {@code false} otherwise.
4574      * @since  1.7
4575      */
4576     public static boolean isSurrogate(char ch) {
4577         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4578     }
4579 
4580     /**
4581      * Determines whether the specified pair of <code>char</code>
4582      * values is a valid
4583      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4584      * Unicode surrogate pair</a>.
4585 
4586      * <p>This method is equivalent to the expression:
4587      * <blockquote><pre>
4588      * isHighSurrogate(high) && isLowSurrogate(low)
4589      * </pre></blockquote>
4590      *
4591      * @param  high the high-surrogate code value to be tested
4592      * @param  low the low-surrogate code value to be tested
4593      * @return <code>true</code> if the specified high and
4594      * low-surrogate code values represent a valid surrogate pair;
4595      * <code>false</code> otherwise.
4596      * @since  1.5
4597      */
4598     public static boolean isSurrogatePair(char high, char low) {
4599         return isHighSurrogate(high) && isLowSurrogate(low);
4600     }
4601 
4602     /**
4603      * Determines the number of <code>char</code> values needed to
4604      * represent the specified character (Unicode code point). If the
4605      * specified character is equal to or greater than 0x10000, then
4606      * the method returns 2. Otherwise, the method returns 1.
4607      *
4608      * <p>This method doesn't validate the specified character to be a
4609      * valid Unicode code point. The caller must validate the
4610      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4611      * if necessary.
4612      *
4613      * @param   codePoint the character (Unicode code point) to be tested.
4614      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4615      * @see     Character#isSupplementaryCodePoint(int)
4616      * @since   1.5
4617      */
4618     public static int charCount(int codePoint) {
4619         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4620     }
4621 
4622     /**
4623      * Converts the specified surrogate pair to its supplementary code
4624      * point value. This method does not validate the specified
4625      * surrogate pair. The caller must validate it using {@link
4626      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4627      *
4628      * @param  high the high-surrogate code unit
4629      * @param  low the low-surrogate code unit
4630      * @return the supplementary code point composed from the
4631      *         specified surrogate pair.
4632      * @since  1.5
4633      */
4634     public static int toCodePoint(char high, char low) {
4635         // Optimized form of:
4636         // return ((high - MIN_HIGH_SURROGATE) << 10)
4637         //         + (low - MIN_LOW_SURROGATE)
4638         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4639         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4640                                        - (MIN_HIGH_SURROGATE << 10)
4641                                        - MIN_LOW_SURROGATE);
4642     }
4643 
4644     /**
4645      * Returns the code point at the given index of the
4646      * <code>CharSequence</code>. If the <code>char</code> value at
4647      * the given index in the <code>CharSequence</code> is in the
4648      * high-surrogate range, the following index is less than the
4649      * length of the <code>CharSequence</code>, and the
4650      * <code>char</code> value at the following index is in the
4651      * low-surrogate range, then the supplementary code point
4652      * corresponding to this surrogate pair is returned. Otherwise,
4653      * the <code>char</code> value at the given index is returned.
4654      *
4655      * @param seq a sequence of <code>char</code> values (Unicode code
4656      * units)
4657      * @param index the index to the <code>char</code> values (Unicode
4658      * code units) in <code>seq</code> to be converted
4659      * @return the Unicode code point at the given index
4660      * @exception NullPointerException if <code>seq</code> is null.
4661      * @exception IndexOutOfBoundsException if the value
4662      * <code>index</code> is negative or not less than
4663      * {@link CharSequence#length() seq.length()}.
4664      * @since  1.5
4665      */
4666     public static int codePointAt(CharSequence seq, int index) {
4667         char c1 = seq.charAt(index++);
4668         if (isHighSurrogate(c1)) {
4669             if (index < seq.length()) {
4670                 char c2 = seq.charAt(index);
4671                 if (isLowSurrogate(c2)) {
4672                     return toCodePoint(c1, c2);
4673                 }
4674             }
4675         }
4676         return c1;
4677     }
4678 
4679     /**
4680      * Returns the code point at the given index of the
4681      * <code>char</code> array. If the <code>char</code> value at
4682      * the given index in the <code>char</code> array is in the
4683      * high-surrogate range, the following index is less than the
4684      * length of the <code>char</code> array, and the
4685      * <code>char</code> value at the following index is in the
4686      * low-surrogate range, then the supplementary code point
4687      * corresponding to this surrogate pair is returned. Otherwise,
4688      * the <code>char</code> value at the given index is returned.
4689      *
4690      * @param a the <code>char</code> array
4691      * @param index the index to the <code>char</code> values (Unicode
4692      * code units) in the <code>char</code> array to be converted
4693      * @return the Unicode code point at the given index
4694      * @exception NullPointerException if <code>a</code> is null.
4695      * @exception IndexOutOfBoundsException if the value
4696      * <code>index</code> is negative or not less than
4697      * the length of the <code>char</code> array.
4698      * @since  1.5
4699      */
4700     public static int codePointAt(char[] a, int index) {
4701         return codePointAtImpl(a, index, a.length);
4702     }
4703 
4704     /**
4705      * Returns the code point at the given index of the
4706      * <code>char</code> array, where only array elements with
4707      * <code>index</code> less than <code>limit</code> can be used. If
4708      * the <code>char</code> value at the given index in the
4709      * <code>char</code> array is in the high-surrogate range, the
4710      * following index is less than the <code>limit</code>, and the
4711      * <code>char</code> value at the following index is in the
4712      * low-surrogate range, then the supplementary code point
4713      * corresponding to this surrogate pair is returned. Otherwise,
4714      * the <code>char</code> value at the given index is returned.
4715      *
4716      * @param a the <code>char</code> array
4717      * @param index the index to the <code>char</code> values (Unicode
4718      * code units) in the <code>char</code> array to be converted
4719      * @param limit the index after the last array element that can be used in the
4720      * <code>char</code> array
4721      * @return the Unicode code point at the given index
4722      * @exception NullPointerException if <code>a</code> is null.
4723      * @exception IndexOutOfBoundsException if the <code>index</code>
4724      * argument is negative or not less than the <code>limit</code>
4725      * argument, or if the <code>limit</code> argument is negative or
4726      * greater than the length of the <code>char</code> array.
4727      * @since  1.5
4728      */
4729     public static int codePointAt(char[] a, int index, int limit) {
4730         if (index >= limit || limit < 0 || limit > a.length) {
4731             throw new IndexOutOfBoundsException();
4732         }
4733         return codePointAtImpl(a, index, limit);
4734     }
4735 
4736     // throws ArrayIndexOutofBoundsException if index out of bounds
4737     static int codePointAtImpl(char[] a, int index, int limit) {
4738         char c1 = a[index++];
4739         if (isHighSurrogate(c1)) {
4740             if (index < limit) {
4741                 char c2 = a[index];
4742                 if (isLowSurrogate(c2)) {
4743                     return toCodePoint(c1, c2);
4744                 }
4745             }
4746         }
4747         return c1;
4748     }
4749 
4750     /**
4751      * Returns the code point preceding the given index of the
4752      * <code>CharSequence</code>. If the <code>char</code> value at
4753      * <code>(index - 1)</code> in the <code>CharSequence</code> is in
4754      * the low-surrogate range, <code>(index - 2)</code> is not
4755      * negative, and the <code>char</code> value at <code>(index -
4756      * 2)</code> in the <code>CharSequence</code> is in the
4757      * high-surrogate range, then the supplementary code point
4758      * corresponding to this surrogate pair is returned. Otherwise,
4759      * the <code>char</code> value at <code>(index - 1)</code> is
4760      * returned.
4761      *
4762      * @param seq the <code>CharSequence</code> instance
4763      * @param index the index following the code point that should be returned
4764      * @return the Unicode code point value before the given index.
4765      * @exception NullPointerException if <code>seq</code> is null.
4766      * @exception IndexOutOfBoundsException if the <code>index</code>
4767      * argument is less than 1 or greater than {@link
4768      * CharSequence#length() seq.length()}.
4769      * @since  1.5
4770      */
4771     public static int codePointBefore(CharSequence seq, int index) {
4772         char c2 = seq.charAt(--index);
4773         if (isLowSurrogate(c2)) {
4774             if (index > 0) {
4775                 char c1 = seq.charAt(--index);
4776                 if (isHighSurrogate(c1)) {
4777                     return toCodePoint(c1, c2);
4778                 }
4779             }
4780         }
4781         return c2;
4782     }
4783 
4784     /**
4785      * Returns the code point preceding the given index of the
4786      * <code>char</code> array. If the <code>char</code> value at
4787      * <code>(index - 1)</code> in the <code>char</code> array is in
4788      * the low-surrogate range, <code>(index - 2)</code> is not
4789      * negative, and the <code>char</code> value at <code>(index -
4790      * 2)</code> in the <code>char</code> array is in the
4791      * high-surrogate range, then the supplementary code point
4792      * corresponding to this surrogate pair is returned. Otherwise,
4793      * the <code>char</code> value at <code>(index - 1)</code> is
4794      * returned.
4795      *
4796      * @param a the <code>char</code> array
4797      * @param index the index following the code point that should be returned
4798      * @return the Unicode code point value before the given index.
4799      * @exception NullPointerException if <code>a</code> is null.
4800      * @exception IndexOutOfBoundsException if the <code>index</code>
4801      * argument is less than 1 or greater than the length of the
4802      * <code>char</code> array
4803      * @since  1.5
4804      */
4805     public static int codePointBefore(char[] a, int index) {
4806         return codePointBeforeImpl(a, index, 0);
4807     }
4808 
4809     /**
4810      * Returns the code point preceding the given index of the
4811      * <code>char</code> array, where only array elements with
4812      * <code>index</code> greater than or equal to <code>start</code>
4813      * can be used. If the <code>char</code> value at <code>(index -
4814      * 1)</code> in the <code>char</code> array is in the
4815      * low-surrogate range, <code>(index - 2)</code> is not less than
4816      * <code>start</code>, and the <code>char</code> value at
4817      * <code>(index - 2)</code> in the <code>char</code> array is in
4818      * the high-surrogate range, then the supplementary code point
4819      * corresponding to this surrogate pair is returned. Otherwise,
4820      * the <code>char</code> value at <code>(index - 1)</code> is
4821      * returned.
4822      *
4823      * @param a the <code>char</code> array
4824      * @param index the index following the code point that should be returned
4825      * @param start the index of the first array element in the
4826      * <code>char</code> array
4827      * @return the Unicode code point value before the given index.
4828      * @exception NullPointerException if <code>a</code> is null.
4829      * @exception IndexOutOfBoundsException if the <code>index</code>
4830      * argument is not greater than the <code>start</code> argument or
4831      * is greater than the length of the <code>char</code> array, or
4832      * if the <code>start</code> argument is negative or not less than
4833      * the length of the <code>char</code> array.
4834      * @since  1.5
4835      */
4836     public static int codePointBefore(char[] a, int index, int start) {
4837         if (index <= start || start < 0 || start >= a.length) {
4838             throw new IndexOutOfBoundsException();
4839         }
4840         return codePointBeforeImpl(a, index, start);
4841     }
4842 
4843     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
4844     static int codePointBeforeImpl(char[] a, int index, int start) {
4845         char c2 = a[--index];
4846         if (isLowSurrogate(c2)) {
4847             if (index > start) {
4848                 char c1 = a[--index];
4849                 if (isHighSurrogate(c1)) {
4850                     return toCodePoint(c1, c2);
4851                 }
4852             }
4853         }
4854         return c2;
4855     }
4856 
4857     /**
4858      * Returns the leading surrogate (a
4859      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4860      * high surrogate code unit</a>) of the
4861      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4862      * surrogate pair</a>
4863      * representing the specified supplementary character (Unicode
4864      * code point) in the UTF-16 encoding.  If the specified character
4865      * is not a
4866      * <a href="Character.html#supplementary">supplementary character</a>,
4867      * an unspecified {@code char} is returned.
4868      *
4869      * <p>If
4870      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4871      * is {@code true}, then
4872      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4873      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4874      * are also always {@code true}.
4875      *
4876      * @param   codePoint a supplementary character (Unicode code point)
4877      * @return  the leading surrogate code unit used to represent the
4878      *          character in the UTF-16 encoding
4879      * @since   1.7
4880      */
4881     public static char highSurrogate(int codePoint) {
4882         return (char) ((codePoint >>> 10)
4883             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4884     }
4885 
4886     /**
4887      * Returns the trailing surrogate (a
4888      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4889      * low surrogate code unit</a>) of the
4890      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4891      * surrogate pair</a>
4892      * representing the specified supplementary character (Unicode
4893      * code point) in the UTF-16 encoding.  If the specified character
4894      * is not a
4895      * <a href="Character.html#supplementary">supplementary character</a>,
4896      * an unspecified {@code char} is returned.
4897      *
4898      * <p>If
4899      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4900      * is {@code true}, then
4901      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4902      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4903      * are also always {@code true}.
4904      *
4905      * @param   codePoint a supplementary character (Unicode code point)
4906      * @return  the trailing surrogate code unit used to represent the
4907      *          character in the UTF-16 encoding
4908      * @since   1.7
4909      */
4910     public static char lowSurrogate(int codePoint) {
4911         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4912     }
4913 
4914     /**
4915      * Converts the specified character (Unicode code point) to its
4916      * UTF-16 representation. If the specified code point is a BMP
4917      * (Basic Multilingual Plane or Plane 0) value, the same value is
4918      * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
4919      * specified code point is a supplementary character, its
4920      * surrogate values are stored in <code>dst[dstIndex]</code>
4921      * (high-surrogate) and <code>dst[dstIndex+1]</code>
4922      * (low-surrogate), and 2 is returned.
4923      *
4924      * @param  codePoint the character (Unicode code point) to be converted.
4925      * @param  dst an array of <code>char</code> in which the
4926      * <code>codePoint</code>'s UTF-16 value is stored.
4927      * @param dstIndex the start index into the <code>dst</code>
4928      * array where the converted value is stored.
4929      * @return 1 if the code point is a BMP code point, 2 if the
4930      * code point is a supplementary code point.
4931      * @exception IllegalArgumentException if the specified
4932      * <code>codePoint</code> is not a valid Unicode code point.
4933      * @exception NullPointerException if the specified <code>dst</code> is null.
4934      * @exception IndexOutOfBoundsException if <code>dstIndex</code>
4935      * is negative or not less than <code>dst.length</code>, or if
4936      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
4937      * array element(s) to store the resulting <code>char</code>
4938      * value(s). (If <code>dstIndex</code> is equal to
4939      * <code>dst.length-1</code> and the specified
4940      * <code>codePoint</code> is a supplementary character, the
4941      * high-surrogate value is not stored in
4942      * <code>dst[dstIndex]</code>.)
4943      * @since  1.5
4944      */
4945     public static int toChars(int codePoint, char[] dst, int dstIndex) {
4946         if (isBmpCodePoint(codePoint)) {
4947             dst[dstIndex] = (char) codePoint;
4948             return 1;
4949         } else if (isValidCodePoint(codePoint)) {
4950             toSurrogates(codePoint, dst, dstIndex);
4951             return 2;
4952         } else {
4953             throw new IllegalArgumentException();
4954         }
4955     }
4956 
4957     /**
4958      * Converts the specified character (Unicode code point) to its
4959      * UTF-16 representation stored in a <code>char</code> array. If
4960      * the specified code point is a BMP (Basic Multilingual Plane or
4961      * Plane 0) value, the resulting <code>char</code> array has
4962      * the same value as <code>codePoint</code>. If the specified code
4963      * point is a supplementary code point, the resulting
4964      * <code>char</code> array has the corresponding surrogate pair.
4965      *
4966      * @param  codePoint a Unicode code point
4967      * @return a <code>char</code> array having
4968      *         <code>codePoint</code>'s UTF-16 representation.
4969      * @exception IllegalArgumentException if the specified
4970      * <code>codePoint</code> is not a valid Unicode code point.
4971      * @since  1.5
4972      */
4973     public static char[] toChars(int codePoint) {
4974         if (isBmpCodePoint(codePoint)) {
4975             return new char[] { (char) codePoint };
4976         } else if (isValidCodePoint(codePoint)) {
4977             char[] result = new char[2];
4978             toSurrogates(codePoint, result, 0);
4979             return result;
4980         } else {
4981             throw new IllegalArgumentException();
4982         }
4983     }
4984 
4985     static void toSurrogates(int codePoint, char[] dst, int index) {
4986         // We write elements "backwards" to guarantee all-or-nothing
4987         dst[index+1] = lowSurrogate(codePoint);
4988         dst[index] = highSurrogate(codePoint);
4989     }
4990 
4991     /**
4992      * Returns the number of Unicode code points in the text range of
4993      * the specified char sequence. The text range begins at the
4994      * specified <code>beginIndex</code> and extends to the
4995      * <code>char</code> at index <code>endIndex - 1</code>. Thus the
4996      * length (in <code>char</code>s) of the text range is
4997      * <code>endIndex-beginIndex</code>. Unpaired surrogates within
4998      * the text range count as one code point each.
4999      *
5000      * @param seq the char sequence
5001      * @param beginIndex the index to the first <code>char</code> of
5002      * the text range.
5003      * @param endIndex the index after the last <code>char</code> of
5004      * the text range.
5005      * @return the number of Unicode code points in the specified text
5006      * range
5007      * @exception NullPointerException if <code>seq</code> is null.
5008      * @exception IndexOutOfBoundsException if the
5009      * <code>beginIndex</code> is negative, or <code>endIndex</code>
5010      * is larger than the length of the given sequence, or
5011      * <code>beginIndex</code> is larger than <code>endIndex</code>.
5012      * @since  1.5
5013      */
5014     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5015         int length = seq.length();
5016         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5017             throw new IndexOutOfBoundsException();
5018         }
5019         int n = endIndex - beginIndex;
5020         for (int i = beginIndex; i < endIndex; ) {
5021             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5022                 isLowSurrogate(seq.charAt(i))) {
5023                 n--;
5024                 i++;
5025             }
5026         }
5027         return n;
5028     }
5029 
5030     /**
5031      * Returns the number of Unicode code points in a subarray of the
5032      * <code>char</code> array argument. The <code>offset</code>
5033      * argument is the index of the first <code>char</code> of the
5034      * subarray and the <code>count</code> argument specifies the
5035      * length of the subarray in <code>char</code>s. Unpaired
5036      * surrogates within the subarray count as one code point each.
5037      *
5038      * @param a the <code>char</code> array
5039      * @param offset the index of the first <code>char</code> in the
5040      * given <code>char</code> array
5041      * @param count the length of the subarray in <code>char</code>s
5042      * @return the number of Unicode code points in the specified subarray
5043      * @exception NullPointerException if <code>a</code> is null.
5044      * @exception IndexOutOfBoundsException if <code>offset</code> or
5045      * <code>count</code> is negative, or if <code>offset +
5046      * count</code> is larger than the length of the given array.
5047      * @since  1.5
5048      */
5049     public static int codePointCount(char[] a, int offset, int count) {
5050         if (count > a.length - offset || offset < 0 || count < 0) {
5051             throw new IndexOutOfBoundsException();
5052         }
5053         return codePointCountImpl(a, offset, count);
5054     }
5055 
5056     static int codePointCountImpl(char[] a, int offset, int count) {
5057         int endIndex = offset + count;
5058         int n = count;
5059         for (int i = offset; i < endIndex; ) {
5060             if (isHighSurrogate(a[i++]) && i < endIndex &&
5061                 isLowSurrogate(a[i])) {
5062                 n--;
5063                 i++;
5064             }
5065         }
5066         return n;
5067     }
5068 
5069     /**
5070      * Returns the index within the given char sequence that is offset
5071      * from the given <code>index</code> by <code>codePointOffset</code>
5072      * code points. Unpaired surrogates within the text range given by
5073      * <code>index</code> and <code>codePointOffset</code> count as
5074      * one code point each.
5075      *
5076      * @param seq the char sequence
5077      * @param index the index to be offset
5078      * @param codePointOffset the offset in code points
5079      * @return the index within the char sequence
5080      * @exception NullPointerException if <code>seq</code> is null.
5081      * @exception IndexOutOfBoundsException if <code>index</code>
5082      *   is negative or larger than the length of the char sequence,
5083      *   or if <code>codePointOffset</code> is positive and the
5084      *   subsequence starting with <code>index</code> has fewer than
5085      *   <code>codePointOffset</code> code points, or if
5086      *   <code>codePointOffset</code> is negative and the subsequence
5087      *   before <code>index</code> has fewer than the absolute value
5088      *   of <code>codePointOffset</code> code points.
5089      * @since 1.5
5090      */
5091     public static int offsetByCodePoints(CharSequence seq, int index,
5092                                          int codePointOffset) {
5093         int length = seq.length();
5094         if (index < 0 || index > length) {
5095             throw new IndexOutOfBoundsException();
5096         }
5097 
5098         int x = index;
5099         if (codePointOffset >= 0) {
5100             int i;
5101             for (i = 0; x < length && i < codePointOffset; i++) {
5102                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5103                     isLowSurrogate(seq.charAt(x))) {
5104                     x++;
5105                 }
5106             }
5107             if (i < codePointOffset) {
5108                 throw new IndexOutOfBoundsException();
5109             }
5110         } else {
5111             int i;
5112             for (i = codePointOffset; x > 0 && i < 0; i++) {
5113                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5114                     isHighSurrogate(seq.charAt(x-1))) {
5115                     x--;
5116                 }
5117             }
5118             if (i < 0) {
5119                 throw new IndexOutOfBoundsException();
5120             }
5121         }
5122         return x;
5123     }
5124 
5125     /**
5126      * Returns the index within the given <code>char</code> subarray
5127      * that is offset from the given <code>index</code> by
5128      * <code>codePointOffset</code> code points. The
5129      * <code>start</code> and <code>count</code> arguments specify a
5130      * subarray of the <code>char</code> array. Unpaired surrogates
5131      * within the text range given by <code>index</code> and
5132      * <code>codePointOffset</code> count as one code point each.
5133      *
5134      * @param a the <code>char</code> array
5135      * @param start the index of the first <code>char</code> of the
5136      * subarray
5137      * @param count the length of the subarray in <code>char</code>s
5138      * @param index the index to be offset
5139      * @param codePointOffset the offset in code points
5140      * @return the index within the subarray
5141      * @exception NullPointerException if <code>a</code> is null.
5142      * @exception IndexOutOfBoundsException
5143      *   if <code>start</code> or <code>count</code> is negative,
5144      *   or if <code>start + count</code> is larger than the length of
5145      *   the given array,
5146      *   or if <code>index</code> is less than <code>start</code> or
5147      *   larger than <code>start + count</code>,
5148      *   or if <code>codePointOffset</code> is positive and the text range
5149      *   starting with <code>index</code> and ending with <code>start
5150      *   + count - 1</code> has fewer than <code>codePointOffset</code> code
5151      *   points,
5152      *   or if <code>codePointOffset</code> is negative and the text range
5153      *   starting with <code>start</code> and ending with <code>index
5154      *   - 1</code> has fewer than the absolute value of
5155      *   <code>codePointOffset</code> code points.
5156      * @since 1.5
5157      */
5158     public static int offsetByCodePoints(char[] a, int start, int count,
5159                                          int index, int codePointOffset) {
5160         if (count > a.length-start || start < 0 || count < 0
5161             || index < start || index > start+count) {
5162             throw new IndexOutOfBoundsException();
5163         }
5164         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5165     }
5166 
5167     static int offsetByCodePointsImpl(char[]a, int start, int count,
5168                                       int index, int codePointOffset) {
5169         int x = index;
5170         if (codePointOffset >= 0) {
5171             int limit = start + count;
5172             int i;
5173             for (i = 0; x < limit && i < codePointOffset; i++) {
5174                 if (isHighSurrogate(a[x++]) && x < limit &&
5175                     isLowSurrogate(a[x])) {
5176                     x++;
5177                 }
5178             }
5179             if (i < codePointOffset) {
5180                 throw new IndexOutOfBoundsException();
5181             }
5182         } else {
5183             int i;
5184             for (i = codePointOffset; x > start && i < 0; i++) {
5185                 if (isLowSurrogate(a[--x]) && x > start &&
5186                     isHighSurrogate(a[x-1])) {
5187                     x--;
5188                 }
5189             }
5190             if (i < 0) {
5191                 throw new IndexOutOfBoundsException();
5192             }
5193         }
5194         return x;
5195     }
5196 
5197     /**
5198      * Determines if the specified character is a lowercase character.
5199      * <p>
5200      * A character is lowercase if its general category type, provided
5201      * by <code>Character.getType(ch)</code>, is
5202      * <code>LOWERCASE_LETTER</code>.
5203      * <p>
5204      * The following are examples of lowercase characters:
5205      * <p><blockquote><pre>
5206      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5207      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5208      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5209      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5210      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5211      * </pre></blockquote>
5212      * <p> Many other Unicode characters are lowercase too.
5213      *
5214      * <p><b>Note:</b> This method cannot handle <a
5215      * href="#supplementary"> supplementary characters</a>. To support
5216      * all Unicode characters, including supplementary characters, use
5217      * the {@link #isLowerCase(int)} method.
5218      *
5219      * @param   ch   the character to be tested.
5220      * @return  <code>true</code> if the character is lowercase;
5221      *          <code>false</code> otherwise.
5222      * @see     Character#isLowerCase(char)
5223      * @see     Character#isTitleCase(char)
5224      * @see     Character#toLowerCase(char)
5225      * @see     Character#getType(char)
5226      */
5227     public static boolean isLowerCase(char ch) {
5228         return isLowerCase((int)ch);
5229     }
5230 
5231     /**
5232      * Determines if the specified character (Unicode code point) is a
5233      * lowercase character.
5234      * <p>
5235      * A character is lowercase if its general category type, provided
5236      * by {@link Character#getType getType(codePoint)}, is
5237      * <code>LOWERCASE_LETTER</code>.
5238      * <p>
5239      * The following are examples of lowercase characters:
5240      * <p><blockquote><pre>
5241      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5242      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5243      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5244      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5245      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5246      * </pre></blockquote>
5247      * <p> Many other Unicode characters are lowercase too.
5248      *
5249      * @param   codePoint the character (Unicode code point) to be tested.
5250      * @return  <code>true</code> if the character is lowercase;
5251      *          <code>false</code> otherwise.
5252      * @see     Character#isLowerCase(int)
5253      * @see     Character#isTitleCase(int)
5254      * @see     Character#toLowerCase(int)
5255      * @see     Character#getType(int)
5256      * @since   1.5
5257      */
5258     public static boolean isLowerCase(int codePoint) {
5259         return getType(codePoint) == Character.LOWERCASE_LETTER;
5260     }
5261 
5262     /**
5263      * Determines if the specified character is an uppercase character.
5264      * <p>
5265      * A character is uppercase if its general category type, provided by
5266      * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
5267      * <p>
5268      * The following are examples of uppercase characters:
5269      * <p><blockquote><pre>
5270      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5271      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5272      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5273      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5274      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5275      * </pre></blockquote>
5276      * <p> Many other Unicode characters are uppercase too.
5277      *
5278      * <p><b>Note:</b> This method cannot handle <a
5279      * href="#supplementary"> supplementary characters</a>. To support
5280      * all Unicode characters, including supplementary characters, use
5281      * the {@link #isUpperCase(int)} method.
5282      *
5283      * @param   ch   the character to be tested.
5284      * @return  <code>true</code> if the character is uppercase;
5285      *          <code>false</code> otherwise.
5286      * @see     Character#isLowerCase(char)
5287      * @see     Character#isTitleCase(char)
5288      * @see     Character#toUpperCase(char)
5289      * @see     Character#getType(char)
5290      * @since   1.0
5291      */
5292     public static boolean isUpperCase(char ch) {
5293         return isUpperCase((int)ch);
5294     }
5295 
5296     /**
5297      * Determines if the specified character (Unicode code point) is an uppercase character.
5298      * <p>
5299      * A character is uppercase if its general category type, provided by
5300      * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
5301      * <p>
5302      * The following are examples of uppercase characters:
5303      * <p><blockquote><pre>
5304      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5305      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5306      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5307      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5308      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5309      * </pre></blockquote>
5310      * <p> Many other Unicode characters are uppercase too.
5311      *
5312      * @param   codePoint the character (Unicode code point) to be tested.
5313      * @return  <code>true</code> if the character is uppercase;
5314      *          <code>false</code> otherwise.
5315      * @see     Character#isLowerCase(int)
5316      * @see     Character#isTitleCase(int)
5317      * @see     Character#toUpperCase(int)
5318      * @see     Character#getType(int)
5319      * @since   1.5
5320      */
5321     public static boolean isUpperCase(int codePoint) {
5322         return getType(codePoint) == Character.UPPERCASE_LETTER;
5323     }
5324 
5325     /**
5326      * Determines if the specified character is a titlecase character.
5327      * <p>
5328      * A character is a titlecase character if its general
5329      * category type, provided by <code>Character.getType(ch)</code>,
5330      * is <code>TITLECASE_LETTER</code>.
5331      * <p>
5332      * Some characters look like pairs of Latin letters. For example, there
5333      * is an uppercase letter that looks like "LJ" and has a corresponding
5334      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5335      * is the appropriate form to use when rendering a word in lowercase
5336      * with initial capitals, as for a book title.
5337      * <p>
5338      * These are some of the Unicode characters for which this method returns
5339      * <code>true</code>:
5340      * <ul>
5341      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
5342      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
5343      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
5344      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
5345      * </ul>
5346      * <p> Many other Unicode characters are titlecase too.
5347      *
5348      * <p><b>Note:</b> This method cannot handle <a
5349      * href="#supplementary"> supplementary characters</a>. To support
5350      * all Unicode characters, including supplementary characters, use
5351      * the {@link #isTitleCase(int)} method.
5352      *
5353      * @param   ch   the character to be tested.
5354      * @return  <code>true</code> if the character is titlecase;
5355      *          <code>false</code> otherwise.
5356      * @see     Character#isLowerCase(char)
5357      * @see     Character#isUpperCase(char)
5358      * @see     Character#toTitleCase(char)
5359      * @see     Character#getType(char)
5360      * @since   1.0.2
5361      */
5362     public static boolean isTitleCase(char ch) {
5363         return isTitleCase((int)ch);
5364     }
5365 
5366     /**
5367      * Determines if the specified character (Unicode code point) is a titlecase character.
5368      * <p>
5369      * A character is a titlecase character if its general
5370      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5371      * is <code>TITLECASE_LETTER</code>.
5372      * <p>
5373      * Some characters look like pairs of Latin letters. For example, there
5374      * is an uppercase letter that looks like "LJ" and has a corresponding
5375      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5376      * is the appropriate form to use when rendering a word in lowercase
5377      * with initial capitals, as for a book title.
5378      * <p>
5379      * These are some of the Unicode characters for which this method returns
5380      * <code>true</code>:
5381      * <ul>
5382      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
5383      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
5384      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
5385      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
5386      * </ul>
5387      * <p> Many other Unicode characters are titlecase too.
5388      *
5389      * @param   codePoint the character (Unicode code point) to be tested.
5390      * @return  <code>true</code> if the character is titlecase;
5391      *          <code>false</code> otherwise.
5392      * @see     Character#isLowerCase(int)
5393      * @see     Character#isUpperCase(int)
5394      * @see     Character#toTitleCase(int)
5395      * @see     Character#getType(int)
5396      * @since   1.5
5397      */
5398     public static boolean isTitleCase(int codePoint) {
5399         return getType(codePoint) == Character.TITLECASE_LETTER;
5400     }
5401 
5402     /**
5403      * Determines if the specified character is a digit.
5404      * <p>
5405      * A character is a digit if its general category type, provided
5406      * by <code>Character.getType(ch)</code>, is
5407      * <code>DECIMAL_DIGIT_NUMBER</code>.
5408      * <p>
5409      * Some Unicode character ranges that contain digits:
5410      * <ul>
5411      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
5412      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
5413      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
5414      *     Arabic-Indic digits
5415      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
5416      *     Extended Arabic-Indic digits
5417      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
5418      *     Devanagari digits
5419      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
5420      *     Fullwidth digits
5421      * </ul>
5422      *
5423      * Many other character ranges contain digits as well.
5424      *
5425      * <p><b>Note:</b> This method cannot handle <a
5426      * href="#supplementary"> supplementary characters</a>. To support
5427      * all Unicode characters, including supplementary characters, use
5428      * the {@link #isDigit(int)} method.
5429      *
5430      * @param   ch   the character to be tested.
5431      * @return  <code>true</code> if the character is a digit;
5432      *          <code>false</code> otherwise.
5433      * @see     Character#digit(char, int)
5434      * @see     Character#forDigit(int, int)
5435      * @see     Character#getType(char)
5436      */
5437     public static boolean isDigit(char ch) {
5438         return isDigit((int)ch);
5439     }
5440 
5441     /**
5442      * Determines if the specified character (Unicode code point) is a digit.
5443      * <p>
5444      * A character is a digit if its general category type, provided
5445      * by {@link Character#getType(int) getType(codePoint)}, is
5446      * <code>DECIMAL_DIGIT_NUMBER</code>.
5447      * <p>
5448      * Some Unicode character ranges that contain digits:
5449      * <ul>
5450      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
5451      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
5452      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
5453      *     Arabic-Indic digits
5454      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
5455      *     Extended Arabic-Indic digits
5456      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
5457      *     Devanagari digits
5458      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
5459      *     Fullwidth digits
5460      * </ul>
5461      *
5462      * Many other character ranges contain digits as well.
5463      *
5464      * @param   codePoint the character (Unicode code point) to be tested.
5465      * @return  <code>true</code> if the character is a digit;
5466      *          <code>false</code> otherwise.
5467      * @see     Character#forDigit(int, int)
5468      * @see     Character#getType(int)
5469      * @since   1.5
5470      */
5471     public static boolean isDigit(int codePoint) {
5472         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5473     }
5474 
5475     /**
5476      * Determines if a character is defined in Unicode.
5477      * <p>
5478      * A character is defined if at least one of the following is true:
5479      * <ul>
5480      * <li>It has an entry in the UnicodeData file.
5481      * <li>It has a value in a range defined by the UnicodeData file.
5482      * </ul>
5483      *
5484      * <p><b>Note:</b> This method cannot handle <a
5485      * href="#supplementary"> supplementary characters</a>. To support
5486      * all Unicode characters, including supplementary characters, use
5487      * the {@link #isDefined(int)} method.
5488      *
5489      * @param   ch   the character to be tested
5490      * @return  <code>true</code> if the character has a defined meaning
5491      *          in Unicode; <code>false</code> otherwise.
5492      * @see     Character#isDigit(char)
5493      * @see     Character#isLetter(char)
5494      * @see     Character#isLetterOrDigit(char)
5495      * @see     Character#isLowerCase(char)
5496      * @see     Character#isTitleCase(char)
5497      * @see     Character#isUpperCase(char)
5498      * @since   1.0.2
5499      */
5500     public static boolean isDefined(char ch) {
5501         return isDefined((int)ch);
5502     }
5503 
5504     /**
5505      * Determines if a character (Unicode code point) is defined in Unicode.
5506      * <p>
5507      * A character is defined if at least one of the following is true:
5508      * <ul>
5509      * <li>It has an entry in the UnicodeData file.
5510      * <li>It has a value in a range defined by the UnicodeData file.
5511      * </ul>
5512      *
5513      * @param   codePoint the character (Unicode code point) to be tested.
5514      * @return  <code>true</code> if the character has a defined meaning
5515      *          in Unicode; <code>false</code> otherwise.
5516      * @see     Character#isDigit(int)
5517      * @see     Character#isLetter(int)
5518      * @see     Character#isLetterOrDigit(int)
5519      * @see     Character#isLowerCase(int)
5520      * @see     Character#isTitleCase(int)
5521      * @see     Character#isUpperCase(int)
5522      * @since   1.5
5523      */
5524     public static boolean isDefined(int codePoint) {
5525         return getType(codePoint) != Character.UNASSIGNED;
5526     }
5527 
5528     /**
5529      * Determines if the specified character is a letter.
5530      * <p>
5531      * A character is considered to be a letter if its general
5532      * category type, provided by <code>Character.getType(ch)</code>,
5533      * is any of the following:
5534      * <ul>
5535      * <li> <code>UPPERCASE_LETTER</code>
5536      * <li> <code>LOWERCASE_LETTER</code>
5537      * <li> <code>TITLECASE_LETTER</code>
5538      * <li> <code>MODIFIER_LETTER</code>
5539      * <li> <code>OTHER_LETTER</code>
5540      * </ul>
5541      *
5542      * Not all letters have case. Many characters are
5543      * letters but are neither uppercase nor lowercase nor titlecase.
5544      *
5545      * <p><b>Note:</b> This method cannot handle <a
5546      * href="#supplementary"> supplementary characters</a>. To support
5547      * all Unicode characters, including supplementary characters, use
5548      * the {@link #isLetter(int)} method.
5549      *
5550      * @param   ch   the character to be tested.
5551      * @return  <code>true</code> if the character is a letter;
5552      *          <code>false</code> otherwise.
5553      * @see     Character#isDigit(char)
5554      * @see     Character#isJavaIdentifierStart(char)
5555      * @see     Character#isJavaLetter(char)
5556      * @see     Character#isJavaLetterOrDigit(char)
5557      * @see     Character#isLetterOrDigit(char)
5558      * @see     Character#isLowerCase(char)
5559      * @see     Character#isTitleCase(char)
5560      * @see     Character#isUnicodeIdentifierStart(char)
5561      * @see     Character#isUpperCase(char)
5562      */
5563     public static boolean isLetter(char ch) {
5564         return isLetter((int)ch);
5565     }
5566 
5567     /**
5568      * Determines if the specified character (Unicode code point) is a letter.
5569      * <p>
5570      * A character is considered to be a letter if its general
5571      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5572      * is any of the following:
5573      * <ul>
5574      * <li> <code>UPPERCASE_LETTER</code>
5575      * <li> <code>LOWERCASE_LETTER</code>
5576      * <li> <code>TITLECASE_LETTER</code>
5577      * <li> <code>MODIFIER_LETTER</code>
5578      * <li> <code>OTHER_LETTER</code>
5579      * </ul>
5580      *
5581      * Not all letters have case. Many characters are
5582      * letters but are neither uppercase nor lowercase nor titlecase.
5583      *
5584      * @param   codePoint the character (Unicode code point) to be tested.
5585      * @return  <code>true</code> if the character is a letter;
5586      *          <code>false</code> otherwise.
5587      * @see     Character#isDigit(int)
5588      * @see     Character#isJavaIdentifierStart(int)
5589      * @see     Character#isLetterOrDigit(int)
5590      * @see     Character#isLowerCase(int)
5591      * @see     Character#isTitleCase(int)
5592      * @see     Character#isUnicodeIdentifierStart(int)
5593      * @see     Character#isUpperCase(int)
5594      * @since   1.5
5595      */
5596     public static boolean isLetter(int codePoint) {
5597         return ((((1 << Character.UPPERCASE_LETTER) |
5598             (1 << Character.LOWERCASE_LETTER) |
5599             (1 << Character.TITLECASE_LETTER) |
5600             (1 << Character.MODIFIER_LETTER) |
5601             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5602             != 0;
5603     }
5604 
5605     /**
5606      * Determines if the specified character is a letter or digit.
5607      * <p>
5608      * A character is considered to be a letter or digit if either
5609      * <code>Character.isLetter(char ch)</code> or
5610      * <code>Character.isDigit(char ch)</code> returns
5611      * <code>true</code> for the character.
5612      *
5613      * <p><b>Note:</b> This method cannot handle <a
5614      * href="#supplementary"> supplementary characters</a>. To support
5615      * all Unicode characters, including supplementary characters, use
5616      * the {@link #isLetterOrDigit(int)} method.
5617      *
5618      * @param   ch   the character to be tested.
5619      * @return  <code>true</code> if the character is a letter or digit;
5620      *          <code>false</code> otherwise.
5621      * @see     Character#isDigit(char)
5622      * @see     Character#isJavaIdentifierPart(char)
5623      * @see     Character#isJavaLetter(char)
5624      * @see     Character#isJavaLetterOrDigit(char)
5625      * @see     Character#isLetter(char)
5626      * @see     Character#isUnicodeIdentifierPart(char)
5627      * @since   1.0.2
5628      */
5629     public static boolean isLetterOrDigit(char ch) {
5630         return isLetterOrDigit((int)ch);
5631     }
5632 
5633     /**
5634      * Determines if the specified character (Unicode code point) is a letter or digit.
5635      * <p>
5636      * A character is considered to be a letter or digit if either
5637      * {@link #isLetter(int) isLetter(codePoint)} or
5638      * {@link #isDigit(int) isDigit(codePoint)} returns
5639      * <code>true</code> for the character.
5640      *
5641      * @param   codePoint the character (Unicode code point) to be tested.
5642      * @return  <code>true</code> if the character is a letter or digit;
5643      *          <code>false</code> otherwise.
5644      * @see     Character#isDigit(int)
5645      * @see     Character#isJavaIdentifierPart(int)
5646      * @see     Character#isLetter(int)
5647      * @see     Character#isUnicodeIdentifierPart(int)
5648      * @since   1.5
5649      */
5650     public static boolean isLetterOrDigit(int codePoint) {
5651         return ((((1 << Character.UPPERCASE_LETTER) |
5652             (1 << Character.LOWERCASE_LETTER) |
5653             (1 << Character.TITLECASE_LETTER) |
5654             (1 << Character.MODIFIER_LETTER) |
5655             (1 << Character.OTHER_LETTER) |
5656             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5657             != 0;
5658     }
5659 
5660     /**
5661      * Determines if the specified character is permissible as the first
5662      * character in a Java identifier.
5663      * <p>
5664      * A character may start a Java identifier if and only if
5665      * one of the following is true:
5666      * <ul>
5667      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5668      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
5669      * <li> ch is a currency symbol (such as "$")
5670      * <li> ch is a connecting punctuation character (such as "_").
5671      * </ul>
5672      *
5673      * @param   ch the character to be tested.
5674      * @return  <code>true</code> if the character may start a Java
5675      *          identifier; <code>false</code> otherwise.
5676      * @see     Character#isJavaLetterOrDigit(char)
5677      * @see     Character#isJavaIdentifierStart(char)
5678      * @see     Character#isJavaIdentifierPart(char)
5679      * @see     Character#isLetter(char)
5680      * @see     Character#isLetterOrDigit(char)
5681      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5683      * @deprecated Replaced by isJavaIdentifierStart(char).
5684      */
5685     @Deprecated
5686     public static boolean isJavaLetter(char ch) {
5687         return isJavaIdentifierStart(ch);
5688     }
5689 
5690     /**
5691      * Determines if the specified character may be part of a Java
5692      * identifier as other than the first character.
5693      * <p>
5694      * A character may be part of a Java identifier if and only if any
5695      * of the following are true:
5696      * <ul>
5697      * <li>  it is a letter
5698      * <li>  it is a currency symbol (such as <code>'$'</code>)
5699      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5700      * <li>  it is a digit
5701      * <li>  it is a numeric letter (such as a Roman numeral character)
5702      * <li>  it is a combining mark
5703      * <li>  it is a non-spacing mark
5704      * <li> <code>isIdentifierIgnorable</code> returns
5705      * <code>true</code> for the character.
5706      * </ul>
5707      *
5708      * @param   ch the character to be tested.
5709      * @return  <code>true</code> if the character may be part of a
5710      *          Java identifier; <code>false</code> otherwise.
5711      * @see     Character#isJavaLetter(char)
5712      * @see     Character#isJavaIdentifierStart(char)
5713      * @see     Character#isJavaIdentifierPart(char)
5714      * @see     Character#isLetter(char)
5715      * @see     Character#isLetterOrDigit(char)
5716      * @see     Character#isUnicodeIdentifierPart(char)
5717      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5719      * @deprecated Replaced by isJavaIdentifierPart(char).
5720      */
5721     @Deprecated
5722     public static boolean isJavaLetterOrDigit(char ch) {
5723         return isJavaIdentifierPart(ch);
5724     }
5725 
5726     /**
5727      * Determines if the specified character is
5728      * permissible as the first character in a Java identifier.
5729      * <p>
5730      * A character may start a Java identifier if and only if
5731      * one of the following conditions is true:
5732      * <ul>
5733      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5734      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
5735      * <li> ch is a currency symbol (such as "$")
5736      * <li> ch is a connecting punctuation character (such as "_").
5737      * </ul>
5738      *
5739      * <p><b>Note:</b> This method cannot handle <a
5740      * href="#supplementary"> supplementary characters</a>. To support
5741      * all Unicode characters, including supplementary characters, use
5742      * the {@link #isJavaIdentifierStart(int)} method.
5743      *
5744      * @param   ch the character to be tested.
5745      * @return  <code>true</code> if the character may start a Java identifier;
5746      *          <code>false</code> otherwise.
5747      * @see     Character#isJavaIdentifierPart(char)
5748      * @see     Character#isLetter(char)
5749      * @see     Character#isUnicodeIdentifierStart(char)
5750      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5751      * @since   1.1
5752      */
5753     public static boolean isJavaIdentifierStart(char ch) {
5754         return isJavaIdentifierStart((int)ch);
5755     }
5756 
5757     /**
5758      * Determines if the character (Unicode code point) is
5759      * permissible as the first character in a Java identifier.
5760      * <p>
5761      * A character may start a Java identifier if and only if
5762      * one of the following conditions is true:
5763      * <ul>
5764      * <li> {@link #isLetter(int) isLetter(codePoint)}
5765      *      returns <code>true</code>
5766      * <li> {@link #getType(int) getType(codePoint)}
5767      *      returns <code>LETTER_NUMBER</code>
5768      * <li> the referenced character is a currency symbol (such as "$")
5769      * <li> the referenced character is a connecting punctuation character
5770      *      (such as "_").
5771      * </ul>
5772      *
5773      * @param   codePoint the character (Unicode code point) to be tested.
5774      * @return  <code>true</code> if the character may start a Java identifier;
5775      *          <code>false</code> otherwise.
5776      * @see     Character#isJavaIdentifierPart(int)
5777      * @see     Character#isLetter(int)
5778      * @see     Character#isUnicodeIdentifierStart(int)
5779      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5780      * @since   1.5
5781      */
5782     public static boolean isJavaIdentifierStart(int codePoint) {
5783         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
5784     }
5785 
5786     /**
5787      * Determines if the specified character may be part of a Java
5788      * identifier as other than the first character.
5789      * <p>
5790      * A character may be part of a Java identifier if any of the following
5791      * are true:
5792      * <ul>
5793      * <li>  it is a letter
5794      * <li>  it is a currency symbol (such as <code>'$'</code>)
5795      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5796      * <li>  it is a digit
5797      * <li>  it is a numeric letter (such as a Roman numeral character)
5798      * <li>  it is a combining mark
5799      * <li>  it is a non-spacing mark
5800      * <li> <code>isIdentifierIgnorable</code> returns
5801      * <code>true</code> for the character
5802      * </ul>
5803      *
5804      * <p><b>Note:</b> This method cannot handle <a
5805      * href="#supplementary"> supplementary characters</a>. To support
5806      * all Unicode characters, including supplementary characters, use
5807      * the {@link #isJavaIdentifierPart(int)} method.
5808      *
5809      * @param   ch      the character to be tested.
5810      * @return <code>true</code> if the character may be part of a
5811      *          Java identifier; <code>false</code> otherwise.
5812      * @see     Character#isIdentifierIgnorable(char)
5813      * @see     Character#isJavaIdentifierStart(char)
5814      * @see     Character#isLetterOrDigit(char)
5815      * @see     Character#isUnicodeIdentifierPart(char)
5816      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5817      * @since   1.1
5818      */
5819     public static boolean isJavaIdentifierPart(char ch) {
5820         return isJavaIdentifierPart((int)ch);
5821     }
5822 
5823     /**
5824      * Determines if the character (Unicode code point) may be part of a Java
5825      * identifier as other than the first character.
5826      * <p>
5827      * A character may be part of a Java identifier if any of the following
5828      * are true:
5829      * <ul>
5830      * <li>  it is a letter
5831      * <li>  it is a currency symbol (such as <code>'$'</code>)
5832      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5833      * <li>  it is a digit
5834      * <li>  it is a numeric letter (such as a Roman numeral character)
5835      * <li>  it is a combining mark
5836      * <li>  it is a non-spacing mark
5837      * <li> {@link #isIdentifierIgnorable(int)
5838      * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
5839      * the character
5840      * </ul>
5841      *
5842      * @param   codePoint the character (Unicode code point) to be tested.
5843      * @return <code>true</code> if the character may be part of a
5844      *          Java identifier; <code>false</code> otherwise.
5845      * @see     Character#isIdentifierIgnorable(int)
5846      * @see     Character#isJavaIdentifierStart(int)
5847      * @see     Character#isLetterOrDigit(int)
5848      * @see     Character#isUnicodeIdentifierPart(int)
5849      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5850      * @since   1.5
5851      */
5852     public static boolean isJavaIdentifierPart(int codePoint) {
5853         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
5854     }
5855 
5856     /**
5857      * Determines if the specified character is permissible as the
5858      * first character in a Unicode identifier.
5859      * <p>
5860      * A character may start a Unicode identifier if and only if
5861      * one of the following conditions is true:
5862      * <ul>
5863      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5864      * <li> {@link #getType(char) getType(ch)} returns
5865      *      <code>LETTER_NUMBER</code>.
5866      * </ul>
5867      *
5868      * <p><b>Note:</b> This method cannot handle <a
5869      * href="#supplementary"> supplementary characters</a>. To support
5870      * all Unicode characters, including supplementary characters, use
5871      * the {@link #isUnicodeIdentifierStart(int)} method.
5872      *
5873      * @param   ch      the character to be tested.
5874      * @return  <code>true</code> if the character may start a Unicode
5875      *          identifier; <code>false</code> otherwise.
5876      * @see     Character#isJavaIdentifierStart(char)
5877      * @see     Character#isLetter(char)
5878      * @see     Character#isUnicodeIdentifierPart(char)
5879      * @since   1.1
5880      */
5881     public static boolean isUnicodeIdentifierStart(char ch) {
5882         return isUnicodeIdentifierStart((int)ch);
5883     }
5884 
5885     /**
5886      * Determines if the specified character (Unicode code point) is permissible as the
5887      * first character in a Unicode identifier.
5888      * <p>
5889      * A character may start a Unicode identifier if and only if
5890      * one of the following conditions is true:
5891      * <ul>
5892      * <li> {@link #isLetter(int) isLetter(codePoint)}
5893      *      returns <code>true</code>
5894      * <li> {@link #getType(int) getType(codePoint)}
5895      *      returns <code>LETTER_NUMBER</code>.
5896      * </ul>
5897      * @param   codePoint the character (Unicode code point) to be tested.
5898      * @return  <code>true</code> if the character may start a Unicode
5899      *          identifier; <code>false</code> otherwise.
5900      * @see     Character#isJavaIdentifierStart(int)
5901      * @see     Character#isLetter(int)
5902      * @see     Character#isUnicodeIdentifierPart(int)
5903      * @since   1.5
5904      */
5905     public static boolean isUnicodeIdentifierStart(int codePoint) {
5906         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
5907     }
5908 
5909     /**
5910      * Determines if the specified character may be part of a Unicode
5911      * identifier as other than the first character.
5912      * <p>
5913      * A character may be part of a Unicode identifier if and only if
5914      * one of the following statements is true:
5915      * <ul>
5916      * <li>  it is a letter
5917      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5918      * <li>  it is a digit
5919      * <li>  it is a numeric letter (such as a Roman numeral character)
5920      * <li>  it is a combining mark
5921      * <li>  it is a non-spacing mark
5922      * <li> <code>isIdentifierIgnorable</code> returns
5923      * <code>true</code> for this character.
5924      * </ul>
5925      *
5926      * <p><b>Note:</b> This method cannot handle <a
5927      * href="#supplementary"> supplementary characters</a>. To support
5928      * all Unicode characters, including supplementary characters, use
5929      * the {@link #isUnicodeIdentifierPart(int)} method.
5930      *
5931      * @param   ch      the character to be tested.
5932      * @return  <code>true</code> if the character may be part of a
5933      *          Unicode identifier; <code>false</code> otherwise.
5934      * @see     Character#isIdentifierIgnorable(char)
5935      * @see     Character#isJavaIdentifierPart(char)
5936      * @see     Character#isLetterOrDigit(char)
5937      * @see     Character#isUnicodeIdentifierStart(char)
5938      * @since   1.1
5939      */
5940     public static boolean isUnicodeIdentifierPart(char ch) {
5941         return isUnicodeIdentifierPart((int)ch);
5942     }
5943 
5944     /**
5945      * Determines if the specified character (Unicode code point) may be part of a Unicode
5946      * identifier as other than the first character.
5947      * <p>
5948      * A character may be part of a Unicode identifier if and only if
5949      * one of the following statements is true:
5950      * <ul>
5951      * <li>  it is a letter
5952      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5953      * <li>  it is a digit
5954      * <li>  it is a numeric letter (such as a Roman numeral character)
5955      * <li>  it is a combining mark
5956      * <li>  it is a non-spacing mark
5957      * <li> <code>isIdentifierIgnorable</code> returns
5958      * <code>true</code> for this character.
5959      * </ul>
5960      * @param   codePoint the character (Unicode code point) to be tested.
5961      * @return  <code>true</code> if the character may be part of a
5962      *          Unicode identifier; <code>false</code> otherwise.
5963      * @see     Character#isIdentifierIgnorable(int)
5964      * @see     Character#isJavaIdentifierPart(int)
5965      * @see     Character#isLetterOrDigit(int)
5966      * @see     Character#isUnicodeIdentifierStart(int)
5967      * @since   1.5
5968      */
5969     public static boolean isUnicodeIdentifierPart(int codePoint) {
5970         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
5971     }
5972 
5973     /**
5974      * Determines if the specified character should be regarded as
5975      * an ignorable character in a Java identifier or a Unicode identifier.
5976      * <p>
5977      * The following Unicode characters are ignorable in a Java identifier
5978      * or a Unicode identifier:
5979      * <ul>
5980      * <li>ISO control characters that are not whitespace
5981      * <ul>
5982      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
5983      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
5984      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
5985      * </ul>
5986      *
5987      * <li>all characters that have the <code>FORMAT</code> general
5988      * category value
5989      * </ul>
5990      *
5991      * <p><b>Note:</b> This method cannot handle <a
5992      * href="#supplementary"> supplementary characters</a>. To support
5993      * all Unicode characters, including supplementary characters, use
5994      * the {@link #isIdentifierIgnorable(int)} method.
5995      *
5996      * @param   ch      the character to be tested.
5997      * @return  <code>true</code> if the character is an ignorable control
5998      *          character that may be part of a Java or Unicode identifier;
5999      *           <code>false</code> otherwise.
6000      * @see     Character#isJavaIdentifierPart(char)
6001      * @see     Character#isUnicodeIdentifierPart(char)
6002      * @since   1.1
6003      */
6004     public static boolean isIdentifierIgnorable(char ch) {
6005         return isIdentifierIgnorable((int)ch);
6006     }
6007 
6008     /**
6009      * Determines if the specified character (Unicode code point) should be regarded as
6010      * an ignorable character in a Java identifier or a Unicode identifier.
6011      * <p>
6012      * The following Unicode characters are ignorable in a Java identifier
6013      * or a Unicode identifier:
6014      * <ul>
6015      * <li>ISO control characters that are not whitespace
6016      * <ul>
6017      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
6018      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
6019      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
6020      * </ul>
6021      *
6022      * <li>all characters that have the <code>FORMAT</code> general
6023      * category value
6024      * </ul>
6025      *
6026      * @param   codePoint the character (Unicode code point) to be tested.
6027      * @return  <code>true</code> if the character is an ignorable control
6028      *          character that may be part of a Java or Unicode identifier;
6029      *          <code>false</code> otherwise.
6030      * @see     Character#isJavaIdentifierPart(int)
6031      * @see     Character#isUnicodeIdentifierPart(int)
6032      * @since   1.5
6033      */
6034     public static boolean isIdentifierIgnorable(int codePoint) {
6035         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6036     }
6037 
6038     /**
6039      * Converts the character argument to lowercase using case
6040      * mapping information from the UnicodeData file.
6041      * <p>
6042      * Note that
6043      * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
6044      * does not always return <code>true</code> for some ranges of
6045      * characters, particularly those that are symbols or ideographs.
6046      *
6047      * <p>In general, {@link String#toLowerCase()} should be used to map
6048      * characters to lowercase. <code>String</code> case mapping methods
6049      * have several benefits over <code>Character</code> case mapping methods.
6050      * <code>String</code> case mapping methods can perform locale-sensitive
6051      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6052      * the <code>Character</code> case mapping methods cannot.
6053      *
6054      * <p><b>Note:</b> This method cannot handle <a
6055      * href="#supplementary"> supplementary characters</a>. To support
6056      * all Unicode characters, including supplementary characters, use
6057      * the {@link #toLowerCase(int)} method.
6058      *
6059      * @param   ch   the character to be converted.
6060      * @return  the lowercase equivalent of the character, if any;
6061      *          otherwise, the character itself.
6062      * @see     Character#isLowerCase(char)
6063      * @see     String#toLowerCase()
6064      */
6065     public static char toLowerCase(char ch) {
6066         return (char)toLowerCase((int)ch);
6067     }
6068 
6069     /**
6070      * Converts the character (Unicode code point) argument to
6071      * lowercase using case mapping information from the UnicodeData
6072      * file.
6073      *
6074      * <p> Note that
6075      * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
6076      * does not always return <code>true</code> for some ranges of
6077      * characters, particularly those that are symbols or ideographs.
6078      *
6079      * <p>In general, {@link String#toLowerCase()} should be used to map
6080      * characters to lowercase. <code>String</code> case mapping methods
6081      * have several benefits over <code>Character</code> case mapping methods.
6082      * <code>String</code> case mapping methods can perform locale-sensitive
6083      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6084      * the <code>Character</code> case mapping methods cannot.
6085      *
6086      * @param   codePoint   the character (Unicode code point) to be converted.
6087      * @return  the lowercase equivalent of the character (Unicode code
6088      *          point), if any; otherwise, the character itself.
6089      * @see     Character#isLowerCase(int)
6090      * @see     String#toLowerCase()
6091      *
6092      * @since   1.5
6093      */
6094     public static int toLowerCase(int codePoint) {
6095         return CharacterData.of(codePoint).toLowerCase(codePoint);
6096     }
6097 
6098     /**
6099      * Converts the character argument to uppercase using case mapping
6100      * information from the UnicodeData file.
6101      * <p>
6102      * Note that
6103      * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
6104      * does not always return <code>true</code> for some ranges of
6105      * characters, particularly those that are symbols or ideographs.
6106      *
6107      * <p>In general, {@link String#toUpperCase()} should be used to map
6108      * characters to uppercase. <code>String</code> case mapping methods
6109      * have several benefits over <code>Character</code> case mapping methods.
6110      * <code>String</code> case mapping methods can perform locale-sensitive
6111      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6112      * the <code>Character</code> case mapping methods cannot.
6113      *
6114      * <p><b>Note:</b> This method cannot handle <a
6115      * href="#supplementary"> supplementary characters</a>. To support
6116      * all Unicode characters, including supplementary characters, use
6117      * the {@link #toUpperCase(int)} method.
6118      *
6119      * @param   ch   the character to be converted.
6120      * @return  the uppercase equivalent of the character, if any;
6121      *          otherwise, the character itself.
6122      * @see     Character#isUpperCase(char)
6123      * @see     String#toUpperCase()
6124      */
6125     public static char toUpperCase(char ch) {
6126         return (char)toUpperCase((int)ch);
6127     }
6128 
6129     /**
6130      * Converts the character (Unicode code point) argument to
6131      * uppercase using case mapping information from the UnicodeData
6132      * file.
6133      *
6134      * <p>Note that
6135      * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
6136      * does not always return <code>true</code> for some ranges of
6137      * characters, particularly those that are symbols or ideographs.
6138      *
6139      * <p>In general, {@link String#toUpperCase()} should be used to map
6140      * characters to uppercase. <code>String</code> case mapping methods
6141      * have several benefits over <code>Character</code> case mapping methods.
6142      * <code>String</code> case mapping methods can perform locale-sensitive
6143      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6144      * the <code>Character</code> case mapping methods cannot.
6145      *
6146      * @param   codePoint   the character (Unicode code point) to be converted.
6147      * @return  the uppercase equivalent of the character, if any;
6148      *          otherwise, the character itself.
6149      * @see     Character#isUpperCase(int)
6150      * @see     String#toUpperCase()
6151      *
6152      * @since   1.5
6153      */
6154     public static int toUpperCase(int codePoint) {
6155         return CharacterData.of(codePoint).toUpperCase(codePoint);
6156     }
6157 
6158     /**
6159      * Converts the character argument to titlecase using case mapping
6160      * information from the UnicodeData file. If a character has no
6161      * explicit titlecase mapping and is not itself a titlecase char
6162      * according to UnicodeData, then the uppercase mapping is
6163      * returned as an equivalent titlecase mapping. If the
6164      * <code>char</code> argument is already a titlecase
6165      * <code>char</code>, the same <code>char</code> value will be
6166      * returned.
6167      * <p>
6168      * Note that
6169      * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
6170      * does not always return <code>true</code> for some ranges of
6171      * characters.
6172      *
6173      * <p><b>Note:</b> This method cannot handle <a
6174      * href="#supplementary"> supplementary characters</a>. To support
6175      * all Unicode characters, including supplementary characters, use
6176      * the {@link #toTitleCase(int)} method.
6177      *
6178      * @param   ch   the character to be converted.
6179      * @return  the titlecase equivalent of the character, if any;
6180      *          otherwise, the character itself.
6181      * @see     Character#isTitleCase(char)
6182      * @see     Character#toLowerCase(char)
6183      * @see     Character#toUpperCase(char)
6184      * @since   1.0.2
6185      */
6186     public static char toTitleCase(char ch) {
6187         return (char)toTitleCase((int)ch);
6188     }
6189 
6190     /**
6191      * Converts the character (Unicode code point) argument to titlecase using case mapping
6192      * information from the UnicodeData file. If a character has no
6193      * explicit titlecase mapping and is not itself a titlecase char
6194      * according to UnicodeData, then the uppercase mapping is
6195      * returned as an equivalent titlecase mapping. If the
6196      * character argument is already a titlecase
6197      * character, the same character value will be
6198      * returned.
6199      *
6200      * <p>Note that
6201      * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
6202      * does not always return <code>true</code> for some ranges of
6203      * characters.
6204      *
6205      * @param   codePoint   the character (Unicode code point) to be converted.
6206      * @return  the titlecase equivalent of the character, if any;
6207      *          otherwise, the character itself.
6208      * @see     Character#isTitleCase(int)
6209      * @see     Character#toLowerCase(int)
6210      * @see     Character#toUpperCase(int)
6211      * @since   1.5
6212      */
6213     public static int toTitleCase(int codePoint) {
6214         return CharacterData.of(codePoint).toTitleCase(codePoint);
6215     }
6216 
6217     /**
6218      * Returns the numeric value of the character <code>ch</code> in the
6219      * specified radix.
6220      * <p>
6221      * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
6222      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
6223      * value of <code>ch</code> is not a valid digit in the specified
6224      * radix, <code>-1</code> is returned. A character is a valid digit
6225      * if at least one of the following is true:
6226      * <ul>
6227      * <li>The method <code>isDigit</code> is <code>true</code> of the character
6228      *     and the Unicode decimal digit value of the character (or its
6229      *     single-character decomposition) is less than the specified radix.
6230      *     In this case the decimal digit value is returned.
6231      * <li>The character is one of the uppercase Latin letters
6232      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
6233      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
6234      *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
6235      *     is returned.
6236      * <li>The character is one of the lowercase Latin letters
6237      *     <code>'a'</code> through <code>'z'</code> and its code is less than
6238      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
6239      *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
6240      *     is returned.
6241      * </ul>
6242      *
6243      * <p><b>Note:</b> This method cannot handle <a
6244      * href="#supplementary"> supplementary characters</a>. To support
6245      * all Unicode characters, including supplementary characters, use
6246      * the {@link #digit(int, int)} method.
6247      *
6248      * @param   ch      the character to be converted.
6249      * @param   radix   the radix.
6250      * @return  the numeric value represented by the character in the
6251      *          specified radix.
6252      * @see     Character#forDigit(int, int)
6253      * @see     Character#isDigit(char)
6254      */
6255     public static int digit(char ch, int radix) {
6256         return digit((int)ch, radix);
6257     }
6258 
6259     /**
6260      * Returns the numeric value of the specified character (Unicode
6261      * code point) in the specified radix.
6262      *
6263      * <p>If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
6264      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
6265      * character is not a valid digit in the specified
6266      * radix, <code>-1</code> is returned. A character is a valid digit
6267      * if at least one of the following is true:
6268      * <ul>
6269      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character
6270      *     and the Unicode decimal digit value of the character (or its
6271      *     single-character decomposition) is less than the specified radix.
6272      *     In this case the decimal digit value is returned.
6273      * <li>The character is one of the uppercase Latin letters
6274      *     <code>'A'</code> through <code>'Z'</code> and its code is less than
6275      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
6276      *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
6277      *     is returned.
6278      * <li>The character is one of the lowercase Latin letters
6279      *     <code>'a'</code> through <code>'z'</code> and its code is less than
6280      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
6281      *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
6282      *     is returned.
6283      * </ul>
6284      *
6285      * @param   codePoint the character (Unicode code point) to be converted.
6286      * @param   radix   the radix.
6287      * @return  the numeric value represented by the character in the
6288      *          specified radix.
6289      * @see     Character#forDigit(int, int)
6290      * @see     Character#isDigit(int)
6291      * @since   1.5
6292      */
6293     public static int digit(int codePoint, int radix) {
6294         return CharacterData.of(codePoint).digit(codePoint, radix);
6295     }
6296 
6297     /**
6298      * Returns the <code>int</code> value that the specified Unicode
6299      * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
     * an <code>int</code> with a value of 50.
6302      * <p>
6303      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
6304      * <code>'&#92;u005A'</code>), lowercase
6305      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
6306      * full width variant (<code>'&#92;uFF21'</code> through
6307      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
6308      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
6309      * through 35. This is independent of the Unicode specification,
6310      * which does not assign numeric values to these <code>char</code>
6311      * values.
6312      * <p>
6313      * If the character does not have a numeric value, then -1 is returned.
6314      * If the character has a numeric value that cannot be represented as a
6315      * nonnegative integer (for example, a fractional value), then -2
6316      * is returned.
6317      *
6318      * <p><b>Note:</b> This method cannot handle <a
6319      * href="#supplementary"> supplementary characters</a>. To support
6320      * all Unicode characters, including supplementary characters, use
6321      * the {@link #getNumericValue(int)} method.
6322      *
6323      * @param   ch      the character to be converted.
6324      * @return  the numeric value of the character, as a nonnegative <code>int</code>
6325      *           value; -2 if the character has a numeric value that is not a
6326      *          nonnegative integer; -1 if the character has no numeric value.
6327      * @see     Character#forDigit(int, int)
6328      * @see     Character#isDigit(char)
6329      * @since   1.1
6330      */
6331     public static int getNumericValue(char ch) {
6332         return getNumericValue((int)ch);
6333     }
6334 
6335     /**
6336      * Returns the <code>int</code> value that the specified
6337      * character (Unicode code point) represents. For example, the character
6338      * <code>'&#92;u216C'</code> (the Roman numeral fifty) will return
6339      * an <code>int</code> with a value of 50.
6340      * <p>
6341      * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
6342      * <code>'&#92;u005A'</code>), lowercase
6343      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
6344      * full width variant (<code>'&#92;uFF21'</code> through
6345      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
6346      * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
6347      * through 35. This is independent of the Unicode specification,
6348      * which does not assign numeric values to these <code>char</code>
6349      * values.
6350      * <p>
6351      * If the character does not have a numeric value, then -1 is returned.
6352      * If the character has a numeric value that cannot be represented as a
6353      * nonnegative integer (for example, a fractional value), then -2
6354      * is returned.
6355      *
6356      * @param   codePoint the character (Unicode code point) to be converted.
6357      * @return  the numeric value of the character, as a nonnegative <code>int</code>
6358      *          value; -2 if the character has a numeric value that is not a
6359      *          nonnegative integer; -1 if the character has no numeric value.
6360      * @see     Character#forDigit(int, int)
6361      * @see     Character#isDigit(int)
6362      * @since   1.5
6363      */
6364     public static int getNumericValue(int codePoint) {
6365         return CharacterData.of(codePoint).getNumericValue(codePoint);
6366     }
6367 
6368     /**
6369      * Determines if the specified character is ISO-LATIN-1 white space.
6370      * This method returns <code>true</code> for the following five
6371      * characters only:
6372      * <table>
6373      * <tr><td><code>'\t'</code></td>            <td><code>U+0009</code></td>
6374      *     <td><code>HORIZONTAL TABULATION</code></td></tr>
6375      * <tr><td><code>'\n'</code></td>            <td><code>U+000A</code></td>
6376      *     <td><code>NEW LINE</code></td></tr>
6377      * <tr><td><code>'\f'</code></td>            <td><code>U+000C</code></td>
6378      *     <td><code>FORM FEED</code></td></tr>
6379      * <tr><td><code>'\r'</code></td>            <td><code>U+000D</code></td>
6380      *     <td><code>CARRIAGE RETURN</code></td></tr>
6381      * <tr><td><code>'&nbsp;'</code></td>  <td><code>U+0020</code></td>
6382      *     <td><code>SPACE</code></td></tr>
6383      * </table>
6384      *
6385      * @param      ch   the character to be tested.
6386      * @return     <code>true</code> if the character is ISO-LATIN-1 white
6387      *             space; <code>false</code> otherwise.
6388      * @see        Character#isSpaceChar(char)
6389      * @see        Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
6391      */
6392     @Deprecated
6393     public static boolean isSpace(char ch) {
6394         return (ch <= 0x0020) &&
6395             (((((1L << 0x0009) |
6396             (1L << 0x000A) |
6397             (1L << 0x000C) |
6398             (1L << 0x000D) |
6399             (1L << 0x0020)) >> ch) & 1L) != 0);
6400     }
6401 
6402 
6403     /**
6404      * Determines if the specified character is a Unicode space character.
6405      * A character is considered to be a space character if and only if
6406      * it is specified to be a space character by the Unicode standard. This
6407      * method returns true if the character's general category type is any of
6408      * the following:
6409      * <ul>
6410      * <li> <code>SPACE_SEPARATOR</code>
6411      * <li> <code>LINE_SEPARATOR</code>
6412      * <li> <code>PARAGRAPH_SEPARATOR</code>
6413      * </ul>
6414      *
6415      * <p><b>Note:</b> This method cannot handle <a
6416      * href="#supplementary"> supplementary characters</a>. To support
6417      * all Unicode characters, including supplementary characters, use
6418      * the {@link #isSpaceChar(int)} method.
6419      *
6420      * @param   ch      the character to be tested.
6421      * @return  <code>true</code> if the character is a space character;
6422      *          <code>false</code> otherwise.
6423      * @see     Character#isWhitespace(char)
6424      * @since   1.1
6425      */
6426     public static boolean isSpaceChar(char ch) {
6427         return isSpaceChar((int)ch);
6428     }
6429 
6430     /**
6431      * Determines if the specified character (Unicode code point) is a
6432      * Unicode space character.  A character is considered to be a
6433      * space character if and only if it is specified to be a space
6434      * character by the Unicode standard. This method returns true if
6435      * the character's general category type is any of the following:
6436      *
6437      * <ul>
6438      * <li> {@link #SPACE_SEPARATOR}
6439      * <li> {@link #LINE_SEPARATOR}
6440      * <li> {@link #PARAGRAPH_SEPARATOR}
6441      * </ul>
6442      *
6443      * @param   codePoint the character (Unicode code point) to be tested.
6444      * @return  <code>true</code> if the character is a space character;
6445      *          <code>false</code> otherwise.
6446      * @see     Character#isWhitespace(int)
6447      * @since   1.5
6448      */
6449     public static boolean isSpaceChar(int codePoint) {
6450         return ((((1 << Character.SPACE_SEPARATOR) |
6451                   (1 << Character.LINE_SEPARATOR) |
6452                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6453             != 0;
6454     }
6455 
6456     /**
6457      * Determines if the specified character is white space according to Java.
6458      * A character is a Java whitespace character if and only if it satisfies
6459      * one of the following criteria:
6460      * <ul>
6461      * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
6462      *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
6463      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
6464      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
6465      * <li> It is <code>'&#92;t'</code>, U+0009 HORIZONTAL TABULATION.
6466      * <li> It is <code>'&#92;n'</code>, U+000A LINE FEED.
6467      * <li> It is <code>'&#92;u000B'</code>, U+000B VERTICAL TABULATION.
6468      * <li> It is <code>'&#92;f'</code>, U+000C FORM FEED.
6469      * <li> It is <code>'&#92;r'</code>, U+000D CARRIAGE RETURN.
6470      * <li> It is <code>'&#92;u001C'</code>, U+001C FILE SEPARATOR.
6471      * <li> It is <code>'&#92;u001D'</code>, U+001D GROUP SEPARATOR.
6472      * <li> It is <code>'&#92;u001E'</code>, U+001E RECORD SEPARATOR.
6473      * <li> It is <code>'&#92;u001F'</code>, U+001F UNIT SEPARATOR.
6474      * </ul>
6475      *
6476      * <p><b>Note:</b> This method cannot handle <a
6477      * href="#supplementary"> supplementary characters</a>. To support
6478      * all Unicode characters, including supplementary characters, use
6479      * the {@link #isWhitespace(int)} method.
6480      *
6481      * @param   ch the character to be tested.
6482      * @return  <code>true</code> if the character is a Java whitespace
6483      *          character; <code>false</code> otherwise.
6484      * @see     Character#isSpaceChar(char)
6485      * @since   1.1
6486      */
6487     public static boolean isWhitespace(char ch) {
6488         return isWhitespace((int)ch);
6489     }
6490 
6491     /**
6492      * Determines if the specified character (Unicode code point) is
6493      * white space according to Java.  A character is a Java
6494      * whitespace character if and only if it satisfies one of the
6495      * following criteria:
6496      * <ul>
6497      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6498      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6499      *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
6500      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
6501      * <li> It is <code>'&#92;t'</code>, U+0009 HORIZONTAL TABULATION.
6502      * <li> It is <code>'&#92;n'</code>, U+000A LINE FEED.
6503      * <li> It is <code>'&#92;u000B'</code>, U+000B VERTICAL TABULATION.
6504      * <li> It is <code>'&#92;f'</code>, U+000C FORM FEED.
6505      * <li> It is <code>'&#92;r'</code>, U+000D CARRIAGE RETURN.
6506      * <li> It is <code>'&#92;u001C'</code>, U+001C FILE SEPARATOR.
6507      * <li> It is <code>'&#92;u001D'</code>, U+001D GROUP SEPARATOR.
6508      * <li> It is <code>'&#92;u001E'</code>, U+001E RECORD SEPARATOR.
6509      * <li> It is <code>'&#92;u001F'</code>, U+001F UNIT SEPARATOR.
6510      * </ul>
     *
6513      * @param   codePoint the character (Unicode code point) to be tested.
6514      * @return  <code>true</code> if the character is a Java whitespace
6515      *          character; <code>false</code> otherwise.
6516      * @see     Character#isSpaceChar(int)
6517      * @since   1.5
6518      */
6519     public static boolean isWhitespace(int codePoint) {
6520         return CharacterData.of(codePoint).isWhitespace(codePoint);
6521     }
6522 
6523     /**
6524      * Determines if the specified character is an ISO control
6525      * character.  A character is considered to be an ISO control
6526      * character if its code is in the range <code>'&#92;u0000'</code>
6527      * through <code>'&#92;u001F'</code> or in the range
6528      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
6529      *
6530      * <p><b>Note:</b> This method cannot handle <a
6531      * href="#supplementary"> supplementary characters</a>. To support
6532      * all Unicode characters, including supplementary characters, use
6533      * the {@link #isISOControl(int)} method.
6534      *
6535      * @param   ch      the character to be tested.
6536      * @return  <code>true</code> if the character is an ISO control character;
6537      *          <code>false</code> otherwise.
6538      *
6539      * @see     Character#isSpaceChar(char)
6540      * @see     Character#isWhitespace(char)
6541      * @since   1.1
6542      */
6543     public static boolean isISOControl(char ch) {
6544         return isISOControl((int)ch);
6545     }
6546 
6547     /**
6548      * Determines if the referenced character (Unicode code point) is an ISO control
6549      * character.  A character is considered to be an ISO control
6550      * character if its code is in the range <code>'&#92;u0000'</code>
6551      * through <code>'&#92;u001F'</code> or in the range
6552      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
6553      *
6554      * @param   codePoint the character (Unicode code point) to be tested.
6555      * @return  <code>true</code> if the character is an ISO control character;
6556      *          <code>false</code> otherwise.
6557      * @see     Character#isSpaceChar(int)
6558      * @see     Character#isWhitespace(int)
6559      * @since   1.5
6560      */
6561     public static boolean isISOControl(int codePoint) {
6562         // Optimized form of:
6563         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6564         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6565         return codePoint <= 0x9F &&
6566             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6567     }
6568 
6569     /**
6570      * Returns a value indicating a character's general category.
6571      *
6572      * <p><b>Note:</b> This method cannot handle <a
6573      * href="#supplementary"> supplementary characters</a>. To support
6574      * all Unicode characters, including supplementary characters, use
6575      * the {@link #getType(int)} method.
6576      *
6577      * @param   ch      the character to be tested.
6578      * @return  a value of type <code>int</code> representing the
6579      *          character's general category.
6580      * @see     Character#COMBINING_SPACING_MARK
6581      * @see     Character#CONNECTOR_PUNCTUATION
6582      * @see     Character#CONTROL
6583      * @see     Character#CURRENCY_SYMBOL
6584      * @see     Character#DASH_PUNCTUATION
6585      * @see     Character#DECIMAL_DIGIT_NUMBER
6586      * @see     Character#ENCLOSING_MARK
6587      * @see     Character#END_PUNCTUATION
6588      * @see     Character#FINAL_QUOTE_PUNCTUATION
6589      * @see     Character#FORMAT
6590      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6591      * @see     Character#LETTER_NUMBER
6592      * @see     Character#LINE_SEPARATOR
6593      * @see     Character#LOWERCASE_LETTER
6594      * @see     Character#MATH_SYMBOL
6595      * @see     Character#MODIFIER_LETTER
6596      * @see     Character#MODIFIER_SYMBOL
6597      * @see     Character#NON_SPACING_MARK
6598      * @see     Character#OTHER_LETTER
6599      * @see     Character#OTHER_NUMBER
6600      * @see     Character#OTHER_PUNCTUATION
6601      * @see     Character#OTHER_SYMBOL
6602      * @see     Character#PARAGRAPH_SEPARATOR
6603      * @see     Character#PRIVATE_USE
6604      * @see     Character#SPACE_SEPARATOR
6605      * @see     Character#START_PUNCTUATION
6606      * @see     Character#SURROGATE
6607      * @see     Character#TITLECASE_LETTER
6608      * @see     Character#UNASSIGNED
6609      * @see     Character#UPPERCASE_LETTER
6610      * @since   1.1
6611      */
6612     public static int getType(char ch) {
6613         return getType((int)ch);
6614     }
6615 
6616     /**
6617      * Returns a value indicating a character's general category.
6618      *
6619      * @param   codePoint the character (Unicode code point) to be tested.
6620      * @return  a value of type <code>int</code> representing the
6621      *          character's general category.
6622      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6623      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6624      * @see     Character#CONTROL CONTROL
6625      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6626      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6627      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6628      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6629      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6630      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6631      * @see     Character#FORMAT FORMAT
6632      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6633      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6634      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6635      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6636      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6637      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6638      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6639      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6640      * @see     Character#OTHER_LETTER OTHER_LETTER
6641      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6642      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6643      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6644      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6645      * @see     Character#PRIVATE_USE PRIVATE_USE
6646      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6647      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6648      * @see     Character#SURROGATE SURROGATE
6649      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6650      * @see     Character#UNASSIGNED UNASSIGNED
6651      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6652      * @since   1.5
6653      */
6654     public static int getType(int codePoint) {
6655         return CharacterData.of(codePoint).getType(codePoint);
6656     }
6657 
6658     /**
6659      * Determines the character representation for a specific digit in
6660      * the specified radix. If the value of <code>radix</code> is not a
6661      * valid radix, or the value of <code>digit</code> is not a valid
6662      * digit in the specified radix, the null character
6663      * (<code>'&#92;u0000'</code>) is returned.
6664      * <p>
6665      * The <code>radix</code> argument is valid if it is greater than or
6666      * equal to <code>MIN_RADIX</code> and less than or equal to
6667      * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
6668      * <code>0&nbsp;&lt;=digit&nbsp;&lt;&nbsp;radix</code>.
6669      * <p>
6670      * If the digit is less than 10, then
6671      * <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
6672      * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
6673      *
6674      * @param   digit   the number to convert to a character.
6675      * @param   radix   the radix.
6676      * @return  the <code>char</code> representation of the specified digit
6677      *          in the specified radix.
6678      * @see     Character#MIN_RADIX
6679      * @see     Character#MAX_RADIX
6680      * @see     Character#digit(char, int)
6681      */
6682     public static char forDigit(int digit, int radix) {
6683         if ((digit >= radix) || (digit < 0)) {
6684             return '\0';
6685         }
6686         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6687             return '\0';
6688         }
6689         if (digit < 10) {
6690             return (char)('0' + digit);
6691         }
6692         return (char)('a' - 10 + digit);
6693     }
6694 
6695     /**
6696      * Returns the Unicode directionality property for the given
6697      * character.  Character directionality is used to calculate the
6698      * visual ordering of text. The directionality value of undefined
6699      * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>.
6700      *
6701      * <p><b>Note:</b> This method cannot handle <a
6702      * href="#supplementary"> supplementary characters</a>. To support
6703      * all Unicode characters, including supplementary characters, use
6704      * the {@link #getDirectionality(int)} method.
6705      *
6706      * @param  ch <code>char</code> for which the directionality property
6707      *            is requested.
6708      * @return the directionality property of the <code>char</code> value.
6709      *
6710      * @see Character#DIRECTIONALITY_UNDEFINED
6711      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6712      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6713      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6714      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6715      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6716      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6717      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6718      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6719      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6720      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6721      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6722      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6723      * @see Character#DIRECTIONALITY_WHITESPACE
6724      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6725      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6726      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6727      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6728      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6729      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6730      * @since 1.4
6731      */
6732     public static byte getDirectionality(char ch) {
6733         return getDirectionality((int)ch);
6734     }
6735 
6736     /**
6737      * Returns the Unicode directionality property for the given
6738      * character (Unicode code point).  Character directionality is
6739      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
6741      * #DIRECTIONALITY_UNDEFINED}.
6742      *
6743      * @param   codePoint the character (Unicode code point) for which
6744      *          the directionality property is requested.
6745      * @return the directionality property of the character.
6746      *
6747      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
6748      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
6749      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
6750      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6751      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
6752      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6753      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6754      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
6755      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6756      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
6757      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
6758      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
6759      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
6760      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
6761      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
6762      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6763      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6764      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6765      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6766      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6767      * @since    1.5
6768      */
6769     public static byte getDirectionality(int codePoint) {
6770         return CharacterData.of(codePoint).getDirectionality(codePoint);
6771     }
6772 
6773     /**
6774      * Determines whether the character is mirrored according to the
6775      * Unicode specification.  Mirrored characters should have their
6776      * glyphs horizontally mirrored when displayed in text that is
6777      * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
6778      * PARENTHESIS is semantically defined to be an <i>opening
6779      * parenthesis</i>.  This will appear as a "(" in text that is
6780      * left-to-right but as a ")" in text that is right-to-left.
6781      *
6782      * <p><b>Note:</b> This method cannot handle <a
6783      * href="#supplementary"> supplementary characters</a>. To support
6784      * all Unicode characters, including supplementary characters, use
6785      * the {@link #isMirrored(int)} method.
6786      *
6787      * @param  ch <code>char</code> for which the mirrored property is requested
6788      * @return <code>true</code> if the char is mirrored, <code>false</code>
6789      *         if the <code>char</code> is not mirrored or is not defined.
6790      * @since 1.4
6791      */
6792     public static boolean isMirrored(char ch) {
6793         return isMirrored((int)ch);
6794     }
6795 
6796     /**
6797      * Determines whether the specified character (Unicode code point)
6798      * is mirrored according to the Unicode specification.  Mirrored
6799      * characters should have their glyphs horizontally mirrored when
6800      * displayed in text that is right-to-left.  For example,
6801      * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
6802      * defined to be an <i>opening parenthesis</i>.  This will appear
6803      * as a "(" in text that is left-to-right but as a ")" in text
6804      * that is right-to-left.
6805      *
6806      * @param   codePoint the character (Unicode code point) to be tested.
6807      * @return  <code>true</code> if the character is mirrored, <code>false</code>
6808      *          if the character is not mirrored or is not defined.
6809      * @since   1.5
6810      */
6811     public static boolean isMirrored(int codePoint) {
6812         return CharacterData.of(codePoint).isMirrored(codePoint);
6813     }
6814 
6815     /**
6816      * Compares two <code>Character</code> objects numerically.
6817      *
6818      * @param   anotherCharacter   the <code>Character</code> to be compared.
     *
6820      * @return  the value <code>0</code> if the argument <code>Character</code>
6821      *          is equal to this <code>Character</code>; a value less than
6822      *          <code>0</code> if this <code>Character</code> is numerically less
6823      *          than the <code>Character</code> argument; and a value greater than
6824      *          <code>0</code> if this <code>Character</code> is numerically greater
6825      *          than the <code>Character</code> argument (unsigned comparison).
6826      *          Note that this is strictly a numerical comparison; it is not
6827      *          locale-dependent.
6828      * @since   1.2
6829      */
6830     public int compareTo(Character anotherCharacter) {
6831         return compare(this.value, anotherCharacter.value);
6832     }
6833 
6834     /**
6835      * Compares two {@code char} values numerically.
6836      * The value returned is identical to what would be returned by:
6837      * <pre>
6838      *    Character.valueOf(x).compareTo(Character.valueOf(y))
6839      * </pre>
6840      *
6841      * @param  x the first {@code char} to compare
6842      * @param  y the second {@code char} to compare
6843      * @return the value {@code 0} if {@code x == y};
6844      *         a value less than {@code 0} if {@code x < y}; and
6845      *         a value greater than {@code 0} if {@code x > y}
6846      * @since 1.7
6847      */
6848     public static int compare(char x, char y) {
6849         return x - y;
6850     }
6851 
6852     /**
6853      * Converts the character (Unicode code point) argument to uppercase using
6854      * information from the UnicodeData file.
6855      * <p>
6856      *
6857      * @param   codePoint   the character (Unicode code point) to be converted.
6858      * @return  either the uppercase equivalent of the character, if
6859      *          any, or an error flag (<code>Character.ERROR</code>)
6860      *          that indicates that a 1:M <code>char</code> mapping exists.
6861      * @see     Character#isLowerCase(char)
6862      * @see     Character#isUpperCase(char)
6863      * @see     Character#toLowerCase(char)
6864      * @see     Character#toTitleCase(char)
6865      * @since 1.4
6866      */
6867     static int toUpperCaseEx(int codePoint) {
6868         assert isValidCodePoint(codePoint);
6869         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
6870     }
6871 
6872     /**
6873      * Converts the character (Unicode code point) argument to uppercase using case
6874      * mapping information from the SpecialCasing file in the Unicode
6875      * specification. If a character has no explicit uppercase
6876      * mapping, then the <code>char</code> itself is returned in the
6877      * <code>char[]</code>.
6878      *
6879      * @param   codePoint   the character (Unicode code point) to be converted.
6880      * @return a <code>char[]</code> with the uppercased character.
6881      * @since 1.4
6882      */
6883     static char[] toUpperCaseCharArray(int codePoint) {
6884         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
6885         assert isBmpCodePoint(codePoint);
6886         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
6887     }
6888 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
6896 
6897     /**
6898      * Returns the value obtained by reversing the order of the bytes in the
6899      * specified <tt>char</tt> value.
6900      *
6901      * @return the value obtained by reversing (or, equivalently, swapping)
6902      *     the bytes in the specified <tt>char</tt> value.
6903      * @since 1.5
6904      */
6905     public static char reverseBytes(char ch) {
6906         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
6907     }
6908 
6909     /**
6910      * Returns the Unicode name of the specified character
6911      * <code>codePoint</code>, or null if the code point is
6912      * {@link #UNASSIGNED unassigned}.
6913      * <p>
6914      * Note: if the specified character is not assigned a name by
6915      * the <i>UnicodeData</i> file (part of the Unicode Character
6916      * Database maintained by the Unicode Consortium), the returned
6917      * name is the same as the result of expression
6918      *
6919      * <blockquote><code>
6920      *     Character.UnicodeBlock.of(codePoint)
6921      *                           .toString()
6922      *                           .replace('_', ' ')
6923      *     + " "
6924      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6925      *
6926      * </code></blockquote>
6927      *
6928      * @param  codePoint the character (Unicode code point)
6929      *
6930      * @return the Unicode name of the specified character, or null if
6931      *         the code point is unassigned.
6932      *
6933      * @exception IllegalArgumentException if the specified
6934      *            <code>codePoint</code> is not a valid Unicode
6935      *            code point.
6936      *
6937      * @since 1.7
6938      */
6939     public static String getName(int codePoint) {
6940         if (!isValidCodePoint(codePoint)) {
6941             throw new IllegalArgumentException();
6942         }
6943         String name = CharacterName.get(codePoint);
6944         if (name != null)
6945             return name;
6946         if (getType(codePoint) == UNASSIGNED)
6947             return null;
6948         UnicodeBlock block = UnicodeBlock.of(codePoint);
6949         if (block != null)
6950             return block.toString().replace('_', ' ') + " "
6951                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6952         // should never come here
6953         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6954     }
6955 }