New src/share/classes/java/lang/Character.java

   1 /*
   2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The <code>Character</code> class wraps a value of the primitive
  35  * type <code>char</code> in an object. An object of type
  36  * <code>Character</code> contains a single field whose type is
  37  * <code>char</code>.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.0.0.
  44  * <p>
  45  * The methods and data of class <code>Character</code> are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  58  *
  59  * <p>The <code>char</code> data type (and therefore the value that a
  60  * <code>Character</code> object encapsulates) are based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in <code>char</code> arrays and
  76  * in the <code>String</code> and <code>StringBuffer</code> classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of <code>char</code> values, the first from the <em>high-surrogates</em>
  79  * range (&#92;uD800-&#92;uDBFF), the second from the
  80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  81  *
  82  * <p>A <code>char</code> value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * <code>int</code> value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of <code>int</code> are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate <code>char</code> values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a <code>char</code> value cannot support
  95  * supplementary characters. They treat <code>char</code> values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though
  98  * this specific value if followed by any low-surrogate value in a string
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an <code>int</code> value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, <code>Character.isLetter(0x2F81A)</code> returns
 104  * <code>true</code> because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * <code>char</code> values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * <code>digit</code> method, the <code>forDigit</code>
 129      * method, and the <code>toString</code> method of class
 130      * <code>Integer</code>.
 131      *
 132      * @see     Character#digit(char, int)
 133      * @see     Character#forDigit(int, int)
 134      * @see     Integer#toString(int, int)
 135      * @see     Integer#valueOf(String)
 136      */
 137     public static final int MIN_RADIX = 2;
 138 
 139     /**
 140      * The maximum radix available for conversion to and from strings.
 141      * The constant value of this field is the largest value permitted
 142      * for the radix argument in radix-conversion methods such as the
 143      * <code>digit</code> method, the <code>forDigit</code>
 144      * method, and the <code>toString</code> method of class
 145      * <code>Integer</code>.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * <code>char</code>, <code>'&#92;u0000'</code>.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * <code>char</code>, <code>'&#92;uFFFF'</code>.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The <code>Class</code> instance representing the primitive type
 172      * <code>char</code>.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
    // NOTE(review): the numbering skips 17 here -- FORMAT is 16 and this
    // constant is 18. Presumably intentional; confirm against the character
    // property data before renumbering or reusing the gap.
    /**
     * General category "Co" in the Unicode specification.
     * @since   1.1
     */
    public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined <code>char</code>
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
 393      * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * The minimum value of a
 496      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 497      * Unicode high-surrogate code unit</a>
 498      * in the UTF-16 encoding, constant <code>'&#92;uD800'</code>.
 499      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 500      *
 501      * @since 1.5
 502      */
 503     public static final char MIN_HIGH_SURROGATE = '\uD800';
 504 
 505     /**
 506      * The maximum value of a
 507      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 508      * Unicode high-surrogate code unit</a>
 509      * in the UTF-16 encoding, constant <code>'&#92;uDBFF'</code>.
 510      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 511      *
 512      * @since 1.5
 513      */
 514     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 515 
 516     /**
 517      * The minimum value of a
 518      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 519      * Unicode low-surrogate code unit</a>
 520      * in the UTF-16 encoding, constant <code>'&#92;uDC00'</code>.
 521      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 522      *
 523      * @since 1.5
 524      */
 525     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 526 
 527     /**
 528      * The maximum value of a
 529      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 530      * Unicode low-surrogate code unit</a>
 531      * in the UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
 532      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 533      *
 534      * @since 1.5
 535      */
 536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 537 
 538     /**
 539      * The minimum value of a Unicode surrogate code unit in the
 540      * UTF-16 encoding, constant <code>'&#92;uD800'</code>.
 541      *
 542      * @since 1.5
 543      */
 544     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 545 
 546     /**
 547      * The maximum value of a Unicode surrogate code unit in the
 548      * UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
 549      *
 550      * @since 1.5
 551      */
 552     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 553 
 554     /**
 555      * The minimum value of a
 556      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 557      * Unicode supplementary code point</a>, constant {@code U+10000}.
 558      *
 559      * @since 1.5
 560      */
 561     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 562 
 563     /**
 564      * The minimum value of a
 565      * <a href="http://www.unicode.org/glossary/#code_point">
 566      * Unicode code point</a>, constant {@code U+0000}.
 567      *
 568      * @since 1.5
 569      */
 570     public static final int MIN_CODE_POINT = 0x000000;
 571 
 572     /**
 573      * The maximum value of a
 574      * <a href="http://www.unicode.org/glossary/#code_point">
 575      * Unicode code point</a>, constant {@code U+10FFFF}.
 576      *
 577      * @since 1.5
 578      */
 579     public static final int MAX_CODE_POINT = 0X10FFFF;
 580 
 581 
 582     /**
 583      * Instances of this class represent particular subsets of the Unicode
 584      * character set.  The only family of subsets defined in the
 585      * <code>Character</code> class is {@link Character.UnicodeBlock}.
 586      * Other portions of the Java API may define other subsets for their
 587      * own purposes.
 588      *
 589      * @since 1.2
 590      */
 591     public static class Subset  {
 592 
 593         private String name;
 594 
 595         /**
 596          * Constructs a new <code>Subset</code> instance.
 597          *
 598          * @param  name  The name of this subset
 599          * @exception NullPointerException if name is <code>null</code>
 600          */
 601         protected Subset(String name) {
 602             if (name == null) {
 603                 throw new NullPointerException("name");
 604             }
 605             this.name = name;
 606         }
 607 
 608         /**
 609          * Compares two <code>Subset</code> objects for equality.
 610          * This method returns <code>true</code> if and only if
 611          * <code>this</code> and the argument refer to the same
 612          * object; since this method is <code>final</code>, this
 613          * guarantee holds for all subclasses.
 614          */
 615         public final boolean equals(Object obj) {
 616             return (this == obj);
 617         }
 618 
 619         /**
 620          * Returns the standard hash code as defined by the
 621          * <code>{@link Object#hashCode}</code> method.  This method
 622          * is <code>final</code> in order to ensure that the
 623          * <code>equals</code> and <code>hashCode</code> methods will
 624          * be consistent in all subclasses.
 625          */
 626         public final int hashCode() {
 627             return super.hashCode();
 628         }
 629 
 630         /**
 631          * Returns the name of this subset.
 632          */
 633         public final String toString() {
 634             return name;
 635         }
 636     }
 637 
 638     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 639     // for the latest specification of Unicode Blocks.
 640 
 641     /**
 642      * A family of character subsets representing the character blocks in the
 643      * Unicode specification. Character blocks generally define characters
 644      * used for a specific script or purpose. A character is contained by
 645      * at most one Unicode block.
 646      *
 647      * @since 1.2
 648      */
 649     public static final class UnicodeBlock extends Subset {
 650 
 651         private static Map<String, UnicodeBlock> map
 652             = new HashMap<>(256);
 653 
 654         /**
 655          * Creates a UnicodeBlock with the given identifier name.
 656          * This name must be the same as the block identifier.
 657          */
 658         private UnicodeBlock(String idName) {
 659             super(idName);
 660             map.put(idName, this);
 661         }
 662 
 663         /**
 664          * Creates a UnicodeBlock with the given identifier name and
 665          * alias name.
 666          */
 667         private UnicodeBlock(String idName, String alias) {
 668             this(idName);
 669             map.put(alias, this);
 670         }
 671 
 672         /**
 673          * Creates a UnicodeBlock with the given identifier name and
 674          * alias names.
 675          */
 676         private UnicodeBlock(String idName, String... aliases) {
 677             this(idName);
 678             for (String alias : aliases)
 679                 map.put(alias, this);
 680         }
 681 
 682         /**
 683          * Constant for the "Basic Latin" Unicode character block.
 684          * @since 1.2
 685          */
 686         public static final UnicodeBlock  BASIC_LATIN =
 687             new UnicodeBlock("BASIC_LATIN",
 688                              "BASIC LATIN",
 689                              "BASICLATIN");
 690 
 691         /**
 692          * Constant for the "Latin-1 Supplement" Unicode character block.
 693          * @since 1.2
 694          */
 695         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 696             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 697                              "LATIN-1 SUPPLEMENT",
 698                              "LATIN-1SUPPLEMENT");
 699 
 700         /**
 701          * Constant for the "Latin Extended-A" Unicode character block.
 702          * @since 1.2
 703          */
 704         public static final UnicodeBlock LATIN_EXTENDED_A =
 705             new UnicodeBlock("LATIN_EXTENDED_A",
 706                              "LATIN EXTENDED-A",
 707                              "LATINEXTENDED-A");
 708 
 709         /**
 710          * Constant for the "Latin Extended-B" Unicode character block.
 711          * @since 1.2
 712          */
 713         public static final UnicodeBlock LATIN_EXTENDED_B =
 714             new UnicodeBlock("LATIN_EXTENDED_B",
 715                              "LATIN EXTENDED-B",
 716                              "LATINEXTENDED-B");
 717 
 718         /**
 719          * Constant for the "IPA Extensions" Unicode character block.
 720          * @since 1.2
 721          */
 722         public static final UnicodeBlock IPA_EXTENSIONS =
 723             new UnicodeBlock("IPA_EXTENSIONS",
 724                              "IPA EXTENSIONS",
 725                              "IPAEXTENSIONS");
 726 
 727         /**
 728          * Constant for the "Spacing Modifier Letters" Unicode character block.
 729          * @since 1.2
 730          */
 731         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 732             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 733                              "SPACING MODIFIER LETTERS",
 734                              "SPACINGMODIFIERLETTERS");
 735 
 736         /**
 737          * Constant for the "Combining Diacritical Marks" Unicode character block.
 738          * @since 1.2
 739          */
 740         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 741             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 742                              "COMBINING DIACRITICAL MARKS",
 743                              "COMBININGDIACRITICALMARKS");
 744 
 745         /**
 746          * Constant for the "Greek and Coptic" Unicode character block.
 747          * <p>
 748          * This block was previously known as the "Greek" block.
 749          *
 750          * @since 1.2
 751          */
 752         public static final UnicodeBlock GREEK =
 753             new UnicodeBlock("GREEK",
 754                              "GREEK AND COPTIC",
 755                              "GREEKANDCOPTIC");
 756 
 757         /**
 758          * Constant for the "Cyrillic" Unicode character block.
 759          * @since 1.2
 760          */
 761         public static final UnicodeBlock CYRILLIC =
 762             new UnicodeBlock("CYRILLIC");
 763 
 764         /**
 765          * Constant for the "Armenian" Unicode character block.
 766          * @since 1.2
 767          */
 768         public static final UnicodeBlock ARMENIAN =
 769             new UnicodeBlock("ARMENIAN");
 770 
 771         /**
 772          * Constant for the "Hebrew" Unicode character block.
 773          * @since 1.2
 774          */
 775         public static final UnicodeBlock HEBREW =
 776             new UnicodeBlock("HEBREW");
 777 
 778         /**
 779          * Constant for the "Arabic" Unicode character block.
 780          * @since 1.2
 781          */
 782         public static final UnicodeBlock ARABIC =
 783             new UnicodeBlock("ARABIC");
 784 
 785         /**
 786          * Constant for the "Devanagari" Unicode character block.
 787          * @since 1.2
 788          */
 789         public static final UnicodeBlock DEVANAGARI =
 790             new UnicodeBlock("DEVANAGARI");
 791 
 792         /**
 793          * Constant for the "Bengali" Unicode character block.
 794          * @since 1.2
 795          */
 796         public static final UnicodeBlock BENGALI =
 797             new UnicodeBlock("BENGALI");
 798 
 799         /**
 800          * Constant for the "Gurmukhi" Unicode character block.
 801          * @since 1.2
 802          */
 803         public static final UnicodeBlock GURMUKHI =
 804             new UnicodeBlock("GURMUKHI");
 805 
 806         /**
 807          * Constant for the "Gujarati" Unicode character block.
 808          * @since 1.2
 809          */
 810         public static final UnicodeBlock GUJARATI =
 811             new UnicodeBlock("GUJARATI");
 812 
 813         /**
 814          * Constant for the "Oriya" Unicode character block.
 815          * @since 1.2
 816          */
 817         public static final UnicodeBlock ORIYA =
 818             new UnicodeBlock("ORIYA");
 819 
 820         /**
 821          * Constant for the "Tamil" Unicode character block.
 822          * @since 1.2
 823          */
 824         public static final UnicodeBlock TAMIL =
 825             new UnicodeBlock("TAMIL");
 826 
 827         /**
 828          * Constant for the "Telugu" Unicode character block.
 829          * @since 1.2
 830          */
 831         public static final UnicodeBlock TELUGU =
 832             new UnicodeBlock("TELUGU");
 833 
 834         /**
 835          * Constant for the "Kannada" Unicode character block.
 836          * @since 1.2
 837          */
 838         public static final UnicodeBlock KANNADA =
 839             new UnicodeBlock("KANNADA");
 840 
 841         /**
 842          * Constant for the "Malayalam" Unicode character block.
 843          * @since 1.2
 844          */
 845         public static final UnicodeBlock MALAYALAM =
 846             new UnicodeBlock("MALAYALAM");
 847 
 848         /**
 849          * Constant for the "Thai" Unicode character block.
 850          * @since 1.2
 851          */
 852         public static final UnicodeBlock THAI =
 853             new UnicodeBlock("THAI");
 854 
 855         /**
 856          * Constant for the "Lao" Unicode character block.
 857          * @since 1.2
 858          */
 859         public static final UnicodeBlock LAO =
 860             new UnicodeBlock("LAO");
 861 
 862         /**
 863          * Constant for the "Tibetan" Unicode character block.
 864          * @since 1.2
 865          */
 866         public static final UnicodeBlock TIBETAN =
 867             new UnicodeBlock("TIBETAN");
 868 
 869         /**
 870          * Constant for the "Georgian" Unicode character block.
 871          * @since 1.2
 872          */
 873         public static final UnicodeBlock GEORGIAN =
 874             new UnicodeBlock("GEORGIAN");
 875 
 876         /**
 877          * Constant for the "Hangul Jamo" Unicode character block.
 878          * @since 1.2
 879          */
 880         public static final UnicodeBlock HANGUL_JAMO =
 881             new UnicodeBlock("HANGUL_JAMO",  // same text as the constant's identifier
 882                              "HANGUL JAMO",  // block name in upper case, space kept
 883                              "HANGULJAMO");  // block name in upper case, space removed
 884 
 885         /**
 886          * Constant for the "Latin Extended Additional" Unicode character block.
 887          * @since 1.2
 888          */
 889         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 890             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 891                              "LATIN EXTENDED ADDITIONAL",
 892                              "LATINEXTENDEDADDITIONAL");
 893 
 894         /**
 895          * Constant for the "Greek Extended" Unicode character block.
 896          * @since 1.2
 897          */
 898         public static final UnicodeBlock GREEK_EXTENDED =
 899             new UnicodeBlock("GREEK_EXTENDED",
 900                              "GREEK EXTENDED",
 901                              "GREEKEXTENDED");
 902 
 903         /**
 904          * Constant for the "General Punctuation" Unicode character block.
 905          * @since 1.2
 906          */
 907         public static final UnicodeBlock GENERAL_PUNCTUATION =
 908             new UnicodeBlock("GENERAL_PUNCTUATION",
 909                              "GENERAL PUNCTUATION",
 910                              "GENERALPUNCTUATION");
 911 
 912         /**
 913          * Constant for the "Superscripts and Subscripts" Unicode character
 914          * block.
 915          * @since 1.2
 916          */
 917         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 918             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 919                              "SUPERSCRIPTS AND SUBSCRIPTS",
 920                              "SUPERSCRIPTSANDSUBSCRIPTS");
 921 
 922         /**
 923          * Constant for the "Currency Symbols" Unicode character block.
 924          * @since 1.2
 925          */
 926         public static final UnicodeBlock CURRENCY_SYMBOLS =
 927             new UnicodeBlock("CURRENCY_SYMBOLS",
 928                              "CURRENCY SYMBOLS",
 929                              "CURRENCYSYMBOLS");
 930 
 931         /**
 932          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 933          * character block.
 934          * <p>
 935          * This block was previously known as "Combining Marks for Symbols".
 936          * @since 1.2
 937          */
 938         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 939             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",  // identifier follows the previous name
 940                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",  // current name
 941                              "COMBININGDIACRITICALMARKSFORSYMBOLS",  // current name, spaces removed
 942                              "COMBINING MARKS FOR SYMBOLS",  // previous name
 943                              "COMBININGMARKSFORSYMBOLS");  // previous name, spaces removed
 944 
 945         /**
 946          * Constant for the "Letterlike Symbols" Unicode character block.
 947          * @since 1.2
 948          */
 949         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 950             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 951                              "LETTERLIKE SYMBOLS",
 952                              "LETTERLIKESYMBOLS");
 953 
 954         /**
 955          * Constant for the "Number Forms" Unicode character block.
 956          * @since 1.2
 957          */
 958         public static final UnicodeBlock NUMBER_FORMS =
 959             new UnicodeBlock("NUMBER_FORMS",
 960                              "NUMBER FORMS",
 961                              "NUMBERFORMS");
 962 
 963         /**
 964          * Constant for the "Arrows" Unicode character block.
 965          * @since 1.2
 966          */
 967         public static final UnicodeBlock ARROWS =
 968             new UnicodeBlock("ARROWS");
 969 
 970         /**
 971          * Constant for the "Mathematical Operators" Unicode character block.
 972          * @since 1.2
 973          */
 974         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 975             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 976                              "MATHEMATICAL OPERATORS",
 977                              "MATHEMATICALOPERATORS");
 978 
 979         /**
 980          * Constant for the "Miscellaneous Technical" Unicode character block.
 981          * @since 1.2
 982          */
 983         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 984             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 985                              "MISCELLANEOUS TECHNICAL",
 986                              "MISCELLANEOUSTECHNICAL");
 987 
 988         /**
 989          * Constant for the "Control Pictures" Unicode character block.
 990          * @since 1.2
 991          */
 992         public static final UnicodeBlock CONTROL_PICTURES =
 993             new UnicodeBlock("CONTROL_PICTURES",
 994                              "CONTROL PICTURES",
 995                              "CONTROLPICTURES");
 996 
 997         /**
 998          * Constant for the "Optical Character Recognition" Unicode character block.
 999          * @since 1.2
1000          */
1001         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1002             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1003                              "OPTICAL CHARACTER RECOGNITION",
1004                              "OPTICALCHARACTERRECOGNITION");
1005 
1006         /**
1007          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1008          * @since 1.2
1009          */
1010         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1011             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1012                              "ENCLOSED ALPHANUMERICS",
1013                              "ENCLOSEDALPHANUMERICS");
1014 
1015         /**
1016          * Constant for the "Box Drawing" Unicode character block.
1017          * @since 1.2
1018          */
1019         public static final UnicodeBlock BOX_DRAWING =
1020             new UnicodeBlock("BOX_DRAWING",
1021                              "BOX DRAWING",
1022                              "BOXDRAWING");
1023 
1024         /**
1025          * Constant for the "Block Elements" Unicode character block.
1026          * @since 1.2
1027          */
1028         public static final UnicodeBlock BLOCK_ELEMENTS =
1029             new UnicodeBlock("BLOCK_ELEMENTS",
1030                              "BLOCK ELEMENTS",
1031                              "BLOCKELEMENTS");
1032 
1033         /**
1034          * Constant for the "Geometric Shapes" Unicode character block.
1035          * @since 1.2
1036          */
1037         public static final UnicodeBlock GEOMETRIC_SHAPES =
1038             new UnicodeBlock("GEOMETRIC_SHAPES",
1039                              "GEOMETRIC SHAPES",
1040                              "GEOMETRICSHAPES");
1041 
1042         /**
1043          * Constant for the "Miscellaneous Symbols" Unicode character block.
1044          * @since 1.2
1045          */
1046         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1047             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1048                              "MISCELLANEOUS SYMBOLS",
1049                              "MISCELLANEOUSSYMBOLS");
1050 
1051         /**
1052          * Constant for the "Dingbats" Unicode character block.
1053          * @since 1.2
1054          */
1055         public static final UnicodeBlock DINGBATS =
1056             new UnicodeBlock("DINGBATS");
1057 
1058         /**
1059          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1060          * @since 1.2
1061          */
1062         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1063             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1064                              "CJK SYMBOLS AND PUNCTUATION",
1065                              "CJKSYMBOLSANDPUNCTUATION");
1066 
1067         /**
1068          * Constant for the "Hiragana" Unicode character block.
1069          * @since 1.2
1070          */
1071         public static final UnicodeBlock HIRAGANA =
1072             new UnicodeBlock("HIRAGANA");
1073 
1074         /**
1075          * Constant for the "Katakana" Unicode character block.
1076          * @since 1.2
1077          */
1078         public static final UnicodeBlock KATAKANA =
1079             new UnicodeBlock("KATAKANA");
1080 
1081         /**
1082          * Constant for the "Bopomofo" Unicode character block.
1083          * @since 1.2
1084          */
1085         public static final UnicodeBlock BOPOMOFO =
1086             new UnicodeBlock("BOPOMOFO");
1087 
1088         /**
1089          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1090          * @since 1.2
1091          */
1092         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1093             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1094                              "HANGUL COMPATIBILITY JAMO",
1095                              "HANGULCOMPATIBILITYJAMO");
1096 
1097         /**
1098          * Constant for the "Kanbun" Unicode character block.
1099          * @since 1.2
1100          */
1101         public static final UnicodeBlock KANBUN =
1102             new UnicodeBlock("KANBUN");
1103 
1104         /**
1105          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1106          * @since 1.2
1107          */
1108         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1109             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1110                              "ENCLOSED CJK LETTERS AND MONTHS",
1111                              "ENCLOSEDCJKLETTERSANDMONTHS");
1112 
1113         /**
1114          * Constant for the "CJK Compatibility" Unicode character block.
1115          * @since 1.2
1116          */
1117         public static final UnicodeBlock CJK_COMPATIBILITY =
1118             new UnicodeBlock("CJK_COMPATIBILITY",
1119                              "CJK COMPATIBILITY",
1120                              "CJKCOMPATIBILITY");
1121 
1122         /**
1123          * Constant for the "CJK Unified Ideographs" Unicode character block.
1124          * @since 1.2
1125          */
1126         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1127             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1128                              "CJK UNIFIED IDEOGRAPHS",
1129                              "CJKUNIFIEDIDEOGRAPHS");
1130 
1131         /**
1132          * Constant for the "Hangul Syllables" Unicode character block.
1133          * @since 1.2
1134          */
1135         public static final UnicodeBlock HANGUL_SYLLABLES =
1136             new UnicodeBlock("HANGUL_SYLLABLES",
1137                              "HANGUL SYLLABLES",
1138                              "HANGULSYLLABLES");
1139 
1140         /**
1141          * Constant for the "Private Use Area" Unicode character block.
1142          * @since 1.2
1143          */
1144         public static final UnicodeBlock PRIVATE_USE_AREA =
1145             new UnicodeBlock("PRIVATE_USE_AREA",
1146                              "PRIVATE USE AREA",
1147                              "PRIVATEUSEAREA");
1148 
1149         /**
1150          * Constant for the "CJK Compatibility Ideographs" Unicode character
1151          * block.
1152          * @since 1.2
1153          */
1154         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1155             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1156                              "CJK COMPATIBILITY IDEOGRAPHS",
1157                              "CJKCOMPATIBILITYIDEOGRAPHS");
1158 
1159         /**
1160          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1161          * @since 1.2
1162          */
1163         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1164             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1165                              "ALPHABETIC PRESENTATION FORMS",
1166                              "ALPHABETICPRESENTATIONFORMS");
1167 
1168         /**
1169          * Constant for the "Arabic Presentation Forms-A" Unicode character
1170          * block.
1171          * @since 1.2
1172          */
1173         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1174             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",  // identifier: '_' stands in for ' ' and '-'
1175                              "ARABIC PRESENTATION FORMS-A",  // block name in upper case
1176                              "ARABICPRESENTATIONFORMS-A");  // spaces removed; the hyphen is kept
1177 
1178         /**
1179          * Constant for the "Combining Half Marks" Unicode character block.
1180          * @since 1.2
1181          */
1182         public static final UnicodeBlock COMBINING_HALF_MARKS =
1183             new UnicodeBlock("COMBINING_HALF_MARKS",
1184                              "COMBINING HALF MARKS",
1185                              "COMBININGHALFMARKS");
1186 
1187         /**
1188          * Constant for the "CJK Compatibility Forms" Unicode character block.
1189          * @since 1.2
1190          */
1191         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1192             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1193                              "CJK COMPATIBILITY FORMS",
1194                              "CJKCOMPATIBILITYFORMS");
1195 
1196         /**
1197          * Constant for the "Small Form Variants" Unicode character block.
1198          * @since 1.2
1199          */
1200         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1201             new UnicodeBlock("SMALL_FORM_VARIANTS",
1202                              "SMALL FORM VARIANTS",
1203                              "SMALLFORMVARIANTS");
1204 
1205         /**
1206          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1207          * @since 1.2
1208          */
1209         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1210             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",  // identifier: '_' stands in for ' ' and '-'
1211                              "ARABIC PRESENTATION FORMS-B",  // block name in upper case
1212                              "ARABICPRESENTATIONFORMS-B");  // spaces removed; the hyphen is kept
1213 
1214         /**
1215          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1216          * block.
1217          * @since 1.2
1218          */
1219         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1220             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1221                              "HALFWIDTH AND FULLWIDTH FORMS",
1222                              "HALFWIDTHANDFULLWIDTHFORMS");
1223 
1224         /**
1225          * Constant for the "Specials" Unicode character block.
1226          * @since 1.2
1227          */
1228         public static final UnicodeBlock SPECIALS =
1229             new UnicodeBlock("SPECIALS");
1230 
1231         /**
              * Legacy constant named for the surrogates area. According to the
              * deprecation note below, it is not returned by {@link #of(char)}
              * or {@link #of(int)}.
              *
1232          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1233          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1234          *             {@link #LOW_SURROGATES}. These new constants match
1235          *             the block definitions of the Unicode Standard.
1236          *             The {@link #of(char)} and {@link #of(int)} methods
1237          *             return the new constants, not SURROGATES_AREA.
1238          */
1239         @Deprecated
1240         public static final UnicodeBlock SURROGATES_AREA =
1241             new UnicodeBlock("SURROGATES_AREA");
1242 
1243         /**
1244          * Constant for the "Syriac" Unicode character block.
1245          * @since 1.4
1246          */
1247         public static final UnicodeBlock SYRIAC =
1248             new UnicodeBlock("SYRIAC");
1249 
1250         /**
1251          * Constant for the "Thaana" Unicode character block.
1252          * @since 1.4
1253          */
1254         public static final UnicodeBlock THAANA =
1255             new UnicodeBlock("THAANA");
1256 
1257         /**
1258          * Constant for the "Sinhala" Unicode character block.
1259          * @since 1.4
1260          */
1261         public static final UnicodeBlock SINHALA =
1262             new UnicodeBlock("SINHALA");
1263 
1264         /**
1265          * Constant for the "Myanmar" Unicode character block.
1266          * @since 1.4
1267          */
1268         public static final UnicodeBlock MYANMAR =
1269             new UnicodeBlock("MYANMAR");
1270 
1271         /**
1272          * Constant for the "Ethiopic" Unicode character block.
1273          * @since 1.4
1274          */
1275         public static final UnicodeBlock ETHIOPIC =
1276             new UnicodeBlock("ETHIOPIC");
1277 
1278         /**
1279          * Constant for the "Cherokee" Unicode character block.
1280          * @since 1.4
1281          */
1282         public static final UnicodeBlock CHEROKEE =
1283             new UnicodeBlock("CHEROKEE");
1284 
1285         /**
1286          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1287          * @since 1.4
1288          */
1289         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1290             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1291                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1292                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1293 
1294         /**
1295          * Constant for the "Ogham" Unicode character block.
1296          * @since 1.4
1297          */
1298         public static final UnicodeBlock OGHAM =
1299             new UnicodeBlock("OGHAM");
1300 
1301         /**
1302          * Constant for the "Runic" Unicode character block.
1303          * @since 1.4
1304          */
1305         public static final UnicodeBlock RUNIC =
1306             new UnicodeBlock("RUNIC");
1307 
1308         /**
1309          * Constant for the "Khmer" Unicode character block.
1310          * @since 1.4
1311          */
1312         public static final UnicodeBlock KHMER =
1313             new UnicodeBlock("KHMER");
1314 
1315         /**
1316          * Constant for the "Mongolian" Unicode character block.
1317          * @since 1.4
1318          */
1319         public static final UnicodeBlock MONGOLIAN =
1320             new UnicodeBlock("MONGOLIAN");
1321 
1322         /**
1323          * Constant for the "Braille Patterns" Unicode character block.
1324          * @since 1.4
1325          */
1326         public static final UnicodeBlock BRAILLE_PATTERNS =
1327             new UnicodeBlock("BRAILLE_PATTERNS",
1328                              "BRAILLE PATTERNS",
1329                              "BRAILLEPATTERNS");
1330 
1331         /**
1332          * Constant for the "CJK Radicals Supplement" Unicode character block.
1333          * @since 1.4
1334          */
1335         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1336             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1337                              "CJK RADICALS SUPPLEMENT",
1338                              "CJKRADICALSSUPPLEMENT");
1339 
1340         /**
1341          * Constant for the "Kangxi Radicals" Unicode character block.
1342          * @since 1.4
1343          */
1344         public static final UnicodeBlock KANGXI_RADICALS =
1345             new UnicodeBlock("KANGXI_RADICALS",
1346                              "KANGXI RADICALS",
1347                              "KANGXIRADICALS");
1348 
1349         /**
1350          * Constant for the "Ideographic Description Characters" Unicode character block.
1351          * @since 1.4
1352          */
1353         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1354             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1355                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1356                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1357 
1358         /**
1359          * Constant for the "Bopomofo Extended" Unicode character block.
1360          * @since 1.4
1361          */
1362         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1363             new UnicodeBlock("BOPOMOFO_EXTENDED",
1364                              "BOPOMOFO EXTENDED",
1365                              "BOPOMOFOEXTENDED");
1366 
1367         /**
1368          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1369          * @since 1.4
1370          */
1371         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1372             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1373                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1374                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1375 
1376         /**
1377          * Constant for the "Yi Syllables" Unicode character block.
1378          * @since 1.4
1379          */
1380         public static final UnicodeBlock YI_SYLLABLES =
1381             new UnicodeBlock("YI_SYLLABLES",
1382                              "YI SYLLABLES",
1383                              "YISYLLABLES");
1384 
1385         /**
1386          * Constant for the "Yi Radicals" Unicode character block.
1387          * @since 1.4
1388          */
1389         public static final UnicodeBlock YI_RADICALS =
1390             new UnicodeBlock("YI_RADICALS",
1391                              "YI RADICALS",
1392                              "YIRADICALS");
1393 
1394         /**
1395          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * The alternate name "Cyrillic Supplement" is also supplied for this
              * block.
1396          * @since 1.5
1397          */
1398         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1399             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",  // same text as the constant's identifier
1400                              "CYRILLIC SUPPLEMENTARY",  // documented name
1401                              "CYRILLICSUPPLEMENTARY",  // documented name, space removed
1402                              "CYRILLIC SUPPLEMENT",  // alternate name
1403                              "CYRILLICSUPPLEMENT");  // alternate name, space removed
1404 
1405         /**
1406          * Constant for the "Tagalog" Unicode character block.
1407          * @since 1.5
1408          */
1409         public static final UnicodeBlock TAGALOG =
1410             new UnicodeBlock("TAGALOG");
1411 
1412         /**
1413          * Constant for the "Hanunoo" Unicode character block.
1414          * @since 1.5
1415          */
1416         public static final UnicodeBlock HANUNOO =
1417             new UnicodeBlock("HANUNOO");
1418 
1419         /**
1420          * Constant for the "Buhid" Unicode character block.
1421          * @since 1.5
1422          */
1423         public static final UnicodeBlock BUHID =
1424             new UnicodeBlock("BUHID");
1425 
1426         /**
1427          * Constant for the "Tagbanwa" Unicode character block.
1428          * @since 1.5
1429          */
1430         public static final UnicodeBlock TAGBANWA =
1431             new UnicodeBlock("TAGBANWA");
1432 
1433         /**
1434          * Constant for the "Limbu" Unicode character block.
1435          * @since 1.5
1436          */
1437         public static final UnicodeBlock LIMBU =
1438             new UnicodeBlock("LIMBU");
1439 
1440         /**
1441          * Constant for the "Tai Le" Unicode character block.
1442          * @since 1.5
1443          */
1444         public static final UnicodeBlock TAI_LE =
1445             new UnicodeBlock("TAI_LE",
1446                              "TAI LE",
1447                              "TAILE");
1448 
1449         /**
1450          * Constant for the "Khmer Symbols" Unicode character block.
1451          * @since 1.5
1452          */
1453         public static final UnicodeBlock KHMER_SYMBOLS =
1454             new UnicodeBlock("KHMER_SYMBOLS",
1455                              "KHMER SYMBOLS",
1456                              "KHMERSYMBOLS");
1457 
1458         /**
1459          * Constant for the "Phonetic Extensions" Unicode character block.
1460          * @since 1.5
1461          */
1462         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1463             new UnicodeBlock("PHONETIC_EXTENSIONS",
1464                              "PHONETIC EXTENSIONS",
1465                              "PHONETICEXTENSIONS");
1466 
1467         /**
1468          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1469          * @since 1.5
1470          */
1471         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1472             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",  // '_' stands in for ' ' and '-'
1473                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",  // block name in upper case
1474                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");  // spaces removed; hyphen kept
1475 
1476         /**
1477          * Constant for the "Supplemental Arrows-A" Unicode character block.
1478          * @since 1.5
1479          */
1480         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1481             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",  // '_' stands in for ' ' and '-'
1482                              "SUPPLEMENTAL ARROWS-A",  // block name in upper case
1483                              "SUPPLEMENTALARROWS-A");  // space removed; hyphen kept
1484 
1485         /**
1486          * Constant for the "Supplemental Arrows-B" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1490             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",  // '_' stands in for ' ' and '-'
1491                              "SUPPLEMENTAL ARROWS-B",  // block name in upper case
1492                              "SUPPLEMENTALARROWS-B");  // space removed; hyphen kept
1493 
1494         /**
1495          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1496          * character block.
1497          * @since 1.5
1498          */
1499         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1500             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",  // '_' stands in for ' ' and '-'
1501                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",  // block name in upper case
1502                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");  // spaces removed; hyphen kept
1503 
1504         /**
1505          * Constant for the "Supplemental Mathematical Operators" Unicode
1506          * character block.
1507          * @since 1.5
1508          */
1509         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1510             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1511                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1512                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1513 
1514         /**
1515          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1516          * block.
1517          * @since 1.5
1518          */
1519         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1520             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1521                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1522                              "MISCELLANEOUSSYMBOLSANDARROWS");
1523 
1524         /**
1525          * Constant for the "Katakana Phonetic Extensions" Unicode character
1526          * block.
1527          * @since 1.5
1528          */
1529         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1530             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1531                              "KATAKANA PHONETIC EXTENSIONS",
1532                              "KATAKANAPHONETICEXTENSIONS");
1533 
1534         /**
1535          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1536          * @since 1.5
1537          */
1538         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1539             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1540                              "YIJING HEXAGRAM SYMBOLS",
1541                              "YIJINGHEXAGRAMSYMBOLS");
1542 
1543         /**
1544          * Constant for the "Variation Selectors" Unicode character block.
1545          * @since 1.5
1546          */
1547         public static final UnicodeBlock VARIATION_SELECTORS =
1548             new UnicodeBlock("VARIATION_SELECTORS",
1549                              "VARIATION SELECTORS",
1550                              "VARIATIONSELECTORS");
1551 
1552         /**
1553          * Constant for the "Linear B Syllabary" Unicode character block.
1554          * @since 1.5
1555          */
1556         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1557             new UnicodeBlock("LINEAR_B_SYLLABARY",
1558                              "LINEAR B SYLLABARY",
1559                              "LINEARBSYLLABARY");
1560 
1561         /**
1562          * Constant for the "Linear B Ideograms" Unicode character block.
1563          * @since 1.5
1564          */
1565         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1566             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1567                              "LINEAR B IDEOGRAMS",
1568                              "LINEARBIDEOGRAMS");
1569 
1570         /**
1571          * Constant for the "Aegean Numbers" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock AEGEAN_NUMBERS =
1575             new UnicodeBlock("AEGEAN_NUMBERS",
1576                              "AEGEAN NUMBERS",
1577                              "AEGEANNUMBERS");
1578 
1579         /**
1580          * Constant for the "Old Italic" Unicode character block.
1581          * @since 1.5
1582          */
1583         public static final UnicodeBlock OLD_ITALIC =
1584             new UnicodeBlock("OLD_ITALIC",
1585                              "OLD ITALIC",
1586                              "OLDITALIC");
1587 
1588         /**
1589          * Constant for the "Gothic" Unicode character block.
1590          * @since 1.5
1591          */
1592         public static final UnicodeBlock GOTHIC =
1593             new UnicodeBlock("GOTHIC");
1594 
1595         /**
1596          * Constant for the "Ugaritic" Unicode character block.
1597          * @since 1.5
1598          */
1599         public static final UnicodeBlock UGARITIC =
1600             new UnicodeBlock("UGARITIC");
1601 
1602         /**
1603          * Constant for the "Deseret" Unicode character block.
1604          * @since 1.5
1605          */
1606         public static final UnicodeBlock DESERET =
1607             new UnicodeBlock("DESERET");
1608 
1609         /**
1610          * Constant for the "Shavian" Unicode character block.
1611          * @since 1.5
1612          */
1613         public static final UnicodeBlock SHAVIAN =
1614             new UnicodeBlock("SHAVIAN");
1615 
1616         /**
1617          * Constant for the "Osmanya" Unicode character block.
1618          * @since 1.5
1619          */
1620         public static final UnicodeBlock OSMANYA =
1621             new UnicodeBlock("OSMANYA");
1622 
1623         /**
1624          * Constant for the "Cypriot Syllabary" Unicode character block.
1625          * @since 1.5
1626          */
1627         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1628             new UnicodeBlock("CYPRIOT_SYLLABARY",
1629                              "CYPRIOT SYLLABARY",
1630                              "CYPRIOTSYLLABARY");
1631 
1632         /**
1633          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1634          * @since 1.5
1635          */
1636         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1637             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1638                              "BYZANTINE MUSICAL SYMBOLS",
1639                              "BYZANTINEMUSICALSYMBOLS");
1640 
1641         /**
1642          * Constant for the "Musical Symbols" Unicode character block.
1643          * @since 1.5
1644          */
1645         public static final UnicodeBlock MUSICAL_SYMBOLS =
1646             new UnicodeBlock("MUSICAL_SYMBOLS",
1647                              "MUSICAL SYMBOLS",
1648                              "MUSICALSYMBOLS");
1649 
1650         /**
1651          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1652          * @since 1.5
1653          */
1654         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1655             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1656                              "TAI XUAN JING SYMBOLS",
1657                              "TAIXUANJINGSYMBOLS");
1658 
1659         /**
1660          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1661          * character block.
1662          * @since 1.5
1663          */
1664         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1665             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1666                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1667                              "MATHEMATICALALPHANUMERICSYMBOLS");
1668 
1669         /**
1670          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1671          * character block.
1672          * @since 1.5
1673          */
1674         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1675             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1676                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1677                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1678 
1679         /**
1680          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1681          * @since 1.5
1682          */
1683         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1684             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1685                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1686                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1687 
1688         /**
1689          * Constant for the "Tags" Unicode character block.
1690          * @since 1.5
1691          */
1692         public static final UnicodeBlock TAGS =
1693             new UnicodeBlock("TAGS");
1694 
1695         /**
1696          * Constant for the "Variation Selectors Supplement" Unicode character
1697          * block.
1698          * @since 1.5
1699          */
1700         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1701             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1702                              "VARIATION SELECTORS SUPPLEMENT",
1703                              "VARIATIONSELECTORSSUPPLEMENT");
1704 
1705         /**
1706          * Constant for the "Supplementary Private Use Area-A" Unicode character
1707          * block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1711             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1712                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1713                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1714 
1715         /**
1716          * Constant for the "Supplementary Private Use Area-B" Unicode character
1717          * block.
1718          * @since 1.5
1719          */
1720         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1721             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1722                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1723                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1724 
1725         /**
1726          * Constant for the "High Surrogates" Unicode character block.
1727          * This block represents code point values in the high surrogate
1728          * range: U+D800 through U+DB7F.
1729          *
1730          * @since 1.5
1731          */
1732         public static final UnicodeBlock HIGH_SURROGATES =
1733             new UnicodeBlock("HIGH_SURROGATES",
1734                              "HIGH SURROGATES",
1735                              "HIGHSURROGATES");
1736 
1737         /**
1738          * Constant for the "High Private Use Surrogates" Unicode character
1739          * block.
1740          * This block represents code point values in the private use high
1741          * surrogate range: U+DB80 through U+DBFF.
1742          *
1743          * @since 1.5
1744          */
1745         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1746             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1747                              "HIGH PRIVATE USE SURROGATES",
1748                              "HIGHPRIVATEUSESURROGATES");
1749 
1750         /**
1751          * Constant for the "Low Surrogates" Unicode character block.
1752          * This block represents code point values in the low surrogate
1753          * range: U+DC00 through U+DFFF.
1754          *
1755          * @since 1.5
1756          */
1757         public static final UnicodeBlock LOW_SURROGATES =
1758             new UnicodeBlock("LOW_SURROGATES",
1759                              "LOW SURROGATES",
1760                              "LOWSURROGATES");
1761 
1762         /**
1763          * Constant for the "Arabic Supplement" Unicode character block.
1764          * @since 1.7
1765          */
1766         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1767             new UnicodeBlock("ARABIC_SUPPLEMENT",
1768                              "ARABIC SUPPLEMENT",
1769                              "ARABICSUPPLEMENT");
1770 
1771         /**
1772          * Constant for the "NKo" Unicode character block.
1773          * @since 1.7
1774          */
1775         public static final UnicodeBlock NKO =
1776             new UnicodeBlock("NKO");
1777 
1778         /**
1779          * Constant for the "Samaritan" Unicode character block.
1780          * @since 1.7
1781          */
1782         public static final UnicodeBlock SAMARITAN =
1783             new UnicodeBlock("SAMARITAN");
1784 
1785         /**
1786          * Constant for the "Mandaic" Unicode character block.
1787          * @since 1.7
1788          */
1789         public static final UnicodeBlock MANDAIC =
1790             new UnicodeBlock("MANDAIC");
1791 
1792         /**
1793          * Constant for the "Ethiopic Supplement" Unicode character block.
1794          * @since 1.7
1795          */
1796         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1797             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1798                              "ETHIOPIC SUPPLEMENT",
1799                              "ETHIOPICSUPPLEMENT");
1800 
1801         /**
1802          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1803          * Unicode character block.
1804          * @since 1.7
1805          */
1806         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1807             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1808                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1809                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1810 
1811         /**
1812          * Constant for the "New Tai Lue" Unicode character block.
1813          * @since 1.7
1814          */
1815         public static final UnicodeBlock NEW_TAI_LUE =
1816             new UnicodeBlock("NEW_TAI_LUE",
1817                              "NEW TAI LUE",
1818                              "NEWTAILUE");
1819 
1820         /**
1821          * Constant for the "Buginese" Unicode character block.
1822          * @since 1.7
1823          */
1824         public static final UnicodeBlock BUGINESE =
1825             new UnicodeBlock("BUGINESE");
1826 
1827         /**
1828          * Constant for the "Tai Tham" Unicode character block.
1829          * @since 1.7
1830          */
1831         public static final UnicodeBlock TAI_THAM =
1832             new UnicodeBlock("TAI_THAM",
1833                              "TAI THAM",
1834                              "TAITHAM");
1835 
1836         /**
1837          * Constant for the "Balinese" Unicode character block.
1838          * @since 1.7
1839          */
1840         public static final UnicodeBlock BALINESE =
1841             new UnicodeBlock("BALINESE");
1842 
1843         /**
1844          * Constant for the "Sundanese" Unicode character block.
1845          * @since 1.7
1846          */
1847         public static final UnicodeBlock SUNDANESE =
1848             new UnicodeBlock("SUNDANESE");
1849 
1850         /**
1851          * Constant for the "Batak" Unicode character block.
1852          * @since 1.7
1853          */
1854         public static final UnicodeBlock BATAK =
1855             new UnicodeBlock("BATAK");
1856 
1857         /**
1858          * Constant for the "Lepcha" Unicode character block.
1859          * @since 1.7
1860          */
1861         public static final UnicodeBlock LEPCHA =
1862             new UnicodeBlock("LEPCHA");
1863 
1864         /**
1865          * Constant for the "Ol Chiki" Unicode character block.
1866          * @since 1.7
1867          */
1868         public static final UnicodeBlock OL_CHIKI =
1869             new UnicodeBlock("OL_CHIKI",
1870                              "OL CHIKI",
1871                              "OLCHIKI");
1872 
1873         /**
1874          * Constant for the "Vedic Extensions" Unicode character block.
1875          * @since 1.7
1876          */
1877         public static final UnicodeBlock VEDIC_EXTENSIONS =
1878             new UnicodeBlock("VEDIC_EXTENSIONS",
1879                              "VEDIC EXTENSIONS",
1880                              "VEDICEXTENSIONS");
1881 
1882         /**
1883          * Constant for the "Phonetic Extensions Supplement" Unicode character
1884          * block.
1885          * @since 1.7
1886          */
1887         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1888             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1889                              "PHONETIC EXTENSIONS SUPPLEMENT",
1890                              "PHONETICEXTENSIONSSUPPLEMENT");
1891 
1892         /**
1893          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1894          * character block.
1895          * @since 1.7
1896          */
1897         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1898             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1899                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1900                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1901 
1902         /**
1903          * Constant for the "Glagolitic" Unicode character block.
1904          * @since 1.7
1905          */
1906         public static final UnicodeBlock GLAGOLITIC =
1907             new UnicodeBlock("GLAGOLITIC");
1908 
1909         /**
1910          * Constant for the "Latin Extended-C" Unicode character block.
1911          * @since 1.7
1912          */
1913         public static final UnicodeBlock LATIN_EXTENDED_C =
1914             new UnicodeBlock("LATIN_EXTENDED_C",
1915                              "LATIN EXTENDED-C",
1916                              "LATINEXTENDED-C");
1917 
1918         /**
1919          * Constant for the "Coptic" Unicode character block.
1920          * @since 1.7
1921          */
1922         public static final UnicodeBlock COPTIC =
1923             new UnicodeBlock("COPTIC");
1924 
1925         /**
1926          * Constant for the "Georgian Supplement" Unicode character block.
1927          * @since 1.7
1928          */
1929         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1930             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1931                              "GEORGIAN SUPPLEMENT",
1932                              "GEORGIANSUPPLEMENT");
1933 
1934         /**
1935          * Constant for the "Tifinagh" Unicode character block.
1936          * @since 1.7
1937          */
1938         public static final UnicodeBlock TIFINAGH =
1939             new UnicodeBlock("TIFINAGH");
1940 
1941         /**
1942          * Constant for the "Ethiopic Extended" Unicode character block.
1943          * @since 1.7
1944          */
1945         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1946             new UnicodeBlock("ETHIOPIC_EXTENDED",
1947                              "ETHIOPIC EXTENDED",
1948                              "ETHIOPICEXTENDED");
1949 
1950         /**
1951          * Constant for the "Cyrillic Extended-A" Unicode character block.
1952          * @since 1.7
1953          */
1954         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1955             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1956                              "CYRILLIC EXTENDED-A",
1957                              "CYRILLICEXTENDED-A");
1958 
1959         /**
1960          * Constant for the "Supplemental Punctuation" Unicode character block.
1961          * @since 1.7
1962          */
1963         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1964             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1965                              "SUPPLEMENTAL PUNCTUATION",
1966                              "SUPPLEMENTALPUNCTUATION");
1967 
1968         /**
1969          * Constant for the "CJK Strokes" Unicode character block.
1970          * @since 1.7
1971          */
1972         public static final UnicodeBlock CJK_STROKES =
1973             new UnicodeBlock("CJK_STROKES",
1974                              "CJK STROKES",
1975                              "CJKSTROKES");
1976 
1977         /**
1978          * Constant for the "Lisu" Unicode character block.
1979          * @since 1.7
1980          */
1981         public static final UnicodeBlock LISU =
1982             new UnicodeBlock("LISU");
1983 
1984         /**
1985          * Constant for the "Vai" Unicode character block.
1986          * @since 1.7
1987          */
1988         public static final UnicodeBlock VAI =
1989             new UnicodeBlock("VAI");
1990 
1991         /**
1992          * Constant for the "Cyrillic Extended-B" Unicode character block.
1993          * @since 1.7
1994          */
1995         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1996             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1997                              "CYRILLIC EXTENDED-B",
1998                              "CYRILLICEXTENDED-B");
1999 
2000         /**
2001          * Constant for the "Bamum" Unicode character block.
2002          * @since 1.7
2003          */
2004         public static final UnicodeBlock BAMUM =
2005             new UnicodeBlock("BAMUM");
2006 
2007         /**
2008          * Constant for the "Modifier Tone Letters" Unicode character block.
2009          * @since 1.7
2010          */
2011         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2012             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2013                              "MODIFIER TONE LETTERS",
2014                              "MODIFIERTONELETTERS");
2015 
2016         /**
2017          * Constant for the "Latin Extended-D" Unicode character block.
2018          * @since 1.7
2019          */
2020         public static final UnicodeBlock LATIN_EXTENDED_D =
2021             new UnicodeBlock("LATIN_EXTENDED_D",
2022                              "LATIN EXTENDED-D",
2023                              "LATINEXTENDED-D");
2024 
2025         /**
2026          * Constant for the "Syloti Nagri" Unicode character block.
2027          * @since 1.7
2028          */
2029         public static final UnicodeBlock SYLOTI_NAGRI =
2030             new UnicodeBlock("SYLOTI_NAGRI",
2031                              "SYLOTI NAGRI",
2032                              "SYLOTINAGRI");
2033 
2034         /**
2035          * Constant for the "Common Indic Number Forms" Unicode character block.
2036          * @since 1.7
2037          */
2038         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2039             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2040                              "COMMON INDIC NUMBER FORMS",
2041                              "COMMONINDICNUMBERFORMS");
2042 
2043         /**
2044          * Constant for the "Phags-pa" Unicode character block.
2045          * @since 1.7
2046          */
2047         public static final UnicodeBlock PHAGS_PA =
2048             new UnicodeBlock("PHAGS_PA",
2049                              "PHAGS-PA"); // single alias: the name has no space to strip
2050 
2051         /**
2052          * Constant for the "Saurashtra" Unicode character block.
2053          * @since 1.7
2054          */
2055         public static final UnicodeBlock SAURASHTRA =
2056             new UnicodeBlock("SAURASHTRA");
2057 
2058         /**
2059          * Constant for the "Devanagari Extended" Unicode character block.
2060          * @since 1.7
2061          */
2062         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2063             new UnicodeBlock("DEVANAGARI_EXTENDED",
2064                              "DEVANAGARI EXTENDED",
2065                              "DEVANAGARIEXTENDED");
2066 
2067         /**
2068          * Constant for the "Kayah Li" Unicode character block.
2069          * @since 1.7
2070          */
2071         public static final UnicodeBlock KAYAH_LI =
2072             new UnicodeBlock("KAYAH_LI",
2073                              "KAYAH LI",
2074                              "KAYAHLI");
2075 
2076         /**
2077          * Constant for the "Rejang" Unicode character block.
2078          * @since 1.7
2079          */
2080         public static final UnicodeBlock REJANG =
2081             new UnicodeBlock("REJANG");
2082 
2083         /**
2084          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2085          * @since 1.7
2086          */
2087         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2088             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2089                              "HANGUL JAMO EXTENDED-A",
2090                              "HANGULJAMOEXTENDED-A");
2091 
2092         /**
2093          * Constant for the "Javanese" Unicode character block.
2094          * @since 1.7
2095          */
2096         public static final UnicodeBlock JAVANESE =
2097             new UnicodeBlock("JAVANESE");
2098 
2099         /**
2100          * Constant for the "Cham" Unicode character block.
2101          * @since 1.7
2102          */
2103         public static final UnicodeBlock CHAM =
2104             new UnicodeBlock("CHAM");
2105 
2106         /**
2107          * Constant for the "Myanmar Extended-A" Unicode character block.
2108          * @since 1.7
2109          */
2110         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2111             new UnicodeBlock("MYANMAR_EXTENDED_A",
2112                              "MYANMAR EXTENDED-A",
2113                              "MYANMAREXTENDED-A");
2114 
2115         /**
2116          * Constant for the "Tai Viet" Unicode character block.
2117          * @since 1.7
2118          */
2119         public static final UnicodeBlock TAI_VIET =
2120             new UnicodeBlock("TAI_VIET",
2121                              "TAI VIET",
2122                              "TAIVIET");
2123 
2124         /**
2125          * Constant for the "Ethiopic Extended-A" Unicode character block.
2126          * @since 1.7
2127          */
2128         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2129             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2130                              "ETHIOPIC EXTENDED-A",
2131                              "ETHIOPICEXTENDED-A");
2132 
2133         /**
2134          * Constant for the "Meetei Mayek" Unicode character block.
2135          * @since 1.7
2136          */
2137         public static final UnicodeBlock MEETEI_MAYEK =
2138             new UnicodeBlock("MEETEI_MAYEK",
2139                              "MEETEI MAYEK",
2140                              "MEETEIMAYEK");
2141 
2142         /**
2143          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2144          * @since 1.7
2145          */
2146         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2147             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2148                              "HANGUL JAMO EXTENDED-B",
2149                              "HANGULJAMOEXTENDED-B");
2150 
2151         /**
2152          * Constant for the "Vertical Forms" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock VERTICAL_FORMS =
2156             new UnicodeBlock("VERTICAL_FORMS",
2157                              "VERTICAL FORMS",
2158                              "VERTICALFORMS");
2159 
2160         /**
2161          * Constant for the "Ancient Greek Numbers" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2165             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2166                              "ANCIENT GREEK NUMBERS",
2167                              "ANCIENTGREEKNUMBERS");
2168 
2169         /**
2170          * Constant for the "Ancient Symbols" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock ANCIENT_SYMBOLS =
2174             new UnicodeBlock("ANCIENT_SYMBOLS",
2175                              "ANCIENT SYMBOLS",
2176                              "ANCIENTSYMBOLS");
2177 
2178         /**
2179          * Constant for the "Phaistos Disc" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock PHAISTOS_DISC =
2183             new UnicodeBlock("PHAISTOS_DISC",
2184                              "PHAISTOS DISC",
2185                              "PHAISTOSDISC");
2186 
2187         /**
2188          * Constant for the "Lycian" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock LYCIAN =
2192             new UnicodeBlock("LYCIAN");
2193 
2194         /**
2195          * Constant for the "Carian" Unicode character block.
2196          * @since 1.7
2197          */
2198         public static final UnicodeBlock CARIAN =
2199             new UnicodeBlock("CARIAN");
2200 
2201         /**
2202          * Constant for the "Old Persian" Unicode character block.
2203          * @since 1.7
2204          */
2205         public static final UnicodeBlock OLD_PERSIAN =
2206             new UnicodeBlock("OLD_PERSIAN",
2207                              "OLD PERSIAN",
2208                              "OLDPERSIAN");
2209 
2210         /**
2211          * Constant for the "Imperial Aramaic" Unicode character block.
2212          * @since 1.7
2213          */
2214         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2215             new UnicodeBlock("IMPERIAL_ARAMAIC",
2216                              "IMPERIAL ARAMAIC",
2217                              "IMPERIALARAMAIC");
2218 
2219         /**
2220          * Constant for the "Phoenician" Unicode character block.
2221          * @since 1.7
2222          */
2223         public static final UnicodeBlock PHOENICIAN =
2224             new UnicodeBlock("PHOENICIAN");
2225 
2226         /**
2227          * Constant for the "Lydian" Unicode character block.
2228          * @since 1.7
2229          */
2230         public static final UnicodeBlock LYDIAN =
2231             new UnicodeBlock("LYDIAN");
2232 
2233         /**
2234          * Constant for the "Kharoshthi" Unicode character block.
2235          * @since 1.7
2236          */
2237         public static final UnicodeBlock KHAROSHTHI =
2238             new UnicodeBlock("KHAROSHTHI");
2239 
2240         /**
2241          * Constant for the "Old South Arabian" Unicode character block.
2242          * @since 1.7
2243          */
2244         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2245             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2246                              "OLD SOUTH ARABIAN",
2247                              "OLDSOUTHARABIAN");
2248 
2249         /**
2250          * Constant for the "Avestan" Unicode character block.
2251          * @since 1.7
2252          */
2253         public static final UnicodeBlock AVESTAN =
2254             new UnicodeBlock("AVESTAN");
2255 
2256         /**
2257          * Constant for the "Inscriptional Parthian" Unicode character block.
2258          * @since 1.7
2259          */
2260         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2261             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2262                              "INSCRIPTIONAL PARTHIAN",
2263                              "INSCRIPTIONALPARTHIAN");
2264 
2265         /**
2266          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2267          * @since 1.7
2268          */
2269         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2270             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2271                              "INSCRIPTIONAL PAHLAVI",
2272                              "INSCRIPTIONALPAHLAVI");
2273 
2274         /**
2275          * Constant for the "Old Turkic" Unicode character block.
2276          * @since 1.7
2277          */
2278         public static final UnicodeBlock OLD_TURKIC =
2279             new UnicodeBlock("OLD_TURKIC",
2280                              "OLD TURKIC",
2281                              "OLDTURKIC");
2282 
2283         /**
2284          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2285          * @since 1.7
2286          */
2287         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2288             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2289                              "RUMI NUMERAL SYMBOLS",
2290                              "RUMINUMERALSYMBOLS");
2291 
2292         /**
2293          * Constant for the "Brahmi" Unicode character block.
2294          * @since 1.7
2295          */
2296         public static final UnicodeBlock BRAHMI =
2297             new UnicodeBlock("BRAHMI");
2298 
2299         /**
2300          * Constant for the "Kaithi" Unicode character block.
2301          * @since 1.7
2302          */
2303         public static final UnicodeBlock KAITHI =
2304             new UnicodeBlock("KAITHI");
2305 
2306         /**
2307          * Constant for the "Cuneiform" Unicode character block.
2308          * @since 1.7
2309          */
2310         public static final UnicodeBlock CUNEIFORM =
2311             new UnicodeBlock("CUNEIFORM");
2312 
2313         /**
2314          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2315          * character block.
2316          * @since 1.7
2317          */
2318         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2319             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2320                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2321                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2322 
2323         /**
2324          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2325          * @since 1.7
2326          */
2327         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2328             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2329                              "EGYPTIAN HIEROGLYPHS",
2330                              "EGYPTIANHIEROGLYPHS");
2331 
2332         /**
2333          * Constant for the "Bamum Supplement" Unicode character block.
2334          * @since 1.7
2335          */
2336         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2337             new UnicodeBlock("BAMUM_SUPPLEMENT",
2338                              "BAMUM SUPPLEMENT",
2339                              "BAMUMSUPPLEMENT");
2340 
2341         /**
2342          * Constant for the "Kana Supplement" Unicode character block.
2343          * @since 1.7
2344          */
2345         public static final UnicodeBlock KANA_SUPPLEMENT =
2346             new UnicodeBlock("KANA_SUPPLEMENT",
2347                              "KANA SUPPLEMENT",
2348                              "KANASUPPLEMENT");
2349 
2350         /**
2351          * Constant for the "Ancient Greek Musical Notation" Unicode character
2352          * block.
2353          * @since 1.7
2354          */
2355         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2356             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2357                              "ANCIENT GREEK MUSICAL NOTATION",
2358                              "ANCIENTGREEKMUSICALNOTATION");
2359 
2360         /**
2361          * Constant for the "Counting Rod Numerals" Unicode character block.
2362          * @since 1.7
2363          */
2364         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2365             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2366                              "COUNTING ROD NUMERALS",
2367                              "COUNTINGRODNUMERALS");
2368 
2369         /**
2370          * Constant for the "Mahjong Tiles" Unicode character block.
2371          * @since 1.7
2372          */
2373         public static final UnicodeBlock MAHJONG_TILES =
2374             new UnicodeBlock("MAHJONG_TILES",
2375                              "MAHJONG TILES",
2376                              "MAHJONGTILES");
2377 
2378         /**
2379          * Constant for the "Domino Tiles" Unicode character block.
2380          * @since 1.7
2381          */
2382         public static final UnicodeBlock DOMINO_TILES =
2383             new UnicodeBlock("DOMINO_TILES",
2384                              "DOMINO TILES",
2385                              "DOMINOTILES");
2386 
2387         /**
2388          * Constant for the "Playing Cards" Unicode character block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock PLAYING_CARDS =
2392             new UnicodeBlock("PLAYING_CARDS",
2393                              "PLAYING CARDS",
2394                              "PLAYINGCARDS");
2395 
2396         /**
2397          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2398          * block.
2399          * @since 1.7
2400          */
2401         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2402             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2403                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2404                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2405 
2406         /**
2407          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2408          * block.
2409          * @since 1.7
2410          */
2411         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2412             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2413                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2414                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2415 
2416         /**
2417          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2418          * character block, which spans code points U+1F300 through U+1F5FF.
2419          * @since 1.7
2420          */
2421         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2422             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2423                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2424                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2425 
2426         /**
2427          * Constant for the "Emoticons" Unicode character block,
              * which spans code points U+1F600 through U+1F64F.
2428          * @since 1.7
2429          */
2430         public static final UnicodeBlock EMOTICONS =
2431             new UnicodeBlock("EMOTICONS");
2432 
2433         /**
2434          * Constant for the "Transport And Map Symbols" Unicode character block,
              * which spans code points U+1F680 through U+1F6FF.
2435          * @since 1.7
2436          */
2437         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2438             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2439                              "TRANSPORT AND MAP SYMBOLS",
2440                              "TRANSPORTANDMAPSYMBOLS");
2441 
2442         /**
2443          * Constant for the "Alchemical Symbols" Unicode character block,
              * which spans code points U+1F700 through U+1F77F.
2444          * @since 1.7
2445          */
2446         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2447             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2448                              "ALCHEMICAL SYMBOLS",
2449                              "ALCHEMICALSYMBOLS");
2450 
2451         /**
2452          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2453          * character block, which spans code points U+2A700 through U+2B73F.
2454          * @since 1.7
2455          */
2456         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2457             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2458                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2459                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2460 
2461         /**
2462          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2463          * character block, which spans code points U+2B740 through U+2B81F.
2464          * @since 1.7
2465          */
2466         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2467             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2468                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2469                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2470 
2471         private static final int blockStarts[] = {
                 // First code point of every range, in ascending order.  Entry i
                 // pairs with blocks[i]; a range ends just before the next entry's
                 // start, and the final range has no following entry.  of(int)
                 // binary-searches this table, so the ordering must be kept.
2472             0x0000,   // 0000..007F; Basic Latin
2473             0x0080,   // 0080..00FF; Latin-1 Supplement
2474             0x0100,   // 0100..017F; Latin Extended-A
2475             0x0180,   // 0180..024F; Latin Extended-B
2476             0x0250,   // 0250..02AF; IPA Extensions
2477             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2478             0x0300,   // 0300..036F; Combining Diacritical Marks
2479             0x0370,   // 0370..03FF; Greek and Coptic
2480             0x0400,   // 0400..04FF; Cyrillic
2481             0x0500,   // 0500..052F; Cyrillic Supplement
2482             0x0530,   // 0530..058F; Armenian
2483             0x0590,   // 0590..05FF; Hebrew
2484             0x0600,   // 0600..06FF; Arabic
2485             0x0700,   // 0700..074F; Syriac
2486             0x0750,   // 0750..077F; Arabic Supplement
2487             0x0780,   // 0780..07BF; Thaana
2488             0x07C0,   // 07C0..07FF; NKo
2489             0x0800,   // 0800..083F; Samaritan
2490             0x0840,   // 0840..085F; Mandaic
2491             0x0860,   //             unassigned
2492             0x0900,   // 0900..097F; Devanagari
2493             0x0980,   // 0980..09FF; Bengali
2494             0x0A00,   // 0A00..0A7F; Gurmukhi
2495             0x0A80,   // 0A80..0AFF; Gujarati
2496             0x0B00,   // 0B00..0B7F; Oriya
2497             0x0B80,   // 0B80..0BFF; Tamil
2498             0x0C00,   // 0C00..0C7F; Telugu
2499             0x0C80,   // 0C80..0CFF; Kannada
2500             0x0D00,   // 0D00..0D7F; Malayalam
2501             0x0D80,   // 0D80..0DFF; Sinhala
2502             0x0E00,   // 0E00..0E7F; Thai
2503             0x0E80,   // 0E80..0EFF; Lao
2504             0x0F00,   // 0F00..0FFF; Tibetan
2505             0x1000,   // 1000..109F; Myanmar
2506             0x10A0,   // 10A0..10FF; Georgian
2507             0x1100,   // 1100..11FF; Hangul Jamo
2508             0x1200,   // 1200..137F; Ethiopic
2509             0x1380,   // 1380..139F; Ethiopic Supplement
2510             0x13A0,   // 13A0..13FF; Cherokee
2511             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2512             0x1680,   // 1680..169F; Ogham
2513             0x16A0,   // 16A0..16FF; Runic
2514             0x1700,   // 1700..171F; Tagalog
2515             0x1720,   // 1720..173F; Hanunoo
2516             0x1740,   // 1740..175F; Buhid
2517             0x1760,   // 1760..177F; Tagbanwa
2518             0x1780,   // 1780..17FF; Khmer
2519             0x1800,   // 1800..18AF; Mongolian
2520             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2521             0x1900,   // 1900..194F; Limbu
2522             0x1950,   // 1950..197F; Tai Le
2523             0x1980,   // 1980..19DF; New Tai Lue
2524             0x19E0,   // 19E0..19FF; Khmer Symbols
2525             0x1A00,   // 1A00..1A1F; Buginese
2526             0x1A20,   // 1A20..1AAF; Tai Tham
2527             0x1AB0,   //             unassigned
2528             0x1B00,   // 1B00..1B7F; Balinese
2529             0x1B80,   // 1B80..1BBF; Sundanese
2530             0x1BC0,   // 1BC0..1BFF; Batak
2531             0x1C00,   // 1C00..1C4F; Lepcha
2532             0x1C50,   // 1C50..1C7F; Ol Chiki
2533             0x1C80,   //             unassigned
2534             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2535             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2536             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2537             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2538             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2539             0x1F00,   // 1F00..1FFF; Greek Extended
2540             0x2000,   // 2000..206F; General Punctuation
2541             0x2070,   // 2070..209F; Superscripts and Subscripts
2542             0x20A0,   // 20A0..20CF; Currency Symbols
2543             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2544             0x2100,   // 2100..214F; Letterlike Symbols
2545             0x2150,   // 2150..218F; Number Forms
2546             0x2190,   // 2190..21FF; Arrows
2547             0x2200,   // 2200..22FF; Mathematical Operators
2548             0x2300,   // 2300..23FF; Miscellaneous Technical
2549             0x2400,   // 2400..243F; Control Pictures
2550             0x2440,   // 2440..245F; Optical Character Recognition
2551             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2552             0x2500,   // 2500..257F; Box Drawing
2553             0x2580,   // 2580..259F; Block Elements
2554             0x25A0,   // 25A0..25FF; Geometric Shapes
2555             0x2600,   // 2600..26FF; Miscellaneous Symbols
2556             0x2700,   // 2700..27BF; Dingbats
2557             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2558             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2559             0x2800,   // 2800..28FF; Braille Patterns
2560             0x2900,   // 2900..297F; Supplemental Arrows-B
2561             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2562             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2563             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2564             0x2C00,   // 2C00..2C5F; Glagolitic
2565             0x2C60,   // 2C60..2C7F; Latin Extended-C
2566             0x2C80,   // 2C80..2CFF; Coptic
2567             0x2D00,   // 2D00..2D2F; Georgian Supplement
2568             0x2D30,   // 2D30..2D7F; Tifinagh
2569             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2570             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2571             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2572             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2573             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2574             0x2FE0,   //             unassigned
2575             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2576             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2577             0x3040,   // 3040..309F; Hiragana
2578             0x30A0,   // 30A0..30FF; Katakana
2579             0x3100,   // 3100..312F; Bopomofo
2580             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2581             0x3190,   // 3190..319F; Kanbun
2582             0x31A0,   // 31A0..31BF; Bopomofo Extended
2583             0x31C0,   // 31C0..31EF; CJK Strokes
2584             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2585             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2586             0x3300,   // 3300..33FF; CJK Compatibility
2587             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2588             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2589             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2590             0xA000,   // A000..A48F; Yi Syllables
2591             0xA490,   // A490..A4CF; Yi Radicals
2592             0xA4D0,   // A4D0..A4FF; Lisu
2593             0xA500,   // A500..A63F; Vai
2594             0xA640,   // A640..A69F; Cyrillic Extended-B
2595             0xA6A0,   // A6A0..A6FF; Bamum
2596             0xA700,   // A700..A71F; Modifier Tone Letters
2597             0xA720,   // A720..A7FF; Latin Extended-D
2598             0xA800,   // A800..A82F; Syloti Nagri
2599             0xA830,   // A830..A83F; Common Indic Number Forms
2600             0xA840,   // A840..A87F; Phags-pa
2601             0xA880,   // A880..A8DF; Saurashtra
2602             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2603             0xA900,   // A900..A92F; Kayah Li
2604             0xA930,   // A930..A95F; Rejang
2605             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2606             0xA980,   // A980..A9DF; Javanese
2607             0xA9E0,   //             unassigned
2608             0xAA00,   // AA00..AA5F; Cham
2609             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2610             0xAA80,   // AA80..AADF; Tai Viet
2611             0xAAE0,   //             unassigned
2612             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2613             0xAB30,   //             unassigned
2614             0xABC0,   // ABC0..ABFF; Meetei Mayek
2615             0xAC00,   // AC00..D7AF; Hangul Syllables
2616             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2617             0xD800,   // D800..DB7F; High Surrogates
2618             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2619             0xDC00,   // DC00..DFFF; Low Surrogates
2620             0xE000,   // E000..F8FF; Private Use Area
2621             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2622             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2623             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2624             0xFE00,   // FE00..FE0F; Variation Selectors
2625             0xFE10,   // FE10..FE1F; Vertical Forms
2626             0xFE20,   // FE20..FE2F; Combining Half Marks
2627             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2628             0xFE50,   // FE50..FE6F; Small Form Variants
2629             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2630             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2631             0xFFF0,   // FFF0..FFFF; Specials
2632             0x10000,  // 10000..1007F; Linear B Syllabary
2633             0x10080,  // 10080..100FF; Linear B Ideograms
2634             0x10100,  // 10100..1013F; Aegean Numbers
2635             0x10140,  // 10140..1018F; Ancient Greek Numbers
2636             0x10190,  // 10190..101CF; Ancient Symbols
2637             0x101D0,  // 101D0..101FF; Phaistos Disc
2638             0x10200,  //               unassigned
2639             0x10280,  // 10280..1029F; Lycian
2640             0x102A0,  // 102A0..102DF; Carian
2641             0x102E0,  //               unassigned
2642             0x10300,  // 10300..1032F; Old Italic
2643             0x10330,  // 10330..1034F; Gothic
2644             0x10350,  //               unassigned
2645             0x10380,  // 10380..1039F; Ugaritic
2646             0x103A0,  // 103A0..103DF; Old Persian
2647             0x103E0,  //               unassigned
2648             0x10400,  // 10400..1044F; Deseret
2649             0x10450,  // 10450..1047F; Shavian
2650             0x10480,  // 10480..104AF; Osmanya
2651             0x104B0,  //               unassigned
2652             0x10800,  // 10800..1083F; Cypriot Syllabary
2653             0x10840,  // 10840..1085F; Imperial Aramaic
2654             0x10860,  //               unassigned
2655             0x10900,  // 10900..1091F; Phoenician
2656             0x10920,  // 10920..1093F; Lydian
2657             0x10940,  //               unassigned
2658             0x10A00,  // 10A00..10A5F; Kharoshthi
2659             0x10A60,  // 10A60..10A7F; Old South Arabian
2660             0x10A80,  //               unassigned
2661             0x10B00,  // 10B00..10B3F; Avestan
2662             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2663             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2664             0x10B80,  //               unassigned
2665             0x10C00,  // 10C00..10C4F; Old Turkic
2666             0x10C50,  //               unassigned
2667             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2668             0x10E80,  //               unassigned
2669             0x11000,  // 11000..1107F; Brahmi
2670             0x11080,  // 11080..110CF; Kaithi
2671             0x110D0,  //               unassigned
2672             0x12000,  // 12000..123FF; Cuneiform
2673             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2674             0x12480,  //               unassigned
2675             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2676             0x13430,  //               unassigned
2677             0x16800,  // 16800..16A3F; Bamum Supplement
2678             0x16A40,  //               unassigned
2679             0x1B000,  // 1B000..1B0FF; Kana Supplement
2680             0x1B100,  //               unassigned
2681             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2682             0x1D100,  // 1D100..1D1FF; Musical Symbols
2683             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2684             0x1D250,  //               unassigned
2685             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2686             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2687             0x1D380,  //               unassigned
2688             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2689             0x1D800,  //               unassigned
2690             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2691             0x1F030,  // 1F030..1F09F; Domino Tiles
2692             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2693             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2694             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2695             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2696             0x1F600,  // 1F600..1F64F; Emoticons
2697             0x1F650,  //               unassigned
2698             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2699             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2700             0x1F780,  //               unassigned
2701             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2702             0x2A6E0,  //               unassigned
2703             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2704             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2705             0x2B820,  //               unassigned
2706             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2707             0x2FA20,  //               unassigned
2708             0xE0000,  // E0000..E007F; Tags
2709             0xE0080,  //               unassigned
2710             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2711             0xE01F0,  //               unassigned
2712             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2713             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2714         };
2715 
2716         private static final UnicodeBlock[] blocks = {
                 // One entry per range in blockStarts, index for index; of(int)
                 // returns blocks[i] for a code point found in range i.  A null
                 // entry marks a range listed as unassigned in blockStarts.
2717             BASIC_LATIN,
2718             LATIN_1_SUPPLEMENT,
2719             LATIN_EXTENDED_A,
2720             LATIN_EXTENDED_B,
2721             IPA_EXTENSIONS,
2722             SPACING_MODIFIER_LETTERS,
2723             COMBINING_DIACRITICAL_MARKS,
2724             GREEK,   // range listed as "Greek and Coptic" in blockStarts
2725             CYRILLIC,
2726             CYRILLIC_SUPPLEMENTARY,   // range listed as "Cyrillic Supplement" in blockStarts
2727             ARMENIAN,
2728             HEBREW,
2729             ARABIC,
2730             SYRIAC,
2731             ARABIC_SUPPLEMENT,
2732             THAANA,
2733             NKO,
2734             SAMARITAN,
2735             MANDAIC,
2736             null,   // 0860..08FF unassigned
2737             DEVANAGARI,
2738             BENGALI,
2739             GURMUKHI,
2740             GUJARATI,
2741             ORIYA,
2742             TAMIL,
2743             TELUGU,
2744             KANNADA,
2745             MALAYALAM,
2746             SINHALA,
2747             THAI,
2748             LAO,
2749             TIBETAN,
2750             MYANMAR,
2751             GEORGIAN,
2752             HANGUL_JAMO,
2753             ETHIOPIC,
2754             ETHIOPIC_SUPPLEMENT,
2755             CHEROKEE,
2756             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2757             OGHAM,
2758             RUNIC,
2759             TAGALOG,
2760             HANUNOO,
2761             BUHID,
2762             TAGBANWA,
2763             KHMER,
2764             MONGOLIAN,
2765             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2766             LIMBU,
2767             TAI_LE,
2768             NEW_TAI_LUE,
2769             KHMER_SYMBOLS,
2770             BUGINESE,
2771             TAI_THAM,
2772             null,   // 1AB0..1AFF unassigned
2773             BALINESE,
2774             SUNDANESE,
2775             BATAK,
2776             LEPCHA,
2777             OL_CHIKI,
2778             null,   // 1C80..1CCF unassigned
2779             VEDIC_EXTENSIONS,
2780             PHONETIC_EXTENSIONS,
2781             PHONETIC_EXTENSIONS_SUPPLEMENT,
2782             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2783             LATIN_EXTENDED_ADDITIONAL,
2784             GREEK_EXTENDED,
2785             GENERAL_PUNCTUATION,
2786             SUPERSCRIPTS_AND_SUBSCRIPTS,
2787             CURRENCY_SYMBOLS,
2788             COMBINING_MARKS_FOR_SYMBOLS,   // "Combining Diacritical Marks for Symbols" in blockStarts
2789             LETTERLIKE_SYMBOLS,
2790             NUMBER_FORMS,
2791             ARROWS,
2792             MATHEMATICAL_OPERATORS,
2793             MISCELLANEOUS_TECHNICAL,
2794             CONTROL_PICTURES,
2795             OPTICAL_CHARACTER_RECOGNITION,
2796             ENCLOSED_ALPHANUMERICS,
2797             BOX_DRAWING,
2798             BLOCK_ELEMENTS,
2799             GEOMETRIC_SHAPES,
2800             MISCELLANEOUS_SYMBOLS,
2801             DINGBATS,
2802             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2803             SUPPLEMENTAL_ARROWS_A,
2804             BRAILLE_PATTERNS,
2805             SUPPLEMENTAL_ARROWS_B,
2806             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2807             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2808             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2809             GLAGOLITIC,
2810             LATIN_EXTENDED_C,
2811             COPTIC,
2812             GEORGIAN_SUPPLEMENT,
2813             TIFINAGH,
2814             ETHIOPIC_EXTENDED,
2815             CYRILLIC_EXTENDED_A,
2816             SUPPLEMENTAL_PUNCTUATION,
2817             CJK_RADICALS_SUPPLEMENT,
2818             KANGXI_RADICALS,
2819             null,   // 2FE0..2FEF unassigned
2820             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2821             CJK_SYMBOLS_AND_PUNCTUATION,
2822             HIRAGANA,
2823             KATAKANA,
2824             BOPOMOFO,
2825             HANGUL_COMPATIBILITY_JAMO,
2826             KANBUN,
2827             BOPOMOFO_EXTENDED,
2828             CJK_STROKES,
2829             KATAKANA_PHONETIC_EXTENSIONS,
2830             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2831             CJK_COMPATIBILITY,
2832             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2833             YIJING_HEXAGRAM_SYMBOLS,
2834             CJK_UNIFIED_IDEOGRAPHS,
2835             YI_SYLLABLES,
2836             YI_RADICALS,
2837             LISU,
2838             VAI,
2839             CYRILLIC_EXTENDED_B,
2840             BAMUM,
2841             MODIFIER_TONE_LETTERS,
2842             LATIN_EXTENDED_D,
2843             SYLOTI_NAGRI,
2844             COMMON_INDIC_NUMBER_FORMS,
2845             PHAGS_PA,
2846             SAURASHTRA,
2847             DEVANAGARI_EXTENDED,
2848             KAYAH_LI,
2849             REJANG,
2850             HANGUL_JAMO_EXTENDED_A,
2851             JAVANESE,
2852             null,   // A9E0..A9FF unassigned
2853             CHAM,
2854             MYANMAR_EXTENDED_A,
2855             TAI_VIET,
2856             null,   // AAE0..AAFF unassigned
2857             ETHIOPIC_EXTENDED_A,
2858             null,   // AB30..ABBF unassigned
2859             MEETEI_MAYEK,
2860             HANGUL_SYLLABLES,
2861             HANGUL_JAMO_EXTENDED_B,
2862             HIGH_SURROGATES,
2863             HIGH_PRIVATE_USE_SURROGATES,
2864             LOW_SURROGATES,
2865             PRIVATE_USE_AREA,
2866             CJK_COMPATIBILITY_IDEOGRAPHS,
2867             ALPHABETIC_PRESENTATION_FORMS,
2868             ARABIC_PRESENTATION_FORMS_A,
2869             VARIATION_SELECTORS,
2870             VERTICAL_FORMS,
2871             COMBINING_HALF_MARKS,
2872             CJK_COMPATIBILITY_FORMS,
2873             SMALL_FORM_VARIANTS,
2874             ARABIC_PRESENTATION_FORMS_B,
2875             HALFWIDTH_AND_FULLWIDTH_FORMS,
2876             SPECIALS,
2877             LINEAR_B_SYLLABARY,
2878             LINEAR_B_IDEOGRAMS,
2879             AEGEAN_NUMBERS,
2880             ANCIENT_GREEK_NUMBERS,
2881             ANCIENT_SYMBOLS,
2882             PHAISTOS_DISC,
2883             null,   // 10200..1027F unassigned
2884             LYCIAN,
2885             CARIAN,
2886             null,   // 102E0..102FF unassigned
2887             OLD_ITALIC,
2888             GOTHIC,
2889             null,   // 10350..1037F unassigned
2890             UGARITIC,
2891             OLD_PERSIAN,
2892             null,   // 103E0..103FF unassigned
2893             DESERET,
2894             SHAVIAN,
2895             OSMANYA,
2896             null,   // 104B0..107FF unassigned
2897             CYPRIOT_SYLLABARY,
2898             IMPERIAL_ARAMAIC,
2899             null,   // 10860..108FF unassigned
2900             PHOENICIAN,
2901             LYDIAN,
2902             null,   // 10940..109FF unassigned
2903             KHAROSHTHI,
2904             OLD_SOUTH_ARABIAN,
2905             null,   // 10A80..10AFF unassigned
2906             AVESTAN,
2907             INSCRIPTIONAL_PARTHIAN,
2908             INSCRIPTIONAL_PAHLAVI,
2909             null,   // 10B80..10BFF unassigned
2910             OLD_TURKIC,
2911             null,   // 10C50..10E5F unassigned
2912             RUMI_NUMERAL_SYMBOLS,
2913             null,   // 10E80..10FFF unassigned
2914             BRAHMI,
2915             KAITHI,
2916             null,   // 110D0..11FFF unassigned
2917             CUNEIFORM,
2918             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2919             null,   // 12480..12FFF unassigned
2920             EGYPTIAN_HIEROGLYPHS,
2921             null,   // 13430..167FF unassigned
2922             BAMUM_SUPPLEMENT,
2923             null,   // 16A40..1AFFF unassigned
2924             KANA_SUPPLEMENT,
2925             null,   // 1B100..1CFFF unassigned
2926             BYZANTINE_MUSICAL_SYMBOLS,
2927             MUSICAL_SYMBOLS,
2928             ANCIENT_GREEK_MUSICAL_NOTATION,
2929             null,   // 1D250..1D2FF unassigned
2930             TAI_XUAN_JING_SYMBOLS,
2931             COUNTING_ROD_NUMERALS,
2932             null,   // 1D380..1D3FF unassigned
2933             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2934             null,   // 1D800..1EFFF unassigned
2935             MAHJONG_TILES,
2936             DOMINO_TILES,
2937             PLAYING_CARDS,
2938             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2939             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2940             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2941             EMOTICONS,
2942             null,   // 1F650..1F67F unassigned
2943             TRANSPORT_AND_MAP_SYMBOLS,
2944             ALCHEMICAL_SYMBOLS,
2945             null,   // 1F780..1FFFF unassigned
2946             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2947             null,   // 2A6E0..2A6FF unassigned
2948             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2949             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2950             null,   // 2B820..2F7FF unassigned
2951             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2952             null,   // 2FA20..DFFFF unassigned
2953             TAGS,
2954             null,   // E0080..E00FF unassigned
2955             VARIATION_SELECTORS_SUPPLEMENT,
2956             null,   // E01F0..EFFFF unassigned
2957             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2958             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2959         };
2960 
2961 
2962         /**
2963          * Looks up the Unicode block to which the given <code>char</code>
2964          * value belongs. The result is <code>null</code> when the value
2965          * falls outside every defined block.
2966          *
2967          * <p><b>Note:</b> A <code>char</code> cannot represent a
2968          * <a href="Character.html#supplementary">supplementary
2969          * character</a>. To look up any Unicode code point, supplementary
2970          * characters included, call {@link #of(int)} instead.
2971          *
2972          * @param   c  the character whose block is requested
2973          * @return  the <code>UnicodeBlock</code> instance for the block
2974          *          that contains <code>c</code>, or <code>null</code> if
2975          *          <code>c</code> is not a member of any Unicode block
2976          * @see     #of(int)
2977          */
2978         public static UnicodeBlock of(char c) {
                 // Widen to int so that the code point overload does the lookup.
2979             final int codePoint = c;
                 return of(codePoint);
2980         }
2981 
2982         /**
2983          * Returns the object representing the Unicode block
2984          * containing the given character (Unicode code point), or
2985          * <code>null</code> if the character is not a member of a
2986          * defined block.
2987          *
2988          * @param   codePoint the character (Unicode code point) in question.
2989          * @return  The <code>UnicodeBlock</code> instance representing the
2990          *          Unicode block of which this character is a member, or
2991          *          <code>null</code> if the character is not a member of any
2992          *          Unicode block
2993          * @exception IllegalArgumentException if the specified
2994          * <code>codePoint</code> is an invalid Unicode code point.
2995          * @see Character#isValidCodePoint(int)
2996          * @since   1.5
2997          */
2998         public static UnicodeBlock of(int codePoint) {
2999             if (!isValidCodePoint(codePoint)) {
                     // Report the rejected value so the failure can be diagnosed.
3000                 throw new IllegalArgumentException(
                         String.format("Not a valid Unicode code point: 0x%X",
                                       codePoint));
3001             }
3002 
3003             int top, bottom, current;
3004             bottom = 0;
3005             top = blockStarts.length;
3006             current = top/2;
3007 
                 // Binary search over the ascending range starts in blockStarts.
3008             // invariant: top > current >= bottom && codePoint >= blockStarts[bottom]
3009             while (top - bottom > 1) {
3010                 if (codePoint >= blockStarts[current]) {
3011                     bottom = current;
3012                 } else {
3013                     top = current;
3014                 }
3015                 current = (top + bottom) / 2;
3016             }
                 // Here current == bottom: the last range whose start does not
                 // exceed codePoint.  Its blocks entry is null for unassigned ranges.
3017             return blocks[current];
3018         }
3019 
3020         /**
3021          * Returns the UnicodeBlock with the given name. Block
3022          * names are determined by The Unicode Standard. The file
3023          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3024          * version of the standard. The {@link Character} class specifies
3025          * the version of the standard that it supports.
3026          * <p>
3027          * This method accepts block names in the following forms:
3028          * <ol>
3029          * <li> Canonical block names as defined by the Unicode Standard.
3030          * For example, the standard defines a "Basic Latin" block. Therefore, this
3031          * method accepts "Basic Latin" as a valid block name. The documentation of
3032          * each UnicodeBlock provides the canonical name.
3033          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3034          * is a valid block name for the "Basic Latin" block.
3035          * <li>The text representation of each constant UnicodeBlock identifier.
3036          * For example, this method will return the {@link #BASIC_LATIN} block if
3037          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3038          * hyphens in the canonical name with underscores.
3039          * </ol>
3040          * Finally, character case is ignored for all of the valid block name forms.
3041          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3042          * The en_US locale's case mapping rules are used to provide case-insensitive
3043          * string comparisons for block name validation.
3044          * <p>
3045          * If the Unicode Standard changes block names, both the previous and
3046          * current names will be accepted.
3047          *
3048          * @param blockName A <code>UnicodeBlock</code> name.
3049          * @return The <code>UnicodeBlock</code> instance identified
3050          *         by <code>blockName</code>
3051          * @throws IllegalArgumentException if <code>blockName</code> is an
3052          *         invalid name
3053          * @throws NullPointerException if <code>blockName</code> is null
3054          * @since 1.5
3055          */
3056         public static final UnicodeBlock forName(String blockName) {
                 // Upper-case with Locale.US before the lookup so that matching is
                 // case-insensitive; a null blockName fails here with the
                 // documented NullPointerException.
3057             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3058             if (block == null) {
                     // Report the rejected name so the failure can be diagnosed.
3059                 throw new IllegalArgumentException(
                         "Not a valid block name: " + blockName);
3060             }
3061             return block;
3062         }
3063     }
3064 
3065 
3066     /**
3067      * A family of character subsets representing the character scripts
3068      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3069      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3070      * character is assigned to a single Unicode script, either a specific
3071      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3072      * one of the following three special values,
3073      * {@link Character.UnicodeScript#INHERITED Inherited},
3074      * {@link Character.UnicodeScript#COMMON Common} or
3075      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3076      *
3077      * @since 1.7
3078      */
3079     public static enum UnicodeScript {
3080         /**
3081          * Unicode script "Common".
3082          */
3083         COMMON,
3084 
3085         /**
3086          * Unicode script "Latin".
3087          */
3088         LATIN,
3089 
3090         /**
3091          * Unicode script "Greek".
3092          */
3093         GREEK,
3094 
3095         /**
3096          * Unicode script "Cyrillic".
3097          */
3098         CYRILLIC,
3099 
3100         /**
3101          * Unicode script "Armenian".
3102          */
3103         ARMENIAN,
3104 
3105         /**
3106          * Unicode script "Hebrew".
3107          */
3108         HEBREW,
3109 
3110         /**
3111          * Unicode script "Arabic".
3112          */
3113         ARABIC,
3114 
3115         /**
3116          * Unicode script "Syriac".
3117          */
3118         SYRIAC,
3119 
3120         /**
3121          * Unicode script "Thaana".
3122          */
3123         THAANA,
3124 
3125         /**
3126          * Unicode script "Devanagari".
3127          */
3128         DEVANAGARI,
3129 
3130         /**
3131          * Unicode script "Bengali".
3132          */
3133         BENGALI,
3134 
3135         /**
3136          * Unicode script "Gurmukhi".
3137          */
3138         GURMUKHI,
3139 
3140         /**
3141          * Unicode script "Gujarati".
3142          */
3143         GUJARATI,
3144 
3145         /**
3146          * Unicode script "Oriya".
3147          */
3148         ORIYA,
3149 
3150         /**
3151          * Unicode script "Tamil".
3152          */
3153         TAMIL,
3154 
3155         /**
3156          * Unicode script "Telugu".
3157          */
3158         TELUGU,
3159 
3160         /**
3161          * Unicode script "Kannada".
3162          */
3163         KANNADA,
3164 
3165         /**
3166          * Unicode script "Malayalam".
3167          */
3168         MALAYALAM,
3169 
3170         /**
3171          * Unicode script "Sinhala".
3172          */
3173         SINHALA,
3174 
3175         /**
3176          * Unicode script "Thai".
3177          */
3178         THAI,
3179 
3180         /**
3181          * Unicode script "Lao".
3182          */
3183         LAO,
3184 
3185         /**
3186          * Unicode script "Tibetan".
3187          */
3188         TIBETAN,
3189 
3190         /**
3191          * Unicode script "Myanmar".
3192          */
3193         MYANMAR,
3194 
3195         /**
3196          * Unicode script "Georgian".
3197          */
3198         GEORGIAN,
3199 
3200         /**
3201          * Unicode script "Hangul".
3202          */
3203         HANGUL,
3204 
3205         /**
3206          * Unicode script "Ethiopic".
3207          */
3208         ETHIOPIC,
3209 
3210         /**
3211          * Unicode script "Cherokee".
3212          */
3213         CHEROKEE,
3214 
3215         /**
3216          * Unicode script "Canadian_Aboriginal".
3217          */
3218         CANADIAN_ABORIGINAL,
3219 
3220         /**
3221          * Unicode script "Ogham".
3222          */
3223         OGHAM,
3224 
3225         /**
3226          * Unicode script "Runic".
3227          */
3228         RUNIC,
3229 
3230         /**
3231          * Unicode script "Khmer".
3232          */
3233         KHMER,
3234 
3235         /**
3236          * Unicode script "Mongolian".
3237          */
3238         MONGOLIAN,
3239 
3240         /**
3241          * Unicode script "Hiragana".
3242          */
3243         HIRAGANA,
3244 
3245         /**
3246          * Unicode script "Katakana".
3247          */
3248         KATAKANA,
3249 
3250         /**
3251          * Unicode script "Bopomofo".
3252          */
3253         BOPOMOFO,
3254 
3255         /**
3256          * Unicode script "Han".
3257          */
3258         HAN,
3259 
3260         /**
3261          * Unicode script "Yi".
3262          */
3263         YI,
3264 
3265         /**
3266          * Unicode script "Old_Italic".
3267          */
3268         OLD_ITALIC,
3269 
3270         /**
3271          * Unicode script "Gothic".
3272          */
3273         GOTHIC,
3274 
3275         /**
3276          * Unicode script "Deseret".
3277          */
3278         DESERET,
3279 
3280         /**
3281          * Unicode script "Inherited".
3282          */
3283         INHERITED,
3284 
3285         /**
3286          * Unicode script "Tagalog".
3287          */
3288         TAGALOG,
3289 
3290         /**
3291          * Unicode script "Hanunoo".
3292          */
3293         HANUNOO,
3294 
3295         /**
3296          * Unicode script "Buhid".
3297          */
3298         BUHID,
3299 
3300         /**
3301          * Unicode script "Tagbanwa".
3302          */
3303         TAGBANWA,
3304 
3305         /**
3306          * Unicode script "Limbu".
3307          */
3308         LIMBU,
3309 
3310         /**
3311          * Unicode script "Tai_Le".
3312          */
3313         TAI_LE,
3314 
3315         /**
3316          * Unicode script "Linear_B".
3317          */
3318         LINEAR_B,
3319 
3320         /**
3321          * Unicode script "Ugaritic".
3322          */
3323         UGARITIC,
3324 
3325         /**
3326          * Unicode script "Shavian".
3327          */
3328         SHAVIAN,
3329 
3330         /**
3331          * Unicode script "Osmanya".
3332          */
3333         OSMANYA,
3334 
3335         /**
3336          * Unicode script "Cypriot".
3337          */
3338         CYPRIOT,
3339 
3340         /**
3341          * Unicode script "Braille".
3342          */
3343         BRAILLE,
3344 
3345         /**
3346          * Unicode script "Buginese".
3347          */
3348         BUGINESE,
3349 
3350         /**
3351          * Unicode script "Coptic".
3352          */
3353         COPTIC,
3354 
3355         /**
3356          * Unicode script "New_Tai_Lue".
3357          */
3358         NEW_TAI_LUE,
3359 
3360         /**
3361          * Unicode script "Glagolitic".
3362          */
3363         GLAGOLITIC,
3364 
3365         /**
3366          * Unicode script "Tifinagh".
3367          */
3368         TIFINAGH,
3369 
3370         /**
3371          * Unicode script "Syloti_Nagri".
3372          */
3373         SYLOTI_NAGRI,
3374 
3375         /**
3376          * Unicode script "Old_Persian".
3377          */
3378         OLD_PERSIAN,
3379 
3380         /**
3381          * Unicode script "Kharoshthi".
3382          */
3383         KHAROSHTHI,
3384 
3385         /**
3386          * Unicode script "Balinese".
3387          */
3388         BALINESE,
3389 
3390         /**
3391          * Unicode script "Cuneiform".
3392          */
3393         CUNEIFORM,
3394 
3395         /**
3396          * Unicode script "Phoenician".
3397          */
3398         PHOENICIAN,
3399 
3400         /**
3401          * Unicode script "Phags_Pa".
3402          */
3403         PHAGS_PA,
3404 
3405         /**
3406          * Unicode script "Nko".
3407          */
3408         NKO,
3409 
3410         /**
3411          * Unicode script "Sundanese".
3412          */
3413         SUNDANESE,
3414 
3415         /**
3416          * Unicode script "Batak".
3417          */
3418         BATAK,
3419 
3420         /**
3421          * Unicode script "Lepcha".
3422          */
3423         LEPCHA,
3424 
3425         /**
3426          * Unicode script "Ol_Chiki".
3427          */
3428         OL_CHIKI,
3429 
3430         /**
3431          * Unicode script "Vai".
3432          */
3433         VAI,
3434 
3435         /**
3436          * Unicode script "Saurashtra".
3437          */
3438         SAURASHTRA,
3439 
3440         /**
3441          * Unicode script "Kayah_Li".
3442          */
3443         KAYAH_LI,
3444 
3445         /**
3446          * Unicode script "Rejang".
3447          */
3448         REJANG,
3449 
3450         /**
3451          * Unicode script "Lycian".
3452          */
3453         LYCIAN,
3454 
3455         /**
3456          * Unicode script "Carian".
3457          */
3458         CARIAN,
3459 
3460         /**
3461          * Unicode script "Lydian".
3462          */
3463         LYDIAN,
3464 
3465         /**
3466          * Unicode script "Cham".
3467          */
3468         CHAM,
3469 
3470         /**
3471          * Unicode script "Tai_Tham".
3472          */
3473         TAI_THAM,
3474 
3475         /**
3476          * Unicode script "Tai_Viet".
3477          */
3478         TAI_VIET,
3479 
3480         /**
3481          * Unicode script "Avestan".
3482          */
3483         AVESTAN,
3484 
3485         /**
3486          * Unicode script "Egyptian_Hieroglyphs".
3487          */
3488         EGYPTIAN_HIEROGLYPHS,
3489 
3490         /**
3491          * Unicode script "Samaritan".
3492          */
3493         SAMARITAN,
3494 
3495         /**
3496          * Unicode script "Mandaic".
3497          */
3498         MANDAIC,
3499 
3500         /**
3501          * Unicode script "Lisu".
3502          */
3503         LISU,
3504 
3505         /**
3506          * Unicode script "Bamum".
3507          */
3508         BAMUM,
3509 
3510         /**
3511          * Unicode script "Javanese".
3512          */
3513         JAVANESE,
3514 
3515         /**
3516          * Unicode script "Meetei_Mayek".
3517          */
3518         MEETEI_MAYEK,
3519 
3520         /**
3521          * Unicode script "Imperial_Aramaic".
3522          */
3523         IMPERIAL_ARAMAIC,
3524 
3525         /**
3526          * Unicode script "Old_South_Arabian".
3527          */
3528         OLD_SOUTH_ARABIAN,
3529 
3530         /**
3531          * Unicode script "Inscriptional_Parthian".
3532          */
3533         INSCRIPTIONAL_PARTHIAN,
3534 
3535         /**
3536          * Unicode script "Inscriptional_Pahlavi".
3537          */
3538         INSCRIPTIONAL_PAHLAVI,
3539 
3540         /**
3541          * Unicode script "Old_Turkic".
3542          */
3543         OLD_TURKIC,
3544 
3545         /**
3546          * Unicode script "Brahmi".
3547          */
3548         BRAHMI,
3549 
3550         /**
3551          * Unicode script "Kaithi".
3552          */
3553         KAITHI,
3554 
3555         /**
3556          * Unicode script "Unknown".
3557          */
3558         UNKNOWN;
3559 
3560         private static final int[] scriptStarts = {
                 // Table of the first code point of each consecutive script run,
                 // listed in ascending order. The trailing comment on every entry
                 // records the run's code point range and the script assigned to it.
                 // NOTE(review): appears to be index-parallel with the scripts array
                 // declared below (same number of entries, same script order) --
                 // confirm against the lookup code, which is not part of this table.
                 // NOTE(review): some range comments end before the next entry's
                 // start value (e.g. 16800..16A38 is followed by 1B000); how the gap
                 // is classified depends on the lookup code -- verify there.
3561             0x0000,   // 0000..0040; COMMON
3562             0x0041,   // 0041..005A; LATIN
3563             0x005B,   // 005B..0060; COMMON
3564             0x0061,   // 0061..007A; LATIN
3565             0x007B,   // 007B..00A9; COMMON
3566             0x00AA,   // 00AA..00AA; LATIN
3567             0x00AB,   // 00AB..00B9; COMMON
3568             0x00BA,   // 00BA..00BA; LATIN
3569             0x00BB,   // 00BB..00BF; COMMON
3570             0x00C0,   // 00C0..00D6; LATIN
3571             0x00D7,   // 00D7..00D7; COMMON
3572             0x00D8,   // 00D8..00F6; LATIN
3573             0x00F7,   // 00F7..00F7; COMMON
3574             0x00F8,   // 00F8..02B8; LATIN
3575             0x02B9,   // 02B9..02DF; COMMON
3576             0x02E0,   // 02E0..02E4; LATIN
3577             0x02E5,   // 02E5..02E9; COMMON
3578             0x02EA,   // 02EA..02EB; BOPOMOFO
3579             0x02EC,   // 02EC..02FF; COMMON
3580             0x0300,   // 0300..036F; INHERITED
3581             0x0370,   // 0370..0373; GREEK
3582             0x0374,   // 0374..0374; COMMON
3583             0x0375,   // 0375..037D; GREEK
3584             0x037E,   // 037E..0383; COMMON
3585             0x0384,   // 0384..0384; GREEK
3586             0x0385,   // 0385..0385; COMMON
3587             0x0386,   // 0386..0386; GREEK
3588             0x0387,   // 0387..0387; COMMON
3589             0x0388,   // 0388..03E1; GREEK
3590             0x03E2,   // 03E2..03EF; COPTIC
3591             0x03F0,   // 03F0..03FF; GREEK
3592             0x0400,   // 0400..0484; CYRILLIC
3593             0x0485,   // 0485..0486; INHERITED
3594             0x0487,   // 0487..0530; CYRILLIC
3595             0x0531,   // 0531..0588; ARMENIAN
3596             0x0589,   // 0589..0589; COMMON
3597             0x058A,   // 058A..0590; ARMENIAN
3598             0x0591,   // 0591..05FF; HEBREW
3599             0x0600,   // 0600..060B; ARABIC
3600             0x060C,   // 060C..060C; COMMON
3601             0x060D,   // 060D..061A; ARABIC
3602             0x061B,   // 061B..061D; COMMON
3603             0x061E,   // 061E..061E; ARABIC
3604             0x061F,   // 061F..061F; COMMON
3605             0x0620,   // 0620..063F; ARABIC
3606             0x0640,   // 0640..0640; COMMON
3607             0x0641,   // 0641..064A; ARABIC
3608             0x064B,   // 064B..0655; INHERITED
3609             0x0656,   // 0656..065E; ARABIC
3610             0x065F,   // 065F..065F; INHERITED
3611             0x0660,   // 0660..0669; COMMON
3612             0x066A,   // 066A..066F; ARABIC
3613             0x0670,   // 0670..0670; INHERITED
3614             0x0671,   // 0671..06DC; ARABIC
3615             0x06DD,   // 06DD..06DD; COMMON
3616             0x06DE,   // 06DE..06FF; ARABIC
3617             0x0700,   // 0700..074F; SYRIAC
3618             0x0750,   // 0750..077F; ARABIC
3619             0x0780,   // 0780..07BF; THAANA
3620             0x07C0,   // 07C0..07FF; NKO
3621             0x0800,   // 0800..083F; SAMARITAN
3622             0x0840,   // 0840..08FF; MANDAIC
3623             0x0900,   // 0900..0950; DEVANAGARI
3624             0x0951,   // 0951..0952; INHERITED
3625             0x0953,   // 0953..0963; DEVANAGARI
3626             0x0964,   // 0964..0965; COMMON
3627             0x0966,   // 0966..096F; DEVANAGARI
3628             0x0970,   // 0970..0970; COMMON
3629             0x0971,   // 0971..0980; DEVANAGARI
3630             0x0981,   // 0981..0A00; BENGALI
3631             0x0A01,   // 0A01..0A80; GURMUKHI
3632             0x0A81,   // 0A81..0B00; GUJARATI
3633             0x0B01,   // 0B01..0B81; ORIYA
3634             0x0B82,   // 0B82..0C00; TAMIL
3635             0x0C01,   // 0C01..0C81; TELUGU
3636             0x0C82,   // 0C82..0CF0; KANNADA
3637             0x0D02,   // 0D02..0D81; MALAYALAM
3638             0x0D82,   // 0D82..0E00; SINHALA
3639             0x0E01,   // 0E01..0E3E; THAI
3640             0x0E3F,   // 0E3F..0E3F; COMMON
3641             0x0E40,   // 0E40..0E80; THAI
3642             0x0E81,   // 0E81..0EFF; LAO
3643             0x0F00,   // 0F00..0FD4; TIBETAN
3644             0x0FD5,   // 0FD5..0FD8; COMMON
3645             0x0FD9,   // 0FD9..0FFF; TIBETAN
3646             0x1000,   // 1000..109F; MYANMAR
3647             0x10A0,   // 10A0..10FA; GEORGIAN
3648             0x10FB,   // 10FB..10FB; COMMON
3649             0x10FC,   // 10FC..10FF; GEORGIAN
3650             0x1100,   // 1100..11FF; HANGUL
3651             0x1200,   // 1200..139F; ETHIOPIC
3652             0x13A0,   // 13A0..13FF; CHEROKEE
3653             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3654             0x1680,   // 1680..169F; OGHAM
3655             0x16A0,   // 16A0..16EA; RUNIC
3656             0x16EB,   // 16EB..16ED; COMMON
3657             0x16EE,   // 16EE..16FF; RUNIC
3658             0x1700,   // 1700..171F; TAGALOG
3659             0x1720,   // 1720..1734; HANUNOO
3660             0x1735,   // 1735..173F; COMMON
3661             0x1740,   // 1740..175F; BUHID
3662             0x1760,   // 1760..177F; TAGBANWA
3663             0x1780,   // 1780..17FF; KHMER
3664             0x1800,   // 1800..1801; MONGOLIAN
3665             0x1802,   // 1802..1803; COMMON
3666             0x1804,   // 1804..1804; MONGOLIAN
3667             0x1805,   // 1805..1805; COMMON
3668             0x1806,   // 1806..18AF; MONGOLIAN
3669             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3670             0x1900,   // 1900..194F; LIMBU
3671             0x1950,   // 1950..197F; TAI_LE
3672             0x1980,   // 1980..19DF; NEW_TAI_LUE
3673             0x19E0,   // 19E0..19FF; KHMER
3674             0x1A00,   // 1A00..1A1F; BUGINESE
3675             0x1A20,   // 1A20..1AFF; TAI_THAM
3676             0x1B00,   // 1B00..1B7F; BALINESE
3677             0x1B80,   // 1B80..1BBF; SUNDANESE
3678             0x1BC0,   // 1BC0..1BFF; BATAK
3679             0x1C00,   // 1C00..1C4F; LEPCHA
3680             0x1C50,   // 1C50..1CCF; OL_CHIKI
3681             0x1CD0,   // 1CD0..1CD2; INHERITED
3682             0x1CD3,   // 1CD3..1CD3; COMMON
3683             0x1CD4,   // 1CD4..1CE0; INHERITED
3684             0x1CE1,   // 1CE1..1CE1; COMMON
3685             0x1CE2,   // 1CE2..1CE8; INHERITED
3686             0x1CE9,   // 1CE9..1CEC; COMMON
3687             0x1CED,   // 1CED..1CED; INHERITED
3688             0x1CEE,   // 1CEE..1CFF; COMMON
3689             0x1D00,   // 1D00..1D25; LATIN
3690             0x1D26,   // 1D26..1D2A; GREEK
3691             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3692             0x1D2C,   // 1D2C..1D5C; LATIN
3693             0x1D5D,   // 1D5D..1D61; GREEK
3694             0x1D62,   // 1D62..1D65; LATIN
3695             0x1D66,   // 1D66..1D6A; GREEK
3696             0x1D6B,   // 1D6B..1D77; LATIN
3697             0x1D78,   // 1D78..1D78; CYRILLIC
3698             0x1D79,   // 1D79..1DBE; LATIN
3699             0x1DBF,   // 1DBF..1DBF; GREEK
3700             0x1DC0,   // 1DC0..1DFF; INHERITED
3701             0x1E00,   // 1E00..1EFF; LATIN
3702             0x1F00,   // 1F00..1FFF; GREEK
3703             0x2000,   // 2000..200B; COMMON
3704             0x200C,   // 200C..200D; INHERITED
3705             0x200E,   // 200E..2070; COMMON
3706             0x2071,   // 2071..2073; LATIN
3707             0x2074,   // 2074..207E; COMMON
3708             0x207F,   // 207F..207F; LATIN
3709             0x2080,   // 2080..208F; COMMON
3710             0x2090,   // 2090..209F; LATIN
3711             0x20A0,   // 20A0..20CF; COMMON
3712             0x20D0,   // 20D0..20FF; INHERITED
3713             0x2100,   // 2100..2125; COMMON
3714             0x2126,   // 2126..2126; GREEK
3715             0x2127,   // 2127..2129; COMMON
3716             0x212A,   // 212A..212B; LATIN
3717             0x212C,   // 212C..2131; COMMON
3718             0x2132,   // 2132..2132; LATIN
3719             0x2133,   // 2133..214D; COMMON
3720             0x214E,   // 214E..214E; LATIN
3721             0x214F,   // 214F..215F; COMMON
3722             0x2160,   // 2160..2188; LATIN
3723             0x2189,   // 2189..27FF; COMMON
3724             0x2800,   // 2800..28FF; BRAILLE
3725             0x2900,   // 2900..2BFF; COMMON
3726             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3727             0x2C60,   // 2C60..2C7F; LATIN
3728             0x2C80,   // 2C80..2CFF; COPTIC
3729             0x2D00,   // 2D00..2D2F; GEORGIAN
3730             0x2D30,   // 2D30..2D7F; TIFINAGH
3731             0x2D80,   // 2D80..2DDF; ETHIOPIC
3732             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3733             0x2E00,   // 2E00..2E7F; COMMON
3734             0x2E80,   // 2E80..2FEF; HAN
3735             0x2FF0,   // 2FF0..3004; COMMON
3736             0x3005,   // 3005..3005; HAN
3737             0x3006,   // 3006..3006; COMMON
3738             0x3007,   // 3007..3007; HAN
3739             0x3008,   // 3008..3020; COMMON
3740             0x3021,   // 3021..3029; HAN
3741             0x302A,   // 302A..302D; INHERITED
3742             0x302E,   // 302E..302F; HANGUL
3743             0x3030,   // 3030..3037; COMMON
3744             0x3038,   // 3038..303B; HAN
3745             0x303C,   // 303C..3040; COMMON
3746             0x3041,   // 3041..3098; HIRAGANA
3747             0x3099,   // 3099..309A; INHERITED
3748             0x309B,   // 309B..309C; COMMON
3749             0x309D,   // 309D..309F; HIRAGANA
3750             0x30A0,   // 30A0..30A0; COMMON
3751             0x30A1,   // 30A1..30FA; KATAKANA
3752             0x30FB,   // 30FB..30FC; COMMON
3753             0x30FD,   // 30FD..3104; KATAKANA
3754             0x3105,   // 3105..3130; BOPOMOFO
3755             0x3131,   // 3131..318F; HANGUL
3756             0x3190,   // 3190..319F; COMMON
3757             0x31A0,   // 31A0..31BF; BOPOMOFO
3758             0x31C0,   // 31C0..31EF; COMMON
3759             0x31F0,   // 31F0..31FF; KATAKANA
3760             0x3200,   // 3200..321F; HANGUL
3761             0x3220,   // 3220..325F; COMMON
3762             0x3260,   // 3260..327E; HANGUL
3763             0x327F,   // 327F..32CF; COMMON
3764             0x32D0,   // 32D0..3357; KATAKANA
3765             0x3358,   // 3358..33FF; COMMON
3766             0x3400,   // 3400..4DBF; HAN
3767             0x4DC0,   // 4DC0..4DFF; COMMON
3768             0x4E00,   // 4E00..9FFF; HAN
3769             0xA000,   // A000..A4CF; YI
3770             0xA4D0,   // A4D0..A4FF; LISU
3771             0xA500,   // A500..A63F; VAI
3772             0xA640,   // A640..A69F; CYRILLIC
3773             0xA6A0,   // A6A0..A6FF; BAMUM
3774             0xA700,   // A700..A721; COMMON
3775             0xA722,   // A722..A787; LATIN
3776             0xA788,   // A788..A78A; COMMON
3777             0xA78B,   // A78B..A7FF; LATIN
3778             0xA800,   // A800..A82F; SYLOTI_NAGRI
3779             0xA830,   // A830..A83F; COMMON
3780             0xA840,   // A840..A87F; PHAGS_PA
3781             0xA880,   // A880..A8DF; SAURASHTRA
3782             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3783             0xA900,   // A900..A92F; KAYAH_LI
3784             0xA930,   // A930..A95F; REJANG
3785             0xA960,   // A960..A97F; HANGUL
3786             0xA980,   // A980..A9FF; JAVANESE
3787             0xAA00,   // AA00..AA5F; CHAM
3788             0xAA60,   // AA60..AA7F; MYANMAR
3789             0xAA80,   // AA80..AB00; TAI_VIET
3790             0xAB01,   // AB01..ABBF; ETHIOPIC
3791             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3792             0xAC00,   // AC00..D7FB; HANGUL
3793             0xD7FC,   // D7FC..F8FF; UNKNOWN
3794             0xF900,   // F900..FAFF; HAN
3795             0xFB00,   // FB00..FB12; LATIN
3796             0xFB13,   // FB13..FB1C; ARMENIAN
3797             0xFB1D,   // FB1D..FB4F; HEBREW
3798             0xFB50,   // FB50..FD3D; ARABIC
3799             0xFD3E,   // FD3E..FD4F; COMMON
3800             0xFD50,   // FD50..FDFC; ARABIC
3801             0xFDFD,   // FDFD..FDFF; COMMON
3802             0xFE00,   // FE00..FE0F; INHERITED
3803             0xFE10,   // FE10..FE1F; COMMON
3804             0xFE20,   // FE20..FE2F; INHERITED
3805             0xFE30,   // FE30..FE6F; COMMON
3806             0xFE70,   // FE70..FEFE; ARABIC
3807             0xFEFF,   // FEFF..FF20; COMMON
3808             0xFF21,   // FF21..FF3A; LATIN
3809             0xFF3B,   // FF3B..FF40; COMMON
3810             0xFF41,   // FF41..FF5A; LATIN
3811             0xFF5B,   // FF5B..FF65; COMMON
3812             0xFF66,   // FF66..FF6F; KATAKANA
3813             0xFF70,   // FF70..FF70; COMMON
3814             0xFF71,   // FF71..FF9D; KATAKANA
3815             0xFF9E,   // FF9E..FF9F; COMMON
3816             0xFFA0,   // FFA0..FFDF; HANGUL
3817             0xFFE0,   // FFE0..FFFF; COMMON
3818             0x10000,  // 10000..100FF; LINEAR_B
3819             0x10100,  // 10100..1013F; COMMON
3820             0x10140,  // 10140..1018F; GREEK
3821             0x10190,  // 10190..101FC; COMMON
3822             0x101FD,  // 101FD..1027F; INHERITED
3823             0x10280,  // 10280..1029F; LYCIAN
3824             0x102A0,  // 102A0..102FF; CARIAN
3825             0x10300,  // 10300..1032F; OLD_ITALIC
3826             0x10330,  // 10330..1037F; GOTHIC
3827             0x10380,  // 10380..1039F; UGARITIC
3828             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3829             0x10400,  // 10400..1044F; DESERET
3830             0x10450,  // 10450..1047F; SHAVIAN
3831             0x10480,  // 10480..107FF; OSMANYA
3832             0x10800,  // 10800..1083F; CYPRIOT
3833             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3834             0x10900,  // 10900..1091F; PHOENICIAN
3835             0x10920,  // 10920..109FF; LYDIAN
3836             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3837             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3838             0x10B00,  // 10B00..10B3F; AVESTAN
3839             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3840             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3841             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3842             0x10E60,  // 10E60..10FFF; ARABIC
3843             0x11000,  // 11000..1107F; BRAHMI
3844             0x11080,  // 11080..11FFF; KAITHI
3845             0x12000,  // 12000..12FFF; CUNEIFORM
3846             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3847             0x16800,  // 16800..16A38; BAMUM
3848             0x1B000,  // 1B000..1B000; KATAKANA
3849             0x1B001,  // 1B001..1CFFF; HIRAGANA
3850             0x1D000,  // 1D000..1D166; COMMON
3851             0x1D167,  // 1D167..1D169; INHERITED
3852             0x1D16A,  // 1D16A..1D17A; COMMON
3853             0x1D17B,  // 1D17B..1D182; INHERITED
3854             0x1D183,  // 1D183..1D184; COMMON
3855             0x1D185,  // 1D185..1D18B; INHERITED
3856             0x1D18C,  // 1D18C..1D1A9; COMMON
3857             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
3858             0x1D1AE,  // 1D1AE..1D1FF; COMMON
3859             0x1D200,  // 1D200..1D2FF; GREEK
3860             0x1D300,  // 1D300..1F1FF; COMMON
3861             0x1F200,  // 1F200..1F200; HIRAGANA
3862             0x1F201,  // 1F201..1FFFF; COMMON
3863             0x20000,  // 20000..E0000; HAN
3864             0xE0001,  // E0001..E00FF; COMMON
3865             0xE0100,  // E0100..E01EF; INHERITED
3866             0xE01F0   // E01F0..10FFFF; UNKNOWN
3867 
3868         };
3869 
3870         private static final UnicodeScript[] scripts = {
                 // Table of UnicodeScript constants, one per script run.
                 // NOTE(review): appears to be index-parallel with the scriptStarts
                 // array declared above (same number of entries; the order matches the
                 // script names in that table's range comments), so entry i would be
                 // the script of the run starting at scriptStarts[i] -- confirm against
                 // the lookup code. Entries must not be reordered independently.
3871             COMMON,
3872             LATIN,
3873             COMMON,
3874             LATIN,
3875             COMMON,
3876             LATIN,
3877             COMMON,
3878             LATIN,
3879             COMMON,
3880             LATIN,
3881             COMMON,
3882             LATIN,
3883             COMMON,
3884             LATIN,
3885             COMMON,
3886             LATIN,
3887             COMMON,
3888             BOPOMOFO,
3889             COMMON,
3890             INHERITED,
3891             GREEK,
3892             COMMON,
3893             GREEK,
3894             COMMON,
3895             GREEK,
3896             COMMON,
3897             GREEK,
3898             COMMON,
3899             GREEK,
3900             COPTIC,
3901             GREEK,
3902             CYRILLIC,
3903             INHERITED,
3904             CYRILLIC,
3905             ARMENIAN,
3906             COMMON,
3907             ARMENIAN,
3908             HEBREW,
3909             ARABIC,
3910             COMMON,
3911             ARABIC,
3912             COMMON,
3913             ARABIC,
3914             COMMON,
3915             ARABIC,
3916             COMMON,
3917             ARABIC,
3918             INHERITED,
3919             ARABIC,
3920             INHERITED,
3921             COMMON,
3922             ARABIC,
3923             INHERITED,
3924             ARABIC,
3925             COMMON,
3926             ARABIC,
3927             SYRIAC,
3928             ARABIC,
3929             THAANA,
3930             NKO,
3931             SAMARITAN,
3932             MANDAIC,
3933             DEVANAGARI,
3934             INHERITED,
3935             DEVANAGARI,
3936             COMMON,
3937             DEVANAGARI,
3938             COMMON,
3939             DEVANAGARI,
3940             BENGALI,
3941             GURMUKHI,
3942             GUJARATI,
3943             ORIYA,
3944             TAMIL,
3945             TELUGU,
3946             KANNADA,
3947             MALAYALAM,
3948             SINHALA,
3949             THAI,
3950             COMMON,
3951             THAI,
3952             LAO,
3953             TIBETAN,
3954             COMMON,
3955             TIBETAN,
3956             MYANMAR,
3957             GEORGIAN,
3958             COMMON,
3959             GEORGIAN,
3960             HANGUL,
3961             ETHIOPIC,
3962             CHEROKEE,
3963             CANADIAN_ABORIGINAL,
3964             OGHAM,
3965             RUNIC,
3966             COMMON,
3967             RUNIC,
3968             TAGALOG,
3969             HANUNOO,
3970             COMMON,
3971             BUHID,
3972             TAGBANWA,
3973             KHMER,
3974             MONGOLIAN,
3975             COMMON,
3976             MONGOLIAN,
3977             COMMON,
3978             MONGOLIAN,
3979             CANADIAN_ABORIGINAL,
3980             LIMBU,
3981             TAI_LE,
3982             NEW_TAI_LUE,
3983             KHMER,
3984             BUGINESE,
3985             TAI_THAM,
3986             BALINESE,
3987             SUNDANESE,
3988             BATAK,
3989             LEPCHA,
3990             OL_CHIKI,
3991             INHERITED,
3992             COMMON,
3993             INHERITED,
3994             COMMON,
3995             INHERITED,
3996             COMMON,
3997             INHERITED,
3998             COMMON,
3999             LATIN,
4000             GREEK,
4001             CYRILLIC,
4002             LATIN,
4003             GREEK,
4004             LATIN,
4005             GREEK,
4006             LATIN,
4007             CYRILLIC,
4008             LATIN,
4009             GREEK,
4010             INHERITED,
4011             LATIN,
4012             GREEK,
4013             COMMON,
4014             INHERITED,
4015             COMMON,
4016             LATIN,
4017             COMMON,
4018             LATIN,
4019             COMMON,
4020             LATIN,
4021             COMMON,
4022             INHERITED,
4023             COMMON,
4024             GREEK,
4025             COMMON,
4026             LATIN,
4027             COMMON,
4028             LATIN,
4029             COMMON,
4030             LATIN,
4031             COMMON,
4032             LATIN,
4033             COMMON,
4034             BRAILLE,
4035             COMMON,
4036             GLAGOLITIC,
4037             LATIN,
4038             COPTIC,
4039             GEORGIAN,
4040             TIFINAGH,
4041             ETHIOPIC,
4042             CYRILLIC,
4043             COMMON,
4044             HAN,
4045             COMMON,
4046             HAN,
4047             COMMON,
4048             HAN,
4049             COMMON,
4050             HAN,
4051             INHERITED,
4052             HANGUL,
4053             COMMON,
4054             HAN,
4055             COMMON,
4056             HIRAGANA,
4057             INHERITED,
4058             COMMON,
4059             HIRAGANA,
4060             COMMON,
4061             KATAKANA,
4062             COMMON,
4063             KATAKANA,
4064             BOPOMOFO,
4065             HANGUL,
4066             COMMON,
4067             BOPOMOFO,
4068             COMMON,
4069             KATAKANA,
4070             HANGUL,
4071             COMMON,
4072             HANGUL,
4073             COMMON,
4074             KATAKANA,
4075             COMMON,
4076             HAN,
4077             COMMON,
4078             HAN,
4079             YI,
4080             LISU,
4081             VAI,
4082             CYRILLIC,
4083             BAMUM,
4084             COMMON,
4085             LATIN,
4086             COMMON,
4087             LATIN,
4088             SYLOTI_NAGRI,
4089             COMMON,
4090             PHAGS_PA,
4091             SAURASHTRA,
4092             DEVANAGARI,
4093             KAYAH_LI,
4094             REJANG,
4095             HANGUL,
4096             JAVANESE,
4097             CHAM,
4098             MYANMAR,
4099             TAI_VIET,
4100             ETHIOPIC,
4101             MEETEI_MAYEK,
4102             HANGUL,
4103             UNKNOWN,
4104             HAN,
4105             LATIN,
4106             ARMENIAN,
4107             HEBREW,
4108             ARABIC,
4109             COMMON,
4110             ARABIC,
4111             COMMON,
4112             INHERITED,
4113             COMMON,
4114             INHERITED,
4115             COMMON,
4116             ARABIC,
4117             COMMON,
4118             LATIN,
4119             COMMON,
4120             LATIN,
4121             COMMON,
4122             KATAKANA,
4123             COMMON,
4124             KATAKANA,
4125             COMMON,
4126             HANGUL,
4127             COMMON,
4128             LINEAR_B,
4129             COMMON,
4130             GREEK,
4131             COMMON,
4132             INHERITED,
4133             LYCIAN,
4134             CARIAN,
4135             OLD_ITALIC,
4136             GOTHIC,
4137             UGARITIC,
4138             OLD_PERSIAN,
4139             DESERET,
4140             SHAVIAN,
4141             OSMANYA,
4142             CYPRIOT,
4143             IMPERIAL_ARAMAIC,
4144             PHOENICIAN,
4145             LYDIAN,
4146             KHAROSHTHI,
4147             OLD_SOUTH_ARABIAN,
4148             AVESTAN,
4149             INSCRIPTIONAL_PARTHIAN,
4150             INSCRIPTIONAL_PAHLAVI,
4151             OLD_TURKIC,
4152             ARABIC,
4153             BRAHMI,
4154             KAITHI,
4155             CUNEIFORM,
4156             EGYPTIAN_HIEROGLYPHS,
4157             BAMUM,
4158             KATAKANA,
4159             HIRAGANA,
4160             COMMON,
4161             INHERITED,
4162             COMMON,
4163             INHERITED,
4164             COMMON,
4165             INHERITED,
4166             COMMON,
4167             INHERITED,
4168             COMMON,
4169             GREEK,
4170             COMMON,
4171             HIRAGANA,
4172             COMMON,
4173             HAN,
4174             COMMON,
4175             INHERITED,
4176             UNKNOWN
4177         };
4178 
4179         private static HashMap<String, Character.UnicodeScript> aliases;
4180         static {
4181             aliases = new HashMap<>(128);
4182             aliases.put("ARAB", ARABIC);
4183             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4184             aliases.put("ARMN", ARMENIAN);
4185             aliases.put("AVST", AVESTAN);
4186             aliases.put("BALI", BALINESE);
4187             aliases.put("BAMU", BAMUM);
4188             aliases.put("BENG", BENGALI);
4189             aliases.put("BOPO", BOPOMOFO);
4190             aliases.put("BRAI", BRAILLE);
4191             aliases.put("BUGI", BUGINESE);
4192             aliases.put("BUHD", BUHID);
4193             aliases.put("CANS", CANADIAN_ABORIGINAL);
4194             aliases.put("CARI", CARIAN);
4195             aliases.put("CHAM", CHAM);
4196             aliases.put("CHER", CHEROKEE);
4197             aliases.put("COPT", COPTIC);
4198             aliases.put("CPRT", CYPRIOT);
4199             aliases.put("CYRL", CYRILLIC);
4200             aliases.put("DEVA", DEVANAGARI);
4201             aliases.put("DSRT", DESERET);
4202             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4203             aliases.put("ETHI", ETHIOPIC);
4204             aliases.put("GEOR", GEORGIAN);
4205             aliases.put("GLAG", GLAGOLITIC);
4206             aliases.put("GOTH", GOTHIC);
4207             aliases.put("GREK", GREEK);
4208             aliases.put("GUJR", GUJARATI);
4209             aliases.put("GURU", GURMUKHI);
4210             aliases.put("HANG", HANGUL);
4211             aliases.put("HANI", HAN);
4212             aliases.put("HANO", HANUNOO);
4213             aliases.put("HEBR", HEBREW);
4214             aliases.put("HIRA", HIRAGANA);
4215             // it appears we don't have the KATAKANA_OR_HIRAGANA
4216             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4217             aliases.put("ITAL", OLD_ITALIC);
4218             aliases.put("JAVA", JAVANESE);
4219             aliases.put("KALI", KAYAH_LI);
4220             aliases.put("KANA", KATAKANA);
4221             aliases.put("KHAR", KHAROSHTHI);
4222             aliases.put("KHMR", KHMER);
4223             aliases.put("KNDA", KANNADA);
4224             aliases.put("KTHI", KAITHI);
4225             aliases.put("LANA", TAI_THAM);
4226             aliases.put("LAOO", LAO);
4227             aliases.put("LATN", LATIN);
4228             aliases.put("LEPC", LEPCHA);
4229             aliases.put("LIMB", LIMBU);
4230             aliases.put("LINB", LINEAR_B);
4231             aliases.put("LISU", LISU);
4232             aliases.put("LYCI", LYCIAN);
4233             aliases.put("LYDI", LYDIAN);
4234             aliases.put("MLYM", MALAYALAM);
4235             aliases.put("MONG", MONGOLIAN);
4236             aliases.put("MTEI", MEETEI_MAYEK);
4237             aliases.put("MYMR", MYANMAR);
4238             aliases.put("NKOO", NKO);
4239             aliases.put("OGAM", OGHAM);
4240             aliases.put("OLCK", OL_CHIKI);
4241             aliases.put("ORKH", OLD_TURKIC);
4242             aliases.put("ORYA", ORIYA);
4243             aliases.put("OSMA", OSMANYA);
4244             aliases.put("PHAG", PHAGS_PA);
4245             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4246             aliases.put("PHNX", PHOENICIAN);
4247             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4248             aliases.put("RJNG", REJANG);
4249             aliases.put("RUNR", RUNIC);
4250             aliases.put("SAMR", SAMARITAN);
4251             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4252             aliases.put("SAUR", SAURASHTRA);
4253             aliases.put("SHAW", SHAVIAN);
4254             aliases.put("SINH", SINHALA);
4255             aliases.put("SUND", SUNDANESE);
4256             aliases.put("SYLO", SYLOTI_NAGRI);
4257             aliases.put("SYRC", SYRIAC);
4258             aliases.put("TAGB", TAGBANWA);
4259             aliases.put("TALE", TAI_LE);
4260             aliases.put("TALU", NEW_TAI_LUE);
4261             aliases.put("TAML", TAMIL);
4262             aliases.put("TAVT", TAI_VIET);
4263             aliases.put("TELU", TELUGU);
4264             aliases.put("TFNG", TIFINAGH);
4265             aliases.put("TGLG", TAGALOG);
4266             aliases.put("THAA", THAANA);
4267             aliases.put("THAI", THAI);
4268             aliases.put("TIBT", TIBETAN);
4269             aliases.put("UGAR", UGARITIC);
4270             aliases.put("VAII", VAI);
4271             aliases.put("XPEO", OLD_PERSIAN);
4272             aliases.put("XSUX", CUNEIFORM);
4273             aliases.put("YIII", YI);
4274             aliases.put("ZINH", INHERITED);
4275             aliases.put("ZYYY", COMMON);
4276             aliases.put("ZZZZ", UNKNOWN);
4277         }
4278 
4279         /**
4280          * Returns the enum constant representing the Unicode script of which
4281          * the given character (Unicode code point) is assigned to.
4282          *
4283          * @param   codePoint the character (Unicode code point) in question.
4284          * @return  The <code>UnicodeScript</code> constant representing the
4285          *          Unicode script of which this character is assigned to.
4286          *
4287          * @exception IllegalArgumentException if the specified
4288          * <code>codePoint</code> is an invalid Unicode code point.
4289          * @see Character#isValidCodePoint(int)
4290          *
4291          */
4292         public static UnicodeScript of(int codePoint) {
4293             if (!isValidCodePoint(codePoint))
4294                 throw new IllegalArgumentException();
4295             int type = getType(codePoint);
4296             // leave SURROGATE and PRIVATE_USE for table lookup
4297             if (type == UNASSIGNED)
4298                 return UNKNOWN;
4299             int index = Arrays.binarySearch(scriptStarts, codePoint);
4300             if (index < 0)
4301                 index = -index - 2;
4302             return scripts[index];
4303         }
4304 
4305         /**
4306          * Returns the UnicodeScript constant with the given Unicode script
4307          * name or the script name alias. Script names and their aliases are
4308          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4309          * and PropertyValueAliases&lt;version&gt;.txt define script names
4310          * and the script name aliases for a particular version of the
4311          * standard. The {@link Character} class specifies the version of
4312          * the standard that it supports.
4313          * <p>
4314          * Character case is ignored for all of the valid script names.
4315          * The en_US locale's case mapping rules are used to provide
4316          * case-insensitive string comparisons for script name validation.
4317          * <p>
4318          *
4319          * @param scriptName A <code>UnicodeScript</code> name.
4320          * @return The <code>UnicodeScript</code> constant identified
4321          *         by <code>scriptName</code>
4322          * @throws IllegalArgumentException if <code>scriptName</code> is an
4323          *         invalid name
4324          * @throws NullPointerException if <code>scriptName</code> is null
4325          */
4326         public static final UnicodeScript forName(String scriptName) {
4327             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4328                                  //.replace(' ', '_'));
4329             UnicodeScript sc = aliases.get(scriptName);
4330             if (sc != null)
4331                 return sc;
4332             return valueOf(scriptName);
4333         }
4334     }
4335 
    /**
     * The value of the <code>Character</code>. It is assigned once by the
     * constructor and never changes afterwards.
     *
     * @serial
     */
    private final char value;

    /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;

    /**
     * Constructs a newly allocated <code>Character</code> object that
     * represents the specified <code>char</code> value.
     *
     * @param  value   the value to be represented by the
     *                  <code>Character</code> object.
     */
    public Character(char value) {
        this.value = value;
    }
4356 
4357     private static class CharacterCache {
4358         private CharacterCache(){}
4359 
4360         static final Character cache[] = new Character[127 + 1];
4361 
4362         static {
4363             for (int i = 0; i < cache.length; i++)
4364                 cache[i] = new Character((char)i);
4365         }
4366     }
4367 
4368     /**
4369      * Returns a <tt>Character</tt> instance representing the specified
4370      * <tt>char</tt> value.
4371      * If a new <tt>Character</tt> instance is not required, this method
4372      * should generally be used in preference to the constructor
4373      * {@link #Character(char)}, as this method is likely to yield
4374      * significantly better space and time performance by caching
4375      * frequently requested values.
4376      *
4377      * This method will always cache values in the range {@code
4378      * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may
4379      * cache other values outside of this range.
4380      *
4381      * @param  c a char value.
4382      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4383      * @since  1.5
4384      */
4385     public static Character valueOf(char c) {
4386         if (c <= 127) { // must cache
4387             return CharacterCache.cache[(int)c];
4388         }
4389         return new Character(c);
4390     }
4391 
4392     /**
4393      * Returns the value of this <code>Character</code> object.
4394      * @return  the primitive <code>char</code> value represented by
4395      *          this object.
4396      */
4397     public char charValue() {
4398         return value;
4399     }
4400 
4401     /**
4402      * Returns a hash code for this {@code Character}; equal to the result
4403      * of invoking {@code charValue()}.
4404      *
4405      * @return a hash code value for this {@code Character}
4406      */
4407     public int hashCode() {
4408         return (int)value;
4409     }
4410 
4411     /**
4412      * Compares this object against the specified object.
4413      * The result is <code>true</code> if and only if the argument is not
4414      * <code>null</code> and is a <code>Character</code> object that
4415      * represents the same <code>char</code> value as this object.
4416      *
4417      * @param   obj   the object to compare with.
4418      * @return  <code>true</code> if the objects are the same;
4419      *          <code>false</code> otherwise.
4420      */
4421     public boolean equals(Object obj) {
4422         if (obj instanceof Character) {
4423             return value == ((Character)obj).charValue();
4424         }
4425         return false;
4426     }
4427 
4428     /**
4429      * Returns a <code>String</code> object representing this
4430      * <code>Character</code>'s value.  The result is a string of
4431      * length 1 whose sole component is the primitive
4432      * <code>char</code> value represented by this
4433      * <code>Character</code> object.
4434      *
4435      * @return  a string representation of this object.
4436      */
4437     public String toString() {
4438         char buf[] = {value};
4439         return String.valueOf(buf);
4440     }
4441 
4442     /**
4443      * Returns a <code>String</code> object representing the
4444      * specified <code>char</code>.  The result is a string of length
4445      * 1 consisting solely of the specified <code>char</code>.
4446      *
4447      * @param c the <code>char</code> to be converted
4448      * @return the string representation of the specified <code>char</code>
4449      * @since 1.4
4450      */
4451     public static String toString(char c) {
4452         return String.valueOf(c);
4453     }
4454 
4455     /**
4456      * Determines whether the specified code point is a valid
4457      * <a href="http://www.unicode.org/glossary/#code_point">
4458      * Unicode code point value</a>.
4459      *
4460      * @param  codePoint the Unicode code point to be tested
4461      * @return {@code true} if the specified code point value is between
4462      *         {@link #MIN_CODE_POINT} and
4463      *         {@link #MAX_CODE_POINT} inclusive;
4464      *         {@code false} otherwise.
4465      * @since  1.5
4466      */
4467     public static boolean isValidCodePoint(int codePoint) {
4468         // Optimized form of:
4469         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4470         int plane = codePoint >>> 16;
4471         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4472     }
4473 
4474     /**
4475      * Determines whether the specified character (Unicode code point)
4476      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4477      * Such code points can be represented using a single {@code char}.
4478      *
4479      * @param  codePoint the character (Unicode code point) to be tested
4480      * @return {@code true} if the specified code point is between
4481      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4482      *         {@code false} otherwise.
4483      * @since  1.7
4484      */
4485     public static boolean isBmpCodePoint(int codePoint) {
4486         return codePoint >>> 16 == 0;
4487         // Optimized form of:
4488         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4489         // We consistently use logical shift (>>>) to facilitate
4490         // additional runtime optimizations.
4491     }
4492 
4493     /**
4494      * Determines whether the specified character (Unicode code point)
4495      * is in the <a href="#supplementary">supplementary character</a> range.
4496      *
4497      * @param  codePoint the character (Unicode code point) to be tested
4498      * @return {@code true} if the specified code point is between
4499      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4500      *         {@link #MAX_CODE_POINT} inclusive;
4501      *         {@code false} otherwise.
4502      * @since  1.5
4503      */
4504     public static boolean isSupplementaryCodePoint(int codePoint) {
4505         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4506             && codePoint <  MAX_CODE_POINT + 1;
4507     }
4508 
4509     /**
4510      * Determines if the given {@code char} value is a
4511      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4512      * Unicode high-surrogate code unit</a>
4513      * (also known as <i>leading-surrogate code unit</i>).
4514      *
4515      * <p>Such values do not represent characters by themselves,
4516      * but are used in the representation of
4517      * <a href="#supplementary">supplementary characters</a>
4518      * in the UTF-16 encoding.
4519      *
4520      * @param  ch the {@code char} value to be tested.
4521      * @return {@code true} if the {@code char} value is between
4522      *         {@link #MIN_HIGH_SURROGATE} and
4523      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4524      *         {@code false} otherwise.
4525      * @see    Character#isLowSurrogate(char)
4526      * @see    Character.UnicodeBlock#of(int)
4527      * @since  1.5
4528      */
4529     public static boolean isHighSurrogate(char ch) {
4530         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4531         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4532     }
4533 
4534     /**
4535      * Determines if the given {@code char} value is a
4536      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4537      * Unicode low-surrogate code unit</a>
4538      * (also known as <i>trailing-surrogate code unit</i>).
4539      *
4540      * <p>Such values do not represent characters by themselves,
4541      * but are used in the representation of
4542      * <a href="#supplementary">supplementary characters</a>
4543      * in the UTF-16 encoding.
4544      *
4545      * @param  ch the {@code char} value to be tested.
4546      * @return {@code true} if the {@code char} value is between
4547      *         {@link #MIN_LOW_SURROGATE} and
4548      *         {@link #MAX_LOW_SURROGATE} inclusive;
4549      *         {@code false} otherwise.
4550      * @see    Character#isHighSurrogate(char)
4551      * @since  1.5
4552      */
4553     public static boolean isLowSurrogate(char ch) {
4554         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4555     }
4556 
4557     /**
4558      * Determines if the given {@code char} value is a Unicode
4559      * <i>surrogate code unit</i>.
4560      *
4561      * <p>Such values do not represent characters by themselves,
4562      * but are used in the representation of
4563      * <a href="#supplementary">supplementary characters</a>
4564      * in the UTF-16 encoding.
4565      *
4566      * <p>A char value is a surrogate code unit if and only if it is either
4567      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4568      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4569      *
4570      * @param  ch the {@code char} value to be tested.
4571      * @return {@code true} if the {@code char} value is between
4572      *         {@link #MIN_SURROGATE} and
4573      *         {@link #MAX_SURROGATE} inclusive;
4574      *         {@code false} otherwise.
4575      * @since  1.7
4576      */
4577     public static boolean isSurrogate(char ch) {
4578         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4579     }
4580 
4581     /**
4582      * Determines whether the specified pair of <code>char</code>
4583      * values is a valid
4584      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4585      * Unicode surrogate pair</a>.
4586 
4587      * <p>This method is equivalent to the expression:
4588      * <blockquote><pre>
4589      * isHighSurrogate(high) && isLowSurrogate(low)
4590      * </pre></blockquote>
4591      *
4592      * @param  high the high-surrogate code value to be tested
4593      * @param  low the low-surrogate code value to be tested
4594      * @return <code>true</code> if the specified high and
4595      * low-surrogate code values represent a valid surrogate pair;
4596      * <code>false</code> otherwise.
4597      * @since  1.5
4598      */
4599     public static boolean isSurrogatePair(char high, char low) {
4600         return isHighSurrogate(high) && isLowSurrogate(low);
4601     }
4602 
4603     /**
4604      * Determines the number of <code>char</code> values needed to
4605      * represent the specified character (Unicode code point). If the
4606      * specified character is equal to or greater than 0x10000, then
4607      * the method returns 2. Otherwise, the method returns 1.
4608      *
4609      * <p>This method doesn't validate the specified character to be a
4610      * valid Unicode code point. The caller must validate the
4611      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4612      * if necessary.
4613      *
4614      * @param   codePoint the character (Unicode code point) to be tested.
4615      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4616      * @see     Character#isSupplementaryCodePoint(int)
4617      * @since   1.5
4618      */
4619     public static int charCount(int codePoint) {
4620         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4621     }
4622 
4623     /**
4624      * Converts the specified surrogate pair to its supplementary code
4625      * point value. This method does not validate the specified
4626      * surrogate pair. The caller must validate it using {@link
4627      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4628      *
4629      * @param  high the high-surrogate code unit
4630      * @param  low the low-surrogate code unit
4631      * @return the supplementary code point composed from the
4632      *         specified surrogate pair.
4633      * @since  1.5
4634      */
4635     public static int toCodePoint(char high, char low) {
4636         // Optimized form of:
4637         // return ((high - MIN_HIGH_SURROGATE) << 10)
4638         //         + (low - MIN_LOW_SURROGATE)
4639         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4640         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4641                                        - (MIN_HIGH_SURROGATE << 10)
4642                                        - MIN_LOW_SURROGATE);
4643     }
4644 
4645     /**
4646      * Returns the code point at the given index of the
4647      * <code>CharSequence</code>. If the <code>char</code> value at
4648      * the given index in the <code>CharSequence</code> is in the
4649      * high-surrogate range, the following index is less than the
4650      * length of the <code>CharSequence</code>, and the
4651      * <code>char</code> value at the following index is in the
4652      * low-surrogate range, then the supplementary code point
4653      * corresponding to this surrogate pair is returned. Otherwise,
4654      * the <code>char</code> value at the given index is returned.
4655      *
4656      * @param seq a sequence of <code>char</code> values (Unicode code
4657      * units)
4658      * @param index the index to the <code>char</code> values (Unicode
4659      * code units) in <code>seq</code> to be converted
4660      * @return the Unicode code point at the given index
4661      * @exception NullPointerException if <code>seq</code> is null.
4662      * @exception IndexOutOfBoundsException if the value
4663      * <code>index</code> is negative or not less than
4664      * {@link CharSequence#length() seq.length()}.
4665      * @since  1.5
4666      */
4667     public static int codePointAt(CharSequence seq, int index) {
4668         char c1 = seq.charAt(index++);
4669         if (isHighSurrogate(c1)) {
4670             if (index < seq.length()) {
4671                 char c2 = seq.charAt(index);
4672                 if (isLowSurrogate(c2)) {
4673                     return toCodePoint(c1, c2);
4674                 }
4675             }
4676         }
4677         return c1;
4678     }
4679 
4680     /**
4681      * Returns the code point at the given index of the
4682      * <code>char</code> array. If the <code>char</code> value at
4683      * the given index in the <code>char</code> array is in the
4684      * high-surrogate range, the following index is less than the
4685      * length of the <code>char</code> array, and the
4686      * <code>char</code> value at the following index is in the
4687      * low-surrogate range, then the supplementary code point
4688      * corresponding to this surrogate pair is returned. Otherwise,
4689      * the <code>char</code> value at the given index is returned.
4690      *
4691      * @param a the <code>char</code> array
4692      * @param index the index to the <code>char</code> values (Unicode
4693      * code units) in the <code>char</code> array to be converted
4694      * @return the Unicode code point at the given index
4695      * @exception NullPointerException if <code>a</code> is null.
4696      * @exception IndexOutOfBoundsException if the value
4697      * <code>index</code> is negative or not less than
4698      * the length of the <code>char</code> array.
4699      * @since  1.5
4700      */
4701     public static int codePointAt(char[] a, int index) {
4702         return codePointAtImpl(a, index, a.length);
4703     }
4704 
4705     /**
4706      * Returns the code point at the given index of the
4707      * <code>char</code> array, where only array elements with
4708      * <code>index</code> less than <code>limit</code> can be used. If
4709      * the <code>char</code> value at the given index in the
4710      * <code>char</code> array is in the high-surrogate range, the
4711      * following index is less than the <code>limit</code>, and the
4712      * <code>char</code> value at the following index is in the
4713      * low-surrogate range, then the supplementary code point
4714      * corresponding to this surrogate pair is returned. Otherwise,
4715      * the <code>char</code> value at the given index is returned.
4716      *
4717      * @param a the <code>char</code> array
4718      * @param index the index to the <code>char</code> values (Unicode
4719      * code units) in the <code>char</code> array to be converted
4720      * @param limit the index after the last array element that can be used in the
4721      * <code>char</code> array
4722      * @return the Unicode code point at the given index
4723      * @exception NullPointerException if <code>a</code> is null.
4724      * @exception IndexOutOfBoundsException if the <code>index</code>
4725      * argument is negative or not less than the <code>limit</code>
4726      * argument, or if the <code>limit</code> argument is negative or
4727      * greater than the length of the <code>char</code> array.
4728      * @since  1.5
4729      */
4730     public static int codePointAt(char[] a, int index, int limit) {
4731         if (index >= limit || limit < 0 || limit > a.length) {
4732             throw new IndexOutOfBoundsException();
4733         }
4734         return codePointAtImpl(a, index, limit);
4735     }
4736 
4737     // throws ArrayIndexOutofBoundsException if index out of bounds
4738     static int codePointAtImpl(char[] a, int index, int limit) {
4739         char c1 = a[index++];
4740         if (isHighSurrogate(c1)) {
4741             if (index < limit) {
4742                 char c2 = a[index];
4743                 if (isLowSurrogate(c2)) {
4744                     return toCodePoint(c1, c2);
4745                 }
4746             }
4747         }
4748         return c1;
4749     }
4750 
4751     /**
4752      * Returns the code point preceding the given index of the
4753      * <code>CharSequence</code>. If the <code>char</code> value at
4754      * <code>(index - 1)</code> in the <code>CharSequence</code> is in
4755      * the low-surrogate range, <code>(index - 2)</code> is not
4756      * negative, and the <code>char</code> value at <code>(index -
4757      * 2)</code> in the <code>CharSequence</code> is in the
4758      * high-surrogate range, then the supplementary code point
4759      * corresponding to this surrogate pair is returned. Otherwise,
4760      * the <code>char</code> value at <code>(index - 1)</code> is
4761      * returned.
4762      *
4763      * @param seq the <code>CharSequence</code> instance
4764      * @param index the index following the code point that should be returned
4765      * @return the Unicode code point value before the given index.
4766      * @exception NullPointerException if <code>seq</code> is null.
4767      * @exception IndexOutOfBoundsException if the <code>index</code>
4768      * argument is less than 1 or greater than {@link
4769      * CharSequence#length() seq.length()}.
4770      * @since  1.5
4771      */
4772     public static int codePointBefore(CharSequence seq, int index) {
4773         char c2 = seq.charAt(--index);
4774         if (isLowSurrogate(c2)) {
4775             if (index > 0) {
4776                 char c1 = seq.charAt(--index);
4777                 if (isHighSurrogate(c1)) {
4778                     return toCodePoint(c1, c2);
4779                 }
4780             }
4781         }
4782         return c2;
4783     }
4784 
4785     /**
4786      * Returns the code point preceding the given index of the
4787      * <code>char</code> array. If the <code>char</code> value at
4788      * <code>(index - 1)</code> in the <code>char</code> array is in
4789      * the low-surrogate range, <code>(index - 2)</code> is not
4790      * negative, and the <code>char</code> value at <code>(index -
4791      * 2)</code> in the <code>char</code> array is in the
4792      * high-surrogate range, then the supplementary code point
4793      * corresponding to this surrogate pair is returned. Otherwise,
4794      * the <code>char</code> value at <code>(index - 1)</code> is
4795      * returned.
4796      *
4797      * @param a the <code>char</code> array
4798      * @param index the index following the code point that should be returned
4799      * @return the Unicode code point value before the given index.
4800      * @exception NullPointerException if <code>a</code> is null.
4801      * @exception IndexOutOfBoundsException if the <code>index</code>
4802      * argument is less than 1 or greater than the length of the
4803      * <code>char</code> array
4804      * @since  1.5
4805      */
4806     public static int codePointBefore(char[] a, int index) {
4807         return codePointBeforeImpl(a, index, 0);
4808     }
4809 
4810     /**
4811      * Returns the code point preceding the given index of the
4812      * <code>char</code> array, where only array elements with
4813      * <code>index</code> greater than or equal to <code>start</code>
4814      * can be used. If the <code>char</code> value at <code>(index -
4815      * 1)</code> in the <code>char</code> array is in the
4816      * low-surrogate range, <code>(index - 2)</code> is not less than
4817      * <code>start</code>, and the <code>char</code> value at
4818      * <code>(index - 2)</code> in the <code>char</code> array is in
4819      * the high-surrogate range, then the supplementary code point
4820      * corresponding to this surrogate pair is returned. Otherwise,
4821      * the <code>char</code> value at <code>(index - 1)</code> is
4822      * returned.
4823      *
4824      * @param a the <code>char</code> array
4825      * @param index the index following the code point that should be returned
4826      * @param start the index of the first array element in the
4827      * <code>char</code> array
4828      * @return the Unicode code point value before the given index.
4829      * @exception NullPointerException if <code>a</code> is null.
4830      * @exception IndexOutOfBoundsException if the <code>index</code>
4831      * argument is not greater than the <code>start</code> argument or
4832      * is greater than the length of the <code>char</code> array, or
4833      * if the <code>start</code> argument is negative or not less than
4834      * the length of the <code>char</code> array.
4835      * @since  1.5
4836      */
4837     public static int codePointBefore(char[] a, int index, int start) {
4838         if (index <= start || start < 0 || start >= a.length) {
4839             throw new IndexOutOfBoundsException();
4840         }
4841         return codePointBeforeImpl(a, index, start);
4842     }
4843 
4844     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
4845     static int codePointBeforeImpl(char[] a, int index, int start) {
4846         char c2 = a[--index];
4847         if (isLowSurrogate(c2)) {
4848             if (index > start) {
4849                 char c1 = a[--index];
4850                 if (isHighSurrogate(c1)) {
4851                     return toCodePoint(c1, c2);
4852                 }
4853             }
4854         }
4855         return c2;
4856     }
4857 
4858     /**
4859      * Returns the leading surrogate (a
4860      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4861      * high surrogate code unit</a>) of the
4862      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4863      * surrogate pair</a>
4864      * representing the specified supplementary character (Unicode
4865      * code point) in the UTF-16 encoding.  If the specified character
4866      * is not a
4867      * <a href="Character.html#supplementary">supplementary character</a>,
4868      * an unspecified {@code char} is returned.
4869      *
4870      * <p>If
4871      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4872      * is {@code true}, then
4873      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4874      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4875      * are also always {@code true}.
4876      *
4877      * @param   codePoint a supplementary character (Unicode code point)
4878      * @return  the leading surrogate code unit used to represent the
4879      *          character in the UTF-16 encoding
4880      * @since   1.7
4881      */
4882     public static char highSurrogate(int codePoint) {
4883         return (char) ((codePoint >>> 10)
4884             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4885     }
4886 
4887     /**
4888      * Returns the trailing surrogate (a
4889      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4890      * low surrogate code unit</a>) of the
4891      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4892      * surrogate pair</a>
4893      * representing the specified supplementary character (Unicode
4894      * code point) in the UTF-16 encoding.  If the specified character
4895      * is not a
4896      * <a href="Character.html#supplementary">supplementary character</a>,
4897      * an unspecified {@code char} is returned.
4898      *
4899      * <p>If
4900      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4901      * is {@code true}, then
4902      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4903      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4904      * are also always {@code true}.
4905      *
4906      * @param   codePoint a supplementary character (Unicode code point)
4907      * @return  the trailing surrogate code unit used to represent the
4908      *          character in the UTF-16 encoding
4909      * @since   1.7
4910      */
4911     public static char lowSurrogate(int codePoint) {
4912         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4913     }
4914 
4915     /**
4916      * Converts the specified character (Unicode code point) to its
4917      * UTF-16 representation. If the specified code point is a BMP
4918      * (Basic Multilingual Plane or Plane 0) value, the same value is
4919      * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the
4920      * specified code point is a supplementary character, its
4921      * surrogate values are stored in <code>dst[dstIndex]</code>
4922      * (high-surrogate) and <code>dst[dstIndex+1]</code>
4923      * (low-surrogate), and 2 is returned.
4924      *
4925      * @param  codePoint the character (Unicode code point) to be converted.
4926      * @param  dst an array of <code>char</code> in which the
4927      * <code>codePoint</code>'s UTF-16 value is stored.
4928      * @param dstIndex the start index into the <code>dst</code>
4929      * array where the converted value is stored.
4930      * @return 1 if the code point is a BMP code point, 2 if the
4931      * code point is a supplementary code point.
4932      * @exception IllegalArgumentException if the specified
4933      * <code>codePoint</code> is not a valid Unicode code point.
4934      * @exception NullPointerException if the specified <code>dst</code> is null.
4935      * @exception IndexOutOfBoundsException if <code>dstIndex</code>
4936      * is negative or not less than <code>dst.length</code>, or if
4937      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
4938      * array element(s) to store the resulting <code>char</code>
4939      * value(s). (If <code>dstIndex</code> is equal to
4940      * <code>dst.length-1</code> and the specified
4941      * <code>codePoint</code> is a supplementary character, the
4942      * high-surrogate value is not stored in
4943      * <code>dst[dstIndex]</code>.)
4944      * @since  1.5
4945      */
4946     public static int toChars(int codePoint, char[] dst, int dstIndex) {
4947         if (isBmpCodePoint(codePoint)) {
4948             dst[dstIndex] = (char) codePoint;
4949             return 1;
4950         } else if (isValidCodePoint(codePoint)) {
4951             toSurrogates(codePoint, dst, dstIndex);
4952             return 2;
4953         } else {
4954             throw new IllegalArgumentException();
4955         }
4956     }
4957 
4958     /**
4959      * Converts the specified character (Unicode code point) to its
4960      * UTF-16 representation stored in a <code>char</code> array. If
4961      * the specified code point is a BMP (Basic Multilingual Plane or
4962      * Plane 0) value, the resulting <code>char</code> array has
4963      * the same value as <code>codePoint</code>. If the specified code
4964      * point is a supplementary code point, the resulting
4965      * <code>char</code> array has the corresponding surrogate pair.
4966      *
4967      * @param  codePoint a Unicode code point
4968      * @return a <code>char</code> array having
4969      *         <code>codePoint</code>'s UTF-16 representation.
4970      * @exception IllegalArgumentException if the specified
4971      * <code>codePoint</code> is not a valid Unicode code point.
4972      * @since  1.5
4973      */
4974     public static char[] toChars(int codePoint) {
4975         if (isBmpCodePoint(codePoint)) {
4976             return new char[] { (char) codePoint };
4977         } else if (isValidCodePoint(codePoint)) {
4978             char[] result = new char[2];
4979             toSurrogates(codePoint, result, 0);
4980             return result;
4981         } else {
4982             throw new IllegalArgumentException();
4983         }
4984     }
4985 
4986     static void toSurrogates(int codePoint, char[] dst, int index) {
4987         // We write elements "backwards" to guarantee all-or-nothing
4988         dst[index+1] = lowSurrogate(codePoint);
4989         dst[index] = highSurrogate(codePoint);
4990     }
4991 
4992     /**
4993      * Returns the number of Unicode code points in the text range of
4994      * the specified char sequence. The text range begins at the
4995      * specified <code>beginIndex</code> and extends to the
4996      * <code>char</code> at index <code>endIndex - 1</code>. Thus the
4997      * length (in <code>char</code>s) of the text range is
4998      * <code>endIndex-beginIndex</code>. Unpaired surrogates within
4999      * the text range count as one code point each.
5000      *
5001      * @param seq the char sequence
5002      * @param beginIndex the index to the first <code>char</code> of
5003      * the text range.
5004      * @param endIndex the index after the last <code>char</code> of
5005      * the text range.
5006      * @return the number of Unicode code points in the specified text
5007      * range
5008      * @exception NullPointerException if <code>seq</code> is null.
5009      * @exception IndexOutOfBoundsException if the
5010      * <code>beginIndex</code> is negative, or <code>endIndex</code>
5011      * is larger than the length of the given sequence, or
5012      * <code>beginIndex</code> is larger than <code>endIndex</code>.
5013      * @since  1.5
5014      */
5015     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5016         int length = seq.length();
5017         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5018             throw new IndexOutOfBoundsException();
5019         }
5020         int n = endIndex - beginIndex;
5021         for (int i = beginIndex; i < endIndex; ) {
5022             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5023                 isLowSurrogate(seq.charAt(i))) {
5024                 n--;
5025                 i++;
5026             }
5027         }
5028         return n;
5029     }
5030 
5031     /**
5032      * Returns the number of Unicode code points in a subarray of the
5033      * <code>char</code> array argument. The <code>offset</code>
5034      * argument is the index of the first <code>char</code> of the
5035      * subarray and the <code>count</code> argument specifies the
5036      * length of the subarray in <code>char</code>s. Unpaired
5037      * surrogates within the subarray count as one code point each.
5038      *
5039      * @param a the <code>char</code> array
5040      * @param offset the index of the first <code>char</code> in the
5041      * given <code>char</code> array
5042      * @param count the length of the subarray in <code>char</code>s
5043      * @return the number of Unicode code points in the specified subarray
5044      * @exception NullPointerException if <code>a</code> is null.
5045      * @exception IndexOutOfBoundsException if <code>offset</code> or
5046      * <code>count</code> is negative, or if <code>offset +
5047      * count</code> is larger than the length of the given array.
5048      * @since  1.5
5049      */
5050     public static int codePointCount(char[] a, int offset, int count) {
5051         if (count > a.length - offset || offset < 0 || count < 0) {
5052             throw new IndexOutOfBoundsException();
5053         }
5054         return codePointCountImpl(a, offset, count);
5055     }
5056 
5057     static int codePointCountImpl(char[] a, int offset, int count) {
5058         int endIndex = offset + count;
5059         int n = count;
5060         for (int i = offset; i < endIndex; ) {
5061             if (isHighSurrogate(a[i++]) && i < endIndex &&
5062                 isLowSurrogate(a[i])) {
5063                 n--;
5064                 i++;
5065             }
5066         }
5067         return n;
5068     }
5069 
5070     /**
5071      * Returns the index within the given char sequence that is offset
5072      * from the given <code>index</code> by <code>codePointOffset</code>
5073      * code points. Unpaired surrogates within the text range given by
5074      * <code>index</code> and <code>codePointOffset</code> count as
5075      * one code point each.
5076      *
5077      * @param seq the char sequence
5078      * @param index the index to be offset
5079      * @param codePointOffset the offset in code points
5080      * @return the index within the char sequence
5081      * @exception NullPointerException if <code>seq</code> is null.
5082      * @exception IndexOutOfBoundsException if <code>index</code>
     *   is negative or larger than the length of the char sequence,
5084      *   or if <code>codePointOffset</code> is positive and the
5085      *   subsequence starting with <code>index</code> has fewer than
5086      *   <code>codePointOffset</code> code points, or if
5087      *   <code>codePointOffset</code> is negative and the subsequence
5088      *   before <code>index</code> has fewer than the absolute value
5089      *   of <code>codePointOffset</code> code points.
5090      * @since 1.5
5091      */
5092     public static int offsetByCodePoints(CharSequence seq, int index,
5093                                          int codePointOffset) {
5094         int length = seq.length();
5095         if (index < 0 || index > length) {
5096             throw new IndexOutOfBoundsException();
5097         }
5098 
5099         int x = index;
5100         if (codePointOffset >= 0) {
5101             int i;
5102             for (i = 0; x < length && i < codePointOffset; i++) {
5103                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5104                     isLowSurrogate(seq.charAt(x))) {
5105                     x++;
5106                 }
5107             }
5108             if (i < codePointOffset) {
5109                 throw new IndexOutOfBoundsException();
5110             }
5111         } else {
5112             int i;
5113             for (i = codePointOffset; x > 0 && i < 0; i++) {
5114                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5115                     isHighSurrogate(seq.charAt(x-1))) {
5116                     x--;
5117                 }
5118             }
5119             if (i < 0) {
5120                 throw new IndexOutOfBoundsException();
5121             }
5122         }
5123         return x;
5124     }
5125 
5126     /**
5127      * Returns the index within the given <code>char</code> subarray
5128      * that is offset from the given <code>index</code> by
5129      * <code>codePointOffset</code> code points. The
5130      * <code>start</code> and <code>count</code> arguments specify a
5131      * subarray of the <code>char</code> array. Unpaired surrogates
5132      * within the text range given by <code>index</code> and
5133      * <code>codePointOffset</code> count as one code point each.
5134      *
5135      * @param a the <code>char</code> array
5136      * @param start the index of the first <code>char</code> of the
5137      * subarray
5138      * @param count the length of the subarray in <code>char</code>s
5139      * @param index the index to be offset
5140      * @param codePointOffset the offset in code points
5141      * @return the index within the subarray
5142      * @exception NullPointerException if <code>a</code> is null.
5143      * @exception IndexOutOfBoundsException
5144      *   if <code>start</code> or <code>count</code> is negative,
5145      *   or if <code>start + count</code> is larger than the length of
5146      *   the given array,
5147      *   or if <code>index</code> is less than <code>start</code> or
     *   larger than <code>start + count</code>,
5149      *   or if <code>codePointOffset</code> is positive and the text range
5150      *   starting with <code>index</code> and ending with <code>start
5151      *   + count - 1</code> has fewer than <code>codePointOffset</code> code
5152      *   points,
5153      *   or if <code>codePointOffset</code> is negative and the text range
5154      *   starting with <code>start</code> and ending with <code>index
5155      *   - 1</code> has fewer than the absolute value of
5156      *   <code>codePointOffset</code> code points.
5157      * @since 1.5
5158      */
5159     public static int offsetByCodePoints(char[] a, int start, int count,
5160                                          int index, int codePointOffset) {
5161         if (count > a.length-start || start < 0 || count < 0
5162             || index < start || index > start+count) {
5163             throw new IndexOutOfBoundsException();
5164         }
5165         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5166     }
5167 
5168     static int offsetByCodePointsImpl(char[]a, int start, int count,
5169                                       int index, int codePointOffset) {
5170         int x = index;
5171         if (codePointOffset >= 0) {
5172             int limit = start + count;
5173             int i;
5174             for (i = 0; x < limit && i < codePointOffset; i++) {
5175                 if (isHighSurrogate(a[x++]) && x < limit &&
5176                     isLowSurrogate(a[x])) {
5177                     x++;
5178                 }
5179             }
5180             if (i < codePointOffset) {
5181                 throw new IndexOutOfBoundsException();
5182             }
5183         } else {
5184             int i;
5185             for (i = codePointOffset; x > start && i < 0; i++) {
5186                 if (isLowSurrogate(a[--x]) && x > start &&
5187                     isHighSurrogate(a[x-1])) {
5188                     x--;
5189                 }
5190             }
5191             if (i < 0) {
5192                 throw new IndexOutOfBoundsException();
5193             }
5194         }
5195         return x;
5196     }
5197 
5198     /**
5199      * Determines if the specified character is a lowercase character.
5200      * <p>
5201      * A character is lowercase if its general category type, provided
5202      * by <code>Character.getType(ch)</code>, is
5203      * <code>LOWERCASE_LETTER</code>.
5204      * <p>
5205      * The following are examples of lowercase characters:
5206      * <p><blockquote><pre>
5207      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5208      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5209      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5210      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5211      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5212      * </pre></blockquote>
5213      * <p> Many other Unicode characters are lowercase too.
5214      *
5215      * <p><b>Note:</b> This method cannot handle <a
5216      * href="#supplementary"> supplementary characters</a>. To support
5217      * all Unicode characters, including supplementary characters, use
5218      * the {@link #isLowerCase(int)} method.
5219      *
5220      * @param   ch   the character to be tested.
5221      * @return  <code>true</code> if the character is lowercase;
5222      *          <code>false</code> otherwise.
     * @see     Character#isUpperCase(char)
5224      * @see     Character#isTitleCase(char)
5225      * @see     Character#toLowerCase(char)
5226      * @see     Character#getType(char)
5227      */
5228     public static boolean isLowerCase(char ch) {
5229         return isLowerCase((int)ch);
5230     }
5231 
5232     /**
5233      * Determines if the specified character (Unicode code point) is a
5234      * lowercase character.
5235      * <p>
5236      * A character is lowercase if its general category type, provided
5237      * by {@link Character#getType getType(codePoint)}, is
5238      * <code>LOWERCASE_LETTER</code>.
5239      * <p>
5240      * The following are examples of lowercase characters:
5241      * <p><blockquote><pre>
5242      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5243      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5244      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5245      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5246      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5247      * </pre></blockquote>
5248      * <p> Many other Unicode characters are lowercase too.
5249      *
5250      * @param   codePoint the character (Unicode code point) to be tested.
5251      * @return  <code>true</code> if the character is lowercase;
5252      *          <code>false</code> otherwise.
     * @see     Character#isUpperCase(int)
5254      * @see     Character#isTitleCase(int)
5255      * @see     Character#toLowerCase(int)
5256      * @see     Character#getType(int)
5257      * @since   1.5
5258      */
5259     public static boolean isLowerCase(int codePoint) {
5260         return getType(codePoint) == Character.LOWERCASE_LETTER;
5261     }
5262 
5263     /**
5264      * Determines if the specified character is an uppercase character.
5265      * <p>
5266      * A character is uppercase if its general category type, provided by
5267      * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>.
5268      * <p>
5269      * The following are examples of uppercase characters:
5270      * <p><blockquote><pre>
5271      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5272      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5273      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5274      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5275      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5276      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5278      *
5279      * <p><b>Note:</b> This method cannot handle <a
5280      * href="#supplementary"> supplementary characters</a>. To support
5281      * all Unicode characters, including supplementary characters, use
5282      * the {@link #isUpperCase(int)} method.
5283      *
5284      * @param   ch   the character to be tested.
5285      * @return  <code>true</code> if the character is uppercase;
5286      *          <code>false</code> otherwise.
5287      * @see     Character#isLowerCase(char)
5288      * @see     Character#isTitleCase(char)
5289      * @see     Character#toUpperCase(char)
5290      * @see     Character#getType(char)
5291      * @since   1.0
5292      */
5293     public static boolean isUpperCase(char ch) {
5294         return isUpperCase((int)ch);
5295     }
5296 
5297     /**
5298      * Determines if the specified character (Unicode code point) is an uppercase character.
5299      * <p>
5300      * A character is uppercase if its general category type, provided by
5301      * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>.
5302      * <p>
5303      * The following are examples of uppercase characters:
5304      * <p><blockquote><pre>
5305      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5306      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5307      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5308      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5309      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5310      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5312      *
5313      * @param   codePoint the character (Unicode code point) to be tested.
5314      * @return  <code>true</code> if the character is uppercase;
5315      *          <code>false</code> otherwise.
5316      * @see     Character#isLowerCase(int)
5317      * @see     Character#isTitleCase(int)
5318      * @see     Character#toUpperCase(int)
5319      * @see     Character#getType(int)
5320      * @since   1.5
5321      */
5322     public static boolean isUpperCase(int codePoint) {
5323         return getType(codePoint) == Character.UPPERCASE_LETTER;
5324     }
5325 
5326     /**
5327      * Determines if the specified character is a titlecase character.
5328      * <p>
5329      * A character is a titlecase character if its general
5330      * category type, provided by <code>Character.getType(ch)</code>,
5331      * is <code>TITLECASE_LETTER</code>.
5332      * <p>
5333      * Some characters look like pairs of Latin letters. For example, there
5334      * is an uppercase letter that looks like "LJ" and has a corresponding
5335      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5336      * is the appropriate form to use when rendering a word in lowercase
5337      * with initial capitals, as for a book title.
5338      * <p>
5339      * These are some of the Unicode characters for which this method returns
5340      * <code>true</code>:
5341      * <ul>
5342      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
5343      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
5344      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
5345      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
5346      * </ul>
     * <p> Many other Unicode characters are titlecase too.
5348      *
5349      * <p><b>Note:</b> This method cannot handle <a
5350      * href="#supplementary"> supplementary characters</a>. To support
5351      * all Unicode characters, including supplementary characters, use
5352      * the {@link #isTitleCase(int)} method.
5353      *
5354      * @param   ch   the character to be tested.
5355      * @return  <code>true</code> if the character is titlecase;
5356      *          <code>false</code> otherwise.
5357      * @see     Character#isLowerCase(char)
5358      * @see     Character#isUpperCase(char)
5359      * @see     Character#toTitleCase(char)
5360      * @see     Character#getType(char)
5361      * @since   1.0.2
5362      */
5363     public static boolean isTitleCase(char ch) {
5364         return isTitleCase((int)ch);
5365     }
5366 
5367     /**
5368      * Determines if the specified character (Unicode code point) is a titlecase character.
5369      * <p>
5370      * A character is a titlecase character if its general
5371      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5372      * is <code>TITLECASE_LETTER</code>.
5373      * <p>
5374      * Some characters look like pairs of Latin letters. For example, there
5375      * is an uppercase letter that looks like "LJ" and has a corresponding
5376      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5377      * is the appropriate form to use when rendering a word in lowercase
5378      * with initial capitals, as for a book title.
5379      * <p>
5380      * These are some of the Unicode characters for which this method returns
5381      * <code>true</code>:
5382      * <ul>
5383      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
5384      * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
5385      * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
5386      * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
5387      * </ul>
     * <p> Many other Unicode characters are titlecase too.
5389      *
5390      * @param   codePoint the character (Unicode code point) to be tested.
5391      * @return  <code>true</code> if the character is titlecase;
5392      *          <code>false</code> otherwise.
5393      * @see     Character#isLowerCase(int)
5394      * @see     Character#isUpperCase(int)
5395      * @see     Character#toTitleCase(int)
5396      * @see     Character#getType(int)
5397      * @since   1.5
5398      */
5399     public static boolean isTitleCase(int codePoint) {
5400         return getType(codePoint) == Character.TITLECASE_LETTER;
5401     }
5402 
5403     /**
5404      * Determines if the specified character is a digit.
5405      * <p>
5406      * A character is a digit if its general category type, provided
5407      * by <code>Character.getType(ch)</code>, is
5408      * <code>DECIMAL_DIGIT_NUMBER</code>.
5409      * <p>
5410      * Some Unicode character ranges that contain digits:
5411      * <ul>
5412      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
5413      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
5414      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
5415      *     Arabic-Indic digits
5416      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
5417      *     Extended Arabic-Indic digits
5418      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
5419      *     Devanagari digits
5420      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
5421      *     Fullwidth digits
5422      * </ul>
5423      *
5424      * Many other character ranges contain digits as well.
5425      *
5426      * <p><b>Note:</b> This method cannot handle <a
5427      * href="#supplementary"> supplementary characters</a>. To support
5428      * all Unicode characters, including supplementary characters, use
5429      * the {@link #isDigit(int)} method.
5430      *
5431      * @param   ch   the character to be tested.
5432      * @return  <code>true</code> if the character is a digit;
5433      *          <code>false</code> otherwise.
5434      * @see     Character#digit(char, int)
5435      * @see     Character#forDigit(int, int)
5436      * @see     Character#getType(char)
5437      */
5438     public static boolean isDigit(char ch) {
5439         return isDigit((int)ch);
5440     }
5441 
5442     /**
5443      * Determines if the specified character (Unicode code point) is a digit.
5444      * <p>
5445      * A character is a digit if its general category type, provided
5446      * by {@link Character#getType(int) getType(codePoint)}, is
5447      * <code>DECIMAL_DIGIT_NUMBER</code>.
5448      * <p>
5449      * Some Unicode character ranges that contain digits:
5450      * <ul>
5451      * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
5452      *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
5453      * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
5454      *     Arabic-Indic digits
5455      * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
5456      *     Extended Arabic-Indic digits
5457      * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
5458      *     Devanagari digits
5459      * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
5460      *     Fullwidth digits
5461      * </ul>
5462      *
5463      * Many other character ranges contain digits as well.
5464      *
5465      * @param   codePoint the character (Unicode code point) to be tested.
5466      * @return  <code>true</code> if the character is a digit;
5467      *          <code>false</code> otherwise.
5468      * @see     Character#forDigit(int, int)
5469      * @see     Character#getType(int)
5470      * @since   1.5
5471      */
5472     public static boolean isDigit(int codePoint) {
5473         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5474     }
5475 
5476     /**
5477      * Determines if a character is defined in Unicode.
5478      * <p>
5479      * A character is defined if at least one of the following is true:
5480      * <ul>
5481      * <li>It has an entry in the UnicodeData file.
5482      * <li>It has a value in a range defined by the UnicodeData file.
5483      * </ul>
5484      *
5485      * <p><b>Note:</b> This method cannot handle <a
5486      * href="#supplementary"> supplementary characters</a>. To support
5487      * all Unicode characters, including supplementary characters, use
5488      * the {@link #isDefined(int)} method.
5489      *
5490      * @param   ch   the character to be tested
5491      * @return  <code>true</code> if the character has a defined meaning
5492      *          in Unicode; <code>false</code> otherwise.
5493      * @see     Character#isDigit(char)
5494      * @see     Character#isLetter(char)
5495      * @see     Character#isLetterOrDigit(char)
5496      * @see     Character#isLowerCase(char)
5497      * @see     Character#isTitleCase(char)
5498      * @see     Character#isUpperCase(char)
5499      * @since   1.0.2
5500      */
5501     public static boolean isDefined(char ch) {
5502         return isDefined((int)ch);
5503     }
5504 
5505     /**
5506      * Determines if a character (Unicode code point) is defined in Unicode.
5507      * <p>
5508      * A character is defined if at least one of the following is true:
5509      * <ul>
5510      * <li>It has an entry in the UnicodeData file.
5511      * <li>It has a value in a range defined by the UnicodeData file.
5512      * </ul>
5513      *
5514      * @param   codePoint the character (Unicode code point) to be tested.
5515      * @return  <code>true</code> if the character has a defined meaning
5516      *          in Unicode; <code>false</code> otherwise.
5517      * @see     Character#isDigit(int)
5518      * @see     Character#isLetter(int)
5519      * @see     Character#isLetterOrDigit(int)
5520      * @see     Character#isLowerCase(int)
5521      * @see     Character#isTitleCase(int)
5522      * @see     Character#isUpperCase(int)
5523      * @since   1.5
5524      */
5525     public static boolean isDefined(int codePoint) {
5526         return getType(codePoint) != Character.UNASSIGNED;
5527     }
5528 
5529     /**
5530      * Determines if the specified character is a letter.
5531      * <p>
5532      * A character is considered to be a letter if its general
5533      * category type, provided by <code>Character.getType(ch)</code>,
5534      * is any of the following:
5535      * <ul>
5536      * <li> <code>UPPERCASE_LETTER</code>
5537      * <li> <code>LOWERCASE_LETTER</code>
5538      * <li> <code>TITLECASE_LETTER</code>
5539      * <li> <code>MODIFIER_LETTER</code>
5540      * <li> <code>OTHER_LETTER</code>
5541      * </ul>
5542      *
5543      * Not all letters have case. Many characters are
5544      * letters but are neither uppercase nor lowercase nor titlecase.
5545      *
5546      * <p><b>Note:</b> This method cannot handle <a
5547      * href="#supplementary"> supplementary characters</a>. To support
5548      * all Unicode characters, including supplementary characters, use
5549      * the {@link #isLetter(int)} method.
5550      *
5551      * @param   ch   the character to be tested.
5552      * @return  <code>true</code> if the character is a letter;
5553      *          <code>false</code> otherwise.
5554      * @see     Character#isDigit(char)
5555      * @see     Character#isJavaIdentifierStart(char)
5556      * @see     Character#isJavaLetter(char)
5557      * @see     Character#isJavaLetterOrDigit(char)
5558      * @see     Character#isLetterOrDigit(char)
5559      * @see     Character#isLowerCase(char)
5560      * @see     Character#isTitleCase(char)
5561      * @see     Character#isUnicodeIdentifierStart(char)
5562      * @see     Character#isUpperCase(char)
5563      */
5564     public static boolean isLetter(char ch) {
5565         return isLetter((int)ch);
5566     }
5567 
5568     /**
5569      * Determines if the specified character (Unicode code point) is a letter.
5570      * <p>
5571      * A character is considered to be a letter if its general
5572      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5573      * is any of the following:
5574      * <ul>
5575      * <li> <code>UPPERCASE_LETTER</code>
5576      * <li> <code>LOWERCASE_LETTER</code>
5577      * <li> <code>TITLECASE_LETTER</code>
5578      * <li> <code>MODIFIER_LETTER</code>
5579      * <li> <code>OTHER_LETTER</code>
5580      * </ul>
5581      *
5582      * Not all letters have case. Many characters are
5583      * letters but are neither uppercase nor lowercase nor titlecase.
5584      *
5585      * @param   codePoint the character (Unicode code point) to be tested.
5586      * @return  <code>true</code> if the character is a letter;
5587      *          <code>false</code> otherwise.
5588      * @see     Character#isDigit(int)
5589      * @see     Character#isJavaIdentifierStart(int)
5590      * @see     Character#isLetterOrDigit(int)
5591      * @see     Character#isLowerCase(int)
5592      * @see     Character#isTitleCase(int)
5593      * @see     Character#isUnicodeIdentifierStart(int)
5594      * @see     Character#isUpperCase(int)
5595      * @since   1.5
5596      */
5597     public static boolean isLetter(int codePoint) {
5598         return ((((1 << Character.UPPERCASE_LETTER) |
5599             (1 << Character.LOWERCASE_LETTER) |
5600             (1 << Character.TITLECASE_LETTER) |
5601             (1 << Character.MODIFIER_LETTER) |
5602             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5603             != 0;
5604     }
5605 
5606     /**
5607      * Determines if the specified character is a letter or digit.
5608      * <p>
5609      * A character is considered to be a letter or digit if either
5610      * <code>Character.isLetter(char ch)</code> or
5611      * <code>Character.isDigit(char ch)</code> returns
5612      * <code>true</code> for the character.
5613      *
5614      * <p><b>Note:</b> This method cannot handle <a
5615      * href="#supplementary"> supplementary characters</a>. To support
5616      * all Unicode characters, including supplementary characters, use
5617      * the {@link #isLetterOrDigit(int)} method.
5618      *
5619      * @param   ch   the character to be tested.
5620      * @return  <code>true</code> if the character is a letter or digit;
5621      *          <code>false</code> otherwise.
5622      * @see     Character#isDigit(char)
5623      * @see     Character#isJavaIdentifierPart(char)
5624      * @see     Character#isJavaLetter(char)
5625      * @see     Character#isJavaLetterOrDigit(char)
5626      * @see     Character#isLetter(char)
5627      * @see     Character#isUnicodeIdentifierPart(char)
5628      * @since   1.0.2
5629      */
5630     public static boolean isLetterOrDigit(char ch) {
5631         return isLetterOrDigit((int)ch);
5632     }
5633 
5634     /**
5635      * Determines if the specified character (Unicode code point) is a letter or digit.
5636      * <p>
5637      * A character is considered to be a letter or digit if either
5638      * {@link #isLetter(int) isLetter(codePoint)} or
5639      * {@link #isDigit(int) isDigit(codePoint)} returns
5640      * <code>true</code> for the character.
5641      *
5642      * @param   codePoint the character (Unicode code point) to be tested.
5643      * @return  <code>true</code> if the character is a letter or digit;
5644      *          <code>false</code> otherwise.
5645      * @see     Character#isDigit(int)
5646      * @see     Character#isJavaIdentifierPart(int)
5647      * @see     Character#isLetter(int)
5648      * @see     Character#isUnicodeIdentifierPart(int)
5649      * @since   1.5
5650      */
5651     public static boolean isLetterOrDigit(int codePoint) {
5652         return ((((1 << Character.UPPERCASE_LETTER) |
5653             (1 << Character.LOWERCASE_LETTER) |
5654             (1 << Character.TITLECASE_LETTER) |
5655             (1 << Character.MODIFIER_LETTER) |
5656             (1 << Character.OTHER_LETTER) |
5657             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5658             != 0;
5659     }
5660 
5661     /**
5662      * Determines if the specified character is permissible as the first
5663      * character in a Java identifier.
5664      * <p>
5665      * A character may start a Java identifier if and only if
5666      * one of the following is true:
5667      * <ul>
5668      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5669      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
5670      * <li> ch is a currency symbol (such as "$")
5671      * <li> ch is a connecting punctuation character (such as "_").
5672      * </ul>
5673      *
5674      * @param   ch the character to be tested.
5675      * @return  <code>true</code> if the character may start a Java
5676      *          identifier; <code>false</code> otherwise.
5677      * @see     Character#isJavaLetterOrDigit(char)
5678      * @see     Character#isJavaIdentifierStart(char)
5679      * @see     Character#isJavaIdentifierPart(char)
5680      * @see     Character#isLetter(char)
5681      * @see     Character#isLetterOrDigit(char)
5682      * @see     Character#isUnicodeIdentifierStart(char)
5683      * @since   1.02
5684      * @deprecated Replaced by isJavaIdentifierStart(char).
5685      */
5686     @Deprecated
5687     public static boolean isJavaLetter(char ch) {
5688         return isJavaIdentifierStart(ch);
5689     }
5690 
5691     /**
5692      * Determines if the specified character may be part of a Java
5693      * identifier as other than the first character.
5694      * <p>
5695      * A character may be part of a Java identifier if and only if any
5696      * of the following are true:
5697      * <ul>
5698      * <li>  it is a letter
5699      * <li>  it is a currency symbol (such as <code>'$'</code>)
5700      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5701      * <li>  it is a digit
5702      * <li>  it is a numeric letter (such as a Roman numeral character)
5703      * <li>  it is a combining mark
5704      * <li>  it is a non-spacing mark
5705      * <li> <code>isIdentifierIgnorable</code> returns
5706      * <code>true</code> for the character.
5707      * </ul>
5708      *
5709      * @param   ch the character to be tested.
5710      * @return  <code>true</code> if the character may be part of a
5711      *          Java identifier; <code>false</code> otherwise.
5712      * @see     Character#isJavaLetter(char)
5713      * @see     Character#isJavaIdentifierStart(char)
5714      * @see     Character#isJavaIdentifierPart(char)
5715      * @see     Character#isLetter(char)
5716      * @see     Character#isLetterOrDigit(char)
5717      * @see     Character#isUnicodeIdentifierPart(char)
5718      * @see     Character#isIdentifierIgnorable(char)
5719      * @since   1.02
5720      * @deprecated Replaced by isJavaIdentifierPart(char).
5721      */
5722     @Deprecated
5723     public static boolean isJavaLetterOrDigit(char ch) {
5724         return isJavaIdentifierPart(ch);
5725     }
5726 
5727     /**
5728      * Determines if the specified character is
5729      * permissible as the first character in a Java identifier.
5730      * <p>
5731      * A character may start a Java identifier if and only if
5732      * one of the following conditions is true:
5733      * <ul>
5734      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5735      * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code>
5736      * <li> ch is a currency symbol (such as "$")
5737      * <li> ch is a connecting punctuation character (such as "_").
5738      * </ul>
5739      *
5740      * <p><b>Note:</b> This method cannot handle <a
5741      * href="#supplementary"> supplementary characters</a>. To support
5742      * all Unicode characters, including supplementary characters, use
5743      * the {@link #isJavaIdentifierStart(int)} method.
5744      *
5745      * @param   ch the character to be tested.
5746      * @return  <code>true</code> if the character may start a Java identifier;
5747      *          <code>false</code> otherwise.
5748      * @see     Character#isJavaIdentifierPart(char)
5749      * @see     Character#isLetter(char)
5750      * @see     Character#isUnicodeIdentifierStart(char)
5751      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5752      * @since   1.1
5753      */
5754     public static boolean isJavaIdentifierStart(char ch) {
5755         return isJavaIdentifierStart((int)ch);
5756     }
5757 
5758     /**
5759      * Determines if the character (Unicode code point) is
5760      * permissible as the first character in a Java identifier.
5761      * <p>
5762      * A character may start a Java identifier if and only if
5763      * one of the following conditions is true:
5764      * <ul>
5765      * <li> {@link #isLetter(int) isLetter(codePoint)}
5766      *      returns <code>true</code>
5767      * <li> {@link #getType(int) getType(codePoint)}
5768      *      returns <code>LETTER_NUMBER</code>
5769      * <li> the referenced character is a currency symbol (such as "$")
5770      * <li> the referenced character is a connecting punctuation character
5771      *      (such as "_").
5772      * </ul>
5773      *
5774      * @param   codePoint the character (Unicode code point) to be tested.
5775      * @return  <code>true</code> if the character may start a Java identifier;
5776      *          <code>false</code> otherwise.
5777      * @see     Character#isJavaIdentifierPart(int)
5778      * @see     Character#isLetter(int)
5779      * @see     Character#isUnicodeIdentifierStart(int)
5780      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5781      * @since   1.5
5782      */
5783     public static boolean isJavaIdentifierStart(int codePoint) {
5784         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
5785     }
5786 
5787     /**
5788      * Determines if the specified character may be part of a Java
5789      * identifier as other than the first character.
5790      * <p>
5791      * A character may be part of a Java identifier if any of the following
5792      * are true:
5793      * <ul>
5794      * <li>  it is a letter
5795      * <li>  it is a currency symbol (such as <code>'$'</code>)
5796      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5797      * <li>  it is a digit
5798      * <li>  it is a numeric letter (such as a Roman numeral character)
5799      * <li>  it is a combining mark
5800      * <li>  it is a non-spacing mark
5801      * <li> <code>isIdentifierIgnorable</code> returns
5802      * <code>true</code> for the character
5803      * </ul>
5804      *
5805      * <p><b>Note:</b> This method cannot handle <a
5806      * href="#supplementary"> supplementary characters</a>. To support
5807      * all Unicode characters, including supplementary characters, use
5808      * the {@link #isJavaIdentifierPart(int)} method.
5809      *
5810      * @param   ch      the character to be tested.
5811      * @return <code>true</code> if the character may be part of a
5812      *          Java identifier; <code>false</code> otherwise.
5813      * @see     Character#isIdentifierIgnorable(char)
5814      * @see     Character#isJavaIdentifierStart(char)
5815      * @see     Character#isLetterOrDigit(char)
5816      * @see     Character#isUnicodeIdentifierPart(char)
5817      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5818      * @since   1.1
5819      */
5820     public static boolean isJavaIdentifierPart(char ch) {
5821         return isJavaIdentifierPart((int)ch);
5822     }
5823 
5824     /**
5825      * Determines if the character (Unicode code point) may be part of a Java
5826      * identifier as other than the first character.
5827      * <p>
5828      * A character may be part of a Java identifier if any of the following
5829      * are true:
5830      * <ul>
5831      * <li>  it is a letter
5832      * <li>  it is a currency symbol (such as <code>'$'</code>)
5833      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5834      * <li>  it is a digit
5835      * <li>  it is a numeric letter (such as a Roman numeral character)
5836      * <li>  it is a combining mark
5837      * <li>  it is a non-spacing mark
5838      * <li> {@link #isIdentifierIgnorable(int)
5839      * isIdentifierIgnorable(codePoint)} returns <code>true</code> for
5840      * the character
5841      * </ul>
5842      *
5843      * @param   codePoint the character (Unicode code point) to be tested.
5844      * @return <code>true</code> if the character may be part of a
5845      *          Java identifier; <code>false</code> otherwise.
5846      * @see     Character#isIdentifierIgnorable(int)
5847      * @see     Character#isJavaIdentifierStart(int)
5848      * @see     Character#isLetterOrDigit(int)
5849      * @see     Character#isUnicodeIdentifierPart(int)
5850      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5851      * @since   1.5
5852      */
5853     public static boolean isJavaIdentifierPart(int codePoint) {
5854         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
5855     }
5856 
5857     /**
5858      * Determines if the specified character is permissible as the
5859      * first character in a Unicode identifier.
5860      * <p>
5861      * A character may start a Unicode identifier if and only if
5862      * one of the following conditions is true:
5863      * <ul>
5864      * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code>
5865      * <li> {@link #getType(char) getType(ch)} returns
5866      *      <code>LETTER_NUMBER</code>.
5867      * </ul>
5868      *
5869      * <p><b>Note:</b> This method cannot handle <a
5870      * href="#supplementary"> supplementary characters</a>. To support
5871      * all Unicode characters, including supplementary characters, use
5872      * the {@link #isUnicodeIdentifierStart(int)} method.
5873      *
5874      * @param   ch      the character to be tested.
5875      * @return  <code>true</code> if the character may start a Unicode
5876      *          identifier; <code>false</code> otherwise.
5877      * @see     Character#isJavaIdentifierStart(char)
5878      * @see     Character#isLetter(char)
5879      * @see     Character#isUnicodeIdentifierPart(char)
5880      * @since   1.1
5881      */
5882     public static boolean isUnicodeIdentifierStart(char ch) {
5883         return isUnicodeIdentifierStart((int)ch);
5884     }
5885 
5886     /**
5887      * Determines if the specified character (Unicode code point) is permissible as the
5888      * first character in a Unicode identifier.
5889      * <p>
5890      * A character may start a Unicode identifier if and only if
5891      * one of the following conditions is true:
5892      * <ul>
5893      * <li> {@link #isLetter(int) isLetter(codePoint)}
5894      *      returns <code>true</code>
5895      * <li> {@link #getType(int) getType(codePoint)}
5896      *      returns <code>LETTER_NUMBER</code>.
5897      * </ul>
5898      * @param   codePoint the character (Unicode code point) to be tested.
5899      * @return  <code>true</code> if the character may start a Unicode
5900      *          identifier; <code>false</code> otherwise.
5901      * @see     Character#isJavaIdentifierStart(int)
5902      * @see     Character#isLetter(int)
5903      * @see     Character#isUnicodeIdentifierPart(int)
5904      * @since   1.5
5905      */
5906     public static boolean isUnicodeIdentifierStart(int codePoint) {
5907         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
5908     }
5909 
5910     /**
5911      * Determines if the specified character may be part of a Unicode
5912      * identifier as other than the first character.
5913      * <p>
5914      * A character may be part of a Unicode identifier if and only if
5915      * one of the following statements is true:
5916      * <ul>
5917      * <li>  it is a letter
5918      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5919      * <li>  it is a digit
5920      * <li>  it is a numeric letter (such as a Roman numeral character)
5921      * <li>  it is a combining mark
5922      * <li>  it is a non-spacing mark
5923      * <li> <code>isIdentifierIgnorable</code> returns
5924      * <code>true</code> for this character.
5925      * </ul>
5926      *
5927      * <p><b>Note:</b> This method cannot handle <a
5928      * href="#supplementary"> supplementary characters</a>. To support
5929      * all Unicode characters, including supplementary characters, use
5930      * the {@link #isUnicodeIdentifierPart(int)} method.
5931      *
5932      * @param   ch      the character to be tested.
5933      * @return  <code>true</code> if the character may be part of a
5934      *          Unicode identifier; <code>false</code> otherwise.
5935      * @see     Character#isIdentifierIgnorable(char)
5936      * @see     Character#isJavaIdentifierPart(char)
5937      * @see     Character#isLetterOrDigit(char)
5938      * @see     Character#isUnicodeIdentifierStart(char)
5939      * @since   1.1
5940      */
5941     public static boolean isUnicodeIdentifierPart(char ch) {
5942         return isUnicodeIdentifierPart((int)ch);
5943     }
5944 
5945     /**
5946      * Determines if the specified character (Unicode code point) may be part of a Unicode
5947      * identifier as other than the first character.
5948      * <p>
5949      * A character may be part of a Unicode identifier if and only if
5950      * one of the following statements is true:
5951      * <ul>
5952      * <li>  it is a letter
5953      * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
5954      * <li>  it is a digit
5955      * <li>  it is a numeric letter (such as a Roman numeral character)
5956      * <li>  it is a combining mark
5957      * <li>  it is a non-spacing mark
5958      * <li> <code>isIdentifierIgnorable</code> returns
5959      * <code>true</code> for this character.
5960      * </ul>
5961      * @param   codePoint the character (Unicode code point) to be tested.
5962      * @return  <code>true</code> if the character may be part of a
5963      *          Unicode identifier; <code>false</code> otherwise.
5964      * @see     Character#isIdentifierIgnorable(int)
5965      * @see     Character#isJavaIdentifierPart(int)
5966      * @see     Character#isLetterOrDigit(int)
5967      * @see     Character#isUnicodeIdentifierStart(int)
5968      * @since   1.5
5969      */
5970     public static boolean isUnicodeIdentifierPart(int codePoint) {
5971         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
5972     }
5973 
5974     /**
5975      * Determines if the specified character should be regarded as
5976      * an ignorable character in a Java identifier or a Unicode identifier.
5977      * <p>
5978      * The following Unicode characters are ignorable in a Java identifier
5979      * or a Unicode identifier:
5980      * <ul>
5981      * <li>ISO control characters that are not whitespace
5982      * <ul>
5983      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
5984      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
5985      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
5986      * </ul>
5987      *
5988      * <li>all characters that have the <code>FORMAT</code> general
5989      * category value
5990      * </ul>
5991      *
5992      * <p><b>Note:</b> This method cannot handle <a
5993      * href="#supplementary"> supplementary characters</a>. To support
5994      * all Unicode characters, including supplementary characters, use
5995      * the {@link #isIdentifierIgnorable(int)} method.
5996      *
5997      * @param   ch      the character to be tested.
5998      * @return  <code>true</code> if the character is an ignorable control
5999      *          character that may be part of a Java or Unicode identifier;
6000      *           <code>false</code> otherwise.
6001      * @see     Character#isJavaIdentifierPart(char)
6002      * @see     Character#isUnicodeIdentifierPart(char)
6003      * @since   1.1
6004      */
6005     public static boolean isIdentifierIgnorable(char ch) {
6006         return isIdentifierIgnorable((int)ch);
6007     }
6008 
6009     /**
6010      * Determines if the specified character (Unicode code point) should be regarded as
6011      * an ignorable character in a Java identifier or a Unicode identifier.
6012      * <p>
6013      * The following Unicode characters are ignorable in a Java identifier
6014      * or a Unicode identifier:
6015      * <ul>
6016      * <li>ISO control characters that are not whitespace
6017      * <ul>
6018      * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
6019      * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
6020      * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
6021      * </ul>
6022      *
6023      * <li>all characters that have the <code>FORMAT</code> general
6024      * category value
6025      * </ul>
6026      *
6027      * @param   codePoint the character (Unicode code point) to be tested.
6028      * @return  <code>true</code> if the character is an ignorable control
6029      *          character that may be part of a Java or Unicode identifier;
6030      *          <code>false</code> otherwise.
6031      * @see     Character#isJavaIdentifierPart(int)
6032      * @see     Character#isUnicodeIdentifierPart(int)
6033      * @since   1.5
6034      */
6035     public static boolean isIdentifierIgnorable(int codePoint) {
6036         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6037     }
6038 
6039     /**
6040      * Converts the character argument to lowercase using case
6041      * mapping information from the UnicodeData file.
6042      * <p>
6043      * Note that
6044      * <code>Character.isLowerCase(Character.toLowerCase(ch))</code>
6045      * does not always return <code>true</code> for some ranges of
6046      * characters, particularly those that are symbols or ideographs.
6047      *
6048      * <p>In general, {@link String#toLowerCase()} should be used to map
6049      * characters to lowercase. <code>String</code> case mapping methods
6050      * have several benefits over <code>Character</code> case mapping methods.
6051      * <code>String</code> case mapping methods can perform locale-sensitive
6052      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6053      * the <code>Character</code> case mapping methods cannot.
6054      *
6055      * <p><b>Note:</b> This method cannot handle <a
6056      * href="#supplementary"> supplementary characters</a>. To support
6057      * all Unicode characters, including supplementary characters, use
6058      * the {@link #toLowerCase(int)} method.
6059      *
6060      * @param   ch   the character to be converted.
6061      * @return  the lowercase equivalent of the character, if any;
6062      *          otherwise, the character itself.
6063      * @see     Character#isLowerCase(char)
6064      * @see     String#toLowerCase()
6065      */
6066     public static char toLowerCase(char ch) {
6067         return (char)toLowerCase((int)ch);
6068     }
6069 
6070     /**
6071      * Converts the character (Unicode code point) argument to
6072      * lowercase using case mapping information from the UnicodeData
6073      * file.
6074      *
6075      * <p> Note that
6076      * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code>
6077      * does not always return <code>true</code> for some ranges of
6078      * characters, particularly those that are symbols or ideographs.
6079      *
6080      * <p>In general, {@link String#toLowerCase()} should be used to map
6081      * characters to lowercase. <code>String</code> case mapping methods
6082      * have several benefits over <code>Character</code> case mapping methods.
6083      * <code>String</code> case mapping methods can perform locale-sensitive
6084      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6085      * the <code>Character</code> case mapping methods cannot.
6086      *
6087      * @param   codePoint   the character (Unicode code point) to be converted.
6088      * @return  the lowercase equivalent of the character (Unicode code
6089      *          point), if any; otherwise, the character itself.
6090      * @see     Character#isLowerCase(int)
6091      * @see     String#toLowerCase()
6092      *
6093      * @since   1.5
6094      */
6095     public static int toLowerCase(int codePoint) {
6096         return CharacterData.of(codePoint).toLowerCase(codePoint);
6097     }
6098 
6099     /**
6100      * Converts the character argument to uppercase using case mapping
6101      * information from the UnicodeData file.
6102      * <p>
6103      * Note that
6104      * <code>Character.isUpperCase(Character.toUpperCase(ch))</code>
6105      * does not always return <code>true</code> for some ranges of
6106      * characters, particularly those that are symbols or ideographs.
6107      *
6108      * <p>In general, {@link String#toUpperCase()} should be used to map
6109      * characters to uppercase. <code>String</code> case mapping methods
6110      * have several benefits over <code>Character</code> case mapping methods.
6111      * <code>String</code> case mapping methods can perform locale-sensitive
6112      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6113      * the <code>Character</code> case mapping methods cannot.
6114      *
6115      * <p><b>Note:</b> This method cannot handle <a
6116      * href="#supplementary"> supplementary characters</a>. To support
6117      * all Unicode characters, including supplementary characters, use
6118      * the {@link #toUpperCase(int)} method.
6119      *
6120      * @param   ch   the character to be converted.
6121      * @return  the uppercase equivalent of the character, if any;
6122      *          otherwise, the character itself.
6123      * @see     Character#isUpperCase(char)
6124      * @see     String#toUpperCase()
6125      */
6126     public static char toUpperCase(char ch) {
6127         return (char)toUpperCase((int)ch);
6128     }
6129 
6130     /**
6131      * Converts the character (Unicode code point) argument to
6132      * uppercase using case mapping information from the UnicodeData
6133      * file.
6134      *
6135      * <p>Note that
6136      * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code>
6137      * does not always return <code>true</code> for some ranges of
6138      * characters, particularly those that are symbols or ideographs.
6139      *
6140      * <p>In general, {@link String#toUpperCase()} should be used to map
6141      * characters to uppercase. <code>String</code> case mapping methods
6142      * have several benefits over <code>Character</code> case mapping methods.
6143      * <code>String</code> case mapping methods can perform locale-sensitive
6144      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6145      * the <code>Character</code> case mapping methods cannot.
6146      *
6147      * @param   codePoint   the character (Unicode code point) to be converted.
6148      * @return  the uppercase equivalent of the character, if any;
6149      *          otherwise, the character itself.
6150      * @see     Character#isUpperCase(int)
6151      * @see     String#toUpperCase()
6152      *
6153      * @since   1.5
6154      */
6155     public static int toUpperCase(int codePoint) {
6156         return CharacterData.of(codePoint).toUpperCase(codePoint);
6157     }
6158 
6159     /**
6160      * Converts the character argument to titlecase using case mapping
6161      * information from the UnicodeData file. If a character has no
6162      * explicit titlecase mapping and is not itself a titlecase char
6163      * according to UnicodeData, then the uppercase mapping is
6164      * returned as an equivalent titlecase mapping. If the
6165      * <code>char</code> argument is already a titlecase
6166      * <code>char</code>, the same <code>char</code> value will be
6167      * returned.
6168      * <p>
6169      * Note that
6170      * <code>Character.isTitleCase(Character.toTitleCase(ch))</code>
6171      * does not always return <code>true</code> for some ranges of
6172      * characters.
6173      *
6174      * <p><b>Note:</b> This method cannot handle <a
6175      * href="#supplementary"> supplementary characters</a>. To support
6176      * all Unicode characters, including supplementary characters, use
6177      * the {@link #toTitleCase(int)} method.
6178      *
6179      * @param   ch   the character to be converted.
6180      * @return  the titlecase equivalent of the character, if any;
6181      *          otherwise, the character itself.
6182      * @see     Character#isTitleCase(char)
6183      * @see     Character#toLowerCase(char)
6184      * @see     Character#toUpperCase(char)
6185      * @since   1.0.2
6186      */
6187     public static char toTitleCase(char ch) {
6188         return (char)toTitleCase((int)ch);
6189     }
6190 
6191     /**
6192      * Maps the character (Unicode code point) argument to its titlecase
6193      * form, relying on the case mapping data of the UnicodeData file.
6194      * When UnicodeData gives the character no explicit titlecase mapping,
6195      * and the character is not itself a titlecase char, its uppercase
6196      * mapping is returned as the equivalent titlecase mapping. A
6197      * character argument that is already a titlecase character is
6198      * returned unchanged.
6199      *
6200      * <p>Be aware that
6201      * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code>
6202      * is not guaranteed to be <code>true</code>; for some ranges of
6203      * characters it is not.
6204      *
6205      * @param   codePoint   the character (Unicode code point) to convert.
6206      * @return  the titlecase equivalent of the character if it has
6207      *          one; the character itself otherwise.
6208      * @see     Character#isTitleCase(int)
6209      * @see     Character#toLowerCase(int)
6210      * @see     Character#toUpperCase(int)
6211      * @since   1.5
6212      */
6213     public static int toTitleCase(int codePoint) {
6214         final CharacterData data = CharacterData.of(codePoint);
6215         return data.toTitleCase(codePoint);
6216     }
6217 
6218     /**
6219      * Gives the numeric value that the character <code>ch</code> has in
6220      * the specified radix.
6221      * <p>
6222      * The result is <code>-1</code> when the radix falls outside the
6223      * range <code>MIN_RADIX</code>&nbsp;&lt;=
6224      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code>, or when the
6225      * value of <code>ch</code> is not a valid digit in the specified
6226      * radix. A character is a valid digit if at least one of the
6227      * following is true:
6228      * <ul>
6229      * <li>The method <code>isDigit</code> is <code>true</code> of the
6230      *     character, and the Unicode decimal digit value of the character
6231      *     (or its single-character decomposition) is less than the
6232      *     specified radix. The decimal digit value is then returned.
6233      * <li>The character is one of the uppercase Latin letters
6234      *     <code>'A'</code> through <code>'Z'</code>, with a code less than
6235      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>. The value
6236      *     <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code> is then returned.
6237      * <li>The character is one of the lowercase Latin letters
6238      *     <code>'a'</code> through <code>'z'</code>, with a code less than
6239      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>. The value
6240      *     <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code> is then returned.
6241      * </ul>
6242      *
6243      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6244      * characters</a> are beyond the reach of this method. To cover
6245      * every Unicode character, supplementary ones included, call
6246      * the {@link #digit(int, int)} method instead.
6247      *
6248      * @param   ch      the character to convert.
6249      * @param   radix   the radix.
6250      * @return  the numeric value that the character represents in the
6251      *          specified radix.
6252      * @see     Character#forDigit(int, int)
6253      * @see     Character#isDigit(char)
6254      */
6255     public static int digit(char ch, int radix) {
6256         final int codePoint = ch;
6257         return digit(codePoint, radix);
6258     }
6259 
6260     /**
6261      * Gives the numeric value that the specified character (Unicode
6262      * code point) has in the specified radix.
6263      *
6264      * <p>The result is <code>-1</code> when the radix falls outside the
6265      * range <code>MIN_RADIX</code>&nbsp;&lt;=
6266      * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code>, or when the
6267      * character is not a valid digit in the specified radix. A
6268      * character is a valid digit if at least one of the following is
6269      * true:
6270      * <ul>
6271      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the
6272      *     character, and the Unicode decimal digit value of the character
6273      *     (or its single-character decomposition) is less than the
6274      *     specified radix. The decimal digit value is then returned.
6275      * <li>The character is one of the uppercase Latin letters
6276      *     <code>'A'</code> through <code>'Z'</code>, with a code less than
6277      *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>. The value
6278      *     <code>codePoint&nbsp;- 'A'&nbsp;+&nbsp;10</code> is then returned.
6279      * <li>The character is one of the lowercase Latin letters
6280      *     <code>'a'</code> through <code>'z'</code>, with a code less than
6281      *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>. The value
6282      *     <code>codePoint&nbsp;- 'a'&nbsp;+&nbsp;10</code> is then returned.
6283      * </ul>
6284      *
6285      * @param   codePoint the character (Unicode code point) to convert.
6286      * @param   radix   the radix.
6287      * @return  the numeric value that the character represents in the
6288      *          specified radix.
6289      * @see     Character#forDigit(int, int)
6290      * @see     Character#isDigit(int)
6291      * @since   1.5
6292      */
6293     public static int digit(int codePoint, int radix) {
6294         final CharacterData data = CharacterData.of(codePoint);
6295         return data.digit(codePoint, radix);
6296     }
6297 
6298     /**
6299      * Gives the <code>int</code> value represented by the specified
6300      * Unicode character. The character <code>'&#92;u216C'</code> (the
6301      * roman numeral fifty), for example, yields an int with a value of 50.
6302      * <p>
6303      * The letters A-Z have numeric values from 10 through 35 in their
6304      * uppercase (<code>'&#92;u0041'</code> through
6305      * <code>'&#92;u005A'</code>), lowercase
6306      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
6307      * full width variant (<code>'&#92;uFF21'</code> through
6308      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
6309      * <code>'&#92;uFF5A'</code>) forms. This is independent of the
6310      * Unicode specification, which assigns no numeric values to these
6311      * <code>char</code> values.
6312      * <p>
6313      * A character without a numeric value yields -1. A character
6314      * whose numeric value cannot be represented as a nonnegative
6315      * integer (a fractional value, for example) yields -2
6316      * instead.
6317      *
6318      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6319      * characters</a> are beyond the reach of this method. To cover
6320      * every Unicode character, supplementary ones included, call
6321      * the {@link #getNumericValue(int)} method instead.
6322      *
6323      * @param   ch      the character to convert.
6324      * @return  the numeric value of the character, as a nonnegative <code>int</code>
6325      *          value; -2 when the character has a numeric value that is not a
6326      *          nonnegative integer; -1 when the character has no numeric value.
6327      * @see     Character#forDigit(int, int)
6328      * @see     Character#isDigit(char)
6329      * @since   1.1
6330      */
6331     public static int getNumericValue(char ch) {
6332         final int codePoint = ch;
6333         return getNumericValue(codePoint);
6334     }
6335 
6336     /**
6337      * Gives the <code>int</code> value represented by the specified
6338      * character (Unicode code point). The character <code>'&#92;u216C'</code>
6339      * (the Roman numeral fifty), for example, yields an <code>int</code> of 50.
6340      * <p>
6341      * The letters A-Z have numeric values from 10 through 35 in their
6342      * uppercase (<code>'&#92;u0041'</code> through
6343      * <code>'&#92;u005A'</code>), lowercase
6344      * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
6345      * full width variant (<code>'&#92;uFF21'</code> through
6346      * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
6347      * <code>'&#92;uFF5A'</code>) forms. This is independent of the
6348      * Unicode specification, which assigns no numeric values to these
6349      * <code>char</code> values.
6350      * <p>
6351      * A character without a numeric value yields -1. A character
6352      * whose numeric value cannot be represented as a nonnegative
6353      * integer (a fractional value, for example) yields -2
6354      * instead.
6355      *
6356      * @param   codePoint the character (Unicode code point) to convert.
6357      * @return  the numeric value of the character, as a nonnegative <code>int</code>
6358      *          value; -2 when the character has a numeric value that is not a
6359      *          nonnegative integer; -1 when the character has no numeric value.
6360      * @see     Character#forDigit(int, int)
6361      * @see     Character#isDigit(int)
6362      * @since   1.5
6363      */
6364     public static int getNumericValue(int codePoint) {
6365         final CharacterData data = CharacterData.of(codePoint);
6366         return data.getNumericValue(codePoint);
6367     }
6368 
6369     /**
6370      * Determines if the specified character is ISO-LATIN-1 white space.
6371      * This method returns <code>true</code> for the following five
6372      * characters only:
6373      * <table>
6374      * <tr><td><code>'\t'</code></td>            <td><code>'&#92;u0009'</code></td>
6375      *     <td><code>HORIZONTAL TABULATION</code></td></tr>
6376      * <tr><td><code>'\n'</code></td>            <td><code>'&#92;u000A'</code></td>
6377      *     <td><code>NEW LINE</code></td></tr>
6378      * <tr><td><code>'\f'</code></td>            <td><code>'&#92;u000C'</code></td>
6379      *     <td><code>FORM FEED</code></td></tr>
6380      * <tr><td><code>'\r'</code></td>            <td><code>'&#92;u000D'</code></td>
6381      *     <td><code>CARRIAGE RETURN</code></td></tr>
6382      * <tr><td><code>'&nbsp;'</code></td>  <td><code>'&#92;u0020'</code></td>
6383      *     <td><code>SPACE</code></td></tr>
6384      * </table>
6385      *
6386      * @param      ch   the character to be tested.
6387      * @return     <code>true</code> if the character is ISO-LATIN-1 white
6388      *             space; <code>false</code> otherwise.
6389      * @see        Character#isSpaceChar(char)
6390      * @see        Character#isWhitespace(char)
6391      * @deprecated Replaced by isWhitespace(char).
6392      */
6393     @Deprecated
6394     public static boolean isSpace(char ch) {
6395         return (ch <= 0x0020) &&
6396             (((((1L << 0x0009) |
6397             (1L << 0x000A) |
6398             (1L << 0x000C) |
6399             (1L << 0x000D) |
6400             (1L << 0x0020)) >> ch) & 1L) != 0);
6401     }
6402 
6403 
6404     /**
6405      * Tests whether the specified character is a Unicode space character,
6406      * that is, a character which the Unicode standard specifies to be a
6407      * space character. The method returns true when the general category
6408      * type of the character is any of the following:
6409      * <ul>
6410      * <li> <code>SPACE_SEPARATOR</code>
6411      * <li> <code>LINE_SEPARATOR</code>
6412      * <li> <code>PARAGRAPH_SEPARATOR</code>
6413      * </ul>
6414      *
6415      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6416      * characters</a> are beyond the reach of this method. To cover
6417      * every Unicode character, supplementary ones included, call
6418      * the {@link #isSpaceChar(int)} method instead.
6419      *
6420      * @param   ch      the character to test.
6421      * @return  <code>true</code> when the character is a space character;
6422      *          <code>false</code> otherwise.
6423      * @see     Character#isWhitespace(char)
6424      * @since   1.1
6425      */
6426     public static boolean isSpaceChar(char ch) {
6427         final int codePoint = ch;
6428         return isSpaceChar(codePoint);
6429     }
6430 
6431     /**
6432      * Tests whether the specified character (Unicode code point) is a
6433      * Unicode space character, that is, a character which the Unicode
6434      * standard specifies to be a space character. The method returns
6435      * true when the general category type of the character is any of
6436      * the following:
6437      *
6438      * <ul>
6439      * <li> {@link #SPACE_SEPARATOR}
6440      * <li> {@link #LINE_SEPARATOR}
6441      * <li> {@link #PARAGRAPH_SEPARATOR}
6442      * </ul>
6443      *
6444      * @param   codePoint the character (Unicode code point) to test.
6445      * @return  <code>true</code> when the character is a space character;
6446      *          <code>false</code> otherwise.
6447      * @see     Character#isWhitespace(int)
6448      * @since   1.5
6449      */
6450     public static boolean isSpaceChar(int codePoint) {
6451         final int spaceMask = (1 << Character.SPACE_SEPARATOR)
6452                             | (1 << Character.LINE_SEPARATOR)
6453                             | (1 << Character.PARAGRAPH_SEPARATOR);
6454         return ((spaceMask >> getType(codePoint)) & 1) != 0;
6455     }
6456 
6457     /**
6458      * Tests whether the specified character is white space according to Java.
6459      * That is the case if and only if it satisfies one of these criteria:
6460      * <ul>
6461      * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>,
6462      *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
6463      *      and is not a non-breaking space (<code>'&#92;u00A0'</code>,
6464      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
6465      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
6466      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
6467      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
6468      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
6469      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
6470      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
6471      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
6472      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
6473      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
6474      * </ul>
6475      *
6476      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6477      * characters</a> are beyond the reach of this method. To cover
6478      * every Unicode character, supplementary ones included, call
6479      * the {@link #isWhitespace(int)} method instead.
6480      *
6481      * @param   ch the character to test.
6482      * @return  <code>true</code> when the character is a Java whitespace
6483      *          character; <code>false</code> otherwise.
6484      * @see     Character#isSpaceChar(char)
6485      * @since   1.1
6486      */
6487     public static boolean isWhitespace(char ch) {
6488         final int codePoint = ch;
6489         return isWhitespace(codePoint);
6490     }
6491 
6492     /**
6493      * Tests whether the specified character (Unicode code point) is
6494      * white space according to Java. A character counts as a Java
6495      * whitespace character if and only if it satisfies one of the
6496      * following criteria:
6497      * <ul>
6498      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6499      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6500      *      and is not a non-breaking space (<code>'&#92;u00A0'</code>,
6501      *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
6502      * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
6503      * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
6504      * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
6505      * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
6506      * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
6507      * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
6508      * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
6509      * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
6510      * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
6511      * </ul>
6512      *
6513      * @param   codePoint the character (Unicode code point) to test.
6514      * @return  <code>true</code> when the character is a Java whitespace
6515      *          character; <code>false</code> otherwise.
6516      * @see     Character#isSpaceChar(int)
6517      * @since   1.5
6518      */
6519     public static boolean isWhitespace(int codePoint) {
6520         final CharacterData data = CharacterData.of(codePoint);
6521         return data.isWhitespace(codePoint);
6522     }
6523 
6524     /**
6525      * Tests whether the specified character is an ISO control
6526      * character, that is, a character whose code lies in the range
6527      * <code>'&#92;u0000'</code> through <code>'&#92;u001F'</code> or
6528      * in the range <code>'&#92;u007F'</code> through
6529      * <code>'&#92;u009F'</code>.
6530      *
6531      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6532      * characters</a> are beyond the reach of this method. To cover
6533      * every Unicode character, supplementary ones included, call
6534      * the {@link #isISOControl(int)} method instead.
6535      *
6536      * @param   ch      the character to test.
6537      * @return  <code>true</code> when the character is an ISO control
6538      *          character; <code>false</code> otherwise.
6539      * @see     Character#isSpaceChar(char)
6540      * @see     Character#isWhitespace(char)
6541      * @since   1.1
6542      */
6543     public static boolean isISOControl(char ch) {
6544         final int codePoint = ch;
6545         return isISOControl(codePoint);
6546     }
6547 
6548     /**
6549      * Tests whether the referenced character (Unicode code point) is an
6550      * ISO control character, that is, a character whose code lies in
6551      * the range <code>'&#92;u0000'</code> through
6552      * <code>'&#92;u001F'</code> or in the range
6553      * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
6554      *
6555      * @param   codePoint the character (Unicode code point) to test.
6556      * @return  <code>true</code> when the character is an ISO control
6557      *          character; <code>false</code> otherwise.
6558      * @see     Character#isSpaceChar(int)
6559      * @see     Character#isWhitespace(int)
6560      * @since   1.5
6561      */
6562     public static boolean isISOControl(int codePoint) {
6563         if (codePoint > 0x9F) {
6564             return false;
6565         }
6566         // Unsigned shift by 5 is zero only for 0x00..0x1F (never for negatives).
6567         return codePoint >= 0x7F || (codePoint >>> 5) == 0;
6568     }
6569 
6570     /**
6571      * Gives a value that indicates the general category of a character.
6572      *
6573      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6574      * characters</a> are beyond the reach of this method. To cover
6575      * every Unicode character, supplementary ones included, call
6576      * the {@link #getType(int)} method instead.
6577      *
6578      * @param   ch      the character to test.
6579      * @return  the character's general category, as an <code>int</code> value.
6580      * @see     Character#COMBINING_SPACING_MARK
6581      * @see     Character#CONNECTOR_PUNCTUATION
6582      * @see     Character#CONTROL
6583      * @see     Character#CURRENCY_SYMBOL
6584      * @see     Character#DASH_PUNCTUATION
6585      * @see     Character#DECIMAL_DIGIT_NUMBER
6586      * @see     Character#ENCLOSING_MARK
6587      * @see     Character#END_PUNCTUATION
6588      * @see     Character#FINAL_QUOTE_PUNCTUATION
6589      * @see     Character#FORMAT
6590      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6591      * @see     Character#LETTER_NUMBER
6592      * @see     Character#LINE_SEPARATOR
6593      * @see     Character#LOWERCASE_LETTER
6594      * @see     Character#MATH_SYMBOL
6595      * @see     Character#MODIFIER_LETTER
6596      * @see     Character#MODIFIER_SYMBOL
6597      * @see     Character#NON_SPACING_MARK
6598      * @see     Character#OTHER_LETTER
6599      * @see     Character#OTHER_NUMBER
6600      * @see     Character#OTHER_PUNCTUATION
6601      * @see     Character#OTHER_SYMBOL
6602      * @see     Character#PARAGRAPH_SEPARATOR
6603      * @see     Character#PRIVATE_USE
6604      * @see     Character#SPACE_SEPARATOR
6605      * @see     Character#START_PUNCTUATION
6606      * @see     Character#SURROGATE
6607      * @see     Character#TITLECASE_LETTER
6608      * @see     Character#UNASSIGNED
6609      * @see     Character#UPPERCASE_LETTER
6610      * @since   1.1
6611      */
6612     public static int getType(char ch) {
6613         final int codePoint = ch;
6614         return getType(codePoint);
6615     }
6616 
6617     /**
6618      * Gives a value that indicates the general category of a character.
6619      *
6620      * @param   codePoint the character (Unicode code point) to test.
6621      * @return  the character's general category, as an <code>int</code> value.
6622      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6623      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6624      * @see     Character#CONTROL CONTROL
6625      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6626      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6627      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6628      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6629      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6630      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6631      * @see     Character#FORMAT FORMAT
6632      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6633      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6634      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6635      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6636      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6637      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6638      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6639      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6640      * @see     Character#OTHER_LETTER OTHER_LETTER
6641      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6642      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6643      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6644      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6645      * @see     Character#PRIVATE_USE PRIVATE_USE
6646      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6647      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6648      * @see     Character#SURROGATE SURROGATE
6649      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6650      * @see     Character#UNASSIGNED UNASSIGNED
6651      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6652      * @since   1.5
6653      */
6654     public static int getType(int codePoint) {
6655         final CharacterData data = CharacterData.of(codePoint);
6656         return data.getType(codePoint);
6657     }
6658 
6659     /**
6660      * Produces the character that represents a specific digit in the
6661      * specified radix. The null character (<code>'&#92;u0000'</code>) is
6662      * returned if the value of <code>radix</code> is not a valid radix,
6663      * or if the value of <code>digit</code> is not a valid digit in the
6664      * specified radix.
6665      * <p>
6666      * A <code>radix</code> argument is valid when it is greater than or
6667      * equal to <code>MIN_RADIX</code> and less than or equal to
6668      * <code>MAX_RADIX</code>. A <code>digit</code> argument is valid when
6669      * <code>0&nbsp;&lt;=&nbsp;digit&nbsp;&lt;&nbsp;radix</code>.
6670      * <p>
6671      * For a digit less than 10 the result is
6672      * <code>'0'&nbsp;+ digit</code>; for any other digit it is
6673      * <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code>.
6674      *
6675      * @param   digit   the number to convert to a character.
6676      * @param   radix   the radix.
6677      * @return  the <code>char</code> that represents the specified digit
6678      *          in the specified radix.
6679      * @see     Character#MIN_RADIX
6680      * @see     Character#MAX_RADIX
6681      * @see     Character#digit(char, int)
6682      */
6683     public static char forDigit(int digit, int radix) {
6684         final boolean radixValid =
6685             radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX;
6686         final boolean digitValid = digit >= 0 && digit < radix;
6687         // An invalid radix or digit is reported with the null character.
6688         if (!radixValid || !digitValid) {
6689             return '\0';
6690         }
6691         // 0-9 map onto '0'..'9'; larger digits continue from 'a'.
6692         final int code = (digit < 10) ? ('0' + digit) : ('a' - 10 + digit);
6693         return (char)code;
6694     }
6695 
6696     /**
6697      * Gives the Unicode directionality property of the given character.
6698      * Character directionality is used to calculate the visual ordering
6699      * of text. Undefined <code>char</code> values have the directionality
6700      * value <code>DIRECTIONALITY_UNDEFINED</code>.
6701      *
6702      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6703      * characters</a> are beyond the reach of this method. To cover
6704      * every Unicode character, supplementary ones included, call
6705      * the {@link #getDirectionality(int)} method instead.
6706      *
6707      * @param  ch the <code>char</code> whose directionality property is requested.
6708      * @return the directionality property of the <code>char</code> value.
6709      *
6710      * @see Character#DIRECTIONALITY_UNDEFINED
6711      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6712      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6713      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6714      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6715      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6716      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6717      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6718      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6719      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6720      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6721      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6722      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6723      * @see Character#DIRECTIONALITY_WHITESPACE
6724      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6725      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6726      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6727      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6728      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6729      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6730      * @since 1.4
6731      */
6732     public static byte getDirectionality(char ch) {
6733         final int codePoint = ch;
6734         return getDirectionality(codePoint);
6735     }
6736 
6737     /**
6738      * Gives the Unicode directionality property of the given character
6739      * (Unicode code point). Character directionality is used to calculate
6740      * the visual ordering of text. An undefined character has the
6741      * directionality value {@link #DIRECTIONALITY_UNDEFINED}.
6742      *
6743      * @param   codePoint the character (Unicode code point) whose
6744      *          directionality property is requested.
6745      * @return the directionality property of the character.
6746      *
6747      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
6748      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
6749      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
6750      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6751      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
6752      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6753      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6754      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
6755      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6756      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
6757      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
6758      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
6759      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
6760      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
6761      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
6762      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6763      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6764      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6765      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6766      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6767      * @since    1.5
6768      */
6769     public static byte getDirectionality(int codePoint) {
6770         final CharacterData data = CharacterData.of(codePoint);
6771         return data.getDirectionality(codePoint);
6772     }
6773 
6774     /**
6775      * Tests whether the character is mirrored according to the Unicode
6776      * specification. The glyph of a mirrored character should be mirrored
6777      * horizontally when displayed in text that is right-to-left. For
6778      * example, <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically
6779      * defined to be an <i>opening parenthesis</i>; it appears as a "("
6780      * in left-to-right text but as a ")" in right-to-left text.
6781      *
6782      * <p><b>Note:</b> <a href="#supplementary">Supplementary
6783      * characters</a> are beyond the reach of this method. To cover
6784      * every Unicode character, supplementary ones included, call
6785      * the {@link #isMirrored(int)} method instead.
6786      *
6787      * @param  ch the <code>char</code> whose mirrored property is requested
6788      * @return <code>true</code> when the char is mirrored; <code>false</code>
6789      *         when the <code>char</code> is not mirrored or is not defined.
6790      * @since 1.4
6791      */
6792     public static boolean isMirrored(char ch) {
6793         final int codePoint = ch;
6794         return isMirrored(codePoint);
6795     }
6796 
6797     /**
6798      * Tests whether the specified character (Unicode code point) is
6799      * mirrored according to the Unicode specification. The glyph of a
6800      * mirrored character should be mirrored horizontally when displayed
6801      * in text that is right-to-left. For example,
6802      * <code>'&#92;u0028'</code> LEFT PARENTHESIS is semantically defined
6803      * to be an <i>opening parenthesis</i>; it appears as a "(" in
6804      * left-to-right text but as a ")" in right-to-left text.
6805      *
6806      * @param   codePoint the character (Unicode code point) to test.
6807      * @return  <code>true</code> when the character is mirrored; <code>false</code>
6808      *          when the character is not mirrored or is not defined.
6809      * @since   1.5
6810      */
6811     public static boolean isMirrored(int codePoint) {
6812         final CharacterData data = CharacterData.of(codePoint);
6813         return data.isMirrored(codePoint);
6814     }
6815 
6816     /**
6817      * Compares this <code>Character</code> numerically with another one.
6818      *
6819      * @param   anotherCharacter   the <code>Character</code> to compare against.
6820      * @return  <code>0</code> when the argument <code>Character</code> equals
6821      *          this <code>Character</code>; a value less than
6822      *          <code>0</code> when this <code>Character</code> is numerically
6823      *          less than the <code>Character</code> argument; a value greater
6824      *          than <code>0</code> when this <code>Character</code> is
6825      *          numerically greater than the <code>Character</code> argument
6826      *          (unsigned comparison). The comparison is strictly numerical
6827      *          and does not depend on the locale.
6828      * @since   1.2
6829      */
6830     public int compareTo(Character anotherCharacter) {
6831         final char other = anotherCharacter.value;
6832         return compare(value, other);
6833     }
6834 
6835     /**
6836      * Compares two {@code char} values numerically.
6837      * The value returned is identical to what would be returned by:
6838      * <pre>
6839      *    Character.valueOf(x).compareTo(Character.valueOf(y))
6840      * </pre>
6841      *
6842      * @param  x the first {@code char} to compare
6843      * @param  y the second {@code char} to compare
6844      * @return the value {@code 0} if {@code x == y};
6845      *         a value less than {@code 0} if {@code x < y}; and
6846      *         a value greater than {@code 0} if {@code x > y}
6847      * @since 1.7
6848      */
6849     public static int compare(char x, char y) {
6850         return x - y;
6851     }
6852 
6853     /**
6854      * Converts the character (Unicode code point) argument to uppercase using
6855      * information from the UnicodeData file.
6856      * <p>
6857      *
6858      * @param   codePoint   the character (Unicode code point) to be converted.
6859      * @return  either the uppercase equivalent of the character, if
6860      *          any, or an error flag (<code>Character.ERROR</code>)
6861      *          that indicates that a 1:M <code>char</code> mapping exists.
6862      * @see     Character#isLowerCase(char)
6863      * @see     Character#isUpperCase(char)
6864      * @see     Character#toLowerCase(char)
6865      * @see     Character#toTitleCase(char)
6866      * @since 1.4
6867      */
6868     static int toUpperCaseEx(int codePoint) {
6869         assert isValidCodePoint(codePoint);
6870         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
6871     }
6872 
6873     /**
6874      * Converts the character (Unicode code point) argument to uppercase using case
6875      * mapping information from the SpecialCasing file in the Unicode
6876      * specification. If a character has no explicit uppercase
6877      * mapping, then the <code>char</code> itself is returned in the
6878      * <code>char[]</code>.
6879      *
6880      * @param   codePoint   the character (Unicode code point) to be converted.
6881      * @return a <code>char[]</code> with the uppercased character.
6882      * @since 1.4
6883      */
6884     static char[] toUpperCaseCharArray(int codePoint) {
6885         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
6886         assert isBmpCodePoint(codePoint);
6887         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
6888     }
6889 
6890     /**
6891      * The number of bits used to represent a {@code char} value in unsigned
6892      * binary form, constant {@code 16}.
6893      *
6894      * @since 1.5
6895      */
6896     public static final int SIZE = 16;
6897 
6898     /**
6899      * Returns the value obtained by reversing the order of the bytes in the
6900      * specified <tt>char</tt> value.
6901      *
6902      * @return the value obtained by reversing (or, equivalently, swapping)
6903      *     the bytes in the specified <tt>char</tt> value.
6904      * @since 1.5
6905      */
6906     public static char reverseBytes(char ch) {
6907         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
6908     }
6909 
6910     /**
6911      * Returns the Unicode name of the specified character
6912      * <code>codePoint</code>, or null if the code point is
6913      * {@link #UNASSIGNED unassigned}.
6914      * <p>
6915      * Note: if the specified character is not assigned a name by
6916      * the <i>UnicodeData</i> file (part of the Unicode Character
6917      * Database maintained by the Unicode Consortium), the returned
6918      * name is the same as the result of expression
6919      *
6920      * <blockquote><code>
6921      *     Character.UnicodeBlock.of(codePoint)
6922      *                           .toString()
6923      *                           .replace('_', ' ')
6924      *     + " "
6925      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6926      *
6927      * </code></blockquote>
6928      *
6929      * @param  codePoint the character (Unicode code point)
6930      *
6931      * @return the Unicode name of the specified character, or null if
6932      *         the code point is unassigned.
6933      *
6934      * @exception IllegalArgumentException if the specified
6935      *            <code>codePoint</code> is not a valid Unicode
6936      *            code point.
6937      *
6938      * @since 1.7
6939      */
6940     public static String getName(int codePoint) {
6941         if (!isValidCodePoint(codePoint)) {
6942             throw new IllegalArgumentException();
6943         }
6944         String name = CharacterName.get(codePoint);
6945         if (name != null)
6946             return name;
6947         if (getType(codePoint) == UNASSIGNED)
6948             return null;
6949         UnicodeBlock block = UnicodeBlock.of(codePoint);
6950         if (block != null)
6951             return block.toString().replace('_', ' ') + " "
6952                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6953         // should never come here
6954         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6955     }
6956 }