1 /*
   2  * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 11.0.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  * <p>
  59  * The code point, U+32FF, is reserved by the Unicode Consortium
  60  * to represent the Japanese square character for the new era that begins
  61  * May 2019. Relevant methods in the Character class return the same
  62  * properties as for the existing Japanese era characters (e.g., U+337E for
  63  * "Meizi"). For the details of the code point, refer to
  64  * <a href="http://blog.unicode.org/2018/09/new-japanese-era.html">
  65  * http://blog.unicode.org/2018/09/new-japanese-era.html</a>.
  66  *
  67  * <h3><a id="unicode">Unicode Character Representations</a></h3>
  68  *
  69  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
  71  * original Unicode specification, which defined characters as
  72  * fixed-width 16-bit entities. The Unicode Standard has since been
  73  * changed to allow for characters whose representation requires more
  74  * than 16 bits.  The range of legal <em>code point</em>s is now
  75  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  76  * (Refer to the <a
  77  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  78  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  79  * Standard.)
  80  *
  81  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
  82  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  83  * <a id="supplementary">Characters</a> whose code points are greater
  84  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  85  * platform uses the UTF-16 representation in {@code char} arrays and
  86  * in the {@code String} and {@code StringBuffer} classes. In
  87  * this representation, supplementary characters are represented as a pair
  88  * of {@code char} values, the first from the <em>high-surrogates</em>
  89  * range, (\uD800-\uDBFF), the second from the
  90  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  91  *
  92  * <p>A {@code char} value, therefore, represents Basic
  93  * Multilingual Plane (BMP) code points, including the surrogate
  94  * code points, or code units of the UTF-16 encoding. An
  95  * {@code int} value represents all Unicode code points,
  96  * including supplementary code points. The lower (least significant)
  97  * 21 bits of {@code int} are used to represent Unicode code
  98  * points and the upper (most significant) 11 bits must be zero.
  99  * Unless otherwise specified, the behavior with respect to
 100  * supplementary characters and surrogate {@code char} values is
 101  * as follows:
 102  *
 103  * <ul>
 104  * <li>The methods that only accept a {@code char} value cannot support
 105  * supplementary characters. They treat {@code char} values from the
 106  * surrogate ranges as undefined characters. For example,
 107  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 108  * this specific value if followed by any low-surrogate value in a string
 109  * would represent a letter.
 110  *
 111  * <li>The methods that accept an {@code int} value support all
 112  * Unicode characters, including supplementary characters. For
 113  * example, {@code Character.isLetter(0x2F81A)} returns
 114  * {@code true} because the code point value represents a letter
 115  * (a CJK ideograph).
 116  * </ul>
 117  *
 118  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 119  * used for character values in the range between U+0000 and U+10FFFF,
 120  * and <em>Unicode code unit</em> is used for 16-bit
 121  * {@code char} values that are code units of the <em>UTF-16</em>
 122  * encoding. For more information on Unicode terminology, refer to the
 123  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 124  *
 125  * @author  Lee Boynton
 126  * @author  Guy Steele
 127  * @author  Akira Tanaka
 128  * @author  Martin Buchholz
 129  * @author  Ulf Zibis
 130  * @since   1.0
 131  */
 132 public final
 133 class Character implements java.io.Serializable, Comparable<Character> {
 134     /**
 135      * The minimum radix available for conversion to and from strings.
 136      * The constant value of this field is the smallest value permitted
 137      * for the radix argument in radix-conversion methods such as the
 138      * {@code digit} method, the {@code forDigit} method, and the
 139      * {@code toString} method of class {@code Integer}.
 140      *
 141      * @see     Character#digit(char, int)
 142      * @see     Character#forDigit(int, int)
 143      * @see     Integer#toString(int, int)
 144      * @see     Integer#valueOf(String)
 145      */
 146     public static final int MIN_RADIX = 2;
 147 
 148     /**
 149      * The maximum radix available for conversion to and from strings.
 150      * The constant value of this field is the largest value permitted
 151      * for the radix argument in radix-conversion methods such as the
 152      * {@code digit} method, the {@code forDigit} method, and the
 153      * {@code toString} method of class {@code Integer}.
 154      *
 155      * @see     Character#digit(char, int)
 156      * @see     Character#forDigit(int, int)
 157      * @see     Integer#toString(int, int)
 158      * @see     Integer#valueOf(String)
 159      */
 160     public static final int MAX_RADIX = 36;
 161 
 162     /**
 163      * The constant value of this field is the smallest value of type
 164      * {@code char}, {@code '\u005Cu0000'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MIN_VALUE = '\u0000';
 169 
 170     /**
 171      * The constant value of this field is the largest value of type
 172      * {@code char}, {@code '\u005CuFFFF'}.
 173      *
 174      * @since   1.0.2
 175      */
 176     public static final char MAX_VALUE = '\uFFFF';
 177 
 178     /**
 179      * The {@code Class} instance representing the primitive type
 180      * {@code char}.
 181      *
 182      * @since   1.1
 183      */
 184     @SuppressWarnings("unchecked")
 185     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 186 
 187     /*
 188      * Normative general types
 189      */
 190 
 191     /*
 192      * General character types
 193      */
 194 
 195     /**
 196      * General category "Cn" in the Unicode specification.
 197      * @since   1.1
 198      */
 199     public static final byte UNASSIGNED = 0;
 200 
 201     /**
 202      * General category "Lu" in the Unicode specification.
 203      * @since   1.1
 204      */
 205     public static final byte UPPERCASE_LETTER = 1;
 206 
 207     /**
 208      * General category "Ll" in the Unicode specification.
 209      * @since   1.1
 210      */
 211     public static final byte LOWERCASE_LETTER = 2;
 212 
 213     /**
 214      * General category "Lt" in the Unicode specification.
 215      * @since   1.1
 216      */
 217     public static final byte TITLECASE_LETTER = 3;
 218 
 219     /**
 220      * General category "Lm" in the Unicode specification.
 221      * @since   1.1
 222      */
 223     public static final byte MODIFIER_LETTER = 4;
 224 
 225     /**
 226      * General category "Lo" in the Unicode specification.
 227      * @since   1.1
 228      */
 229     public static final byte OTHER_LETTER = 5;
 230 
 231     /**
 232      * General category "Mn" in the Unicode specification.
 233      * @since   1.1
 234      */
 235     public static final byte NON_SPACING_MARK = 6;
 236 
 237     /**
 238      * General category "Me" in the Unicode specification.
 239      * @since   1.1
 240      */
 241     public static final byte ENCLOSING_MARK = 7;
 242 
 243     /**
 244      * General category "Mc" in the Unicode specification.
 245      * @since   1.1
 246      */
 247     public static final byte COMBINING_SPACING_MARK = 8;
 248 
 249     /**
 250      * General category "Nd" in the Unicode specification.
 251      * @since   1.1
 252      */
 253     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 254 
 255     /**
 256      * General category "Nl" in the Unicode specification.
 257      * @since   1.1
 258      */
 259     public static final byte LETTER_NUMBER = 10;
 260 
 261     /**
 262      * General category "No" in the Unicode specification.
 263      * @since   1.1
 264      */
 265     public static final byte OTHER_NUMBER = 11;
 266 
 267     /**
 268      * General category "Zs" in the Unicode specification.
 269      * @since   1.1
 270      */
 271     public static final byte SPACE_SEPARATOR = 12;
 272 
 273     /**
 274      * General category "Zl" in the Unicode specification.
 275      * @since   1.1
 276      */
 277     public static final byte LINE_SEPARATOR = 13;
 278 
 279     /**
 280      * General category "Zp" in the Unicode specification.
 281      * @since   1.1
 282      */
 283     public static final byte PARAGRAPH_SEPARATOR = 14;
 284 
 285     /**
 286      * General category "Cc" in the Unicode specification.
 287      * @since   1.1
 288      */
 289     public static final byte CONTROL = 15;
 290 
 291     /**
 292      * General category "Cf" in the Unicode specification.
 293      * @since   1.1
 294      */
 295     public static final byte FORMAT = 16;
 296 
 297     /**
 298      * General category "Co" in the Unicode specification.
 299      * @since   1.1
 300      */
 301     public static final byte PRIVATE_USE = 18;
 302 
 303     /**
 304      * General category "Cs" in the Unicode specification.
 305      * @since   1.1
 306      */
 307     public static final byte SURROGATE = 19;
 308 
 309     /**
 310      * General category "Pd" in the Unicode specification.
 311      * @since   1.1
 312      */
 313     public static final byte DASH_PUNCTUATION = 20;
 314 
 315     /**
 316      * General category "Ps" in the Unicode specification.
 317      * @since   1.1
 318      */
 319     public static final byte START_PUNCTUATION = 21;
 320 
 321     /**
 322      * General category "Pe" in the Unicode specification.
 323      * @since   1.1
 324      */
 325     public static final byte END_PUNCTUATION = 22;
 326 
 327     /**
 328      * General category "Pc" in the Unicode specification.
 329      * @since   1.1
 330      */
 331     public static final byte CONNECTOR_PUNCTUATION = 23;
 332 
 333     /**
 334      * General category "Po" in the Unicode specification.
 335      * @since   1.1
 336      */
 337     public static final byte OTHER_PUNCTUATION = 24;
 338 
 339     /**
 340      * General category "Sm" in the Unicode specification.
 341      * @since   1.1
 342      */
 343     public static final byte MATH_SYMBOL = 25;
 344 
 345     /**
 346      * General category "Sc" in the Unicode specification.
 347      * @since   1.1
 348      */
 349     public static final byte CURRENCY_SYMBOL = 26;
 350 
 351     /**
 352      * General category "Sk" in the Unicode specification.
 353      * @since   1.1
 354      */
 355     public static final byte MODIFIER_SYMBOL = 27;
 356 
 357     /**
 358      * General category "So" in the Unicode specification.
 359      * @since   1.1
 360      */
 361     public static final byte OTHER_SYMBOL = 28;
 362 
 363     /**
 364      * General category "Pi" in the Unicode specification.
 365      * @since   1.4
 366      */
 367     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 368 
 369     /**
 370      * General category "Pf" in the Unicode specification.
 371      * @since   1.4
 372      */
 373     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 374 
 375     /**
 376      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 377      */
 378     static final int ERROR = 0xFFFFFFFF;
 379 
 380 
 381     /**
 382      * Undefined bidirectional character type. Undefined {@code char}
 383      * values have undefined directionality in the Unicode specification.
 384      * @since 1.4
 385      */
 386     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 387 
 388     /**
 389      * Strong bidirectional character type "L" in the Unicode specification.
 390      * @since 1.4
 391      */
 392     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 393 
 394     /**
 395      * Strong bidirectional character type "R" in the Unicode specification.
 396      * @since 1.4
 397      */
 398     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 399 
 400     /**
     * Strong bidirectional character type "AL" in the Unicode specification.
 402      * @since 1.4
 403      */
 404     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 405 
 406     /**
 407      * Weak bidirectional character type "EN" in the Unicode specification.
 408      * @since 1.4
 409      */
 410     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 411 
 412     /**
 413      * Weak bidirectional character type "ES" in the Unicode specification.
 414      * @since 1.4
 415      */
 416     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 417 
 418     /**
 419      * Weak bidirectional character type "ET" in the Unicode specification.
 420      * @since 1.4
 421      */
 422     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 423 
 424     /**
 425      * Weak bidirectional character type "AN" in the Unicode specification.
 426      * @since 1.4
 427      */
 428     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 429 
 430     /**
 431      * Weak bidirectional character type "CS" in the Unicode specification.
 432      * @since 1.4
 433      */
 434     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 435 
 436     /**
 437      * Weak bidirectional character type "NSM" in the Unicode specification.
 438      * @since 1.4
 439      */
 440     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 441 
 442     /**
 443      * Weak bidirectional character type "BN" in the Unicode specification.
 444      * @since 1.4
 445      */
 446     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 447 
 448     /**
 449      * Neutral bidirectional character type "B" in the Unicode specification.
 450      * @since 1.4
 451      */
 452     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 453 
 454     /**
 455      * Neutral bidirectional character type "S" in the Unicode specification.
 456      * @since 1.4
 457      */
 458     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 459 
 460     /**
 461      * Neutral bidirectional character type "WS" in the Unicode specification.
 462      * @since 1.4
 463      */
 464     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 465 
 466     /**
 467      * Neutral bidirectional character type "ON" in the Unicode specification.
 468      * @since 1.4
 469      */
 470     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 471 
 472     /**
 473      * Strong bidirectional character type "LRE" in the Unicode specification.
 474      * @since 1.4
 475      */
 476     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 477 
 478     /**
 479      * Strong bidirectional character type "LRO" in the Unicode specification.
 480      * @since 1.4
 481      */
 482     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 483 
 484     /**
 485      * Strong bidirectional character type "RLE" in the Unicode specification.
 486      * @since 1.4
 487      */
 488     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 489 
 490     /**
 491      * Strong bidirectional character type "RLO" in the Unicode specification.
 492      * @since 1.4
 493      */
 494     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 495 
 496     /**
 497      * Weak bidirectional character type "PDF" in the Unicode specification.
 498      * @since 1.4
 499      */
 500     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 501 
 502     /**
 503      * Weak bidirectional character type "LRI" in the Unicode specification.
 504      * @since 9
 505      */
 506     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 507 
 508     /**
 509      * Weak bidirectional character type "RLI" in the Unicode specification.
 510      * @since 9
 511      */
 512     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 513 
 514     /**
 515      * Weak bidirectional character type "FSI" in the Unicode specification.
 516      * @since 9
 517      */
 518     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 519 
 520     /**
 521      * Weak bidirectional character type "PDI" in the Unicode specification.
 522      * @since 9
 523      */
 524     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 525 
 526     /**
 527      * The minimum value of a
 528      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 529      * Unicode high-surrogate code unit</a>
 530      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 531      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 532      *
 533      * @since 1.5
 534      */
 535     public static final char MIN_HIGH_SURROGATE = '\uD800';
 536 
 537     /**
 538      * The maximum value of a
 539      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 540      * Unicode high-surrogate code unit</a>
 541      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 542      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 543      *
 544      * @since 1.5
 545      */
 546     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 547 
 548     /**
 549      * The minimum value of a
 550      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 551      * Unicode low-surrogate code unit</a>
 552      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 553      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 554      *
 555      * @since 1.5
 556      */
 557     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 558 
 559     /**
 560      * The maximum value of a
 561      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 562      * Unicode low-surrogate code unit</a>
 563      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 564      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 569 
 570     /**
 571      * The minimum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 577 
 578     /**
 579      * The maximum value of a Unicode surrogate code unit in the
 580      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 581      *
 582      * @since 1.5
 583      */
 584     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 585 
 586     /**
 587      * The minimum value of a
 588      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 589      * Unicode supplementary code point</a>, constant {@code U+10000}.
 590      *
 591      * @since 1.5
 592      */
 593     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 594 
 595     /**
 596      * The minimum value of a
 597      * <a href="http://www.unicode.org/glossary/#code_point">
 598      * Unicode code point</a>, constant {@code U+0000}.
 599      *
 600      * @since 1.5
 601      */
 602     public static final int MIN_CODE_POINT = 0x000000;
 603 
 604     /**
 605      * The maximum value of a
 606      * <a href="http://www.unicode.org/glossary/#code_point">
 607      * Unicode code point</a>, constant {@code U+10FFFF}.
 608      *
 609      * @since 1.5
 610      */
 611     public static final int MAX_CODE_POINT = 0X10FFFF;
 612 
 613 
 614     /**
 615      * Instances of this class represent particular subsets of the Unicode
 616      * character set.  The only family of subsets defined in the
 617      * {@code Character} class is {@link Character.UnicodeBlock}.
 618      * Other portions of the Java API may define other subsets for their
 619      * own purposes.
 620      *
 621      * @since 1.2
 622      */
 623     public static class Subset  {
 624 
 625         private String name;
 626 
 627         /**
 628          * Constructs a new {@code Subset} instance.
 629          *
 630          * @param  name  The name of this subset
 631          * @throws NullPointerException if name is {@code null}
 632          */
 633         protected Subset(String name) {
 634             if (name == null) {
 635                 throw new NullPointerException("name");
 636             }
 637             this.name = name;
 638         }
 639 
 640         /**
 641          * Compares two {@code Subset} objects for equality.
 642          * This method returns {@code true} if and only if
 643          * {@code this} and the argument refer to the same
 644          * object; since this method is {@code final}, this
 645          * guarantee holds for all subclasses.
 646          */
 647         public final boolean equals(Object obj) {
 648             return (this == obj);
 649         }
 650 
 651         /**
 652          * Returns the standard hash code as defined by the
 653          * {@link Object#hashCode} method.  This method
 654          * is {@code final} in order to ensure that the
 655          * {@code equals} and {@code hashCode} methods will
 656          * be consistent in all subclasses.
 657          */
 658         public final int hashCode() {
 659             return super.hashCode();
 660         }
 661 
 662         /**
 663          * Returns the name of this subset.
 664          */
 665         public final String toString() {
 666             return name;
 667         }
 668     }
 669 
 670     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 671     // for the latest specification of Unicode Blocks.
 672 
 673     /**
 674      * A family of character subsets representing the character blocks in the
 675      * Unicode specification. Character blocks generally define characters
 676      * used for a specific script or purpose. A character is contained by
 677      * at most one Unicode block.
 678      *
 679      * @since 1.2
 680      */
 681     public static final class UnicodeBlock extends Subset {
 682         /**
 683          * 649  - the expected number of entities
 684          * 0.75 - the default load factor of HashMap
 685          */
 686         private static Map<String, UnicodeBlock> map =
 687                 new HashMap<>((int)(649 / 0.75f + 1.0f));
 688 
 689         /**
 690          * Creates a UnicodeBlock with the given identifier name.
 691          * This name must be the same as the block identifier.
 692          */
 693         private UnicodeBlock(String idName) {
 694             super(idName);
 695             map.put(idName, this);
 696         }
 697 
 698         /**
 699          * Creates a UnicodeBlock with the given identifier name and
 700          * alias name.
 701          */
 702         private UnicodeBlock(String idName, String alias) {
 703             this(idName);
 704             map.put(alias, this);
 705         }
 706 
 707         /**
 708          * Creates a UnicodeBlock with the given identifier name and
 709          * alias names.
 710          */
 711         private UnicodeBlock(String idName, String... aliases) {
 712             this(idName);
 713             for (String alias : aliases)
 714                 map.put(alias, this);
 715         }
 716 
 717         /**
 718          * Constant for the "Basic Latin" Unicode character block.
 719          * @since 1.2
 720          */
 721         public static final UnicodeBlock  BASIC_LATIN =
 722             new UnicodeBlock("BASIC_LATIN",
 723                              "BASIC LATIN",
 724                              "BASICLATIN");
 725 
 726         /**
 727          * Constant for the "Latin-1 Supplement" Unicode character block.
 728          * @since 1.2
 729          */
 730         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 731             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 732                              "LATIN-1 SUPPLEMENT",
 733                              "LATIN-1SUPPLEMENT");
 734 
 735         /**
 736          * Constant for the "Latin Extended-A" Unicode character block.
 737          * @since 1.2
 738          */
 739         public static final UnicodeBlock LATIN_EXTENDED_A =
 740             new UnicodeBlock("LATIN_EXTENDED_A",
 741                              "LATIN EXTENDED-A",
 742                              "LATINEXTENDED-A");
 743 
 744         /**
 745          * Constant for the "Latin Extended-B" Unicode character block.
 746          * @since 1.2
 747          */
 748         public static final UnicodeBlock LATIN_EXTENDED_B =
 749             new UnicodeBlock("LATIN_EXTENDED_B",
 750                              "LATIN EXTENDED-B",
 751                              "LATINEXTENDED-B");
 752 
 753         /**
 754          * Constant for the "IPA Extensions" Unicode character block.
 755          * @since 1.2
 756          */
 757         public static final UnicodeBlock IPA_EXTENSIONS =
 758             new UnicodeBlock("IPA_EXTENSIONS",
 759                              "IPA EXTENSIONS",
 760                              "IPAEXTENSIONS");
 761 
 762         /**
 763          * Constant for the "Spacing Modifier Letters" Unicode character block.
 764          * @since 1.2
 765          */
 766         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 767             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 768                              "SPACING MODIFIER LETTERS",
 769                              "SPACINGMODIFIERLETTERS");
 770 
 771         /**
 772          * Constant for the "Combining Diacritical Marks" Unicode character block.
 773          * @since 1.2
 774          */
 775         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 776             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 777                              "COMBINING DIACRITICAL MARKS",
 778                              "COMBININGDIACRITICALMARKS");
 779 
 780         /**
 781          * Constant for the "Greek and Coptic" Unicode character block.
 782          * <p>
 783          * This block was previously known as the "Greek" block.
 784          *
 785          * @since 1.2
 786          */
 787         public static final UnicodeBlock GREEK =
 788             new UnicodeBlock("GREEK",
 789                              "GREEK AND COPTIC",
 790                              "GREEKANDCOPTIC");
 791 
 792         /**
 793          * Constant for the "Cyrillic" Unicode character block.
 794          * @since 1.2
 795          */
 796         public static final UnicodeBlock CYRILLIC =
 797             new UnicodeBlock("CYRILLIC");
 798 
 799         /**
 800          * Constant for the "Armenian" Unicode character block.
 801          * @since 1.2
 802          */
 803         public static final UnicodeBlock ARMENIAN =
 804             new UnicodeBlock("ARMENIAN");
 805 
 806         /**
 807          * Constant for the "Hebrew" Unicode character block.
 808          * @since 1.2
 809          */
 810         public static final UnicodeBlock HEBREW =
 811             new UnicodeBlock("HEBREW");
 812 
 813         /**
 814          * Constant for the "Arabic" Unicode character block.
 815          * @since 1.2
 816          */
 817         public static final UnicodeBlock ARABIC =
 818             new UnicodeBlock("ARABIC");
 819 
 820         /**
 821          * Constant for the "Devanagari" Unicode character block.
 822          * @since 1.2
 823          */
 824         public static final UnicodeBlock DEVANAGARI =
 825             new UnicodeBlock("DEVANAGARI");
 826 
 827         /**
 828          * Constant for the "Bengali" Unicode character block.
 829          * @since 1.2
 830          */
 831         public static final UnicodeBlock BENGALI =
 832             new UnicodeBlock("BENGALI");
 833 
 834         /**
 835          * Constant for the "Gurmukhi" Unicode character block.
 836          * @since 1.2
 837          */
 838         public static final UnicodeBlock GURMUKHI =
 839             new UnicodeBlock("GURMUKHI");
 840 
 841         /**
 842          * Constant for the "Gujarati" Unicode character block.
 843          * @since 1.2
 844          */
 845         public static final UnicodeBlock GUJARATI =
 846             new UnicodeBlock("GUJARATI");
 847 
 848         /**
 849          * Constant for the "Oriya" Unicode character block.
 850          * @since 1.2
 851          */
 852         public static final UnicodeBlock ORIYA =
 853             new UnicodeBlock("ORIYA");
 854 
 855         /**
 856          * Constant for the "Tamil" Unicode character block.
 857          * @since 1.2
 858          */
 859         public static final UnicodeBlock TAMIL =
 860             new UnicodeBlock("TAMIL");  // field name and upper-cased block name coincide
 861 
 862         /**
 863          * Constant for the "Telugu" Unicode character block.
 864          * @since 1.2
 865          */
 866         public static final UnicodeBlock TELUGU =
 867             new UnicodeBlock("TELUGU");  // field name and upper-cased block name coincide
 868 
 869         /**
 870          * Constant for the "Kannada" Unicode character block.
 871          * @since 1.2
 872          */
 873         public static final UnicodeBlock KANNADA =
 874             new UnicodeBlock("KANNADA");  // field name and upper-cased block name coincide
 875 
 876         /**
 877          * Constant for the "Malayalam" Unicode character block.
 878          * @since 1.2
 879          */
 880         public static final UnicodeBlock MALAYALAM =
 881             new UnicodeBlock("MALAYALAM");  // field name and upper-cased block name coincide
 882 
 883         /**
 884          * Constant for the "Thai" Unicode character block.
 885          * @since 1.2
 886          */
 887         public static final UnicodeBlock THAI =
 888             new UnicodeBlock("THAI");  // field name and upper-cased block name coincide
 889 
 890         /**
 891          * Constant for the "Lao" Unicode character block.
 892          * @since 1.2
 893          */
 894         public static final UnicodeBlock LAO =
 895             new UnicodeBlock("LAO");  // field name and upper-cased block name coincide
 896 
 897         /**
 898          * Constant for the "Tibetan" Unicode character block.
 899          * @since 1.2
 900          */
 901         public static final UnicodeBlock TIBETAN =
 902             new UnicodeBlock("TIBETAN");  // field name and upper-cased block name coincide
 903 
 904         /**
 905          * Constant for the "Georgian" Unicode character block.
 906          * @since 1.2
 907          */
 908         public static final UnicodeBlock GEORGIAN =
 909             new UnicodeBlock("GEORGIAN");  // field name and upper-cased block name coincide
 910 
 911         /**
 912          * Constant for the "Hangul Jamo" Unicode character block.
 913          * @since 1.2
 914          */
 915         public static final UnicodeBlock HANGUL_JAMO =
 916             new UnicodeBlock("HANGUL_JAMO",  // same text as the field name
 917                              "HANGUL JAMO",  // upper-cased block name
 918                              "HANGULJAMO");  // block name with spaces removed
 919 
 920         /**
 921          * Constant for the "Latin Extended Additional" Unicode character block.
 922          * @since 1.2
 923          */
 924         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 925             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",  // same text as the field name
 926                              "LATIN EXTENDED ADDITIONAL",  // upper-cased block name
 927                              "LATINEXTENDEDADDITIONAL");  // block name with spaces removed
 928 
 929         /**
 930          * Constant for the "Greek Extended" Unicode character block.
 931          * @since 1.2
 932          */
 933         public static final UnicodeBlock GREEK_EXTENDED =
 934             new UnicodeBlock("GREEK_EXTENDED",  // same text as the field name
 935                              "GREEK EXTENDED",  // upper-cased block name
 936                              "GREEKEXTENDED");  // block name with spaces removed
 937 
 938         /**
 939          * Constant for the "General Punctuation" Unicode character block.
 940          * @since 1.2
 941          */
 942         public static final UnicodeBlock GENERAL_PUNCTUATION =
 943             new UnicodeBlock("GENERAL_PUNCTUATION",  // same text as the field name
 944                              "GENERAL PUNCTUATION",  // upper-cased block name
 945                              "GENERALPUNCTUATION");  // block name with spaces removed
 946 
 947         /**
 948          * Constant for the "Superscripts and Subscripts" Unicode character
 949          * block.
 950          * @since 1.2
 951          */
 952         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 953             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",  // same text as the field name
 954                              "SUPERSCRIPTS AND SUBSCRIPTS",  // upper-cased block name
 955                              "SUPERSCRIPTSANDSUBSCRIPTS");  // block name with spaces removed
 956 
 957         /**
 958          * Constant for the "Currency Symbols" Unicode character block.
 959          * @since 1.2
 960          */
 961         public static final UnicodeBlock CURRENCY_SYMBOLS =
 962             new UnicodeBlock("CURRENCY_SYMBOLS",  // same text as the field name
 963                              "CURRENCY SYMBOLS",  // upper-cased block name
 964                              "CURRENCYSYMBOLS");  // block name with spaces removed
 965 
 966         /**
 967          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 968          * character block.
 969          * <p>
 970          * This block was previously known as "Combining Marks for Symbols".
 971          * @since 1.2
 972          */
 973         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 974             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",  // same text as the field name
 975                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",  // current block name, upper-cased
 976                              "COMBININGDIACRITICALMARKSFORSYMBOLS",  // current name with spaces removed
 977                              "COMBINING MARKS FOR SYMBOLS",  // previous block name, upper-cased
 978                              "COMBININGMARKSFORSYMBOLS");  // previous name with spaces removed
 979 
 980         /**
 981          * Constant for the "Letterlike Symbols" Unicode character block.
 982          * @since 1.2
 983          */
 984         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 985             new UnicodeBlock("LETTERLIKE_SYMBOLS",  // same text as the field name
 986                              "LETTERLIKE SYMBOLS",  // upper-cased block name
 987                              "LETTERLIKESYMBOLS");  // block name with spaces removed
 988 
 989         /**
 990          * Constant for the "Number Forms" Unicode character block.
 991          * @since 1.2
 992          */
 993         public static final UnicodeBlock NUMBER_FORMS =
 994             new UnicodeBlock("NUMBER_FORMS",  // same text as the field name
 995                              "NUMBER FORMS",  // upper-cased block name
 996                              "NUMBERFORMS");  // block name with spaces removed
 997 
 998         /**
 999          * Constant for the "Arrows" Unicode character block.
1000          * @since 1.2
1001          */
1002         public static final UnicodeBlock ARROWS =
1003             new UnicodeBlock("ARROWS");  // field name and upper-cased block name coincide
1004 
1005         /**
1006          * Constant for the "Mathematical Operators" Unicode character block.
1007          * @since 1.2
1008          */
1009         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1010             new UnicodeBlock("MATHEMATICAL_OPERATORS",  // same text as the field name
1011                              "MATHEMATICAL OPERATORS",  // upper-cased block name
1012                              "MATHEMATICALOPERATORS");  // block name with spaces removed
1013 
1014         /**
1015          * Constant for the "Miscellaneous Technical" Unicode character block.
1016          * @since 1.2
1017          */
1018         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1019             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",  // same text as the field name
1020                              "MISCELLANEOUS TECHNICAL",  // upper-cased block name
1021                              "MISCELLANEOUSTECHNICAL");  // block name with spaces removed
1022 
1023         /**
1024          * Constant for the "Control Pictures" Unicode character block.
1025          * @since 1.2
1026          */
1027         public static final UnicodeBlock CONTROL_PICTURES =
1028             new UnicodeBlock("CONTROL_PICTURES",  // same text as the field name
1029                              "CONTROL PICTURES",  // upper-cased block name
1030                              "CONTROLPICTURES");  // block name with spaces removed
1031 
1032         /**
1033          * Constant for the "Optical Character Recognition" Unicode character block.
1034          * @since 1.2
1035          */
1036         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1037             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",  // same text as the field name
1038                              "OPTICAL CHARACTER RECOGNITION",  // upper-cased block name
1039                              "OPTICALCHARACTERRECOGNITION");  // block name with spaces removed
1040 
1041         /**
1042          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1043          * @since 1.2
1044          */
1045         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1046             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",  // same text as the field name
1047                              "ENCLOSED ALPHANUMERICS",  // upper-cased block name
1048                              "ENCLOSEDALPHANUMERICS");  // block name with spaces removed
1049 
1050         /**
1051          * Constant for the "Box Drawing" Unicode character block.
1052          * @since 1.2
1053          */
1054         public static final UnicodeBlock BOX_DRAWING =
1055             new UnicodeBlock("BOX_DRAWING",  // same text as the field name
1056                              "BOX DRAWING",  // upper-cased block name
1057                              "BOXDRAWING");  // block name with spaces removed
1058 
1059         /**
1060          * Constant for the "Block Elements" Unicode character block.
1061          * @since 1.2
1062          */
1063         public static final UnicodeBlock BLOCK_ELEMENTS =
1064             new UnicodeBlock("BLOCK_ELEMENTS",  // same text as the field name
1065                              "BLOCK ELEMENTS",  // upper-cased block name
1066                              "BLOCKELEMENTS");  // block name with spaces removed
1067 
1068         /**
1069          * Constant for the "Geometric Shapes" Unicode character block.
1070          * @since 1.2
1071          */
1072         public static final UnicodeBlock GEOMETRIC_SHAPES =
1073             new UnicodeBlock("GEOMETRIC_SHAPES",  // same text as the field name
1074                              "GEOMETRIC SHAPES",  // upper-cased block name
1075                              "GEOMETRICSHAPES");  // block name with spaces removed
1076 
1077         /**
1078          * Constant for the "Miscellaneous Symbols" Unicode character block.
1079          * @since 1.2
1080          */
1081         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1082             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",  // same text as the field name
1083                              "MISCELLANEOUS SYMBOLS",  // upper-cased block name
1084                              "MISCELLANEOUSSYMBOLS");  // block name with spaces removed
1085 
1086         /**
1087          * Constant for the "Dingbats" Unicode character block.
1088          * @since 1.2
1089          */
1090         public static final UnicodeBlock DINGBATS =
1091             new UnicodeBlock("DINGBATS");  // field name and upper-cased block name coincide
1092 
1093         /**
1094          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1095          * @since 1.2
1096          */
1097         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1098             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",  // same text as the field name
1099                              "CJK SYMBOLS AND PUNCTUATION",  // upper-cased block name
1100                              "CJKSYMBOLSANDPUNCTUATION");  // block name with spaces removed
1101 
1102         /**
1103          * Constant for the "Hiragana" Unicode character block.
1104          * @since 1.2
1105          */
1106         public static final UnicodeBlock HIRAGANA =
1107             new UnicodeBlock("HIRAGANA");  // field name and upper-cased block name coincide
1108 
1109         /**
1110          * Constant for the "Katakana" Unicode character block.
1111          * @since 1.2
1112          */
1113         public static final UnicodeBlock KATAKANA =
1114             new UnicodeBlock("KATAKANA");  // field name and upper-cased block name coincide
1115 
1116         /**
1117          * Constant for the "Bopomofo" Unicode character block.
1118          * @since 1.2
1119          */
1120         public static final UnicodeBlock BOPOMOFO =
1121             new UnicodeBlock("BOPOMOFO");  // field name and upper-cased block name coincide
1122 
1123         /**
1124          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1125          * @since 1.2
1126          */
1127         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1128             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",  // same text as the field name
1129                              "HANGUL COMPATIBILITY JAMO",  // upper-cased block name
1130                              "HANGULCOMPATIBILITYJAMO");  // block name with spaces removed
1131 
1132         /**
1133          * Constant for the "Kanbun" Unicode character block.
1134          * @since 1.2
1135          */
1136         public static final UnicodeBlock KANBUN =
1137             new UnicodeBlock("KANBUN");  // field name and upper-cased block name coincide
1138 
1139         /**
1140          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1141          * @since 1.2
1142          */
1143         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1144             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",  // same text as the field name
1145                              "ENCLOSED CJK LETTERS AND MONTHS",  // upper-cased block name
1146                              "ENCLOSEDCJKLETTERSANDMONTHS");  // block name with spaces removed
1147 
1148         /**
1149          * Constant for the "CJK Compatibility" Unicode character block.
1150          * @since 1.2
1151          */
1152         public static final UnicodeBlock CJK_COMPATIBILITY =
1153             new UnicodeBlock("CJK_COMPATIBILITY",  // same text as the field name
1154                              "CJK COMPATIBILITY",  // upper-cased block name
1155                              "CJKCOMPATIBILITY");  // block name with spaces removed
1156 
1157         /**
1158          * Constant for the "CJK Unified Ideographs" Unicode character block.
1159          * @since 1.2
1160          */
1161         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1162             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",  // same text as the field name
1163                              "CJK UNIFIED IDEOGRAPHS",  // upper-cased block name
1164                              "CJKUNIFIEDIDEOGRAPHS");  // block name with spaces removed
1165 
1166         /**
1167          * Constant for the "Hangul Syllables" Unicode character block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock HANGUL_SYLLABLES =
1171             new UnicodeBlock("HANGUL_SYLLABLES",  // same text as the field name
1172                              "HANGUL SYLLABLES",  // upper-cased block name
1173                              "HANGULSYLLABLES");  // block name with spaces removed
1174 
1175         /**
1176          * Constant for the "Private Use Area" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock PRIVATE_USE_AREA =
1180             new UnicodeBlock("PRIVATE_USE_AREA",  // same text as the field name
1181                              "PRIVATE USE AREA",  // upper-cased block name
1182                              "PRIVATEUSEAREA");  // block name with spaces removed
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Ideographs" Unicode character
1186          * block.
1187          * @since 1.2
1188          */
1189         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1190             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",  // same text as the field name
1191                              "CJK COMPATIBILITY IDEOGRAPHS",  // upper-cased block name
1192                              "CJKCOMPATIBILITYIDEOGRAPHS");  // block name with spaces removed
1193 
1194         /**
1195          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1196          * @since 1.2
1197          */
1198         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1199             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",  // same text as the field name
1200                              "ALPHABETIC PRESENTATION FORMS",  // upper-cased block name
1201                              "ALPHABETICPRESENTATIONFORMS");  // block name with spaces removed
1202 
1203         /**
1204          * Constant for the "Arabic Presentation Forms-A" Unicode character
1205          * block.
1206          * @since 1.2
1207          */
1208         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1209             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",  // same text as the field name
1210                              "ARABIC PRESENTATION FORMS-A",  // upper-cased block name
1211                              "ARABICPRESENTATIONFORMS-A");  // spaces removed, hyphen kept
1212 
1213         /**
1214          * Constant for the "Combining Half Marks" Unicode character block.
1215          * @since 1.2
1216          */
1217         public static final UnicodeBlock COMBINING_HALF_MARKS =
1218             new UnicodeBlock("COMBINING_HALF_MARKS",  // same text as the field name
1219                              "COMBINING HALF MARKS",  // upper-cased block name
1220                              "COMBININGHALFMARKS");  // block name with spaces removed
1221 
1222         /**
1223          * Constant for the "CJK Compatibility Forms" Unicode character block.
1224          * @since 1.2
1225          */
1226         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1227             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",  // same text as the field name
1228                              "CJK COMPATIBILITY FORMS",  // upper-cased block name
1229                              "CJKCOMPATIBILITYFORMS");  // block name with spaces removed
1230 
1231         /**
1232          * Constant for the "Small Form Variants" Unicode character block.
1233          * @since 1.2
1234          */
1235         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1236             new UnicodeBlock("SMALL_FORM_VARIANTS",  // same text as the field name
1237                              "SMALL FORM VARIANTS",  // upper-cased block name
1238                              "SMALLFORMVARIANTS");  // block name with spaces removed
1239 
1240         /**
1241          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1242          * @since 1.2
1243          */
1244         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1245             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",  // same text as the field name
1246                              "ARABIC PRESENTATION FORMS-B",  // upper-cased block name
1247                              "ARABICPRESENTATIONFORMS-B");  // spaces removed, hyphen kept
1248 
1249         /**
1250          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1251          * block.
1252          * @since 1.2
1253          */
1254         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1255             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",  // same text as the field name
1256                              "HALFWIDTH AND FULLWIDTH FORMS",  // upper-cased block name
1257                              "HALFWIDTHANDFULLWIDTHFORMS");  // block name with spaces removed
1258 
1259         /**
1260          * Constant for the "Specials" Unicode character block.
1261          * @since 1.2
1262          */
1263         public static final UnicodeBlock SPECIALS =
1264             new UnicodeBlock("SPECIALS");  // field name and upper-cased block name coincide
1265 
1266         /**
1267          * Constant named for the surrogates area; superseded by the constants below.
1268          * @deprecated Instead of {@code SURROGATES_AREA}, use
1269          * {@link #HIGH_SURROGATES}, {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1270          * {@link #LOW_SURROGATES}. These constants match the block definitions of
1271          * the Unicode Standard. The {@link #of(char)} and {@link #of(int)}
1272          * methods return the standard constants.
1273          */
1274         @Deprecated(since="1.5")
1275         public static final UnicodeBlock SURROGATES_AREA =
1276             new UnicodeBlock("SURROGATES_AREA");  // same text as the field name
1277 
1278         /**
1279          * Constant for the "Syriac" Unicode character block.
1280          * @since 1.4
1281          */
1282         public static final UnicodeBlock SYRIAC =
1283             new UnicodeBlock("SYRIAC");  // field name and upper-cased block name coincide
1284 
1285         /**
1286          * Constant for the "Thaana" Unicode character block.
1287          * @since 1.4
1288          */
1289         public static final UnicodeBlock THAANA =
1290             new UnicodeBlock("THAANA");  // field name and upper-cased block name coincide
1291 
1292         /**
1293          * Constant for the "Sinhala" Unicode character block.
1294          * @since 1.4
1295          */
1296         public static final UnicodeBlock SINHALA =
1297             new UnicodeBlock("SINHALA");  // field name and upper-cased block name coincide
1298 
1299         /**
1300          * Constant for the "Myanmar" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock MYANMAR =
1304             new UnicodeBlock("MYANMAR");  // field name and upper-cased block name coincide
1305 
1306         /**
1307          * Constant for the "Ethiopic" Unicode character block.
1308          * @since 1.4
1309          */
1310         public static final UnicodeBlock ETHIOPIC =
1311             new UnicodeBlock("ETHIOPIC");  // field name and upper-cased block name coincide
1312 
1313         /**
1314          * Constant for the "Cherokee" Unicode character block.
1315          * @since 1.4
1316          */
1317         public static final UnicodeBlock CHEROKEE =
1318             new UnicodeBlock("CHEROKEE");  // field name and upper-cased block name coincide
1319 
1320         /**
1321          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1322          * @since 1.4
1323          */
1324         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1325             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",  // same text as the field name
1326                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",  // upper-cased block name
1327                              "UNIFIEDCANADIANABORIGINALSYLLABICS");  // block name with spaces removed
1328 
1329         /**
1330          * Constant for the "Ogham" Unicode character block.
1331          * @since 1.4
1332          */
1333         public static final UnicodeBlock OGHAM =
1334             new UnicodeBlock("OGHAM");  // field name and upper-cased block name coincide
1335 
1336         /**
1337          * Constant for the "Runic" Unicode character block.
1338          * @since 1.4
1339          */
1340         public static final UnicodeBlock RUNIC =
1341             new UnicodeBlock("RUNIC");  // field name and upper-cased block name coincide
1342 
1343         /**
1344          * Constant for the "Khmer" Unicode character block.
1345          * @since 1.4
1346          */
1347         public static final UnicodeBlock KHMER =
1348             new UnicodeBlock("KHMER");  // field name and upper-cased block name coincide
1349 
1350         /**
1351          * Constant for the "Mongolian" Unicode character block.
1352          * @since 1.4
1353          */
1354         public static final UnicodeBlock MONGOLIAN =
1355             new UnicodeBlock("MONGOLIAN");  // field name and upper-cased block name coincide
1356 
1357         /**
1358          * Constant for the "Braille Patterns" Unicode character block.
1359          * @since 1.4
1360          */
1361         public static final UnicodeBlock BRAILLE_PATTERNS =
1362             new UnicodeBlock("BRAILLE_PATTERNS",  // same text as the field name
1363                              "BRAILLE PATTERNS",  // upper-cased block name
1364                              "BRAILLEPATTERNS");  // block name with spaces removed
1365 
1366         /**
1367          * Constant for the "CJK Radicals Supplement" Unicode character block.
1368          * @since 1.4
1369          */
1370         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1371             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",  // same text as the field name
1372                              "CJK RADICALS SUPPLEMENT",  // upper-cased block name
1373                              "CJKRADICALSSUPPLEMENT");  // block name with spaces removed
1374 
1375         /**
1376          * Constant for the "Kangxi Radicals" Unicode character block.
1377          * @since 1.4
1378          */
1379         public static final UnicodeBlock KANGXI_RADICALS =
1380             new UnicodeBlock("KANGXI_RADICALS",  // same text as the field name
1381                              "KANGXI RADICALS",  // upper-cased block name
1382                              "KANGXIRADICALS");  // block name with spaces removed
1383 
1384         /**
1385          * Constant for the "Ideographic Description Characters" Unicode character block.
1386          * @since 1.4
1387          */
1388         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1389             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",  // same text as the field name
1390                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",  // upper-cased block name
1391                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");  // block name with spaces removed
1392 
1393         /**
1394          * Constant for the "Bopomofo Extended" Unicode character block.
1395          * @since 1.4
1396          */
1397         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1398             new UnicodeBlock("BOPOMOFO_EXTENDED",  // same text as the field name
1399                              "BOPOMOFO EXTENDED",  // upper-cased block name
1400                              "BOPOMOFOEXTENDED");  // block name with spaces removed
1401 
1402         /**
1403          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1404          * @since 1.4
1405          */
1406         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1407             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",  // same text as the field name
1408                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",  // upper-cased block name
1409                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");  // block name with spaces removed
1410 
1411         /**
1412          * Constant for the "Yi Syllables" Unicode character block.
1413          * @since 1.4
1414          */
1415         public static final UnicodeBlock YI_SYLLABLES =
1416             new UnicodeBlock("YI_SYLLABLES",  // same text as the field name
1417                              "YI SYLLABLES",  // upper-cased block name
1418                              "YISYLLABLES");  // block name with spaces removed
1419 
1420         /**
1421          * Constant for the "Yi Radicals" Unicode character block.
1422          * @since 1.4
1423          */
1424         public static final UnicodeBlock YI_RADICALS =
1425             new UnicodeBlock("YI_RADICALS",  // same text as the field name
1426                              "YI RADICALS",  // upper-cased block name
1427                              "YIRADICALS");  // block name with spaces removed
1428 
1429         /**
1430          * Constant for the "Cyrillic Supplement" Unicode character block.
1431          * <p>This block was previously known as the "Cyrillic Supplementary" block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1435             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",  // same text as the field name
1436                              "CYRILLIC SUPPLEMENTARY",  // previous block name, upper-cased
1437                              "CYRILLICSUPPLEMENTARY",  // previous name with spaces removed
1438                              "CYRILLIC SUPPLEMENT",  // current block name, upper-cased
1439                              "CYRILLICSUPPLEMENT");  // current name with spaces removed
1440 
1441         /**
1442          * Constant for the "Tagalog" Unicode character block.
1443          * @since 1.5
1444          */
1445         public static final UnicodeBlock TAGALOG =
1446             new UnicodeBlock("TAGALOG");  // field name and upper-cased block name coincide
1447 
1448         /**
1449          * Constant for the "Hanunoo" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock HANUNOO =
1453             new UnicodeBlock("HANUNOO");  // field name and upper-cased block name coincide
1454 
1455         /**
1456          * Constant for the "Buhid" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock BUHID =
1460             new UnicodeBlock("BUHID");  // field name and upper-cased block name coincide
1461 
1462         /**
1463          * Constant for the "Tagbanwa" Unicode character block.
1464          * @since 1.5
1465          */
1466         public static final UnicodeBlock TAGBANWA =
1467             new UnicodeBlock("TAGBANWA");  // field name and upper-cased block name coincide
1468 
1469         /**
1470          * Constant for the "Limbu" Unicode character block.
1471          * @since 1.5
1472          */
1473         public static final UnicodeBlock LIMBU =
1474             new UnicodeBlock("LIMBU");  // field name and upper-cased block name coincide
1475 
1476         /**
1477          * Constant for the "Tai Le" Unicode character block.
1478          * @since 1.5
1479          */
1480         public static final UnicodeBlock TAI_LE =
1481             new UnicodeBlock("TAI_LE",  // same text as the field name
1482                              "TAI LE",  // upper-cased block name
1483                              "TAILE");  // block name with spaces removed
1484 
1485         /**
1486          * Constant for the "Khmer Symbols" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock KHMER_SYMBOLS =
1490             new UnicodeBlock("KHMER_SYMBOLS",  // same text as the field name
1491                              "KHMER SYMBOLS",  // upper-cased block name
1492                              "KHMERSYMBOLS");  // block name with spaces removed
1493 
1494         /**
1495          * Constant for the "Phonetic Extensions" Unicode character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1499             new UnicodeBlock("PHONETIC_EXTENSIONS",  // same text as the field name
1500                              "PHONETIC EXTENSIONS",  // upper-cased block name
1501                              "PHONETICEXTENSIONS");  // block name with spaces removed
1502 
1503         /**
1504          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1505          * @since 1.5
1506          */
1507         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1508             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",  // same text as the field name
1509                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",  // upper-cased block name
1510                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");  // spaces removed, hyphen kept
1511 
1512         /**
1513          * Constant for the "Supplemental Arrows-A" Unicode character block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1517             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",  // same text as the field name
1518                              "SUPPLEMENTAL ARROWS-A",  // upper-cased block name
1519                              "SUPPLEMENTALARROWS-A");  // spaces removed, hyphen kept
1520 
1521         /**
1522          * Constant for the "Supplemental Arrows-B" Unicode character block.
1523          * @since 1.5
1524          */
1525         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1526             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",  // same text as the field name
1527                              "SUPPLEMENTAL ARROWS-B",  // upper-cased block name
1528                              "SUPPLEMENTALARROWS-B");  // spaces removed, hyphen kept
1529 
1530         /**
1531          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1532          * character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1536             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",  // same text as the field name
1537                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",  // upper-cased block name
1538                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");  // spaces removed, hyphen kept
1539 
1540         /**
1541          * Constant for the "Supplemental Mathematical Operators" Unicode
1542          * character block.
1543          * @since 1.5
1544          */
1545         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1546             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",  // same text as the field name
1547                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",  // upper-cased block name
1548                              "SUPPLEMENTALMATHEMATICALOPERATORS");  // block name with spaces removed
1549 
1550         /**
1551          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1552          * block.
1553          * @since 1.5
1554          */
1555         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1556             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",  // same text as the field name
1557                              "MISCELLANEOUS SYMBOLS AND ARROWS",  // upper-cased block name
1558                              "MISCELLANEOUSSYMBOLSANDARROWS");  // block name with spaces removed
1559 
1560         /**
1561          * Constant for the "Katakana Phonetic Extensions" Unicode character
1562          * block.
1563          * @since 1.5
1564          */
1565         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1566             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",  // same text as the field name
1567                              "KATAKANA PHONETIC EXTENSIONS",  // upper-cased block name
1568                              "KATAKANAPHONETICEXTENSIONS");  // block name with spaces removed
1569 
1570         /**
1571          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1575             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",  // same text as the field name
1576                              "YIJING HEXAGRAM SYMBOLS",  // upper-cased block name
1577                              "YIJINGHEXAGRAMSYMBOLS");  // block name with spaces removed
1578 
1579         /**
1580          * Constant for the "Variation Selectors" Unicode character block.
1581          * @since 1.5
1582          */
1583         public static final UnicodeBlock VARIATION_SELECTORS =
1584             new UnicodeBlock("VARIATION_SELECTORS",  // same text as the field name
1585                              "VARIATION SELECTORS",  // upper-cased block name
1586                              "VARIATIONSELECTORS");  // block name with spaces removed
1587 
1588         /**
1589          * Constant for the "Linear B Syllabary" Unicode character block.
1590          * @since 1.5
1591          */
1592         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1593             new UnicodeBlock("LINEAR_B_SYLLABARY",  // same text as the field name
1594                              "LINEAR B SYLLABARY",  // upper-cased block name
1595                              "LINEARBSYLLABARY");  // block name with spaces removed
1596 
1597         /**
1598          * Constant for the "Linear B Ideograms" Unicode character block.
1599          * @since 1.5
1600          */
1601         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1602             new UnicodeBlock("LINEAR_B_IDEOGRAMS",  // same text as the field name
1603                              "LINEAR B IDEOGRAMS",  // upper-cased block name
1604                              "LINEARBIDEOGRAMS");  // block name with spaces removed
1605 
1606         /**
1607          * Constant for the "Aegean Numbers" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock AEGEAN_NUMBERS =
1611             new UnicodeBlock("AEGEAN_NUMBERS",  // same text as the field name
1612                              "AEGEAN NUMBERS",  // upper-cased block name
1613                              "AEGEANNUMBERS");  // block name with spaces removed
1614 
1615         /**
1616          * Constant for the "Old Italic" Unicode character block.
1617          * @since 1.5
1618          */
1619         public static final UnicodeBlock OLD_ITALIC =
1620             new UnicodeBlock("OLD_ITALIC",  // same text as the field name
1621                              "OLD ITALIC",  // upper-cased block name
1622                              "OLDITALIC");  // block name with spaces removed
1623 
1624         /**
1625          * Constant for the "Gothic" Unicode character block.
1626          * @since 1.5
1627          */
1628         public static final UnicodeBlock GOTHIC =
1629             new UnicodeBlock("GOTHIC");  // field name and upper-cased block name coincide
1630 
1631         /**
1632          * Constant for the "Ugaritic" Unicode character block.
1633          * @since 1.5
1634          */
1635         public static final UnicodeBlock UGARITIC =
1636             new UnicodeBlock("UGARITIC");  // field name and upper-cased block name coincide
1637 
1638         /**
1639          * Constant for the "Deseret" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock DESERET =
1643             new UnicodeBlock("DESERET");
1644 
1645         /**
1646          * Constant for the "Shavian" Unicode character block.
1647          * @since 1.5
1648          */
1649         public static final UnicodeBlock SHAVIAN =
1650             new UnicodeBlock("SHAVIAN");
1651 
1652         /**
1653          * Constant for the "Osmanya" Unicode character block.
1654          * @since 1.5
1655          */
1656         public static final UnicodeBlock OSMANYA =
1657             new UnicodeBlock("OSMANYA");
1658 
1659         /**
1660          * Constant for the "Cypriot Syllabary" Unicode character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1664             new UnicodeBlock("CYPRIOT_SYLLABARY",
1665                              "CYPRIOT SYLLABARY",
1666                              "CYPRIOTSYLLABARY");
1667 
1668         /**
1669          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1670          * @since 1.5
1671          */
1672         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1673             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1674                              "BYZANTINE MUSICAL SYMBOLS",
1675                              "BYZANTINEMUSICALSYMBOLS");
1676 
1677         /**
1678          * Constant for the "Musical Symbols" Unicode character block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock MUSICAL_SYMBOLS =
1682             new UnicodeBlock("MUSICAL_SYMBOLS",
1683                              "MUSICAL SYMBOLS",
1684                              "MUSICALSYMBOLS");
1685 
1686         /**
1687          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1688          * @since 1.5
1689          */
1690         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1691             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1692                              "TAI XUAN JING SYMBOLS",
1693                              "TAIXUANJINGSYMBOLS");
1694 
1695         /**
1696          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1697          * character block.
1698          * @since 1.5
1699          */
1700         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1701             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1702                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1703                              "MATHEMATICALALPHANUMERICSYMBOLS");
1704 
1705         /**
1706          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1707          * character block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1711             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1712                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1713                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1714 
1715         /**
1716          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1720             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1721                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1722                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1723 
1724         /**
1725          * Constant for the "Tags" Unicode character block.
1726          * @since 1.5
1727          */
1728         public static final UnicodeBlock TAGS =
1729             new UnicodeBlock("TAGS");
1730 
1731         /**
1732          * Constant for the "Variation Selectors Supplement" Unicode character
1733          * block.
1734          * @since 1.5
1735          */
1736         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1737             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1738                              "VARIATION SELECTORS SUPPLEMENT",
1739                              "VARIATIONSELECTORSSUPPLEMENT");
1740 
1741         /**
1742          * Constant for the "Supplementary Private Use Area-A" Unicode character
1743          * block.
1744          * @since 1.5
1745          */
1746         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1747             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1748                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1749                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1750 
1751         /**
1752          * Constant for the "Supplementary Private Use Area-B" Unicode character
1753          * block.
1754          * @since 1.5
1755          */
1756         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1757             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1758                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1759                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1760 
1761         /**
1762          * Constant for the "High Surrogates" Unicode character block.
1763          * This block represents code point values in the high surrogate
1764          * range: U+D800 through U+DB7F.
1765          *
1766          * @since 1.5
1767          */
1768         public static final UnicodeBlock HIGH_SURROGATES =
1769             new UnicodeBlock("HIGH_SURROGATES",
1770                              "HIGH SURROGATES",
1771                              "HIGHSURROGATES");
1772 
1773         /**
1774          * Constant for the "High Private Use Surrogates" Unicode character
1775          * block.
1776          * This block represents code point values in the private use high
1777          * surrogate range: U+DB80 through U+DBFF.
1778          *
1779          * @since 1.5
1780          */
1781         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1782             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1783                              "HIGH PRIVATE USE SURROGATES",
1784                              "HIGHPRIVATEUSESURROGATES");
1785 
1786         /**
1787          * Constant for the "Low Surrogates" Unicode character block.
1788          * This block represents code point values in the low surrogate
1789          * range: U+DC00 through U+DFFF.
1790          *
1791          * @since 1.5
1792          */
1793         public static final UnicodeBlock LOW_SURROGATES =
1794             new UnicodeBlock("LOW_SURROGATES",
1795                              "LOW SURROGATES",
1796                              "LOWSURROGATES");
1797 
1798         /**
1799          * Constant for the "Arabic Supplement" Unicode character block.
1800          * @since 1.7
1801          */
1802         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1803             new UnicodeBlock("ARABIC_SUPPLEMENT",
1804                              "ARABIC SUPPLEMENT",
1805                              "ARABICSUPPLEMENT");
1806 
1807         /**
1808          * Constant for the "NKo" Unicode character block.
1809          * @since 1.7
1810          */
1811         public static final UnicodeBlock NKO =
1812             new UnicodeBlock("NKO");
1813 
1814         /**
1815          * Constant for the "Samaritan" Unicode character block.
1816          * @since 1.7
1817          */
1818         public static final UnicodeBlock SAMARITAN =
1819             new UnicodeBlock("SAMARITAN");
1820 
1821         /**
1822          * Constant for the "Mandaic" Unicode character block.
1823          * @since 1.7
1824          */
1825         public static final UnicodeBlock MANDAIC =
1826             new UnicodeBlock("MANDAIC");
1827 
1828         /**
1829          * Constant for the "Ethiopic Supplement" Unicode character block.
1830          * @since 1.7
1831          */
1832         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1833             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1834                              "ETHIOPIC SUPPLEMENT",
1835                              "ETHIOPICSUPPLEMENT");
1836 
1837         /**
1838          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1839          * Unicode character block.
1840          * @since 1.7
1841          */
1842         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1843             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1844                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1845                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1846 
1847         /**
1848          * Constant for the "New Tai Lue" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock NEW_TAI_LUE =
1852             new UnicodeBlock("NEW_TAI_LUE",
1853                              "NEW TAI LUE",
1854                              "NEWTAILUE");
1855 
1856         /**
1857          * Constant for the "Buginese" Unicode character block.
1858          * @since 1.7
1859          */
1860         public static final UnicodeBlock BUGINESE =
1861             new UnicodeBlock("BUGINESE");
1862 
1863         /**
1864          * Constant for the "Tai Tham" Unicode character block.
1865          * @since 1.7
1866          */
1867         public static final UnicodeBlock TAI_THAM =
1868             new UnicodeBlock("TAI_THAM",
1869                              "TAI THAM",
1870                              "TAITHAM");
1871 
1872         /**
1873          * Constant for the "Balinese" Unicode character block.
1874          * @since 1.7
1875          */
1876         public static final UnicodeBlock BALINESE =
1877             new UnicodeBlock("BALINESE");
1878 
1879         /**
1880          * Constant for the "Sundanese" Unicode character block.
1881          * @since 1.7
1882          */
1883         public static final UnicodeBlock SUNDANESE =
1884             new UnicodeBlock("SUNDANESE");
1885 
1886         /**
1887          * Constant for the "Batak" Unicode character block.
1888          * @since 1.7
1889          */
1890         public static final UnicodeBlock BATAK =
1891             new UnicodeBlock("BATAK");
1892 
1893         /**
1894          * Constant for the "Lepcha" Unicode character block.
1895          * @since 1.7
1896          */
1897         public static final UnicodeBlock LEPCHA =
1898             new UnicodeBlock("LEPCHA");
1899 
1900         /**
1901          * Constant for the "Ol Chiki" Unicode character block.
1902          * @since 1.7
1903          */
1904         public static final UnicodeBlock OL_CHIKI =
1905             new UnicodeBlock("OL_CHIKI",
1906                              "OL CHIKI",
1907                              "OLCHIKI");
1908 
1909         /**
1910          * Constant for the "Vedic Extensions" Unicode character block.
1911          * @since 1.7
1912          */
1913         public static final UnicodeBlock VEDIC_EXTENSIONS =
1914             new UnicodeBlock("VEDIC_EXTENSIONS",
1915                              "VEDIC EXTENSIONS",
1916                              "VEDICEXTENSIONS");
1917 
1918         /**
1919          * Constant for the "Phonetic Extensions Supplement" Unicode character
1920          * block.
1921          * @since 1.7
1922          */
1923         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1924             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1925                              "PHONETIC EXTENSIONS SUPPLEMENT",
1926                              "PHONETICEXTENSIONSSUPPLEMENT");
1927 
1928         /**
1929          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1930          * character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1934             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1935                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1936                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1937 
1938         /**
1939          * Constant for the "Glagolitic" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock GLAGOLITIC =
1943             new UnicodeBlock("GLAGOLITIC");
1944 
1945         /**
1946          * Constant for the "Latin Extended-C" Unicode character block.
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock LATIN_EXTENDED_C =
1950             new UnicodeBlock("LATIN_EXTENDED_C",
1951                              "LATIN EXTENDED-C",
1952                              "LATINEXTENDED-C");
1953 
1954         /**
1955          * Constant for the "Coptic" Unicode character block.
1956          * @since 1.7
1957          */
1958         public static final UnicodeBlock COPTIC =
1959             new UnicodeBlock("COPTIC");
1960 
1961         /**
1962          * Constant for the "Georgian Supplement" Unicode character block.
1963          * @since 1.7
1964          */
1965         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1966             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1967                              "GEORGIAN SUPPLEMENT",
1968                              "GEORGIANSUPPLEMENT");
1969 
1970         /**
1971          * Constant for the "Tifinagh" Unicode character block.
1972          * @since 1.7
1973          */
1974         public static final UnicodeBlock TIFINAGH =
1975             new UnicodeBlock("TIFINAGH");
1976 
1977         /**
1978          * Constant for the "Ethiopic Extended" Unicode character block.
1979          * @since 1.7
1980          */
1981         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1982             new UnicodeBlock("ETHIOPIC_EXTENDED",
1983                              "ETHIOPIC EXTENDED",
1984                              "ETHIOPICEXTENDED");
1985 
1986         /**
1987          * Constant for the "Cyrillic Extended-A" Unicode character block.
1988          * @since 1.7
1989          */
1990         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1991             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1992                              "CYRILLIC EXTENDED-A",
1993                              "CYRILLICEXTENDED-A");
1994 
1995         /**
1996          * Constant for the "Supplemental Punctuation" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2000             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2001                              "SUPPLEMENTAL PUNCTUATION",
2002                              "SUPPLEMENTALPUNCTUATION");
2003 
2004         /**
2005          * Constant for the "CJK Strokes" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock CJK_STROKES =
2009             new UnicodeBlock("CJK_STROKES",
2010                              "CJK STROKES",
2011                              "CJKSTROKES");
2012 
2013         /**
2014          * Constant for the "Lisu" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LISU =
2018             new UnicodeBlock("LISU");
2019 
2020         /**
2021          * Constant for the "Vai" Unicode character block.
2022          * @since 1.7
2023          */
2024         public static final UnicodeBlock VAI =
2025             new UnicodeBlock("VAI");
2026 
2027         /**
2028          * Constant for the "Cyrillic Extended-B" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2032             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2033                              "CYRILLIC EXTENDED-B",
2034                              "CYRILLICEXTENDED-B");
2035 
2036         /**
2037          * Constant for the "Bamum" Unicode character block.
2038          * @since 1.7
2039          */
2040         public static final UnicodeBlock BAMUM =
2041             new UnicodeBlock("BAMUM");
2042 
2043         /**
2044          * Constant for the "Modifier Tone Letters" Unicode character block.
2045          * @since 1.7
2046          */
2047         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2048             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2049                              "MODIFIER TONE LETTERS",
2050                              "MODIFIERTONELETTERS");
2051 
2052         /**
2053          * Constant for the "Latin Extended-D" Unicode character block.
2054          * @since 1.7
2055          */
2056         public static final UnicodeBlock LATIN_EXTENDED_D =
2057             new UnicodeBlock("LATIN_EXTENDED_D",
2058                              "LATIN EXTENDED-D",
2059                              "LATINEXTENDED-D");
2060 
2061         /**
2062          * Constant for the "Syloti Nagri" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock SYLOTI_NAGRI =
2066             new UnicodeBlock("SYLOTI_NAGRI",
2067                              "SYLOTI NAGRI",
2068                              "SYLOTINAGRI");
2069 
2070         /**
2071          * Constant for the "Common Indic Number Forms" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2075             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2076                              "COMMON INDIC NUMBER FORMS",
2077                              "COMMONINDICNUMBERFORMS");
2078 
2079         /**
2080          * Constant for the "Phags-pa" Unicode character block.
2081          * @since 1.7
2082          */
2083         public static final UnicodeBlock PHAGS_PA =
2084             new UnicodeBlock("PHAGS_PA",
2085                              "PHAGS-PA");
2086 
2087         /**
2088          * Constant for the "Saurashtra" Unicode character block.
2089          * @since 1.7
2090          */
2091         public static final UnicodeBlock SAURASHTRA =
2092             new UnicodeBlock("SAURASHTRA");
2093 
2094         /**
2095          * Constant for the "Devanagari Extended" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2099             new UnicodeBlock("DEVANAGARI_EXTENDED",
2100                              "DEVANAGARI EXTENDED",
2101                              "DEVANAGARIEXTENDED");
2102 
2103         /**
2104          * Constant for the "Kayah Li" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock KAYAH_LI =
2108             new UnicodeBlock("KAYAH_LI",
2109                              "KAYAH LI",
2110                              "KAYAHLI");
2111 
2112         /**
2113          * Constant for the "Rejang" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock REJANG =
2117             new UnicodeBlock("REJANG");
2118 
2119         /**
2120          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2121          * @since 1.7
2122          */
2123         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2124             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2125                              "HANGUL JAMO EXTENDED-A",
2126                              "HANGULJAMOEXTENDED-A");
2127 
2128         /**
2129          * Constant for the "Javanese" Unicode character block.
2130          * @since 1.7
2131          */
2132         public static final UnicodeBlock JAVANESE =
2133             new UnicodeBlock("JAVANESE");
2134 
2135         /**
2136          * Constant for the "Cham" Unicode character block.
2137          * @since 1.7
2138          */
2139         public static final UnicodeBlock CHAM =
2140             new UnicodeBlock("CHAM");
2141 
2142         /**
2143          * Constant for the "Myanmar Extended-A" Unicode character block.
2144          * @since 1.7
2145          */
2146         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2147             new UnicodeBlock("MYANMAR_EXTENDED_A",
2148                              "MYANMAR EXTENDED-A",
2149                              "MYANMAREXTENDED-A");
2150 
2151         /**
2152          * Constant for the "Tai Viet" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock TAI_VIET =
2156             new UnicodeBlock("TAI_VIET",
2157                              "TAI VIET",
2158                              "TAIVIET");
2159 
2160         /**
2161          * Constant for the "Ethiopic Extended-A" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2165             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2166                              "ETHIOPIC EXTENDED-A",
2167                              "ETHIOPICEXTENDED-A");
2168 
2169         /**
2170          * Constant for the "Meetei Mayek" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock MEETEI_MAYEK =
2174             new UnicodeBlock("MEETEI_MAYEK",
2175                              "MEETEI MAYEK",
2176                              "MEETEIMAYEK");
2177 
2178         /**
2179          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2183             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2184                              "HANGUL JAMO EXTENDED-B",
2185                              "HANGULJAMOEXTENDED-B");
2186 
2187         /**
2188          * Constant for the "Vertical Forms" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock VERTICAL_FORMS =
2192             new UnicodeBlock("VERTICAL_FORMS",
2193                              "VERTICAL FORMS",
2194                              "VERTICALFORMS");
2195 
2196         /**
2197          * Constant for the "Ancient Greek Numbers" Unicode character block.
2198          * @since 1.7
2199          */
2200         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2201             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2202                              "ANCIENT GREEK NUMBERS",
2203                              "ANCIENTGREEKNUMBERS");
2204 
2205         /**
2206          * Constant for the "Ancient Symbols" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock ANCIENT_SYMBOLS =
2210             new UnicodeBlock("ANCIENT_SYMBOLS",
2211                              "ANCIENT SYMBOLS",
2212                              "ANCIENTSYMBOLS");
2213 
2214         /**
2215          * Constant for the "Phaistos Disc" Unicode character block.
2216          * @since 1.7
2217          */
2218         public static final UnicodeBlock PHAISTOS_DISC =
2219             new UnicodeBlock("PHAISTOS_DISC",
2220                              "PHAISTOS DISC",
2221                              "PHAISTOSDISC");
2222 
2223         /**
2224          * Constant for the "Lycian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYCIAN =
2228             new UnicodeBlock("LYCIAN");
2229 
2230         /**
2231          * Constant for the "Carian" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock CARIAN =
2235             new UnicodeBlock("CARIAN");
2236 
2237         /**
2238          * Constant for the "Old Persian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_PERSIAN =
2242             new UnicodeBlock("OLD_PERSIAN",
2243                              "OLD PERSIAN",
2244                              "OLDPERSIAN");
2245 
2246         /**
2247          * Constant for the "Imperial Aramaic" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2251             new UnicodeBlock("IMPERIAL_ARAMAIC",
2252                              "IMPERIAL ARAMAIC",
2253                              "IMPERIALARAMAIC");
2254 
2255         /**
2256          * Constant for the "Phoenician" Unicode character block.
2257          * @since 1.7
2258          */
2259         public static final UnicodeBlock PHOENICIAN =
2260             new UnicodeBlock("PHOENICIAN");
2261 
2262         /**
2263          * Constant for the "Lydian" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock LYDIAN =
2267             new UnicodeBlock("LYDIAN");
2268 
2269         /**
2270          * Constant for the "Kharoshthi" Unicode character block.
2271          * @since 1.7
2272          */
2273         public static final UnicodeBlock KHAROSHTHI =
2274             new UnicodeBlock("KHAROSHTHI");
2275 
2276         /**
2277          * Constant for the "Old South Arabian" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2281             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2282                              "OLD SOUTH ARABIAN",
2283                              "OLDSOUTHARABIAN");
2284 
2285         /**
2286          * Constant for the "Avestan" Unicode character block.
2287          * @since 1.7
2288          */
2289         public static final UnicodeBlock AVESTAN =
2290             new UnicodeBlock("AVESTAN");
2291 
2292         /**
2293          * Constant for the "Inscriptional Parthian" Unicode character block.
2294          * @since 1.7
2295          */
2296         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2297             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2298                              "INSCRIPTIONAL PARTHIAN",
2299                              "INSCRIPTIONALPARTHIAN");
2300 
2301         /**
2302          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2303          * @since 1.7
2304          */
2305         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2306             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2307                              "INSCRIPTIONAL PAHLAVI",
2308                              "INSCRIPTIONALPAHLAVI");
2309 
2310         /**
2311          * Constant for the "Old Turkic" Unicode character block.
2312          * @since 1.7
2313          */
2314         public static final UnicodeBlock OLD_TURKIC =
2315             new UnicodeBlock("OLD_TURKIC",
2316                              "OLD TURKIC",
2317                              "OLDTURKIC");
2318 
2319         /**
2320          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2321          * @since 1.7
2322          */
2323         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2324             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2325                              "RUMI NUMERAL SYMBOLS",
2326                              "RUMINUMERALSYMBOLS");
2327 
2328         /**
2329          * Constant for the "Brahmi" Unicode character block.
2330          * @since 1.7
2331          */
2332         public static final UnicodeBlock BRAHMI =
2333             new UnicodeBlock("BRAHMI");
2334 
2335         /**
2336          * Constant for the "Kaithi" Unicode character block.
2337          * @since 1.7
2338          */
2339         public static final UnicodeBlock KAITHI =
2340             new UnicodeBlock("KAITHI");
2341 
2342         /**
2343          * Constant for the "Cuneiform" Unicode character block.
2344          * @since 1.7
2345          */
2346         public static final UnicodeBlock CUNEIFORM =
2347             new UnicodeBlock("CUNEIFORM");
2348 
2349         /**
2350          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2351          * character block.
2352          * @since 1.7
2353          */
2354         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2355             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2356                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2357                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2358 
2359         /**
2360          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2361          * @since 1.7
2362          */
2363         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2364             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2365                              "EGYPTIAN HIEROGLYPHS",
2366                              "EGYPTIANHIEROGLYPHS");
2367 
2368         /**
2369          * Constant for the "Bamum Supplement" Unicode character block.
2370          * @since 1.7
2371          */
2372         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2373             new UnicodeBlock("BAMUM_SUPPLEMENT",
2374                              "BAMUM SUPPLEMENT",
2375                              "BAMUMSUPPLEMENT");
2376 
2377         /**
2378          * Constant for the "Kana Supplement" Unicode character block.
2379          * @since 1.7
2380          */
2381         public static final UnicodeBlock KANA_SUPPLEMENT =
2382             new UnicodeBlock("KANA_SUPPLEMENT",
2383                              "KANA SUPPLEMENT",
2384                              "KANASUPPLEMENT");
2385 
2386         /**
2387          * Constant for the "Ancient Greek Musical Notation" Unicode character
2388          * block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2392             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2393                              "ANCIENT GREEK MUSICAL NOTATION",
2394                              "ANCIENTGREEKMUSICALNOTATION");
2395
2396         /**
2397          * Constant for the "Counting Rod Numerals" Unicode character block.
2398          * @since 1.7
2399          */
2400         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2401             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2402                              "COUNTING ROD NUMERALS",
2403                              "COUNTINGRODNUMERALS");
2404
2405         /**
2406          * Constant for the "Mahjong Tiles" Unicode character block.
2407          * @since 1.7
2408          */
2409         public static final UnicodeBlock MAHJONG_TILES =
2410             new UnicodeBlock("MAHJONG_TILES",
2411                              "MAHJONG TILES",
2412                              "MAHJONGTILES");
2413
2414         /**
2415          * Constant for the "Domino Tiles" Unicode character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock DOMINO_TILES =
2419             new UnicodeBlock("DOMINO_TILES",
2420                              "DOMINO TILES",
2421                              "DOMINOTILES");
2422
2423         /**
2424          * Constant for the "Playing Cards" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock PLAYING_CARDS =
2428             new UnicodeBlock("PLAYING_CARDS",
2429                              "PLAYING CARDS",
2430                              "PLAYINGCARDS");
2431
2432         /**
2433          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2434          * block.
2435          * @since 1.7
2436          */
2437         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2438             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2439                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2440                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2441
2442         /**
2443          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2444          * block.
2445          * @since 1.7
2446          */
2447         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2448             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2449                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2450                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2451
2452         /**
2453          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2454          * character block.
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2458             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2459                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2460                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2461
2462         /**
2463          * Constant for the "Emoticons" Unicode character block.
2464          * @since 1.7
2465          */
2466         public static final UnicodeBlock EMOTICONS =
2467             new UnicodeBlock("EMOTICONS");
2468
2469         /**
2470          * Constant for the "Transport and Map Symbols" Unicode character block.
2471          * @since 1.7
2472          */
2473         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2474             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2475                              "TRANSPORT AND MAP SYMBOLS",
2476                              "TRANSPORTANDMAPSYMBOLS");
2477
2478         /**
2479          * Constant for the "Alchemical Symbols" Unicode character block.
2480          * @since 1.7
2481          */
2482         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2483             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2484                              "ALCHEMICAL SYMBOLS",
2485                              "ALCHEMICALSYMBOLS");
2486
2487         /**
2488          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2489          * character block.
2490          * @since 1.7
2491          */
2492         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2493             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2494                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2495                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2496
2497         /**
2498          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2499          * character block.
2500          * @since 1.7
2501          */
2502         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2503             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2504                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2505                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2506
             // Constants ARABIC_EXTENDED_A through
             // ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS are all tagged @since 1.8.
             // Pattern visible in the declarations: a single-word block name uses
             // the one-argument constructor, while a multi-word name also passes
             // the upper-case name with spaces and again with the spaces removed
             // (hyphens are kept: ARABIC EXTENDED-A / ARABICEXTENDED-A).
             // NOTE(review): the extra strings look like lookup aliases, but the
             // constructor is outside this excerpt -- confirm before relying on it.
2507         /**
2508          * Constant for the "Arabic Extended-A" Unicode character block.
2509          * @since 1.8
2510          */
2511         public static final UnicodeBlock ARABIC_EXTENDED_A =
2512             new UnicodeBlock("ARABIC_EXTENDED_A",
2513                              "ARABIC EXTENDED-A",
2514                              "ARABICEXTENDED-A");
2515
2516         /**
2517          * Constant for the "Sundanese Supplement" Unicode character block.
2518          * @since 1.8
2519          */
2520         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2521             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2522                              "SUNDANESE SUPPLEMENT",
2523                              "SUNDANESESUPPLEMENT");
2524
2525         /**
2526          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2527          * @since 1.8
2528          */
2529         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2530             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2531                              "MEETEI MAYEK EXTENSIONS",
2532                              "MEETEIMAYEKEXTENSIONS");
2533
2534         /**
2535          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2536          * @since 1.8
2537          */
2538         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2539             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2540                              "MEROITIC HIEROGLYPHS",
2541                              "MEROITICHIEROGLYPHS");
2542
2543         /**
2544          * Constant for the "Meroitic Cursive" Unicode character block.
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock MEROITIC_CURSIVE =
2548             new UnicodeBlock("MEROITIC_CURSIVE",
2549                              "MEROITIC CURSIVE",
2550                              "MEROITICCURSIVE");
2551
2552         /**
2553          * Constant for the "Sora Sompeng" Unicode character block.
2554          * @since 1.8
2555          */
2556         public static final UnicodeBlock SORA_SOMPENG =
2557             new UnicodeBlock("SORA_SOMPENG",
2558                              "SORA SOMPENG",
2559                              "SORASOMPENG");
2560
2561         /**
2562          * Constant for the "Chakma" Unicode character block.
2563          * @since 1.8
2564          */
2565         public static final UnicodeBlock CHAKMA =
2566             new UnicodeBlock("CHAKMA");
2567
2568         /**
2569          * Constant for the "Sharada" Unicode character block.
2570          * @since 1.8
2571          */
2572         public static final UnicodeBlock SHARADA =
2573             new UnicodeBlock("SHARADA");
2574
2575         /**
2576          * Constant for the "Takri" Unicode character block.
2577          * @since 1.8
2578          */
2579         public static final UnicodeBlock TAKRI =
2580             new UnicodeBlock("TAKRI");
2581
2582         /**
2583          * Constant for the "Miao" Unicode character block.
2584          * @since 1.8
2585          */
2586         public static final UnicodeBlock MIAO =
2587             new UnicodeBlock("MIAO");
2588
2589         /**
2590          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2591          * character block.
2592          * @since 1.8
2593          */
2594         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2595             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2596                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2597                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2598
             // Constants COMBINING_DIACRITICAL_MARKS_EXTENDED through
             // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E are all tagged @since 9.
             // Pattern visible in the declarations: a single-word block name uses
             // the one-argument constructor, while a multi-word name also passes
             // the upper-case name with spaces and again with the spaces removed
             // (hyphens are kept: MYANMAR EXTENDED-B / MYANMAREXTENDED-B).
             // NOTE(review): the extra strings look like lookup aliases, but the
             // constructor is outside this excerpt -- confirm before relying on it.
2599         /**
2600          * Constant for the "Combining Diacritical Marks Extended" Unicode
2601          * character block.
2602          * @since 9
2603          */
2604         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2605             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2606                              "COMBINING DIACRITICAL MARKS EXTENDED",
2607                              "COMBININGDIACRITICALMARKSEXTENDED");
2608
2609         /**
2610          * Constant for the "Myanmar Extended-B" Unicode character block.
2611          * @since 9
2612          */
2613         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2614             new UnicodeBlock("MYANMAR_EXTENDED_B",
2615                              "MYANMAR EXTENDED-B",
2616                              "MYANMAREXTENDED-B");
2617
2618         /**
2619          * Constant for the "Latin Extended-E" Unicode character block.
2620          * @since 9
2621          */
2622         public static final UnicodeBlock LATIN_EXTENDED_E =
2623             new UnicodeBlock("LATIN_EXTENDED_E",
2624                              "LATIN EXTENDED-E",
2625                              "LATINEXTENDED-E");
2626
2627         /**
2628          * Constant for the "Coptic Epact Numbers" Unicode character block.
2629          * @since 9
2630          */
2631         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2632             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2633                              "COPTIC EPACT NUMBERS",
2634                              "COPTICEPACTNUMBERS");
2635
2636         /**
2637          * Constant for the "Old Permic" Unicode character block.
2638          * @since 9
2639          */
2640         public static final UnicodeBlock OLD_PERMIC =
2641             new UnicodeBlock("OLD_PERMIC",
2642                              "OLD PERMIC",
2643                              "OLDPERMIC");
2644
2645         /**
2646          * Constant for the "Elbasan" Unicode character block.
2647          * @since 9
2648          */
2649         public static final UnicodeBlock ELBASAN =
2650             new UnicodeBlock("ELBASAN");
2651
2652         /**
2653          * Constant for the "Caucasian Albanian" Unicode character block.
2654          * @since 9
2655          */
2656         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2657             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2658                              "CAUCASIAN ALBANIAN",
2659                              "CAUCASIANALBANIAN");
2660
2661         /**
2662          * Constant for the "Linear A" Unicode character block.
2663          * @since 9
2664          */
2665         public static final UnicodeBlock LINEAR_A =
2666             new UnicodeBlock("LINEAR_A",
2667                              "LINEAR A",
2668                              "LINEARA");
2669
2670         /**
2671          * Constant for the "Palmyrene" Unicode character block.
2672          * @since 9
2673          */
2674         public static final UnicodeBlock PALMYRENE =
2675             new UnicodeBlock("PALMYRENE");
2676
2677         /**
2678          * Constant for the "Nabataean" Unicode character block.
2679          * @since 9
2680          */
2681         public static final UnicodeBlock NABATAEAN =
2682             new UnicodeBlock("NABATAEAN");
2683
2684         /**
2685          * Constant for the "Old North Arabian" Unicode character block.
2686          * @since 9
2687          */
2688         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2689             new UnicodeBlock("OLD_NORTH_ARABIAN",
2690                              "OLD NORTH ARABIAN",
2691                              "OLDNORTHARABIAN");
2692
2693         /**
2694          * Constant for the "Manichaean" Unicode character block.
2695          * @since 9
2696          */
2697         public static final UnicodeBlock MANICHAEAN =
2698             new UnicodeBlock("MANICHAEAN");
2699
2700         /**
2701          * Constant for the "Psalter Pahlavi" Unicode character block.
2702          * @since 9
2703          */
2704         public static final UnicodeBlock PSALTER_PAHLAVI =
2705             new UnicodeBlock("PSALTER_PAHLAVI",
2706                              "PSALTER PAHLAVI",
2707                              "PSALTERPAHLAVI");
2708
2709         /**
2710          * Constant for the "Mahajani" Unicode character block.
2711          * @since 9
2712          */
2713         public static final UnicodeBlock MAHAJANI =
2714             new UnicodeBlock("MAHAJANI");
2715
2716         /**
2717          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2718          * @since 9
2719          */
2720         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2721             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2722                              "SINHALA ARCHAIC NUMBERS",
2723                              "SINHALAARCHAICNUMBERS");
2724
2725         /**
2726          * Constant for the "Khojki" Unicode character block.
2727          * @since 9
2728          */
2729         public static final UnicodeBlock KHOJKI =
2730             new UnicodeBlock("KHOJKI");
2731
2732         /**
2733          * Constant for the "Khudawadi" Unicode character block.
2734          * @since 9
2735          */
2736         public static final UnicodeBlock KHUDAWADI =
2737             new UnicodeBlock("KHUDAWADI");
2738
2739         /**
2740          * Constant for the "Grantha" Unicode character block.
2741          * @since 9
2742          */
2743         public static final UnicodeBlock GRANTHA =
2744             new UnicodeBlock("GRANTHA");
2745
2746         /**
2747          * Constant for the "Tirhuta" Unicode character block.
2748          * @since 9
2749          */
2750         public static final UnicodeBlock TIRHUTA =
2751             new UnicodeBlock("TIRHUTA");
2752
2753         /**
2754          * Constant for the "Siddham" Unicode character block.
2755          * @since 9
2756          */
2757         public static final UnicodeBlock SIDDHAM =
2758             new UnicodeBlock("SIDDHAM");
2759
2760         /**
2761          * Constant for the "Modi" Unicode character block.
2762          * @since 9
2763          */
2764         public static final UnicodeBlock MODI =
2765             new UnicodeBlock("MODI");
2766
2767         /**
2768          * Constant for the "Warang Citi" Unicode character block.
2769          * @since 9
2770          */
2771         public static final UnicodeBlock WARANG_CITI =
2772             new UnicodeBlock("WARANG_CITI",
2773                              "WARANG CITI",
2774                              "WARANGCITI");
2775
2776         /**
2777          * Constant for the "Pau Cin Hau" Unicode character block.
2778          * @since 9
2779          */
2780         public static final UnicodeBlock PAU_CIN_HAU =
2781             new UnicodeBlock("PAU_CIN_HAU",
2782                              "PAU CIN HAU",
2783                              "PAUCINHAU");
2784
2785         /**
2786          * Constant for the "Mro" Unicode character block.
2787          * @since 9
2788          */
2789         public static final UnicodeBlock MRO =
2790             new UnicodeBlock("MRO");
2791
2792         /**
2793          * Constant for the "Bassa Vah" Unicode character block.
2794          * @since 9
2795          */
2796         public static final UnicodeBlock BASSA_VAH =
2797             new UnicodeBlock("BASSA_VAH",
2798                              "BASSA VAH",
2799                              "BASSAVAH");
2800
2801         /**
2802          * Constant for the "Pahawh Hmong" Unicode character block.
2803          * @since 9
2804          */
2805         public static final UnicodeBlock PAHAWH_HMONG =
2806             new UnicodeBlock("PAHAWH_HMONG",
2807                              "PAHAWH HMONG",
2808                              "PAHAWHHMONG");
2809
2810         /**
2811          * Constant for the "Duployan" Unicode character block.
2812          * @since 9
2813          */
2814         public static final UnicodeBlock DUPLOYAN =
2815             new UnicodeBlock("DUPLOYAN");
2816
2817         /**
2818          * Constant for the "Shorthand Format Controls" Unicode character block.
2819          * @since 9
2820          */
2821         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2822             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2823                              "SHORTHAND FORMAT CONTROLS",
2824                              "SHORTHANDFORMATCONTROLS");
2825
2826         /**
2827          * Constant for the "Mende Kikakui" Unicode character block.
2828          * @since 9
2829          */
2830         public static final UnicodeBlock MENDE_KIKAKUI =
2831             new UnicodeBlock("MENDE_KIKAKUI",
2832                              "MENDE KIKAKUI",
2833                              "MENDEKIKAKUI");
2834
2835         /**
2836          * Constant for the "Ornamental Dingbats" Unicode character block.
2837          * @since 9
2838          */
2839         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2840             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2841                              "ORNAMENTAL DINGBATS",
2842                              "ORNAMENTALDINGBATS");
2843
2844         /**
2845          * Constant for the "Geometric Shapes Extended" Unicode character block.
2846          * @since 9
2847          */
2848         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2849             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2850                              "GEOMETRIC SHAPES EXTENDED",
2851                              "GEOMETRICSHAPESEXTENDED");
2852
2853         /**
2854          * Constant for the "Supplemental Arrows-C" Unicode character block.
2855          * @since 9
2856          */
2857         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2858             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2859                              "SUPPLEMENTAL ARROWS-C",
2860                              "SUPPLEMENTALARROWS-C");
2861
2862         /**
2863          * Constant for the "Cherokee Supplement" Unicode character block.
2864          * @since 9
2865          */
2866         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2867             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2868                              "CHEROKEE SUPPLEMENT",
2869                              "CHEROKEESUPPLEMENT");
2870
2871         /**
2872          * Constant for the "Hatran" Unicode character block.
2873          * @since 9
2874          */
2875         public static final UnicodeBlock HATRAN =
2876             new UnicodeBlock("HATRAN");
2877
2878         /**
2879          * Constant for the "Old Hungarian" Unicode character block.
2880          * @since 9
2881          */
2882         public static final UnicodeBlock OLD_HUNGARIAN =
2883             new UnicodeBlock("OLD_HUNGARIAN",
2884                              "OLD HUNGARIAN",
2885                              "OLDHUNGARIAN");
2886
2887         /**
2888          * Constant for the "Multani" Unicode character block.
2889          * @since 9
2890          */
2891         public static final UnicodeBlock MULTANI =
2892             new UnicodeBlock("MULTANI");
2893
2894         /**
2895          * Constant for the "Ahom" Unicode character block.
2896          * @since 9
2897          */
2898         public static final UnicodeBlock AHOM =
2899             new UnicodeBlock("AHOM");
2900
2901         /**
2902          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2903          * @since 9
2904          */
2905         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2906             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2907                              "EARLY DYNASTIC CUNEIFORM",
2908                              "EARLYDYNASTICCUNEIFORM");
2909
2910         /**
2911          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2912          * @since 9
2913          */
2914         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2915             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2916                              "ANATOLIAN HIEROGLYPHS",
2917                              "ANATOLIANHIEROGLYPHS");
2918
2919         /**
2920          * Constant for the "Sutton SignWriting" Unicode character block.
2921          * @since 9
2922          */
2923         public static final UnicodeBlock SUTTON_SIGNWRITING =
2924             new UnicodeBlock("SUTTON_SIGNWRITING",
2925                              "SUTTON SIGNWRITING",
2926                              "SUTTONSIGNWRITING");
2927
2928         /**
2929          * Constant for the "Supplemental Symbols and Pictographs" Unicode
2930          * character block.
2931          * @since 9
2932          */
2933         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2934             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2935                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2936                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2937
2938         /**
2939          * Constant for the "CJK Unified Ideographs Extension E" Unicode
2940          * character block.
2941          * @since 9
2942          */
2943         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2944             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2945                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2946                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2947
             // Constants SYRIAC_SUPPLEMENT through
             // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F are all tagged @since 11.
             // Pattern visible in the declarations: a single-word block name uses
             // the one-argument constructor, while a multi-word name also passes
             // the upper-case name with spaces and again with the spaces removed
             // (hyphens are kept: CYRILLIC EXTENDED-C / CYRILLICEXTENDED-C).
             // NOTE(review): the extra strings look like lookup aliases, but the
             // constructor is outside this excerpt -- confirm before relying on it.
2948         /**
2949          * Constant for the "Syriac Supplement" Unicode
2950          * character block.
2951          * @since 11
2952          */
2953         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2954             new UnicodeBlock("SYRIAC_SUPPLEMENT",
2955                              "SYRIAC SUPPLEMENT",
2956                              "SYRIACSUPPLEMENT");
2957
2958         /**
2959          * Constant for the "Cyrillic Extended-C" Unicode
2960          * character block.
2961          * @since 11
2962          */
2963         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2964             new UnicodeBlock("CYRILLIC_EXTENDED_C",
2965                              "CYRILLIC EXTENDED-C",
2966                              "CYRILLICEXTENDED-C");
2967
2968         /**
2969          * Constant for the "Osage" Unicode
2970          * character block.
2971          * @since 11
2972          */
2973         public static final UnicodeBlock OSAGE =
2974             new UnicodeBlock("OSAGE");
2975
2976         /**
2977          * Constant for the "Newa" Unicode
2978          * character block.
2979          * @since 11
2980          */
2981         public static final UnicodeBlock NEWA =
2982             new UnicodeBlock("NEWA");
2983
2984         /**
2985          * Constant for the "Mongolian Supplement" Unicode
2986          * character block.
2987          * @since 11
2988          */
2989         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2990             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2991                              "MONGOLIAN SUPPLEMENT",
2992                              "MONGOLIANSUPPLEMENT");
2993
2994         /**
2995          * Constant for the "Marchen" Unicode
2996          * character block.
2997          * @since 11
2998          */
2999         public static final UnicodeBlock MARCHEN =
3000             new UnicodeBlock("MARCHEN");
3001
3002         /**
3003          * Constant for the "Ideographic Symbols and Punctuation" Unicode
3004          * character block.
3005          * @since 11
3006          */
3007         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3008             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3009                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3010                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3011
3012         /**
3013          * Constant for the "Tangut" Unicode
3014          * character block.
3015          * @since 11
3016          */
3017         public static final UnicodeBlock TANGUT =
3018             new UnicodeBlock("TANGUT");
3019
3020         /**
3021          * Constant for the "Tangut Components" Unicode
3022          * character block.
3023          * @since 11
3024          */
3025         public static final UnicodeBlock TANGUT_COMPONENTS =
3026             new UnicodeBlock("TANGUT_COMPONENTS",
3027                              "TANGUT COMPONENTS",
3028                              "TANGUTCOMPONENTS");
3029
3030         /**
3031          * Constant for the "Kana Extended-A" Unicode
3032          * character block.
3033          * @since 11
3034          */
3035         public static final UnicodeBlock KANA_EXTENDED_A =
3036             new UnicodeBlock("KANA_EXTENDED_A",
3037                              "KANA EXTENDED-A",
3038                              "KANAEXTENDED-A");
3039         /**
3040          * Constant for the "Glagolitic Supplement" Unicode
3041          * character block.
3042          * @since 11
3043          */
3044         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3045             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3046                              "GLAGOLITIC SUPPLEMENT",
3047                              "GLAGOLITICSUPPLEMENT");
3048         /**
3049          * Constant for the "Adlam" Unicode
3050          * character block.
3051          * @since 11
3052          */
3053         public static final UnicodeBlock ADLAM =
3054             new UnicodeBlock("ADLAM");
3055
3056         /**
3057          * Constant for the "Masaram Gondi" Unicode
3058          * character block.
3059          * @since 11
3060          */
3061         public static final UnicodeBlock MASARAM_GONDI =
3062             new UnicodeBlock("MASARAM_GONDI",
3063                              "MASARAM GONDI",
3064                              "MASARAMGONDI");
3065
3066         /**
3067          * Constant for the "Zanabazar Square" Unicode
3068          * character block.
3069          * @since 11
3070          */
3071         public static final UnicodeBlock ZANABAZAR_SQUARE =
3072             new UnicodeBlock("ZANABAZAR_SQUARE",
3073                              "ZANABAZAR SQUARE",
3074                              "ZANABAZARSQUARE");
3075
3076         /**
3077          * Constant for the "Nushu" Unicode
3078          * character block.
3079          * @since 11
3080          */
3081         public static final UnicodeBlock NUSHU =
3082             new UnicodeBlock("NUSHU");
3083
3084         /**
3085          * Constant for the "Soyombo" Unicode
3086          * character block.
3087          * @since 11
3088          */
3089         public static final UnicodeBlock SOYOMBO =
3090             new UnicodeBlock("SOYOMBO");
3091
3092         /**
3093          * Constant for the "Bhaiksuki" Unicode
3094          * character block.
3095          * @since 11
3096          */
3097         public static final UnicodeBlock BHAIKSUKI =
3098             new UnicodeBlock("BHAIKSUKI");
3099
3100         /**
3101          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3102          * character block.
3103          * @since 11
3104          */
3105         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3106             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3107                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3108                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3109         /**
3110          * Constant for the "Georgian Extended" Unicode
3111          * character block.
3112          * @since 12
3113          */
3114         public static final UnicodeBlock GEORGIAN_EXTENDED =
3115             new UnicodeBlock("GEORGIAN_EXTENDED",
3116                              "GEORGIAN EXTENDED",
3117                              "GEORGIANEXTENDED");
3118
3119         /**
3120          * Constant for the "Hanifi Rohingya" Unicode
3121          * character block.
3122          * @since 12
3123          */
3124         public static final UnicodeBlock HANIFI_ROHINGYA =
3125             new UnicodeBlock("HANIFI_ROHINGYA",
3126                              "HANIFI ROHINGYA",
3127                              "HANIFIROHINGYA");
3128
3129         /**
3130          * Constant for the "Old Sogdian" Unicode
3131          * character block.
3132          * @since 12
3133          */
3134         public static final UnicodeBlock OLD_SOGDIAN =
3135             new UnicodeBlock("OLD_SOGDIAN",
3136                              "OLD SOGDIAN",
3137                              "OLDSOGDIAN");
3138
3139         /**
3140          * Constant for the "Sogdian" Unicode
3141          * character block.
3142          * @since 12
3143          */
3144         public static final UnicodeBlock SOGDIAN =
3145             new UnicodeBlock("SOGDIAN");
3146
3147         /**
3148          * Constant for the "Dogra" Unicode
3149          * character block.
3150          * @since 12
3151          */
3152         public static final UnicodeBlock DOGRA =
3153             new UnicodeBlock("DOGRA");
3154
3155         /**
3156          * Constant for the "Gunjala Gondi" Unicode
3157          * character block.
3158          * @since 12
3159          */
3160         public static final UnicodeBlock GUNJALA_GONDI =
3161             new UnicodeBlock("GUNJALA_GONDI",
3162                              "GUNJALA GONDI",
3163                              "GUNJALAGONDI");
3164
3165         /**
3166          * Constant for the "Makasar" Unicode
3167          * character block.
3168          * @since 12
3169          */
3170         public static final UnicodeBlock MAKASAR =
3171             new UnicodeBlock("MAKASAR");
3172
3173         /**
3174          * Constant for the "Medefaidrin" Unicode
3175          * character block.
3176          * @since 12
3177          */
3178         public static final UnicodeBlock MEDEFAIDRIN =
3179             new UnicodeBlock("MEDEFAIDRIN");
3180
3181         /**
3182          * Constant for the "Mayan Numerals" Unicode
3183          * character block.
3184          * @since 12
3185          */
3186         public static final UnicodeBlock MAYAN_NUMERALS =
3187             new UnicodeBlock("MAYAN_NUMERALS",
3188                              "MAYAN NUMERALS",
3189                              "MAYANNUMERALS");
3190
3191         /**
3192          * Constant for the "Indic Siyaq Numbers" Unicode
3193          * character block.
3194          * @since 12
3195          */
3196         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3197             new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3198                              "INDIC SIYAQ NUMBERS",
3199                              "INDICSIYAQNUMBERS");
3200
3201         /**
3202          * Constant for the "Chess Symbols" Unicode
3203          * character block.
3204          * @since 12
3205          */
3206         public static final UnicodeBlock CHESS_SYMBOLS =
3207             new UnicodeBlock("CHESS_SYMBOLS",
3208                              "CHESS SYMBOLS",
3209                              "CHESSSYMBOLS");
3210         
3211         
3212         private static final int blockStarts[] = {
3213             0x0000,   // 0000..007F; Basic Latin
3214             0x0080,   // 0080..00FF; Latin-1 Supplement
3215             0x0100,   // 0100..017F; Latin Extended-A
3216             0x0180,   // 0180..024F; Latin Extended-B
3217             0x0250,   // 0250..02AF; IPA Extensions
3218             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3219             0x0300,   // 0300..036F; Combining Diacritical Marks
3220             0x0370,   // 0370..03FF; Greek and Coptic
3221             0x0400,   // 0400..04FF; Cyrillic
3222             0x0500,   // 0500..052F; Cyrillic Supplement
3223             0x0530,   // 0530..058F; Armenian
3224             0x0590,   // 0590..05FF; Hebrew
3225             0x0600,   // 0600..06FF; Arabic
3226             0x0700,   // 0700..074F; Syriac
3227             0x0750,   // 0750..077F; Arabic Supplement
3228             0x0780,   // 0780..07BF; Thaana
3229             0x07C0,   // 07C0..07FF; NKo
3230             0x0800,   // 0800..083F; Samaritan
3231             0x0840,   // 0840..085F; Mandaic
3232             0x0860,   // 0860..086F; Syriac Supplement
3233             0x0870,   //             unassigned
3234             0x08A0,   // 08A0..08FF; Arabic Extended-A
3235             0x0900,   // 0900..097F; Devanagari
3236             0x0980,   // 0980..09FF; Bengali
3237             0x0A00,   // 0A00..0A7F; Gurmukhi
3238             0x0A80,   // 0A80..0AFF; Gujarati
3239             0x0B00,   // 0B00..0B7F; Oriya
3240             0x0B80,   // 0B80..0BFF; Tamil
3241             0x0C00,   // 0C00..0C7F; Telugu
3242             0x0C80,   // 0C80..0CFF; Kannada
3243             0x0D00,   // 0D00..0D7F; Malayalam
3244             0x0D80,   // 0D80..0DFF; Sinhala
3245             0x0E00,   // 0E00..0E7F; Thai
3246             0x0E80,   // 0E80..0EFF; Lao
3247             0x0F00,   // 0F00..0FFF; Tibetan
3248             0x1000,   // 1000..109F; Myanmar
3249             0x10A0,   // 10A0..10FF; Georgian
3250             0x1100,   // 1100..11FF; Hangul Jamo
3251             0x1200,   // 1200..137F; Ethiopic
3252             0x1380,   // 1380..139F; Ethiopic Supplement
3253             0x13A0,   // 13A0..13FF; Cherokee
3254             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3255             0x1680,   // 1680..169F; Ogham
3256             0x16A0,   // 16A0..16FF; Runic
3257             0x1700,   // 1700..171F; Tagalog
3258             0x1720,   // 1720..173F; Hanunoo
3259             0x1740,   // 1740..175F; Buhid
3260             0x1760,   // 1760..177F; Tagbanwa
3261             0x1780,   // 1780..17FF; Khmer
3262             0x1800,   // 1800..18AF; Mongolian
3263             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3264             0x1900,   // 1900..194F; Limbu
3265             0x1950,   // 1950..197F; Tai Le
3266             0x1980,   // 1980..19DF; New Tai Lue
3267             0x19E0,   // 19E0..19FF; Khmer Symbols
3268             0x1A00,   // 1A00..1A1F; Buginese
3269             0x1A20,   // 1A20..1AAF; Tai Tham
3270             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3271             0x1B00,   // 1B00..1B7F; Balinese
3272             0x1B80,   // 1B80..1BBF; Sundanese
3273             0x1BC0,   // 1BC0..1BFF; Batak
3274             0x1C00,   // 1C00..1C4F; Lepcha
3275             0x1C50,   // 1C50..1C7F; Ol Chiki
3276             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3277             0x1C90,   // 1C90..1CBF; Georgian Extended
3278             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3279             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3280             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3281             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3282             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3283             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3284             0x1F00,   // 1F00..1FFF; Greek Extended
3285             0x2000,   // 2000..206F; General Punctuation
3286             0x2070,   // 2070..209F; Superscripts and Subscripts
3287             0x20A0,   // 20A0..20CF; Currency Symbols
3288             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3289             0x2100,   // 2100..214F; Letterlike Symbols
3290             0x2150,   // 2150..218F; Number Forms
3291             0x2190,   // 2190..21FF; Arrows
3292             0x2200,   // 2200..22FF; Mathematical Operators
3293             0x2300,   // 2300..23FF; Miscellaneous Technical
3294             0x2400,   // 2400..243F; Control Pictures
3295             0x2440,   // 2440..245F; Optical Character Recognition
3296             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3297             0x2500,   // 2500..257F; Box Drawing
3298             0x2580,   // 2580..259F; Block Elements
3299             0x25A0,   // 25A0..25FF; Geometric Shapes
3300             0x2600,   // 2600..26FF; Miscellaneous Symbols
3301             0x2700,   // 2700..27BF; Dingbats
3302             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3303             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3304             0x2800,   // 2800..28FF; Braille Patterns
3305             0x2900,   // 2900..297F; Supplemental Arrows-B
3306             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3307             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3308             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3309             0x2C00,   // 2C00..2C5F; Glagolitic
3310             0x2C60,   // 2C60..2C7F; Latin Extended-C
3311             0x2C80,   // 2C80..2CFF; Coptic
3312             0x2D00,   // 2D00..2D2F; Georgian Supplement
3313             0x2D30,   // 2D30..2D7F; Tifinagh
3314             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3315             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3316             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3317             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3318             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3319             0x2FE0,   //             unassigned
3320             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3321             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3322             0x3040,   // 3040..309F; Hiragana
3323             0x30A0,   // 30A0..30FF; Katakana
3324             0x3100,   // 3100..312F; Bopomofo
3325             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3326             0x3190,   // 3190..319F; Kanbun
3327             0x31A0,   // 31A0..31BF; Bopomofo Extended
3328             0x31C0,   // 31C0..31EF; CJK Strokes
3329             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3330             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3331             0x3300,   // 3300..33FF; CJK Compatibility
3332             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3333             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3334             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3335             0xA000,   // A000..A48F; Yi Syllables
3336             0xA490,   // A490..A4CF; Yi Radicals
3337             0xA4D0,   // A4D0..A4FF; Lisu
3338             0xA500,   // A500..A63F; Vai
3339             0xA640,   // A640..A69F; Cyrillic Extended-B
3340             0xA6A0,   // A6A0..A6FF; Bamum
3341             0xA700,   // A700..A71F; Modifier Tone Letters
3342             0xA720,   // A720..A7FF; Latin Extended-D
3343             0xA800,   // A800..A82F; Syloti Nagri
3344             0xA830,   // A830..A83F; Common Indic Number Forms
3345             0xA840,   // A840..A87F; Phags-pa
3346             0xA880,   // A880..A8DF; Saurashtra
3347             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3348             0xA900,   // A900..A92F; Kayah Li
3349             0xA930,   // A930..A95F; Rejang
3350             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3351             0xA980,   // A980..A9DF; Javanese
3352             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3353             0xAA00,   // AA00..AA5F; Cham
3354             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3355             0xAA80,   // AA80..AADF; Tai Viet
3356             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3357             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3358             0xAB30,   // AB30..AB6F; Latin Extended-E
3359             0xAB70,   // AB70..ABBF; Cherokee Supplement
3360             0xABC0,   // ABC0..ABFF; Meetei Mayek
3361             0xAC00,   // AC00..D7AF; Hangul Syllables
3362             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3363             0xD800,   // D800..DB7F; High Surrogates
3364             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3365             0xDC00,   // DC00..DFFF; Low Surrogates
3366             0xE000,   // E000..F8FF; Private Use Area
3367             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3368             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3369             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3370             0xFE00,   // FE00..FE0F; Variation Selectors
3371             0xFE10,   // FE10..FE1F; Vertical Forms
3372             0xFE20,   // FE20..FE2F; Combining Half Marks
3373             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3374             0xFE50,   // FE50..FE6F; Small Form Variants
3375             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3376             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3377             0xFFF0,   // FFF0..FFFF; Specials
3378             0x10000,  // 10000..1007F; Linear B Syllabary
3379             0x10080,  // 10080..100FF; Linear B Ideograms
3380             0x10100,  // 10100..1013F; Aegean Numbers
3381             0x10140,  // 10140..1018F; Ancient Greek Numbers
3382             0x10190,  // 10190..101CF; Ancient Symbols
3383             0x101D0,  // 101D0..101FF; Phaistos Disc
3384             0x10200,  //               unassigned
3385             0x10280,  // 10280..1029F; Lycian
3386             0x102A0,  // 102A0..102DF; Carian
3387             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3388             0x10300,  // 10300..1032F; Old Italic
3389             0x10330,  // 10330..1034F; Gothic
3390             0x10350,  // 10350..1037F; Old Permic
3391             0x10380,  // 10380..1039F; Ugaritic
3392             0x103A0,  // 103A0..103DF; Old Persian
3393             0x103E0,  //               unassigned
3394             0x10400,  // 10400..1044F; Deseret
3395             0x10450,  // 10450..1047F; Shavian
3396             0x10480,  // 10480..104AF; Osmanya
3397             0x104B0,  // 104B0..104FF; Osage
3398             0x10500,  // 10500..1052F; Elbasan
3399             0x10530,  // 10530..1056F; Caucasian Albanian
3400             0x10570,  //               unassigned
3401             0x10600,  // 10600..1077F; Linear A
3402             0x10780,  //               unassigned
3403             0x10800,  // 10800..1083F; Cypriot Syllabary
3404             0x10840,  // 10840..1085F; Imperial Aramaic
3405             0x10860,  // 10860..1087F; Palmyrene
3406             0x10880,  // 10880..108AF; Nabataean
3407             0x108B0,  //               unassigned
3408             0x108E0,  // 108E0..108FF; Hatran
3409             0x10900,  // 10900..1091F; Phoenician
3410             0x10920,  // 10920..1093F; Lydian
3411             0x10940,  //               unassigned
3412             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3413             0x109A0,  // 109A0..109FF; Meroitic Cursive
3414             0x10A00,  // 10A00..10A5F; Kharoshthi
3415             0x10A60,  // 10A60..10A7F; Old South Arabian
3416             0x10A80,  // 10A80..10A9F; Old North Arabian
3417             0x10AA0,  //               unassigned
3418             0x10AC0,  // 10AC0..10AFF; Manichaean
3419             0x10B00,  // 10B00..10B3F; Avestan
3420             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3421             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3422             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3423             0x10BB0,  //               unassigned
3424             0x10C00,  // 10C00..10C4F; Old Turkic
3425             0x10C50,  //               unassigned
3426             0x10C80,  // 10C80..10CFF; Old Hungarian
3427             0x10D00,  // 10D00..10D3F; Hanifi Rohingya
3428             0x10D40,  //               unassigned
3429             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3430             0x10E80,  //               unassigned
3431             0x10F00,  // 10F00..10F2F; Old Sogdian
3432             0x10F30,  // 10F30..10F6F; Sogdian
3433             0x10F70,  //               unassigned
3434             0x11000,  // 11000..1107F; Brahmi
3435             0x11080,  // 11080..110CF; Kaithi
3436             0x110D0,  // 110D0..110FF; Sora Sompeng
3437             0x11100,  // 11100..1114F; Chakma
3438             0x11150,  // 11150..1117F; Mahajani
3439             0x11180,  // 11180..111DF; Sharada
3440             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3441             0x11200,  // 11200..1124F; Khojki
3442             0x11250,  //               unassigned
3443             0x11280,  // 11280..112AF; Multani
3444             0x112B0,  // 112B0..112FF; Khudawadi
3445             0x11300,  // 11300..1137F; Grantha
3446             0x11380,  //               unassigned
3447             0x11400,  // 11400..1147F; Newa
3448             0x11480,  // 11480..114DF; Tirhuta
3449             0x114E0,  //               unassigned
3450             0x11580,  // 11580..115FF; Siddham
3451             0x11600,  // 11600..1165F; Modi
3452             0x11660, //  11660..1167F; Mongolian Supplement
3453             0x11680,  // 11680..116CF; Takri
3454             0x116D0,  //               unassigned
3455             0x11700,  // 11700..1173F; Ahom
3456             0x11740,  //               unassigned
3457             0x11800,  // 11800..1184F; Dogra
3458             0x11850,  //               unassigned
3459             0x118A0,  // 118A0..118FF; Warang Citi
3460             0x11900,  //               unassigned
3461             0x11A00,  // 11A00..11A4F; Zanabazar Square
3462             0x11A50,  // 11A50..11AAF; Soyombo
3463             0x11AB0,  //               unassigned
3464             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3465             0x11B00,  //               unassigned
3466             0x11C00,  // 11C00..11C6F; Bhaiksuki
3467             0x11C70,  // 11C70..11CBF; Marchen
3468             0x11CC0,  //               unassigned
3469             0x11D00,  // 11D00..11D5F; Masaram Gondi
3470             0x11D60,  // 11D60..11DAF; Gunjala Gondi
3471             0x11DB0,  //               unassigned
3472             0x11EE0,  // 11EE0..11EFF; Makasar
3473             0x11F00,  //               unassigned
3474             0x12000,  // 12000..123FF; Cuneiform
3475             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3476             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3477             0x12550,  //               unassigned
3478             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3479             0x13430,  //               unassigned
3480             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3481             0x14680,  //               unassigned
3482             0x16800,  // 16800..16A3F; Bamum Supplement
3483             0x16A40,  // 16A40..16A6F; Mro
3484             0x16A70,  //               unassigned
3485             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3486             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3487             0x16B90,  //               unassigned
3488             0x16E40,  // 16E40..16E9F; Medefaidrin
3489             0x16EA0,  //               unassigned
3490             0x16F00,  // 16F00..16F9F; Miao
3491             0x16FA0,  //               unassigned
3492             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3493             0x17000,  // 17000..187FF; Tangut
3494             0x18800,  // 18800..18AFF; Tangut Components
3495             0x18B00,  //               unassigned
3496             0x1B000,  // 1B000..1B0FF; Kana Supplement
3497             0x1B100,  // 1B100..1B12F; Kana Extended-A
3498             0x1B130,  //               unassigned
3499             0x1B170,  // 1B170..1B2FF; Nushu
3500             0x1B300,  //               unassigned
3501             0x1BC00,  // 1BC00..1BC9F; Duployan
3502             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3503             0x1BCB0,  //               unassigned
3504             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3505             0x1D100,  // 1D100..1D1FF; Musical Symbols
3506             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3507             0x1D250,  //               unassigned
3508             0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
3509             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3510             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3511             0x1D380,  //               unassigned
3512             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3513             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3514             0x1DAB0,  //               unassigned
3515             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3516             0x1E030,  //               unassigned
3517             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3518             0x1E8E0,  //               unassigned
3519             0x1E900,  // 1E900..1E95F; Adlam
3520             0x1E960,  //               unassigned
3521             0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
3522             0x1ECC0,  //               unassigned
3523             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3524             0x1EF00,  //               unassigned
3525             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3526             0x1F030,  // 1F030..1F09F; Domino Tiles
3527             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3528             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3529             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3530             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3531             0x1F600,  // 1F600..1F64F; Emoticons
3532             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3533             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3534             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3535             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3536             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3537             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3538             0x1FA00,  // 1FA00..1FA6F; Chess Symbols
3539             0x1FA70,  //               unassigned
3540             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3541             0x2A6E0,  //               unassigned
3542             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3543             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3544             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3545             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3546             0x2EBF0,  //               unassigned
3547             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3548             0x2FA20,  //               unassigned
3549             0xE0000,  // E0000..E007F; Tags
3550             0xE0080,  //               unassigned
3551             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3552             0xE01F0,  //               unassigned
3553             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3554             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
3555         };
3556 
3557         private static final UnicodeBlock[] blocks = {
3558             BASIC_LATIN,
3559             LATIN_1_SUPPLEMENT,
3560             LATIN_EXTENDED_A,
3561             LATIN_EXTENDED_B,
3562             IPA_EXTENSIONS,
3563             SPACING_MODIFIER_LETTERS,
3564             COMBINING_DIACRITICAL_MARKS,
3565             GREEK,
3566             CYRILLIC,
3567             CYRILLIC_SUPPLEMENTARY,
3568             ARMENIAN,
3569             HEBREW,
3570             ARABIC,
3571             SYRIAC,
3572             ARABIC_SUPPLEMENT,
3573             THAANA,
3574             NKO,
3575             SAMARITAN,
3576             MANDAIC,
3577             SYRIAC_SUPPLEMENT,
3578             null,
3579             ARABIC_EXTENDED_A,
3580             DEVANAGARI,
3581             BENGALI,
3582             GURMUKHI,
3583             GUJARATI,
3584             ORIYA,
3585             TAMIL,
3586             TELUGU,
3587             KANNADA,
3588             MALAYALAM,
3589             SINHALA,
3590             THAI,
3591             LAO,
3592             TIBETAN,
3593             MYANMAR,
3594             GEORGIAN,
3595             HANGUL_JAMO,
3596             ETHIOPIC,
3597             ETHIOPIC_SUPPLEMENT,
3598             CHEROKEE,
3599             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3600             OGHAM,
3601             RUNIC,
3602             TAGALOG,
3603             HANUNOO,
3604             BUHID,
3605             TAGBANWA,
3606             KHMER,
3607             MONGOLIAN,
3608             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3609             LIMBU,
3610             TAI_LE,
3611             NEW_TAI_LUE,
3612             KHMER_SYMBOLS,
3613             BUGINESE,
3614             TAI_THAM,
3615             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3616             BALINESE,
3617             SUNDANESE,
3618             BATAK,
3619             LEPCHA,
3620             OL_CHIKI,
3621             CYRILLIC_EXTENDED_C,
3622             GEORGIAN_EXTENDED,
3623             SUNDANESE_SUPPLEMENT,
3624             VEDIC_EXTENSIONS,
3625             PHONETIC_EXTENSIONS,
3626             PHONETIC_EXTENSIONS_SUPPLEMENT,
3627             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3628             LATIN_EXTENDED_ADDITIONAL,
3629             GREEK_EXTENDED,
3630             GENERAL_PUNCTUATION,
3631             SUPERSCRIPTS_AND_SUBSCRIPTS,
3632             CURRENCY_SYMBOLS,
3633             COMBINING_MARKS_FOR_SYMBOLS,
3634             LETTERLIKE_SYMBOLS,
3635             NUMBER_FORMS,
3636             ARROWS,
3637             MATHEMATICAL_OPERATORS,
3638             MISCELLANEOUS_TECHNICAL,
3639             CONTROL_PICTURES,
3640             OPTICAL_CHARACTER_RECOGNITION,
3641             ENCLOSED_ALPHANUMERICS,
3642             BOX_DRAWING,
3643             BLOCK_ELEMENTS,
3644             GEOMETRIC_SHAPES,
3645             MISCELLANEOUS_SYMBOLS,
3646             DINGBATS,
3647             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3648             SUPPLEMENTAL_ARROWS_A,
3649             BRAILLE_PATTERNS,
3650             SUPPLEMENTAL_ARROWS_B,
3651             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3652             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3653             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3654             GLAGOLITIC,
3655             LATIN_EXTENDED_C,
3656             COPTIC,
3657             GEORGIAN_SUPPLEMENT,
3658             TIFINAGH,
3659             ETHIOPIC_EXTENDED,
3660             CYRILLIC_EXTENDED_A,
3661             SUPPLEMENTAL_PUNCTUATION,
3662             CJK_RADICALS_SUPPLEMENT,
3663             KANGXI_RADICALS,
3664             null,
3665             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3666             CJK_SYMBOLS_AND_PUNCTUATION,
3667             HIRAGANA,
3668             KATAKANA,
3669             BOPOMOFO,
3670             HANGUL_COMPATIBILITY_JAMO,
3671             KANBUN,
3672             BOPOMOFO_EXTENDED,
3673             CJK_STROKES,
3674             KATAKANA_PHONETIC_EXTENSIONS,
3675             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3676             CJK_COMPATIBILITY,
3677             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3678             YIJING_HEXAGRAM_SYMBOLS,
3679             CJK_UNIFIED_IDEOGRAPHS,
3680             YI_SYLLABLES,
3681             YI_RADICALS,
3682             LISU,
3683             VAI,
3684             CYRILLIC_EXTENDED_B,
3685             BAMUM,
3686             MODIFIER_TONE_LETTERS,
3687             LATIN_EXTENDED_D,
3688             SYLOTI_NAGRI,
3689             COMMON_INDIC_NUMBER_FORMS,
3690             PHAGS_PA,
3691             SAURASHTRA,
3692             DEVANAGARI_EXTENDED,
3693             KAYAH_LI,
3694             REJANG,
3695             HANGUL_JAMO_EXTENDED_A,
3696             JAVANESE,
3697             MYANMAR_EXTENDED_B,
3698             CHAM,
3699             MYANMAR_EXTENDED_A,
3700             TAI_VIET,
3701             MEETEI_MAYEK_EXTENSIONS,
3702             ETHIOPIC_EXTENDED_A,
3703             LATIN_EXTENDED_E,
3704             CHEROKEE_SUPPLEMENT,
3705             MEETEI_MAYEK,
3706             HANGUL_SYLLABLES,
3707             HANGUL_JAMO_EXTENDED_B,
3708             HIGH_SURROGATES,
3709             HIGH_PRIVATE_USE_SURROGATES,
3710             LOW_SURROGATES,
3711             PRIVATE_USE_AREA,
3712             CJK_COMPATIBILITY_IDEOGRAPHS,
3713             ALPHABETIC_PRESENTATION_FORMS,
3714             ARABIC_PRESENTATION_FORMS_A,
3715             VARIATION_SELECTORS,
3716             VERTICAL_FORMS,
3717             COMBINING_HALF_MARKS,
3718             CJK_COMPATIBILITY_FORMS,
3719             SMALL_FORM_VARIANTS,
3720             ARABIC_PRESENTATION_FORMS_B,
3721             HALFWIDTH_AND_FULLWIDTH_FORMS,
3722             SPECIALS,
3723             LINEAR_B_SYLLABARY,
3724             LINEAR_B_IDEOGRAMS,
3725             AEGEAN_NUMBERS,
3726             ANCIENT_GREEK_NUMBERS,
3727             ANCIENT_SYMBOLS,
3728             PHAISTOS_DISC,
3729             null,
3730             LYCIAN,
3731             CARIAN,
3732             COPTIC_EPACT_NUMBERS,
3733             OLD_ITALIC,
3734             GOTHIC,
3735             OLD_PERMIC,
3736             UGARITIC,
3737             OLD_PERSIAN,
3738             null,
3739             DESERET,
3740             SHAVIAN,
3741             OSMANYA,
3742             OSAGE,
3743             ELBASAN,
3744             CAUCASIAN_ALBANIAN,
3745             null,
3746             LINEAR_A,
3747             null,
3748             CYPRIOT_SYLLABARY,
3749             IMPERIAL_ARAMAIC,
3750             PALMYRENE,
3751             NABATAEAN,
3752             null,
3753             HATRAN,
3754             PHOENICIAN,
3755             LYDIAN,
3756             null,
3757             MEROITIC_HIEROGLYPHS,
3758             MEROITIC_CURSIVE,
3759             KHAROSHTHI,
3760             OLD_SOUTH_ARABIAN,
3761             OLD_NORTH_ARABIAN,
3762             null,
3763             MANICHAEAN,
3764             AVESTAN,
3765             INSCRIPTIONAL_PARTHIAN,
3766             INSCRIPTIONAL_PAHLAVI,
3767             PSALTER_PAHLAVI,
3768             null,
3769             OLD_TURKIC,
3770             null,
3771             OLD_HUNGARIAN,
3772             HANIFI_ROHINGYA,
3773             null,
3774             RUMI_NUMERAL_SYMBOLS,
3775             null,
3776             OLD_SOGDIAN,
3777             SOGDIAN,
3778             null,
3779             BRAHMI,
3780             KAITHI,
3781             SORA_SOMPENG,
3782             CHAKMA,
3783             MAHAJANI,
3784             SHARADA,
3785             SINHALA_ARCHAIC_NUMBERS,
3786             KHOJKI,
3787             null,
3788             MULTANI,
3789             KHUDAWADI,
3790             GRANTHA,
3791             null,
3792             NEWA,
3793             TIRHUTA,
3794             null,
3795             SIDDHAM,
3796             MODI,
3797             MONGOLIAN_SUPPLEMENT,
3798             TAKRI,
3799             null,
3800             AHOM,
3801             null,
3802             DOGRA,
3803             null,
3804             WARANG_CITI,
3805             null,
3806             ZANABAZAR_SQUARE,
3807             SOYOMBO,
3808             null,
3809             PAU_CIN_HAU,
3810             null,
3811             BHAIKSUKI,
3812             MARCHEN,
3813             null,
3814             MASARAM_GONDI,
3815             GUNJALA_GONDI,
3816             null,
3817             MAKASAR,
3818             null,
3819             CUNEIFORM,
3820             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3821             EARLY_DYNASTIC_CUNEIFORM,
3822             null,
3823             EGYPTIAN_HIEROGLYPHS,
3824             null,
3825             ANATOLIAN_HIEROGLYPHS,
3826             null,
3827             BAMUM_SUPPLEMENT,
3828             MRO,
3829             null,
3830             BASSA_VAH,
3831             PAHAWH_HMONG,
3832             null,
3833             MEDEFAIDRIN,
3834             null,
3835             MIAO,
3836             null,
3837             IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
3838             TANGUT,
3839             TANGUT_COMPONENTS,
3840             null,
3841             KANA_SUPPLEMENT,
3842             KANA_EXTENDED_A,
3843             null,
3844             NUSHU,
3845             null,
3846             DUPLOYAN,
3847             SHORTHAND_FORMAT_CONTROLS,
3848             null,
3849             BYZANTINE_MUSICAL_SYMBOLS,
3850             MUSICAL_SYMBOLS,
3851             ANCIENT_GREEK_MUSICAL_NOTATION,
3852             null,
3853             MAYAN_NUMERALS,
3854             TAI_XUAN_JING_SYMBOLS,
3855             COUNTING_ROD_NUMERALS,
3856             null,
3857             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3858             SUTTON_SIGNWRITING,
3859             null,
3860             GLAGOLITIC_SUPPLEMENT,
3861             null,
3862             MENDE_KIKAKUI,
3863             null,
3864             ADLAM,
3865             null,
3866             INDIC_SIYAQ_NUMBERS,
3867             null,
3868             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3869             null,
3870             MAHJONG_TILES,
3871             DOMINO_TILES,
3872             PLAYING_CARDS,
3873             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3874             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3875             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3876             EMOTICONS,
3877             ORNAMENTAL_DINGBATS,
3878             TRANSPORT_AND_MAP_SYMBOLS,
3879             ALCHEMICAL_SYMBOLS,
3880             GEOMETRIC_SHAPES_EXTENDED,
3881             SUPPLEMENTAL_ARROWS_C,
3882             SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
3883             CHESS_SYMBOLS,
3884             null,
3885             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3886             null,
3887             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3888             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3889             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
3890             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
3891             null,
3892             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3893             null,
3894             TAGS,
3895             null,
3896             VARIATION_SELECTORS_SUPPLEMENT,
3897             null,
3898             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3899             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3900         };
3901 
3902 
3903         /**
3904          * Returns the object representing the Unicode block containing the
3905          * given character, or {@code null} if the character is not a
3906          * member of a defined block.
3907          *
3908          * <p><b>Note:</b> This method cannot handle
3909          * <a href="Character.html#supplementary"> supplementary
3910          * characters</a>.  To support all Unicode characters, including
3911          * supplementary characters, use the {@link #of(int)} method.
3912          *
3913          * @param   c  The character in question
3914          * @return  The {@code UnicodeBlock} instance representing the
3915          *          Unicode block of which this character is a member, or
3916          *          {@code null} if the character is not a member of any
3917          *          Unicode block
3918          */
3919         public static UnicodeBlock of(char c) {
3920             return of((int)c);
3921         }
3922 
3923         /**
3924          * Returns the object representing the Unicode block
3925          * containing the given character (Unicode code point), or
3926          * {@code null} if the character is not a member of a
3927          * defined block.
3928          *
3929          * @param   codePoint the character (Unicode code point) in question.
3930          * @return  The {@code UnicodeBlock} instance representing the
3931          *          Unicode block of which this character is a member, or
3932          *          {@code null} if the character is not a member of any
3933          *          Unicode block
3934          * @throws  IllegalArgumentException if the specified
3935          * {@code codePoint} is an invalid Unicode code point.
3936          * @see Character#isValidCodePoint(int)
3937          * @since   1.5
3938          */
3939         public static UnicodeBlock of(int codePoint) {
3940             if (!isValidCodePoint(codePoint)) {
3941                 throw new IllegalArgumentException(
3942                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
3943             }
3944 
3945             int top, bottom, current;
3946             bottom = 0;
3947             top = blockStarts.length;
3948             current = top/2;
3949 
3950             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3951             while (top - bottom > 1) {
3952                 if (codePoint >= blockStarts[current]) {
3953                     bottom = current;
3954                 } else {
3955                     top = current;
3956                 }
3957                 current = (top + bottom) / 2;
3958             }
3959             return blocks[current];
3960         }
3961 
3962         /**
3963          * Returns the UnicodeBlock with the given name. Block
3964          * names are determined by The Unicode Standard. The file
3965          * {@code Blocks-<version>.txt} defines blocks for a particular
3966          * version of the standard. The {@link Character} class specifies
3967          * the version of the standard that it supports.
3968          * <p>
3969          * This method accepts block names in the following forms:
3970          * <ol>
3971          * <li> Canonical block names as defined by the Unicode Standard.
3972          * For example, the standard defines a "Basic Latin" block. Therefore, this
3973          * method accepts "Basic Latin" as a valid block name. The documentation of
3974          * each UnicodeBlock provides the canonical name.
3975          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3976          * is a valid block name for the "Basic Latin" block.
3977          * <li>The text representation of each constant UnicodeBlock identifier.
3978          * For example, this method will return the {@link #BASIC_LATIN} block if
3979          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3980          * hyphens in the canonical name with underscores.
3981          * </ol>
3982          * Finally, character case is ignored for all of the valid block name forms.
3983          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3984          * The en_US locale's case mapping rules are used to provide case-insensitive
3985          * string comparisons for block name validation.
3986          * <p>
3987          * If the Unicode Standard changes block names, both the previous and
3988          * current names will be accepted.
3989          *
3990          * @param blockName A {@code UnicodeBlock} name.
3991          * @return The {@code UnicodeBlock} instance identified
3992          *         by {@code blockName}
3993          * @throws IllegalArgumentException if {@code blockName} is an
3994          *         invalid name
3995          * @throws NullPointerException if {@code blockName} is null
3996          * @since 1.5
3997          */
3998         public static final UnicodeBlock forName(String blockName) {
3999             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4000             if (block == null) {
4001                 throw new IllegalArgumentException("Not a valid block name: "
4002                             + blockName);
4003             }
4004             return block;
4005         }
4006     }
4007 
4008 
4009     /**
4010      * A family of character subsets representing the character scripts
4011      * defined in the <a href="http://www.unicode.org/reports/tr24/">
4012      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4013      * character is assigned to a single Unicode script, either a specific
4014      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4015      * one of the following three special values,
4016      * {@link Character.UnicodeScript#INHERITED Inherited},
4017      * {@link Character.UnicodeScript#COMMON Common} or
4018      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4019      *
4020      * @since 1.7
4021      */
4022     public static enum UnicodeScript {
4023         /**
4024          * Unicode script "Common".
4025          */
4026         COMMON,
4027 
4028         /**
4029          * Unicode script "Latin".
4030          */
4031         LATIN,
4032 
4033         /**
4034          * Unicode script "Greek".
4035          */
4036         GREEK,
4037 
4038         /**
4039          * Unicode script "Cyrillic".
4040          */
4041         CYRILLIC,
4042 
4043         /**
4044          * Unicode script "Armenian".
4045          */
4046         ARMENIAN,
4047 
4048         /**
4049          * Unicode script "Hebrew".
4050          */
4051         HEBREW,
4052 
4053         /**
4054          * Unicode script "Arabic".
4055          */
4056         ARABIC,
4057 
4058         /**
4059          * Unicode script "Syriac".
4060          */
4061         SYRIAC,
4062 
4063         /**
4064          * Unicode script "Thaana".
4065          */
4066         THAANA,
4067 
4068         /**
4069          * Unicode script "Devanagari".
4070          */
4071         DEVANAGARI,
4072 
4073         /**
4074          * Unicode script "Bengali".
4075          */
4076         BENGALI,
4077 
4078         /**
4079          * Unicode script "Gurmukhi".
4080          */
4081         GURMUKHI,
4082 
4083         /**
4084          * Unicode script "Gujarati".
4085          */
4086         GUJARATI,
4087 
4088         /**
4089          * Unicode script "Oriya".
4090          */
4091         ORIYA,
4092 
4093         /**
4094          * Unicode script "Tamil".
4095          */
4096         TAMIL,
4097 
4098         /**
4099          * Unicode script "Telugu".
4100          */
4101         TELUGU,
4102 
4103         /**
4104          * Unicode script "Kannada".
4105          */
4106         KANNADA,
4107 
4108         /**
4109          * Unicode script "Malayalam".
4110          */
4111         MALAYALAM,
4112 
4113         /**
4114          * Unicode script "Sinhala".
4115          */
4116         SINHALA,
4117 
4118         /**
4119          * Unicode script "Thai".
4120          */
4121         THAI,
4122 
4123         /**
4124          * Unicode script "Lao".
4125          */
4126         LAO,
4127 
4128         /**
4129          * Unicode script "Tibetan".
4130          */
4131         TIBETAN,
4132 
4133         /**
4134          * Unicode script "Myanmar".
4135          */
4136         MYANMAR,
4137 
4138         /**
4139          * Unicode script "Georgian".
4140          */
4141         GEORGIAN,
4142 
4143         /**
4144          * Unicode script "Hangul".
4145          */
4146         HANGUL,
4147 
4148         /**
4149          * Unicode script "Ethiopic".
4150          */
4151         ETHIOPIC,
4152 
4153         /**
4154          * Unicode script "Cherokee".
4155          */
4156         CHEROKEE,
4157 
4158         /**
4159          * Unicode script "Canadian_Aboriginal".
4160          */
4161         CANADIAN_ABORIGINAL,
4162 
4163         /**
4164          * Unicode script "Ogham".
4165          */
4166         OGHAM,
4167 
4168         /**
4169          * Unicode script "Runic".
4170          */
4171         RUNIC,
4172 
4173         /**
4174          * Unicode script "Khmer".
4175          */
4176         KHMER,
4177 
4178         /**
4179          * Unicode script "Mongolian".
4180          */
4181         MONGOLIAN,
4182 
4183         /**
4184          * Unicode script "Hiragana".
4185          */
4186         HIRAGANA,
4187 
4188         /**
4189          * Unicode script "Katakana".
4190          */
4191         KATAKANA,
4192 
4193         /**
4194          * Unicode script "Bopomofo".
4195          */
4196         BOPOMOFO,
4197 
4198         /**
4199          * Unicode script "Han".
4200          */
4201         HAN,
4202 
4203         /**
4204          * Unicode script "Yi".
4205          */
4206         YI,
4207 
4208         /**
4209          * Unicode script "Old_Italic".
4210          */
4211         OLD_ITALIC,
4212 
4213         /**
4214          * Unicode script "Gothic".
4215          */
4216         GOTHIC,
4217 
4218         /**
4219          * Unicode script "Deseret".
4220          */
4221         DESERET,
4222 
4223         /**
4224          * Unicode script "Inherited".
4225          */
4226         INHERITED,
4227 
4228         /**
4229          * Unicode script "Tagalog".
4230          */
4231         TAGALOG,
4232 
4233         /**
4234          * Unicode script "Hanunoo".
4235          */
4236         HANUNOO,
4237 
4238         /**
4239          * Unicode script "Buhid".
4240          */
4241         BUHID,
4242 
4243         /**
4244          * Unicode script "Tagbanwa".
4245          */
4246         TAGBANWA,
4247 
4248         /**
4249          * Unicode script "Limbu".
4250          */
4251         LIMBU,
4252 
4253         /**
4254          * Unicode script "Tai_Le".
4255          */
4256         TAI_LE,
4257 
4258         /**
4259          * Unicode script "Linear_B".
4260          */
4261         LINEAR_B,
4262 
4263         /**
4264          * Unicode script "Ugaritic".
4265          */
4266         UGARITIC,
4267 
4268         /**
4269          * Unicode script "Shavian".
4270          */
4271         SHAVIAN,
4272 
4273         /**
4274          * Unicode script "Osmanya".
4275          */
4276         OSMANYA,
4277 
4278         /**
4279          * Unicode script "Cypriot".
4280          */
4281         CYPRIOT,
4282 
4283         /**
4284          * Unicode script "Braille".
4285          */
4286         BRAILLE,
4287 
4288         /**
4289          * Unicode script "Buginese".
4290          */
4291         BUGINESE,
4292 
4293         /**
4294          * Unicode script "Coptic".
4295          */
4296         COPTIC,
4297 
4298         /**
4299          * Unicode script "New_Tai_Lue".
4300          */
4301         NEW_TAI_LUE,
4302 
4303         /**
4304          * Unicode script "Glagolitic".
4305          */
4306         GLAGOLITIC,
4307 
4308         /**
4309          * Unicode script "Tifinagh".
4310          */
4311         TIFINAGH,
4312 
4313         /**
4314          * Unicode script "Syloti_Nagri".
4315          */
4316         SYLOTI_NAGRI,
4317 
4318         /**
4319          * Unicode script "Old_Persian".
4320          */
4321         OLD_PERSIAN,
4322 
4323         /**
4324          * Unicode script "Kharoshthi".
4325          */
4326         KHAROSHTHI,
4327 
4328         /**
4329          * Unicode script "Balinese".
4330          */
4331         BALINESE,
4332 
4333         /**
4334          * Unicode script "Cuneiform".
4335          */
4336         CUNEIFORM,
4337 
4338         /**
4339          * Unicode script "Phoenician".
4340          */
4341         PHOENICIAN,
4342 
4343         /**
4344          * Unicode script "Phags_Pa".
4345          */
4346         PHAGS_PA,
4347 
4348         /**
4349          * Unicode script "Nko".
4350          */
4351         NKO,
4352 
4353         /**
4354          * Unicode script "Sundanese".
4355          */
4356         SUNDANESE,
4357 
4358         /**
4359          * Unicode script "Batak".
4360          */
4361         BATAK,
4362 
4363         /**
4364          * Unicode script "Lepcha".
4365          */
4366         LEPCHA,
4367 
4368         /**
4369          * Unicode script "Ol_Chiki".
4370          */
4371         OL_CHIKI,
4372 
4373         /**
4374          * Unicode script "Vai".
4375          */
4376         VAI,
4377 
4378         /**
4379          * Unicode script "Saurashtra".
4380          */
4381         SAURASHTRA,
4382 
4383         /**
4384          * Unicode script "Kayah_Li".
4385          */
4386         KAYAH_LI,
4387 
4388         /**
4389          * Unicode script "Rejang".
4390          */
4391         REJANG,
4392 
4393         /**
4394          * Unicode script "Lycian".
4395          */
4396         LYCIAN,
4397 
4398         /**
4399          * Unicode script "Carian".
4400          */
4401         CARIAN,
4402 
4403         /**
4404          * Unicode script "Lydian".
4405          */
4406         LYDIAN,
4407 
4408         /**
4409          * Unicode script "Cham".
4410          */
4411         CHAM,
4412 
4413         /**
4414          * Unicode script "Tai_Tham".
4415          */
4416         TAI_THAM,
4417 
4418         /**
4419          * Unicode script "Tai_Viet".
4420          */
4421         TAI_VIET,
4422 
4423         /**
4424          * Unicode script "Avestan".
4425          */
4426         AVESTAN,
4427 
4428         /**
4429          * Unicode script "Egyptian_Hieroglyphs".
4430          */
4431         EGYPTIAN_HIEROGLYPHS,
4432 
4433         /**
4434          * Unicode script "Samaritan".
4435          */
4436         SAMARITAN,
4437 
4438         /**
4439          * Unicode script "Mandaic".
4440          */
4441         MANDAIC,
4442 
4443         /**
4444          * Unicode script "Lisu".
4445          */
4446         LISU,
4447 
4448         /**
4449          * Unicode script "Bamum".
4450          */
4451         BAMUM,
4452 
4453         /**
4454          * Unicode script "Javanese".
4455          */
4456         JAVANESE,
4457 
4458         /**
4459          * Unicode script "Meetei_Mayek".
4460          */
4461         MEETEI_MAYEK,
4462 
4463         /**
4464          * Unicode script "Imperial_Aramaic".
4465          */
4466         IMPERIAL_ARAMAIC,
4467 
4468         /**
4469          * Unicode script "Old_South_Arabian".
4470          */
4471         OLD_SOUTH_ARABIAN,
4472 
4473         /**
4474          * Unicode script "Inscriptional_Parthian".
4475          */
4476         INSCRIPTIONAL_PARTHIAN,
4477 
4478         /**
4479          * Unicode script "Inscriptional_Pahlavi".
4480          */
4481         INSCRIPTIONAL_PAHLAVI,
4482 
4483         /**
4484          * Unicode script "Old_Turkic".
4485          */
4486         OLD_TURKIC,
4487 
4488         /**
4489          * Unicode script "Brahmi".
4490          */
4491         BRAHMI,
4492 
4493         /**
4494          * Unicode script "Kaithi".
4495          */
4496         KAITHI,
4497 
4498         /**
4499          * Unicode script "Meroitic Hieroglyphs".
4500          * @since 1.8
4501          */
4502         MEROITIC_HIEROGLYPHS,
4503 
4504         /**
4505          * Unicode script "Meroitic Cursive".
4506          * @since 1.8
4507          */
4508         MEROITIC_CURSIVE,
4509 
4510         /**
4511          * Unicode script "Sora Sompeng".
4512          * @since 1.8
4513          */
4514         SORA_SOMPENG,
4515 
4516         /**
4517          * Unicode script "Chakma".
4518          * @since 1.8
4519          */
4520         CHAKMA,
4521 
4522         /**
4523          * Unicode script "Sharada".
4524          * @since 1.8
4525          */
4526         SHARADA,
4527 
4528         /**
4529          * Unicode script "Takri".
4530          * @since 1.8
4531          */
4532         TAKRI,
4533 
4534         /**
4535          * Unicode script "Miao".
4536          * @since 1.8
4537          */
4538         MIAO,
4539 
4540         /**
4541          * Unicode script "Caucasian Albanian".
4542          * @since 9
4543          */
4544         CAUCASIAN_ALBANIAN,
4545 
4546         /**
4547          * Unicode script "Bassa Vah".
4548          * @since 9
4549          */
4550         BASSA_VAH,
4551 
4552         /**
4553          * Unicode script "Duployan".
4554          * @since 9
4555          */
4556         DUPLOYAN,
4557 
4558         /**
4559          * Unicode script "Elbasan".
4560          * @since 9
4561          */
4562         ELBASAN,
4563 
4564         /**
4565          * Unicode script "Grantha".
4566          * @since 9
4567          */
4568         GRANTHA,
4569 
4570         /**
4571          * Unicode script "Pahawh Hmong".
4572          * @since 9
4573          */
4574         PAHAWH_HMONG,
4575 
4576         /**
4577          * Unicode script "Khojki".
4578          * @since 9
4579          */
4580         KHOJKI,
4581 
4582         /**
4583          * Unicode script "Linear A".
4584          * @since 9
4585          */
4586         LINEAR_A,
4587 
4588         /**
4589          * Unicode script "Mahajani".
4590          * @since 9
4591          */
4592         MAHAJANI,
4593 
4594         /**
4595          * Unicode script "Manichaean".
4596          * @since 9
4597          */
4598         MANICHAEAN,
4599 
4600         /**
4601          * Unicode script "Mende Kikakui".
4602          * @since 9
4603          */
4604         MENDE_KIKAKUI,
4605 
4606         /**
4607          * Unicode script "Modi".
4608          * @since 9
4609          */
4610         MODI,
4611 
4612         /**
4613          * Unicode script "Mro".
4614          * @since 9
4615          */
4616         MRO,
4617 
4618         /**
4619          * Unicode script "Old North Arabian".
4620          * @since 9
4621          */
4622         OLD_NORTH_ARABIAN,
4623 
4624         /**
4625          * Unicode script "Nabataean".
4626          * @since 9
4627          */
4628         NABATAEAN,
4629 
4630         /**
4631          * Unicode script "Palmyrene".
4632          * @since 9
4633          */
4634         PALMYRENE,
4635 
4636         /**
4637          * Unicode script "Pau Cin Hau".
4638          * @since 9
4639          */
4640         PAU_CIN_HAU,
4641 
4642         /**
4643          * Unicode script "Old Permic".
4644          * @since 9
4645          */
4646         OLD_PERMIC,
4647 
4648         /**
4649          * Unicode script "Psalter Pahlavi".
4650          * @since 9
4651          */
4652         PSALTER_PAHLAVI,
4653 
4654         /**
4655          * Unicode script "Siddham".
4656          * @since 9
4657          */
4658         SIDDHAM,
4659 
4660         /**
4661          * Unicode script "Khudawadi".
4662          * @since 9
4663          */
4664         KHUDAWADI,
4665 
4666         /**
4667          * Unicode script "Tirhuta".
4668          * @since 9
4669          */
4670         TIRHUTA,
4671 
4672         /**
4673          * Unicode script "Warang Citi".
4674          * @since 9
4675          */
4676         WARANG_CITI,
4677 
4678          /**
4679          * Unicode script "Ahom".
4680          * @since 9
4681          */
4682         AHOM,
4683 
4684         /**
4685          * Unicode script "Anatolian Hieroglyphs".
4686          * @since 9
4687          */
4688         ANATOLIAN_HIEROGLYPHS,
4689 
4690         /**
4691          * Unicode script "Hatran".
4692          * @since 9
4693          */
4694         HATRAN,
4695 
4696         /**
4697          * Unicode script "Multani".
4698          * @since 9
4699          */
4700         MULTANI,
4701 
4702         /**
4703          * Unicode script "Old Hungarian".
4704          * @since 9
4705          */
4706         OLD_HUNGARIAN,
4707 
4708         /**
4709          * Unicode script "SignWriting".
4710          * @since 9
4711          */
4712         SIGNWRITING,
4713 
4714         /**
4715           * Unicode script "Adlam".
4716           * @since 11
4717           */
4718         ADLAM,
4719 
4720         /**
4721           * Unicode script "Bhaiksuki".
4722           * @since 11
4723           */
4724         BHAIKSUKI,
4725 
4726         /**
4727           * Unicode script "Marchen".
4728           * @since 11
4729           */
4730         MARCHEN,
4731 
4732         /**
4733           * Unicode script "Newa".
4734           * @since 11
4735           */
4736         NEWA,
4737 
4738         /**
4739           * Unicode script "Osage".
4740           * @since 11
4741           */
4742         OSAGE,
4743 
4744         /**
4745           * Unicode script "Tangut".
4746           * @since 11
4747           */
4748         TANGUT,
4749 
4750         /**
4751           * Unicode script "Masaram Gondi".
4752           * @since 11
4753           */
4754         MASARAM_GONDI,
4755 
4756         /**
4757           * Unicode script "Nushu".
4758           * @since 11
4759           */
4760         NUSHU,
4761 
4762         /**
4763           * Unicode script "Soyombo".
4764           * @since 11
4765           */
4766         SOYOMBO,
4767 
4768         /**
4769           * Unicode script "Zanabazar Square".
4770           * @since 11
4771           */
4772         ZANABAZAR_SQUARE,
4773          
4774         /**
4775           * Unicode script "Hanifi Rohingya".
4776           * @since 12
4777           */
4778         HANIFI_ROHINGYA,
4779         
4780         /**
4781           * Unicode script "Old Sogdian".
4782           * @since 12
4783           */
4784         OLD_SOGDIAN,
4785         
4786         /**
4787           * Unicode script "Sogdian".
4788           * @since 12
4789           */
4790         SOGDIAN,
4791         
4792         /**
4793           * Unicode script "Dogra".
4794           * @since 12
4795           */
4796         DOGRA,
4797         
4798         /**
4799           * Unicode script "Gunjala Gondi".
4800           * @since 12
4801           */
4802         GUNJALA_GONDI,
4803         
4804         /**
4805           * Unicode script "Makasar".
4806           * @since 12
4807           */
4808         MAKASAR,
4809         
4810         /**
4811           * Unicode script "Medefaidrin".
4812           * @since 12
4813           */
4814         MEDEFAIDRIN,
4815        
4816         /**
4817          * Unicode script "Unknown".
4818          */
4819         UNKNOWN;
4820 
4821         private static final int[] scriptStarts = {
4822             0x0000,   // 0000..0040; COMMON
4823             0x0041,   // 0041..005A; LATIN
4824             0x005B,   // 005B..0060; COMMON
4825             0x0061,   // 0061..007A; LATIN
4826             0x007B,   // 007B..00A9; COMMON
4827             0x00AA,   // 00AA      ; LATIN
4828             0x00AB,   // 00AB..00B9; COMMON
4829             0x00BA,   // 00BA      ; LATIN
4830             0x00BB,   // 00BB..00BF; COMMON
4831             0x00C0,   // 00C0..00D6; LATIN
4832             0x00D7,   // 00D7      ; COMMON
4833             0x00D8,   // 00D8..00F6; LATIN
4834             0x00F7,   // 00F7      ; COMMON
4835             0x00F8,   // 00F8..02B8; LATIN
4836             0x02B9,   // 02B9..02DF; COMMON
4837             0x02E0,   // 02E0..02E4; LATIN
4838             0x02E5,   // 02E5..02E9; COMMON
4839             0x02EA,   // 02EA..02EB; BOPOMOFO
4840             0x02EC,   // 02EC..02FF; COMMON
4841             0x0300,   // 0300..036F; INHERITED
4842             0x0370,   // 0370..0373; GREEK
4843             0x0374,   // 0374      ; COMMON
4844             0x0375,   // 0375..0377; GREEK
4845             0x0378,   // 0378..0379; UNKNOWN
4846             0x037A,   // 037A..037D; GREEK
4847             0x037E,   // 037E      ; COMMON
4848             0x037F,   // 037F      ; GREEK
4849             0x0380,   // 0380..0383; UNKNOWN
4850             0x0384,   // 0384      ; GREEK
4851             0x0385,   // 0385      ; COMMON
4852             0x0386,   // 0386      ; GREEK
4853             0x0387,   // 0387      ; COMMON
4854             0x0388,   // 0388..038A; GREEK
4855             0x038B,   // 038B      ; UNKNOWN
4856             0x038C,   // 038C      ; GREEK
4857             0x038D,   // 038D      ; UNKNOWN
4858             0x038E,   // 038E..03A1; GREEK
4859             0x03A2,   // 03A2      ; UNKNOWN
4860             0x03A3,   // 03A3..03E1; GREEK
4861             0x03E2,   // 03E2..03EF; COPTIC
4862             0x03F0,   // 03F0..03FF; GREEK
4863             0x0400,   // 0400..0484; CYRILLIC
4864             0x0485,   // 0485..0486; INHERITED
4865             0x0487,   // 0487..052F; CYRILLIC
4866             0x0530,   // 0530      ; UNKNOWN
4867             0x0531,   // 0531..0556; ARMENIAN
4868             0x0557,   // 0557..0558; UNKNOWN
4869             0x0559,   // 0559..0588; ARMENIAN
4870             0x0589,   // 0589      ; COMMON
4871             0x058A,   // 058A      ; ARMENIAN
4872             0x058B,   // 058B..058C; UNKNOWN
4873             0x058D,   // 058D..058F; ARMENIAN
4874             0x0590,   // 0590      ; UNKNOWN
4875             0x0591,   // 0591..05C7; HEBREW
4876             0x05C8,   // 05C8..05CF; UNKNOWN
4877             0x05D0,   // 05D0..05EA; HEBREW
4878             0x05EB,   // 05EB..05EE; UNKNOWN
4879             0x05EF,   // 05EF..05F4; HEBREW
4880             0x05F5,   // 05F5..05FF; UNKNOWN
4881             0x0600,   // 0600..0604; ARABIC
4882             0x0605,   // 0605      ; COMMON
4883             0x0606,   // 0606..060B; ARABIC
4884             0x060C,   // 060C      ; COMMON
4885             0x060D,   // 060D..061A; ARABIC
4886             0x061B,   // 061B      ; COMMON
4887             0x061C,   // 061C      ; ARABIC
4888             0x061D,   // 061D      ; UNKNOWN
4889             0x061E,   // 061E      ; ARABIC
4890             0x061F,   // 061F      ; COMMON
4891             0x0620,   // 0620..063F; ARABIC
4892             0x0640,   // 0640      ; COMMON
4893             0x0641,   // 0641..064A; ARABIC
4894             0x064B,   // 064B..0655; INHERITED
4895             0x0656,   // 0656..066F; ARABIC
4896             0x0670,   // 0670      ; INHERITED
4897             0x0671,   // 0671..06DC; ARABIC
4898             0x06DD,   // 06DD      ; COMMON
4899             0x06DE,   // 06DE..06FF; ARABIC
4900             0x0700,   // 0700..070D; SYRIAC
4901             0x070E,   // 070E      ; UNKNOWN
4902             0x070F,   // 070F..074A; SYRIAC
4903             0x074B,   // 074B..074C; UNKNOWN
4904             0x074D,   // 074D..074F; SYRIAC
4905             0x0750,   // 0750..077F; ARABIC
4906             0x0780,   // 0780..07B1; THAANA
4907             0x07B2,   // 07B2..07BF; UNKNOWN
4908             0x07C0,   // 07C0..07FA; NKO
4909             0x07FB,   // 07FB..07FC; UNKNOWN
4910             0X07FD,   // 07FD..07FF; NKO
4911             0x0800,   // 0800..082D; SAMARITAN
4912             0x082E,   // 082E..082F; UNKNOWN
4913             0x0830,   // 0830..083E; SAMARITAN
4914             0x083F,   // 083F      ; UNKNOWN
4915             0x0840,   // 0840..085B; MANDAIC
4916             0x085C,   // 085C..085D; UNKNOWN
4917             0x085E,   // 085E      ; MANDAIC
4918             0x085F,   // 085F      ; UNKNOWN
4919             0x0860,   // 0860..086A; SYRIAC
4920             0x086B,   // 086B..089F; UNKNOWN
4921             0x08A0,   // 08A0..08B4; ARABIC
4922             0x08B5,   // 08B5      ; UNKNOWN
4923             0x08B6,   // 08B6..08BD; ARABIC
4924             0x08BE,   // 08BE..08D2; UNKNOWN
4925             0x08D3,   // 08D3..08E1; ARABIC
4926             0x08E2,   // 08E2      ; COMMON
4927             0x08E3,   // 08E3..08FF; ARABIC
4928             0x0900,   // 0900..0950; DEVANAGARI
4929             0x0951,   // 0951..0952; INHERITED
4930             0x0953,   // 0953..0963; DEVANAGARI
4931             0x0964,   // 0964..0965; COMMON
4932             0x0966,   // 0966..097F; DEVANAGARI
4933             0x0980,   // 0980..0983; BENGALI
4934             0x0984,   // 0984      ; UNKNOWN
4935             0x0985,   // 0985..098C; BENGALI
4936             0x098D,   // 098D..098E; UNKNOWN
4937             0x098F,   // 098F..0990; BENGALI
4938             0x0991,   // 0991..0992; UNKNOWN
4939             0x0993,   // 0993..09A8; BENGALI
4940             0x09A9,   // 09A9      ; UNKNOWN
4941             0x09AA,   // 09AA..09B0; BENGALI
4942             0x09B1,   // 09B1      ; UNKNOWN
4943             0x09B2,   // 09B2      ; BENGALI
4944             0x09B3,   // 09B3..09B5; UNKNOWN
4945             0x09B6,   // 09B6..09B9; BENGALI
4946             0x09BA,   // 09BA..09BB; UNKNOWN
4947             0x09BC,   // 09BC..09C4; BENGALI
4948             0x09C5,   // 09C5..09C6; UNKNOWN
4949             0x09C7,   // 09C7..09C8; BENGALI
4950             0x09C9,   // 09C9..09CA; UNKNOWN
4951             0x09CB,   // 09CB..09CE; BENGALI
4952             0x09CF,   // 09CF..09D6; UNKNOWN
4953             0x09D7,   // 09D7      ; BENGALI
4954             0x09D8,   // 09D8..09DB; UNKNOWN
4955             0x09DC,   // 09DC..09DD; BENGALI
4956             0x09DE,   // 09DE      ; UNKNOWN
4957             0x09DF,   // 09DF..09E3; BENGALI
4958             0x09E4,   // 09E4..09E5; UNKNOWN
4959             0x09E6,   // 09E6..09FE; BENGALI
4960             0x09FF,   // 09FF..0A00; UNKNOWN
4961             0x0A01,   // 0A01..0A03; GURMUKHI
4962             0x0A04,   // 0A04      ; UNKNOWN
4963             0x0A05,   // 0A05..0A0A; GURMUKHI
4964             0x0A0B,   // 0A0B..0A0E; UNKNOWN
4965             0x0A0F,   // 0A0F..0A10; GURMUKHI
4966             0x0A11,   // 0A11..0A12; UNKNOWN
4967             0x0A13,   // 0A13..0A28; GURMUKHI
4968             0x0A29,   // 0A29      ; UNKNOWN
4969             0x0A2A,   // 0A2A..0A30; GURMUKHI
4970             0x0A31,   // 0A31      ; UNKNOWN
4971             0x0A32,   // 0A32..0A33; GURMUKHI
4972             0x0A34,   // 0A34      ; UNKNOWN
4973             0x0A35,   // 0A35..0A36; GURMUKHI
4974             0x0A37,   // 0A37      ; UNKNOWN
4975             0x0A38,   // 0A38..0A39; GURMUKHI
4976             0x0A3A,   // 0A3A..0A3B; UNKNOWN
4977             0x0A3C,   // 0A3C      ; GURMUKHI
4978             0x0A3D,   // 0A3D      ; UNKNOWN
4979             0x0A3E,   // 0A3E..0A42; GURMUKHI
4980             0x0A43,   // 0A43..0A46; UNKNOWN
4981             0x0A47,   // 0A47..0A48; GURMUKHI
4982             0x0A49,   // 0A49..0A4A; UNKNOWN
4983             0x0A4B,   // 0A4B..0A4D; GURMUKHI
4984             0x0A4E,   // 0A4E..0A50; UNKNOWN
4985             0x0A51,   // 0A51      ; GURMUKHI
4986             0x0A52,   // 0A52..0A58; UNKNOWN
4987             0x0A59,   // 0A59..0A5C; GURMUKHI
4988             0x0A5D,   // 0A5D      ; UNKNOWN
4989             0x0A5E,   // 0A5E      ; GURMUKHI
4990             0x0A5F,   // 0A5F..0A65; UNKNOWN
4991             0x0A66,   // 0A66..0A76; GURMUKHI
4992             0x0A77,   // 0A77..0A80; UNKNOWN
4993             0x0A81,   // 0A81..0A83; GUJARATI
4994             0x0A84,   // 0A84      ; UNKNOWN
4995             0x0A85,   // 0A85..0A8D; GUJARATI
4996             0x0A8E,   // 0A8E      ; UNKNOWN
4997             0x0A8F,   // 0A8F..0A91; GUJARATI
4998             0x0A92,   // 0A92      ; UNKNOWN
4999             0x0A93,   // 0A93..0AA8; GUJARATI
5000             0x0AA9,   // 0AA9      ; UNKNOWN
5001             0x0AAA,   // 0AAA..0AB0; GUJARATI
5002             0x0AB1,   // 0AB1      ; UNKNOWN
5003             0x0AB2,   // 0AB2..0AB3; GUJARATI
5004             0x0AB4,   // 0AB4      ; UNKNOWN
5005             0x0AB5,   // 0AB5..0AB9; GUJARATI
5006             0x0ABA,   // 0ABA..0ABB; UNKNOWN
5007             0x0ABC,   // 0ABC..0AC5; GUJARATI
5008             0x0AC6,   // 0AC6      ; UNKNOWN
5009             0x0AC7,   // 0AC7..0AC9; GUJARATI
5010             0x0ACA,   // 0ACA      ; UNKNOWN
5011             0x0ACB,   // 0ACB..0ACD; GUJARATI
5012             0x0ACE,   // 0ACE..0ACF; UNKNOWN
5013             0x0AD0,   // 0AD0      ; GUJARATI
5014             0x0AD1,   // 0AD1..0ADF; UNKNOWN
5015             0x0AE0,   // 0AE0..0AE3; GUJARATI
5016             0x0AE4,   // 0AE4..0AE5; UNKNOWN
5017             0x0AE6,   // 0AE6..0AF1; GUJARATI
5018             0x0AF2,   // 0AF2..0AF8; UNKNOWN
5019             0x0AF9,   // 0AF9..0AFF; GUJARATI
5020             0x0B00,   // 0B00      ; UNKNOWN
5021             0x0B01,   // 0B01..0B03; ORIYA
5022             0x0B04,   // 0B04      ; UNKNOWN
5023             0x0B05,   // 0B05..0B0C; ORIYA
5024             0x0B0D,   // 0B0D..0B0E; UNKNOWN
5025             0x0B0F,   // 0B0F..0B10; ORIYA
5026             0x0B11,   // 0B11..0B12; UNKNOWN
5027             0x0B13,   // 0B13..0B28; ORIYA
5028             0x0B29,   // 0B29      ; UNKNOWN
5029             0x0B2A,   // 0B2A..0B30; ORIYA
5030             0x0B31,   // 0B31      ; UNKNOWN
5031             0x0B32,   // 0B32..0B33; ORIYA
5032             0x0B34,   // 0B34      ; UNKNOWN
5033             0x0B35,   // 0B35..0B39; ORIYA
5034             0x0B3A,   // 0B3A..0B3B; UNKNOWN
5035             0x0B3C,   // 0B3C..0B44; ORIYA
5036             0x0B45,   // 0B45..0B46; UNKNOWN
5037             0x0B47,   // 0B47..0B48; ORIYA
5038             0x0B49,   // 0B49..0B4A; UNKNOWN
5039             0x0B4B,   // 0B4B..0B4D; ORIYA
5040             0x0B4E,   // 0B4E..0B55; UNKNOWN
5041             0x0B56,   // 0B56..0B57; ORIYA
5042             0x0B58,   // 0B58..0B5B; UNKNOWN
5043             0x0B5C,   // 0B5C..0B5D; ORIYA
5044             0x0B5E,   // 0B5E      ; UNKNOWN
5045             0x0B5F,   // 0B5F..0B63; ORIYA
5046             0x0B64,   // 0B64..0B65; UNKNOWN
5047             0x0B66,   // 0B66..0B77; ORIYA
5048             0x0B78,   // 0B78..0B81; UNKNOWN
5049             0x0B82,   // 0B82..0B83; TAMIL
5050             0x0B84,   // 0B84      ; UNKNOWN
5051             0x0B85,   // 0B85..0B8A; TAMIL
5052             0x0B8B,   // 0B8B..0B8D; UNKNOWN
5053             0x0B8E,   // 0B8E..0B90; TAMIL
5054             0x0B91,   // 0B91      ; UNKNOWN
5055             0x0B92,   // 0B92..0B95; TAMIL
5056             0x0B96,   // 0B96..0B98; UNKNOWN
5057             0x0B99,   // 0B99..0B9A; TAMIL
5058             0x0B9B,   // 0B9B      ; UNKNOWN
5059             0x0B9C,   // 0B9C      ; TAMIL
5060             0x0B9D,   // 0B9D      ; UNKNOWN
5061             0x0B9E,   // 0B9E..0B9F; TAMIL
5062             0x0BA0,   // 0BA0..0BA2; UNKNOWN
5063             0x0BA3,   // 0BA3..0BA4; TAMIL
5064             0x0BA5,   // 0BA5..0BA7; UNKNOWN
5065             0x0BA8,   // 0BA8..0BAA; TAMIL
5066             0x0BAB,   // 0BAB..0BAD; UNKNOWN
5067             0x0BAE,   // 0BAE..0BB9; TAMIL
5068             0x0BBA,   // 0BBA..0BBD; UNKNOWN
5069             0x0BBE,   // 0BBE..0BC2; TAMIL
5070             0x0BC3,   // 0BC3..0BC5; UNKNOWN
5071             0x0BC6,   // 0BC6..0BC8; TAMIL
5072             0x0BC9,   // 0BC9      ; UNKNOWN
5073             0x0BCA,   // 0BCA..0BCD; TAMIL
5074             0x0BCE,   // 0BCE..0BCF; UNKNOWN
5075             0x0BD0,   // 0BD0      ; TAMIL
5076             0x0BD1,   // 0BD1..0BD6; UNKNOWN
5077             0x0BD7,   // 0BD7      ; TAMIL
5078             0x0BD8,   // 0BD8..0BE5; UNKNOWN
5079             0x0BE6,   // 0BE6..0BFA; TAMIL
5080             0x0BFB,   // 0BFB..0BFF; UNKNOWN
5081             0x0C00,   // 0C00..0C0C; TELUGU
5082             0x0C0D,   // 0C0D      ; UNKNOWN
5083             0x0C0E,   // 0C0E..0C10; TELUGU
5084             0x0C11,   // 0C11      ; UNKNOWN
5085             0x0C12,   // 0C12..0C28; TELUGU
5086             0x0C29,   // 0C29      ; UNKNOWN
5087             0x0C2A,   // 0C2A..0C39; TELUGU
5088             0x0C3A,   // 0C3A..0C3C; UNKNOWN
5089             0x0C3D,   // 0C3D..0C44; TELUGU
5090             0x0C45,   // 0C45      ; UNKNOWN
5091             0x0C46,   // 0C46..0C48; TELUGU
5092             0x0C49,   // 0C49      ; UNKNOWN
5093             0x0C4A,   // 0C4A..0C4D; TELUGU
5094             0x0C4E,   // 0C4E..0C54; UNKNOWN
5095             0x0C55,   // 0C55..0C56; TELUGU
5096             0x0C57,   // 0C57      ; UNKNOWN
5097             0x0C58,   // 0C58..0C5A; TELUGU
5098             0x0C5B,   // 0C5B..0C5F; UNKNOWN
5099             0x0C60,   // 0C60..0C63; TELUGU
5100             0x0C64,   // 0C64..0C65; UNKNOWN
5101             0x0C66,   // 0C66..0C6F; TELUGU
5102             0x0C70,   // 0C70..0C77; UNKNOWN
5103             0x0C78,   // 0C78..0C7F; TELUGU
5104             0x0C80,   // 0C80..0C8C; KANNADA
5105             0x0C8D,   // 0C8D      ; UNKNOWN
5106             0x0C8E,   // 0C8E..0C90; KANNADA
5107             0x0C91,   // 0C91      ; UNKNOWN
5108             0x0C92,   // 0C92..0CA8; KANNADA
5109             0x0CA9,   // 0CA9      ; UNKNOWN
5110             0x0CAA,   // 0CAA..0CB3; KANNADA
5111             0x0CB4,   // 0CB4      ; UNKNOWN
5112             0x0CB5,   // 0CB5..0CB9; KANNADA
5113             0x0CBA,   // 0CBA..0CBB; UNKNOWN
5114             0x0CBC,   // 0CBC..0CC4; KANNADA
5115             0x0CC5,   // 0CC5      ; UNKNOWN
5116             0x0CC6,   // 0CC6..0CC8; KANNADA
5117             0x0CC9,   // 0CC9      ; UNKNOWN
5118             0x0CCA,   // 0CCA..0CCD; KANNADA
5119             0x0CCE,   // 0CCE..0CD4; UNKNOWN
5120             0x0CD5,   // 0CD5..0CD6; KANNADA
5121             0x0CD7,   // 0CD7..0CDD; UNKNOWN
5122             0x0CDE,   // 0CDE      ; KANNADA
5123             0x0CDF,   // 0CDF      ; UNKNOWN
5124             0x0CE0,   // 0CE0..0CE3; KANNADA
5125             0x0CE4,   // 0CE4..0CE5; UNKNOWN
5126             0x0CE6,   // 0CE6..0CEF; KANNADA
5127             0x0CF0,   // 0CF0      ; UNKNOWN
5128             0x0CF1,   // 0CF1..0CF2; KANNADA
5129             0x0CF3,   // 0CF3..0CFF; UNKNOWN
5130             0x0D00,   // 0D00..0D03; MALAYALAM
5131             0x0D04,   // 0D04      ; UNKNOWN
5132             0x0D05,   // 0D05..0D0C; MALAYALAM
5133             0x0D0D,   // 0D0D      ; UNKNOWN
5134             0x0D0E,   // 0D0E..0D10; MALAYALAM
5135             0x0D11,   // 0D11      ; UNKNOWN
5136             0x0D12,   // 0D12..0D44; MALAYALAM
5137             0x0D45,   // 0D45      ; UNKNOWN
5138             0x0D46,   // 0D46..0D48; MALAYALAM
5139             0x0D49,   // 0D49      ; UNKNOWN
5140             0x0D4A,   // 0D4A..0D4F; MALAYALAM
5141             0x0D50,   // 0D50..0D53; UNKNOWN
5142             0x0D54,   // 0D54..0D63; MALAYALAM
5143             0x0D64,   // 0D64..0D65; UNKNOWN
5144             0x0D66,   // 0D66..0D7F; MALAYALAM
5145             0x0D80,   // 0D80..0D81; UNKNOWN
5146             0x0D82,   // 0D82..0D83; SINHALA
5147             0x0D84,   // 0D84      ; UNKNOWN
5148             0x0D85,   // 0D85..0D96; SINHALA
5149             0x0D97,   // 0D97..0D99; UNKNOWN
5150             0x0D9A,   // 0D9A..0DB1; SINHALA
5151             0x0DB2,   // 0DB2      ; UNKNOWN
5152             0x0DB3,   // 0DB3..0DBB; SINHALA
5153             0x0DBC,   // 0DBC      ; UNKNOWN
5154             0x0DBD,   // 0DBD      ; SINHALA
5155             0x0DBE,   // 0DBE..0DBF; UNKNOWN
5156             0x0DC0,   // 0DC0..0DC6; SINHALA
5157             0x0DC7,   // 0DC7..0DC9; UNKNOWN
5158             0x0DCA,   // 0DCA      ; SINHALA
5159             0x0DCB,   // 0DCB..0DCE; UNKNOWN
5160             0x0DCF,   // 0DCF..0DD4; SINHALA
5161             0x0DD5,   // 0DD5      ; UNKNOWN
5162             0x0DD6,   // 0DD6      ; SINHALA
5163             0x0DD7,   // 0DD7      ; UNKNOWN
5164             0x0DD8,   // 0DD8..0DDF; SINHALA
5165             0x0DE0,   // 0DE0..0DE5; UNKNOWN
5166             0x0DE6,   // 0DE6..0DEF; SINHALA
5167             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5168             0x0DF2,   // 0DF2..0DF4; SINHALA
5169             0x0DF5,   // 0DF5..0E00; UNKNOWN
5170             0x0E01,   // 0E01..0E3A; THAI
5171             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5172             0x0E3F,   // 0E3F      ; COMMON
5173             0x0E40,   // 0E40..0E5B; THAI
5174             0x0E5C,   // 0E5C..0E80; UNKNOWN
5175             0x0E81,   // 0E81..0E82; LAO
5176             0x0E83,   // 0E83      ; UNKNOWN
5177             0x0E84,   // 0E84      ; LAO
5178             0x0E85,   // 0E85..0E86; UNKNOWN
5179             0x0E87,   // 0E87..0E88; LAO
5180             0x0E89,   // 0E89      ; UNKNOWN
5181             0x0E8A,   // 0E8A      ; LAO
5182             0x0E8B,   // 0E8B..0E8C; UNKNOWN
5183             0x0E8D,   // 0E8D      ; LAO
5184             0x0E8E,   // 0E8E..0E93; UNKNOWN
5185             0x0E94,   // 0E94..0E97; LAO
5186             0x0E98,   // 0E98      ; UNKNOWN
5187             0x0E99,   // 0E99..0E9F; LAO
5188             0x0EA0,   // 0EA0      ; UNKNOWN
5189             0x0EA1,   // 0EA1..0EA3; LAO
5190             0x0EA4,   // 0EA4      ; UNKNOWN
5191             0x0EA5,   // 0EA5      ; LAO
5192             0x0EA6,   // 0EA6      ; UNKNOWN
5193             0x0EA7,   // 0EA7      ; LAO
5194             0x0EA8,   // 0EA8..0EA9; UNKNOWN
5195             0x0EAA,   // 0EAA..0EAB; LAO
5196             0x0EAC,   // 0EAC      ; UNKNOWN
5197             0x0EAD,   // 0EAD..0EB9; LAO
5198             0x0EBA,   // 0EBA      ; UNKNOWN
5199             0x0EBB,   // 0EBB..0EBD; LAO
5200             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5201             0x0EC0,   // 0EC0..0EC4; LAO
5202             0x0EC5,   // 0EC5      ; UNKNOWN
5203             0x0EC6,   // 0EC6      ; LAO
5204             0x0EC7,   // 0EC7      ; UNKNOWN
5205             0x0EC8,   // 0EC8..0ECD; LAO
5206             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5207             0x0ED0,   // 0ED0..0ED9; LAO
5208             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5209             0x0EDC,   // 0EDC..0EDF; LAO
5210             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5211             0x0F00,   // 0F00..0F47; TIBETAN
5212             0x0F48,   // 0F48      ; UNKNOWN
5213             0x0F49,   // 0F49..0F6C; TIBETAN
5214             0x0F6D,   // 0F6D..0F70; UNKNOWN
5215             0x0F71,   // 0F71..0F97; TIBETAN
5216             0x0F98,   // 0F98      ; UNKNOWN
5217             0x0F99,   // 0F99..0FBC; TIBETAN
5218             0x0FBD,   // 0FBD      ; UNKNOWN
5219             0x0FBE,   // 0FBE..0FCC; TIBETAN
5220             0x0FCD,   // 0FCD      ; UNKNOWN
5221             0x0FCE,   // 0FCE..0FD4; TIBETAN
5222             0x0FD5,   // 0FD5..0FD8; COMMON
5223             0x0FD9,   // 0FD9..0FDA; TIBETAN
5224             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5225             0x1000,   // 1000..109F; MYANMAR
5226             0x10A0,   // 10A0..10C5; GEORGIAN
5227             0x10C6,   // 10C6      ; UNKNOWN
5228             0x10C7,   // 10C7      ; GEORGIAN
5229             0x10C8,   // 10C8..10CC; UNKNOWN
5230             0x10CD,   // 10CD      ; GEORGIAN
5231             0x10CE,   // 10CE..10CF; UNKNOWN
5232             0x10D0,   // 10D0..10FA; GEORGIAN
5233             0x10FB,   // 10FB      ; COMMON
5234             0x10FC,   // 10FC..10FF; GEORGIAN
5235             0x1100,   // 1100..11FF; HANGUL
5236             0x1200,   // 1200..1248; ETHIOPIC
5237             0x1249,   // 1249      ; UNKNOWN
5238             0x124A,   // 124A..124D; ETHIOPIC
5239             0x124E,   // 124E..124F; UNKNOWN
5240             0x1250,   // 1250..1256; ETHIOPIC
5241             0x1257,   // 1257      ; UNKNOWN
5242             0x1258,   // 1258      ; ETHIOPIC
5243             0x1259,   // 1259      ; UNKNOWN
5244             0x125A,   // 125A..125D; ETHIOPIC
5245             0x125E,   // 125E..125F; UNKNOWN
5246             0x1260,   // 1260..1288; ETHIOPIC
5247             0x1289,   // 1289      ; UNKNOWN
5248             0x128A,   // 128A..128D; ETHIOPIC
5249             0x128E,   // 128E..128F; UNKNOWN
5250             0x1290,   // 1290..12B0; ETHIOPIC
5251             0x12B1,   // 12B1      ; UNKNOWN
5252             0x12B2,   // 12B2..12B5; ETHIOPIC
5253             0x12B6,   // 12B6..12B7; UNKNOWN
5254             0x12B8,   // 12B8..12BE; ETHIOPIC
5255             0x12BF,   // 12BF      ; UNKNOWN
5256             0x12C0,   // 12C0      ; ETHIOPIC
5257             0x12C1,   // 12C1      ; UNKNOWN
5258             0x12C2,   // 12C2..12C5; ETHIOPIC
5259             0x12C6,   // 12C6..12C7; UNKNOWN
5260             0x12C8,   // 12C8..12D6; ETHIOPIC
5261             0x12D7,   // 12D7      ; UNKNOWN
5262             0x12D8,   // 12D8..1310; ETHIOPIC
5263             0x1311,   // 1311      ; UNKNOWN
5264             0x1312,   // 1312..1315; ETHIOPIC
5265             0x1316,   // 1316..1317; UNKNOWN
5266             0x1318,   // 1318..135A; ETHIOPIC
5267             0x135B,   // 135B..135C; UNKNOWN
5268             0x135D,   // 135D..137C; ETHIOPIC
5269             0x137D,   // 137D..137F; UNKNOWN
5270             0x1380,   // 1380..1399; ETHIOPIC
5271             0x139A,   // 139A..139F; UNKNOWN
5272             0x13A0,   // 13A0..13F5; CHEROKEE
5273             0x13F6,   // 13F6..13F7; UNKNOWN
5274             0x13F8,   // 13F8..13FD; CHEROKEE
5275             0x13FE,   // 13FE..13FF; UNKNOWN
5276             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5277             0x1680,   // 1680..169C; OGHAM
5278             0x169D,   // 169D..169F; UNKNOWN
5279             0x16A0,   // 16A0..16EA; RUNIC
5280             0x16EB,   // 16EB..16ED; COMMON
5281             0x16EE,   // 16EE..16F8; RUNIC
5282             0x16F9,   // 16F9..16FF; UNKNOWN
5283             0x1700,   // 1700..170C; TAGALOG
5284             0x170D,   // 170D      ; UNKNOWN
5285             0x170E,   // 170E..1714; TAGALOG
5286             0x1715,   // 1715..171F; UNKNOWN
5287             0x1720,   // 1720..1734; HANUNOO
5288             0x1735,   // 1735..1736; COMMON
5289             0x1737,   // 1737..173F; UNKNOWN
5290             0x1740,   // 1740..1753; BUHID
5291             0x1754,   // 1754..175F; UNKNOWN
5292             0x1760,   // 1760..176C; TAGBANWA
5293             0x176D,   // 176D      ; UNKNOWN
5294             0x176E,   // 176E..1770; TAGBANWA
5295             0x1771,   // 1771      ; UNKNOWN
5296             0x1772,   // 1772..1773; TAGBANWA
5297             0x1774,   // 1774..177F; UNKNOWN
5298             0x1780,   // 1780..17DD; KHMER
5299             0x17DE,   // 17DE..17DF; UNKNOWN
5300             0x17E0,   // 17E0..17E9; KHMER
5301             0x17EA,   // 17EA..17EF; UNKNOWN
5302             0x17F0,   // 17F0..17F9; KHMER
5303             0x17FA,   // 17FA..17FF; UNKNOWN
5304             0x1800,   // 1800..1801; MONGOLIAN
5305             0x1802,   // 1802..1803; COMMON
5306             0x1804,   // 1804      ; MONGOLIAN
5307             0x1805,   // 1805      ; COMMON
5308             0x1806,   // 1806..180E; MONGOLIAN
5309             0x180F,   // 180F      ; UNKNOWN
5310             0x1810,   // 1810..1819; MONGOLIAN
5311             0x181A,   // 181A..181F; UNKNOWN
5312             0x1820,   // 1820..1878; MONGOLIAN
5313             0x1879,   // 1879..187F; UNKNOWN
5314             0x1880,   // 1880..18AA; MONGOLIAN
5315             0x18AB,   // 18AB..18AF; UNKNOWN
5316             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5317             0x18F6,   // 18F6..18FF; UNKNOWN
5318             0x1900,   // 1900..191E; LIMBU
5319             0x191F,   // 191F      ; UNKNOWN
5320             0x1920,   // 1920..192B; LIMBU
5321             0x192C,   // 192C..192F; UNKNOWN
5322             0x1930,   // 1930..193B; LIMBU
5323             0x193C,   // 193C..193F; UNKNOWN
5324             0x1940,   // 1940      ; LIMBU
5325             0x1941,   // 1941..1943; UNKNOWN
5326             0x1944,   // 1944..194F; LIMBU
5327             0x1950,   // 1950..196D; TAI_LE
5328             0x196E,   // 196E..196F; UNKNOWN
5329             0x1970,   // 1970..1974; TAI_LE
5330             0x1975,   // 1975..197F; UNKNOWN
5331             0x1980,   // 1980..19AB; NEW_TAI_LUE
5332             0x19AC,   // 19AC..19AF; UNKNOWN
5333             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5334             0x19CA,   // 19CA..19CF; UNKNOWN
5335             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5336             0x19DB,   // 19DB..19DD; UNKNOWN
5337             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5338             0x19E0,   // 19E0..19FF; KHMER
5339             0x1A00,   // 1A00..1A1B; BUGINESE
5340             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5341             0x1A1E,   // 1A1E..1A1F; BUGINESE
5342             0x1A20,   // 1A20..1A5E; TAI_THAM
5343             0x1A5F,   // 1A5F      ; UNKNOWN
5344             0x1A60,   // 1A60..1A7C; TAI_THAM
5345             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5346             0x1A7F,   // 1A7F..1A89; TAI_THAM
5347             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5348             0x1A90,   // 1A90..1A99; TAI_THAM
5349             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5350             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5351             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5352             0x1AB0,   // 1AB0..1ABE; INHERITED
5353             0x1ABF,   // 1ABF..1AFF; UNKNOWN
5354             0x1B00,   // 1B00..1B4B; BALINESE
5355             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5356             0x1B50,   // 1B50..1B7C; BALINESE
5357             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5358             0x1B80,   // 1B80..1BBF; SUNDANESE
5359             0x1BC0,   // 1BC0..1BF3; BATAK
5360             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5361             0x1BFC,   // 1BFC..1BFF; BATAK
5362             0x1C00,   // 1C00..1C37; LEPCHA
5363             0x1C38,   // 1C38..1C3A; UNKNOWN
5364             0x1C3B,   // 1C3B..1C49; LEPCHA
5365             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5366             0x1C4D,   // 1C4D..1C4F; LEPCHA
5367             0x1C50,   // 1C50..1C7F; OL_CHIKI
5368             0x1C80,   // 1C80..1C88; CYRILLIC
5369             0x1C89,   // 1C89..1C8F; UNKNOWN
5370             0x1C90,   // 1C90..1CBA; GEORGIAN
5371             0x1CBB,   // 1CBB..1CBC; UNKNOWN
5372             0x1CBD,   // 1CBD..1CBF; GEORGIAN    
5373             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5374             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5375             0x1CD0,   // 1CD0..1CD2; INHERITED
5376             0x1CD3,   // 1CD3      ; COMMON
5377             0x1CD4,   // 1CD4..1CE0; INHERITED
5378             0x1CE1,   // 1CE1      ; COMMON
5379             0x1CE2,   // 1CE2..1CE8; INHERITED
5380             0x1CE9,   // 1CE9..1CEC; COMMON
5381             0x1CED,   // 1CED      ; INHERITED
5382             0x1CEE,   // 1CEE..1CF3; COMMON
5383             0x1CF4,   // 1CF4      ; INHERITED
5384             0x1CF5,   // 1CF5..1CF7; COMMON
5385             0x1CF8,   // 1CF8..1CF9; INHERITED
5386             0x1CFA,   // 1CFA..1CFF; UNKNOWN
5387             0x1D00,   // 1D00..1D25; LATIN
5388             0x1D26,   // 1D26..1D2A; GREEK
5389             0x1D2B,   // 1D2B      ; CYRILLIC
5390             0x1D2C,   // 1D2C..1D5C; LATIN
5391             0x1D5D,   // 1D5D..1D61; GREEK
5392             0x1D62,   // 1D62..1D65; LATIN
5393             0x1D66,   // 1D66..1D6A; GREEK
5394             0x1D6B,   // 1D6B..1D77; LATIN
5395             0x1D78,   // 1D78      ; CYRILLIC
5396             0x1D79,   // 1D79..1DBE; LATIN
5397             0x1DBF,   // 1DBF      ; GREEK
5398             0x1DC0,   // 1DC0..1DF9; INHERITED
5399             0x1DFA,   // 1DFA      ; UNKNOWN
5400             0x1DFB,   // 1DFB..1DFF; INHERITED
5401             0x1E00,   // 1E00..1EFF; LATIN
5402             0x1F00,   // 1F00..1F15; GREEK
5403             0x1F16,   // 1F16..1F17; UNKNOWN
5404             0x1F18,   // 1F18..1F1D; GREEK
5405             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5406             0x1F20,   // 1F20..1F45; GREEK
5407             0x1F46,   // 1F46..1F47; UNKNOWN
5408             0x1F48,   // 1F48..1F4D; GREEK
5409             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5410             0x1F50,   // 1F50..1F57; GREEK
5411             0x1F58,   // 1F58      ; UNKNOWN
5412             0x1F59,   // 1F59      ; GREEK
5413             0x1F5A,   // 1F5A      ; UNKNOWN
5414             0x1F5B,   // 1F5B      ; GREEK
5415             0x1F5C,   // 1F5C      ; UNKNOWN
5416             0x1F5D,   // 1F5D      ; GREEK
5417             0x1F5E,   // 1F5E      ; UNKNOWN
5418             0x1F5F,   // 1F5F..1F7D; GREEK
5419             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5420             0x1F80,   // 1F80..1FB4; GREEK
5421             0x1FB5,   // 1FB5      ; UNKNOWN
5422             0x1FB6,   // 1FB6..1FC4; GREEK
5423             0x1FC5,   // 1FC5      ; UNKNOWN
5424             0x1FC6,   // 1FC6..1FD3; GREEK
5425             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5426             0x1FD6,   // 1FD6..1FDB; GREEK
5427             0x1FDC,   // 1FDC      ; UNKNOWN
5428             0x1FDD,   // 1FDD..1FEF; GREEK
5429             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5430             0x1FF2,   // 1FF2..1FF4; GREEK
5431             0x1FF5,   // 1FF5      ; UNKNOWN
5432             0x1FF6,   // 1FF6..1FFE; GREEK
5433             0x1FFF,   // 1FFF      ; UNKNOWN
5434             0x2000,   // 2000..200B; COMMON
5435             0x200C,   // 200C..200D; INHERITED
5436             0x200E,   // 200E..2064; COMMON
5437             0x2065,   // 2065      ; UNKNOWN
5438             0x2066,   // 2066..2070; COMMON
5439             0x2071,   // 2071      ; LATIN
5440             0x2072,   // 2072..2073; UNKNOWN
5441             0x2074,   // 2074..207E; COMMON
5442             0x207F,   // 207F      ; LATIN
5443             0x2080,   // 2080..208E; COMMON
5444             0x208F,   // 208F      ; UNKNOWN
5445             0x2090,   // 2090..209C; LATIN
5446             0x209D,   // 209D..209F; UNKNOWN
5447             0x20A0,   // 20A0..20BF; COMMON
5448             0x20C0,   // 20C0..20CF; UNKNOWN
5449             0x20D0,   // 20D0..20F0; INHERITED
5450             0x20F1,   // 20F1..20FF; UNKNOWN
5451             0x2100,   // 2100..2125; COMMON
5452             0x2126,   // 2126      ; GREEK
5453             0x2127,   // 2127..2129; COMMON
5454             0x212A,   // 212A..212B; LATIN
5455             0x212C,   // 212C..2131; COMMON
5456             0x2132,   // 2132      ; LATIN
5457             0x2133,   // 2133..214D; COMMON
5458             0x214E,   // 214E      ; LATIN
5459             0x214F,   // 214F..215F; COMMON
5460             0x2160,   // 2160..2188; LATIN
5461             0x2189,   // 2189..218B; COMMON
5462             0x218C,   // 218C..218F; UNKNOWN
5463             0x2190,   // 2190..2426; COMMON
5464             0x2427,   // 2427..243F; UNKNOWN
5465             0x2440,   // 2440..244A; COMMON
5466             0x244B,   // 244B..245F; UNKNOWN
5467             0x2460,   // 2460..27FF; COMMON
5468             0x2800,   // 2800..28FF; BRAILLE
5469             0x2900,   // 2900..2B73; COMMON
5470             0x2B74,   // 2B74..2B75; UNKNOWN
5471             0x2B76,   // 2B76..2B95; COMMON
5472             0x2B96,   // 2B96..2B97; UNKNOWN
5473             0x2B98,   // 2B98..2BC8; COMMON
5474             0x2BC9,   // 2BC9      ; UNKNOWN
5475             0x2BCA,   // 2BCA..2BFE; COMMON
5476             0x2BFF,   // 2BFF      ; UNKNOWN
5477             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5478             0x2C2F,   // 2C2F      ; UNKNOWN
5479             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5480             0x2C5F,   // 2C5F      ; UNKNOWN
5481             0x2C60,   // 2C60..2C7F; LATIN
5482             0x2C80,   // 2C80..2CF3; COPTIC
5483             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5484             0x2CF9,   // 2CF9..2CFF; COPTIC
5485             0x2D00,   // 2D00..2D25; GEORGIAN
5486             0x2D26,   // 2D26      ; UNKNOWN
5487             0x2D27,   // 2D27      ; GEORGIAN
5488             0x2D28,   // 2D28..2D2C; UNKNOWN
5489             0x2D2D,   // 2D2D      ; GEORGIAN
5490             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5491             0x2D30,   // 2D30..2D67; TIFINAGH
5492             0x2D68,   // 2D68..2D6E; UNKNOWN
5493             0x2D6F,   // 2D6F..2D70; TIFINAGH
5494             0x2D71,   // 2D71..2D7E; UNKNOWN
5495             0x2D7F,   // 2D7F      ; TIFINAGH
5496             0x2D80,   // 2D80..2D96; ETHIOPIC
5497             0x2D97,   // 2D97..2D9F; UNKNOWN
5498             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5499             0x2DA7,   // 2DA7      ; UNKNOWN
5500             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5501             0x2DAF,   // 2DAF      ; UNKNOWN
5502             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5503             0x2DB7,   // 2DB7      ; UNKNOWN
5504             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5505             0x2DBF,   // 2DBF      ; UNKNOWN
5506             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5507             0x2DC7,   // 2DC7      ; UNKNOWN
5508             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5509             0x2DCF,   // 2DCF      ; UNKNOWN
5510             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5511             0x2DD7,   // 2DD7      ; UNKNOWN
5512             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5513             0x2DDF,   // 2DDF      ; UNKNOWN
5514             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5515             0x2E00,   // 2E00..2E4E; COMMON
5516             0x2E4F,   // 2E4F..2E7F; UNKNOWN
5517             0x2E80,   // 2E80..2E99; HAN
5518             0x2E9A,   // 2E9A      ; UNKNOWN
5519             0x2E9B,   // 2E9B..2EF3; HAN
5520             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5521             0x2F00,   // 2F00..2FD5; HAN
5522             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5523             0x2FF0,   // 2FF0..2FFB; COMMON
5524             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5525             0x3000,   // 3000..3004; COMMON
5526             0x3005,   // 3005      ; HAN
5527             0x3006,   // 3006      ; COMMON
5528             0x3007,   // 3007      ; HAN
5529             0x3008,   // 3008..3020; COMMON
5530             0x3021,   // 3021..3029; HAN
5531             0x302A,   // 302A..302D; INHERITED
5532             0x302E,   // 302E..302F; HANGUL
5533             0x3030,   // 3030..3037; COMMON
5534             0x3038,   // 3038..303B; HAN
5535             0x303C,   // 303C..303F; COMMON
5536             0x3040,   // 3040      ; UNKNOWN
5537             0x3041,   // 3041..3096; HIRAGANA
5538             0x3097,   // 3097..3098; UNKNOWN
5539             0x3099,   // 3099..309A; INHERITED
5540             0x309B,   // 309B..309C; COMMON
5541             0x309D,   // 309D..309F; HIRAGANA
5542             0x30A0,   // 30A0      ; COMMON
5543             0x30A1,   // 30A1..30FA; KATAKANA
5544             0x30FB,   // 30FB..30FC; COMMON
5545             0x30FD,   // 30FD..30FF; KATAKANA
5546             0x3100,   // 3100..3104; UNKNOWN
5547             0x3105,   // 3105..312F; BOPOMOFO
5548             0x3130,   // 3130      ; UNKNOWN
5549             0x3131,   // 3131..318E; HANGUL
5550             0x318F,   // 318F      ; UNKNOWN
5551             0x3190,   // 3190..319F; COMMON
5552             0x31A0,   // 31A0..31BA; BOPOMOFO
5553             0x31BB,   // 31BB..31BF; UNKNOWN
5554             0x31C0,   // 31C0..31E3; COMMON
5555             0x31E4,   // 31E4..31EF; UNKNOWN
5556             0x31F0,   // 31F0..31FF; KATAKANA
5557             0x3200,   // 3200..321E; HANGUL
5558             0x321F,   // 321F      ; UNKNOWN
5559             0x3220,   // 3220..325F; COMMON
5560             0x3260,   // 3260..327E; HANGUL
5561             0x327F,   // 327F..32CF; COMMON
5562             0x32D0,   // 32D0..32FE; KATAKANA
5563             0x32FF,   // 32FF      ; COMMON
5564             0x3300,   // 3300..3357; KATAKANA
5565             0x3358,   // 3358..33FF; COMMON
5566             0x3400,   // 3400..4DB5; HAN
5567             0x4DB6,   // 4DB6..4DBF; UNKNOWN
5568             0x4DC0,   // 4DC0..4DFF; COMMON
5569             0x4E00,   // 4E00..9FEF; HAN
5570             0x9FF0,   // 9FF0..9FFF; UNKNOWN
5571             0xA000,   // A000..A48C; YI
5572             0xA48D,   // A48D..A48F; UNKNOWN
5573             0xA490,   // A490..A4C6; YI
5574             0xA4C7,   // A4C7..A4CF; UNKNOWN
5575             0xA4D0,   // A4D0..A4FF; LISU
5576             0xA500,   // A500..A62B; VAI
5577             0xA62C,   // A62C..A63F; UNKNOWN
5578             0xA640,   // A640..A69F; CYRILLIC
5579             0xA6A0,   // A6A0..A6F7; BAMUM
5580             0xA6F8,   // A6F8..A6FF; UNKNOWN
5581             0xA700,   // A700..A721; COMMON
5582             0xA722,   // A722..A787; LATIN
5583             0xA788,   // A788..A78A; COMMON
5584             0xA78B,   // A78B..A7B9; LATIN
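5585             0xA7C0,   // A7C0..A7F6; UNKNOWN -- NOTE(review): A7BA..A7BF is covered by neither this comment nor the preceding LATIN one (which ends at A7B9); confirm this start value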
5586             0xA7F7,   // A7F7..A7FF; LATIN
5587             0xA800,   // A800..A82B; SYLOTI_NAGRI
5588             0xA82C,   // A82C..A82F; UNKNOWN
5589             0xA830,   // A830..A839; COMMON
5590             0xA83A,   // A83A..A83F; UNKNOWN
5591             0xA840,   // A840..A877; PHAGS_PA
5592             0xA878,   // A878..A87F; UNKNOWN
5593             0xA880,   // A880..A8C5; SAURASHTRA
5594             0xA8C6,   // A8C6..A8CD; UNKNOWN
5595             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5596             0xA8DA,   // A8DA..A8DF; UNKNOWN
5597             0xA8E0,   // A8E0..A8FF; DEVANAGARI
5598             0xA900,   // A900..A92D; KAYAH_LI
5599             0xA92E,   // A92E      ; COMMON
5600             0xA92F,   // A92F      ; KAYAH_LI
5601             0xA930,   // A930..A953; REJANG
5602             0xA954,   // A954..A95E; UNKNOWN
5603             0xA95F,   // A95F      ; REJANG
5604             0xA960,   // A960..A97C; HANGUL
5605             0xA97D,   // A97D..A97F; UNKNOWN
5606             0xA980,   // A980..A9CD; JAVANESE
5607             0xA9CE,   // A9CE      ; UNKNOWN
5608             0xA9CF,   // A9CF      ; COMMON
5609             0xA9D0,   // A9D0..A9D9; JAVANESE
5610             0xA9DA,   // A9DA..A9DD; UNKNOWN
5611             0xA9DE,   // A9DE..A9DF; JAVANESE
5612             0xA9E0,   // A9E0..A9FE; MYANMAR
5613             0xA9FF,   // A9FF      ; UNKNOWN
5614             0xAA00,   // AA00..AA36; CHAM
5615             0xAA37,   // AA37..AA3F; UNKNOWN
5616             0xAA40,   // AA40..AA4D; CHAM
5617             0xAA4E,   // AA4E..AA4F; UNKNOWN
5618             0xAA50,   // AA50..AA59; CHAM
5619             0xAA5A,   // AA5A..AA5B; UNKNOWN
5620             0xAA5C,   // AA5C..AA5F; CHAM
5621             0xAA60,   // AA60..AA7F; MYANMAR
5622             0xAA80,   // AA80..AAC2; TAI_VIET
5623             0xAAC3,   // AAC3..AADA; UNKNOWN
5624             0xAADB,   // AADB..AADF; TAI_VIET
5625             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5626             0xAAF7,   // AAF7..AB00; UNKNOWN
5627             0xAB01,   // AB01..AB06; ETHIOPIC
5628             0xAB07,   // AB07..AB08; UNKNOWN
5629             0xAB09,   // AB09..AB0E; ETHIOPIC
5630             0xAB0F,   // AB0F..AB10; UNKNOWN
5631             0xAB11,   // AB11..AB16; ETHIOPIC
5632             0xAB17,   // AB17..AB1F; UNKNOWN
5633             0xAB20,   // AB20..AB26; ETHIOPIC
5634             0xAB27,   // AB27      ; UNKNOWN
5635             0xAB28,   // AB28..AB2E; ETHIOPIC
5636             0xAB2F,   // AB2F      ; UNKNOWN
5637             0xAB30,   // AB30..AB5A; LATIN
5638             0xAB5B,   // AB5B      ; COMMON
5639             0xAB5C,   // AB5C..AB64; LATIN
5640             0xAB65,   // AB65      ; GREEK
5641             0xAB66,   // AB66..AB6F; UNKNOWN
5642             0xAB70,   // AB70..ABBF; CHEROKEE
5643             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5644             0xABEE,   // ABEE..ABEF; UNKNOWN
5645             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5646             0xABFA,   // ABFA..ABFF; UNKNOWN
5647             0xAC00,   // AC00..D7A3; HANGUL
5648             0xD7A4,   // D7A4..D7AF; UNKNOWN
5649             0xD7B0,   // D7B0..D7C6; HANGUL
5650             0xD7C7,   // D7C7..D7CA; UNKNOWN
5651             0xD7CB,   // D7CB..D7FB; HANGUL
5652             0xD7FC,   // D7FC..F8FF; UNKNOWN
5653             0xF900,   // F900..FA6D; HAN
5654             0xFA6E,   // FA6E..FA6F; UNKNOWN
5655             0xFA70,   // FA70..FAD9; HAN
5656             0xFADA,   // FADA..FAFF; UNKNOWN
5657             0xFB00,   // FB00..FB06; LATIN
5658             0xFB07,   // FB07..FB12; UNKNOWN
5659             0xFB13,   // FB13..FB17; ARMENIAN
5660             0xFB18,   // FB18..FB1C; UNKNOWN
5661             0xFB1D,   // FB1D..FB36; HEBREW
5662             0xFB37,   // FB37      ; UNKNOWN
5663             0xFB38,   // FB38..FB3C; HEBREW
5664             0xFB3D,   // FB3D      ; UNKNOWN
5665             0xFB3E,   // FB3E      ; HEBREW
5666             0xFB3F,   // FB3F      ; UNKNOWN
5667             0xFB40,   // FB40..FB41; HEBREW
5668             0xFB42,   // FB42      ; UNKNOWN
5669             0xFB43,   // FB43..FB44; HEBREW
5670             0xFB45,   // FB45      ; UNKNOWN
5671             0xFB46,   // FB46..FB4F; HEBREW
5672             0xFB50,   // FB50..FBC1; ARABIC
5673             0xFBC2,   // FBC2..FBD2; UNKNOWN
5674             0xFBD3,   // FBD3..FD3D; ARABIC
5675             0xFD3E,   // FD3E..FD3F; COMMON
5676             0xFD40,   // FD40..FD4F; UNKNOWN
5677             0xFD50,   // FD50..FD8F; ARABIC
5678             0xFD90,   // FD90..FD91; UNKNOWN
5679             0xFD92,   // FD92..FDC7; ARABIC
5680             0xFDC8,   // FDC8..FDEF; UNKNOWN
5681             0xFDF0,   // FDF0..FDFD; ARABIC
5682             0xFDFE,   // FDFE..FDFF; UNKNOWN
5683             0xFE00,   // FE00..FE0F; INHERITED
5684             0xFE10,   // FE10..FE19; COMMON
5685             0xFE1A,   // FE1A..FE1F; UNKNOWN
5686             0xFE20,   // FE20..FE2D; INHERITED
5687             0xFE2E,   // FE2E..FE2F; CYRILLIC
5688             0xFE30,   // FE30..FE52; COMMON
5689             0xFE53,   // FE53      ; UNKNOWN
5690             0xFE54,   // FE54..FE66; COMMON
5691             0xFE67,   // FE67      ; UNKNOWN
5692             0xFE68,   // FE68..FE6B; COMMON
5693             0xFE6C,   // FE6C..FE6F; UNKNOWN
5694             0xFE70,   // FE70..FE74; ARABIC
5695             0xFE75,   // FE75      ; UNKNOWN
5696             0xFE76,   // FE76..FEFC; ARABIC
5697             0xFEFD,   // FEFD..FEFE; UNKNOWN
5698             0xFEFF,   // FEFF      ; COMMON
5699             0xFF00,   // FF00      ; UNKNOWN
5700             0xFF01,   // FF01..FF20; COMMON
5701             0xFF21,   // FF21..FF3A; LATIN
5702             0xFF3B,   // FF3B..FF40; COMMON
5703             0xFF41,   // FF41..FF5A; LATIN
5704             0xFF5B,   // FF5B..FF65; COMMON
5705             0xFF66,   // FF66..FF6F; KATAKANA
5706             0xFF70,   // FF70      ; COMMON
5707             0xFF71,   // FF71..FF9D; KATAKANA
5708             0xFF9E,   // FF9E..FF9F; COMMON
5709             0xFFA0,   // FFA0..FFBE; HANGUL
5710             0xFFBF,   // FFBF..FFC1; UNKNOWN
5711             0xFFC2,   // FFC2..FFC7; HANGUL
5712             0xFFC8,   // FFC8..FFC9; UNKNOWN
5713             0xFFCA,   // FFCA..FFCF; HANGUL
5714             0xFFD0,   // FFD0..FFD1; UNKNOWN
5715             0xFFD2,   // FFD2..FFD7; HANGUL
5716             0xFFD8,   // FFD8..FFD9; UNKNOWN
5717             0xFFDA,   // FFDA..FFDC; HANGUL
5718             0xFFDD,   // FFDD..FFDF; UNKNOWN
5719             0xFFE0,   // FFE0..FFE6; COMMON
5720             0xFFE7,   // FFE7      ; UNKNOWN
5721             0xFFE8,   // FFE8..FFEE; COMMON
5722             0xFFEF,   // FFEF..FFF8; UNKNOWN
5723             0xFFF9,   // FFF9..FFFD; COMMON
5724             0xFFFE,   // FFFE..FFFF; UNKNOWN
5725             0x10000,  // 10000..1000B; LINEAR_B
5726             0x1000C,  // 1000C       ; UNKNOWN
5727             0x1000D,  // 1000D..10026; LINEAR_B
5728             0x10027,  // 10027       ; UNKNOWN
5729             0x10028,  // 10028..1003A; LINEAR_B
5730             0x1003B,  // 1003B       ; UNKNOWN
5731             0x1003C,  // 1003C..1003D; LINEAR_B
5732             0x1003E,  // 1003E       ; UNKNOWN
5733             0x1003F,  // 1003F..1004D; LINEAR_B
5734             0x1004E,  // 1004E..1004F; UNKNOWN
5735             0x10050,  // 10050..1005D; LINEAR_B
5736             0x1005E,  // 1005E..1007F; UNKNOWN
5737             0x10080,  // 10080..100FA; LINEAR_B
5738             0x100FB,  // 100FB..100FF; UNKNOWN
5739             0x10100,  // 10100..10102; COMMON
5740             0x10103,  // 10103..10106; UNKNOWN
5741             0x10107,  // 10107..10133; COMMON
5742             0x10134,  // 10134..10136; UNKNOWN
5743             0x10137,  // 10137..1013F; COMMON
5744             0x10140,  // 10140..1018E; GREEK
5745             0x1018F,  // 1018F       ; UNKNOWN
5746             0x10190,  // 10190..1019B; COMMON
5747             0x1019C,  // 1019C..1019F; UNKNOWN
5748             0x101A0,  // 101A0       ; GREEK
5749             0x101A1,  // 101A1..101CF; UNKNOWN
5750             0x101D0,  // 101D0..101FC; COMMON
5751             0x101FD,  // 101FD       ; INHERITED
5752             0x101FE,  // 101FE..1027F; UNKNOWN
5753             0x10280,  // 10280..1029C; LYCIAN
5754             0x1029D,  // 1029D..1029F; UNKNOWN
5755             0x102A0,  // 102A0..102D0; CARIAN
5756             0x102D1,  // 102D1..102DF; UNKNOWN
5757             0x102E0,  // 102E0       ; INHERITED
5758             0x102E1,  // 102E1..102FB; COMMON
5759             0x102FC,  // 102FC..102FF; UNKNOWN
5760             0x10300,  // 10300..10323; OLD_ITALIC
5761             0x10324,  // 10324..1032C; UNKNOWN
5762             0x1032D,  // 1032D..1032F; OLD_ITALIC
5763             0x10330,  // 10330..1034A; GOTHIC
5764             0x1034B,  // 1034B..1034F; UNKNOWN
5765             0x10350,  // 10350..1037A; OLD_PERMIC
5766             0x1037B,  // 1037B..1037F; UNKNOWN
5767             0x10380,  // 10380..1039D; UGARITIC
5768             0x1039E,  // 1039E       ; UNKNOWN
5769             0x1039F,  // 1039F       ; UGARITIC
5770             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5771             0x103C4,  // 103C4..103C7; UNKNOWN
5772             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5773             0x103D6,  // 103D6..103FF; UNKNOWN
5774             0x10400,  // 10400..1044F; DESERET
5775             0x10450,  // 10450..1047F; SHAVIAN
5776             0x10480,  // 10480..1049D; OSMANYA
5777             0x1049E,  // 1049E..1049F; UNKNOWN
5778             0x104A0,  // 104A0..104A9; OSMANYA
5779             0x104AA,  // 104AA..104AF; UNKNOWN
5780             0x104B0,  // 104B0..104D3; OSAGE
5781             0x104D4,  // 104D4..104D7; UNKNOWN
5782             0x104D8,  // 104D8..104FB; OSAGE
5783             0x104FC,  // 104FC..104FF; UNKNOWN
5784             0x10500,  // 10500..10527; ELBASAN
5785             0x10528,  // 10528..1052F; UNKNOWN
5786             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5787             0x10564,  // 10564..1056E; UNKNOWN
5788             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5789             0x10570,  // 10570..105FF; UNKNOWN
5790             0x10600,  // 10600..10736; LINEAR_A
5791             0x10737,  // 10737..1073F; UNKNOWN
5792             0x10740,  // 10740..10755; LINEAR_A
5793             0x10756,  // 10756..1075F; UNKNOWN
5794             0x10760,  // 10760..10767; LINEAR_A
5795             0x10768,  // 10768..107FF; UNKNOWN
5796             0x10800,  // 10800..10805; CYPRIOT
5797             0x10806,  // 10806..10807; UNKNOWN
5798             0x10808,  // 10808       ; CYPRIOT
5799             0x10809,  // 10809       ; UNKNOWN
5800             0x1080A,  // 1080A..10835; CYPRIOT
5801             0x10836,  // 10836       ; UNKNOWN
5802             0x10837,  // 10837..10838; CYPRIOT
5803             0x10839,  // 10839..1083B; UNKNOWN
5804             0x1083C,  // 1083C       ; CYPRIOT
5805             0x1083D,  // 1083D..1083E; UNKNOWN
5806             0x1083F,  // 1083F       ; CYPRIOT
5807             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5808             0x10856,  // 10856       ; UNKNOWN
5809             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5810             0x10860,  // 10860..1087F; PALMYRENE
5811             0x10880,  // 10880..1089E; NABATAEAN
5812             0x1089F,  // 1089F..108A6; UNKNOWN
5813             0x108A7,  // 108A7..108AF; NABATAEAN
5814             0x108B0,  // 108B0..108DF; UNKNOWN
5815             0x108E0,  // 108E0..108F2; HATRAN
5816             0x108F3,  // 108F3       ; UNKNOWN
5817             0x108F4,  // 108F4..108F5; HATRAN
5818             0x108F6,  // 108F6..108FA; UNKNOWN
5819             0x108FB,  // 108FB..108FF; HATRAN
5820             0x10900,  // 10900..1091B; PHOENICIAN
5821             0x1091C,  // 1091C..1091E; UNKNOWN
5822             0x1091F,  // 1091F       ; PHOENICIAN
5823             0x10920,  // 10920..10939; LYDIAN
5824             0x1093A,  // 1093A..1093E; UNKNOWN
5825             0x1093F,  // 1093F       ; LYDIAN
5826             0x10940,  // 10940..1097F; UNKNOWN
5827             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5828             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5829             0x109B8,  // 109B8..109BB; UNKNOWN
5830             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
5831             0x109D0,  // 109D0..109D1; UNKNOWN
5832             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
5833             0x10A00,  // 10A00..10A03; KHAROSHTHI
5834             0x10A04,  // 10A04       ; UNKNOWN
5835             0x10A05,  // 10A05..10A06; KHAROSHTHI
5836             0x10A07,  // 10A07..10A0B; UNKNOWN
5837             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5838             0x10A14,  // 10A14       ; UNKNOWN
5839             0x10A15,  // 10A15..10A17; KHAROSHTHI
5840             0x10A18,  // 10A18       ; UNKNOWN
5841             0x10A19,  // 10A19..10A35; KHAROSHTHI
5842             0x10A36,  // 10A36..10A37; UNKNOWN
5843             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5844             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5845             0x10A3F,  // 10A3F..10A48; KHAROSHTHI
5846             0x10A49,  // 10A49..10A4F; UNKNOWN
5847             0x10A50,  // 10A50..10A58; KHAROSHTHI
5848             0x10A59,  // 10A59..10A5F; UNKNOWN
5849             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5850             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5851             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5852             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5853             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5854             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5855             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5856             0x10B00,  // 10B00..10B35; AVESTAN
5857             0x10B36,  // 10B36..10B38; UNKNOWN
5858             0x10B39,  // 10B39..10B3F; AVESTAN
5859             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5860             0x10B56,  // 10B56..10B57; UNKNOWN
5861             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5862             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5863             0x10B73,  // 10B73..10B77; UNKNOWN
5864             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5865             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5866             0x10B92,  // 10B92..10B98; UNKNOWN
5867             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5868             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5869             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5870             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5871             0x10C00,  // 10C00..10C48; OLD_TURKIC
5872             0x10C49,  // 10C49..10C7F; UNKNOWN
5873             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
5874             0x10CB3,  // 10CB3..10CBF; UNKNOWN
5875             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
5876             0x10CF3,  // 10CF3..10CF9; UNKNOWN
5877             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
5878             0x10D00,  // 10D00..10D27; HANIFI_ROHINGYA
5879             0x10D28,  // 10D28..10D2F; UNKNOWN
5880             0x10D30,  // 10D30..10D39; HANIFI_ROHINGYA
5881             0x10D3A,  // 10D3A..10E5F; UNKNOWN
5882             0x10E60,  // 10E60..10E7E; ARABIC
5883             0x10E7F,  // 10E7F..10EFF; UNKNOWN
5884             0x10F00,  // 10F00..10F27; OLD_SOGDIAN
5885             0x10F28,  // 10F28..10F2F; UNKNOWN
5886             0x10F30,  // 10F30..10F59; SOGDIAN
5887             0x10F5A,  // 10F5A..10FFF; UNKNOWN
5888             0x11000,  // 11000..1104D; BRAHMI
5889             0x1104E,  // 1104E..11051; UNKNOWN
5890             0x11052,  // 11052..1106F; BRAHMI
5891             0x11070,  // 11070..1107E; UNKNOWN
5892             0x1107F,  // 1107F       ; BRAHMI
5893             0x11080,  // 11080..110C1; KAITHI
5894             0x110C2,  // 110C2..110CC; UNKNOWN
5895             0x110CD,  // 110CD       ; KAITHI
5896             0x110CE,  // 110CE..110CF; UNKNOWN
5897             0x110D0,  // 110D0..110E8; SORA_SOMPENG
5898             0x110E9,  // 110E9..110EF; UNKNOWN
5899             0x110F0,  // 110F0..110F9; SORA_SOMPENG
5900             0x110FA,  // 110FA..110FF; UNKNOWN
5901             0x11100,  // 11100..11134; CHAKMA
5902             0x11135,  // 11135       ; UNKNOWN
5903             0x11136,  // 11136..11146; CHAKMA
5904             0x11147,  // 11147..1114F; UNKNOWN
5905             0x11150,  // 11150..11176; MAHAJANI
5906             0x11177,  // 11177..1117F; UNKNOWN
5907             0x11180,  // 11180..111CD; SHARADA
5908             0x111CE,  // 111CE..111CF; UNKNOWN
5909             0x111D0,  // 111D0..111DF; SHARADA
5910             0x111E0,  // 111E0       ; UNKNOWN
5911             0x111E1,  // 111E1..111F4; SINHALA
5912             0x111F5,  // 111F5..111FF; UNKNOWN
5913             0x11200,  // 11200..11211; KHOJKI
5914             0x11212,  // 11212       ; UNKNOWN
5915             0x11213,  // 11213..1123E; KHOJKI
5916             0x1123F,  // 1123F..1127F; UNKNOWN
5917             0x11280,  // 11280..11286; MULTANI
5918             0x11287,  // 11287       ; UNKNOWN
5919             0x11288,  // 11288       ; MULTANI
5920             0x11289,  // 11289       ; UNKNOWN
5921             0x1128A,  // 1128A..1128D; MULTANI
5922             0x1128E,  // 1128E       ; UNKNOWN
5923             0x1128F,  // 1128F..1129D; MULTANI
5924             0x1129E,  // 1129E       ; UNKNOWN
5925             0x1129F,  // 1129F..112A9; MULTANI
5926             0x112AA,  // 112AA..112AF; UNKNOWN
5927             0x112B0,  // 112B0..112EA; KHUDAWADI
5928             0x112EB,  // 112EB..112EF; UNKNOWN
5929             0x112F0,  // 112F0..112F9; KHUDAWADI
5930             0x112FA,  // 112FA..112FF; UNKNOWN
5931             0x11300,  // 11300..11303; GRANTHA
5932             0x11304,  // 11304       ; UNKNOWN
5933             0x11305,  // 11305..1130C; GRANTHA
5934             0x1130D,  // 1130D..1130E; UNKNOWN
5935             0x1130F,  // 1130F..11310; GRANTHA
5936             0x11311,  // 11311..11312; UNKNOWN
5937             0x11313,  // 11313..11328; GRANTHA
5938             0x11329,  // 11329       ; UNKNOWN
5939             0x1132A,  // 1132A..11330; GRANTHA
5940             0x11331,  // 11331       ; UNKNOWN
5941             0x11332,  // 11332..11333; GRANTHA
5942             0x11334,  // 11334       ; UNKNOWN
5943             0x11335,  // 11335..11339; GRANTHA
5944             0x1133A,  // 1133A       ; UNKNOWN
5945             0x1133B,  // 1133B       ; INHERITED
5946             0x1133C,  // 1133C..11344; GRANTHA
5947             0x11345,  // 11345..11346; UNKNOWN
5948             0x11347,  // 11347..11348; GRANTHA
5949             0x11349,  // 11349..1134A; UNKNOWN
5950             0x1134B,  // 1134B..1134D; GRANTHA
5951             0x1134E,  // 1134E..1134F; UNKNOWN
5952             0x11350,  // 11350       ; GRANTHA
5953             0x11351,  // 11351..11356; UNKNOWN
5954             0x11357,  // 11357       ; GRANTHA
5955             0x11358,  // 11358..1135C; UNKNOWN
5956             0x1135D,  // 1135D..11363; GRANTHA
5957             0x11364,  // 11364..11365; UNKNOWN
5958             0x11366,  // 11366..1136C; GRANTHA
5959             0x1136D,  // 1136D..1136F; UNKNOWN
5960             0x11370,  // 11370..11374; GRANTHA
5961             0x11375,  // 11375..113FF; UNKNOWN
5962             0x11400,  // 11400..11459; NEWA
5963             0x1145A,  // 1145A       ; UNKNOWN
5964             0x1145B,  // 1145B       ; NEWA
5965             0x1145C,  // 1145C       ; UNKNOWN
5966             0x1145D,  // 1145D..1145E; NEWA
5967             0x1145F,  // 1145F..1147F; UNKNOWN
5968             0x11480,  // 11480..114C7; TIRHUTA
5969             0x114C8,  // 114C8..114CF; UNKNOWN
5970             0x114D0,  // 114D0..114D9; TIRHUTA
5971             0x114DA,  // 114DA..1157F; UNKNOWN
5972             0x11580,  // 11580..115B5; SIDDHAM
5973             0x115B6,  // 115B6..115B7; UNKNOWN
5974             0x115B8,  // 115B8..115DD; SIDDHAM
5975             0x115DE,  // 115DE..115FF; UNKNOWN
5976             0x11600,  // 11600..11644; MODI
5977             0x11645,  // 11645..1164F; UNKNOWN
5978             0x11650,  // 11650..11659; MODI
5979             0x1165A,  // 1165A..1165F; UNKNOWN
5980             0x11660,  // 11660..1166C; MONGOLIAN
5981             0X1166D,  // 1166D..1167F; UNKNOWN
5982             0x11680,  // 11680..116B7; TAKRI
5983             0x116B8,  // 116B8..116BF; UNKNOWN
5984             0x116C0,  // 116C0..116C9; TAKRI
5985             0x116CA,  // 116CA..116FF; UNKNOWN
5986             0x11700,  // 11700..1171A; AHOM
5987             0x1171B,  // 1171B..1171C; UNKNOWN
5988             0x1171D,  // 1171D..1172B; AHOM
5989             0x1172C,  // 1172C..1172F; UNKNOWN
5990             0x11730,  // 11730..1173F; AHOM
5991             0x11740,  // 11740..117FF; UNKNOWN
5992             0x11800,  // 11800..1183B; DOGRA
5993             0x1183C,  // 1183C..1189F; UNKNOWN           
5994             0x118A0,  // 118A0..118F2; WARANG_CITI
5995             0x118F3,  // 118F3..118FE; UNKNOWN
5996             0x118FF,  // 118FF       ; WARANG_CITI
5997             0x11900,  // 11900..119FF; UNKNOWN
5998             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
5999             0X11A48,  // 11A48..11A4F; UNKNOWN
6000             0x11A50,  // 11A50..11A83; SOYOMBO
6001             0x11A84,  // 11A84..11A85; UNKNOWN
6002             0x11A86,  // 11A86..11AA2; SOYOMBO
6003             0x11AA3,  // 11AA3..11ABF; UNKNOWN
6004             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
6005             0x11AF9,  // 11AF9..11BFF; UNKNOWN
6006             0x11C00,  // 11C00..11C08; BHAIKSUKI
6007             0x11C09,  // 11C09       ; UNKNOWN
6008             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
6009             0x11C37,  // 11C37       ; UNKNOWN
6010             0x11C38,  // 11C38..11C45; BHAIKSUKI
6011             0x11C46,  // 11C46..11C4F; UNKNOWN
6012             0x11C50,  // 11C50..11C6C; BHAIKSUKI
6013             0x11C6D,  // 11C6D..11C6F; UNKNOWN
6014             0x11C70,  // 11C70..11C8F; MARCHEN
6015             0x11C90,  // 11C90..11C91; UNKNOWN
6016             0x11C92,  // 11C92..11CA7; MARCHEN
6017             0x11CA8,  // 11CA8       ; UNKNOWN
6018             0x11CA9,  // 11CA9..11CB6; MARCHEN
6019             0x11CB7,  // 11CB7..11CFF; UNKNOWN
6020             0x11D00,  // 11D00..11D06; MASARAM_GONDI
6021             0x11D07,  // 11D07       ; UNKNOWN
6022             0x11D08,  // 11D08..11D09; MASARAM_GONDI
6023             0x11D0A,  // 11D0A       ; UNKNOWN
6024             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
6025             0x11D37,  // 11D37..11D39; UNKNOWN
6026             0x11D3A,  // 11D3A       ; MASARAM_GONDI
6027             0x11D3B,  // 11D3B       ; UNKNOWN
6028             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
6029             0x11D3E,  // 11D3E       ; UNKNOWN
6030             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
6031             0x11D48,  // 11D48..11D4F; UNKNOWN
6032             0x11D50,  // 11D50..11D59; MASARAM_GONDI
6033             0x11D5A,  // 11D5A..11D5F; UNKNOWN
6034             0x11D60,  // 11D60..11D68; GUNJALA_GONDI
6035             0x11D69,  // 11D69       ; UNKNOWN
6036             0x11D6A,  // 11D6A..11D8E; GUNJALA_GONDI
6037             0x11D8F,  // 11D8F       ; UNKNOWN
6038             0x11D90,  // 11D90..11D91; GUNJALA_GONDI
6039             0x11D92,  // 11D92       ; UNKNOWN
6040             0x11D93,  // 11D93..11D98; GUNJALA_GONDI
6041             0x11D99,  // 11D99..11D9F; UNKNOWN
6042             0x11DA0,  // 11DA0..11DA9; GUNJALA_GONDI
6043             0x11DAA,  // 11DAA..11EDF; UNKNOWN
6044             0x11EE0,  // 11EE0..11EF8; MAKASAR
6045             0x11EF9,  // 11EF9..11FFF; UNKNOWN
6046             0x12000,  // 12000..12399; CUNEIFORM
6047             0x1239A,  // 1239A..123FF; UNKNOWN
6048             0x12400,  // 12400..1246E; CUNEIFORM
6049             0x1246F,  // 1246F       ; UNKNOWN
6050             0x12470,  // 12470..12474; CUNEIFORM
6051             0x12475,  // 12475..1247F; UNKNOWN
6052             0x12480,  // 12480..12543; CUNEIFORM
6053             0x12544,  // 12544..12FFF; UNKNOWN
6054             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
6055             0x1342F,  // 1342F..143FF; UNKNOWN
6056             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
6057             0x14647,  // 14647..167FF; UNKNOWN
6058             0x16800,  // 16800..16A38; BAMUM
6059             0x16A39,  // 16A39..16A3F; UNKNOWN
6060             0x16A40,  // 16A40..16A5E; MRO
6061             0x16A5F,  // 16A5F       ; UNKNOWN
6062             0x16A60,  // 16A60..16A69; MRO
6063             0x16A6A,  // 16A6A..16A6D; UNKNOWN
6064             0x16A6E,  // 16A6E..16A6F; MRO
6065             0x16A70,  // 16A70..16ACF; UNKNOWN
6066             0x16AD0,  // 16AD0..16AED; BASSA_VAH
6067             0x16AEE,  // 16AEE..16AEF; UNKNOWN
6068             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
6069             0x16AF6,  // 16AF6..16AFF; UNKNOWN
6070             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
6071             0x16B46,  // 16B46..16B4F; UNKNOWN
6072             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
6073             0x16B5A,  // 16B5A       ; UNKNOWN
6074             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
6075             0x16B62,  // 16B62       ; UNKNOWN
6076             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
6077             0x16B78,  // 16B78..16B7C; UNKNOWN
6078             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
6079             0x16B90,  // 16B90..16E3F; UNKNOWN
6080             0x16E40,  // 16E40..16E9A; MEDEFAIDRIN
6081             0x16E9B,  // 16E9B..16EFF; UNKNOWN
6082             0x16F00,  // 16F00..16F44; MIAO
6083             0x16F45,  // 16F45..16F4F; UNKNOWN
6084             0x16F50,  // 16F50..16F7E; MIAO
6085             0x16F7F,  // 16F7F..16F8E; UNKNOWN
6086             0x16F8F,  // 16F8F..16F9F; MIAO
6087             0x16FA0,  // 16FA0..16FDF; UNKNOWN
6088             0x16FE0,  // 16FE0       ; TANGUT
6089             0x16FE1,  // 16FE1       ; NUSHU
6090             0x16FE2,  // 16FE2..16FFF; UNKNOWN
6091             0x17000,  // 17000..187F1; TANGUT
6092             0x187F2,  // 187F2..187FF; UNKNOWN
6093             0x18800,  // 18800..18AF2; TANGUT
6094             0x18AF3,  // 18AF3..1AFFF; UNKNOWN
6095             0x1B000,  // 1B000       ; KATAKANA
6096             0x1B001,  // 1B001..1B11E; HIRAGANA
6097             0x1B11F,  // 1B11F..1B16F; UNKNOWN
6098             0x1B170,  // 1B170..1B2FB; NUSHU
6099             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
6100             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
6101             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
6102             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
6103             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
6104             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
6105             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
6106             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
6107             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
6108             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
6109             0x1BCA0,  // 1BCA0..1BCA3; COMMON
6110             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
6111             0x1D000,  // 1D000..1D0F5; COMMON
6112             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
6113             0x1D100,  // 1D100..1D126; COMMON
6114             0x1D127,  // 1D127..1D128; UNKNOWN
6115             0x1D129,  // 1D129..1D166; COMMON
6116             0x1D167,  // 1D167..1D169; INHERITED
6117             0x1D16A,  // 1D16A..1D17A; COMMON
6118             0x1D17B,  // 1D17B..1D182; INHERITED
6119             0x1D183,  // 1D183..1D184; COMMON
6120             0x1D185,  // 1D185..1D18B; INHERITED
6121             0x1D18C,  // 1D18C..1D1A9; COMMON
6122             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
6123             0x1D1AE,  // 1D1AE..1D1E8; COMMON
6124             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
6125             0x1D200,  // 1D200..1D245; GREEK
6126             0x1D246,  // 1D246..1D2DF; UNKNOWN
6127             0x1D2E0,  // 1D2E0..1D2F3; COMMON
6128             0x1D2F4,  // 1D2F4..1D2FF; UNKNOWN
6129             0x1D300,  // 1D300..1D356; COMMON
6130             0x1D357,  // 1D357..1D35F; UNKNOWN
6131             0x1D360,  // 1D360..1D378; COMMON
6132             0x1D379,  // 1D379..1D3FF; UNKNOWN
6133             0x1D400,  // 1D400..1D454; COMMON
6134             0x1D455,  // 1D455       ; UNKNOWN
6135             0x1D456,  // 1D456..1D49C; COMMON
6136             0x1D49D,  // 1D49D       ; UNKNOWN
6137             0x1D49E,  // 1D49E..1D49F; COMMON
6138             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
6139             0x1D4A2,  // 1D4A2       ; COMMON
6140             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
6141             0x1D4A5,  // 1D4A5..1D4A6; COMMON
6142             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
6143             0x1D4A9,  // 1D4A9..1D4AC; COMMON
6144             0x1D4AD,  // 1D4AD       ; UNKNOWN
6145             0x1D4AE,  // 1D4AE..1D4B9; COMMON
6146             0x1D4BA,  // 1D4BA       ; UNKNOWN
6147             0x1D4BB,  // 1D4BB       ; COMMON
6148             0x1D4BC,  // 1D4BC       ; UNKNOWN
6149             0x1D4BD,  // 1D4BD..1D4C3; COMMON
6150             0x1D4C4,  // 1D4C4       ; UNKNOWN
6151             0x1D4C5,  // 1D4C5..1D505; COMMON
6152             0x1D506,  // 1D506       ; UNKNOWN
6153             0x1D507,  // 1D507..1D50A; COMMON
6154             0x1D50B,  // 1D50B..1D50C; UNKNOWN
6155             0x1D50D,  // 1D50D..1D514; COMMON
6156             0x1D515,  // 1D515       ; UNKNOWN
6157             0x1D516,  // 1D516..1D51C; COMMON
6158             0x1D51D,  // 1D51D       ; UNKNOWN
6159             0x1D51E,  // 1D51E..1D539; COMMON
6160             0x1D53A,  // 1D53A       ; UNKNOWN
6161             0x1D53B,  // 1D53B..1D53E; COMMON
6162             0x1D53F,  // 1D53F       ; UNKNOWN
6163             0x1D540,  // 1D540..1D544; COMMON
6164             0x1D545,  // 1D545       ; UNKNOWN
6165             0x1D546,  // 1D546       ; COMMON
6166             0x1D547,  // 1D547..1D549; UNKNOWN
6167             0x1D54A,  // 1D54A..1D550; COMMON
6168             0x1D551,  // 1D551       ; UNKNOWN
6169             0x1D552,  // 1D552..1D6A5; COMMON
6170             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
6171             0x1D6A8,  // 1D6A8..1D7CB; COMMON
6172             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
6173             0x1D7CE,  // 1D7CE..1D7FF; COMMON
6174             0x1D800,  // 1D800..1DA8B; SIGNWRITING
6175             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
6176             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
6177             0x1DAA0,  // 1DAA0       ; UNKNOWN
6178             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
6179             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
6180             0x1E000,  // 1E000..1E006; GLAGOLITIC
6181             0x1E007,  // 1E007       ; UNKNOWN
6182             0x1E008,  // 1E008..1E018; GLAGOLITIC
6183             0x1E019,  // 1E019..1E01A; UNKNOWN
6184             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
6185             0x1E022,  // 1E022       ; UNKNOWN
6186             0x1E023,  // 1E023..1E024; GLAGOLITIC
6187             0x1E025,  // 1E025       ; UNKNOWN
6188             0x1E026,  // 1E026..1E02A; GLAGOLITIC
6189             0x1E02B,  // 1E02B..1E7FF; UNKNOWN
6190             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6191             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6192             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6193             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6194             0x1E900,  // 1E900..1E94A; ADLAM
6195             0x1E94B,  // 1E94B..1E94F; UNKNOWN
6196             0x1E950,  // 1E950..1E959; ADLAM
6197             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6198             0x1E95E,  // 1E95E..1E95F; ADLAM
6199             0x1E960,  // 1E960..1EC70; UNKNOWN
6200             0x1EC71,  // 1EC71..1ECB4; COMMON
6201             0x1ECB5,  // 1ECB5..1EDFF; UNKNOWN            
6202             0x1EE00,  // 1EE00..1EE03; ARABIC
6203             0x1EE04,  // 1EE04       ; UNKNOWN
6204             0x1EE05,  // 1EE05..1EE1F; ARABIC
6205             0x1EE20,  // 1EE20       ; UNKNOWN
6206             0x1EE21,  // 1EE21..1EE22; ARABIC
6207             0x1EE23,  // 1EE23       ; UNKNOWN
6208             0x1EE24,  // 1EE24       ; ARABIC
6209             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6210             0x1EE27,  // 1EE27       ; ARABIC
6211             0x1EE28,  // 1EE28       ; UNKNOWN
6212             0x1EE29,  // 1EE29..1EE32; ARABIC
6213             0x1EE33,  // 1EE33       ; UNKNOWN
6214             0x1EE34,  // 1EE34..1EE37; ARABIC
6215             0x1EE38,  // 1EE38       ; UNKNOWN
6216             0x1EE39,  // 1EE39       ; ARABIC
6217             0x1EE3A,  // 1EE3A       ; UNKNOWN
6218             0x1EE3B,  // 1EE3B       ; ARABIC
6219             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6220             0x1EE42,  // 1EE42       ; ARABIC
6221             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6222             0x1EE47,  // 1EE47       ; ARABIC
6223             0x1EE48,  // 1EE48       ; UNKNOWN
6224             0x1EE49,  // 1EE49       ; ARABIC
6225             0x1EE4A,  // 1EE4A       ; UNKNOWN
6226             0x1EE4B,  // 1EE4B       ; ARABIC
6227             0x1EE4C,  // 1EE4C       ; UNKNOWN
6228             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6229             0x1EE50,  // 1EE50       ; UNKNOWN
6230             0x1EE51,  // 1EE51..1EE52; ARABIC
6231             0x1EE53,  // 1EE53       ; UNKNOWN
6232             0x1EE54,  // 1EE54       ; ARABIC
6233             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6234             0x1EE57,  // 1EE57       ; ARABIC
6235             0x1EE58,  // 1EE58       ; UNKNOWN
6236             0x1EE59,  // 1EE59       ; ARABIC
6237             0x1EE5A,  // 1EE5A       ; UNKNOWN
6238             0x1EE5B,  // 1EE5B       ; ARABIC
6239             0x1EE5C,  // 1EE5C       ; UNKNOWN
6240             0x1EE5D,  // 1EE5D       ; ARABIC
6241             0x1EE5E,  // 1EE5E       ; UNKNOWN
6242             0x1EE5F,  // 1EE5F       ; ARABIC
6243             0x1EE60,  // 1EE60       ; UNKNOWN
6244             0x1EE61,  // 1EE61..1EE62; ARABIC
6245             0x1EE63,  // 1EE63       ; UNKNOWN
6246             0x1EE64,  // 1EE64       ; ARABIC
6247             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6248             0x1EE67,  // 1EE67..1EE6A; ARABIC
6249             0x1EE6B,  // 1EE6B       ; UNKNOWN
6250             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6251             0x1EE73,  // 1EE73       ; UNKNOWN
6252             0x1EE74,  // 1EE74..1EE77; ARABIC
6253             0x1EE78,  // 1EE78       ; UNKNOWN
6254             0x1EE79,  // 1EE79..1EE7C; ARABIC
6255             0x1EE7D,  // 1EE7D       ; UNKNOWN
6256             0x1EE7E,  // 1EE7E       ; ARABIC
6257             0x1EE7F,  // 1EE7F       ; UNKNOWN
6258             0x1EE80,  // 1EE80..1EE89; ARABIC
6259             0x1EE8A,  // 1EE8A       ; UNKNOWN
6260             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6261             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6262             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6263             0x1EEA4,  // 1EEA4       ; UNKNOWN
6264             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6265             0x1EEAA,  // 1EEAA       ; UNKNOWN
6266             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6267             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6268             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6269             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6270             0x1F000,  // 1F000..1F02B; COMMON
6271             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6272             0x1F030,  // 1F030..1F093; COMMON
6273             0x1F094,  // 1F094..1F09F; UNKNOWN
6274             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6275             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6276             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6277             0x1F0C0,  // 1F0C0       ; UNKNOWN
6278             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6279             0x1F0D0,  // 1F0D0       ; UNKNOWN
6280             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6281             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6282             0x1F100,  // 1F100..1F10C; COMMON
6283             0x1F10D,  // 1F10D..1F10F; UNKNOWN
6284             0x1F110,  // 1F110..1F16B; COMMON
6285             0x1F16C,  // 1F16C..1F16F; UNKNOWN
6286             0x1F170,  // 1F170..1F1AC; COMMON
6287             0x1F1AD,  // 1F1AD..1F1E5; UNKNOWN
6288             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6289             0x1F200,  // 1F200       ; HIRAGANA
6290             0x1F201,  // 1F201..1F202; COMMON
6291             0x1F203,  // 1F203..1F20F; UNKNOWN
6292             0x1F210,  // 1F210..1F23B; COMMON
6293             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6294             0x1F240,  // 1F240..1F248; COMMON
6295             0x1F249,  // 1F249..1F24F; UNKNOWN
6296             0x1F250,  // 1F250..1F251; COMMON
6297             0x1F252,  // 1F252..1F25F; UNKNOWN
6298             0x1F260,  // 1F260..1F265; COMMON
6299             0x1F266,  // 1F266..1F2FF; UNKNOWN
6300             0x1F300,  // 1F300..1F6D4; COMMON
6301             0x1F6D5,  // 1F6D5..1F6DF; UNKNOWN
6302             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6303             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6304             0x1F6F0,  // 1F6F0..1F6F9; COMMON
6305             0x1F6FA,  // 1F6FA..1F6FF; UNKNOWN
6306             0x1F700,  // 1F700..1F773; COMMON
6307             0x1F774,  // 1F774..1F77F; UNKNOWN
6308             0x1F780,  // 1F780..1F7D8; COMMON
6309             0x1F7D9,  // 1F7D9..1F7FF; UNKNOWN
6310             0x1F800,  // 1F800..1F80B; COMMON
6311             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6312             0x1F810,  // 1F810..1F847; COMMON
6313             0x1F848,  // 1F848..1F84F; UNKNOWN
6314             0x1F850,  // 1F850..1F859; COMMON
6315             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6316             0x1F860,  // 1F860..1F887; COMMON
6317             0x1F888,  // 1F888..1F88F; UNKNOWN
6318             0x1F890,  // 1F890..1F8AD; COMMON
6319             0x1F8AE,  // 1F8AE..1F8FF; UNKNOWN
6320             0x1F900,  // 1F900..1F90B; COMMON
6321             0x1F90C,  // 1F90C..1F90F; UNKNOWN
6322             0x1F910,  // 1F910..1F93E; COMMON
6323             0x1F93F,  // 1F93F       ; UNKNOWN
6324             0x1F940,  // 1F940..1F970; COMMON
6325             0x1F971,  // 1F971..1F972; UNKNOWN
6326             0x1F973,  // 1F973..1F976; COMMON
6327             0x1F977,  // 1F977..1F979; UNKNOWN
6328             0x1F97A,  // 1F97A       ; COMMON
6329             0x1F97B,  // 1F97B       ; UNKNOWN
6330             0x1F97C,  // 1F97C..1F9A2; COMMON
6331             0x1F9A3,  // 1F9A3..1F9AF; UNKNOWN
6332             0x1F9B0,  // 1F9B0..1F9B9; COMMON
6333             0x1F9BA,  // 1F9BA..1F9BF; UNKNOWN
6334             0x1F9C0,  // 1F9C0..1F9C2; COMMON
6335             0x1F9C3,  // 1F9C3..1F9CF; UNKNOWN
6336             0x1F9D0,  // 1F9D0..1F9FF; COMMON
6337             0x1FA00,  // 1FA00..1FA5F; UNKNOWN
6338             0x1FA60,  // 1FA60..1FA6D; COMMON
6339             0x1FA6E,  // 1FA6E..1FFFF; UNKNOWN
6340             0x20000,  // 20000..2A6D6; HAN
6341             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
6342             0x2A700,  // 2A700..2B734; HAN
6343             0x2B735,  // 2B735..2B73F; UNKNOWN
6344             0x2B740,  // 2B740..2B81D; HAN
6345             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6346             0x2B820,  // 2B820..2CEA1; HAN
6347             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6348             0x2CEB0,  // 2CEB0..2EBE0; HAN
6349             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6350             0x2F800,  // 2F800..2FA1D; HAN
6351             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
6352             0xE0001,  // E0001       ; COMMON
6353             0xE0002,  // E0002..E001F; UNKNOWN
6354             0xE0020,  // E0020..E007F; COMMON
6355             0xE0080,  // E0080..E00FF; UNKNOWN
6356             0xE0100,  // E0100..E01EF; INHERITED
6357             0xE01F0   // E01F0..10FFFF; UNKNOWN
6358         };
6359 
6360         private static final UnicodeScript[] scripts = {
6361             COMMON,                   // 0000..0040
6362             LATIN,                    // 0041..005A
6363             COMMON,                   // 005B..0060
6364             LATIN,                    // 0061..007A
6365             COMMON,                   // 007B..00A9
6366             LATIN,                    // 00AA
6367             COMMON,                   // 00AB..00B9
6368             LATIN,                    // 00BA
6369             COMMON,                   // 00BB..00BF
6370             LATIN,                    // 00C0..00D6
6371             COMMON,                   // 00D7
6372             LATIN,                    // 00D8..00F6
6373             COMMON,                   // 00F7
6374             LATIN,                    // 00F8..02B8
6375             COMMON,                   // 02B9..02DF
6376             LATIN,                    // 02E0..02E4
6377             COMMON,                   // 02E5..02E9
6378             BOPOMOFO,                 // 02EA..02EB
6379             COMMON,                   // 02EC..02FF
6380             INHERITED,                // 0300..036F
6381             GREEK,                    // 0370..0373
6382             COMMON,                   // 0374
6383             GREEK,                    // 0375..0377
6384             UNKNOWN,                  // 0378..0379
6385             GREEK,                    // 037A..037D
6386             COMMON,                   // 037E
6387             GREEK,                    // 037F
6388             UNKNOWN,                  // 0380..0383
6389             GREEK,                    // 0384
6390             COMMON,                   // 0385
6391             GREEK,                    // 0386
6392             COMMON,                   // 0387
6393             GREEK,                    // 0388..038A
6394             UNKNOWN,                  // 038B
6395             GREEK,                    // 038C
6396             UNKNOWN,                  // 038D
6397             GREEK,                    // 038E..03A1
6398             UNKNOWN,                  // 03A2
6399             GREEK,                    // 03A3..03E1
6400             COPTIC,                   // 03E2..03EF
6401             GREEK,                    // 03F0..03FF
6402             CYRILLIC,                 // 0400..0484
6403             INHERITED,                // 0485..0486
6404             CYRILLIC,                 // 0487..052F
6405             UNKNOWN,                  // 0530
6406             ARMENIAN,                 // 0531..0556
6407             UNKNOWN,                  // 0557..0558
6408             ARMENIAN,                 // 0559..0588
6409             COMMON,                   // 0589
6410             ARMENIAN,                 // 058A
6411             UNKNOWN,                  // 058B..058C
6412             ARMENIAN,                 // 058D..058F
6413             UNKNOWN,                  // 0590
6414             HEBREW,                   // 0591..05C7
6415             UNKNOWN,                  // 05C8..05CF
6416             HEBREW,                   // 05D0..05EA
6417             UNKNOWN,                  // 05EB..05EE
6418             HEBREW,                   // 05EF..05F4
6419             UNKNOWN,                  // 05F5..05FF
6420             ARABIC,                   // 0600..0604
6421             COMMON,                   // 0605
6422             ARABIC,                   // 0606..060B
6423             COMMON,                   // 060C
6424             ARABIC,                   // 060D..061A
6425             COMMON,                   // 061B
6426             ARABIC,                   // 061C
6427             UNKNOWN,                  // 061D
6428             ARABIC,                   // 061E
6429             COMMON,                   // 061F
6430             ARABIC,                   // 0620..063F
6431             COMMON,                   // 0640
6432             ARABIC,                   // 0641..064A
6433             INHERITED,                // 064B..0655
6434             ARABIC,                   // 0656..066F
6435             INHERITED,                // 0670
6436             ARABIC,                   // 0671..06DC
6437             COMMON,                   // 06DD
6438             ARABIC,                   // 06DE..06FF
6439             SYRIAC,                   // 0700..070D
6440             UNKNOWN,                  // 070E
6441             SYRIAC,                   // 070F..074A
6442             UNKNOWN,                  // 074B..074C
6443             SYRIAC,                   // 074D..074F
6444             ARABIC,                   // 0750..077F
6445             THAANA,                   // 0780..07B1
6446             UNKNOWN,                  // 07B2..07BF
6447             NKO,                      // 07C0..07FA
6448             UNKNOWN,                  // 07FB..07FC
6449             NKO,                      // 07FD..07FF
6450             SAMARITAN,                // 0800..082D
6451             UNKNOWN,                  // 082E..082F
6452             SAMARITAN,                // 0830..083E
6453             UNKNOWN,                  // 083F
6454             MANDAIC,                  // 0840..085B
6455             UNKNOWN,                  // 085C..085D
6456             MANDAIC,                  // 085E
6457             UNKNOWN,                  // 085F
6458             SYRIAC,                   // 0860..086A
6459             UNKNOWN,                  // 086B..089F
6460             ARABIC,                   // 08A0..08B4
6461             UNKNOWN,                  // 08B5
6462             ARABIC,                   // 08B6..08BD
6463             UNKNOWN,                  // 08BE..08D2
6464             ARABIC,                   // 08D3..08E1
6465             COMMON,                   // 08E2
6466             ARABIC,                   // 08E3..08FF
6467             DEVANAGARI,               // 0900..0950
6468             INHERITED,                // 0951..0952
6469             DEVANAGARI,               // 0953..0963
6470             COMMON,                   // 0964..0965
6471             DEVANAGARI,               // 0966..097F
6472             BENGALI,                  // 0980..0983
6473             UNKNOWN,                  // 0984
6474             BENGALI,                  // 0985..098C
6475             UNKNOWN,                  // 098D..098E
6476             BENGALI,                  // 098F..0990
6477             UNKNOWN,                  // 0991..0992
6478             BENGALI,                  // 0993..09A8
6479             UNKNOWN,                  // 09A9
6480             BENGALI,                  // 09AA..09B0
6481             UNKNOWN,                  // 09B1
6482             BENGALI,                  // 09B2
6483             UNKNOWN,                  // 09B3..09B5
6484             BENGALI,                  // 09B6..09B9
6485             UNKNOWN,                  // 09BA..09BB
6486             BENGALI,                  // 09BC..09C4
6487             UNKNOWN,                  // 09C5..09C6
6488             BENGALI,                  // 09C7..09C8
6489             UNKNOWN,                  // 09C9..09CA
6490             BENGALI,                  // 09CB..09CE
6491             UNKNOWN,                  // 09CF..09D6
6492             BENGALI,                  // 09D7
6493             UNKNOWN,                  // 09D8..09DB
6494             BENGALI,                  // 09DC..09DD
6495             UNKNOWN,                  // 09DE
6496             BENGALI,                  // 09DF..09E3
6497             UNKNOWN,                  // 09E4..09E5
6498             BENGALI,                  // 09E6..09FE
6499             UNKNOWN,                  // 09FF..0A00
6500             GURMUKHI,                 // 0A01..0A03
6501             UNKNOWN,                  // 0A04
6502             GURMUKHI,                 // 0A05..0A0A
6503             UNKNOWN,                  // 0A0B..0A0E
6504             GURMUKHI,                 // 0A0F..0A10
6505             UNKNOWN,                  // 0A11..0A12
6506             GURMUKHI,                 // 0A13..0A28
6507             UNKNOWN,                  // 0A29
6508             GURMUKHI,                 // 0A2A..0A30
6509             UNKNOWN,                  // 0A31
6510             GURMUKHI,                 // 0A32..0A33
6511             UNKNOWN,                  // 0A34
6512             GURMUKHI,                 // 0A35..0A36
6513             UNKNOWN,                  // 0A37
6514             GURMUKHI,                 // 0A38..0A39
6515             UNKNOWN,                  // 0A3A..0A3B
6516             GURMUKHI,                 // 0A3C
6517             UNKNOWN,                  // 0A3D
6518             GURMUKHI,                 // 0A3E..0A42
6519             UNKNOWN,                  // 0A43..0A46
6520             GURMUKHI,                 // 0A47..0A48
6521             UNKNOWN,                  // 0A49..0A4A
6522             GURMUKHI,                 // 0A4B..0A4D
6523             UNKNOWN,                  // 0A4E..0A50
6524             GURMUKHI,                 // 0A51
6525             UNKNOWN,                  // 0A52..0A58
6526             GURMUKHI,                 // 0A59..0A5C
6527             UNKNOWN,                  // 0A5D
6528             GURMUKHI,                 // 0A5E
6529             UNKNOWN,                  // 0A5F..0A65
6530             GURMUKHI,                 // 0A66..0A76
6531             UNKNOWN,                  // 0A77..0A80
6532             GUJARATI,                 // 0A81..0A83
6533             UNKNOWN,                  // 0A84
6534             GUJARATI,                 // 0A85..0A8D
6535             UNKNOWN,                  // 0A8E
6536             GUJARATI,                 // 0A8F..0A91
6537             UNKNOWN,                  // 0A92
6538             GUJARATI,                 // 0A93..0AA8
6539             UNKNOWN,                  // 0AA9
6540             GUJARATI,                 // 0AAA..0AB0
6541             UNKNOWN,                  // 0AB1
6542             GUJARATI,                 // 0AB2..0AB3
6543             UNKNOWN,                  // 0AB4
6544             GUJARATI,                 // 0AB5..0AB9
6545             UNKNOWN,                  // 0ABA..0ABB
6546             GUJARATI,                 // 0ABC..0AC5
6547             UNKNOWN,                  // 0AC6
6548             GUJARATI,                 // 0AC7..0AC9
6549             UNKNOWN,                  // 0ACA
6550             GUJARATI,                 // 0ACB..0ACD
6551             UNKNOWN,                  // 0ACE..0ACF
6552             GUJARATI,                 // 0AD0
6553             UNKNOWN,                  // 0AD1..0ADF
6554             GUJARATI,                 // 0AE0..0AE3
6555             UNKNOWN,                  // 0AE4..0AE5
6556             GUJARATI,                 // 0AE6..0AF1
6557             UNKNOWN,                  // 0AF2..0AF8
6558             GUJARATI,                 // 0AF9..0AFF
6559             UNKNOWN,                  // 0B00
6560             ORIYA,                    // 0B01..0B03
6561             UNKNOWN,                  // 0B04
6562             ORIYA,                    // 0B05..0B0C
6563             UNKNOWN,                  // 0B0D..0B0E
6564             ORIYA,                    // 0B0F..0B10
6565             UNKNOWN,                  // 0B11..0B12
6566             ORIYA,                    // 0B13..0B28
6567             UNKNOWN,                  // 0B29
6568             ORIYA,                    // 0B2A..0B30
6569             UNKNOWN,                  // 0B31
6570             ORIYA,                    // 0B32..0B33
6571             UNKNOWN,                  // 0B34
6572             ORIYA,                    // 0B35..0B39
6573             UNKNOWN,                  // 0B3A..0B3B
6574             ORIYA,                    // 0B3C..0B44
6575             UNKNOWN,                  // 0B45..0B46
6576             ORIYA,                    // 0B47..0B48
6577             UNKNOWN,                  // 0B49..0B4A
6578             ORIYA,                    // 0B4B..0B4D
6579             UNKNOWN,                  // 0B4E..0B55
6580             ORIYA,                    // 0B56..0B57
6581             UNKNOWN,                  // 0B58..0B5B
6582             ORIYA,                    // 0B5C..0B5D
6583             UNKNOWN,                  // 0B5E
6584             ORIYA,                    // 0B5F..0B63
6585             UNKNOWN,                  // 0B64..0B65
6586             ORIYA,                    // 0B66..0B77
6587             UNKNOWN,                  // 0B78..0B81
6588             TAMIL,                    // 0B82..0B83
6589             UNKNOWN,                  // 0B84
6590             TAMIL,                    // 0B85..0B8A
6591             UNKNOWN,                  // 0B8B..0B8D
6592             TAMIL,                    // 0B8E..0B90
6593             UNKNOWN,                  // 0B91
6594             TAMIL,                    // 0B92..0B95
6595             UNKNOWN,                  // 0B96..0B98
6596             TAMIL,                    // 0B99..0B9A
6597             UNKNOWN,                  // 0B9B
6598             TAMIL,                    // 0B9C
6599             UNKNOWN,                  // 0B9D
6600             TAMIL,                    // 0B9E..0B9F
6601             UNKNOWN,                  // 0BA0..0BA2
6602             TAMIL,                    // 0BA3..0BA4
6603             UNKNOWN,                  // 0BA5..0BA7
6604             TAMIL,                    // 0BA8..0BAA
6605             UNKNOWN,                  // 0BAB..0BAD
6606             TAMIL,                    // 0BAE..0BB9
6607             UNKNOWN,                  // 0BBA..0BBD
6608             TAMIL,                    // 0BBE..0BC2
6609             UNKNOWN,                  // 0BC3..0BC5
6610             TAMIL,                    // 0BC6..0BC8
6611             UNKNOWN,                  // 0BC9
6612             TAMIL,                    // 0BCA..0BCD
6613             UNKNOWN,                  // 0BCE..0BCF
6614             TAMIL,                    // 0BD0
6615             UNKNOWN,                  // 0BD1..0BD6
6616             TAMIL,                    // 0BD7
6617             UNKNOWN,                  // 0BD8..0BE5
6618             TAMIL,                    // 0BE6..0BFA
6619             UNKNOWN,                  // 0BFB..0BFF
6620             TELUGU,                   // 0C00..0C0C
6621             UNKNOWN,                  // 0C0D
6622             TELUGU,                   // 0C0E..0C10
6623             UNKNOWN,                  // 0C11
6624             TELUGU,                   // 0C12..0C28
6625             UNKNOWN,                  // 0C29
6626             TELUGU,                   // 0C2A..0C39
6627             UNKNOWN,                  // 0C3A..0C3C
6628             TELUGU,                   // 0C3D..0C44
6629             UNKNOWN,                  // 0C45
6630             TELUGU,                   // 0C46..0C48
6631             UNKNOWN,                  // 0C49
6632             TELUGU,                   // 0C4A..0C4D
6633             UNKNOWN,                  // 0C4E..0C54
6634             TELUGU,                   // 0C55..0C56
6635             UNKNOWN,                  // 0C57
6636             TELUGU,                   // 0C58..0C5A
6637             UNKNOWN,                  // 0C5B..0C5F
6638             TELUGU,                   // 0C60..0C63
6639             UNKNOWN,                  // 0C64..0C65
6640             TELUGU,                   // 0C66..0C6F
6641             UNKNOWN,                  // 0C70..0C77
6642             TELUGU,                   // 0C78..0C7F
6643             KANNADA,                  // 0C80..0C8C
6644             UNKNOWN,                  // 0C8D
6645             KANNADA,                  // 0C8E..0C90
6646             UNKNOWN,                  // 0C91
6647             KANNADA,                  // 0C92..0CA8
6648             UNKNOWN,                  // 0CA9
6649             KANNADA,                  // 0CAA..0CB3
6650             UNKNOWN,                  // 0CB4
6651             KANNADA,                  // 0CB5..0CB9
6652             UNKNOWN,                  // 0CBA..0CBB
6653             KANNADA,                  // 0CBC..0CC4
6654             UNKNOWN,                  // 0CC5
6655             KANNADA,                  // 0CC6..0CC8
6656             UNKNOWN,                  // 0CC9
6657             KANNADA,                  // 0CCA..0CCD
6658             UNKNOWN,                  // 0CCE..0CD4
6659             KANNADA,                  // 0CD5..0CD6
6660             UNKNOWN,                  // 0CD7..0CDD
6661             KANNADA,                  // 0CDE
6662             UNKNOWN,                  // 0CDF
6663             KANNADA,                  // 0CE0..0CE3
6664             UNKNOWN,                  // 0CE4..0CE5
6665             KANNADA,                  // 0CE6..0CEF
6666             UNKNOWN,                  // 0CF0
6667             KANNADA,                  // 0CF1..0CF2
6668             UNKNOWN,                  // 0CF3..0CFF
6669             MALAYALAM,                // 0D00..0D03
6670             UNKNOWN,                  // 0D04
6671             MALAYALAM,                // 0D05..0D0C
6672             UNKNOWN,                  // 0D0D
6673             MALAYALAM,                // 0D0E..0D10
6674             UNKNOWN,                  // 0D11
6675             MALAYALAM,                // 0D12..0D44
6676             UNKNOWN,                  // 0D45
6677             MALAYALAM,                // 0D46..0D48
6678             UNKNOWN,                  // 0D49
6679             MALAYALAM,                // 0D4A..0D4F
6680             UNKNOWN,                  // 0D50..0D53
6681             MALAYALAM,                // 0D54..0D63
6682             UNKNOWN,                  // 0D64..0D65
6683             MALAYALAM,                // 0D66..0D7F
6684             UNKNOWN,                  // 0D80..0D81
6685             SINHALA,                  // 0D82..0D83
6686             UNKNOWN,                  // 0D84
6687             SINHALA,                  // 0D85..0D96
6688             UNKNOWN,                  // 0D97..0D99
6689             SINHALA,                  // 0D9A..0DB1
6690             UNKNOWN,                  // 0DB2
6691             SINHALA,                  // 0DB3..0DBB
6692             UNKNOWN,                  // 0DBC
6693             SINHALA,                  // 0DBD
6694             UNKNOWN,                  // 0DBE..0DBF
6695             SINHALA,                  // 0DC0..0DC6
6696             UNKNOWN,                  // 0DC7..0DC9
6697             SINHALA,                  // 0DCA
6698             UNKNOWN,                  // 0DCB..0DCE
6699             SINHALA,                  // 0DCF..0DD4
6700             UNKNOWN,                  // 0DD5
6701             SINHALA,                  // 0DD6
6702             UNKNOWN,                  // 0DD7
6703             SINHALA,                  // 0DD8..0DDF
6704             UNKNOWN,                  // 0DE0..0DE5
6705             SINHALA,                  // 0DE6..0DEF
6706             UNKNOWN,                  // 0DF0..0DF1
6707             SINHALA,                  // 0DF2..0DF4
6708             UNKNOWN,                  // 0DF5..0E00
6709             THAI,                     // 0E01..0E3A
6710             UNKNOWN,                  // 0E3B..0E3E
6711             COMMON,                   // 0E3F
6712             THAI,                     // 0E40..0E5B
6713             UNKNOWN,                  // 0E5C..0E80
6714             LAO,                      // 0E81..0E82
6715             UNKNOWN,                  // 0E83
6716             LAO,                      // 0E84
6717             UNKNOWN,                  // 0E85..0E86
6718             LAO,                      // 0E87..0E88
6719             UNKNOWN,                  // 0E89
6720             LAO,                      // 0E8A
6721             UNKNOWN,                  // 0E8B..0E8C
6722             LAO,                      // 0E8D
6723             UNKNOWN,                  // 0E8E..0E93
6724             LAO,                      // 0E94..0E97
6725             UNKNOWN,                  // 0E98
6726             LAO,                      // 0E99..0E9F
6727             UNKNOWN,                  // 0EA0
6728             LAO,                      // 0EA1..0EA3
6729             UNKNOWN,                  // 0EA4
6730             LAO,                      // 0EA5
6731             UNKNOWN,                  // 0EA6
6732             LAO,                      // 0EA7
6733             UNKNOWN,                  // 0EA8..0EA9
6734             LAO,                      // 0EAA..0EAB
6735             UNKNOWN,                  // 0EAC
6736             LAO,                      // 0EAD..0EB9
6737             UNKNOWN,                  // 0EBA
6738             LAO,                      // 0EBB..0EBD
6739             UNKNOWN,                  // 0EBE..0EBF
6740             LAO,                      // 0EC0..0EC4
6741             UNKNOWN,                  // 0EC5
6742             LAO,                      // 0EC6
6743             UNKNOWN,                  // 0EC7
6744             LAO,                      // 0EC8..0ECD
6745             UNKNOWN,                  // 0ECE..0ECF
6746             LAO,                      // 0ED0..0ED9
6747             UNKNOWN,                  // 0EDA..0EDB
6748             LAO,                      // 0EDC..0EDF
6749             UNKNOWN,                  // 0EE0..0EFF
6750             TIBETAN,                  // 0F00..0F47
6751             UNKNOWN,                  // 0F48
6752             TIBETAN,                  // 0F49..0F6C
6753             UNKNOWN,                  // 0F6D..0F70
6754             TIBETAN,                  // 0F71..0F97
6755             UNKNOWN,                  // 0F98
6756             TIBETAN,                  // 0F99..0FBC
6757             UNKNOWN,                  // 0FBD
6758             TIBETAN,                  // 0FBE..0FCC
6759             UNKNOWN,                  // 0FCD
6760             TIBETAN,                  // 0FCE..0FD4
6761             COMMON,                   // 0FD5..0FD8
6762             TIBETAN,                  // 0FD9..0FDA
6763             UNKNOWN,                  // 0FDB..0FFF
6764             MYANMAR,                  // 1000..109F
6765             GEORGIAN,                 // 10A0..10C5
6766             UNKNOWN,                  // 10C6
6767             GEORGIAN,                 // 10C7
6768             UNKNOWN,                  // 10C8..10CC
6769             GEORGIAN,                 // 10CD
6770             UNKNOWN,                  // 10CE..10CF
6771             GEORGIAN,                 // 10D0..10FA
6772             COMMON,                   // 10FB
6773             GEORGIAN,                 // 10FC..10FF
6774             HANGUL,                   // 1100..11FF
6775             ETHIOPIC,                 // 1200..1248
6776             UNKNOWN,                  // 1249
6777             ETHIOPIC,                 // 124A..124D
6778             UNKNOWN,                  // 124E..124F
6779             ETHIOPIC,                 // 1250..1256
6780             UNKNOWN,                  // 1257
6781             ETHIOPIC,                 // 1258
6782             UNKNOWN,                  // 1259
6783             ETHIOPIC,                 // 125A..125D
6784             UNKNOWN,                  // 125E..125F
6785             ETHIOPIC,                 // 1260..1288
6786             UNKNOWN,                  // 1289
6787             ETHIOPIC,                 // 128A..128D
6788             UNKNOWN,                  // 128E..128F
6789             ETHIOPIC,                 // 1290..12B0
6790             UNKNOWN,                  // 12B1
6791             ETHIOPIC,                 // 12B2..12B5
6792             UNKNOWN,                  // 12B6..12B7
6793             ETHIOPIC,                 // 12B8..12BE
6794             UNKNOWN,                  // 12BF
6795             ETHIOPIC,                 // 12C0
6796             UNKNOWN,                  // 12C1
6797             ETHIOPIC,                 // 12C2..12C5
6798             UNKNOWN,                  // 12C6..12C7
6799             ETHIOPIC,                 // 12C8..12D6
6800             UNKNOWN,                  // 12D7
6801             ETHIOPIC,                 // 12D8..1310
6802             UNKNOWN,                  // 1311
6803             ETHIOPIC,                 // 1312..1315
6804             UNKNOWN,                  // 1316..1317
6805             ETHIOPIC,                 // 1318..135A
6806             UNKNOWN,                  // 135B..135C
6807             ETHIOPIC,                 // 135D..137C
6808             UNKNOWN,                  // 137D..137F
6809             ETHIOPIC,                 // 1380..1399
6810             UNKNOWN,                  // 139A..139F
6811             CHEROKEE,                 // 13A0..13F5
6812             UNKNOWN,                  // 13F6..13F7
6813             CHEROKEE,                 // 13F8..13FD
6814             UNKNOWN,                  // 13FE..13FF
6815             CANADIAN_ABORIGINAL,      // 1400..167F
6816             OGHAM,                    // 1680..169C
6817             UNKNOWN,                  // 169D..169F
6818             RUNIC,                    // 16A0..16EA
6819             COMMON,                   // 16EB..16ED
6820             RUNIC,                    // 16EE..16F8
6821             UNKNOWN,                  // 16F9..16FF
6822             TAGALOG,                  // 1700..170C
6823             UNKNOWN,                  // 170D
6824             TAGALOG,                  // 170E..1714
6825             UNKNOWN,                  // 1715..171F
6826             HANUNOO,                  // 1720..1734
6827             COMMON,                   // 1735..1736
6828             UNKNOWN,                  // 1737..173F
6829             BUHID,                    // 1740..1753
6830             UNKNOWN,                  // 1754..175F
6831             TAGBANWA,                 // 1760..176C
6832             UNKNOWN,                  // 176D
6833             TAGBANWA,                 // 176E..1770
6834             UNKNOWN,                  // 1771
6835             TAGBANWA,                 // 1772..1773
6836             UNKNOWN,                  // 1774..177F
6837             KHMER,                    // 1780..17DD
6838             UNKNOWN,                  // 17DE..17DF
6839             KHMER,                    // 17E0..17E9
6840             UNKNOWN,                  // 17EA..17EF
6841             KHMER,                    // 17F0..17F9
6842             UNKNOWN,                  // 17FA..17FF
6843             MONGOLIAN,                // 1800..1801
6844             COMMON,                   // 1802..1803
6845             MONGOLIAN,                // 1804
6846             COMMON,                   // 1805
6847             MONGOLIAN,                // 1806..180E
6848             UNKNOWN,                  // 180F
6849             MONGOLIAN,                // 1810..1819
6850             UNKNOWN,                  // 181A..181F
6851             MONGOLIAN,                // 1820..1878
6852             UNKNOWN,                  // 1879..187F
6853             MONGOLIAN,                // 1880..18AA
6854             UNKNOWN,                  // 18AB..18AF
6855             CANADIAN_ABORIGINAL,      // 18B0..18F5
6856             UNKNOWN,                  // 18F6..18FF
6857             LIMBU,                    // 1900..191E
6858             UNKNOWN,                  // 191F
6859             LIMBU,                    // 1920..192B
6860             UNKNOWN,                  // 192C..192F
6861             LIMBU,                    // 1930..193B
6862             UNKNOWN,                  // 193C..193F
6863             LIMBU,                    // 1940
6864             UNKNOWN,                  // 1941..1943
6865             LIMBU,                    // 1944..194F
6866             TAI_LE,                   // 1950..196D
6867             UNKNOWN,                  // 196E..196F
6868             TAI_LE,                   // 1970..1974
6869             UNKNOWN,                  // 1975..197F
6870             NEW_TAI_LUE,              // 1980..19AB
6871             UNKNOWN,                  // 19AC..19AF
6872             NEW_TAI_LUE,              // 19B0..19C9
6873             UNKNOWN,                  // 19CA..19CF
6874             NEW_TAI_LUE,              // 19D0..19DA
6875             UNKNOWN,                  // 19DB..19DD
6876             NEW_TAI_LUE,              // 19DE..19DF
6877             KHMER,                    // 19E0..19FF
6878             BUGINESE,                 // 1A00..1A1B
6879             UNKNOWN,                  // 1A1C..1A1D
6880             BUGINESE,                 // 1A1E..1A1F
6881             TAI_THAM,                 // 1A20..1A5E
6882             UNKNOWN,                  // 1A5F
6883             TAI_THAM,                 // 1A60..1A7C
6884             UNKNOWN,                  // 1A7D..1A7E
6885             TAI_THAM,                 // 1A7F..1A89
6886             UNKNOWN,                  // 1A8A..1A8F
6887             TAI_THAM,                 // 1A90..1A99
6888             UNKNOWN,                  // 1A9A..1A9F
6889             TAI_THAM,                 // 1AA0..1AAD
6890             UNKNOWN,                  // 1AAE..1AAF
6891             INHERITED,                // 1AB0..1ABE
6892             UNKNOWN,                  // 1ABF..1AFF
6893             BALINESE,                 // 1B00..1B4B
6894             UNKNOWN,                  // 1B4C..1B4F
6895             BALINESE,                 // 1B50..1B7C
6896             UNKNOWN,                  // 1B7D..1B7F
6897             SUNDANESE,                // 1B80..1BBF
6898             BATAK,                    // 1BC0..1BF3
6899             UNKNOWN,                  // 1BF4..1BFB
6900             BATAK,                    // 1BFC..1BFF
6901             LEPCHA,                   // 1C00..1C37
6902             UNKNOWN,                  // 1C38..1C3A
6903             LEPCHA,                   // 1C3B..1C49
6904             UNKNOWN,                  // 1C4A..1C4C
6905             LEPCHA,                   // 1C4D..1C4F
6906             OL_CHIKI,                 // 1C50..1C7F
6907             CYRILLIC,                 // 1C80..1C88
6908             UNKNOWN,                  // 1C89..1C8F
6909             GEORGIAN,                 // 1C90..1CBA
6910             UNKNOWN,                  // 1CBB..1CBC
6911             GEORGIAN,                 // 1CBD..1CBF
6912             SUNDANESE,                // 1CC0..1CC7
6913             UNKNOWN,                  // 1CC8..1CCF
6914             INHERITED,                // 1CD0..1CD2
6915             COMMON,                   // 1CD3
6916             INHERITED,                // 1CD4..1CE0
6917             COMMON,                   // 1CE1
6918             INHERITED,                // 1CE2..1CE8
6919             COMMON,                   // 1CE9..1CEC
6920             INHERITED,                // 1CED
6921             COMMON,                   // 1CEE..1CF3
6922             INHERITED,                // 1CF4
6923             COMMON,                   // 1CF5..1CF7
6924             INHERITED,                // 1CF8..1CF9
6925             UNKNOWN,                  // 1CFA..1CFF
6926             LATIN,                    // 1D00..1D25
6927             GREEK,                    // 1D26..1D2A
6928             CYRILLIC,                 // 1D2B
6929             LATIN,                    // 1D2C..1D5C
6930             GREEK,                    // 1D5D..1D61
6931             LATIN,                    // 1D62..1D65
6932             GREEK,                    // 1D66..1D6A
6933             LATIN,                    // 1D6B..1D77
6934             CYRILLIC,                 // 1D78
6935             LATIN,                    // 1D79..1DBE
6936             GREEK,                    // 1DBF
6937             INHERITED,                // 1DC0..1DF9
6938             UNKNOWN,                  // 1DFA
6939             INHERITED,                // 1DFB..1DFF
6940             LATIN,                    // 1E00..1EFF
6941             GREEK,                    // 1F00..1F15
6942             UNKNOWN,                  // 1F16..1F17
6943             GREEK,                    // 1F18..1F1D
6944             UNKNOWN,                  // 1F1E..1F1F
6945             GREEK,                    // 1F20..1F45
6946             UNKNOWN,                  // 1F46..1F47
6947             GREEK,                    // 1F48..1F4D
6948             UNKNOWN,                  // 1F4E..1F4F
6949             GREEK,                    // 1F50..1F57
6950             UNKNOWN,                  // 1F58
6951             GREEK,                    // 1F59
6952             UNKNOWN,                  // 1F5A
6953             GREEK,                    // 1F5B
6954             UNKNOWN,                  // 1F5C
6955             GREEK,                    // 1F5D
6956             UNKNOWN,                  // 1F5E
6957             GREEK,                    // 1F5F..1F7D
6958             UNKNOWN,                  // 1F7E..1F7F
6959             GREEK,                    // 1F80..1FB4
6960             UNKNOWN,                  // 1FB5
6961             GREEK,                    // 1FB6..1FC4
6962             UNKNOWN,                  // 1FC5
6963             GREEK,                    // 1FC6..1FD3
6964             UNKNOWN,                  // 1FD4..1FD5
6965             GREEK,                    // 1FD6..1FDB
6966             UNKNOWN,                  // 1FDC
6967             GREEK,                    // 1FDD..1FEF
6968             UNKNOWN,                  // 1FF0..1FF1
6969             GREEK,                    // 1FF2..1FF4
6970             UNKNOWN,                  // 1FF5
6971             GREEK,                    // 1FF6..1FFE
6972             UNKNOWN,                  // 1FFF
6973             COMMON,                   // 2000..200B
6974             INHERITED,                // 200C..200D
6975             COMMON,                   // 200E..2064
6976             UNKNOWN,                  // 2065
6977             COMMON,                   // 2066..2070
6978             LATIN,                    // 2071
6979             UNKNOWN,                  // 2072..2073
6980             COMMON,                   // 2074..207E
6981             LATIN,                    // 207F
6982             COMMON,                   // 2080..208E
6983             UNKNOWN,                  // 208F
6984             LATIN,                    // 2090..209C
6985             UNKNOWN,                  // 209D..209F
6986             COMMON,                   // 20A0..20BF
6987             UNKNOWN,                  // 20C0..20CF
6988             INHERITED,                // 20D0..20F0
6989             UNKNOWN,                  // 20F1..20FF
6990             COMMON,                   // 2100..2125
6991             GREEK,                    // 2126
6992             COMMON,                   // 2127..2129
6993             LATIN,                    // 212A..212B
6994             COMMON,                   // 212C..2131
6995             LATIN,                    // 2132
6996             COMMON,                   // 2133..214D
6997             LATIN,                    // 214E
6998             COMMON,                   // 214F..215F
6999             LATIN,                    // 2160..2188
7000             COMMON,                   // 2189..218B
7001             UNKNOWN,                  // 218C..218F
7002             COMMON,                   // 2190..2426
7003             UNKNOWN,                  // 2427..243F
7004             COMMON,                   // 2440..244A
7005             UNKNOWN,                  // 244B..245F
7006             COMMON,                   // 2460..27FF
7007             BRAILLE,                  // 2800..28FF
7008             COMMON,                   // 2900..2B73
7009             UNKNOWN,                  // 2B74..2B75
7010             COMMON,                   // 2B76..2B95
7011             UNKNOWN,                  // 2B96..2B97
7012             COMMON,                   // 2B98..2BC8
7013             UNKNOWN,                  // 2BC9
7014             COMMON,                   // 2BCA..2BFE
7015             UNKNOWN,                  // 2BFF
7016             GLAGOLITIC,               // 2C00..2C2E
7017             UNKNOWN,                  // 2C2F
7018             GLAGOLITIC,               // 2C30..2C5E
7019             UNKNOWN,                  // 2C5F
7020             LATIN,                    // 2C60..2C7F
7021             COPTIC,                   // 2C80..2CF3
7022             UNKNOWN,                  // 2CF4..2CF8
7023             COPTIC,                   // 2CF9..2CFF
7024             GEORGIAN,                 // 2D00..2D25
7025             UNKNOWN,                  // 2D26
7026             GEORGIAN,                 // 2D27
7027             UNKNOWN,                  // 2D28..2D2C
7028             GEORGIAN,                 // 2D2D
7029             UNKNOWN,                  // 2D2E..2D2F
7030             TIFINAGH,                 // 2D30..2D67
7031             UNKNOWN,                  // 2D68..2D6E
7032             TIFINAGH,                 // 2D6F..2D70
7033             UNKNOWN,                  // 2D71..2D7E
7034             TIFINAGH,                 // 2D7F
7035             ETHIOPIC,                 // 2D80..2D96
7036             UNKNOWN,                  // 2D97..2D9F
7037             ETHIOPIC,                 // 2DA0..2DA6
7038             UNKNOWN,                  // 2DA7
7039             ETHIOPIC,                 // 2DA8..2DAE
7040             UNKNOWN,                  // 2DAF
7041             ETHIOPIC,                 // 2DB0..2DB6
7042             UNKNOWN,                  // 2DB7
7043             ETHIOPIC,                 // 2DB8..2DBE
7044             UNKNOWN,                  // 2DBF
7045             ETHIOPIC,                 // 2DC0..2DC6
7046             UNKNOWN,                  // 2DC7
7047             ETHIOPIC,                 // 2DC8..2DCE
7048             UNKNOWN,                  // 2DCF
7049             ETHIOPIC,                 // 2DD0..2DD6
7050             UNKNOWN,                  // 2DD7
7051             ETHIOPIC,                 // 2DD8..2DDE
7052             UNKNOWN,                  // 2DDF
7053             CYRILLIC,                 // 2DE0..2DFF
7054             COMMON,                   // 2E00..2E4E
7055             UNKNOWN,                  // 2E4F..2E7F
7056             HAN,                      // 2E80..2E99
7057             UNKNOWN,                  // 2E9A
7058             HAN,                      // 2E9B..2EF3
7059             UNKNOWN,                  // 2EF4..2EFF
7060             HAN,                      // 2F00..2FD5
7061             UNKNOWN,                  // 2FD6..2FEF
7062             COMMON,                   // 2FF0..2FFB
7063             UNKNOWN,                  // 2FFC..2FFF
7064             COMMON,                   // 3000..3004
7065             HAN,                      // 3005
7066             COMMON,                   // 3006
7067             HAN,                      // 3007
7068             COMMON,                   // 3008..3020
7069             HAN,                      // 3021..3029
7070             INHERITED,                // 302A..302D
7071             HANGUL,                   // 302E..302F
7072             COMMON,                   // 3030..3037
7073             HAN,                      // 3038..303B
7074             COMMON,                   // 303C..303F
7075             UNKNOWN,                  // 3040
7076             HIRAGANA,                 // 3041..3096
7077             UNKNOWN,                  // 3097..3098
7078             INHERITED,                // 3099..309A
7079             COMMON,                   // 309B..309C
7080             HIRAGANA,                 // 309D..309F
7081             COMMON,                   // 30A0
7082             KATAKANA,                 // 30A1..30FA
7083             COMMON,                   // 30FB..30FC
7084             KATAKANA,                 // 30FD..30FF
7085             UNKNOWN,                  // 3100..3104
7086             BOPOMOFO,                 // 3105..312F
7087             UNKNOWN,                  // 3130
7088             HANGUL,                   // 3131..318E
7089             UNKNOWN,                  // 318F
7090             COMMON,                   // 3190..319F
7091             BOPOMOFO,                 // 31A0..31BA
7092             UNKNOWN,                  // 31BB..31BF
7093             COMMON,                   // 31C0..31E3
7094             UNKNOWN,                  // 31E4..31EF
7095             KATAKANA,                 // 31F0..31FF
7096             HANGUL,                   // 3200..321E
7097             UNKNOWN,                  // 321F
7098             COMMON,                   // 3220..325F
7099             HANGUL,                   // 3260..327E
7100             COMMON,                   // 327F..32CF
7101             KATAKANA,                 // 32D0..32FE
7102             COMMON,                   // 32FF
7103             KATAKANA,                 // 3300..3357
7104             COMMON,                   // 3358..33FF
7105             HAN,                      // 3400..4DB5
7106             UNKNOWN,                  // 4DB6..4DBF
7107             COMMON,                   // 4DC0..4DFF
7108             HAN,                      // 4E00..9FEF
7109             UNKNOWN,                  // 9FF0..9FFF
7110             YI,                       // A000..A48C
7111             UNKNOWN,                  // A48D..A48F
7112             YI,                       // A490..A4C6
7113             UNKNOWN,                  // A4C7..A4CF
7114             LISU,                     // A4D0..A4FF
7115             VAI,                      // A500..A62B
7116             UNKNOWN,                  // A62C..A63F
7117             CYRILLIC,                 // A640..A69F
7118             BAMUM,                    // A6A0..A6F7
7119             UNKNOWN,                  // A6F8..A6FF
7120             COMMON,                   // A700..A721
7121             LATIN,                    // A722..A787
7122             COMMON,                   // A788..A78A
7123             LATIN,                    // A78B..A7B9
7124             UNKNOWN,                  // A7BA..A7F6
7125             LATIN,                    // A7F7..A7FF
7126             SYLOTI_NAGRI,             // A800..A82B
7127             UNKNOWN,                  // A82C..A82F
7128             COMMON,                   // A830..A839
7129             UNKNOWN,                  // A83A..A83F
7130             PHAGS_PA,                 // A840..A877
7131             UNKNOWN,                  // A878..A87F
7132             SAURASHTRA,               // A880..A8C5
7133             UNKNOWN,                  // A8C6..A8CD
7134             SAURASHTRA,               // A8CE..A8D9
7135             UNKNOWN,                  // A8DA..A8DF
7136             DEVANAGARI,               // A8E0..A8FF
7137             KAYAH_LI,                 // A900..A92D
7138             COMMON,                   // A92E
7139             KAYAH_LI,                 // A92F
7140             REJANG,                   // A930..A953
7141             UNKNOWN,                  // A954..A95E
7142             REJANG,                   // A95F
7143             HANGUL,                   // A960..A97C
7144             UNKNOWN,                  // A97D..A97F
7145             JAVANESE,                 // A980..A9CD
7146             UNKNOWN,                  // A9CE
7147             COMMON,                   // A9CF
7148             JAVANESE,                 // A9D0..A9D9
7149             UNKNOWN,                  // A9DA..A9DD
7150             JAVANESE,                 // A9DE..A9DF
7151             MYANMAR,                  // A9E0..A9FE
7152             UNKNOWN,                  // A9FF
7153             CHAM,                     // AA00..AA36
7154             UNKNOWN,                  // AA37..AA3F
7155             CHAM,                     // AA40..AA4D
7156             UNKNOWN,                  // AA4E..AA4F
7157             CHAM,                     // AA50..AA59
7158             UNKNOWN,                  // AA5A..AA5B
7159             CHAM,                     // AA5C..AA5F
7160             MYANMAR,                  // AA60..AA7F
7161             TAI_VIET,                 // AA80..AAC2
7162             UNKNOWN,                  // AAC3..AADA
7163             TAI_VIET,                 // AADB..AADF
7164             MEETEI_MAYEK,             // AAE0..AAF6
7165             UNKNOWN,                  // AAF7..AB00
7166             ETHIOPIC,                 // AB01..AB06
7167             UNKNOWN,                  // AB07..AB08
7168             ETHIOPIC,                 // AB09..AB0E
7169             UNKNOWN,                  // AB0F..AB10
7170             ETHIOPIC,                 // AB11..AB16
7171             UNKNOWN,                  // AB17..AB1F
7172             ETHIOPIC,                 // AB20..AB26
7173             UNKNOWN,                  // AB27
7174             ETHIOPIC,                 // AB28..AB2E
7175             UNKNOWN,                  // AB2F
7176             LATIN,                    // AB30..AB5A
7177             COMMON,                   // AB5B
7178             LATIN,                    // AB5C..AB64
7179             GREEK,                    // AB65
7180             UNKNOWN,                  // AB66..AB6F
7181             CHEROKEE,                 // AB70..ABBF
7182             MEETEI_MAYEK,             // ABC0..ABED
7183             UNKNOWN,                  // ABEE..ABEF
7184             MEETEI_MAYEK,             // ABF0..ABF9
7185             UNKNOWN,                  // ABFA..ABFF
7186             HANGUL,                   // AC00..D7A3
7187             UNKNOWN,                  // D7A4..D7AF
7188             HANGUL,                   // D7B0..D7C6
7189             UNKNOWN,                  // D7C7..D7CA
7190             HANGUL,                   // D7CB..D7FB
7191             UNKNOWN,                  // D7FC..F8FF
7192             HAN,                      // F900..FA6D
7193             UNKNOWN,                  // FA6E..FA6F
7194             HAN,                      // FA70..FAD9
7195             UNKNOWN,                  // FADA..FAFF
7196             LATIN,                    // FB00..FB06
7197             UNKNOWN,                  // FB07..FB12
7198             ARMENIAN,                 // FB13..FB17
7199             UNKNOWN,                  // FB18..FB1C
7200             HEBREW,                   // FB1D..FB36
7201             UNKNOWN,                  // FB37
7202             HEBREW,                   // FB38..FB3C
7203             UNKNOWN,                  // FB3D
7204             HEBREW,                   // FB3E
7205             UNKNOWN,                  // FB3F
7206             HEBREW,                   // FB40..FB41
7207             UNKNOWN,                  // FB42
7208             HEBREW,                   // FB43..FB44
7209             UNKNOWN,                  // FB45
7210             HEBREW,                   // FB46..FB4F
7211             ARABIC,                   // FB50..FBC1
7212             UNKNOWN,                  // FBC2..FBD2
7213             ARABIC,                   // FBD3..FD3D
7214             COMMON,                   // FD3E..FD3F
7215             UNKNOWN,                  // FD40..FD4F
7216             ARABIC,                   // FD50..FD8F
7217             UNKNOWN,                  // FD90..FD91
7218             ARABIC,                   // FD92..FDC7
7219             UNKNOWN,                  // FDC8..FDEF
7220             ARABIC,                   // FDF0..FDFD
7221             UNKNOWN,                  // FDFE..FDFF
7222             INHERITED,                // FE00..FE0F
7223             COMMON,                   // FE10..FE19
7224             UNKNOWN,                  // FE1A..FE1F
7225             INHERITED,                // FE20..FE2D
7226             CYRILLIC,                 // FE2E..FE2F
7227             COMMON,                   // FE30..FE52
7228             UNKNOWN,                  // FE53
7229             COMMON,                   // FE54..FE66
7230             UNKNOWN,                  // FE67
7231             COMMON,                   // FE68..FE6B
7232             UNKNOWN,                  // FE6C..FE6F
7233             ARABIC,                   // FE70..FE74
7234             UNKNOWN,                  // FE75
7235             ARABIC,                   // FE76..FEFC
7236             UNKNOWN,                  // FEFD..FEFE
7237             COMMON,                   // FEFF
7238             UNKNOWN,                  // FF00
7239             COMMON,                   // FF01..FF20
7240             LATIN,                    // FF21..FF3A
7241             COMMON,                   // FF3B..FF40
7242             LATIN,                    // FF41..FF5A
7243             COMMON,                   // FF5B..FF65
7244             KATAKANA,                 // FF66..FF6F
7245             COMMON,                   // FF70
7246             KATAKANA,                 // FF71..FF9D
7247             COMMON,                   // FF9E..FF9F
7248             HANGUL,                   // FFA0..FFBE
7249             UNKNOWN,                  // FFBF..FFC1
7250             HANGUL,                   // FFC2..FFC7
7251             UNKNOWN,                  // FFC8..FFC9
7252             HANGUL,                   // FFCA..FFCF
7253             UNKNOWN,                  // FFD0..FFD1
7254             HANGUL,                   // FFD2..FFD7
7255             UNKNOWN,                  // FFD8..FFD9
7256             HANGUL,                   // FFDA..FFDC
7257             UNKNOWN,                  // FFDD..FFDF
7258             COMMON,                   // FFE0..FFE6
7259             UNKNOWN,                  // FFE7
7260             COMMON,                   // FFE8..FFEE
7261             UNKNOWN,                  // FFEF..FFF8
7262             COMMON,                   // FFF9..FFFD
7263             UNKNOWN,                  // FFFE..FFFF
7264             LINEAR_B,                 // 10000..1000B
7265             UNKNOWN,                  // 1000C
7266             LINEAR_B,                 // 1000D..10026
7267             UNKNOWN,                  // 10027
7268             LINEAR_B,                 // 10028..1003A
7269             UNKNOWN,                  // 1003B
7270             LINEAR_B,                 // 1003C..1003D
7271             UNKNOWN,                  // 1003E
7272             LINEAR_B,                 // 1003F..1004D
7273             UNKNOWN,                  // 1004E..1004F
7274             LINEAR_B,                 // 10050..1005D
7275             UNKNOWN,                  // 1005E..1007F
7276             LINEAR_B,                 // 10080..100FA
7277             UNKNOWN,                  // 100FB..100FF
7278             COMMON,                   // 10100..10102
7279             UNKNOWN,                  // 10103..10106
7280             COMMON,                   // 10107..10133
7281             UNKNOWN,                  // 10134..10136
7282             COMMON,                   // 10137..1013F
7283             GREEK,                    // 10140..1018E
7284             UNKNOWN,                  // 1018F
7285             COMMON,                   // 10190..1019B
7286             UNKNOWN,                  // 1019C..1019F
7287             GREEK,                    // 101A0
7288             UNKNOWN,                  // 101A1..101CF
7289             COMMON,                   // 101D0..101FC
7290             INHERITED,                // 101FD
7291             UNKNOWN,                  // 101FE..1027F
7292             LYCIAN,                   // 10280..1029C
7293             UNKNOWN,                  // 1029D..1029F
7294             CARIAN,                   // 102A0..102D0
7295             UNKNOWN,                  // 102D1..102DF
7296             INHERITED,                // 102E0
7297             COMMON,                   // 102E1..102FB
7298             UNKNOWN,                  // 102FC..102FF
7299             OLD_ITALIC,               // 10300..10323
7300             UNKNOWN,                  // 10324..1032C
7301             OLD_ITALIC,               // 1032D..1032F
7302             GOTHIC,                   // 10330..1034A
7303             UNKNOWN,                  // 1034B..1034F
7304             OLD_PERMIC,               // 10350..1037A
7305             UNKNOWN,                  // 1037B..1037F
7306             UGARITIC,                 // 10380..1039D
7307             UNKNOWN,                  // 1039E
7308             UGARITIC,                 // 1039F
7309             OLD_PERSIAN,              // 103A0..103C3
7310             UNKNOWN,                  // 103C4..103C7
7311             OLD_PERSIAN,              // 103C8..103D5
7312             UNKNOWN,                  // 103D6..103FF
7313             DESERET,                  // 10400..1044F
7314             SHAVIAN,                  // 10450..1047F
7315             OSMANYA,                  // 10480..1049D
7316             UNKNOWN,                  // 1049E..1049F
7317             OSMANYA,                  // 104A0..104A9
7318             UNKNOWN,                  // 104AA..104AF
7319             OSAGE,                    // 104B0..104D3
7320             UNKNOWN,                  // 104D4..104D7
7321             OSAGE,                    // 104D8..104FB
7322             UNKNOWN,                  // 104FC..104FF
7323             ELBASAN,                  // 10500..10527
7324             UNKNOWN,                  // 10528..1052F
7325             CAUCASIAN_ALBANIAN,       // 10530..10563
7326             UNKNOWN,                  // 10564..1056E
7327             CAUCASIAN_ALBANIAN,       // 1056F
7328             UNKNOWN,                  // 10570..105FF
7329             LINEAR_A,                 // 10600..10736
7330             UNKNOWN,                  // 10737..1073F
7331             LINEAR_A,                 // 10740..10755
7332             UNKNOWN,                  // 10756..1075F
7333             LINEAR_A,                 // 10760..10767
7334             UNKNOWN,                  // 10768..107FF
7335             CYPRIOT,                  // 10800..10805
7336             UNKNOWN,                  // 10806..10807
7337             CYPRIOT,                  // 10808
7338             UNKNOWN,                  // 10809
7339             CYPRIOT,                  // 1080A..10835
7340             UNKNOWN,                  // 10836
7341             CYPRIOT,                  // 10837..10838
7342             UNKNOWN,                  // 10839..1083B
7343             CYPRIOT,                  // 1083C
7344             UNKNOWN,                  // 1083D..1083E
7345             CYPRIOT,                  // 1083F
7346             IMPERIAL_ARAMAIC,         // 10840..10855
7347             UNKNOWN,                  // 10856
7348             IMPERIAL_ARAMAIC,         // 10857..1085F
7349             PALMYRENE,                // 10860..1087F
7350             NABATAEAN,                // 10880..1089E
7351             UNKNOWN,                  // 1089F..108A6
7352             NABATAEAN,                // 108A7..108AF
7353             UNKNOWN,                  // 108B0..108DF
7354             HATRAN,                   // 108E0..108F2
7355             UNKNOWN,                  // 108F3
7356             HATRAN,                   // 108F4..108F5
7357             UNKNOWN,                  // 108F6..108FA
7358             HATRAN,                   // 108FB..108FF
7359             PHOENICIAN,               // 10900..1091B
7360             UNKNOWN,                  // 1091C..1091E
7361             PHOENICIAN,               // 1091F
7362             LYDIAN,                   // 10920..10939
7363             UNKNOWN,                  // 1093A..1093E
7364             LYDIAN,                   // 1093F
7365             UNKNOWN,                  // 10940..1097F
7366             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7367             MEROITIC_CURSIVE,         // 109A0..109B7
7368             UNKNOWN,                  // 109B8..109BB
7369             MEROITIC_CURSIVE,         // 109BC..109CF
7370             UNKNOWN,                  // 109D0..109D1
7371             MEROITIC_CURSIVE,         // 109D2..109FF
7372             KHAROSHTHI,               // 10A00..10A03
7373             UNKNOWN,                  // 10A04
7374             KHAROSHTHI,               // 10A05..10A06
7375             UNKNOWN,                  // 10A07..10A0B
7376             KHAROSHTHI,               // 10A0C..10A13
7377             UNKNOWN,                  // 10A14
7378             KHAROSHTHI,               // 10A15..10A17
7379             UNKNOWN,                  // 10A18
7380             KHAROSHTHI,               // 10A19..10A35
7381             UNKNOWN,                  // 10A36..10A37
7382             KHAROSHTHI,               // 10A38..10A3A
7383             UNKNOWN,                  // 10A3B..10A3E
7384             KHAROSHTHI,               // 10A3F..10A48
7385             UNKNOWN,                  // 10A49..10A4F
7386             KHAROSHTHI,               // 10A50..10A58
7387             UNKNOWN,                  // 10A59..10A5F
7388             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7389             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7390             UNKNOWN,                  // 10AA0..10ABF
7391             MANICHAEAN,               // 10AC0..10AE6
7392             UNKNOWN,                  // 10AE7..10AEA
7393             MANICHAEAN,               // 10AEB..10AF6
7394             UNKNOWN,                  // 10AF7..10AFF
7395             AVESTAN,                  // 10B00..10B35
7396             UNKNOWN,                  // 10B36..10B38
7397             AVESTAN,                  // 10B39..10B3F
7398             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7399             UNKNOWN,                  // 10B56..10B57
7400             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7401             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7402             UNKNOWN,                  // 10B73..10B77
7403             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7404             PSALTER_PAHLAVI,          // 10B80..10B91
7405             UNKNOWN,                  // 10B92..10B98
7406             PSALTER_PAHLAVI,          // 10B99..10B9C
7407             UNKNOWN,                  // 10B9D..10BA8
7408             PSALTER_PAHLAVI,          // 10BA9..10BAF
7409             UNKNOWN,                  // 10BB0..10BFF
7410             OLD_TURKIC,               // 10C00..10C48
7411             UNKNOWN,                  // 10C49..10C7F
7412             OLD_HUNGARIAN,            // 10C80..10CB2
7413             UNKNOWN,                  // 10CB3..10CBF
7414             OLD_HUNGARIAN,            // 10CC0..10CF2
7415             UNKNOWN,                  // 10CF3..10CF9
7416             OLD_HUNGARIAN,            // 10CFA..10CFF
7417             HANIFI_ROHINGYA,          // 10D00..10D27
7418             UNKNOWN,                  // 10D28..10D2F
7419             HANIFI_ROHINGYA,          // 10D30..10D39
7420             UNKNOWN,                  // 10D3A..10E5F
7421             ARABIC,                   // 10E60..10E7E
7422             UNKNOWN,                  // 10E7F..10EFF
7423             OLD_SOGDIAN,              // 10F00..10F27
7424             UNKNOWN,                  // 10F28..10F2F
7425             SOGDIAN,                  // 10F30..10F59
7426             UNKNOWN,                  // 10F5A..10FFF
7427             BRAHMI,                   // 11000..1104D
7428             UNKNOWN,                  // 1104E..11051
7429             BRAHMI,                   // 11052..1106F
7430             UNKNOWN,                  // 11070..1107E
7431             BRAHMI,                   // 1107F
7432             KAITHI,                   // 11080..110C1
7433             UNKNOWN,                  // 110C2..110CC
7434             KAITHI,                   // 110CD
7435             UNKNOWN,                  // 110CE..110CF
7436             SORA_SOMPENG,             // 110D0..110E8
7437             UNKNOWN,                  // 110E9..110EF
7438             SORA_SOMPENG,             // 110F0..110F9
7439             UNKNOWN,                  // 110FA..110FF
7440             CHAKMA,                   // 11100..11134
7441             UNKNOWN,                  // 11135
7442             CHAKMA,                   // 11136..11146
7443             UNKNOWN,                  // 11147..1114F
7444             MAHAJANI,                 // 11150..11176
7445             UNKNOWN,                  // 11177..1117F
7446             SHARADA,                  // 11180..111CD
7447             UNKNOWN,                  // 111CE..111CF
7448             SHARADA,                  // 111D0..111DF
7449             UNKNOWN,                  // 111E0
7450             SINHALA,                  // 111E1..111F4
7451             UNKNOWN,                  // 111F5..111FF
7452             KHOJKI,                   // 11200..11211
7453             UNKNOWN,                  // 11212
7454             KHOJKI,                   // 11213..1123E
7455             UNKNOWN,                  // 1123F..1127F
7456             MULTANI,                  // 11280..11286
7457             UNKNOWN,                  // 11287
7458             MULTANI,                  // 11288
7459             UNKNOWN,                  // 11289
7460             MULTANI,                  // 1128A..1128D
7461             UNKNOWN,                  // 1128E
7462             MULTANI,                  // 1128F..1129D
7463             UNKNOWN,                  // 1129E
7464             MULTANI,                  // 1129F..112A9
7465             UNKNOWN,                  // 112AA..112AF
7466             KHUDAWADI,                // 112B0..112EA
7467             UNKNOWN,                  // 112EB..112EF
7468             KHUDAWADI,                // 112F0..112F9
7469             UNKNOWN,                  // 112FA..112FF
7470             GRANTHA,                  // 11300..11303
7471             UNKNOWN,                  // 11304
7472             GRANTHA,                  // 11305..1130C
7473             UNKNOWN,                  // 1130D..1130E
7474             GRANTHA,                  // 1130F..11310
7475             UNKNOWN,                  // 11311..11312
7476             GRANTHA,                  // 11313..11328
7477             UNKNOWN,                  // 11329
7478             GRANTHA,                  // 1132A..11330
7479             UNKNOWN,                  // 11331
7480             GRANTHA,                  // 11332..11333
7481             UNKNOWN,                  // 11334
7482             GRANTHA,                  // 11335..11339
7483             UNKNOWN,                  // 1133A
7484             INHERITED,                // 1133B
7485             GRANTHA,                  // 1133C..11344
7486             UNKNOWN,                  // 11345..11346
7487             GRANTHA,                  // 11347..11348
7488             UNKNOWN,                  // 11349..1134A
7489             GRANTHA,                  // 1134B..1134D
7490             UNKNOWN,                  // 1134E..1134F
7491             GRANTHA,                  // 11350
7492             UNKNOWN,                  // 11351..11356
7493             GRANTHA,                  // 11357
7494             UNKNOWN,                  // 11358..1135C
7495             GRANTHA,                  // 1135D..11363
7496             UNKNOWN,                  // 11364..11365
7497             GRANTHA,                  // 11366..1136C
7498             UNKNOWN,                  // 1136D..1136F
7499             GRANTHA,                  // 11370..11374
7500             UNKNOWN,                  // 11375..113FF
7501             NEWA,                     // 11400..11459
7502             UNKNOWN,                  // 1145A
7503             NEWA,                     // 1145B
7504             UNKNOWN,                  // 1145C
7505             NEWA,                     // 1145D..1145E
7506             UNKNOWN,                  // 1145F..1147F
7507             TIRHUTA,                  // 11480..114C7
7508             UNKNOWN,                  // 114C8..114CF
7509             TIRHUTA,                  // 114D0..114D9
7510             UNKNOWN,                  // 114DA..1157F
7511             SIDDHAM,                  // 11580..115B5
7512             UNKNOWN,                  // 115B6..115B7
7513             SIDDHAM,                  // 115B8..115DD
7514             UNKNOWN,                  // 115DE..115FF
7515             MODI,                     // 11600..11644
7516             UNKNOWN,                  // 11645..1164F
7517             MODI,                     // 11650..11659
7518             UNKNOWN,                  // 1165A..1165F
7519             MONGOLIAN,                // 11660..1166C
7520             UNKNOWN,                  // 1166D..1167F
7521             TAKRI,                    // 11680..116B7
7522             UNKNOWN,                  // 116B8..116BF
7523             TAKRI,                    // 116C0..116C9
7524             UNKNOWN,                  // 116CA..116FF
7525             AHOM,                     // 11700..1171A
7526             UNKNOWN,                  // 1171B..1171C
7527             AHOM,                     // 1171D..1172B
7528             UNKNOWN,                  // 1172C..1172F
7529             AHOM,                     // 11730..1173F
7530             UNKNOWN,                  // 11740..117FF
7531             DOGRA,                    // 11800..1183B
7532             UNKNOWN,                  // 1183C..1189F
7533             WARANG_CITI,              // 118A0..118F2
7534             UNKNOWN,                  // 118F3..118FE
7535             WARANG_CITI,              // 118FF
7536             UNKNOWN,                  // 11900..119FF
7537             ZANABAZAR_SQUARE,         // 11A00..11A47
7538             UNKNOWN,                  // 11A48..11A4F
7539             SOYOMBO,                  // 11A50..11A83
7540             UNKNOWN,                  // 11A84..11A85
7541             SOYOMBO,                  // 11A86..11AA2
7542             UNKNOWN,                  // 11AA3..11ABF
7543             PAU_CIN_HAU,              // 11AC0..11AF8
7544             UNKNOWN,                  // 11AF9..11BFF
7545             BHAIKSUKI,                // 11C00..11C08
7546             UNKNOWN,                  // 11C09
7547             BHAIKSUKI,                // 11C0A..11C36
7548             UNKNOWN,                  // 11C37
7549             BHAIKSUKI,                // 11C38..11C45
7550             UNKNOWN,                  // 11C46..11C49
7551             BHAIKSUKI,                // 11C50..11C6C
7552             UNKNOWN,                  // 11C6D..11C6F
7553             MARCHEN,                  // 11C70..11C8F
7554             UNKNOWN,                  // 11C90..11C91
7555             MARCHEN,                  // 11C92..11CA7
7556             UNKNOWN,                  // 11CA8
7557             MARCHEN,                  // 11CA9..11CB6
7558             UNKNOWN,                  // 11CB7..11CFF
7559             MASARAM_GONDI,            // 11D00..11D06
7560             UNKNOWN,                  // 11D07
7561             MASARAM_GONDI,            // 11D08..11D09
7562             UNKNOWN,                  // 11D0A
7563             MASARAM_GONDI,            // 11D0B..11D36
7564             UNKNOWN,                  // 11D37..11D39
7565             MASARAM_GONDI,            // 11D3A
7566             UNKNOWN,                  // 11D3B
7567             MASARAM_GONDI,            // 11D3C..11D3D
7568             UNKNOWN,                  // 11D3E
7569             MASARAM_GONDI,            // 11D3F..11D47
7570             UNKNOWN,                  // 11D48..11D49
7571             MASARAM_GONDI,            // 11D50..11D59
7572             UNKNOWN,                  // 11D5A..11D5F
7573             GUNJALA_GONDI,            // 11D60..11D68 
7574             UNKNOWN,                  // 11D69       
7575             GUNJALA_GONDI,            // 11D6A..11D8E 
7576             UNKNOWN,                  // 11D8F              
7577             GUNJALA_GONDI,            // 11D90..11D91
7578             UNKNOWN,                  // 11D92         
7579             GUNJALA_GONDI,            // 11D93..11D98
7580             UNKNOWN,                  // 11D99        
7581             GUNJALA_GONDI,            // 11DA0..11DA9 
7582             UNKNOWN,                  // 11DAA..11DFF
7583             MAKASAR,                  // 11EE0..11EF8 
7584             UNKNOWN,                  // 11EF9..11FFF             
7585             CUNEIFORM,                // 12000..12399
7586             UNKNOWN,                  // 1239A..123FF
7587             CUNEIFORM,                // 12400..1246E
7588             UNKNOWN,                  // 1246F
7589             CUNEIFORM,                // 12470..12474
7590             UNKNOWN,                  // 12475..1247F
7591             CUNEIFORM,                // 12480..12543
7592             UNKNOWN,                  // 12544..12FFF
7593             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
7594             UNKNOWN,                  // 1342F..143FF
7595             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
7596             UNKNOWN,                  // 14647..167FF
7597             BAMUM,                    // 16800..16A38
7598             UNKNOWN,                  // 16A39..16A3F
7599             MRO,                      // 16A40..16A5E
7600             UNKNOWN,                  // 16A5F
7601             MRO,                      // 16A60..16A69
7602             UNKNOWN,                  // 16A6A..16A6D
7603             MRO,                      // 16A6E..16A6F
7604             UNKNOWN,                  // 16A70..16ACF
7605             BASSA_VAH,                // 16AD0..16AED
7606             UNKNOWN,                  // 16AEE..16AEF
7607             BASSA_VAH,                // 16AF0..16AF5
7608             UNKNOWN,                  // 16AF6..16AFF
7609             PAHAWH_HMONG,             // 16B00..16B45
7610             UNKNOWN,                  // 16B46..16B4F
7611             PAHAWH_HMONG,             // 16B50..16B59
7612             UNKNOWN,                  // 16B5A
7613             PAHAWH_HMONG,             // 16B5B..16B61
7614             UNKNOWN,                  // 16B62
7615             PAHAWH_HMONG,             // 16B63..16B77
7616             UNKNOWN,                  // 16B78..16B7C
7617             PAHAWH_HMONG,             // 16B7D..16B8F
7618             UNKNOWN,                  // 16B90..16E3F
7619             MEDEFAIDRIN,              // 16E40..16E9A
7620             UNKNOWN,                  // 16E9B..16EFF
7621             MIAO,                     // 16F00..16F44
7622             UNKNOWN,                  // 16F45..16F4F
7623             MIAO,                     // 16F50..16F7E
7624             UNKNOWN,                  // 16F7F..16F8E
7625             MIAO,                     // 16F8F..16F9F
7626             UNKNOWN,                  // 16FA0..16FDF
7627             TANGUT,                   // 16FE0
7628             NUSHU,                    // 16FE1
7629             UNKNOWN,                  // 16FE2..16FFF
7630             TANGUT,                   // 17000..187F1
7631             UNKNOWN,                  // 187F2..187FF
7632             TANGUT,                   // 18800..18AF2
7633             UNKNOWN,                  // 18AF3..1AFFF
7634             KATAKANA,                 // 1B000
7635             HIRAGANA,                 // 1B001..1B11E
7636             UNKNOWN,                  // 1B11F..1B16F
7637             NUSHU,                    // 1B170..1B2FB
7638             UNKNOWN,                  // 1B2FC..1BBFF
7639             DUPLOYAN,                 // 1BC00..1BC6A
7640             UNKNOWN,                  // 1BC6B..1BC6F
7641             DUPLOYAN,                 // 1BC70..1BC7C
7642             UNKNOWN,                  // 1BC7D..1BC7F
7643             DUPLOYAN,                 // 1BC80..1BC88
7644             UNKNOWN,                  // 1BC89..1BC8F
7645             DUPLOYAN,                 // 1BC90..1BC99
7646             UNKNOWN,                  // 1BC9A..1BC9B
7647             DUPLOYAN,                 // 1BC9C..1BC9F
7648             COMMON,                   // 1BCA0..1BCA3
7649             UNKNOWN,                  // 1BCA4..1CFFF
7650             COMMON,                   // 1D000..1D0F5
7651             UNKNOWN,                  // 1D0F6..1D0FF
7652             COMMON,                   // 1D100..1D126
7653             UNKNOWN,                  // 1D127..1D128
7654             COMMON,                   // 1D129..1D166
7655             INHERITED,                // 1D167..1D169
7656             COMMON,                   // 1D16A..1D17A
7657             INHERITED,                // 1D17B..1D182
7658             COMMON,                   // 1D183..1D184
7659             INHERITED,                // 1D185..1D18B
7660             COMMON,                   // 1D18C..1D1A9
7661             INHERITED,                // 1D1AA..1D1AD
7662             COMMON,                   // 1D1AE..1D1E8
7663             UNKNOWN,                  // 1D1E9..1D1FF
7664             GREEK,                    // 1D200..1D245
7665             UNKNOWN,                  // 1D246..1D2DF
7666             COMMON,                   // 1D2E0..1D2F3
7667             UNKNOWN,                  // 1D2F4..1D2FF
7668             COMMON,                   // 1D300..1D356        
7669             UNKNOWN,                  // 1D357..1D35F
7670             COMMON,                   // 1D360..1D378
7671             UNKNOWN,                  // 1D379..1D3FF
7672             COMMON,                   // 1D400..1D454
7673             UNKNOWN,                  // 1D455
7674             COMMON,                   // 1D456..1D49C
7675             UNKNOWN,                  // 1D49D
7676             COMMON,                   // 1D49E..1D49F
7677             UNKNOWN,                  // 1D4A0..1D4A1
7678             COMMON,                   // 1D4A2
7679             UNKNOWN,                  // 1D4A3..1D4A4
7680             COMMON,                   // 1D4A5..1D4A6
7681             UNKNOWN,                  // 1D4A7..1D4A8
7682             COMMON,                   // 1D4A9..1D4AC
7683             UNKNOWN,                  // 1D4AD
7684             COMMON,                   // 1D4AE..1D4B9
7685             UNKNOWN,                  // 1D4BA
7686             COMMON,                   // 1D4BB
7687             UNKNOWN,                  // 1D4BC
7688             COMMON,                   // 1D4BD..1D4C3
7689             UNKNOWN,                  // 1D4C4
7690             COMMON,                   // 1D4C5..1D505
7691             UNKNOWN,                  // 1D506
7692             COMMON,                   // 1D507..1D50A
7693             UNKNOWN,                  // 1D50B..1D50C
7694             COMMON,                   // 1D50D..1D514
7695             UNKNOWN,                  // 1D515
7696             COMMON,                   // 1D516..1D51C
7697             UNKNOWN,                  // 1D51D
7698             COMMON,                   // 1D51E..1D539
7699             UNKNOWN,                  // 1D53A
7700             COMMON,                   // 1D53B..1D53E
7701             UNKNOWN,                  // 1D53F
7702             COMMON,                   // 1D540..1D544
7703             UNKNOWN,                  // 1D545
7704             COMMON,                   // 1D546
7705             UNKNOWN,                  // 1D547..1D549
7706             COMMON,                   // 1D54A..1D550
7707             UNKNOWN,                  // 1D551
7708             COMMON,                   // 1D552..1D6A5
7709             UNKNOWN,                  // 1D6A6..1D6A7
7710             COMMON,                   // 1D6A8..1D7CB
7711             UNKNOWN,                  // 1D7CC..1D7CD
7712             COMMON,                   // 1D7CE..1D7FF
7713             SIGNWRITING,              // 1D800..1DA8B
7714             UNKNOWN,                  // 1DA8C..1DA9A
7715             SIGNWRITING,              // 1DA9B..1DA9F
7716             UNKNOWN,                  // 1DAA0
7717             SIGNWRITING,              // 1DAA1..1DAAF
7718             UNKNOWN,                  // 1DAB0..1DFFF
7719             GLAGOLITIC,               // 1E000..1E006
7720             UNKNOWN,                  // 1E007
7721             GLAGOLITIC,               // 1E008..1E018
7722             UNKNOWN,                  // 1E019..1E01A
7723             GLAGOLITIC,               // 1E01B..1E021
7724             UNKNOWN,                  // 1E022
7725             GLAGOLITIC,               // 1E023..1E024
7726             UNKNOWN,                  // 1E025
7727             GLAGOLITIC,               // 1E026..1E02A
7728             UNKNOWN,                  // 1E02B..1E7FF
7729             MENDE_KIKAKUI,            // 1E800..1E8C4
7730             UNKNOWN,                  // 1E8C5..1E8C6
7731             MENDE_KIKAKUI,            // 1E8C7..1E8D6
7732             UNKNOWN,                  // 1E8D7..1E8FF
7733             ADLAM,                    // 1E900..1E94A
7734             UNKNOWN,                  // 1E94B..1E94F
7735             ADLAM,                    // 1E950..1E959
7736             UNKNOWN,                  // 1E95A..1E95D
7737             ADLAM,                    // 1E95E..1E95F
7738             UNKNOWN,                  // 1E960..1EC70
7739             COMMON,                   // 1EC71..1ECB4
7740             UNKNOWN,                  // 1ECB5..1EDFF
7741             ARABIC,                   // 1EE00..1EE03
7742             UNKNOWN,                  // 1EE04
7743             ARABIC,                   // 1EE05..1EE1F
7744             UNKNOWN,                  // 1EE20
7745             ARABIC,                   // 1EE21..1EE22
7746             UNKNOWN,                  // 1EE23
7747             ARABIC,                   // 1EE24
7748             UNKNOWN,                  // 1EE25..1EE26
7749             ARABIC,                   // 1EE27
7750             UNKNOWN,                  // 1EE28
7751             ARABIC,                   // 1EE29..1EE32
7752             UNKNOWN,                  // 1EE33
7753             ARABIC,                   // 1EE34..1EE37
7754             UNKNOWN,                  // 1EE38
7755             ARABIC,                   // 1EE39
7756             UNKNOWN,                  // 1EE3A
7757             ARABIC,                   // 1EE3B
7758             UNKNOWN,                  // 1EE3C..1EE41
7759             ARABIC,                   // 1EE42
7760             UNKNOWN,                  // 1EE43..1EE46
7761             ARABIC,                   // 1EE47
7762             UNKNOWN,                  // 1EE48
7763             ARABIC,                   // 1EE49
7764             UNKNOWN,                  // 1EE4A
7765             ARABIC,                   // 1EE4B
7766             UNKNOWN,                  // 1EE4C
7767             ARABIC,                   // 1EE4D..1EE4F
7768             UNKNOWN,                  // 1EE50
7769             ARABIC,                   // 1EE51..1EE52
7770             UNKNOWN,                  // 1EE53
7771             ARABIC,                   // 1EE54
7772             UNKNOWN,                  // 1EE55..1EE56
7773             ARABIC,                   // 1EE57
7774             UNKNOWN,                  // 1EE58
7775             ARABIC,                   // 1EE59
7776             UNKNOWN,                  // 1EE5A
7777             ARABIC,                   // 1EE5B
7778             UNKNOWN,                  // 1EE5C
7779             ARABIC,                   // 1EE5D
7780             UNKNOWN,                  // 1EE5E
7781             ARABIC,                   // 1EE5F
7782             UNKNOWN,                  // 1EE60
7783             ARABIC,                   // 1EE61..1EE62
7784             UNKNOWN,                  // 1EE63
7785             ARABIC,                   // 1EE64
7786             UNKNOWN,                  // 1EE65..1EE66
7787             ARABIC,                   // 1EE67..1EE6A
7788             UNKNOWN,                  // 1EE6B
7789             ARABIC,                   // 1EE6C..1EE72
7790             UNKNOWN,                  // 1EE73
7791             ARABIC,                   // 1EE74..1EE77
7792             UNKNOWN,                  // 1EE78
7793             ARABIC,                   // 1EE79..1EE7C
7794             UNKNOWN,                  // 1EE7D
7795             ARABIC,                   // 1EE7E
7796             UNKNOWN,                  // 1EE7F
7797             ARABIC,                   // 1EE80..1EE89
7798             UNKNOWN,                  // 1EE8A
7799             ARABIC,                   // 1EE8B..1EE9B
7800             UNKNOWN,                  // 1EE9C..1EEA0
7801             ARABIC,                   // 1EEA1..1EEA3
7802             UNKNOWN,                  // 1EEA4
7803             ARABIC,                   // 1EEA5..1EEA9
7804             UNKNOWN,                  // 1EEAA
7805             ARABIC,                   // 1EEAB..1EEBB
7806             UNKNOWN,                  // 1EEBC..1EEEF
7807             ARABIC,                   // 1EEF0..1EEF1
7808             UNKNOWN,                  // 1EEF2..1EFFF
7809             COMMON,                   // 1F000..1F02B
7810             UNKNOWN,                  // 1F02C..1F02F
7811             COMMON,                   // 1F030..1F093
7812             UNKNOWN,                  // 1F094..1F09F
7813             COMMON,                   // 1F0A0..1F0AE
7814             UNKNOWN,                  // 1F0AF..1F0B0
7815             COMMON,                   // 1F0B1..1F0BF
7816             UNKNOWN,                  // 1F0C0
7817             COMMON,                   // 1F0C1..1F0CF
7818             UNKNOWN,                  // 1F0D0
7819             COMMON,                   // 1F0D1..1F0F5
7820             UNKNOWN,                  // 1F0F6..1F0FF
7821             COMMON,                   // 1F100..1F10C
7822             UNKNOWN,                  // 1F10D..1F10F
7823             COMMON,                   // 1F110..1F16B
7824             UNKNOWN,                  // 1F16C..1F16F
7825             COMMON,                   // 1F170..1F1AC
7826             UNKNOWN,                  // 1F1AD..1F1E5
7827             COMMON,                   // 1F1E6..1F1FF
7828             HIRAGANA,                 // 1F200
7829             COMMON,                   // 1F201..1F202
7830             UNKNOWN,                  // 1F203..1F20F
7831             COMMON,                   // 1F210..1F23B
7832             UNKNOWN,                  // 1F23C..1F23F
7833             COMMON,                   // 1F240..1F248
7834             UNKNOWN,                  // 1F249..1F24F
7835             COMMON,                   // 1F250..1F251
7836             UNKNOWN,                  // 1F252..1F25F
7837             COMMON,                   // 1F260..1F265
7838             UNKNOWN,                  // 1F266..1F2FF
7839             COMMON,                   // 1F300..1F6D4
7840             UNKNOWN,                  // 1F6D5..1F6DF
7841             COMMON,                   // 1F6E0..1F6EC
7842             UNKNOWN,                  // 1F6ED..1F6EF            
7843             COMMON,                   // 1F6F0..1F6F9
7844             UNKNOWN,                  // 1F6FA..1F6FF
7845             COMMON,                   // 1F700..1F773
7846             UNKNOWN,                  // 1F774..1F77F
7847             COMMON,                   // 1F780..1F7D8
7848             UNKNOWN,                  // 1F7D9..1F7FF
7849             COMMON,                   // 1F800..1F80B
7850             UNKNOWN,                  // 1F80C..1F80F
7851             COMMON,                   // 1F810..1F847
7852             UNKNOWN,                  // 1F848..1F84F
7853             COMMON,                   // 1F850..1F859
7854             UNKNOWN,                  // 1F85A..1F85F
7855             COMMON,                   // 1F860..1F887
7856             UNKNOWN,                  // 1F888..1F88F
7857             COMMON,                   // 1F890..1F8AD
7858             UNKNOWN,                  // 1F8AE..1F8FF
7859             COMMON,                   // 1F900..1F90B
7860             UNKNOWN,                  // 1F90C..1F90F
7861             COMMON,                   // 1F910..1F93E
7862             UNKNOWN,                  // 1F93F
7863             COMMON,                   // 1F940..1F970
7864             UNKNOWN,                  // 1F971..1F972
7865             COMMON,                   // 1F973..1F976
7866             UNKNOWN,                  // 1F977..1F979
7867             COMMON,                   // 1F97A
7868             UNKNOWN,                  // 1F97B
7869             COMMON,                   // 1F97C..1F9A2
7870             UNKNOWN,                  // 1F9A3..1F9AF
7871             COMMON,                   // 1F9B0..1F9B9
7872             UNKNOWN,                  // 1F9BA..1F9BF
7873             COMMON,                   // 1F9C0..1F9C2
7874             UNKNOWN,                  // 1F9C3..1F9CF
7875             COMMON,                   // 1F9D0..1F9FF
7876             UNKNOWN,                  // 1FA00..1FA5F
7877             COMMON,                   // 1FA60..1FA6D
7878             UNKNOWN,                  // 1FA6E..1FFFF
7879             HAN,                      // 20000..2A6D6
7880             UNKNOWN,                  // 2A6D7..2A6FF
7881             HAN,                      // 2A700..2B734
7882             UNKNOWN,                  // 2B735..2B73F
7883             HAN,                      // 2B740..2B81D
7884             UNKNOWN,                  // 2B81E..2B81F
7885             HAN,                      // 2B820..2CEA1
7886             UNKNOWN,                  // 2CEA2..2CEAF
7887             HAN,                      // 2CEB0..2EBE0
7888             UNKNOWN,                  // 2EBE1..2F7FF
7889             HAN,                      // 2F800..2FA1D
7890             UNKNOWN,                  // 2FA1E..E0000
7891             COMMON,                   // E0001
7892             UNKNOWN,                  // E0002..E001F
7893             COMMON,                   // E0020..E007F
7894             UNKNOWN,                  // E0080..E00FF
7895             INHERITED,                // E0100..E01EF
7896             UNKNOWN                   // E01F0..10FFFF
7897         };
7898 
7899         private static HashMap<String, Character.UnicodeScript> aliases;
7900         static {
7901             aliases = new HashMap<>((int)(149 / 0.75f + 1.0f));
7902             aliases.put("ADLM", ADLAM);
7903             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7904             aliases.put("AHOM", AHOM);
7905             aliases.put("ARAB", ARABIC);
7906             aliases.put("ARMI", IMPERIAL_ARAMAIC);
7907             aliases.put("ARMN", ARMENIAN);
7908             aliases.put("AVST", AVESTAN);
7909             aliases.put("BALI", BALINESE);
7910             aliases.put("BAMU", BAMUM);
7911             aliases.put("BASS", BASSA_VAH);
7912             aliases.put("BATK", BATAK);
7913             aliases.put("BENG", BENGALI);
7914             aliases.put("BHKS", BHAIKSUKI);
7915             aliases.put("BOPO", BOPOMOFO);
7916             aliases.put("BRAH", BRAHMI);
7917             aliases.put("BRAI", BRAILLE);
7918             aliases.put("BUGI", BUGINESE);
7919             aliases.put("BUHD", BUHID);
7920             aliases.put("CAKM", CHAKMA);
7921             aliases.put("CANS", CANADIAN_ABORIGINAL);
7922             aliases.put("CARI", CARIAN);
7923             aliases.put("CHAM", CHAM);
7924             aliases.put("CHER", CHEROKEE);
7925             aliases.put("COPT", COPTIC);
7926             aliases.put("CPRT", CYPRIOT);
7927             aliases.put("CYRL", CYRILLIC);
7928             aliases.put("DEVA", DEVANAGARI);
7929             aliases.put("DOGR", DOGRA);
7930             aliases.put("DSRT", DESERET);
7931             aliases.put("DUPL", DUPLOYAN);
7932             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7933             aliases.put("ELBA", ELBASAN);
7934             aliases.put("ETHI", ETHIOPIC);
7935             aliases.put("GEOR", GEORGIAN);
7936             aliases.put("GLAG", GLAGOLITIC);
7937             aliases.put("GONM", MASARAM_GONDI);
7938             aliases.put("GOTH", GOTHIC);
7939             aliases.put("GONG", GUNJALA_GONDI);
7940             aliases.put("GRAN", GRANTHA);
7941             aliases.put("GREK", GREEK);
7942             aliases.put("GUJR", GUJARATI);
7943             aliases.put("GURU", GURMUKHI);
7944             aliases.put("HANG", HANGUL);
7945             aliases.put("HANI", HAN);
7946             aliases.put("HANO", HANUNOO);
7947             aliases.put("HATR", HATRAN);
7948             aliases.put("HEBR", HEBREW);
7949             aliases.put("HIRA", HIRAGANA);
7950             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
7951             aliases.put("HMNG", PAHAWH_HMONG);
7952             // it appears we don't have the KATAKANA_OR_HIRAGANA
7953             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7954             aliases.put("HUNG", OLD_HUNGARIAN);
7955             aliases.put("ITAL", OLD_ITALIC);
7956             aliases.put("JAVA", JAVANESE);
7957             aliases.put("KALI", KAYAH_LI);
7958             aliases.put("KANA", KATAKANA);
7959             aliases.put("KHAR", KHAROSHTHI);
7960             aliases.put("KHMR", KHMER);
7961             aliases.put("KHOJ", KHOJKI);
7962             aliases.put("KNDA", KANNADA);
7963             aliases.put("KTHI", KAITHI);
7964             aliases.put("LANA", TAI_THAM);
7965             aliases.put("LAOO", LAO);
7966             aliases.put("LATN", LATIN);
7967             aliases.put("LEPC", LEPCHA);
7968             aliases.put("LIMB", LIMBU);
7969             aliases.put("LINA", LINEAR_A);
7970             aliases.put("LINB", LINEAR_B);
7971             aliases.put("LISU", LISU);
7972             aliases.put("LYCI", LYCIAN);
7973             aliases.put("LYDI", LYDIAN);
7974             aliases.put("MAHJ", MAHAJANI);
7975             aliases.put("MAKA", MAKASAR);
7976             aliases.put("MARC", MARCHEN);
7977             aliases.put("MAND", MANDAIC);
7978             aliases.put("MANI", MANICHAEAN);
7979             aliases.put("MEDF", MEDEFAIDRIN);
7980             aliases.put("MEND", MENDE_KIKAKUI);
7981             aliases.put("MERC", MEROITIC_CURSIVE);
7982             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7983             aliases.put("MLYM", MALAYALAM);
7984             aliases.put("MODI", MODI);
7985             aliases.put("MONG", MONGOLIAN);
7986             aliases.put("MROO", MRO);
7987             aliases.put("MTEI", MEETEI_MAYEK);
7988             aliases.put("MULT", MULTANI);
7989             aliases.put("MYMR", MYANMAR);
7990             aliases.put("NARB", OLD_NORTH_ARABIAN);
7991             aliases.put("NBAT", NABATAEAN);
7992             aliases.put("NEWA", NEWA);
7993             aliases.put("NKOO", NKO);
7994             aliases.put("NSHU", NUSHU);
7995             aliases.put("OGAM", OGHAM);
7996             aliases.put("OLCK", OL_CHIKI);
7997             aliases.put("ORKH", OLD_TURKIC);
7998             aliases.put("ORYA", ORIYA);
7999             aliases.put("OSGE", OSAGE);
8000             aliases.put("OSMA", OSMANYA);
8001             aliases.put("PALM", PALMYRENE);
8002             aliases.put("PAUC", PAU_CIN_HAU);
8003             aliases.put("PERM", OLD_PERMIC);
8004             aliases.put("PHAG", PHAGS_PA);
8005             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8006             aliases.put("PHLP", PSALTER_PAHLAVI);
8007             aliases.put("PHNX", PHOENICIAN);
8008             aliases.put("PLRD", MIAO);
8009             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8010             aliases.put("RJNG", REJANG);
8011             aliases.put("ROHG", HANIFI_ROHINGYA);
8012             aliases.put("RUNR", RUNIC);
8013             aliases.put("SAMR", SAMARITAN);
8014             aliases.put("SARB", OLD_SOUTH_ARABIAN);
8015             aliases.put("SAUR", SAURASHTRA);
8016             aliases.put("SGNW", SIGNWRITING);
8017             aliases.put("SHAW", SHAVIAN);
8018             aliases.put("SHRD", SHARADA);
8019             aliases.put("SIDD", SIDDHAM);
8020             aliases.put("SIND", KHUDAWADI);
8021             aliases.put("SINH", SINHALA);
8022             aliases.put("SOGD", SOGDIAN);
8023             aliases.put("SOGO", OLD_SOGDIAN);
8024             aliases.put("SORA", SORA_SOMPENG);
8025             aliases.put("SOYO", SOYOMBO);
8026             aliases.put("SUND", SUNDANESE);
8027             aliases.put("SYLO", SYLOTI_NAGRI);
8028             aliases.put("SYRC", SYRIAC);
8029             aliases.put("TAGB", TAGBANWA);
8030             aliases.put("TAKR", TAKRI);
8031             aliases.put("TALE", TAI_LE);
8032             aliases.put("TALU", NEW_TAI_LUE);
8033             aliases.put("TAML", TAMIL);
8034             aliases.put("TANG", TANGUT);
8035             aliases.put("TAVT", TAI_VIET);
8036             aliases.put("TELU", TELUGU);
8037             aliases.put("TFNG", TIFINAGH);
8038             aliases.put("TGLG", TAGALOG);
8039             aliases.put("THAA", THAANA);
8040             aliases.put("THAI", THAI);
8041             aliases.put("TIBT", TIBETAN);
8042             aliases.put("TIRH", TIRHUTA);
8043             aliases.put("UGAR", UGARITIC);
8044             aliases.put("VAII", VAI);
8045             aliases.put("WARA", WARANG_CITI);
8046             aliases.put("XPEO", OLD_PERSIAN);
8047             aliases.put("XSUX", CUNEIFORM);
8048             aliases.put("YIII", YI);
8049             aliases.put("ZANB", ZANABAZAR_SQUARE);
8050             aliases.put("ZINH", INHERITED);
8051             aliases.put("ZYYY", COMMON);
8052             aliases.put("ZZZZ", UNKNOWN);
8053         }
8054 
8055         /**
         * Returns the enum constant representing the Unicode script to which
         * the given character (Unicode code point) is assigned.
8058          *
8059          * @param   codePoint the character (Unicode code point) in question.
         * @return  The {@code UnicodeScript} constant representing the
         *          Unicode script to which this character is assigned.
8062          *
8063          * @throws  IllegalArgumentException if the specified
8064          * {@code codePoint} is an invalid Unicode code point.
8065          * @see Character#isValidCodePoint(int)
8066          *
8067          */
8068         public static UnicodeScript of(int codePoint) {
8069             if (!isValidCodePoint(codePoint))
8070                 throw new IllegalArgumentException(
8071                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
8072             int type = getType(codePoint);
8073             // leave SURROGATE and PRIVATE_USE for table lookup
8074             if (type == UNASSIGNED)
8075                 return UNKNOWN;
8076             int index = Arrays.binarySearch(scriptStarts, codePoint);
8077             if (index < 0)
8078                 index = -index - 2;
8079             return scripts[index];
8080         }
8081 
8082         /**
8083          * Returns the UnicodeScript constant with the given Unicode script
8084          * name or the script name alias. Script names and their aliases are
8085          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
8086          * and {@code PropertyValueAliases<version>.txt} define script names
8087          * and the script name aliases for a particular version of the
8088          * standard. The {@link Character} class specifies the version of
8089          * the standard that it supports.
8090          * <p>
8091          * Character case is ignored for all of the valid script names.
8092          * The en_US locale's case mapping rules are used to provide
8093          * case-insensitive string comparisons for script name validation.
8094          *
8095          * @param scriptName A {@code UnicodeScript} name.
8096          * @return The {@code UnicodeScript} constant identified
8097          *         by {@code scriptName}
8098          * @throws IllegalArgumentException if {@code scriptName} is an
8099          *         invalid name
8100          * @throws NullPointerException if {@code scriptName} is null
8101          */
8102         public static final UnicodeScript forName(String scriptName) {
8103             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
8104                                  //.replace(' ', '_'));
8105             UnicodeScript sc = aliases.get(scriptName);
8106             if (sc != null)
8107                 return sc;
8108             return valueOf(scriptName);
8109         }
8110     }
8111 
8112     /**
8113      * The value of the {@code Character}.
8114      *
8115      * @serial
8116      */
8117     private final char value;
8118 
8119     /** use serialVersionUID from JDK 1.0.2 for interoperability */
8120     private static final long serialVersionUID = 3786198910865385080L;
8121 
8122     /**
8123      * Constructs a newly allocated {@code Character} object that
8124      * represents the specified {@code char} value.
8125      *
8126      * @param  value   the value to be represented by the
8127      *                  {@code Character} object.
8128      *
8129      * @deprecated
8130      * It is rarely appropriate to use this constructor. The static factory
8131      * {@link #valueOf(char)} is generally a better choice, as it is
8132      * likely to yield significantly better space and time performance.
8133      */
8134     @Deprecated(since="9")
8135     public Character(char value) {
8136         this.value = value;
8137     }
8138 
8139     private static class CharacterCache {
8140         private CharacterCache(){}
8141 
8142         static final Character cache[] = new Character[127 + 1];
8143 
8144         static {
8145             for (int i = 0; i < cache.length; i++)
8146                 cache[i] = new Character((char)i);
8147         }
8148     }
8149 
8150     /**
8151      * Returns a {@code Character} instance representing the specified
8152      * {@code char} value.
8153      * If a new {@code Character} instance is not required, this method
8154      * should generally be used in preference to the constructor
8155      * {@link #Character(char)}, as this method is likely to yield
8156      * significantly better space and time performance by caching
8157      * frequently requested values.
8158      *
8159      * This method will always cache values in the range {@code
8160      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
8161      * cache other values outside of this range.
8162      *
8163      * @param  c a char value.
8164      * @return a {@code Character} instance representing {@code c}.
8165      * @since  1.5
8166      */
8167     @HotSpotIntrinsicCandidate
8168     public static Character valueOf(char c) {
8169         if (c <= 127) { // must cache
8170             return CharacterCache.cache[(int)c];
8171         }
8172         return new Character(c);
8173     }
8174 
8175     /**
8176      * Returns the value of this {@code Character} object.
8177      * @return  the primitive {@code char} value represented by
8178      *          this object.
8179      */
8180     @HotSpotIntrinsicCandidate
8181     public char charValue() {
8182         return value;
8183     }
8184 
8185     /**
8186      * Returns a hash code for this {@code Character}; equal to the result
8187      * of invoking {@code charValue()}.
8188      *
8189      * @return a hash code value for this {@code Character}
8190      */
8191     @Override
8192     public int hashCode() {
8193         return Character.hashCode(value);
8194     }
8195 
8196     /**
8197      * Returns a hash code for a {@code char} value; compatible with
8198      * {@code Character.hashCode()}.
8199      *
8200      * @since 1.8
8201      *
8202      * @param value The {@code char} for which to return a hash code.
8203      * @return a hash code value for a {@code char} value.
8204      */
8205     public static int hashCode(char value) {
8206         return (int)value;
8207     }
8208 
8209     /**
8210      * Compares this object against the specified object.
8211      * The result is {@code true} if and only if the argument is not
8212      * {@code null} and is a {@code Character} object that
8213      * represents the same {@code char} value as this object.
8214      *
8215      * @param   obj   the object to compare with.
8216      * @return  {@code true} if the objects are the same;
8217      *          {@code false} otherwise.
8218      */
8219     public boolean equals(Object obj) {
8220         if (obj instanceof Character) {
8221             return value == ((Character)obj).charValue();
8222         }
8223         return false;
8224     }
8225 
8226     /**
8227      * Returns a {@code String} object representing this
8228      * {@code Character}'s value.  The result is a string of
8229      * length 1 whose sole component is the primitive
8230      * {@code char} value represented by this
8231      * {@code Character} object.
8232      *
8233      * @return  a string representation of this object.
8234      */
8235     public String toString() {
8236         char buf[] = {value};
8237         return String.valueOf(buf);
8238     }
8239 
8240     /**
8241      * Returns a {@code String} object representing the
8242      * specified {@code char}.  The result is a string of length
8243      * 1 consisting solely of the specified {@code char}.
8244      *
8245      * @apiNote This method cannot handle <a
8246      * href="#supplementary"> supplementary characters</a>. To support
8247      * all Unicode characters, including supplementary characters, use
8248      * the {@link #toString(int)} method.
8249      *
8250      * @param c the {@code char} to be converted
8251      * @return the string representation of the specified {@code char}
8252      * @since 1.4
8253      */
8254     public static String toString(char c) {
8255         return String.valueOf(c);
8256     }
8257 
8258     /**
8259      * Returns a {@code String} object representing the
8260      * specified character (Unicode code point).  The result is a string of
8261      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8262      *
8263      * @param codePoint the {@code codePoint} to be converted
8264      * @return the string representation of the specified {@code codePoint}
8265      * @throws IllegalArgumentException if the specified
8266      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8267      *      valid Unicode code point}.
8268      * @since 11
8269      */
8270     public static String toString(int codePoint) {
8271         return String.valueOfCodePoint(codePoint);
8272     }
8273 
8274     /**
8275      * Determines whether the specified code point is a valid
8276      * <a href="http://www.unicode.org/glossary/#code_point">
8277      * Unicode code point value</a>.
8278      *
8279      * @param  codePoint the Unicode code point to be tested
8280      * @return {@code true} if the specified code point value is between
8281      *         {@link #MIN_CODE_POINT} and
8282      *         {@link #MAX_CODE_POINT} inclusive;
8283      *         {@code false} otherwise.
8284      * @since  1.5
8285      */
8286     public static boolean isValidCodePoint(int codePoint) {
8287         // Optimized form of:
8288         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8289         int plane = codePoint >>> 16;
8290         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8291     }
8292 
8293     /**
8294      * Determines whether the specified character (Unicode code point)
8295      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8296      * Such code points can be represented using a single {@code char}.
8297      *
8298      * @param  codePoint the character (Unicode code point) to be tested
8299      * @return {@code true} if the specified code point is between
8300      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8301      *         {@code false} otherwise.
8302      * @since  1.7
8303      */
8304     public static boolean isBmpCodePoint(int codePoint) {
8305         return codePoint >>> 16 == 0;
8306         // Optimized form of:
8307         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8308         // We consistently use logical shift (>>>) to facilitate
8309         // additional runtime optimizations.
8310     }
8311 
8312     /**
8313      * Determines whether the specified character (Unicode code point)
8314      * is in the <a href="#supplementary">supplementary character</a> range.
8315      *
8316      * @param  codePoint the character (Unicode code point) to be tested
8317      * @return {@code true} if the specified code point is between
8318      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8319      *         {@link #MAX_CODE_POINT} inclusive;
8320      *         {@code false} otherwise.
8321      * @since  1.5
8322      */
8323     public static boolean isSupplementaryCodePoint(int codePoint) {
8324         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8325             && codePoint <  MAX_CODE_POINT + 1;
8326     }
8327 
8328     /**
8329      * Determines if the given {@code char} value is a
8330      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8331      * Unicode high-surrogate code unit</a>
8332      * (also known as <i>leading-surrogate code unit</i>).
8333      *
8334      * <p>Such values do not represent characters by themselves,
8335      * but are used in the representation of
8336      * <a href="#supplementary">supplementary characters</a>
8337      * in the UTF-16 encoding.
8338      *
8339      * @param  ch the {@code char} value to be tested.
8340      * @return {@code true} if the {@code char} value is between
8341      *         {@link #MIN_HIGH_SURROGATE} and
8342      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8343      *         {@code false} otherwise.
8344      * @see    Character#isLowSurrogate(char)
8345      * @see    Character.UnicodeBlock#of(int)
8346      * @since  1.5
8347      */
8348     public static boolean isHighSurrogate(char ch) {
8349         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8350         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8351     }
8352 
8353     /**
8354      * Determines if the given {@code char} value is a
8355      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8356      * Unicode low-surrogate code unit</a>
8357      * (also known as <i>trailing-surrogate code unit</i>).
8358      *
8359      * <p>Such values do not represent characters by themselves,
8360      * but are used in the representation of
8361      * <a href="#supplementary">supplementary characters</a>
8362      * in the UTF-16 encoding.
8363      *
8364      * @param  ch the {@code char} value to be tested.
8365      * @return {@code true} if the {@code char} value is between
8366      *         {@link #MIN_LOW_SURROGATE} and
8367      *         {@link #MAX_LOW_SURROGATE} inclusive;
8368      *         {@code false} otherwise.
8369      * @see    Character#isHighSurrogate(char)
8370      * @since  1.5
8371      */
8372     public static boolean isLowSurrogate(char ch) {
8373         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8374     }
8375 
8376     /**
8377      * Determines if the given {@code char} value is a Unicode
8378      * <i>surrogate code unit</i>.
8379      *
8380      * <p>Such values do not represent characters by themselves,
8381      * but are used in the representation of
8382      * <a href="#supplementary">supplementary characters</a>
8383      * in the UTF-16 encoding.
8384      *
8385      * <p>A char value is a surrogate code unit if and only if it is either
8386      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8387      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8388      *
8389      * @param  ch the {@code char} value to be tested.
8390      * @return {@code true} if the {@code char} value is between
8391      *         {@link #MIN_SURROGATE} and
8392      *         {@link #MAX_SURROGATE} inclusive;
8393      *         {@code false} otherwise.
8394      * @since  1.7
8395      */
8396     public static boolean isSurrogate(char ch) {
8397         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8398     }
8399 
8400     /**
8401      * Determines whether the specified pair of {@code char}
8402      * values is a valid
8403      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8404      * Unicode surrogate pair</a>.
     *
8406      * <p>This method is equivalent to the expression:
8407      * <blockquote><pre>{@code
8408      * isHighSurrogate(high) && isLowSurrogate(low)
8409      * }</pre></blockquote>
8410      *
8411      * @param  high the high-surrogate code value to be tested
8412      * @param  low the low-surrogate code value to be tested
8413      * @return {@code true} if the specified high and
8414      * low-surrogate code values represent a valid surrogate pair;
8415      * {@code false} otherwise.
8416      * @since  1.5
8417      */
8418     public static boolean isSurrogatePair(char high, char low) {
8419         return isHighSurrogate(high) && isLowSurrogate(low);
8420     }
8421 
8422     /**
8423      * Determines the number of {@code char} values needed to
8424      * represent the specified character (Unicode code point). If the
8425      * specified character is equal to or greater than 0x10000, then
8426      * the method returns 2. Otherwise, the method returns 1.
8427      *
8428      * <p>This method doesn't validate the specified character to be a
8429      * valid Unicode code point. The caller must validate the
8430      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8431      * if necessary.
8432      *
8433      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  2 if {@code codePoint} is equal to or greater than 0x10000
     *          (no validity check is performed); 1 otherwise.
8435      * @see     Character#isSupplementaryCodePoint(int)
8436      * @since   1.5
8437      */
8438     public static int charCount(int codePoint) {
8439         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8440     }
8441 
8442     /**
8443      * Converts the specified surrogate pair to its supplementary code
8444      * point value. This method does not validate the specified
8445      * surrogate pair. The caller must validate it using {@link
8446      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8447      *
8448      * @param  high the high-surrogate code unit
8449      * @param  low the low-surrogate code unit
8450      * @return the supplementary code point composed from the
8451      *         specified surrogate pair.
8452      * @since  1.5
8453      */
8454     public static int toCodePoint(char high, char low) {
8455         // Optimized form of:
8456         // return ((high - MIN_HIGH_SURROGATE) << 10)
8457         //         + (low - MIN_LOW_SURROGATE)
8458         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8459         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8460                                        - (MIN_HIGH_SURROGATE << 10)
8461                                        - MIN_LOW_SURROGATE);
8462     }
8463 
8464     /**
8465      * Returns the code point at the given index of the
8466      * {@code CharSequence}. If the {@code char} value at
8467      * the given index in the {@code CharSequence} is in the
8468      * high-surrogate range, the following index is less than the
8469      * length of the {@code CharSequence}, and the
8470      * {@code char} value at the following index is in the
8471      * low-surrogate range, then the supplementary code point
8472      * corresponding to this surrogate pair is returned. Otherwise,
8473      * the {@code char} value at the given index is returned.
8474      *
8475      * @param seq a sequence of {@code char} values (Unicode code
8476      * units)
8477      * @param index the index to the {@code char} values (Unicode
8478      * code units) in {@code seq} to be converted
8479      * @return the Unicode code point at the given index
8480      * @throws NullPointerException if {@code seq} is null.
8481      * @throws IndexOutOfBoundsException if the value
8482      * {@code index} is negative or not less than
8483      * {@link CharSequence#length() seq.length()}.
8484      * @since  1.5
8485      */
8486     public static int codePointAt(CharSequence seq, int index) {
8487         char c1 = seq.charAt(index);
8488         if (isHighSurrogate(c1) && ++index < seq.length()) {
8489             char c2 = seq.charAt(index);
8490             if (isLowSurrogate(c2)) {
8491                 return toCodePoint(c1, c2);
8492             }
8493         }
8494         return c1;
8495     }
8496 
8497     /**
8498      * Returns the code point at the given index of the
8499      * {@code char} array. If the {@code char} value at
8500      * the given index in the {@code char} array is in the
8501      * high-surrogate range, the following index is less than the
8502      * length of the {@code char} array, and the
8503      * {@code char} value at the following index is in the
8504      * low-surrogate range, then the supplementary code point
8505      * corresponding to this surrogate pair is returned. Otherwise,
8506      * the {@code char} value at the given index is returned.
8507      *
8508      * @param a the {@code char} array
8509      * @param index the index to the {@code char} values (Unicode
8510      * code units) in the {@code char} array to be converted
8511      * @return the Unicode code point at the given index
8512      * @throws NullPointerException if {@code a} is null.
8513      * @throws IndexOutOfBoundsException if the value
8514      * {@code index} is negative or not less than
8515      * the length of the {@code char} array.
8516      * @since  1.5
8517      */
8518     public static int codePointAt(char[] a, int index) {
8519         return codePointAtImpl(a, index, a.length);
8520     }
8521 
8522     /**
8523      * Returns the code point at the given index of the
8524      * {@code char} array, where only array elements with
8525      * {@code index} less than {@code limit} can be used. If
8526      * the {@code char} value at the given index in the
8527      * {@code char} array is in the high-surrogate range, the
8528      * following index is less than the {@code limit}, and the
8529      * {@code char} value at the following index is in the
8530      * low-surrogate range, then the supplementary code point
8531      * corresponding to this surrogate pair is returned. Otherwise,
8532      * the {@code char} value at the given index is returned.
8533      *
8534      * @param a the {@code char} array
8535      * @param index the index to the {@code char} values (Unicode
8536      * code units) in the {@code char} array to be converted
8537      * @param limit the index after the last array element that
8538      * can be used in the {@code char} array
8539      * @return the Unicode code point at the given index
8540      * @throws NullPointerException if {@code a} is null.
8541      * @throws IndexOutOfBoundsException if the {@code index}
8542      * argument is negative or not less than the {@code limit}
8543      * argument, or if the {@code limit} argument is negative or
8544      * greater than the length of the {@code char} array.
8545      * @since  1.5
8546      */
8547     public static int codePointAt(char[] a, int index, int limit) {
8548         if (index >= limit || limit < 0 || limit > a.length) {
8549             throw new IndexOutOfBoundsException();
8550         }
8551         return codePointAtImpl(a, index, limit);
8552     }
8553 
8554     // throws ArrayIndexOutOfBoundsException if index out of bounds
8555     static int codePointAtImpl(char[] a, int index, int limit) {
8556         char c1 = a[index];
8557         if (isHighSurrogate(c1) && ++index < limit) {
8558             char c2 = a[index];
8559             if (isLowSurrogate(c2)) {
8560                 return toCodePoint(c1, c2);
8561             }
8562         }
8563         return c1;
8564     }
8565 
8566     /**
8567      * Returns the code point preceding the given index of the
8568      * {@code CharSequence}. If the {@code char} value at
8569      * {@code (index - 1)} in the {@code CharSequence} is in
8570      * the low-surrogate range, {@code (index - 2)} is not
8571      * negative, and the {@code char} value at {@code (index - 2)}
8572      * in the {@code CharSequence} is in the
8573      * high-surrogate range, then the supplementary code point
8574      * corresponding to this surrogate pair is returned. Otherwise,
8575      * the {@code char} value at {@code (index - 1)} is
8576      * returned.
8577      *
8578      * @param seq the {@code CharSequence} instance
8579      * @param index the index following the code point that should be returned
8580      * @return the Unicode code point value before the given index.
8581      * @throws NullPointerException if {@code seq} is null.
8582      * @throws IndexOutOfBoundsException if the {@code index}
8583      * argument is less than 1 or greater than {@link
8584      * CharSequence#length() seq.length()}.
8585      * @since  1.5
8586      */
8587     public static int codePointBefore(CharSequence seq, int index) {
8588         char c2 = seq.charAt(--index);
8589         if (isLowSurrogate(c2) && index > 0) {
8590             char c1 = seq.charAt(--index);
8591             if (isHighSurrogate(c1)) {
8592                 return toCodePoint(c1, c2);
8593             }
8594         }
8595         return c2;
8596     }
8597 
8598     /**
8599      * Returns the code point preceding the given index of the
8600      * {@code char} array. If the {@code char} value at
8601      * {@code (index - 1)} in the {@code char} array is in
8602      * the low-surrogate range, {@code (index - 2)} is not
8603      * negative, and the {@code char} value at {@code (index - 2)}
8604      * in the {@code char} array is in the
8605      * high-surrogate range, then the supplementary code point
8606      * corresponding to this surrogate pair is returned. Otherwise,
8607      * the {@code char} value at {@code (index - 1)} is
8608      * returned.
8609      *
8610      * @param a the {@code char} array
8611      * @param index the index following the code point that should be returned
8612      * @return the Unicode code point value before the given index.
8613      * @throws NullPointerException if {@code a} is null.
8614      * @throws IndexOutOfBoundsException if the {@code index}
8615      * argument is less than 1 or greater than the length of the
8616      * {@code char} array
8617      * @since  1.5
8618      */
8619     public static int codePointBefore(char[] a, int index) {
8620         return codePointBeforeImpl(a, index, 0);
8621     }
8622 
8623     /**
8624      * Returns the code point preceding the given index of the
8625      * {@code char} array, where only array elements with
8626      * {@code index} greater than or equal to {@code start}
8627      * can be used. If the {@code char} value at {@code (index - 1)}
8628      * in the {@code char} array is in the
8629      * low-surrogate range, {@code (index - 2)} is not less than
8630      * {@code start}, and the {@code char} value at
8631      * {@code (index - 2)} in the {@code char} array is in
8632      * the high-surrogate range, then the supplementary code point
8633      * corresponding to this surrogate pair is returned. Otherwise,
8634      * the {@code char} value at {@code (index - 1)} is
8635      * returned.
8636      *
8637      * @param a the {@code char} array
8638      * @param index the index following the code point that should be returned
8639      * @param start the index of the first array element in the
8640      * {@code char} array
8641      * @return the Unicode code point value before the given index.
8642      * @throws NullPointerException if {@code a} is null.
8643      * @throws IndexOutOfBoundsException if the {@code index}
8644      * argument is not greater than the {@code start} argument or
8645      * is greater than the length of the {@code char} array, or
8646      * if the {@code start} argument is negative or not less than
8647      * the length of the {@code char} array.
8648      * @since  1.5
8649      */
8650     public static int codePointBefore(char[] a, int index, int start) {
8651         if (index <= start || start < 0 || start >= a.length) {
8652             throw new IndexOutOfBoundsException();
8653         }
8654         return codePointBeforeImpl(a, index, start);
8655     }
8656 
8657     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8658     static int codePointBeforeImpl(char[] a, int index, int start) {
8659         char c2 = a[--index];
8660         if (isLowSurrogate(c2) && index > start) {
8661             char c1 = a[--index];
8662             if (isHighSurrogate(c1)) {
8663                 return toCodePoint(c1, c2);
8664             }
8665         }
8666         return c2;
8667     }
8668 
8669     /**
8670      * Returns the leading surrogate (a
8671      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8672      * high surrogate code unit</a>) of the
8673      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8674      * surrogate pair</a>
8675      * representing the specified supplementary character (Unicode
8676      * code point) in the UTF-16 encoding.  If the specified character
8677      * is not a
8678      * <a href="Character.html#supplementary">supplementary character</a>,
8679      * an unspecified {@code char} is returned.
8680      *
8681      * <p>If
8682      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8683      * is {@code true}, then
8684      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8685      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8686      * are also always {@code true}.
8687      *
8688      * @param   codePoint a supplementary character (Unicode code point)
8689      * @return  the leading surrogate code unit used to represent the
8690      *          character in the UTF-16 encoding
8691      * @since   1.7
8692      */
8693     public static char highSurrogate(int codePoint) {
8694         return (char) ((codePoint >>> 10)
8695             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8696     }
8697 
8698     /**
8699      * Returns the trailing surrogate (a
8700      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8701      * low surrogate code unit</a>) of the
8702      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8703      * surrogate pair</a>
8704      * representing the specified supplementary character (Unicode
8705      * code point) in the UTF-16 encoding.  If the specified character
8706      * is not a
8707      * <a href="Character.html#supplementary">supplementary character</a>,
8708      * an unspecified {@code char} is returned.
8709      *
8710      * <p>If
8711      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8712      * is {@code true}, then
8713      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8714      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8715      * are also always {@code true}.
8716      *
8717      * @param   codePoint a supplementary character (Unicode code point)
8718      * @return  the trailing surrogate code unit used to represent the
8719      *          character in the UTF-16 encoding
8720      * @since   1.7
8721      */
8722     public static char lowSurrogate(int codePoint) {
8723         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8724     }
8725 
8726     /**
8727      * Converts the specified character (Unicode code point) to its
8728      * UTF-16 representation. If the specified code point is a BMP
8729      * (Basic Multilingual Plane or Plane 0) value, the same value is
8730      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8731      * specified code point is a supplementary character, its
8732      * surrogate values are stored in {@code dst[dstIndex]}
8733      * (high-surrogate) and {@code dst[dstIndex+1]}
8734      * (low-surrogate), and 2 is returned.
8735      *
8736      * @param  codePoint the character (Unicode code point) to be converted.
8737      * @param  dst an array of {@code char} in which the
8738      * {@code codePoint}'s UTF-16 value is stored.
8739      * @param dstIndex the start index into the {@code dst}
8740      * array where the converted value is stored.
8741      * @return 1 if the code point is a BMP code point, 2 if the
8742      * code point is a supplementary code point.
8743      * @throws IllegalArgumentException if the specified
8744      * {@code codePoint} is not a valid Unicode code point.
8745      * @throws NullPointerException if the specified {@code dst} is null.
8746      * @throws IndexOutOfBoundsException if {@code dstIndex}
8747      * is negative or not less than {@code dst.length}, or if
8748      * {@code dst} at {@code dstIndex} doesn't have enough
8749      * array element(s) to store the resulting {@code char}
8750      * value(s). (If {@code dstIndex} is equal to
8751      * {@code dst.length-1} and the specified
8752      * {@code codePoint} is a supplementary character, the
8753      * high-surrogate value is not stored in
8754      * {@code dst[dstIndex]}.)
8755      * @since  1.5
8756      */
8757     public static int toChars(int codePoint, char[] dst, int dstIndex) {
8758         if (isBmpCodePoint(codePoint)) {
8759             dst[dstIndex] = (char) codePoint;
8760             return 1;
8761         } else if (isValidCodePoint(codePoint)) {
8762             toSurrogates(codePoint, dst, dstIndex);
8763             return 2;
8764         } else {
8765             throw new IllegalArgumentException(
8766                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8767         }
8768     }
8769 
8770     /**
8771      * Converts the specified character (Unicode code point) to its
8772      * UTF-16 representation stored in a {@code char} array. If
8773      * the specified code point is a BMP (Basic Multilingual Plane or
8774      * Plane 0) value, the resulting {@code char} array has
8775      * the same value as {@code codePoint}. If the specified code
8776      * point is a supplementary code point, the resulting
8777      * {@code char} array has the corresponding surrogate pair.
8778      *
8779      * @param  codePoint a Unicode code point
8780      * @return a {@code char} array having
8781      *         {@code codePoint}'s UTF-16 representation.
8782      * @throws IllegalArgumentException if the specified
8783      * {@code codePoint} is not a valid Unicode code point.
8784      * @since  1.5
8785      */
8786     public static char[] toChars(int codePoint) {
8787         if (isBmpCodePoint(codePoint)) {
8788             return new char[] { (char) codePoint };
8789         } else if (isValidCodePoint(codePoint)) {
8790             char[] result = new char[2];
8791             toSurrogates(codePoint, result, 0);
8792             return result;
8793         } else {
8794             throw new IllegalArgumentException(
8795                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8796         }
8797     }
8798 
8799     static void toSurrogates(int codePoint, char[] dst, int index) {
8800         // We write elements "backwards" to guarantee all-or-nothing
8801         dst[index+1] = lowSurrogate(codePoint);
8802         dst[index] = highSurrogate(codePoint);
8803     }
8804 
8805     /**
8806      * Returns the number of Unicode code points in the text range of
8807      * the specified char sequence. The text range begins at the
8808      * specified {@code beginIndex} and extends to the
8809      * {@code char} at index {@code endIndex - 1}. Thus the
8810      * length (in {@code char}s) of the text range is
8811      * {@code endIndex-beginIndex}. Unpaired surrogates within
8812      * the text range count as one code point each.
8813      *
8814      * @param seq the char sequence
8815      * @param beginIndex the index to the first {@code char} of
8816      * the text range.
8817      * @param endIndex the index after the last {@code char} of
8818      * the text range.
8819      * @return the number of Unicode code points in the specified text
8820      * range
8821      * @throws NullPointerException if {@code seq} is null.
8822      * @throws IndexOutOfBoundsException if the
8823      * {@code beginIndex} is negative, or {@code endIndex}
8824      * is larger than the length of the given sequence, or
8825      * {@code beginIndex} is larger than {@code endIndex}.
8826      * @since  1.5
8827      */
8828     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
8829         int length = seq.length();
8830         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
8831             throw new IndexOutOfBoundsException();
8832         }
8833         int n = endIndex - beginIndex;
8834         for (int i = beginIndex; i < endIndex; ) {
8835             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
8836                 isLowSurrogate(seq.charAt(i))) {
8837                 n--;
8838                 i++;
8839             }
8840         }
8841         return n;
8842     }
8843 
8844     /**
8845      * Returns the number of Unicode code points in a subarray of the
8846      * {@code char} array argument. The {@code offset}
8847      * argument is the index of the first {@code char} of the
8848      * subarray and the {@code count} argument specifies the
8849      * length of the subarray in {@code char}s. Unpaired
8850      * surrogates within the subarray count as one code point each.
8851      *
8852      * @param a the {@code char} array
8853      * @param offset the index of the first {@code char} in the
8854      * given {@code char} array
8855      * @param count the length of the subarray in {@code char}s
8856      * @return the number of Unicode code points in the specified subarray
8857      * @throws NullPointerException if {@code a} is null.
8858      * @throws IndexOutOfBoundsException if {@code offset} or
8859      * {@code count} is negative, or if {@code offset +
8860      * count} is larger than the length of the given array.
8861      * @since  1.5
8862      */
8863     public static int codePointCount(char[] a, int offset, int count) {
8864         if (count > a.length - offset || offset < 0 || count < 0) {
8865             throw new IndexOutOfBoundsException();
8866         }
8867         return codePointCountImpl(a, offset, count);
8868     }
8869 
8870     static int codePointCountImpl(char[] a, int offset, int count) {
8871         int endIndex = offset + count;
8872         int n = count;
8873         for (int i = offset; i < endIndex; ) {
8874             if (isHighSurrogate(a[i++]) && i < endIndex &&
8875                 isLowSurrogate(a[i])) {
8876                 n--;
8877                 i++;
8878             }
8879         }
8880         return n;
8881     }
8882 
8883     /**
8884      * Returns the index within the given char sequence that is offset
8885      * from the given {@code index} by {@code codePointOffset}
8886      * code points. Unpaired surrogates within the text range given by
8887      * {@code index} and {@code codePointOffset} count as
8888      * one code point each.
8889      *
8890      * @param seq the char sequence
8891      * @param index the index to be offset
8892      * @param codePointOffset the offset in code points
8893      * @return the index within the char sequence
8894      * @throws NullPointerException if {@code seq} is null.
8895      * @throws IndexOutOfBoundsException if {@code index}
8896      *   is negative or larger than the length of the char sequence,
8897      *   or if {@code codePointOffset} is positive and the
8898      *   subsequence starting with {@code index} has fewer than
8899      *   {@code codePointOffset} code points, or if
8900      *   {@code codePointOffset} is negative and the subsequence
8901      *   before {@code index} has fewer than the absolute value
8902      *   of {@code codePointOffset} code points.
8903      * @since 1.5
8904      */
8905     public static int offsetByCodePoints(CharSequence seq, int index,
8906                                          int codePointOffset) {
8907         int length = seq.length();
8908         if (index < 0 || index > length) {
8909             throw new IndexOutOfBoundsException();
8910         }
8911 
8912         int x = index;
8913         if (codePointOffset >= 0) {
8914             int i;
8915             for (i = 0; x < length && i < codePointOffset; i++) {
8916                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8917                     isLowSurrogate(seq.charAt(x))) {
8918                     x++;
8919                 }
8920             }
8921             if (i < codePointOffset) {
8922                 throw new IndexOutOfBoundsException();
8923             }
8924         } else {
8925             int i;
8926             for (i = codePointOffset; x > 0 && i < 0; i++) {
8927                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8928                     isHighSurrogate(seq.charAt(x-1))) {
8929                     x--;
8930                 }
8931             }
8932             if (i < 0) {
8933                 throw new IndexOutOfBoundsException();
8934             }
8935         }
8936         return x;
8937     }
8938 
8939     /**
8940      * Returns the index within the given {@code char} subarray
8941      * that is offset from the given {@code index} by
8942      * {@code codePointOffset} code points. The
8943      * {@code start} and {@code count} arguments specify a
8944      * subarray of the {@code char} array. Unpaired surrogates
8945      * within the text range given by {@code index} and
8946      * {@code codePointOffset} count as one code point each.
8947      *
8948      * @param a the {@code char} array
8949      * @param start the index of the first {@code char} of the
8950      * subarray
8951      * @param count the length of the subarray in {@code char}s
8952      * @param index the index to be offset
8953      * @param codePointOffset the offset in code points
8954      * @return the index within the subarray
8955      * @throws NullPointerException if {@code a} is null.
8956      * @throws IndexOutOfBoundsException
8957      *   if {@code start} or {@code count} is negative,
8958      *   or if {@code start + count} is larger than the length of
8959      *   the given array,
8960      *   or if {@code index} is less than {@code start} or
8961      *   larger than {@code start + count},
8962      *   or if {@code codePointOffset} is positive and the text range
8963      *   starting with {@code index} and ending with {@code start + count - 1}
8964      *   has fewer than {@code codePointOffset} code
8965      *   points,
8966      *   or if {@code codePointOffset} is negative and the text range
8967      *   starting with {@code start} and ending with {@code index - 1}
8968      *   has fewer than the absolute value of
8969      *   {@code codePointOffset} code points.
8970      * @since 1.5
8971      */
8972     public static int offsetByCodePoints(char[] a, int start, int count,
8973                                          int index, int codePointOffset) {
8974         if (count > a.length-start || start < 0 || count < 0
8975             || index < start || index > start+count) {
8976             throw new IndexOutOfBoundsException();
8977         }
8978         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8979     }
8980 
8981     static int offsetByCodePointsImpl(char[]a, int start, int count,
8982                                       int index, int codePointOffset) {
8983         int x = index;
8984         if (codePointOffset >= 0) {
8985             int limit = start + count;
8986             int i;
8987             for (i = 0; x < limit && i < codePointOffset; i++) {
8988                 if (isHighSurrogate(a[x++]) && x < limit &&
8989                     isLowSurrogate(a[x])) {
8990                     x++;
8991                 }
8992             }
8993             if (i < codePointOffset) {
8994                 throw new IndexOutOfBoundsException();
8995             }
8996         } else {
8997             int i;
8998             for (i = codePointOffset; x > start && i < 0; i++) {
8999                 if (isLowSurrogate(a[--x]) && x > start &&
9000                     isHighSurrogate(a[x-1])) {
9001                     x--;
9002                 }
9003             }
9004             if (i < 0) {
9005                 throw new IndexOutOfBoundsException();
9006             }
9007         }
9008         return x;
9009     }
9010 
9011     /**
9012      * Determines if the specified character is a lowercase character.
9013      * <p>
9014      * A character is lowercase if its general category type, provided
9015      * by {@code Character.getType(ch)}, is
9016      * {@code LOWERCASE_LETTER}, or it has contributory property
9017      * Other_Lowercase as defined by the Unicode Standard.
9018      * <p>
9019      * The following are examples of lowercase characters:
9020      * <blockquote><pre>
9021      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9022      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
9023      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
9024      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
9025      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
9026      * </pre></blockquote>
9027      * <p> Many other Unicode characters are lowercase too.
9028      *
9029      * <p><b>Note:</b> This method cannot handle <a
9030      * href="#supplementary"> supplementary characters</a>. To support
9031      * all Unicode characters, including supplementary characters, use
9032      * the {@link #isLowerCase(int)} method.
9033      *
9034      * @param   ch   the character to be tested.
9035      * @return  {@code true} if the character is lowercase;
9036      *          {@code false} otherwise.
9037      * @see     Character#isUpperCase(char)
9038      * @see     Character#isTitleCase(char)
9039      * @see     Character#toLowerCase(char)
9040      * @see     Character#getType(char)
9041      */
9042     public static boolean isLowerCase(char ch) {
9043         return isLowerCase((int)ch);
9044     }
9045 
9046     /**
9047      * Determines if the specified character (Unicode code point) is a
9048      * lowercase character.
9049      * <p>
9050      * A character is lowercase if its general category type, provided
9051      * by {@link Character#getType getType(codePoint)}, is
9052      * {@code LOWERCASE_LETTER}, or it has contributory property
9053      * Other_Lowercase as defined by the Unicode Standard.
9054      * <p>
9055      * The following are examples of lowercase characters:
9056      * <blockquote><pre>
9057      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9058      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
9059      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
9060      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
9061      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
9062      * </pre></blockquote>
9063      * <p> Many other Unicode characters are lowercase too.
9064      *
9065      * @param   codePoint the character (Unicode code point) to be tested.
9066      * @return  {@code true} if the character is lowercase;
9067      *          {@code false} otherwise.
9068      * @see     Character#isUpperCase(int)
9069      * @see     Character#isTitleCase(int)
9070      * @see     Character#toLowerCase(int)
9071      * @see     Character#getType(int)
9072      * @since   1.5
9073      */
9074     public static boolean isLowerCase(int codePoint) {
9075         return getType(codePoint) == Character.LOWERCASE_LETTER ||
9076                CharacterData.of(codePoint).isOtherLowercase(codePoint);
9077     }
9078 
9079     /**
9080      * Determines if the specified character is an uppercase character.
9081      * <p>
9082      * A character is uppercase if its general category type, provided by
9083      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
9084      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9085      * <p>
9086      * The following are examples of uppercase characters:
9087      * <blockquote><pre>
9088      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9089      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
9090      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
9091      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
9092      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
9093      * </pre></blockquote>
9094      * <p> Many other Unicode characters are uppercase too.
9095      *
9096      * <p><b>Note:</b> This method cannot handle <a
9097      * href="#supplementary"> supplementary characters</a>. To support
9098      * all Unicode characters, including supplementary characters, use
9099      * the {@link #isUpperCase(int)} method.
9100      *
9101      * @param   ch   the character to be tested.
9102      * @return  {@code true} if the character is uppercase;
9103      *          {@code false} otherwise.
9104      * @see     Character#isLowerCase(char)
9105      * @see     Character#isTitleCase(char)
9106      * @see     Character#toUpperCase(char)
9107      * @see     Character#getType(char)
9108      * @since   1.0
9109      */
9110     public static boolean isUpperCase(char ch) {
9111         return isUpperCase((int)ch);
9112     }
9113 
9114     /**
9115      * Determines if the specified character (Unicode code point) is an uppercase character.
9116      * <p>
9117      * A character is uppercase if its general category type, provided by
9118      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
9119      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9120      * <p>
9121      * The following are examples of uppercase characters:
9122      * <blockquote><pre>
9123      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9124      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
9125      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
9126      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
9127      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
9128      * </pre></blockquote>
9129      * <p> Many other Unicode characters are uppercase too.
9130      *
9131      * @param   codePoint the character (Unicode code point) to be tested.
9132      * @return  {@code true} if the character is uppercase;
9133      *          {@code false} otherwise.
9134      * @see     Character#isLowerCase(int)
9135      * @see     Character#isTitleCase(int)
9136      * @see     Character#toUpperCase(int)
9137      * @see     Character#getType(int)
9138      * @since   1.5
9139      */
9140     public static boolean isUpperCase(int codePoint) {
9141         return getType(codePoint) == Character.UPPERCASE_LETTER ||
9142                CharacterData.of(codePoint).isOtherUppercase(codePoint);
9143     }
9144 
9145     /**
9146      * Determines if the specified character is a titlecase character.
9147      * <p>
9148      * A character is a titlecase character if its general
9149      * category type, provided by {@code Character.getType(ch)},
9150      * is {@code TITLECASE_LETTER}.
9151      * <p>
9152      * Some characters look like pairs of Latin letters. For example, there
9153      * is an uppercase letter that looks like "LJ" and has a corresponding
9154      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9155      * is the appropriate form to use when rendering a word in lowercase
9156      * with initial capitals, as for a book title.
9157      * <p>
9158      * These are some of the Unicode characters for which this method returns
9159      * {@code true}:
9160      * <ul>
9161      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9162      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9163      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9164      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9165      * </ul>
9166      * <p> Many other Unicode characters are titlecase too.
9167      *
9168      * <p><b>Note:</b> This method cannot handle <a
9169      * href="#supplementary"> supplementary characters</a>. To support
9170      * all Unicode characters, including supplementary characters, use
9171      * the {@link #isTitleCase(int)} method.
9172      *
9173      * @param   ch   the character to be tested.
9174      * @return  {@code true} if the character is titlecase;
9175      *          {@code false} otherwise.
9176      * @see     Character#isLowerCase(char)
9177      * @see     Character#isUpperCase(char)
9178      * @see     Character#toTitleCase(char)
9179      * @see     Character#getType(char)
9180      * @since   1.0.2
9181      */
9182     public static boolean isTitleCase(char ch) {
9183         return isTitleCase((int)ch);
9184     }
9185 
9186     /**
9187      * Determines if the specified character (Unicode code point) is a titlecase character.
9188      * <p>
9189      * A character is a titlecase character if its general
9190      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9191      * is {@code TITLECASE_LETTER}.
9192      * <p>
9193      * Some characters look like pairs of Latin letters. For example, there
9194      * is an uppercase letter that looks like "LJ" and has a corresponding
9195      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9196      * is the appropriate form to use when rendering a word in lowercase
9197      * with initial capitals, as for a book title.
9198      * <p>
9199      * These are some of the Unicode characters for which this method returns
9200      * {@code true}:
9201      * <ul>
9202      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9203      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9204      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9205      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9206      * </ul>
9207      * <p> Many other Unicode characters are titlecase too.
9208      *
9209      * @param   codePoint the character (Unicode code point) to be tested.
9210      * @return  {@code true} if the character is titlecase;
9211      *          {@code false} otherwise.
9212      * @see     Character#isLowerCase(int)
9213      * @see     Character#isUpperCase(int)
9214      * @see     Character#toTitleCase(int)
9215      * @see     Character#getType(int)
9216      * @since   1.5
9217      */
9218     public static boolean isTitleCase(int codePoint) {
9219         return getType(codePoint) == Character.TITLECASE_LETTER;
9220     }
9221 
9222     /**
9223      * Determines if the specified character is a digit.
9224      * <p>
9225      * A character is a digit if its general category type, provided
9226      * by {@code Character.getType(ch)}, is
9227      * {@code DECIMAL_DIGIT_NUMBER}.
9228      * <p>
9229      * Some Unicode character ranges that contain digits:
9230      * <ul>
9231      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9232      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9233      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9234      *     Arabic-Indic digits
9235      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9236      *     Extended Arabic-Indic digits
9237      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9238      *     Devanagari digits
9239      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9240      *     Fullwidth digits
9241      * </ul>
9242      *
9243      * Many other character ranges contain digits as well.
9244      *
9245      * <p><b>Note:</b> This method cannot handle <a
9246      * href="#supplementary"> supplementary characters</a>. To support
9247      * all Unicode characters, including supplementary characters, use
9248      * the {@link #isDigit(int)} method.
9249      *
9250      * @param   ch   the character to be tested.
9251      * @return  {@code true} if the character is a digit;
9252      *          {@code false} otherwise.
9253      * @see     Character#digit(char, int)
9254      * @see     Character#forDigit(int, int)
9255      * @see     Character#getType(char)
9256      */
9257     public static boolean isDigit(char ch) {
9258         return isDigit((int)ch);
9259     }
9260 
9261     /**
9262      * Determines if the specified character (Unicode code point) is a digit.
9263      * <p>
9264      * A character is a digit if its general category type, provided
9265      * by {@link Character#getType(int) getType(codePoint)}, is
9266      * {@code DECIMAL_DIGIT_NUMBER}.
9267      * <p>
9268      * Some Unicode character ranges that contain digits:
9269      * <ul>
9270      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9271      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9272      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9273      *     Arabic-Indic digits
9274      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9275      *     Extended Arabic-Indic digits
9276      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9277      *     Devanagari digits
9278      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9279      *     Fullwidth digits
9280      * </ul>
9281      *
9282      * Many other character ranges contain digits as well.
9283      *
9284      * @param   codePoint the character (Unicode code point) to be tested.
9285      * @return  {@code true} if the character is a digit;
9286      *          {@code false} otherwise.
9287      * @see     Character#forDigit(int, int)
9288      * @see     Character#getType(int)
9289      * @since   1.5
9290      */
9291     public static boolean isDigit(int codePoint) {
9292         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
9293     }
9294 
9295     /**
9296      * Determines if a character is defined in Unicode.
9297      * <p>
9298      * A character is defined if at least one of the following is true:
9299      * <ul>
9300      * <li>It has an entry in the UnicodeData file.
9301      * <li>It has a value in a range defined by the UnicodeData file.
9302      * </ul>
9303      *
9304      * <p><b>Note:</b> This method cannot handle <a
9305      * href="#supplementary"> supplementary characters</a>. To support
9306      * all Unicode characters, including supplementary characters, use
9307      * the {@link #isDefined(int)} method.
9308      *
9309      * @param   ch   the character to be tested
9310      * @return  {@code true} if the character has a defined meaning
9311      *          in Unicode; {@code false} otherwise.
9312      * @see     Character#isDigit(char)
9313      * @see     Character#isLetter(char)
9314      * @see     Character#isLetterOrDigit(char)
9315      * @see     Character#isLowerCase(char)
9316      * @see     Character#isTitleCase(char)
9317      * @see     Character#isUpperCase(char)
9318      * @since   1.0.2
9319      */
9320     public static boolean isDefined(char ch) {
9321         return isDefined((int)ch);
9322     }
9323 
9324     /**
9325      * Determines if a character (Unicode code point) is defined in Unicode.
9326      * <p>
9327      * A character is defined if at least one of the following is true:
9328      * <ul>
9329      * <li>It has an entry in the UnicodeData file.
9330      * <li>It has a value in a range defined by the UnicodeData file.
9331      * </ul>
9332      *
9333      * @param   codePoint the character (Unicode code point) to be tested.
9334      * @return  {@code true} if the character has a defined meaning
9335      *          in Unicode; {@code false} otherwise.
9336      * @see     Character#isDigit(int)
9337      * @see     Character#isLetter(int)
9338      * @see     Character#isLetterOrDigit(int)
9339      * @see     Character#isLowerCase(int)
9340      * @see     Character#isTitleCase(int)
9341      * @see     Character#isUpperCase(int)
9342      * @since   1.5
9343      */
9344     public static boolean isDefined(int codePoint) {
9345         return getType(codePoint) != Character.UNASSIGNED;
9346     }
9347 
9348     /**
9349      * Determines if the specified character is a letter.
9350      * <p>
9351      * A character is considered to be a letter if its general
9352      * category type, provided by {@code Character.getType(ch)},
9353      * is any of the following:
9354      * <ul>
9355      * <li> {@code UPPERCASE_LETTER}
9356      * <li> {@code LOWERCASE_LETTER}
9357      * <li> {@code TITLECASE_LETTER}
9358      * <li> {@code MODIFIER_LETTER}
9359      * <li> {@code OTHER_LETTER}
9360      * </ul>
9361      *
9362      * Not all letters have case. Many characters are
9363      * letters but are neither uppercase nor lowercase nor titlecase.
9364      *
9365      * <p><b>Note:</b> This method cannot handle <a
9366      * href="#supplementary"> supplementary characters</a>. To support
9367      * all Unicode characters, including supplementary characters, use
9368      * the {@link #isLetter(int)} method.
9369      *
9370      * @param   ch   the character to be tested.
9371      * @return  {@code true} if the character is a letter;
9372      *          {@code false} otherwise.
9373      * @see     Character#isDigit(char)
9374      * @see     Character#isJavaIdentifierStart(char)
9375      * @see     Character#isJavaLetter(char)
9376      * @see     Character#isJavaLetterOrDigit(char)
9377      * @see     Character#isLetterOrDigit(char)
9378      * @see     Character#isLowerCase(char)
9379      * @see     Character#isTitleCase(char)
9380      * @see     Character#isUnicodeIdentifierStart(char)
9381      * @see     Character#isUpperCase(char)
9382      */
9383     public static boolean isLetter(char ch) {
9384         return isLetter((int)ch);
9385     }
9386 
9387     /**
9388      * Determines if the specified character (Unicode code point) is a letter.
9389      * <p>
9390      * A character is considered to be a letter if its general
9391      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9392      * is any of the following:
9393      * <ul>
9394      * <li> {@code UPPERCASE_LETTER}
9395      * <li> {@code LOWERCASE_LETTER}
9396      * <li> {@code TITLECASE_LETTER}
9397      * <li> {@code MODIFIER_LETTER}
9398      * <li> {@code OTHER_LETTER}
9399      * </ul>
9400      *
9401      * Not all letters have case. Many characters are
9402      * letters but are neither uppercase nor lowercase nor titlecase.
9403      *
9404      * @param   codePoint the character (Unicode code point) to be tested.
9405      * @return  {@code true} if the character is a letter;
9406      *          {@code false} otherwise.
9407      * @see     Character#isDigit(int)
9408      * @see     Character#isJavaIdentifierStart(int)
9409      * @see     Character#isLetterOrDigit(int)
9410      * @see     Character#isLowerCase(int)
9411      * @see     Character#isTitleCase(int)
9412      * @see     Character#isUnicodeIdentifierStart(int)
9413      * @see     Character#isUpperCase(int)
9414      * @since   1.5
9415      */
9416     public static boolean isLetter(int codePoint) {
9417         return ((((1 << Character.UPPERCASE_LETTER) |
9418             (1 << Character.LOWERCASE_LETTER) |
9419             (1 << Character.TITLECASE_LETTER) |
9420             (1 << Character.MODIFIER_LETTER) |
9421             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9422             != 0;
9423     }
9424 
9425     /**
9426      * Determines if the specified character is a letter or digit.
9427      * <p>
9428      * A character is considered to be a letter or digit if either
9429      * {@code Character.isLetter(char ch)} or
9430      * {@code Character.isDigit(char ch)} returns
9431      * {@code true} for the character.
9432      *
9433      * <p><b>Note:</b> This method cannot handle <a
9434      * href="#supplementary"> supplementary characters</a>. To support
9435      * all Unicode characters, including supplementary characters, use
9436      * the {@link #isLetterOrDigit(int)} method.
9437      *
9438      * @param   ch   the character to be tested.
9439      * @return  {@code true} if the character is a letter or digit;
9440      *          {@code false} otherwise.
9441      * @see     Character#isDigit(char)
9442      * @see     Character#isJavaIdentifierPart(char)
9443      * @see     Character#isJavaLetter(char)
9444      * @see     Character#isJavaLetterOrDigit(char)
9445      * @see     Character#isLetter(char)
9446      * @see     Character#isUnicodeIdentifierPart(char)
9447      * @since   1.0.2
9448      */
9449     public static boolean isLetterOrDigit(char ch) {
9450         return isLetterOrDigit((int)ch);
9451     }
9452 
9453     /**
9454      * Determines if the specified character (Unicode code point) is a letter or digit.
9455      * <p>
9456      * A character is considered to be a letter or digit if either
9457      * {@link #isLetter(int) isLetter(codePoint)} or
9458      * {@link #isDigit(int) isDigit(codePoint)} returns
9459      * {@code true} for the character.
9460      *
9461      * @param   codePoint the character (Unicode code point) to be tested.
9462      * @return  {@code true} if the character is a letter or digit;
9463      *          {@code false} otherwise.
9464      * @see     Character#isDigit(int)
9465      * @see     Character#isJavaIdentifierPart(int)
9466      * @see     Character#isLetter(int)
9467      * @see     Character#isUnicodeIdentifierPart(int)
9468      * @since   1.5
9469      */
9470     public static boolean isLetterOrDigit(int codePoint) {
9471         return ((((1 << Character.UPPERCASE_LETTER) |
9472             (1 << Character.LOWERCASE_LETTER) |
9473             (1 << Character.TITLECASE_LETTER) |
9474             (1 << Character.MODIFIER_LETTER) |
9475             (1 << Character.OTHER_LETTER) |
9476             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9477             != 0;
9478     }
9479 
9480     /**
9481      * Determines if the specified character is permissible as the first
9482      * character in a Java identifier.
9483      * <p>
9484      * A character may start a Java identifier if and only if
9485      * one of the following is true:
9486      * <ul>
9487      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9488      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9489      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9490      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9491      * </ul>
9492      *
9493      * @param   ch the character to be tested.
9494      * @return  {@code true} if the character may start a Java
9495      *          identifier; {@code false} otherwise.
9496      * @see     Character#isJavaLetterOrDigit(char)
9497      * @see     Character#isJavaIdentifierStart(char)
9498      * @see     Character#isJavaIdentifierPart(char)
9499      * @see     Character#isLetter(char)
9500      * @see     Character#isLetterOrDigit(char)
9501      * @see     Character#isUnicodeIdentifierStart(char)
9502      * @since   1.0.2
9503      * @deprecated Replaced by isJavaIdentifierStart(char).
9504      */
9505     @Deprecated(since="1.1")
9506     public static boolean isJavaLetter(char ch) {
9507         return isJavaIdentifierStart(ch);
9508     }
9509 
9510     /**
9511      * Determines if the specified character may be part of a Java
9512      * identifier as other than the first character.
9513      * <p>
9514      * A character may be part of a Java identifier if and only if any
9515      * of the following are true:
9516      * <ul>
9517      * <li>  it is a letter
9518      * <li>  it is a currency symbol (such as {@code '$'})
9519      * <li>  it is a connecting punctuation character (such as {@code '_'})
9520      * <li>  it is a digit
9521      * <li>  it is a numeric letter (such as a Roman numeral character)
9522      * <li>  it is a combining mark
9523      * <li>  it is a non-spacing mark
9524      * <li> {@code isIdentifierIgnorable} returns
9525      * {@code true} for the character.
9526      * </ul>
9527      *
9528      * @param   ch the character to be tested.
9529      * @return  {@code true} if the character may be part of a
9530      *          Java identifier; {@code false} otherwise.
9531      * @see     Character#isJavaLetter(char)
9532      * @see     Character#isJavaIdentifierStart(char)
9533      * @see     Character#isJavaIdentifierPart(char)
9534      * @see     Character#isLetter(char)
9535      * @see     Character#isLetterOrDigit(char)
9536      * @see     Character#isUnicodeIdentifierPart(char)
9537      * @see     Character#isIdentifierIgnorable(char)
9538      * @since   1.0.2
9539      * @deprecated Replaced by isJavaIdentifierPart(char).
9540      */
9541     @Deprecated(since="1.1")
9542     public static boolean isJavaLetterOrDigit(char ch) {
9543         return isJavaIdentifierPart(ch);
9544     }
9545 
9546     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
9548      * <p>
9549      * A character is considered to be alphabetic if its general category type,
9550      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9551      * the following:
9552      * <ul>
9553      * <li> {@code UPPERCASE_LETTER}
9554      * <li> {@code LOWERCASE_LETTER}
9555      * <li> {@code TITLECASE_LETTER}
9556      * <li> {@code MODIFIER_LETTER}
9557      * <li> {@code OTHER_LETTER}
9558      * <li> {@code LETTER_NUMBER}
9559      * </ul>
9560      * or it has contributory property Other_Alphabetic as defined by the
9561      * Unicode Standard.
9562      *
9563      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character is a Unicode alphabetic
     *          character, {@code false} otherwise.
9566      * @since   1.7
9567      */
9568     public static boolean isAlphabetic(int codePoint) {
9569         return (((((1 << Character.UPPERCASE_LETTER) |
9570             (1 << Character.LOWERCASE_LETTER) |
9571             (1 << Character.TITLECASE_LETTER) |
9572             (1 << Character.MODIFIER_LETTER) |
9573             (1 << Character.OTHER_LETTER) |
9574             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9575             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9576     }
9577 
9578     /**
9579      * Determines if the specified character (Unicode code point) is a CJKV
9580      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9581      * the Unicode Standard.
9582      *
9583      * @param   codePoint the character (Unicode code point) to be tested.
9584      * @return  {@code true} if the character is a Unicode ideograph
9585      *          character, {@code false} otherwise.
9586      * @since   1.7
9587      */
9588     public static boolean isIdeographic(int codePoint) {
9589         return CharacterData.of(codePoint).isIdeographic(codePoint);
9590     }
9591 
9592     /**
9593      * Determines if the specified character is
9594      * permissible as the first character in a Java identifier.
9595      * <p>
9596      * A character may start a Java identifier if and only if
9597      * one of the following conditions is true:
9598      * <ul>
9599      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9600      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9601      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9602      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9603      * </ul>
9604      *
9605      * <p><b>Note:</b> This method cannot handle <a
9606      * href="#supplementary"> supplementary characters</a>. To support
9607      * all Unicode characters, including supplementary characters, use
9608      * the {@link #isJavaIdentifierStart(int)} method.
9609      *
9610      * @param   ch the character to be tested.
9611      * @return  {@code true} if the character may start a Java identifier;
9612      *          {@code false} otherwise.
9613      * @see     Character#isJavaIdentifierPart(char)
9614      * @see     Character#isLetter(char)
9615      * @see     Character#isUnicodeIdentifierStart(char)
9616      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9617      * @since   1.1
9618      */
9619     public static boolean isJavaIdentifierStart(char ch) {
9620         return isJavaIdentifierStart((int)ch);
9621     }
9622 
9623     /**
9624      * Determines if the character (Unicode code point) is
9625      * permissible as the first character in a Java identifier.
9626      * <p>
9627      * A character may start a Java identifier if and only if
9628      * one of the following conditions is true:
9629      * <ul>
9630      * <li> {@link #isLetter(int) isLetter(codePoint)}
9631      *      returns {@code true}
9632      * <li> {@link #getType(int) getType(codePoint)}
9633      *      returns {@code LETTER_NUMBER}
9634      * <li> the referenced character is a currency symbol (such as {@code '$'})
9635      * <li> the referenced character is a connecting punctuation character
9636      *      (such as {@code '_'}).
9637      * </ul>
9638      *
9639      * @param   codePoint the character (Unicode code point) to be tested.
9640      * @return  {@code true} if the character may start a Java identifier;
9641      *          {@code false} otherwise.
9642      * @see     Character#isJavaIdentifierPart(int)
9643      * @see     Character#isLetter(int)
9644      * @see     Character#isUnicodeIdentifierStart(int)
9645      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9646      * @since   1.5
9647      */
9648     public static boolean isJavaIdentifierStart(int codePoint) {
9649         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
9650     }
9651 
9652     /**
9653      * Determines if the specified character may be part of a Java
9654      * identifier as other than the first character.
9655      * <p>
9656      * A character may be part of a Java identifier if any of the following
9657      * are true:
9658      * <ul>
9659      * <li>  it is a letter
9660      * <li>  it is a currency symbol (such as {@code '$'})
9661      * <li>  it is a connecting punctuation character (such as {@code '_'})
9662      * <li>  it is a digit
9663      * <li>  it is a numeric letter (such as a Roman numeral character)
9664      * <li>  it is a combining mark
9665      * <li>  it is a non-spacing mark
9666      * <li> {@code isIdentifierIgnorable} returns
9667      * {@code true} for the character
9668      * </ul>
9669      *
9670      * <p><b>Note:</b> This method cannot handle <a
9671      * href="#supplementary"> supplementary characters</a>. To support
9672      * all Unicode characters, including supplementary characters, use
9673      * the {@link #isJavaIdentifierPart(int)} method.
9674      *
9675      * @param   ch      the character to be tested.
9676      * @return {@code true} if the character may be part of a
9677      *          Java identifier; {@code false} otherwise.
9678      * @see     Character#isIdentifierIgnorable(char)
9679      * @see     Character#isJavaIdentifierStart(char)
9680      * @see     Character#isLetterOrDigit(char)
9681      * @see     Character#isUnicodeIdentifierPart(char)
9682      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9683      * @since   1.1
9684      */
9685     public static boolean isJavaIdentifierPart(char ch) {
9686         return isJavaIdentifierPart((int)ch);
9687     }
9688 
9689     /**
9690      * Determines if the character (Unicode code point) may be part of a Java
9691      * identifier as other than the first character.
9692      * <p>
9693      * A character may be part of a Java identifier if any of the following
9694      * are true:
9695      * <ul>
9696      * <li>  it is a letter
9697      * <li>  it is a currency symbol (such as {@code '$'})
9698      * <li>  it is a connecting punctuation character (such as {@code '_'})
9699      * <li>  it is a digit
9700      * <li>  it is a numeric letter (such as a Roman numeral character)
9701      * <li>  it is a combining mark
9702      * <li>  it is a non-spacing mark
9703      * <li> {@link #isIdentifierIgnorable(int)
9704      * isIdentifierIgnorable(codePoint)} returns {@code true} for
9705      * the character
9706      * </ul>
9707      *
9708      * @param   codePoint the character (Unicode code point) to be tested.
9709      * @return {@code true} if the character may be part of a
9710      *          Java identifier; {@code false} otherwise.
9711      * @see     Character#isIdentifierIgnorable(int)
9712      * @see     Character#isJavaIdentifierStart(int)
9713      * @see     Character#isLetterOrDigit(int)
9714      * @see     Character#isUnicodeIdentifierPart(int)
9715      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9716      * @since   1.5
9717      */
9718     public static boolean isJavaIdentifierPart(int codePoint) {
9719         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
9720     }
9721 
9722     /**
9723      * Determines if the specified character is permissible as the
9724      * first character in a Unicode identifier.
9725      * <p>
9726      * A character may start a Unicode identifier if and only if
9727      * one of the following conditions is true:
9728      * <ul>
9729      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9730      * <li> {@link #getType(char) getType(ch)} returns
9731      *      {@code LETTER_NUMBER}.
9732      * </ul>
9733      *
9734      * <p><b>Note:</b> This method cannot handle <a
9735      * href="#supplementary"> supplementary characters</a>. To support
9736      * all Unicode characters, including supplementary characters, use
9737      * the {@link #isUnicodeIdentifierStart(int)} method.
9738      *
9739      * @param   ch      the character to be tested.
9740      * @return  {@code true} if the character may start a Unicode
9741      *          identifier; {@code false} otherwise.
9742      * @see     Character#isJavaIdentifierStart(char)
9743      * @see     Character#isLetter(char)
9744      * @see     Character#isUnicodeIdentifierPart(char)
9745      * @since   1.1
9746      */
9747     public static boolean isUnicodeIdentifierStart(char ch) {
9748         return isUnicodeIdentifierStart((int)ch);
9749     }
9750 
9751     /**
9752      * Determines if the specified character (Unicode code point) is permissible as the
9753      * first character in a Unicode identifier.
9754      * <p>
9755      * A character may start a Unicode identifier if and only if
9756      * one of the following conditions is true:
9757      * <ul>
9758      * <li> {@link #isLetter(int) isLetter(codePoint)}
9759      *      returns {@code true}
9760      * <li> {@link #getType(int) getType(codePoint)}
9761      *      returns {@code LETTER_NUMBER}.
9762      * </ul>
9763      * @param   codePoint the character (Unicode code point) to be tested.
9764      * @return  {@code true} if the character may start a Unicode
9765      *          identifier; {@code false} otherwise.
9766      * @see     Character#isJavaIdentifierStart(int)
9767      * @see     Character#isLetter(int)
9768      * @see     Character#isUnicodeIdentifierPart(int)
9769      * @since   1.5
9770      */
9771     public static boolean isUnicodeIdentifierStart(int codePoint) {
9772         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
9773     }
9774 
9775     /**
9776      * Determines if the specified character may be part of a Unicode
9777      * identifier as other than the first character.
9778      * <p>
9779      * A character may be part of a Unicode identifier if and only if
9780      * one of the following statements is true:
9781      * <ul>
9782      * <li>  it is a letter
9783      * <li>  it is a connecting punctuation character (such as {@code '_'})
9784      * <li>  it is a digit
9785      * <li>  it is a numeric letter (such as a Roman numeral character)
9786      * <li>  it is a combining mark
9787      * <li>  it is a non-spacing mark
9788      * <li> {@code isIdentifierIgnorable} returns
9789      * {@code true} for this character.
9790      * </ul>
9791      *
9792      * <p><b>Note:</b> This method cannot handle <a
9793      * href="#supplementary"> supplementary characters</a>. To support
9794      * all Unicode characters, including supplementary characters, use
9795      * the {@link #isUnicodeIdentifierPart(int)} method.
9796      *
9797      * @param   ch      the character to be tested.
9798      * @return  {@code true} if the character may be part of a
9799      *          Unicode identifier; {@code false} otherwise.
9800      * @see     Character#isIdentifierIgnorable(char)
9801      * @see     Character#isJavaIdentifierPart(char)
9802      * @see     Character#isLetterOrDigit(char)
9803      * @see     Character#isUnicodeIdentifierStart(char)
9804      * @since   1.1
9805      */
9806     public static boolean isUnicodeIdentifierPart(char ch) {
9807         return isUnicodeIdentifierPart((int)ch);
9808     }
9809 
9810     /**
9811      * Determines if the specified character (Unicode code point) may be part of a Unicode
9812      * identifier as other than the first character.
9813      * <p>
9814      * A character may be part of a Unicode identifier if and only if
9815      * one of the following statements is true:
9816      * <ul>
9817      * <li>  it is a letter
9818      * <li>  it is a connecting punctuation character (such as {@code '_'})
9819      * <li>  it is a digit
9820      * <li>  it is a numeric letter (such as a Roman numeral character)
9821      * <li>  it is a combining mark
9822      * <li>  it is a non-spacing mark
     * <li> {@link #isIdentifierIgnorable(int)
     * isIdentifierIgnorable(codePoint)} returns {@code true} for
     * this character.
9825      * </ul>
9826      * @param   codePoint the character (Unicode code point) to be tested.
9827      * @return  {@code true} if the character may be part of a
9828      *          Unicode identifier; {@code false} otherwise.
9829      * @see     Character#isIdentifierIgnorable(int)
9830      * @see     Character#isJavaIdentifierPart(int)
9831      * @see     Character#isLetterOrDigit(int)
9832      * @see     Character#isUnicodeIdentifierStart(int)
9833      * @since   1.5
9834      */
9835     public static boolean isUnicodeIdentifierPart(int codePoint) {
9836         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
9837     }
9838 
9839     /**
9840      * Determines if the specified character should be regarded as
9841      * an ignorable character in a Java identifier or a Unicode identifier.
9842      * <p>
9843      * The following Unicode characters are ignorable in a Java identifier
9844      * or a Unicode identifier:
9845      * <ul>
9846      * <li>ISO control characters that are not whitespace
9847      * <ul>
9848      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9849      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9850      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9851      * </ul>
9852      *
9853      * <li>all characters that have the {@code FORMAT} general
9854      * category value
9855      * </ul>
9856      *
9857      * <p><b>Note:</b> This method cannot handle <a
9858      * href="#supplementary"> supplementary characters</a>. To support
9859      * all Unicode characters, including supplementary characters, use
9860      * the {@link #isIdentifierIgnorable(int)} method.
9861      *
9862      * @param   ch      the character to be tested.
9863      * @return  {@code true} if the character is an ignorable control
9864      *          character that may be part of a Java or Unicode identifier;
9865      *           {@code false} otherwise.
9866      * @see     Character#isJavaIdentifierPart(char)
9867      * @see     Character#isUnicodeIdentifierPart(char)
9868      * @since   1.1
9869      */
9870     public static boolean isIdentifierIgnorable(char ch) {
9871         return isIdentifierIgnorable((int)ch);
9872     }
9873 
9874     /**
9875      * Determines if the specified character (Unicode code point) should be regarded as
9876      * an ignorable character in a Java identifier or a Unicode identifier.
9877      * <p>
9878      * The following Unicode characters are ignorable in a Java identifier
9879      * or a Unicode identifier:
9880      * <ul>
9881      * <li>ISO control characters that are not whitespace
9882      * <ul>
9883      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9884      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9885      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9886      * </ul>
9887      *
9888      * <li>all characters that have the {@code FORMAT} general
9889      * category value
9890      * </ul>
9891      *
9892      * @param   codePoint the character (Unicode code point) to be tested.
9893      * @return  {@code true} if the character is an ignorable control
9894      *          character that may be part of a Java or Unicode identifier;
9895      *          {@code false} otherwise.
9896      * @see     Character#isJavaIdentifierPart(int)
9897      * @see     Character#isUnicodeIdentifierPart(int)
9898      * @since   1.5
9899      */
9900     public static boolean isIdentifierIgnorable(int codePoint) {
9901         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
9902     }
9903 
9904     /**
9905      * Converts the character argument to lowercase using case
9906      * mapping information from the UnicodeData file.
9907      * <p>
9908      * Note that
9909      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
9910      * does not always return {@code true} for some ranges of
9911      * characters, particularly those that are symbols or ideographs.
9912      *
9913      * <p>In general, {@link String#toLowerCase()} should be used to map
9914      * characters to lowercase. {@code String} case mapping methods
9915      * have several benefits over {@code Character} case mapping methods.
9916      * {@code String} case mapping methods can perform locale-sensitive
9917      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9918      * the {@code Character} case mapping methods cannot.
9919      *
9920      * <p><b>Note:</b> This method cannot handle <a
9921      * href="#supplementary"> supplementary characters</a>. To support
9922      * all Unicode characters, including supplementary characters, use
9923      * the {@link #toLowerCase(int)} method.
9924      *
9925      * @param   ch   the character to be converted.
9926      * @return  the lowercase equivalent of the character, if any;
9927      *          otherwise, the character itself.
9928      * @see     Character#isLowerCase(char)
9929      * @see     String#toLowerCase()
9930      */
9931     public static char toLowerCase(char ch) {
9932         return (char)toLowerCase((int)ch);
9933     }
9934 
9935     /**
9936      * Converts the character (Unicode code point) argument to
9937      * lowercase using case mapping information from the UnicodeData
9938      * file.
9939      *
9940      * <p> Note that
9941      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9942      * does not always return {@code true} for some ranges of
9943      * characters, particularly those that are symbols or ideographs.
9944      *
9945      * <p>In general, {@link String#toLowerCase()} should be used to map
9946      * characters to lowercase. {@code String} case mapping methods
9947      * have several benefits over {@code Character} case mapping methods.
9948      * {@code String} case mapping methods can perform locale-sensitive
9949      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9950      * the {@code Character} case mapping methods cannot.
9951      *
9952      * @param   codePoint   the character (Unicode code point) to be converted.
9953      * @return  the lowercase equivalent of the character (Unicode code
9954      *          point), if any; otherwise, the character itself.
9955      * @see     Character#isLowerCase(int)
9956      * @see     String#toLowerCase()
9957      *
9958      * @since   1.5
9959      */
9960     public static int toLowerCase(int codePoint) {
9961         return CharacterData.of(codePoint).toLowerCase(codePoint);
9962     }
9963 
9964     /**
9965      * Converts the character argument to uppercase using case mapping
9966      * information from the UnicodeData file.
9967      * <p>
9968      * Note that
9969      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9970      * does not always return {@code true} for some ranges of
9971      * characters, particularly those that are symbols or ideographs.
9972      *
9973      * <p>In general, {@link String#toUpperCase()} should be used to map
9974      * characters to uppercase. {@code String} case mapping methods
9975      * have several benefits over {@code Character} case mapping methods.
9976      * {@code String} case mapping methods can perform locale-sensitive
9977      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9978      * the {@code Character} case mapping methods cannot.
9979      *
9980      * <p><b>Note:</b> This method cannot handle <a
9981      * href="#supplementary"> supplementary characters</a>. To support
9982      * all Unicode characters, including supplementary characters, use
9983      * the {@link #toUpperCase(int)} method.
9984      *
9985      * @param   ch   the character to be converted.
9986      * @return  the uppercase equivalent of the character, if any;
9987      *          otherwise, the character itself.
9988      * @see     Character#isUpperCase(char)
9989      * @see     String#toUpperCase()
9990      */
9991     public static char toUpperCase(char ch) {
9992         return (char)toUpperCase((int)ch);
9993     }
9994 
9995     /**
9996      * Converts the character (Unicode code point) argument to
9997      * uppercase using case mapping information from the UnicodeData
9998      * file.
9999      *
10000      * <p>Note that
10001      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
10002      * does not always return {@code true} for some ranges of
10003      * characters, particularly those that are symbols or ideographs.
10004      *
10005      * <p>In general, {@link String#toUpperCase()} should be used to map
10006      * characters to uppercase. {@code String} case mapping methods
10007      * have several benefits over {@code Character} case mapping methods.
10008      * {@code String} case mapping methods can perform locale-sensitive
10009      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10010      * the {@code Character} case mapping methods cannot.
10011      *
10012      * @param   codePoint   the character (Unicode code point) to be converted.
10013      * @return  the uppercase equivalent of the character, if any;
10014      *          otherwise, the character itself.
10015      * @see     Character#isUpperCase(int)
10016      * @see     String#toUpperCase()
10017      *
10018      * @since   1.5
10019      */
10020     public static int toUpperCase(int codePoint) {
10021         return CharacterData.of(codePoint).toUpperCase(codePoint);
10022     }
10023 
10024     /**
10025      * Converts the character argument to titlecase using case mapping
10026      * information from the UnicodeData file. If a character has no
10027      * explicit titlecase mapping and is not itself a titlecase char
10028      * according to UnicodeData, then the uppercase mapping is
10029      * returned as an equivalent titlecase mapping. If the
10030      * {@code char} argument is already a titlecase
10031      * {@code char}, the same {@code char} value will be
10032      * returned.
10033      * <p>
10034      * Note that
10035      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
10036      * does not always return {@code true} for some ranges of
10037      * characters.
10038      *
10039      * <p><b>Note:</b> This method cannot handle <a
10040      * href="#supplementary"> supplementary characters</a>. To support
10041      * all Unicode characters, including supplementary characters, use
10042      * the {@link #toTitleCase(int)} method.
10043      *
10044      * @param   ch   the character to be converted.
10045      * @return  the titlecase equivalent of the character, if any;
10046      *          otherwise, the character itself.
10047      * @see     Character#isTitleCase(char)
10048      * @see     Character#toLowerCase(char)
10049      * @see     Character#toUpperCase(char)
10050      * @since   1.0.2
10051      */
10052     public static char toTitleCase(char ch) {
10053         return (char)toTitleCase((int)ch);
10054     }
10055 
10056     /**
10057      * Converts the character (Unicode code point) argument to titlecase using case mapping
10058      * information from the UnicodeData file. If a character has no
10059      * explicit titlecase mapping and is not itself a titlecase char
10060      * according to UnicodeData, then the uppercase mapping is
10061      * returned as an equivalent titlecase mapping. If the
10062      * character argument is already a titlecase
10063      * character, the same character value will be
10064      * returned.
10065      *
10066      * <p>Note that
10067      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
10068      * does not always return {@code true} for some ranges of
10069      * characters.
10070      *
10071      * @param   codePoint   the character (Unicode code point) to be converted.
10072      * @return  the titlecase equivalent of the character, if any;
10073      *          otherwise, the character itself.
10074      * @see     Character#isTitleCase(int)
10075      * @see     Character#toLowerCase(int)
10076      * @see     Character#toUpperCase(int)
10077      * @since   1.5
10078      */
10079     public static int toTitleCase(int codePoint) {
10080         return CharacterData.of(codePoint).toTitleCase(codePoint);
10081     }
10082 
10083     /**
10084      * Returns the numeric value of the character {@code ch} in the
10085      * specified radix.
10086      * <p>
10087      * If the radix is not in the range {@code MIN_RADIX} &le;
10088      * {@code radix} &le; {@code MAX_RADIX} or if the
10089      * value of {@code ch} is not a valid digit in the specified
10090      * radix, {@code -1} is returned. A character is a valid digit
10091      * if at least one of the following is true:
10092      * <ul>
10093      * <li>The method {@code isDigit} is {@code true} of the character
10094      *     and the Unicode decimal digit value of the character (or its
10095      *     single-character decomposition) is less than the specified radix.
10096      *     In this case the decimal digit value is returned.
10097      * <li>The character is one of the uppercase Latin letters
10098      *     {@code 'A'} through {@code 'Z'} and its code is less than
10099      *     {@code radix + 'A' - 10}.
10100      *     In this case, {@code ch - 'A' + 10}
10101      *     is returned.
10102      * <li>The character is one of the lowercase Latin letters
10103      *     {@code 'a'} through {@code 'z'} and its code is less than
10104      *     {@code radix + 'a' - 10}.
10105      *     In this case, {@code ch - 'a' + 10}
10106      *     is returned.
10107      * <li>The character is one of the fullwidth uppercase Latin letters A
10108      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10109      *     and its code is less than
10110      *     {@code radix + '\u005CuFF21' - 10}.
10111      *     In this case, {@code ch - '\u005CuFF21' + 10}
10112      *     is returned.
10113      * <li>The character is one of the fullwidth lowercase Latin letters a
10114      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10115      *     and its code is less than
10116      *     {@code radix + '\u005CuFF41' - 10}.
10117      *     In this case, {@code ch - '\u005CuFF41' + 10}
10118      *     is returned.
10119      * </ul>
10120      *
10121      * <p><b>Note:</b> This method cannot handle <a
10122      * href="#supplementary"> supplementary characters</a>. To support
10123      * all Unicode characters, including supplementary characters, use
10124      * the {@link #digit(int, int)} method.
10125      *
10126      * @param   ch      the character to be converted.
10127      * @param   radix   the radix.
10128      * @return  the numeric value represented by the character in the
10129      *          specified radix.
10130      * @see     Character#forDigit(int, int)
10131      * @see     Character#isDigit(char)
10132      */
10133     public static int digit(char ch, int radix) {
10134         return digit((int)ch, radix);
10135     }
10136 
10137     /**
10138      * Returns the numeric value of the specified character (Unicode
10139      * code point) in the specified radix.
10140      *
10141      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
10142      * {@code radix} &le; {@code MAX_RADIX} or if the
10143      * character is not a valid digit in the specified
10144      * radix, {@code -1} is returned. A character is a valid digit
10145      * if at least one of the following is true:
10146      * <ul>
10147      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
10148      *     and the Unicode decimal digit value of the character (or its
10149      *     single-character decomposition) is less than the specified radix.
10150      *     In this case the decimal digit value is returned.
10151      * <li>The character is one of the uppercase Latin letters
10152      *     {@code 'A'} through {@code 'Z'} and its code is less than
10153      *     {@code radix + 'A' - 10}.
10154      *     In this case, {@code codePoint - 'A' + 10}
10155      *     is returned.
10156      * <li>The character is one of the lowercase Latin letters
10157      *     {@code 'a'} through {@code 'z'} and its code is less than
10158      *     {@code radix + 'a' - 10}.
10159      *     In this case, {@code codePoint - 'a' + 10}
10160      *     is returned.
10161      * <li>The character is one of the fullwidth uppercase Latin letters A
10162      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10163      *     and its code is less than
10164      *     {@code radix + '\u005CuFF21' - 10}.
10165      *     In this case,
10166      *     {@code codePoint - '\u005CuFF21' + 10}
10167      *     is returned.
10168      * <li>The character is one of the fullwidth lowercase Latin letters a
10169      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10170      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
10172      *     In this case,
10173      *     {@code codePoint - '\u005CuFF41' + 10}
10174      *     is returned.
10175      * </ul>
10176      *
10177      * @param   codePoint the character (Unicode code point) to be converted.
10178      * @param   radix   the radix.
10179      * @return  the numeric value represented by the character in the
10180      *          specified radix.
10181      * @see     Character#forDigit(int, int)
10182      * @see     Character#isDigit(int)
10183      * @since   1.5
10184      */
10185     public static int digit(int codePoint, int radix) {
10186         return CharacterData.of(codePoint).digit(codePoint, radix);
10187     }
10188 
10189     /**
10190      * Returns the {@code int} value that the specified Unicode
10191      * character represents. For example, the character
10192      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
10193      * an int with a value of 50.
10194      * <p>
10195      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10196      * {@code '\u005Cu005A'}), lowercase
10197      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10198      * full width variant ({@code '\u005CuFF21'} through
10199      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10200      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10201      * through 35. This is independent of the Unicode specification,
10202      * which does not assign numeric values to these {@code char}
10203      * values.
10204      * <p>
10205      * If the character does not have a numeric value, then -1 is returned.
10206      * If the character has a numeric value that cannot be represented as a
10207      * nonnegative integer (for example, a fractional value), then -2
10208      * is returned.
10209      *
10210      * <p><b>Note:</b> This method cannot handle <a
10211      * href="#supplementary"> supplementary characters</a>. To support
10212      * all Unicode characters, including supplementary characters, use
10213      * the {@link #getNumericValue(int)} method.
10214      *
10215      * @param   ch      the character to be converted.
10216      * @return  the numeric value of the character, as a nonnegative {@code int}
10217      *          value; -2 if the character has a numeric value but the value
10218      *          can not be represented as a nonnegative {@code int} value;
10219      *          -1 if the character has no numeric value.
10220      * @see     Character#forDigit(int, int)
10221      * @see     Character#isDigit(char)
10222      * @since   1.1
10223      */
10224     public static int getNumericValue(char ch) {
10225         return getNumericValue((int)ch);
10226     }
10227 
10228     /**
10229      * Returns the {@code int} value that the specified
10230      * character (Unicode code point) represents. For example, the character
10231      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
10232      * an {@code int} with a value of 50.
10233      * <p>
10234      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10235      * {@code '\u005Cu005A'}), lowercase
10236      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10237      * full width variant ({@code '\u005CuFF21'} through
10238      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10239      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10240      * through 35. This is independent of the Unicode specification,
10241      * which does not assign numeric values to these {@code char}
10242      * values.
10243      * <p>
10244      * If the character does not have a numeric value, then -1 is returned.
10245      * If the character has a numeric value that cannot be represented as a
10246      * nonnegative integer (for example, a fractional value), then -2
10247      * is returned.
10248      *
10249      * @param   codePoint the character (Unicode code point) to be converted.
10250      * @return  the numeric value of the character, as a nonnegative {@code int}
10251      *          value; -2 if the character has a numeric value but the value
10252      *          can not be represented as a nonnegative {@code int} value;
10253      *          -1 if the character has no numeric value.
10254      * @see     Character#forDigit(int, int)
10255      * @see     Character#isDigit(int)
10256      * @since   1.5
10257      */
10258     public static int getNumericValue(int codePoint) {
10259         return CharacterData.of(codePoint).getNumericValue(codePoint);
10260     }
10261 
10262     /**
10263      * Determines if the specified character is ISO-LATIN-1 white space.
10264      * This method returns {@code true} for the following five
10265      * characters only:
10266      * <table class="striped">
10267      * <caption style="display:none">truechars</caption>
10268      * <thead>
10269      * <tr><th scope="col">Character
10270      *     <th scope="col">Code
10271      *     <th scope="col">Name
10272      * </thead>
10273      * <tbody>
10274      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
10275      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
10276      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
10277      *     <td>{@code NEW LINE}</td></tr>
10278      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
10279      *     <td>{@code FORM FEED}</td></tr>
10280      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
10281      *     <td>{@code CARRIAGE RETURN}</td></tr>
10282      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
10283      *     <td>{@code SPACE}</td></tr>
10284      * </tbody>
10285      * </table>
10286      *
10287      * @param      ch   the character to be tested.
10288      * @return     {@code true} if the character is ISO-LATIN-1 white
10289      *             space; {@code false} otherwise.
10290      * @see        Character#isSpaceChar(char)
10291      * @see        Character#isWhitespace(char)
10292      * @deprecated Replaced by isWhitespace(char).
10293      */
10294     @Deprecated(since="1.1")
10295     public static boolean isSpace(char ch) {
10296         return (ch <= 0x0020) &&
10297             (((((1L << 0x0009) |
10298             (1L << 0x000A) |
10299             (1L << 0x000C) |
10300             (1L << 0x000D) |
10301             (1L << 0x0020)) >> ch) & 1L) != 0);
10302     }
10303 
10304 
10305     /**
10306      * Determines if the specified character is a Unicode space character.
10307      * A character is considered to be a space character if and only if
10308      * it is specified to be a space character by the Unicode Standard. This
10309      * method returns true if the character's general category type is any of
10310      * the following:
10311      * <ul>
10312      * <li> {@code SPACE_SEPARATOR}
10313      * <li> {@code LINE_SEPARATOR}
10314      * <li> {@code PARAGRAPH_SEPARATOR}
10315      * </ul>
10316      *
10317      * <p><b>Note:</b> This method cannot handle <a
10318      * href="#supplementary"> supplementary characters</a>. To support
10319      * all Unicode characters, including supplementary characters, use
10320      * the {@link #isSpaceChar(int)} method.
10321      *
10322      * @param   ch      the character to be tested.
10323      * @return  {@code true} if the character is a space character;
10324      *          {@code false} otherwise.
10325      * @see     Character#isWhitespace(char)
10326      * @since   1.1
10327      */
10328     public static boolean isSpaceChar(char ch) {
10329         return isSpaceChar((int)ch);
10330     }
10331 
10332     /**
10333      * Determines if the specified character (Unicode code point) is a
10334      * Unicode space character.  A character is considered to be a
10335      * space character if and only if it is specified to be a space
10336      * character by the Unicode Standard. This method returns true if
10337      * the character's general category type is any of the following:
10338      *
10339      * <ul>
10340      * <li> {@link #SPACE_SEPARATOR}
10341      * <li> {@link #LINE_SEPARATOR}
10342      * <li> {@link #PARAGRAPH_SEPARATOR}
10343      * </ul>
10344      *
10345      * @param   codePoint the character (Unicode code point) to be tested.
10346      * @return  {@code true} if the character is a space character;
10347      *          {@code false} otherwise.
10348      * @see     Character#isWhitespace(int)
10349      * @since   1.5
10350      */
10351     public static boolean isSpaceChar(int codePoint) {
10352         return ((((1 << Character.SPACE_SEPARATOR) |
10353                   (1 << Character.LINE_SEPARATOR) |
10354                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10355             != 0;
10356     }
10357 
10358     /**
10359      * Determines if the specified character is white space according to Java.
10360      * A character is a Java whitespace character if and only if it satisfies
10361      * one of the following criteria:
10362      * <ul>
10363      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10364      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10365      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10366      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10367      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10368      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10369      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10370      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10371      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10372      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10373      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10374      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10375      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10376      * </ul>
10377      *
10378      * <p><b>Note:</b> This method cannot handle <a
10379      * href="#supplementary"> supplementary characters</a>. To support
10380      * all Unicode characters, including supplementary characters, use
10381      * the {@link #isWhitespace(int)} method.
10382      *
10383      * @param   ch the character to be tested.
10384      * @return  {@code true} if the character is a Java whitespace
10385      *          character; {@code false} otherwise.
10386      * @see     Character#isSpaceChar(char)
10387      * @since   1.1
10388      */
10389     public static boolean isWhitespace(char ch) {
10390         return isWhitespace((int)ch);
10391     }
10392 
10393     /**
10394      * Determines if the specified character (Unicode code point) is
10395      * white space according to Java.  A character is a Java
10396      * whitespace character if and only if it satisfies one of the
10397      * following criteria:
10398      * <ul>
10399      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10400      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10401      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10402      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10403      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10404      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10405      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10406      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10407      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10408      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10409      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10410      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10411      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10412      * </ul>
10413      *
10414      * @param   codePoint the character (Unicode code point) to be tested.
10415      * @return  {@code true} if the character is a Java whitespace
10416      *          character; {@code false} otherwise.
10417      * @see     Character#isSpaceChar(int)
10418      * @since   1.5
10419      */
10420     public static boolean isWhitespace(int codePoint) {
10421         return CharacterData.of(codePoint).isWhitespace(codePoint);
10422     }
10423 
10424     /**
10425      * Determines if the specified character is an ISO control
10426      * character.  A character is considered to be an ISO control
10427      * character if its code is in the range {@code '\u005Cu0000'}
10428      * through {@code '\u005Cu001F'} or in the range
10429      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10430      *
10431      * <p><b>Note:</b> This method cannot handle <a
10432      * href="#supplementary"> supplementary characters</a>. To support
10433      * all Unicode characters, including supplementary characters, use
10434      * the {@link #isISOControl(int)} method.
10435      *
10436      * @param   ch      the character to be tested.
10437      * @return  {@code true} if the character is an ISO control character;
10438      *          {@code false} otherwise.
10439      *
10440      * @see     Character#isSpaceChar(char)
10441      * @see     Character#isWhitespace(char)
10442      * @since   1.1
10443      */
10444     public static boolean isISOControl(char ch) {
10445         return isISOControl((int)ch);
10446     }
10447 
10448     /**
10449      * Determines if the referenced character (Unicode code point) is an ISO control
10450      * character.  A character is considered to be an ISO control
10451      * character if its code is in the range {@code '\u005Cu0000'}
10452      * through {@code '\u005Cu001F'} or in the range
10453      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10454      *
10455      * @param   codePoint the character (Unicode code point) to be tested.
10456      * @return  {@code true} if the character is an ISO control character;
10457      *          {@code false} otherwise.
10458      * @see     Character#isSpaceChar(int)
10459      * @see     Character#isWhitespace(int)
10460      * @since   1.5
10461      */
10462     public static boolean isISOControl(int codePoint) {
10463         // Optimized form of:
10464         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
10465         //     (codePoint >= 0x7F && codePoint <= 0x9F);
10466         return codePoint <= 0x9F &&
10467             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10468     }
10469 
10470     /**
10471      * Returns a value indicating a character's general category.
10472      *
10473      * <p><b>Note:</b> This method cannot handle <a
10474      * href="#supplementary"> supplementary characters</a>. To support
10475      * all Unicode characters, including supplementary characters, use
10476      * the {@link #getType(int)} method.
10477      *
10478      * @param   ch      the character to be tested.
10479      * @return  a value of type {@code int} representing the
10480      *          character's general category.
10481      * @see     Character#COMBINING_SPACING_MARK
10482      * @see     Character#CONNECTOR_PUNCTUATION
10483      * @see     Character#CONTROL
10484      * @see     Character#CURRENCY_SYMBOL
10485      * @see     Character#DASH_PUNCTUATION
10486      * @see     Character#DECIMAL_DIGIT_NUMBER
10487      * @see     Character#ENCLOSING_MARK
10488      * @see     Character#END_PUNCTUATION
10489      * @see     Character#FINAL_QUOTE_PUNCTUATION
10490      * @see     Character#FORMAT
10491      * @see     Character#INITIAL_QUOTE_PUNCTUATION
10492      * @see     Character#LETTER_NUMBER
10493      * @see     Character#LINE_SEPARATOR
10494      * @see     Character#LOWERCASE_LETTER
10495      * @see     Character#MATH_SYMBOL
10496      * @see     Character#MODIFIER_LETTER
10497      * @see     Character#MODIFIER_SYMBOL
10498      * @see     Character#NON_SPACING_MARK
10499      * @see     Character#OTHER_LETTER
10500      * @see     Character#OTHER_NUMBER
10501      * @see     Character#OTHER_PUNCTUATION
10502      * @see     Character#OTHER_SYMBOL
10503      * @see     Character#PARAGRAPH_SEPARATOR
10504      * @see     Character#PRIVATE_USE
10505      * @see     Character#SPACE_SEPARATOR
10506      * @see     Character#START_PUNCTUATION
10507      * @see     Character#SURROGATE
10508      * @see     Character#TITLECASE_LETTER
10509      * @see     Character#UNASSIGNED
10510      * @see     Character#UPPERCASE_LETTER
10511      * @since   1.1
10512      */
10513     public static int getType(char ch) {
10514         return getType((int)ch);
10515     }
10516 
10517     /**
10518      * Returns a value indicating a character's general category.
10519      *
10520      * @param   codePoint the character (Unicode code point) to be tested.
10521      * @return  a value of type {@code int} representing the
10522      *          character's general category.
10523      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10524      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10525      * @see     Character#CONTROL CONTROL
10526      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10527      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
10528      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10529      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
10530      * @see     Character#END_PUNCTUATION END_PUNCTUATION
10531      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10532      * @see     Character#FORMAT FORMAT
10533      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10534      * @see     Character#LETTER_NUMBER LETTER_NUMBER
10535      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
10536      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
10537      * @see     Character#MATH_SYMBOL MATH_SYMBOL
10538      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
10539      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10540      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
10541      * @see     Character#OTHER_LETTER OTHER_LETTER
10542      * @see     Character#OTHER_NUMBER OTHER_NUMBER
10543      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10544      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
10545      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10546      * @see     Character#PRIVATE_USE PRIVATE_USE
10547      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
10548      * @see     Character#START_PUNCTUATION START_PUNCTUATION
10549      * @see     Character#SURROGATE SURROGATE
10550      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
10551      * @see     Character#UNASSIGNED UNASSIGNED
10552      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
10553      * @since   1.5
10554      */
10555     public static int getType(int codePoint) {
10556         return CharacterData.of(codePoint).getType(codePoint);
10557     }
10558 
10559     /**
10560      * Determines the character representation for a specific digit in
10561      * the specified radix. If the value of {@code radix} is not a
10562      * valid radix, or the value of {@code digit} is not a valid
10563      * digit in the specified radix, the null character
10564      * ({@code '\u005Cu0000'}) is returned.
10565      * <p>
10566      * The {@code radix} argument is valid if it is greater than or
10567      * equal to {@code MIN_RADIX} and less than or equal to
10568      * {@code MAX_RADIX}. The {@code digit} argument is valid if
10569      * {@code 0 <= digit < radix}.
10570      * <p>
10571      * If the digit is less than 10, then
10572      * {@code '0' + digit} is returned. Otherwise, the value
10573      * {@code 'a' + digit - 10} is returned.
10574      *
10575      * @param   digit   the number to convert to a character.
10576      * @param   radix   the radix.
10577      * @return  the {@code char} representation of the specified digit
10578      *          in the specified radix.
10579      * @see     Character#MIN_RADIX
10580      * @see     Character#MAX_RADIX
10581      * @see     Character#digit(char, int)
10582      */
10583     public static char forDigit(int digit, int radix) {
10584         if ((digit >= radix) || (digit < 0)) {
10585             return '\0';
10586         }
10587         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10588             return '\0';
10589         }
10590         if (digit < 10) {
10591             return (char)('0' + digit);
10592         }
10593         return (char)('a' - 10 + digit);
10594     }
10595 
10596     /**
10597      * Returns the Unicode directionality property for the given
10598      * character.  Character directionality is used to calculate the
10599      * visual ordering of text. The directionality value of undefined
10600      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10601      *
10602      * <p><b>Note:</b> This method cannot handle <a
10603      * href="#supplementary"> supplementary characters</a>. To support
10604      * all Unicode characters, including supplementary characters, use
10605      * the {@link #getDirectionality(int)} method.
10606      *
10607      * @param  ch {@code char} for which the directionality property
10608      *            is requested.
10609      * @return the directionality property of the {@code char} value.
10610      *
10611      * @see Character#DIRECTIONALITY_UNDEFINED
10612      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10613      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10614      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10615      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10616      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10617      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10618      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10619      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10620      * @see Character#DIRECTIONALITY_NONSPACING_MARK
10621      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10622      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10623      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10624      * @see Character#DIRECTIONALITY_WHITESPACE
10625      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10626      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10627      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10628      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10629      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10630      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10631      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10632      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10633      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10634      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10635      * @since 1.4
10636      */
10637     public static byte getDirectionality(char ch) {
10638         return getDirectionality((int)ch);
10639     }
10640 
10641     /**
10642      * Returns the Unicode directionality property for the given
10643      * character (Unicode code point).  Character directionality is
10644      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
10646      * #DIRECTIONALITY_UNDEFINED}.
10647      *
10648      * @param   codePoint the character (Unicode code point) for which
10649      *          the directionality property is requested.
10650      * @return the directionality property of the character.
10651      *
10652      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10653      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10654      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10655      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10656      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10657      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10658      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10659      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10660      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10661      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10662      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10663      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10664      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10665      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10666      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10667      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10668      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10669      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10670      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10671      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10672      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10673      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10674      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10675      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10676      * @since    1.5
10677      */
10678     public static byte getDirectionality(int codePoint) {
10679         return CharacterData.of(codePoint).getDirectionality(codePoint);
10680     }
10681 
10682     /**
10683      * Determines whether the character is mirrored according to the
10684      * Unicode specification.  Mirrored characters should have their
10685      * glyphs horizontally mirrored when displayed in text that is
10686      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
10687      * PARENTHESIS is semantically defined to be an <i>opening
10688      * parenthesis</i>.  This will appear as a "(" in text that is
10689      * left-to-right but as a ")" in text that is right-to-left.
10690      *
10691      * <p><b>Note:</b> This method cannot handle <a
10692      * href="#supplementary"> supplementary characters</a>. To support
10693      * all Unicode characters, including supplementary characters, use
10694      * the {@link #isMirrored(int)} method.
10695      *
10696      * @param  ch {@code char} for which the mirrored property is requested
10697      * @return {@code true} if the char is mirrored, {@code false}
10698      *         if the {@code char} is not mirrored or is not defined.
10699      * @since 1.4
10700      */
10701     public static boolean isMirrored(char ch) {
10702         return isMirrored((int)ch);
10703     }
10704 
10705     /**
10706      * Determines whether the specified character (Unicode code point)
10707      * is mirrored according to the Unicode specification.  Mirrored
10708      * characters should have their glyphs horizontally mirrored when
10709      * displayed in text that is right-to-left.  For example,
10710      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10711      * defined to be an <i>opening parenthesis</i>.  This will appear
10712      * as a "(" in text that is left-to-right but as a ")" in text
10713      * that is right-to-left.
10714      *
10715      * @param   codePoint the character (Unicode code point) to be tested.
10716      * @return  {@code true} if the character is mirrored, {@code false}
10717      *          if the character is not mirrored or is not defined.
10718      * @since   1.5
10719      */
10720     public static boolean isMirrored(int codePoint) {
10721         return CharacterData.of(codePoint).isMirrored(codePoint);
10722     }
10723 
10724     /**
10725      * Compares two {@code Character} objects numerically.
10726      *
10727      * @param   anotherCharacter   the {@code Character} to be compared.
10728 
10729      * @return  the value {@code 0} if the argument {@code Character}
10730      *          is equal to this {@code Character}; a value less than
10731      *          {@code 0} if this {@code Character} is numerically less
10732      *          than the {@code Character} argument; and a value greater than
10733      *          {@code 0} if this {@code Character} is numerically greater
10734      *          than the {@code Character} argument (unsigned comparison).
10735      *          Note that this is strictly a numerical comparison; it is not
10736      *          locale-dependent.
10737      * @since   1.2
10738      */
10739     public int compareTo(Character anotherCharacter) {
10740         return compare(this.value, anotherCharacter.value);
10741     }
10742 
10743     /**
10744      * Compares two {@code char} values numerically.
10745      * The value returned is identical to what would be returned by:
10746      * <pre>
10747      *    Character.valueOf(x).compareTo(Character.valueOf(y))
10748      * </pre>
10749      *
10750      * @param  x the first {@code char} to compare
10751      * @param  y the second {@code char} to compare
10752      * @return the value {@code 0} if {@code x == y};
10753      *         a value less than {@code 0} if {@code x < y}; and
10754      *         a value greater than {@code 0} if {@code x > y}
10755      * @since 1.7
10756      */
10757     public static int compare(char x, char y) {
10758         return x - y;
10759     }
10760 
10761     /**
10762      * Converts the character (Unicode code point) argument to uppercase using
10763      * information from the UnicodeData file.
10764      *
10765      * @param   codePoint   the character (Unicode code point) to be converted.
10766      * @return  either the uppercase equivalent of the character, if
10767      *          any, or an error flag ({@code Character.ERROR})
10768      *          that indicates that a 1:M {@code char} mapping exists.
10769      * @see     Character#isLowerCase(char)
10770      * @see     Character#isUpperCase(char)
10771      * @see     Character#toLowerCase(char)
10772      * @see     Character#toTitleCase(char)
10773      * @since 1.4
10774      */
10775     static int toUpperCaseEx(int codePoint) {
10776         assert isValidCodePoint(codePoint);
10777         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
10778     }
10779 
10780     /**
10781      * Converts the character (Unicode code point) argument to uppercase using case
10782      * mapping information from the SpecialCasing file in the Unicode
10783      * specification. If a character has no explicit uppercase
10784      * mapping, then the {@code char} itself is returned in the
10785      * {@code char[]}.
10786      *
10787      * @param   codePoint   the character (Unicode code point) to be converted.
10788      * @return a {@code char[]} with the uppercased character.
10789      * @since 1.4
10790      */
10791     static char[] toUpperCaseCharArray(int codePoint) {
10792         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
10793         assert isBmpCodePoint(codePoint);
10794         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
10795     }
10796 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * <p>{@link #BYTES} is derived from this value.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
10804 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form.  The value is computed as {@link #SIZE} divided by
     * {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
10812 
10813     /**
10814      * Returns the value obtained by reversing the order of the bytes in the
10815      * specified {@code char} value.
10816      *
10817      * @param ch The {@code char} of which to reverse the byte order.
10818      * @return the value obtained by reversing (or, equivalently, swapping)
10819      *     the bytes in the specified {@code char} value.
10820      * @since 1.5
10821      */
10822     @HotSpotIntrinsicCandidate
10823     public static char reverseBytes(char ch) {
10824         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
10825     }
10826 
10827     /**
10828      * Returns the Unicode name of the specified character
10829      * {@code codePoint}, or null if the code point is
10830      * {@link #UNASSIGNED unassigned}.
10831      * <p>
10832      * Note: if the specified character is not assigned a name by
10833      * the <i>UnicodeData</i> file (part of the Unicode Character
10834      * Database maintained by the Unicode Consortium), the returned
10835      * name is the same as the result of expression.
10836      *
10837      * <blockquote>{@code
10838      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10839      *     + " "
10840      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10841      *
10842      * }</blockquote>
10843      *
10844      * @param  codePoint the character (Unicode code point)
10845      *
10846      * @return the Unicode name of the specified character, or null if
10847      *         the code point is unassigned.
10848      *
10849      * @throws IllegalArgumentException if the specified
10850      *            {@code codePoint} is not a valid Unicode
10851      *            code point.
10852      *
10853      * @since 1.7
10854      */
10855     public static String getName(int codePoint) {
10856         if (!isValidCodePoint(codePoint)) {
10857             throw new IllegalArgumentException(
10858                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10859         }
10860         String name = CharacterName.getInstance().getName(codePoint);
10861         if (name != null)
10862             return name;
10863         if (getType(codePoint) == UNASSIGNED)
10864             return null;
10865         UnicodeBlock block = UnicodeBlock.of(codePoint);
10866         if (block != null)
10867             return block.toString().replace('_', ' ') + " "
10868                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10869         // should never come here
10870         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10871     }
10872 
10873     /**
10874      * Returns the code point value of the Unicode character specified by
10875      * the given Unicode character name.
10876      * <p>
10877      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
10878      * file (part of the Unicode Character Database maintained by the Unicode
10879      * Consortium), its name is defined as the result of expression
10880      *
10881      * <blockquote>{@code
10882      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10883      *     + " "
10884      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10885      *
10886      * }</blockquote>
10887      * <p>
10888      * The {@code name} matching is case insensitive, with any leading and
10889      * trailing whitespace character removed.
10890      *
10891      * @param  name the Unicode character name
10892      *
10893      * @return the code point value of the character specified by its name.
10894      *
10895      * @throws IllegalArgumentException if the specified {@code name}
10896      *         is not a valid Unicode character name.
10897      * @throws NullPointerException if {@code name} is {@code null}
10898      *
10899      * @since 9
10900      */
10901     public static int codePointOf(String name) {
10902         name = name.trim().toUpperCase(Locale.ROOT);
10903         int cp = CharacterName.getInstance().getCodePoint(name);
10904         if (cp != -1)
10905             return cp;
10906         try {
10907             int off = name.lastIndexOf(' ');
10908             if (off != -1) {
10909                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
10910                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
10911                     return cp;
10912             }
10913         } catch (Exception x) {}
10914         throw new IllegalArgumentException("Unrecognized character name :" + name);
10915     }
10916 }