1 /*
   2  * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 import jdk.internal.misc.VM;
  35 
  36 /**
  37  * The {@code Character} class wraps a value of the primitive
  38  * type {@code char} in an object. An object of class
  39  * {@code Character} contains a single field whose type is
  40  * {@code char}.
  41  * <p>
  42  * In addition, this class provides a large number of static methods for
  43  * determining a character's category (lowercase letter, digit, etc.)
  44  * and for converting characters from uppercase to lowercase and vice
  45  * versa.
  46  *
  47  * <h2><a id="conformance">Unicode Conformance</a></h2>
  48  * <p>
  49  * The fields and methods of class {@code Character} are defined in terms
  50  * of character information from the Unicode Standard, specifically the
  51  * <i>UnicodeData</i> file that is part of the Unicode Character Database.
  52  * This file specifies properties including name and category for every
  53  * assigned Unicode code point or character range. The file is available
  54  * from the Unicode Consortium at
  55  * <a href="http://www.unicode.org">http://www.unicode.org</a>.
  56  * <p>
  57  * Character information is based on the Unicode Standard, version 12.1.
  58  *
  59  * <h2><a id="unicode">Unicode Character Representations</a></h2>
  60  *
  61  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
  63  * original Unicode specification, which defined characters as
  64  * fixed-width 16-bit entities. The Unicode Standard has since been
  65  * changed to allow for characters whose representation requires more
  66  * than 16 bits.  The range of legal <em>code point</em>s is now
  67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  68  * (Refer to the <a
  69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  71  * Standard.)
  72  *
  73  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
  74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  75  * <a id="supplementary">Characters</a> whose code points are greater
  76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  77  * platform uses the UTF-16 representation in {@code char} arrays and
  78  * in the {@code String} and {@code StringBuffer} classes. In
  79  * this representation, supplementary characters are represented as a pair
  80  * of {@code char} values, the first from the <em>high-surrogates</em>
  81  * range, (\uD800-\uDBFF), the second from the
  82  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  83  *
  84  * <p>A {@code char} value, therefore, represents Basic
  85  * Multilingual Plane (BMP) code points, including the surrogate
  86  * code points, or code units of the UTF-16 encoding. An
  87  * {@code int} value represents all Unicode code points,
  88  * including supplementary code points. The lower (least significant)
  89  * 21 bits of {@code int} are used to represent Unicode code
  90  * points and the upper (most significant) 11 bits must be zero.
  91  * Unless otherwise specified, the behavior with respect to
  92  * supplementary characters and surrogate {@code char} values is
  93  * as follows:
  94  *
  95  * <ul>
  96  * <li>The methods that only accept a {@code char} value cannot support
  97  * supplementary characters. They treat {@code char} values from the
  98  * surrogate ranges as undefined characters. For example,
  99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 * this specific value, if followed by any low-surrogate value in a string,
 * would represent a letter.
 102  *
 103  * <li>The methods that accept an {@code int} value support all
 104  * Unicode characters, including supplementary characters. For
 105  * example, {@code Character.isLetter(0x2F81A)} returns
 106  * {@code true} because the code point value represents a letter
 107  * (a CJK ideograph).
 108  * </ul>
 109  *
 110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 111  * used for character values in the range between U+0000 and U+10FFFF,
 112  * and <em>Unicode code unit</em> is used for 16-bit
 113  * {@code char} values that are code units of the <em>UTF-16</em>
 114  * encoding. For more information on Unicode terminology, refer to the
 115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 116  *
 117  * @author  Lee Boynton
 118  * @author  Guy Steele
 119  * @author  Akira Tanaka
 120  * @author  Martin Buchholz
 121  * @author  Ulf Zibis
 122  * @since   1.0
 123  */
 124 public final
 125 class Character implements java.io.Serializable, Comparable<Character> {
 126     /**
 127      * The minimum radix available for conversion to and from strings.
 128      * The constant value of this field is the smallest value permitted
 129      * for the radix argument in radix-conversion methods such as the
 130      * {@code digit} method, the {@code forDigit} method, and the
 131      * {@code toString} method of class {@code Integer}.
 132      *
 133      * @see     Character#digit(char, int)
 134      * @see     Character#forDigit(int, int)
 135      * @see     Integer#toString(int, int)
 136      * @see     Integer#valueOf(String)
 137      */
 138     public static final int MIN_RADIX = 2;
 139 
 140     /**
 141      * The maximum radix available for conversion to and from strings.
 142      * The constant value of this field is the largest value permitted
 143      * for the radix argument in radix-conversion methods such as the
 144      * {@code digit} method, the {@code forDigit} method, and the
 145      * {@code toString} method of class {@code Integer}.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * {@code char}, {@code '\u005Cu0000'}.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * {@code char}, {@code '\u005CuFFFF'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The {@code Class} instance representing the primitive type
 172      * {@code char}.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
 289     /**
 290      * General category "Co" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined {@code char}
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
     * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * Weak bidirectional character type "LRI" in the Unicode specification.
 496      * @since 9
 497      */
 498     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 499 
 500     /**
 501      * Weak bidirectional character type "RLI" in the Unicode specification.
 502      * @since 9
 503      */
 504     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 505 
 506     /**
 507      * Weak bidirectional character type "FSI" in the Unicode specification.
 508      * @since 9
 509      */
 510     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 511 
 512     /**
 513      * Weak bidirectional character type "PDI" in the Unicode specification.
 514      * @since 9
 515      */
 516     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 517 
 518     /**
 519      * The minimum value of a
 520      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 521      * Unicode high-surrogate code unit</a>
 522      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 523      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a
 531      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 532      * Unicode high-surrogate code unit</a>
 533      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 534      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 535      *
 536      * @since 1.5
 537      */
 538     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 539 
 540     /**
 541      * The minimum value of a
 542      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 543      * Unicode low-surrogate code unit</a>
 544      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 545      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 546      *
 547      * @since 1.5
 548      */
 549     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 550 
 551     /**
 552      * The maximum value of a
 553      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 554      * Unicode low-surrogate code unit</a>
 555      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 556      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 557      *
 558      * @since 1.5
 559      */
 560     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 561 
 562     /**
 563      * The minimum value of a Unicode surrogate code unit in the
 564      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 569 
 570     /**
 571      * The maximum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 577 
 578     /**
 579      * The minimum value of a
 580      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 581      * Unicode supplementary code point</a>, constant {@code U+10000}.
 582      *
 583      * @since 1.5
 584      */
 585     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 586 
 587     /**
 588      * The minimum value of a
 589      * <a href="http://www.unicode.org/glossary/#code_point">
 590      * Unicode code point</a>, constant {@code U+0000}.
 591      *
 592      * @since 1.5
 593      */
 594     public static final int MIN_CODE_POINT = 0x000000;
 595 
 596     /**
 597      * The maximum value of a
 598      * <a href="http://www.unicode.org/glossary/#code_point">
 599      * Unicode code point</a>, constant {@code U+10FFFF}.
 600      *
 601      * @since 1.5
 602      */
 603     public static final int MAX_CODE_POINT = 0X10FFFF;
 604 
 605 
 606     /**
 607      * Instances of this class represent particular subsets of the Unicode
 608      * character set.  The only family of subsets defined in the
 609      * {@code Character} class is {@link Character.UnicodeBlock}.
 610      * Other portions of the Java API may define other subsets for their
 611      * own purposes.
 612      *
 613      * @since 1.2
 614      */
 615     public static class Subset  {
 616 
 617         private String name;
 618 
 619         /**
 620          * Constructs a new {@code Subset} instance.
 621          *
 622          * @param  name  The name of this subset
 623          * @throws NullPointerException if name is {@code null}
 624          */
 625         protected Subset(String name) {
 626             if (name == null) {
 627                 throw new NullPointerException("name");
 628             }
 629             this.name = name;
 630         }
 631 
 632         /**
 633          * Compares two {@code Subset} objects for equality.
 634          * This method returns {@code true} if and only if
 635          * {@code this} and the argument refer to the same
 636          * object; since this method is {@code final}, this
 637          * guarantee holds for all subclasses.
 638          */
 639         public final boolean equals(Object obj) {
 640             return (this == obj);
 641         }
 642 
 643         /**
 644          * Returns the standard hash code as defined by the
 645          * {@link Object#hashCode} method.  This method
 646          * is {@code final} in order to ensure that the
 647          * {@code equals} and {@code hashCode} methods will
 648          * be consistent in all subclasses.
 649          */
 650         public final int hashCode() {
 651             return super.hashCode();
 652         }
 653 
 654         /**
 655          * Returns the name of this subset.
 656          */
 657         public final String toString() {
 658             return name;
 659         }
 660     }
 661 
 662     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 663     // for the latest specification of Unicode Blocks.
 664 
 665     /**
 666      * A family of character subsets representing the character blocks in the
 667      * Unicode specification. Character blocks generally define characters
 668      * used for a specific script or purpose. A character is contained by
 669      * at most one Unicode block.
 670      *
 671      * @since 1.2
 672      */
 673     public static final class UnicodeBlock extends Subset {
        /**
         * Sizing hint for {@code map}.
         * 676 - the expected number of entities
         * 0.75 - the default load factor of HashMap
         * <p>
         * NOTE(review): "entities" presumably counts every key stored in
         * {@code map} (identifier names plus aliases) - confirm against the
         * full list of block constants when adding new blocks.
         */
        private static final int NUM_ENTITIES = 676;
        /**
         * Lookup table from a name to its {@code UnicodeBlock}. The
         * constructors of this class insert each block under its identifier
         * name and under every alias they are given. The initial capacity is
         * the expected entry count divided by the load factor, plus one; the
         * intent appears to be that the expected number of entries fits
         * without the table having to grow.
         */
        private static Map<String, UnicodeBlock> map =
                new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f));
 681 
 682         /**
 683          * Creates a UnicodeBlock with the given identifier name.
 684          * This name must be the same as the block identifier.
 685          */
 686         private UnicodeBlock(String idName) {
 687             super(idName);
 688             map.put(idName, this);
 689         }
 690 
 691         /**
 692          * Creates a UnicodeBlock with the given identifier name and
 693          * alias name.
 694          */
 695         private UnicodeBlock(String idName, String alias) {
 696             this(idName);
 697             map.put(alias, this);
 698         }
 699 
 700         /**
 701          * Creates a UnicodeBlock with the given identifier name and
 702          * alias names.
 703          */
 704         private UnicodeBlock(String idName, String... aliases) {
 705             this(idName);
 706             for (String alias : aliases)
 707                 map.put(alias, this);
 708         }
 709 
 710         /**
 711          * Constant for the "Basic Latin" Unicode character block.
 712          * @since 1.2
 713          */
 714         public static final UnicodeBlock  BASIC_LATIN =
 715             new UnicodeBlock("BASIC_LATIN",
 716                              "BASIC LATIN",
 717                              "BASICLATIN");
 718 
 719         /**
 720          * Constant for the "Latin-1 Supplement" Unicode character block.
 721          * @since 1.2
 722          */
 723         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 724             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 725                              "LATIN-1 SUPPLEMENT",
 726                              "LATIN-1SUPPLEMENT");
 727 
 728         /**
 729          * Constant for the "Latin Extended-A" Unicode character block.
 730          * @since 1.2
 731          */
 732         public static final UnicodeBlock LATIN_EXTENDED_A =
 733             new UnicodeBlock("LATIN_EXTENDED_A",
 734                              "LATIN EXTENDED-A",
 735                              "LATINEXTENDED-A");
 736 
 737         /**
 738          * Constant for the "Latin Extended-B" Unicode character block.
 739          * @since 1.2
 740          */
 741         public static final UnicodeBlock LATIN_EXTENDED_B =
 742             new UnicodeBlock("LATIN_EXTENDED_B",
 743                              "LATIN EXTENDED-B",
 744                              "LATINEXTENDED-B");
 745 
 746         /**
 747          * Constant for the "IPA Extensions" Unicode character block.
 748          * @since 1.2
 749          */
 750         public static final UnicodeBlock IPA_EXTENSIONS =
 751             new UnicodeBlock("IPA_EXTENSIONS",
 752                              "IPA EXTENSIONS",
 753                              "IPAEXTENSIONS");
 754 
 755         /**
 756          * Constant for the "Spacing Modifier Letters" Unicode character block.
 757          * @since 1.2
 758          */
 759         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 760             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 761                              "SPACING MODIFIER LETTERS",
 762                              "SPACINGMODIFIERLETTERS");
 763 
 764         /**
 765          * Constant for the "Combining Diacritical Marks" Unicode character block.
 766          * @since 1.2
 767          */
 768         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 769             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 770                              "COMBINING DIACRITICAL MARKS",
 771                              "COMBININGDIACRITICALMARKS");
 772 
 773         /**
 774          * Constant for the "Greek and Coptic" Unicode character block.
 775          * <p>
 776          * This block was previously known as the "Greek" block.
 777          *
 778          * @since 1.2
 779          */
 780         public static final UnicodeBlock GREEK =
 781             new UnicodeBlock("GREEK",
 782                              "GREEK AND COPTIC",
 783                              "GREEKANDCOPTIC");
 784 
 785         /**
 786          * Constant for the "Cyrillic" Unicode character block.
 787          * @since 1.2
 788          */
 789         public static final UnicodeBlock CYRILLIC =
 790             new UnicodeBlock("CYRILLIC");
 791 
 792         /**
 793          * Constant for the "Armenian" Unicode character block.
 794          * @since 1.2
 795          */
 796         public static final UnicodeBlock ARMENIAN =
 797             new UnicodeBlock("ARMENIAN");
 798 
 799         /**
 800          * Constant for the "Hebrew" Unicode character block.
 801          * @since 1.2
 802          */
 803         public static final UnicodeBlock HEBREW =
 804             new UnicodeBlock("HEBREW");
 805 
 806         /**
 807          * Constant for the "Arabic" Unicode character block.
 808          * @since 1.2
 809          */
 810         public static final UnicodeBlock ARABIC =
 811             new UnicodeBlock("ARABIC");
 812 
 813         /**
 814          * Constant for the "Devanagari" Unicode character block.
 815          * @since 1.2
 816          */
 817         public static final UnicodeBlock DEVANAGARI =
 818             new UnicodeBlock("DEVANAGARI");
 819 
 820         /**
 821          * Constant for the "Bengali" Unicode character block.
 822          * @since 1.2
 823          */
 824         public static final UnicodeBlock BENGALI =
 825             new UnicodeBlock("BENGALI");
 826 
 827         /**
 828          * Constant for the "Gurmukhi" Unicode character block.
 829          * @since 1.2
 830          */
 831         public static final UnicodeBlock GURMUKHI =
 832             new UnicodeBlock("GURMUKHI");
 833 
 834         /**
 835          * Constant for the "Gujarati" Unicode character block.
 836          * @since 1.2
 837          */
 838         public static final UnicodeBlock GUJARATI =
 839             new UnicodeBlock("GUJARATI");
 840 
 841         /**
 842          * Constant for the "Oriya" Unicode character block.
 843          * @since 1.2
 844          */
 845         public static final UnicodeBlock ORIYA =
 846             new UnicodeBlock("ORIYA");
 847 
 848         /**
 849          * Constant for the "Tamil" Unicode character block.
 850          * @since 1.2
 851          */
 852         public static final UnicodeBlock TAMIL =
 853             new UnicodeBlock("TAMIL");
 854 
 855         /**
 856          * Constant for the "Telugu" Unicode character block.
 857          * @since 1.2
 858          */
 859         public static final UnicodeBlock TELUGU =
 860             new UnicodeBlock("TELUGU");
 861 
 862         /**
 863          * Constant for the "Kannada" Unicode character block.
 864          * @since 1.2
 865          */
 866         public static final UnicodeBlock KANNADA =
 867             new UnicodeBlock("KANNADA"); // one-word name: identifier only
 868 
 869         /**
 870          * Constant for the "Malayalam" Unicode character block.
 871          * @since 1.2
 872          */
 873         public static final UnicodeBlock MALAYALAM =
 874             new UnicodeBlock("MALAYALAM"); // one-word name: identifier only
 875 
 876         /**
 877          * Constant for the "Thai" Unicode character block.
 878          * @since 1.2
 879          */
 880         public static final UnicodeBlock THAI =
 881             new UnicodeBlock("THAI"); // one-word name: identifier only
 882 
 883         /**
 884          * Constant for the "Lao" Unicode character block.
 885          * @since 1.2
 886          */
 887         public static final UnicodeBlock LAO =
 888             new UnicodeBlock("LAO"); // one-word name: identifier only
 889 
 890         /**
 891          * Constant for the "Tibetan" Unicode character block.
 892          * @since 1.2
 893          */
 894         public static final UnicodeBlock TIBETAN =
 895             new UnicodeBlock("TIBETAN"); // one-word name: identifier only
 896 
 897         /**
 898          * Constant for the "Georgian" Unicode character block.
 899          * @since 1.2
 900          */
 901         public static final UnicodeBlock GEORGIAN =
 902             new UnicodeBlock("GEORGIAN"); // one-word name: identifier only
 903 
 904         /**
 905          * Constant for the "Hangul Jamo" Unicode character block.
 906          * @since 1.2
 907          */
 908         public static final UnicodeBlock HANGUL_JAMO =
 909             new UnicodeBlock("HANGUL_JAMO", // identifier (matches the field name)
 910                              "HANGUL JAMO", // block name with spaces
 911                              "HANGULJAMO"); // block name, spaces removed
 912 
 913         /**
 914          * Constant for the "Latin Extended Additional" Unicode character block.
 915          * @since 1.2
 916          */
 917         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 918             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", // identifier (matches the field name)
 919                              "LATIN EXTENDED ADDITIONAL", // block name with spaces
 920                              "LATINEXTENDEDADDITIONAL"); // block name, spaces removed
 921 
 922         /**
 923          * Constant for the "Greek Extended" Unicode character block.
 924          * @since 1.2
 925          */
 926         public static final UnicodeBlock GREEK_EXTENDED =
 927             new UnicodeBlock("GREEK_EXTENDED", // identifier (matches the field name)
 928                              "GREEK EXTENDED", // block name with spaces
 929                              "GREEKEXTENDED"); // block name, spaces removed
 930 
 931         /**
 932          * Constant for the "General Punctuation" Unicode character block.
 933          * @since 1.2
 934          */
 935         public static final UnicodeBlock GENERAL_PUNCTUATION =
 936             new UnicodeBlock("GENERAL_PUNCTUATION", // identifier (matches the field name)
 937                              "GENERAL PUNCTUATION", // block name with spaces
 938                              "GENERALPUNCTUATION"); // block name, spaces removed
 939 
 940         /**
 941          * Constant for the "Superscripts and Subscripts" Unicode character
 942          * block.
 943          * @since 1.2
 944          */
 945         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 946             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", // identifier (matches the field name)
 947                              "SUPERSCRIPTS AND SUBSCRIPTS", // block name with spaces
 948                              "SUPERSCRIPTSANDSUBSCRIPTS"); // block name, spaces removed
 949 
 950         /**
 951          * Constant for the "Currency Symbols" Unicode character block.
 952          * @since 1.2
 953          */
 954         public static final UnicodeBlock CURRENCY_SYMBOLS =
 955             new UnicodeBlock("CURRENCY_SYMBOLS", // identifier (matches the field name)
 956                              "CURRENCY SYMBOLS", // block name with spaces
 957                              "CURRENCYSYMBOLS"); // block name, spaces removed
 958 
 959         /**
 960          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 961          * character block.
 962          * <p>
 963          * This block was previously known as "Combining Marks for Symbols".
 964          * @since 1.2
 965          */
 966         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 967             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", // identifier; follows the earlier block name
 968                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS", // current name, with spaces
 969                              "COMBININGDIACRITICALMARKSFORSYMBOLS", // current name, spaces removed
 970                              "COMBINING MARKS FOR SYMBOLS", // earlier name, with spaces
 971                              "COMBININGMARKSFORSYMBOLS"); // earlier name, spaces removed
 972 
 973         /**
 974          * Constant for the "Letterlike Symbols" Unicode character block.
 975          * @since 1.2
 976          */
 977         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 978             new UnicodeBlock("LETTERLIKE_SYMBOLS", // identifier (matches the field name)
 979                              "LETTERLIKE SYMBOLS", // block name with spaces
 980                              "LETTERLIKESYMBOLS"); // block name, spaces removed
 981 
 982         /**
 983          * Constant for the "Number Forms" Unicode character block.
 984          * @since 1.2
 985          */
 986         public static final UnicodeBlock NUMBER_FORMS =
 987             new UnicodeBlock("NUMBER_FORMS", // identifier (matches the field name)
 988                              "NUMBER FORMS", // block name with spaces
 989                              "NUMBERFORMS"); // block name, spaces removed
 990 
 991         /**
 992          * Constant for the "Arrows" Unicode character block.
 993          * @since 1.2
 994          */
 995         public static final UnicodeBlock ARROWS =
 996             new UnicodeBlock("ARROWS"); // one-word name: identifier only
 997 
 998         /**
 999          * Constant for the "Mathematical Operators" Unicode character block.
1000          * @since 1.2
1001          */
1002         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1003             new UnicodeBlock("MATHEMATICAL_OPERATORS", // identifier (matches the field name)
1004                              "MATHEMATICAL OPERATORS", // block name with spaces
1005                              "MATHEMATICALOPERATORS"); // block name, spaces removed
1006 
1007         /**
1008          * Constant for the "Miscellaneous Technical" Unicode character block.
1009          * @since 1.2
1010          */
1011         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1012             new UnicodeBlock("MISCELLANEOUS_TECHNICAL", // identifier (matches the field name)
1013                              "MISCELLANEOUS TECHNICAL", // block name with spaces
1014                              "MISCELLANEOUSTECHNICAL"); // block name, spaces removed
1015 
1016         /**
1017          * Constant for the "Control Pictures" Unicode character block.
1018          * @since 1.2
1019          */
1020         public static final UnicodeBlock CONTROL_PICTURES =
1021             new UnicodeBlock("CONTROL_PICTURES", // identifier (matches the field name)
1022                              "CONTROL PICTURES", // block name with spaces
1023                              "CONTROLPICTURES"); // block name, spaces removed
1024 
1025         /**
1026          * Constant for the "Optical Character Recognition" Unicode character block.
1027          * @since 1.2
1028          */
1029         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1030             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", // identifier (matches the field name)
1031                              "OPTICAL CHARACTER RECOGNITION", // block name with spaces
1032                              "OPTICALCHARACTERRECOGNITION"); // block name, spaces removed
1033 
1034         /**
1035          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1036          * @since 1.2
1037          */
1038         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1039             new UnicodeBlock("ENCLOSED_ALPHANUMERICS", // identifier (matches the field name)
1040                              "ENCLOSED ALPHANUMERICS", // block name with spaces
1041                              "ENCLOSEDALPHANUMERICS"); // block name, spaces removed
1042 
1043         /**
1044          * Constant for the "Box Drawing" Unicode character block.
1045          * @since 1.2
1046          */
1047         public static final UnicodeBlock BOX_DRAWING =
1048             new UnicodeBlock("BOX_DRAWING", // identifier (matches the field name)
1049                              "BOX DRAWING", // block name with spaces
1050                              "BOXDRAWING"); // block name, spaces removed
1051 
1052         /**
1053          * Constant for the "Block Elements" Unicode character block.
1054          * @since 1.2
1055          */
1056         public static final UnicodeBlock BLOCK_ELEMENTS =
1057             new UnicodeBlock("BLOCK_ELEMENTS", // identifier (matches the field name)
1058                              "BLOCK ELEMENTS", // block name with spaces
1059                              "BLOCKELEMENTS"); // block name, spaces removed
1060 
1061         /**
1062          * Constant for the "Geometric Shapes" Unicode character block.
1063          * @since 1.2
1064          */
1065         public static final UnicodeBlock GEOMETRIC_SHAPES =
1066             new UnicodeBlock("GEOMETRIC_SHAPES", // identifier (matches the field name)
1067                              "GEOMETRIC SHAPES", // block name with spaces
1068                              "GEOMETRICSHAPES"); // block name, spaces removed
1069 
1070         /**
1071          * Constant for the "Miscellaneous Symbols" Unicode character block.
1072          * @since 1.2
1073          */
1074         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1075             new UnicodeBlock("MISCELLANEOUS_SYMBOLS", // identifier (matches the field name)
1076                              "MISCELLANEOUS SYMBOLS", // block name with spaces
1077                              "MISCELLANEOUSSYMBOLS"); // block name, spaces removed
1078 
1079         /**
1080          * Constant for the "Dingbats" Unicode character block.
1081          * @since 1.2
1082          */
1083         public static final UnicodeBlock DINGBATS =
1084             new UnicodeBlock("DINGBATS"); // one-word name: identifier only
1085 
1086         /**
1087          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1088          * @since 1.2
1089          */
1090         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1091             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", // identifier (matches the field name)
1092                              "CJK SYMBOLS AND PUNCTUATION", // block name with spaces
1093                              "CJKSYMBOLSANDPUNCTUATION"); // block name, spaces removed
1094 
1095         /**
1096          * Constant for the "Hiragana" Unicode character block.
1097          * @since 1.2
1098          */
1099         public static final UnicodeBlock HIRAGANA =
1100             new UnicodeBlock("HIRAGANA"); // one-word name: identifier only
1101 
1102         /**
1103          * Constant for the "Katakana" Unicode character block.
1104          * @since 1.2
1105          */
1106         public static final UnicodeBlock KATAKANA =
1107             new UnicodeBlock("KATAKANA"); // one-word name: identifier only
1108 
1109         /**
1110          * Constant for the "Bopomofo" Unicode character block.
1111          * @since 1.2
1112          */
1113         public static final UnicodeBlock BOPOMOFO =
1114             new UnicodeBlock("BOPOMOFO"); // one-word name: identifier only
1115 
1116         /**
1117          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1118          * @since 1.2
1119          */
1120         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1121             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", // identifier (matches the field name)
1122                              "HANGUL COMPATIBILITY JAMO", // block name with spaces
1123                              "HANGULCOMPATIBILITYJAMO"); // block name, spaces removed
1124 
1125         /**
1126          * Constant for the "Kanbun" Unicode character block.
1127          * @since 1.2
1128          */
1129         public static final UnicodeBlock KANBUN =
1130             new UnicodeBlock("KANBUN"); // one-word name: identifier only
1131 
1132         /**
1133          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1134          * @since 1.2
1135          */
1136         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1137             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", // identifier (matches the field name)
1138                              "ENCLOSED CJK LETTERS AND MONTHS", // block name with spaces
1139                              "ENCLOSEDCJKLETTERSANDMONTHS"); // block name, spaces removed
1140 
1141         /**
1142          * Constant for the "CJK Compatibility" Unicode character block.
1143          * @since 1.2
1144          */
1145         public static final UnicodeBlock CJK_COMPATIBILITY =
1146             new UnicodeBlock("CJK_COMPATIBILITY", // identifier (matches the field name)
1147                              "CJK COMPATIBILITY", // block name with spaces
1148                              "CJKCOMPATIBILITY"); // block name, spaces removed
1149 
1150         /**
1151          * Constant for the "CJK Unified Ideographs" Unicode character block.
1152          * @since 1.2
1153          */
1154         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1155             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", // identifier (matches the field name)
1156                              "CJK UNIFIED IDEOGRAPHS", // block name with spaces
1157                              "CJKUNIFIEDIDEOGRAPHS"); // block name, spaces removed
1158 
1159         /**
1160          * Constant for the "Hangul Syllables" Unicode character block.
1161          * @since 1.2
1162          */
1163         public static final UnicodeBlock HANGUL_SYLLABLES =
1164             new UnicodeBlock("HANGUL_SYLLABLES", // identifier (matches the field name)
1165                              "HANGUL SYLLABLES", // block name with spaces
1166                              "HANGULSYLLABLES"); // block name, spaces removed
1167 
1168         /**
1169          * Constant for the "Private Use Area" Unicode character block.
1170          * @since 1.2
1171          */
1172         public static final UnicodeBlock PRIVATE_USE_AREA =
1173             new UnicodeBlock("PRIVATE_USE_AREA", // identifier (matches the field name)
1174                              "PRIVATE USE AREA", // block name with spaces
1175                              "PRIVATEUSEAREA"); // block name, spaces removed
1176 
1177         /**
1178          * Constant for the "CJK Compatibility Ideographs" Unicode character
1179          * block.
1180          * @since 1.2
1181          */
1182         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1183             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", // identifier (matches the field name)
1184                              "CJK COMPATIBILITY IDEOGRAPHS", // block name with spaces
1185                              "CJKCOMPATIBILITYIDEOGRAPHS"); // block name, spaces removed
1186 
1187         /**
1188          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1189          * @since 1.2
1190          */
1191         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1192             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", // identifier (matches the field name)
1193                              "ALPHABETIC PRESENTATION FORMS", // block name with spaces
1194                              "ALPHABETICPRESENTATIONFORMS"); // block name, spaces removed
1195 
1196         /**
1197          * Constant for the "Arabic Presentation Forms-A" Unicode character
1198          * block.
1199          * @since 1.2
1200          */
1201         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1202             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", // identifier (matches the field name)
1203                              "ARABIC PRESENTATION FORMS-A", // block name with spaces, hyphen kept
1204                              "ARABICPRESENTATIONFORMS-A"); // spaces removed, hyphen kept
1205 
1206         /**
1207          * Constant for the "Combining Half Marks" Unicode character block.
1208          * @since 1.2
1209          */
1210         public static final UnicodeBlock COMBINING_HALF_MARKS =
1211             new UnicodeBlock("COMBINING_HALF_MARKS", // identifier (matches the field name)
1212                              "COMBINING HALF MARKS", // block name with spaces
1213                              "COMBININGHALFMARKS"); // block name, spaces removed
1214 
1215         /**
1216          * Constant for the "CJK Compatibility Forms" Unicode character block.
1217          * @since 1.2
1218          */
1219         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1220             new UnicodeBlock("CJK_COMPATIBILITY_FORMS", // identifier (matches the field name)
1221                              "CJK COMPATIBILITY FORMS", // block name with spaces
1222                              "CJKCOMPATIBILITYFORMS"); // block name, spaces removed
1223 
1224         /**
1225          * Constant for the "Small Form Variants" Unicode character block.
1226          * @since 1.2
1227          */
1228         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1229             new UnicodeBlock("SMALL_FORM_VARIANTS", // identifier (matches the field name)
1230                              "SMALL FORM VARIANTS", // block name with spaces
1231                              "SMALLFORMVARIANTS"); // block name, spaces removed
1232 
1233         /**
1234          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1235          * @since 1.2
1236          */
1237         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1238             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", // identifier (matches the field name)
1239                              "ARABIC PRESENTATION FORMS-B", // block name with spaces, hyphen kept
1240                              "ARABICPRESENTATIONFORMS-B"); // spaces removed, hyphen kept
1241 
1242         /**
1243          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1244          * block.
1245          * @since 1.2
1246          */
1247         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1248             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", // identifier (matches the field name)
1249                              "HALFWIDTH AND FULLWIDTH FORMS", // block name with spaces
1250                              "HALFWIDTHANDFULLWIDTHFORMS"); // block name, spaces removed
1251 
1252         /**
1253          * Constant for the "Specials" Unicode character block.
1254          * @since 1.2
1255          */
1256         public static final UnicodeBlock SPECIALS =
1257             new UnicodeBlock("SPECIALS"); // one-word name: identifier only
1258 
1259         /**
1260          * Constant named {@code SURROGATES_AREA}; superseded by the three constants listed below.
1261          * @deprecated Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1262          * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1263          * These constants match the block definitions of the Unicode Standard.
1264          * The {@link #of(char)} and {@link #of(int)} methods return the
1265          * standard constants.
1266          */
1267         @Deprecated(since="1.5")
1268         public static final UnicodeBlock SURROGATES_AREA =
1269             new UnicodeBlock("SURROGATES_AREA"); // identifier only; no further name strings are passed
1270 
1271         /**
1272          * Constant for the "Syriac" Unicode character block.
1273          * @since 1.4
1274          */
1275         public static final UnicodeBlock SYRIAC =
1276             new UnicodeBlock("SYRIAC"); // one-word name: identifier only
1277 
1278         /**
1279          * Constant for the "Thaana" Unicode character block.
1280          * @since 1.4
1281          */
1282         public static final UnicodeBlock THAANA =
1283             new UnicodeBlock("THAANA"); // one-word name: identifier only
1284 
1285         /**
1286          * Constant for the "Sinhala" Unicode character block.
1287          * @since 1.4
1288          */
1289         public static final UnicodeBlock SINHALA =
1290             new UnicodeBlock("SINHALA"); // one-word name: identifier only
1291 
1292         /**
1293          * Constant for the "Myanmar" Unicode character block.
1294          * @since 1.4
1295          */
1296         public static final UnicodeBlock MYANMAR =
1297             new UnicodeBlock("MYANMAR"); // one-word name: identifier only
1298 
1299         /**
1300          * Constant for the "Ethiopic" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock ETHIOPIC =
1304             new UnicodeBlock("ETHIOPIC"); // one-word name: identifier only
1305 
1306         /**
1307          * Constant for the "Cherokee" Unicode character block.
1308          * @since 1.4
1309          */
1310         public static final UnicodeBlock CHEROKEE =
1311             new UnicodeBlock("CHEROKEE"); // one-word name: identifier only
1312 
1313         /**
1314          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1315          * @since 1.4
1316          */
1317         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1318             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", // identifier (matches the field name)
1319                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS", // block name with spaces
1320                              "UNIFIEDCANADIANABORIGINALSYLLABICS"); // block name, spaces removed
1321 
1322         /**
1323          * Constant for the "Ogham" Unicode character block.
1324          * @since 1.4
1325          */
1326         public static final UnicodeBlock OGHAM =
1327             new UnicodeBlock("OGHAM"); // one-word name: identifier only
1328 
1329         /**
1330          * Constant for the "Runic" Unicode character block.
1331          * @since 1.4
1332          */
1333         public static final UnicodeBlock RUNIC =
1334             new UnicodeBlock("RUNIC"); // one-word name: identifier only
1335 
1336         /**
1337          * Constant for the "Khmer" Unicode character block.
1338          * @since 1.4
1339          */
1340         public static final UnicodeBlock KHMER =
1341             new UnicodeBlock("KHMER"); // one-word name: identifier only
1342 
1343         /**
1344          * Constant for the "Mongolian" Unicode character block.
1345          * @since 1.4
1346          */
1347         public static final UnicodeBlock MONGOLIAN =
1348             new UnicodeBlock("MONGOLIAN"); // one-word name: identifier only
1349 
1350         /**
1351          * Constant for the "Braille Patterns" Unicode character block.
1352          * @since 1.4
1353          */
1354         public static final UnicodeBlock BRAILLE_PATTERNS =
1355             new UnicodeBlock("BRAILLE_PATTERNS", // identifier (matches the field name)
1356                              "BRAILLE PATTERNS", // block name with spaces
1357                              "BRAILLEPATTERNS"); // block name, spaces removed
1358 
1359         /**
1360          * Constant for the "CJK Radicals Supplement" Unicode character block.
1361          * @since 1.4
1362          */
1363         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1364             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", // identifier (matches the field name)
1365                              "CJK RADICALS SUPPLEMENT", // block name with spaces
1366                              "CJKRADICALSSUPPLEMENT"); // block name, spaces removed
1367 
1368         /**
1369          * Constant for the "Kangxi Radicals" Unicode character block.
1370          * @since 1.4
1371          */
1372         public static final UnicodeBlock KANGXI_RADICALS =
1373             new UnicodeBlock("KANGXI_RADICALS", // identifier (matches the field name)
1374                              "KANGXI RADICALS", // block name with spaces
1375                              "KANGXIRADICALS"); // block name, spaces removed
1376 
1377         /**
1378          * Constant for the "Ideographic Description Characters" Unicode character block.
1379          * @since 1.4
1380          */
1381         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1382             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", // identifier (matches the field name)
1383                              "IDEOGRAPHIC DESCRIPTION CHARACTERS", // block name with spaces
1384                              "IDEOGRAPHICDESCRIPTIONCHARACTERS"); // block name, spaces removed
1385 
1386         /**
1387          * Constant for the "Bopomofo Extended" Unicode character block.
1388          * @since 1.4
1389          */
1390         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1391             new UnicodeBlock("BOPOMOFO_EXTENDED", // identifier (matches the field name)
1392                              "BOPOMOFO EXTENDED", // block name with spaces
1393                              "BOPOMOFOEXTENDED"); // block name, spaces removed
1394 
1395         /**
1396          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1397          * @since 1.4
1398          */
1399         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1400             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", // identifier (matches the field name)
1401                              "CJK UNIFIED IDEOGRAPHS EXTENSION A", // block name with spaces
1402                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); // block name, spaces removed
1403 
1404         /**
1405          * Constant for the "Yi Syllables" Unicode character block.
1406          * @since 1.4
1407          */
1408         public static final UnicodeBlock YI_SYLLABLES =
1409             new UnicodeBlock("YI_SYLLABLES", // identifier (matches the field name)
1410                              "YI SYLLABLES", // block name with spaces
1411                              "YISYLLABLES"); // block name, spaces removed
1412 
1413         /**
1414          * Constant for the "Yi Radicals" Unicode character block.
1415          * @since 1.4
1416          */
1417         public static final UnicodeBlock YI_RADICALS =
1418             new UnicodeBlock("YI_RADICALS", // identifier (matches the field name)
1419                              "YI RADICALS", // block name with spaces
1420                              "YIRADICALS"); // block name, spaces removed
1421 
1422         /**
1423          * Constant for the "Cyrillic Supplement" Unicode character block.
1424          * This block was previously known as the "Cyrillic Supplementary" block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1428             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", // identifier; follows the earlier block name
1429                              "CYRILLIC SUPPLEMENTARY", // earlier name, with spaces
1430                              "CYRILLICSUPPLEMENTARY", // earlier name, spaces removed
1431                              "CYRILLIC SUPPLEMENT", // current name, with spaces
1432                              "CYRILLICSUPPLEMENT"); // current name, spaces removed
1433 
1434         /**
1435          * Constant for the "Tagalog" Unicode character block.
1436          * @since 1.5
1437          */
1438         public static final UnicodeBlock TAGALOG =
1439             new UnicodeBlock("TAGALOG"); // one-word name: identifier only
1440 
1441         /**
1442          * Constant for the "Hanunoo" Unicode character block.
1443          * @since 1.5
1444          */
1445         public static final UnicodeBlock HANUNOO =
1446             new UnicodeBlock("HANUNOO"); // one-word name: identifier only
1447 
1448         /**
1449          * Constant for the "Buhid" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock BUHID =
1453             new UnicodeBlock("BUHID"); // one-word name: identifier only
1454 
1455         /**
1456          * Constant for the "Tagbanwa" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock TAGBANWA =
1460             new UnicodeBlock("TAGBANWA"); // one-word name: identifier only
1461 
1462         /**
1463          * Constant for the "Limbu" Unicode character block.
1464          * @since 1.5
1465          */
1466         public static final UnicodeBlock LIMBU =
1467             new UnicodeBlock("LIMBU"); // one-word name: identifier only
1468 
1469         /**
1470          * Constant for the "Tai Le" Unicode character block.
1471          * @since 1.5
1472          */
1473         public static final UnicodeBlock TAI_LE =
1474             new UnicodeBlock("TAI_LE", // identifier (matches the field name)
1475                              "TAI LE", // block name with spaces
1476                              "TAILE"); // block name, spaces removed
1477 
1478         /**
1479          * Constant for the "Khmer Symbols" Unicode character block.
1480          * @since 1.5
1481          */
1482         public static final UnicodeBlock KHMER_SYMBOLS =
1483             new UnicodeBlock("KHMER_SYMBOLS", // identifier (matches the field name)
1484                              "KHMER SYMBOLS", // block name with spaces
1485                              "KHMERSYMBOLS"); // block name, spaces removed
1486 
1487         /**
1488          * Constant for the "Phonetic Extensions" Unicode character block.
1489          * @since 1.5
1490          */
1491         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1492             new UnicodeBlock("PHONETIC_EXTENSIONS", // identifier (matches the field name)
1493                              "PHONETIC EXTENSIONS", // block name with spaces
1494                              "PHONETICEXTENSIONS"); // block name, spaces removed
1495 
1496         /**
1497          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1498          * @since 1.5
1499          */
1500         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1501             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", // identifier (matches the field name)
1502                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", // block name with spaces, hyphen kept
1503                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); // spaces removed, hyphen kept
1504 
1505         /**
1506          * Constant for the "Supplemental Arrows-A" Unicode character block.
1507          * @since 1.5
1508          */
1509         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1510             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", // identifier (matches the field name)
1511                              "SUPPLEMENTAL ARROWS-A", // block name with spaces, hyphen kept
1512                              "SUPPLEMENTALARROWS-A"); // spaces removed, hyphen kept
1513 
1514         /**
1515          * Constant for the "Supplemental Arrows-B" Unicode character block.
1516          * @since 1.5
1517          */
1518         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1519             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", // identifier (matches the field name)
1520                              "SUPPLEMENTAL ARROWS-B", // block name with spaces, hyphen kept
1521                              "SUPPLEMENTALARROWS-B"); // spaces removed, hyphen kept
1522 
1523         /**
1524          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1525          * character block.
1526          * @since 1.5
1527          */
1528         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1529             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", // identifier (matches the field name)
1530                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", // block name with spaces, hyphen kept
1531                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); // spaces removed, hyphen kept
1532 
1533         /**
1534          * Constant for the "Supplemental Mathematical Operators" Unicode
1535          * character block.
1536          * @since 1.5
1537          */
1538         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1539             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", // identifier (matches the field name)
1540                              "SUPPLEMENTAL MATHEMATICAL OPERATORS", // block name with spaces
1541                              "SUPPLEMENTALMATHEMATICALOPERATORS"); // block name, spaces removed
1542 
1543         /**
1544          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1545          * block.
1546          * @since 1.5
1547          */
1548         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1549             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", // identifier (matches the field name)
1550                              "MISCELLANEOUS SYMBOLS AND ARROWS", // block name with spaces
1551                              "MISCELLANEOUSSYMBOLSANDARROWS"); // block name, spaces removed
1552 
1553         /**
1554          * Constant for the "Katakana Phonetic Extensions" Unicode character
1555          * block.
1556          * @since 1.5
1557          */
1558         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1559             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", // identifier (matches the field name)
1560                              "KATAKANA PHONETIC EXTENSIONS", // block name with spaces
1561                              "KATAKANAPHONETICEXTENSIONS"); // block name, spaces removed
1562 
1563         /**
1564          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1565          * @since 1.5
1566          */
1567         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1568             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", // identifier (matches the field name)
1569                              "YIJING HEXAGRAM SYMBOLS", // block name with spaces
1570                              "YIJINGHEXAGRAMSYMBOLS"); // block name, spaces removed
1571 
1572         /**
1573          * Constant for the "Variation Selectors" Unicode character block.
1574          * @since 1.5
1575          */
1576         public static final UnicodeBlock VARIATION_SELECTORS =
1577             new UnicodeBlock("VARIATION_SELECTORS", // identifier (matches the field name)
1578                              "VARIATION SELECTORS", // block name with spaces
1579                              "VARIATIONSELECTORS"); // block name, spaces removed
1580 
1581         /**
1582          * Constant for the "Linear B Syllabary" Unicode character block.
1583          * @since 1.5
1584          */
1585         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1586             new UnicodeBlock("LINEAR_B_SYLLABARY", // identifier (matches the field name)
1587                              "LINEAR B SYLLABARY", // block name with spaces
1588                              "LINEARBSYLLABARY"); // block name, spaces removed
1589 
1590         /**
1591          * Constant for the "Linear B Ideograms" Unicode character block.
1592          * @since 1.5
1593          */
1594         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1595             new UnicodeBlock("LINEAR_B_IDEOGRAMS", // identifier (matches the field name)
1596                              "LINEAR B IDEOGRAMS", // block name with spaces
1597                              "LINEARBIDEOGRAMS"); // block name, spaces removed
1598 
1599         /**
1600          * Constant for the "Aegean Numbers" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock AEGEAN_NUMBERS =
1604             new UnicodeBlock("AEGEAN_NUMBERS", // identifier (matches the field name)
1605                              "AEGEAN NUMBERS", // block name with spaces
1606                              "AEGEANNUMBERS"); // block name, spaces removed
1607 
1608         /**
1609          * Constant for the "Old Italic" Unicode character block.
1610          * @since 1.5
1611          */
1612         public static final UnicodeBlock OLD_ITALIC =
1613             new UnicodeBlock("OLD_ITALIC", // identifier (matches the field name)
1614                              "OLD ITALIC", // block name with spaces
1615                              "OLDITALIC"); // block name, spaces removed
1616 
1617         /**
1618          * Constant for the "Gothic" Unicode character block.
1619          * @since 1.5
1620          */
1621         public static final UnicodeBlock GOTHIC =
1622             new UnicodeBlock("GOTHIC"); // one-word name: identifier only
1623 
1624         /**
1625          * Constant for the "Ugaritic" Unicode character block.
1626          * @since 1.5
1627          */
1628         public static final UnicodeBlock UGARITIC =
1629             new UnicodeBlock("UGARITIC"); // one-word name: identifier only
1630 
1631         /**
1632          * Constant for the "Deseret" Unicode character block.
1633          * @since 1.5
1634          */
1635         public static final UnicodeBlock DESERET =
1636             new UnicodeBlock("DESERET"); // one-word name: identifier only
1637 
1638         /**
1639          * Constant for the "Shavian" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock SHAVIAN =
1643             new UnicodeBlock("SHAVIAN"); // one-word name: identifier only
1644 
1645         /**
1646          * Constant for the "Osmanya" Unicode character block.
1647          * @since 1.5
1648          */
1649         public static final UnicodeBlock OSMANYA =
1650             new UnicodeBlock("OSMANYA");
1651 
1652         /**
1653          * Constant for the "Cypriot Syllabary" Unicode character block.
1654          * @since 1.5
1655          */
1656         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1657             new UnicodeBlock("CYPRIOT_SYLLABARY",
1658                              "CYPRIOT SYLLABARY",
1659                              "CYPRIOTSYLLABARY");
1660 
1661         /**
1662          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1663          * @since 1.5
1664          */
1665         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1666             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1667                              "BYZANTINE MUSICAL SYMBOLS",
1668                              "BYZANTINEMUSICALSYMBOLS");
1669 
1670         /**
1671          * Constant for the "Musical Symbols" Unicode character block.
1672          * @since 1.5
1673          */
1674         public static final UnicodeBlock MUSICAL_SYMBOLS =
1675             new UnicodeBlock("MUSICAL_SYMBOLS",
1676                              "MUSICAL SYMBOLS",
1677                              "MUSICALSYMBOLS");
1678 
1679         /**
1680          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1681          * @since 1.5
1682          */
1683         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1684             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1685                              "TAI XUAN JING SYMBOLS",
1686                              "TAIXUANJINGSYMBOLS");
1687 
1688         /**
1689          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1690          * character block.
1691          * @since 1.5
1692          */
1693         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1694             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1695                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1696                              "MATHEMATICALALPHANUMERICSYMBOLS");
1697 
1698         /**
1699          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1700          * character block.
1701          * @since 1.5
1702          */
1703         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1704             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1705                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1706                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1707 
1708         /**
1709          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1710          * @since 1.5
1711          */
1712         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1713             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1714                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1715                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1716 
1717         /**
1718          * Constant for the "Tags" Unicode character block.
1719          * @since 1.5
1720          */
1721         public static final UnicodeBlock TAGS =
1722             new UnicodeBlock("TAGS");
1723 
1724         /**
1725          * Constant for the "Variation Selectors Supplement" Unicode character
1726          * block.
1727          * @since 1.5
1728          */
1729         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1730             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1731                              "VARIATION SELECTORS SUPPLEMENT",
1732                              "VARIATIONSELECTORSSUPPLEMENT");
1733 
1734         /**
1735          * Constant for the "Supplementary Private Use Area-A" Unicode character
1736          * block.
1737          * @since 1.5
1738          */
1739         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1740             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1741                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1742                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1743 
1744         /**
1745          * Constant for the "Supplementary Private Use Area-B" Unicode character
1746          * block.
1747          * @since 1.5
1748          */
1749         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1750             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1751                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1752                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1753 
1754         /**
1755          * Constant for the "High Surrogates" Unicode character block.
1756          * This block represents code point values in the high surrogate
1757          * range: U+D800 through U+DB7F.
1758          *
1759          * @since 1.5
1760          */
1761         public static final UnicodeBlock HIGH_SURROGATES =
1762             new UnicodeBlock("HIGH_SURROGATES",
1763                              "HIGH SURROGATES",
1764                              "HIGHSURROGATES");
1765 
1766         /**
1767          * Constant for the "High Private Use Surrogates" Unicode character
1768          * block.
1769          * This block represents code point values in the private use high
1770          * surrogate range: U+DB80 through U+DBFF.
1771          *
1772          * @since 1.5
1773          */
1774         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1775             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1776                              "HIGH PRIVATE USE SURROGATES",
1777                              "HIGHPRIVATEUSESURROGATES");
1778 
1779         /**
1780          * Constant for the "Low Surrogates" Unicode character block.
1781          * This block represents code point values in the low surrogate
1782          * range: U+DC00 through U+DFFF.
1783          *
1784          * @since 1.5
1785          */
1786         public static final UnicodeBlock LOW_SURROGATES =
1787             new UnicodeBlock("LOW_SURROGATES",
1788                              "LOW SURROGATES",
1789                              "LOWSURROGATES");
1790 
1791         /**
1792          * Constant for the "Arabic Supplement" Unicode character block.
1793          * @since 1.7
1794          */
1795         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1796             new UnicodeBlock("ARABIC_SUPPLEMENT",
1797                              "ARABIC SUPPLEMENT",
1798                              "ARABICSUPPLEMENT");
1799 
1800         /**
1801          * Constant for the "NKo" Unicode character block.
1802          * @since 1.7
1803          */
1804         public static final UnicodeBlock NKO =
1805             new UnicodeBlock("NKO");
1806 
1807         /**
1808          * Constant for the "Samaritan" Unicode character block.
1809          * @since 1.7
1810          */
1811         public static final UnicodeBlock SAMARITAN =
1812             new UnicodeBlock("SAMARITAN");
1813 
1814         /**
1815          * Constant for the "Mandaic" Unicode character block.
1816          * @since 1.7
1817          */
1818         public static final UnicodeBlock MANDAIC =
1819             new UnicodeBlock("MANDAIC");
1820 
1821         /**
1822          * Constant for the "Ethiopic Supplement" Unicode character block.
1823          * @since 1.7
1824          */
1825         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1826             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1827                              "ETHIOPIC SUPPLEMENT",
1828                              "ETHIOPICSUPPLEMENT");
1829 
1830         /**
1831          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1832          * Unicode character block.
1833          * @since 1.7
1834          */
1835         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1836             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1837                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1838                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1839 
1840         /**
1841          * Constant for the "New Tai Lue" Unicode character block.
1842          * @since 1.7
1843          */
1844         public static final UnicodeBlock NEW_TAI_LUE =
1845             new UnicodeBlock("NEW_TAI_LUE",
1846                              "NEW TAI LUE",
1847                              "NEWTAILUE");
1848 
1849         /**
1850          * Constant for the "Buginese" Unicode character block.
1851          * @since 1.7
1852          */
1853         public static final UnicodeBlock BUGINESE =
1854             new UnicodeBlock("BUGINESE");
1855 
1856         /**
1857          * Constant for the "Tai Tham" Unicode character block.
1858          * @since 1.7
1859          */
1860         public static final UnicodeBlock TAI_THAM =
1861             new UnicodeBlock("TAI_THAM",
1862                              "TAI THAM",
1863                              "TAITHAM");
1864 
1865         /**
1866          * Constant for the "Balinese" Unicode character block.
1867          * @since 1.7
1868          */
1869         public static final UnicodeBlock BALINESE =
1870             new UnicodeBlock("BALINESE");
1871 
1872         /**
1873          * Constant for the "Sundanese" Unicode character block.
1874          * @since 1.7
1875          */
1876         public static final UnicodeBlock SUNDANESE =
1877             new UnicodeBlock("SUNDANESE");
1878 
1879         /**
1880          * Constant for the "Batak" Unicode character block.
1881          * @since 1.7
1882          */
1883         public static final UnicodeBlock BATAK =
1884             new UnicodeBlock("BATAK");
1885 
1886         /**
1887          * Constant for the "Lepcha" Unicode character block.
1888          * @since 1.7
1889          */
1890         public static final UnicodeBlock LEPCHA =
1891             new UnicodeBlock("LEPCHA");
1892 
1893         /**
1894          * Constant for the "Ol Chiki" Unicode character block.
1895          * @since 1.7
1896          */
1897         public static final UnicodeBlock OL_CHIKI =
1898             new UnicodeBlock("OL_CHIKI",
1899                              "OL CHIKI",
1900                              "OLCHIKI");
1901 
1902         /**
1903          * Constant for the "Vedic Extensions" Unicode character block.
1904          * @since 1.7
1905          */
1906         public static final UnicodeBlock VEDIC_EXTENSIONS =
1907             new UnicodeBlock("VEDIC_EXTENSIONS",
1908                              "VEDIC EXTENSIONS",
1909                              "VEDICEXTENSIONS");
1910 
1911         /**
1912          * Constant for the "Phonetic Extensions Supplement" Unicode character
1913          * block.
1914          * @since 1.7
1915          */
1916         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1917             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1918                              "PHONETIC EXTENSIONS SUPPLEMENT",
1919                              "PHONETICEXTENSIONSSUPPLEMENT");
1920 
1921         /**
1922          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1923          * character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1927             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1928                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1929                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1930 
1931         /**
1932          * Constant for the "Glagolitic" Unicode character block.
1933          * @since 1.7
1934          */
1935         public static final UnicodeBlock GLAGOLITIC =
1936             new UnicodeBlock("GLAGOLITIC");
1937 
1938         /**
1939          * Constant for the "Latin Extended-C" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock LATIN_EXTENDED_C =
1943             new UnicodeBlock("LATIN_EXTENDED_C",
1944                              "LATIN EXTENDED-C",
1945                              "LATINEXTENDED-C");
1946 
1947         /**
1948          * Constant for the "Coptic" Unicode character block.
1949          * @since 1.7
1950          */
1951         public static final UnicodeBlock COPTIC =
1952             new UnicodeBlock("COPTIC");
1953 
1954         /**
1955          * Constant for the "Georgian Supplement" Unicode character block.
1956          * @since 1.7
1957          */
1958         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1959             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1960                              "GEORGIAN SUPPLEMENT",
1961                              "GEORGIANSUPPLEMENT");
1962 
1963         /**
1964          * Constant for the "Tifinagh" Unicode character block.
1965          * @since 1.7
1966          */
1967         public static final UnicodeBlock TIFINAGH =
1968             new UnicodeBlock("TIFINAGH");
1969 
1970         /**
1971          * Constant for the "Ethiopic Extended" Unicode character block.
1972          * @since 1.7
1973          */
1974         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1975             new UnicodeBlock("ETHIOPIC_EXTENDED",
1976                              "ETHIOPIC EXTENDED",
1977                              "ETHIOPICEXTENDED");
1978 
1979         /**
1980          * Constant for the "Cyrillic Extended-A" Unicode character block.
1981          * @since 1.7
1982          */
1983         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1984             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1985                              "CYRILLIC EXTENDED-A",
1986                              "CYRILLICEXTENDED-A");
1987 
1988         /**
1989          * Constant for the "Supplemental Punctuation" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1993             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1994                              "SUPPLEMENTAL PUNCTUATION",
1995                              "SUPPLEMENTALPUNCTUATION");
1996 
1997         /**
1998          * Constant for the "CJK Strokes" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock CJK_STROKES =
2002             new UnicodeBlock("CJK_STROKES",
2003                              "CJK STROKES",
2004                              "CJKSTROKES");
2005 
2006         /**
2007          * Constant for the "Lisu" Unicode character block.
2008          * @since 1.7
2009          */
2010         public static final UnicodeBlock LISU =
2011             new UnicodeBlock("LISU");
2012 
2013         /**
2014          * Constant for the "Vai" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock VAI =
2018             new UnicodeBlock("VAI");
2019 
2020         /**
2021          * Constant for the "Cyrillic Extended-B" Unicode character block.
2022          * @since 1.7
2023          */
2024         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2025             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2026                              "CYRILLIC EXTENDED-B",
2027                              "CYRILLICEXTENDED-B");
2028 
2029         /**
2030          * Constant for the "Bamum" Unicode character block.
2031          * @since 1.7
2032          */
2033         public static final UnicodeBlock BAMUM =
2034             new UnicodeBlock("BAMUM");
2035 
2036         /**
2037          * Constant for the "Modifier Tone Letters" Unicode character block.
2038          * @since 1.7
2039          */
2040         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2041             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2042                              "MODIFIER TONE LETTERS",
2043                              "MODIFIERTONELETTERS");
2044 
2045         /**
2046          * Constant for the "Latin Extended-D" Unicode character block.
2047          * @since 1.7
2048          */
2049         public static final UnicodeBlock LATIN_EXTENDED_D =
2050             new UnicodeBlock("LATIN_EXTENDED_D",
2051                              "LATIN EXTENDED-D",
2052                              "LATINEXTENDED-D");
2053 
2054         /**
2055          * Constant for the "Syloti Nagri" Unicode character block.
2056          * @since 1.7
2057          */
2058         public static final UnicodeBlock SYLOTI_NAGRI =
2059             new UnicodeBlock("SYLOTI_NAGRI",
2060                              "SYLOTI NAGRI",
2061                              "SYLOTINAGRI");
2062 
2063         /**
2064          * Constant for the "Common Indic Number Forms" Unicode character block.
2065          * @since 1.7
2066          */
2067         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2068             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2069                              "COMMON INDIC NUMBER FORMS",
2070                              "COMMONINDICNUMBERFORMS");
2071 
2072         /**
2073          * Constant for the "Phags-pa" Unicode character block.
2074          * @since 1.7
2075          */
2076         public static final UnicodeBlock PHAGS_PA =
2077             new UnicodeBlock("PHAGS_PA",
2078                              "PHAGS-PA");
2079 
2080         /**
2081          * Constant for the "Saurashtra" Unicode character block.
2082          * @since 1.7
2083          */
2084         public static final UnicodeBlock SAURASHTRA =
2085             new UnicodeBlock("SAURASHTRA");
2086 
2087         /**
2088          * Constant for the "Devanagari Extended" Unicode character block.
2089          * @since 1.7
2090          */
2091         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2092             new UnicodeBlock("DEVANAGARI_EXTENDED",
2093                              "DEVANAGARI EXTENDED",
2094                              "DEVANAGARIEXTENDED");
2095 
2096         /**
2097          * Constant for the "Kayah Li" Unicode character block.
2098          * @since 1.7
2099          */
2100         public static final UnicodeBlock KAYAH_LI =
2101             new UnicodeBlock("KAYAH_LI",
2102                              "KAYAH LI",
2103                              "KAYAHLI");
2104 
2105         /**
2106          * Constant for the "Rejang" Unicode character block.
2107          * @since 1.7
2108          */
2109         public static final UnicodeBlock REJANG =
2110             new UnicodeBlock("REJANG");
2111 
2112         /**
2113          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2117             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2118                              "HANGUL JAMO EXTENDED-A",
2119                              "HANGULJAMOEXTENDED-A");
2120 
2121         /**
2122          * Constant for the "Javanese" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock JAVANESE =
2126             new UnicodeBlock("JAVANESE");
2127 
2128         /**
2129          * Constant for the "Cham" Unicode character block.
2130          * @since 1.7
2131          */
2132         public static final UnicodeBlock CHAM =
2133             new UnicodeBlock("CHAM");
2134 
2135         /**
2136          * Constant for the "Myanmar Extended-A" Unicode character block.
2137          * @since 1.7
2138          */
2139         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2140             new UnicodeBlock("MYANMAR_EXTENDED_A",
2141                              "MYANMAR EXTENDED-A",
2142                              "MYANMAREXTENDED-A");
2143 
2144         /**
2145          * Constant for the "Tai Viet" Unicode character block.
2146          * @since 1.7
2147          */
2148         public static final UnicodeBlock TAI_VIET =
2149             new UnicodeBlock("TAI_VIET",
2150                              "TAI VIET",
2151                              "TAIVIET");
2152 
2153         /**
2154          * Constant for the "Ethiopic Extended-A" Unicode character block.
2155          * @since 1.7
2156          */
2157         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2158             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2159                              "ETHIOPIC EXTENDED-A",
2160                              "ETHIOPICEXTENDED-A");
2161 
2162         /**
2163          * Constant for the "Meetei Mayek" Unicode character block.
2164          * @since 1.7
2165          */
2166         public static final UnicodeBlock MEETEI_MAYEK =
2167             new UnicodeBlock("MEETEI_MAYEK",
2168                              "MEETEI MAYEK",
2169                              "MEETEIMAYEK");
2170 
2171         /**
2172          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2173          * @since 1.7
2174          */
2175         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2176             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2177                              "HANGUL JAMO EXTENDED-B",
2178                              "HANGULJAMOEXTENDED-B");
2179 
2180         /**
2181          * Constant for the "Vertical Forms" Unicode character block.
2182          * @since 1.7
2183          */
2184         public static final UnicodeBlock VERTICAL_FORMS =
2185             new UnicodeBlock("VERTICAL_FORMS",
2186                              "VERTICAL FORMS",
2187                              "VERTICALFORMS");
2188 
2189         /**
2190          * Constant for the "Ancient Greek Numbers" Unicode character block.
2191          * @since 1.7
2192          */
2193         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2194             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2195                              "ANCIENT GREEK NUMBERS",
2196                              "ANCIENTGREEKNUMBERS");
2197 
2198         /**
2199          * Constant for the "Ancient Symbols" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock ANCIENT_SYMBOLS =
2203             new UnicodeBlock("ANCIENT_SYMBOLS",
2204                              "ANCIENT SYMBOLS",
2205                              "ANCIENTSYMBOLS");
2206 
2207         /**
2208          * Constant for the "Phaistos Disc" Unicode character block.
2209          * @since 1.7
2210          */
2211         public static final UnicodeBlock PHAISTOS_DISC =
2212             new UnicodeBlock("PHAISTOS_DISC",
2213                              "PHAISTOS DISC",
2214                              "PHAISTOSDISC");
2215 
2216         /**
2217          * Constant for the "Lycian" Unicode character block.
2218          * @since 1.7
2219          */
2220         public static final UnicodeBlock LYCIAN =
2221             new UnicodeBlock("LYCIAN");
2222 
2223         /**
2224          * Constant for the "Carian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock CARIAN =
2228             new UnicodeBlock("CARIAN");
2229 
2230         /**
2231          * Constant for the "Old Persian" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock OLD_PERSIAN =
2235             new UnicodeBlock("OLD_PERSIAN",
2236                              "OLD PERSIAN",
2237                              "OLDPERSIAN");
2238 
2239         /**
2240          * Constant for the "Imperial Aramaic" Unicode character block.
2241          * @since 1.7
2242          */
2243         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2244             new UnicodeBlock("IMPERIAL_ARAMAIC",
2245                              "IMPERIAL ARAMAIC",
2246                              "IMPERIALARAMAIC");
2247 
2248         /**
2249          * Constant for the "Phoenician" Unicode character block.
2250          * @since 1.7
2251          */
2252         public static final UnicodeBlock PHOENICIAN =
2253             new UnicodeBlock("PHOENICIAN");
2254 
2255         /**
2256          * Constant for the "Lydian" Unicode character block.
2257          * @since 1.7
2258          */
2259         public static final UnicodeBlock LYDIAN =
2260             new UnicodeBlock("LYDIAN");
2261 
2262         /**
2263          * Constant for the "Kharoshthi" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock KHAROSHTHI =
2267             new UnicodeBlock("KHAROSHTHI");
2268 
2269         /**
2270          * Constant for the "Old South Arabian" Unicode character block.
2271          * @since 1.7
2272          */
2273         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2274             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2275                              "OLD SOUTH ARABIAN",
2276                              "OLDSOUTHARABIAN");
2277 
2278         /**
2279          * Constant for the "Avestan" Unicode character block.
2280          * @since 1.7
2281          */
2282         public static final UnicodeBlock AVESTAN =
2283             new UnicodeBlock("AVESTAN");
2284 
2285         /**
2286          * Constant for the "Inscriptional Parthian" Unicode character block.
2287          * @since 1.7
2288          */
2289         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2290             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2291                              "INSCRIPTIONAL PARTHIAN",
2292                              "INSCRIPTIONALPARTHIAN");
2293 
2294         /**
2295          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2296          * @since 1.7
2297          */
2298         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2299             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2300                              "INSCRIPTIONAL PAHLAVI",
2301                              "INSCRIPTIONALPAHLAVI");
2302 
2303         /**
2304          * Constant for the "Old Turkic" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock OLD_TURKIC =
2308             new UnicodeBlock("OLD_TURKIC",
2309                              "OLD TURKIC",
2310                              "OLDTURKIC");
2311 
2312         /**
2313          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2314          * @since 1.7
2315          */
2316         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2317             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2318                              "RUMI NUMERAL SYMBOLS",
2319                              "RUMINUMERALSYMBOLS");
2320 
2321         /**
2322          * Constant for the "Brahmi" Unicode character block.
2323          * @since 1.7
2324          */
2325         public static final UnicodeBlock BRAHMI =
2326             new UnicodeBlock("BRAHMI");
2327 
2328         /**
2329          * Constant for the "Kaithi" Unicode character block.
2330          * @since 1.7
2331          */
2332         public static final UnicodeBlock KAITHI =
2333             new UnicodeBlock("KAITHI");
2334 
2335         /**
2336          * Constant for the "Cuneiform" Unicode character block.
2337          * @since 1.7
2338          */
2339         public static final UnicodeBlock CUNEIFORM =
2340             new UnicodeBlock("CUNEIFORM");
2341 
2342         /**
2343          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2344          * character block.
2345          * @since 1.7
2346          */
2347         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2348             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2349                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2350                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2351 
2352         /**
2353          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2354          * @since 1.7
2355          */
2356         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2357             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2358                              "EGYPTIAN HIEROGLYPHS",
2359                              "EGYPTIANHIEROGLYPHS");
2360 
2361         /**
2362          * Constant for the "Bamum Supplement" Unicode character block.
2363          * @since 1.7
2364          */
2365         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2366             new UnicodeBlock("BAMUM_SUPPLEMENT",
2367                              "BAMUM SUPPLEMENT",
2368                              "BAMUMSUPPLEMENT");
2369 
2370         /**
2371          * Constant for the "Kana Supplement" Unicode character block.
2372          * @since 1.7
2373          */
2374         public static final UnicodeBlock KANA_SUPPLEMENT =
2375             new UnicodeBlock("KANA_SUPPLEMENT",
2376                              "KANA SUPPLEMENT",
2377                              "KANASUPPLEMENT");
2378 
2379         /**
2380          * Constant for the "Ancient Greek Musical Notation" Unicode character
2381          * block.
2382          * @since 1.7
2383          */
2384         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2385             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2386                              "ANCIENT GREEK MUSICAL NOTATION",
2387                              "ANCIENTGREEKMUSICALNOTATION");
2388 
2389         /**
2390          * Constant for the "Counting Rod Numerals" Unicode character block.
2391          * @since 1.7
2392          */
2393         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2394             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2395                              "COUNTING ROD NUMERALS",
2396                              "COUNTINGRODNUMERALS");
2397 
2398         /**
2399          * Constant for the "Mahjong Tiles" Unicode character block.
2400          * @since 1.7
2401          */
2402         public static final UnicodeBlock MAHJONG_TILES =
2403             new UnicodeBlock("MAHJONG_TILES",
2404                              "MAHJONG TILES",
2405                              "MAHJONGTILES");
2406 
2407         /**
2408          * Constant for the "Domino Tiles" Unicode character block.
2409          * @since 1.7
2410          */
2411         public static final UnicodeBlock DOMINO_TILES =
2412             new UnicodeBlock("DOMINO_TILES",
2413                              "DOMINO TILES",
2414                              "DOMINOTILES");
2415 
2416         /**
2417          * Constant for the "Playing Cards" Unicode character block.
2418          * @since 1.7
2419          */
2420         public static final UnicodeBlock PLAYING_CARDS =
2421             new UnicodeBlock("PLAYING_CARDS",
2422                              "PLAYING CARDS",
2423                              "PLAYINGCARDS");
2424 
2425         /**
2426          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2427          * block.
2428          * @since 1.7
2429          */
2430         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2431             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2432                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2433                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2434 
2435         /**
2436          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2437          * block.
2438          * @since 1.7
2439          */
2440         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2441             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2442                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2443                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2444 
2445         /**
2446          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2447          * character block.
2448          * @since 1.7
2449          */
2450         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2451             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2452                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2453                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2454 
2455         /**
2456          * Constant for the "Emoticons" Unicode character block.
2457          * @since 1.7
2458          */
2459         public static final UnicodeBlock EMOTICONS =
2460             new UnicodeBlock("EMOTICONS");
2461 
2462         /**
2463          * Constant for the "Transport And Map Symbols" Unicode character block.
2464          * @since 1.7
2465          */
2466         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2467             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2468                              "TRANSPORT AND MAP SYMBOLS",
2469                              "TRANSPORTANDMAPSYMBOLS");
2470 
2471         /**
2472          * Constant for the "Alchemical Symbols" Unicode character block.
2473          * @since 1.7
2474          */
2475         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2476             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2477                              "ALCHEMICAL SYMBOLS",
2478                              "ALCHEMICALSYMBOLS");
2479 
2480         /**
2481          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2482          * character block.
2483          * @since 1.7
2484          */
2485         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2486             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2487                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2488                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2489 
2490         /**
2491          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2492          * character block.
2493          * @since 1.7
2494          */
2495         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2496             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2497                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2498                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2499 
2500         /**
2501          * Constant for the "Arabic Extended-A" Unicode character block.
2502          * @since 1.8
2503          */
2504         public static final UnicodeBlock ARABIC_EXTENDED_A =
2505             new UnicodeBlock("ARABIC_EXTENDED_A",
2506                              "ARABIC EXTENDED-A",
2507                              "ARABICEXTENDED-A");
2508 
2509         /**
2510          * Constant for the "Sundanese Supplement" Unicode character block.
2511          * @since 1.8
2512          */
2513         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2514             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2515                              "SUNDANESE SUPPLEMENT",
2516                              "SUNDANESESUPPLEMENT");
2517 
2518         /**
2519          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2520          * @since 1.8
2521          */
2522         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2523             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2524                              "MEETEI MAYEK EXTENSIONS",
2525                              "MEETEIMAYEKEXTENSIONS");
2526 
2527         /**
2528          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2529          * @since 1.8
2530          */
2531         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2532             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2533                              "MEROITIC HIEROGLYPHS",
2534                              "MEROITICHIEROGLYPHS");
2535 
2536         /**
2537          * Constant for the "Meroitic Cursive" Unicode character block.
2538          * @since 1.8
2539          */
2540         public static final UnicodeBlock MEROITIC_CURSIVE =
2541             new UnicodeBlock("MEROITIC_CURSIVE",
2542                              "MEROITIC CURSIVE",
2543                              "MEROITICCURSIVE");
2544 
2545         /**
2546          * Constant for the "Sora Sompeng" Unicode character block.
2547          * @since 1.8
2548          */
2549         public static final UnicodeBlock SORA_SOMPENG =
2550             new UnicodeBlock("SORA_SOMPENG",
2551                              "SORA SOMPENG",
2552                              "SORASOMPENG");
2553 
2554         /**
2555          * Constant for the "Chakma" Unicode character block.
2556          * @since 1.8
2557          */
2558         public static final UnicodeBlock CHAKMA =
2559             new UnicodeBlock("CHAKMA");
2560 
2561         /**
2562          * Constant for the "Sharada" Unicode character block.
2563          * @since 1.8
2564          */
2565         public static final UnicodeBlock SHARADA =
2566             new UnicodeBlock("SHARADA");
2567 
2568         /**
2569          * Constant for the "Takri" Unicode character block.
2570          * @since 1.8
2571          */
2572         public static final UnicodeBlock TAKRI =
2573             new UnicodeBlock("TAKRI");
2574 
2575         /**
2576          * Constant for the "Miao" Unicode character block.
2577          * @since 1.8
2578          */
2579         public static final UnicodeBlock MIAO =
2580             new UnicodeBlock("MIAO");
2581 
2582         /**
2583          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2584          * character block.
2585          * @since 1.8
2586          */
2587         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2588             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2589                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2590                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2591 
2592         /**
2593          * Constant for the "Combining Diacritical Marks Extended" Unicode
2594          * character block.
2595          * @since 9
2596          */
2597         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2598             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2599                              "COMBINING DIACRITICAL MARKS EXTENDED",
2600                              "COMBININGDIACRITICALMARKSEXTENDED");
2601 
2602         /**
2603          * Constant for the "Myanmar Extended-B" Unicode character block.
2604          * @since 9
2605          */
2606         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2607             new UnicodeBlock("MYANMAR_EXTENDED_B",
2608                              "MYANMAR EXTENDED-B",
2609                              "MYANMAREXTENDED-B");
2610 
2611         /**
2612          * Constant for the "Latin Extended-E" Unicode character block.
2613          * @since 9
2614          */
2615         public static final UnicodeBlock LATIN_EXTENDED_E =
2616             new UnicodeBlock("LATIN_EXTENDED_E",
2617                              "LATIN EXTENDED-E",
2618                              "LATINEXTENDED-E");
2619 
2620         /**
2621          * Constant for the "Coptic Epact Numbers" Unicode character block.
2622          * @since 9
2623          */
2624         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2625             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2626                              "COPTIC EPACT NUMBERS",
2627                              "COPTICEPACTNUMBERS");
2628 
2629         /**
2630          * Constant for the "Old Permic" Unicode character block.
2631          * @since 9
2632          */
2633         public static final UnicodeBlock OLD_PERMIC =
2634             new UnicodeBlock("OLD_PERMIC",
2635                              "OLD PERMIC",
2636                              "OLDPERMIC");
2637 
2638         /**
2639          * Constant for the "Elbasan" Unicode character block.
2640          * @since 9
2641          */
2642         public static final UnicodeBlock ELBASAN =
2643             new UnicodeBlock("ELBASAN");
2644 
2645         /**
2646          * Constant for the "Caucasian Albanian" Unicode character block.
2647          * @since 9
2648          */
2649         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2650             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2651                              "CAUCASIAN ALBANIAN",
2652                              "CAUCASIANALBANIAN");
2653 
2654         /**
2655          * Constant for the "Linear A" Unicode character block.
2656          * @since 9
2657          */
2658         public static final UnicodeBlock LINEAR_A =
2659             new UnicodeBlock("LINEAR_A",
2660                              "LINEAR A",
2661                              "LINEARA");
2662 
2663         /**
2664          * Constant for the "Palmyrene" Unicode character block.
2665          * @since 9
2666          */
2667         public static final UnicodeBlock PALMYRENE =
2668             new UnicodeBlock("PALMYRENE");
2669 
2670         /**
2671          * Constant for the "Nabataean" Unicode character block.
2672          * @since 9
2673          */
2674         public static final UnicodeBlock NABATAEAN =
2675             new UnicodeBlock("NABATAEAN");
2676 
2677         /**
2678          * Constant for the "Old North Arabian" Unicode character block.
2679          * @since 9
2680          */
2681         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2682             new UnicodeBlock("OLD_NORTH_ARABIAN",
2683                              "OLD NORTH ARABIAN",
2684                              "OLDNORTHARABIAN");
2685 
2686         /**
2687          * Constant for the "Manichaean" Unicode character block.
2688          * @since 9
2689          */
2690         public static final UnicodeBlock MANICHAEAN =
2691             new UnicodeBlock("MANICHAEAN");
2692 
2693         /**
2694          * Constant for the "Psalter Pahlavi" Unicode character block.
2695          * @since 9
2696          */
2697         public static final UnicodeBlock PSALTER_PAHLAVI =
2698             new UnicodeBlock("PSALTER_PAHLAVI",
2699                              "PSALTER PAHLAVI",
2700                              "PSALTERPAHLAVI");
2701 
2702         /**
2703          * Constant for the "Mahajani" Unicode character block.
2704          * @since 9
2705          */
2706         public static final UnicodeBlock MAHAJANI =
2707             new UnicodeBlock("MAHAJANI");
2708 
2709         /**
2710          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2711          * @since 9
2712          */
2713         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2714             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2715                              "SINHALA ARCHAIC NUMBERS",
2716                              "SINHALAARCHAICNUMBERS");
2717 
2718         /**
2719          * Constant for the "Khojki" Unicode character block.
2720          * @since 9
2721          */
2722         public static final UnicodeBlock KHOJKI =
2723             new UnicodeBlock("KHOJKI");
2724 
2725         /**
2726          * Constant for the "Khudawadi" Unicode character block.
2727          * @since 9
2728          */
2729         public static final UnicodeBlock KHUDAWADI =
2730             new UnicodeBlock("KHUDAWADI");
2731 
2732         /**
2733          * Constant for the "Grantha" Unicode character block.
2734          * @since 9
2735          */
2736         public static final UnicodeBlock GRANTHA =
2737             new UnicodeBlock("GRANTHA");
2738 
2739         /**
2740          * Constant for the "Tirhuta" Unicode character block.
2741          * @since 9
2742          */
2743         public static final UnicodeBlock TIRHUTA =
2744             new UnicodeBlock("TIRHUTA");
2745 
2746         /**
2747          * Constant for the "Siddham" Unicode character block.
2748          * @since 9
2749          */
2750         public static final UnicodeBlock SIDDHAM =
2751             new UnicodeBlock("SIDDHAM");
2752 
2753         /**
2754          * Constant for the "Modi" Unicode character block.
2755          * @since 9
2756          */
2757         public static final UnicodeBlock MODI =
2758             new UnicodeBlock("MODI");
2759 
2760         /**
2761          * Constant for the "Warang Citi" Unicode character block.
2762          * @since 9
2763          */
2764         public static final UnicodeBlock WARANG_CITI =
2765             new UnicodeBlock("WARANG_CITI",
2766                              "WARANG CITI",
2767                              "WARANGCITI");
2768 
2769         /**
2770          * Constant for the "Pau Cin Hau" Unicode character block.
2771          * @since 9
2772          */
2773         public static final UnicodeBlock PAU_CIN_HAU =
2774             new UnicodeBlock("PAU_CIN_HAU",
2775                              "PAU CIN HAU",
2776                              "PAUCINHAU");
2777 
2778         /**
2779          * Constant for the "Mro" Unicode character block.
2780          * @since 9
2781          */
2782         public static final UnicodeBlock MRO =
2783             new UnicodeBlock("MRO");
2784 
2785         /**
2786          * Constant for the "Bassa Vah" Unicode character block.
2787          * @since 9
2788          */
2789         public static final UnicodeBlock BASSA_VAH =
2790             new UnicodeBlock("BASSA_VAH",
2791                              "BASSA VAH",
2792                              "BASSAVAH");
2793 
2794         /**
2795          * Constant for the "Pahawh Hmong" Unicode character block.
2796          * @since 9
2797          */
2798         public static final UnicodeBlock PAHAWH_HMONG =
2799             new UnicodeBlock("PAHAWH_HMONG",
2800                              "PAHAWH HMONG",
2801                              "PAHAWHHMONG");
2802 
2803         /**
2804          * Constant for the "Duployan" Unicode character block.
2805          * @since 9
2806          */
2807         public static final UnicodeBlock DUPLOYAN =
2808             new UnicodeBlock("DUPLOYAN");
2809 
2810         /**
2811          * Constant for the "Shorthand Format Controls" Unicode character block.
2812          * @since 9
2813          */
2814         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2815             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2816                              "SHORTHAND FORMAT CONTROLS",
2817                              "SHORTHANDFORMATCONTROLS");
2818 
2819         /**
2820          * Constant for the "Mende Kikakui" Unicode character block.
2821          * @since 9
2822          */
2823         public static final UnicodeBlock MENDE_KIKAKUI =
2824             new UnicodeBlock("MENDE_KIKAKUI",
2825                              "MENDE KIKAKUI",
2826                              "MENDEKIKAKUI");
2827 
2828         /**
2829          * Constant for the "Ornamental Dingbats" Unicode character block.
2830          * @since 9
2831          */
2832         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2833             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2834                              "ORNAMENTAL DINGBATS",
2835                              "ORNAMENTALDINGBATS");
2836 
2837         /**
2838          * Constant for the "Geometric Shapes Extended" Unicode character block.
2839          * @since 9
2840          */
2841         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2842             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2843                              "GEOMETRIC SHAPES EXTENDED",
2844                              "GEOMETRICSHAPESEXTENDED");
2845 
2846         /**
2847          * Constant for the "Supplemental Arrows-C" Unicode character block.
2848          * @since 9
2849          */
2850         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2851             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2852                              "SUPPLEMENTAL ARROWS-C",
2853                              "SUPPLEMENTALARROWS-C");
2854 
2855         /**
2856          * Constant for the "Cherokee Supplement" Unicode character block.
2857          * @since 9
2858          */
2859         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2860             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2861                              "CHEROKEE SUPPLEMENT",
2862                              "CHEROKEESUPPLEMENT");
2863 
2864         /**
2865          * Constant for the "Hatran" Unicode character block.
2866          * @since 9
2867          */
2868         public static final UnicodeBlock HATRAN =
2869             new UnicodeBlock("HATRAN");
2870 
2871         /**
2872          * Constant for the "Old Hungarian" Unicode character block.
2873          * @since 9
2874          */
2875         public static final UnicodeBlock OLD_HUNGARIAN =
2876             new UnicodeBlock("OLD_HUNGARIAN",
2877                              "OLD HUNGARIAN",
2878                              "OLDHUNGARIAN");
2879 
2880         /**
2881          * Constant for the "Multani" Unicode character block.
2882          * @since 9
2883          */
2884         public static final UnicodeBlock MULTANI =
2885             new UnicodeBlock("MULTANI");
2886 
2887         /**
2888          * Constant for the "Ahom" Unicode character block.
2889          * @since 9
2890          */
2891         public static final UnicodeBlock AHOM =
2892             new UnicodeBlock("AHOM");
2893 
2894         /**
2895          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2896          * @since 9
2897          */
2898         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2899             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2900                              "EARLY DYNASTIC CUNEIFORM",
2901                              "EARLYDYNASTICCUNEIFORM");
2902 
2903         /**
2904          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2905          * @since 9
2906          */
2907         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2908             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2909                              "ANATOLIAN HIEROGLYPHS",
2910                              "ANATOLIANHIEROGLYPHS");
2911 
2912         /**
2913          * Constant for the "Sutton SignWriting" Unicode character block.
2914          * @since 9
2915          */
2916         public static final UnicodeBlock SUTTON_SIGNWRITING =
2917             new UnicodeBlock("SUTTON_SIGNWRITING",
2918                              "SUTTON SIGNWRITING",
2919                              "SUTTONSIGNWRITING");
2920 
2921         /**
2922          * Constant for the "Supplemental Symbols and Pictographs" Unicode
2923          * character block.
2924          * @since 9
2925          */
2926         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2927             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2928                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2929                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2930 
2931         /**
2932          * Constant for the "CJK Unified Ideographs Extension E" Unicode
2933          * character block.
2934          * @since 9
2935          */
2936         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2937             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2938                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2939                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2940 
2941         /**
2942          * Constant for the "Syriac Supplement" Unicode
2943          * character block.
2944          * @since 11
2945          */
2946         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2947             new UnicodeBlock("SYRIAC_SUPPLEMENT",
2948                              "SYRIAC SUPPLEMENT",
2949                              "SYRIACSUPPLEMENT");
2950 
2951         /**
2952          * Constant for the "Cyrillic Extended-C" Unicode
2953          * character block.
2954          * @since 11
2955          */
2956         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2957             new UnicodeBlock("CYRILLIC_EXTENDED_C",
2958                              "CYRILLIC EXTENDED-C",
2959                              "CYRILLICEXTENDED-C");
2960 
2961         /**
2962          * Constant for the "Osage" Unicode
2963          * character block.
2964          * @since 11
2965          */
2966         public static final UnicodeBlock OSAGE =
2967             new UnicodeBlock("OSAGE");
2968 
2969         /**
2970          * Constant for the "Newa" Unicode
2971          * character block.
2972          * @since 11
2973          */
2974         public static final UnicodeBlock NEWA =
2975             new UnicodeBlock("NEWA");
2976 
2977         /**
2978          * Constant for the "Mongolian Supplement" Unicode
2979          * character block.
2980          * @since 11
2981          */
2982         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2983             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2984                              "MONGOLIAN SUPPLEMENT",
2985                              "MONGOLIANSUPPLEMENT");
2986 
2987         /**
2988          * Constant for the "Marchen" Unicode
2989          * character block.
2990          * @since 11
2991          */
2992         public static final UnicodeBlock MARCHEN =
2993             new UnicodeBlock("MARCHEN");
2994 
2995         /**
2996          * Constant for the "Ideographic Symbols and Punctuation" Unicode
2997          * character block.
2998          * @since 11
2999          */
3000         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3001             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3002                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3003                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3004 
3005         /**
3006          * Constant for the "Tangut" Unicode
3007          * character block.
3008          * @since 11
3009          */
3010         public static final UnicodeBlock TANGUT =
3011             new UnicodeBlock("TANGUT");
3012 
3013         /**
3014          * Constant for the "Tangut Components" Unicode
3015          * character block.
3016          * @since 11
3017          */
3018         public static final UnicodeBlock TANGUT_COMPONENTS =
3019             new UnicodeBlock("TANGUT_COMPONENTS",
3020                              "TANGUT COMPONENTS",
3021                              "TANGUTCOMPONENTS");
3022 
3023         /**
3024          * Constant for the "Kana Extended-A" Unicode
3025          * character block.
3026          * @since 11
3027          */
3028         public static final UnicodeBlock KANA_EXTENDED_A =
3029             new UnicodeBlock("KANA_EXTENDED_A",
3030                              "KANA EXTENDED-A",
3031                              "KANAEXTENDED-A");
3032         /**
3033          * Constant for the "Glagolitic Supplement" Unicode
3034          * character block.
3035          * @since 11
3036          */
3037         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3038             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3039                              "GLAGOLITIC SUPPLEMENT",
3040                              "GLAGOLITICSUPPLEMENT");
3041         /**
3042          * Constant for the "Adlam" Unicode
3043          * character block.
3044          * @since 11
3045          */
3046         public static final UnicodeBlock ADLAM =
3047             new UnicodeBlock("ADLAM");
3048 
3049         /**
3050          * Constant for the "Masaram Gondi" Unicode
3051          * character block.
3052          * @since 11
3053          */
3054         public static final UnicodeBlock MASARAM_GONDI =
3055             new UnicodeBlock("MASARAM_GONDI",
3056                              "MASARAM GONDI",
3057                              "MASARAMGONDI");
3058 
3059         /**
3060          * Constant for the "Zanabazar Square" Unicode
3061          * character block.
3062          * @since 11
3063          */
3064         public static final UnicodeBlock ZANABAZAR_SQUARE =
3065             new UnicodeBlock("ZANABAZAR_SQUARE",
3066                              "ZANABAZAR SQUARE",
3067                              "ZANABAZARSQUARE");
3068 
3069         /**
3070          * Constant for the "Nushu" Unicode
3071          * character block.
3072          * @since 11
3073          */
3074         public static final UnicodeBlock NUSHU =
3075             new UnicodeBlock("NUSHU");
3076 
3077         /**
3078          * Constant for the "Soyombo" Unicode
3079          * character block.
3080          * @since 11
3081          */
3082         public static final UnicodeBlock SOYOMBO =
3083             new UnicodeBlock("SOYOMBO");
3084 
3085         /**
3086          * Constant for the "Bhaiksuki" Unicode
3087          * character block.
3088          * @since 11
3089          */
3090         public static final UnicodeBlock BHAIKSUKI =
3091             new UnicodeBlock("BHAIKSUKI");
3092 
3093         /**
3094          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3095          * character block.
3096          * @since 11
3097          */
3098         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3099             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3100                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3101                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3102         /**
3103          * Constant for the "Georgian Extended" Unicode
3104          * character block.
3105          * @since 12
3106          */
3107         public static final UnicodeBlock GEORGIAN_EXTENDED =
3108             new UnicodeBlock("GEORGIAN_EXTENDED",
3109                              "GEORGIAN EXTENDED",
3110                              "GEORGIANEXTENDED");
3111 
3112         /**
3113          * Constant for the "Hanifi Rohingya" Unicode
3114          * character block.
3115          * @since 12
3116          */
3117         public static final UnicodeBlock HANIFI_ROHINGYA =
3118             new UnicodeBlock("HANIFI_ROHINGYA",
3119                              "HANIFI ROHINGYA",
3120                              "HANIFIROHINGYA");
3121 
3122         /**
3123          * Constant for the "Old Sogdian" Unicode
3124          * character block.
3125          * @since 12
3126          */
3127         public static final UnicodeBlock OLD_SOGDIAN =
3128             new UnicodeBlock("OLD_SOGDIAN",
3129                              "OLD SOGDIAN",
3130                              "OLDSOGDIAN");
3131 
3132         /**
3133          * Constant for the "Sogdian" Unicode
3134          * character block.
3135          * @since 12
3136          */
3137         public static final UnicodeBlock SOGDIAN =
3138             new UnicodeBlock("SOGDIAN");
3139 
3140         /**
3141          * Constant for the "Dogra" Unicode
3142          * character block.
3143          * @since 12
3144          */
3145         public static final UnicodeBlock DOGRA =
3146             new UnicodeBlock("DOGRA");
3147 
3148         /**
3149          * Constant for the "Gunjala Gondi" Unicode
3150          * character block.
3151          * @since 12
3152          */
3153         public static final UnicodeBlock GUNJALA_GONDI =
3154             new UnicodeBlock("GUNJALA_GONDI",
3155                              "GUNJALA GONDI",
3156                              "GUNJALAGONDI");
3157 
3158         /**
3159          * Constant for the "Makasar" Unicode
3160          * character block.
3161          * @since 12
3162          */
3163         public static final UnicodeBlock MAKASAR =
3164             new UnicodeBlock("MAKASAR");
3165 
3166         /**
3167          * Constant for the "Medefaidrin" Unicode
3168          * character block.
3169          * @since 12
3170          */
3171         public static final UnicodeBlock MEDEFAIDRIN =
3172             new UnicodeBlock("MEDEFAIDRIN");
3173 
3174         /**
3175          * Constant for the "Mayan Numerals" Unicode
3176          * character block.
3177          * @since 12
3178          */
3179         public static final UnicodeBlock MAYAN_NUMERALS =
3180             new UnicodeBlock("MAYAN_NUMERALS",
3181                              "MAYAN NUMERALS",
3182                              "MAYANNUMERALS");
3183 
3184        /**
3185          * Constant for the "Indic Siyaq Numbers" Unicode
3186          * character block.
3187          * @since 12
3188          */
3189         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3190             new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3191                              "INDIC SIYAQ NUMBERS",
3192                              "INDICSIYAQNUMBERS");
3193 
3194         /**
3195          * Constant for the "Chess Symbols" Unicode
3196          * character block.
3197          * @since 12
3198          */
3199         public static final UnicodeBlock CHESS_SYMBOLS =
3200             new UnicodeBlock("CHESS_SYMBOLS",
3201                              "CHESS SYMBOLS",
3202                              "CHESSSYMBOLS");
3203 
3204         /**
3205          * Constant for the "Elymaic" Unicode
3206          * character block.
3207          * @since 13
3208          */
3209         public static final UnicodeBlock ELYMAIC =
3210             new UnicodeBlock("ELYMAIC");
3211 
3212         /**
3213          * Constant for the "Nandinagari" Unicode
3214          * character block.
3215          * @since 13
3216          */
3217         public static final UnicodeBlock NANDINAGARI =
3218             new UnicodeBlock("NANDINAGARI");
3219 
3220         /**
3221          * Constant for the "Tamil Supplement" Unicode
3222          * character block.
3223          * @since 13
3224          */
3225         public static final UnicodeBlock TAMIL_SUPPLEMENT =
3226             new UnicodeBlock("TAMIL_SUPPLEMENT",
3227                              "TAMIL SUPPLEMENT",
3228                              "TAMILSUPPLEMENT");
3229 
3230         /**
3231          * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3232          * character block.
3233          * @since 13
3234          */
3235         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3236             new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3237                              "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3238                              "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3239 
3240         /**
3241          * Constant for the "Small Kana Extension" Unicode
3242          * character block.
3243          * @since 13
3244          */
3245         public static final UnicodeBlock SMALL_KANA_EXTENSION =
3246             new UnicodeBlock("SMALL_KANA_EXTENSION",
3247                              "SMALL KANA EXTENSION",
3248                              "SMALLKANAEXTENSION");
3249 
3250         /**
3251          * Constant for the "Nyiakeng Puachue Hmong" Unicode
3252          * character block.
3253          * @since 13
3254          */
3255         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3256             new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3257                              "NYIAKENG PUACHUE HMONG",
3258                              "NYIAKENGPUACHUEHMONG");
3259 
3260         /**
3261          * Constant for the "Wancho" Unicode
3262          * character block.
3263          * @since 13
3264          */
3265         public static final UnicodeBlock WANCHO =
3266             new UnicodeBlock("WANCHO");
3267 
3268         /**
3269          * Constant for the "Ottoman Siyaq Numbers" Unicode
3270          * character block.
3271          * @since 13
3272          */
3273         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3274             new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3275                              "OTTOMAN SIYAQ NUMBERS",
3276                              "OTTOMANSIYAQNUMBERS");
3277 
3278         /**
3279          * Constant for the "Symbols and Pictographs Extended-A" Unicode
3280          * character block.
3281          * @since 13
3282          */
3283         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3284             new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3285                              "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3286                              "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3287 
3288         private static final int blockStarts[] = {
3289             0x0000,   // 0000..007F; Basic Latin
3290             0x0080,   // 0080..00FF; Latin-1 Supplement
3291             0x0100,   // 0100..017F; Latin Extended-A
3292             0x0180,   // 0180..024F; Latin Extended-B
3293             0x0250,   // 0250..02AF; IPA Extensions
3294             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3295             0x0300,   // 0300..036F; Combining Diacritical Marks
3296             0x0370,   // 0370..03FF; Greek and Coptic
3297             0x0400,   // 0400..04FF; Cyrillic
3298             0x0500,   // 0500..052F; Cyrillic Supplement
3299             0x0530,   // 0530..058F; Armenian
3300             0x0590,   // 0590..05FF; Hebrew
3301             0x0600,   // 0600..06FF; Arabic
3302             0x0700,   // 0700..074F; Syriac
3303             0x0750,   // 0750..077F; Arabic Supplement
3304             0x0780,   // 0780..07BF; Thaana
3305             0x07C0,   // 07C0..07FF; NKo
3306             0x0800,   // 0800..083F; Samaritan
3307             0x0840,   // 0840..085F; Mandaic
3308             0x0860,   // 0860..086F; Syriac Supplement
3309             0x0870,   //             unassigned
3310             0x08A0,   // 08A0..08FF; Arabic Extended-A
3311             0x0900,   // 0900..097F; Devanagari
3312             0x0980,   // 0980..09FF; Bengali
3313             0x0A00,   // 0A00..0A7F; Gurmukhi
3314             0x0A80,   // 0A80..0AFF; Gujarati
3315             0x0B00,   // 0B00..0B7F; Oriya
3316             0x0B80,   // 0B80..0BFF; Tamil
3317             0x0C00,   // 0C00..0C7F; Telugu
3318             0x0C80,   // 0C80..0CFF; Kannada
3319             0x0D00,   // 0D00..0D7F; Malayalam
3320             0x0D80,   // 0D80..0DFF; Sinhala
3321             0x0E00,   // 0E00..0E7F; Thai
3322             0x0E80,   // 0E80..0EFF; Lao
3323             0x0F00,   // 0F00..0FFF; Tibetan
3324             0x1000,   // 1000..109F; Myanmar
3325             0x10A0,   // 10A0..10FF; Georgian
3326             0x1100,   // 1100..11FF; Hangul Jamo
3327             0x1200,   // 1200..137F; Ethiopic
3328             0x1380,   // 1380..139F; Ethiopic Supplement
3329             0x13A0,   // 13A0..13FF; Cherokee
3330             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3331             0x1680,   // 1680..169F; Ogham
3332             0x16A0,   // 16A0..16FF; Runic
3333             0x1700,   // 1700..171F; Tagalog
3334             0x1720,   // 1720..173F; Hanunoo
3335             0x1740,   // 1740..175F; Buhid
3336             0x1760,   // 1760..177F; Tagbanwa
3337             0x1780,   // 1780..17FF; Khmer
3338             0x1800,   // 1800..18AF; Mongolian
3339             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3340             0x1900,   // 1900..194F; Limbu
3341             0x1950,   // 1950..197F; Tai Le
3342             0x1980,   // 1980..19DF; New Tai Lue
3343             0x19E0,   // 19E0..19FF; Khmer Symbols
3344             0x1A00,   // 1A00..1A1F; Buginese
3345             0x1A20,   // 1A20..1AAF; Tai Tham
3346             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3347             0x1B00,   // 1B00..1B7F; Balinese
3348             0x1B80,   // 1B80..1BBF; Sundanese
3349             0x1BC0,   // 1BC0..1BFF; Batak
3350             0x1C00,   // 1C00..1C4F; Lepcha
3351             0x1C50,   // 1C50..1C7F; Ol Chiki
3352             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3353             0x1C90,   // 1C90..1CBF; Georgian Extended
3354             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3355             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3356             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3357             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3358             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3359             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3360             0x1F00,   // 1F00..1FFF; Greek Extended
3361             0x2000,   // 2000..206F; General Punctuation
3362             0x2070,   // 2070..209F; Superscripts and Subscripts
3363             0x20A0,   // 20A0..20CF; Currency Symbols
3364             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3365             0x2100,   // 2100..214F; Letterlike Symbols
3366             0x2150,   // 2150..218F; Number Forms
3367             0x2190,   // 2190..21FF; Arrows
3368             0x2200,   // 2200..22FF; Mathematical Operators
3369             0x2300,   // 2300..23FF; Miscellaneous Technical
3370             0x2400,   // 2400..243F; Control Pictures
3371             0x2440,   // 2440..245F; Optical Character Recognition
3372             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3373             0x2500,   // 2500..257F; Box Drawing
3374             0x2580,   // 2580..259F; Block Elements
3375             0x25A0,   // 25A0..25FF; Geometric Shapes
3376             0x2600,   // 2600..26FF; Miscellaneous Symbols
3377             0x2700,   // 2700..27BF; Dingbats
3378             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3379             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3380             0x2800,   // 2800..28FF; Braille Patterns
3381             0x2900,   // 2900..297F; Supplemental Arrows-B
3382             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3383             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3384             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3385             0x2C00,   // 2C00..2C5F; Glagolitic
3386             0x2C60,   // 2C60..2C7F; Latin Extended-C
3387             0x2C80,   // 2C80..2CFF; Coptic
3388             0x2D00,   // 2D00..2D2F; Georgian Supplement
3389             0x2D30,   // 2D30..2D7F; Tifinagh
3390             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3391             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3392             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3393             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3394             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3395             0x2FE0,   //             unassigned
3396             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3397             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3398             0x3040,   // 3040..309F; Hiragana
3399             0x30A0,   // 30A0..30FF; Katakana
3400             0x3100,   // 3100..312F; Bopomofo
3401             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3402             0x3190,   // 3190..319F; Kanbun
3403             0x31A0,   // 31A0..31BF; Bopomofo Extended
3404             0x31C0,   // 31C0..31EF; CJK Strokes
3405             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3406             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3407             0x3300,   // 3300..33FF; CJK Compatibility
3408             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3409             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3410             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3411             0xA000,   // A000..A48F; Yi Syllables
3412             0xA490,   // A490..A4CF; Yi Radicals
3413             0xA4D0,   // A4D0..A4FF; Lisu
3414             0xA500,   // A500..A63F; Vai
3415             0xA640,   // A640..A69F; Cyrillic Extended-B
3416             0xA6A0,   // A6A0..A6FF; Bamum
3417             0xA700,   // A700..A71F; Modifier Tone Letters
3418             0xA720,   // A720..A7FF; Latin Extended-D
3419             0xA800,   // A800..A82F; Syloti Nagri
3420             0xA830,   // A830..A83F; Common Indic Number Forms
3421             0xA840,   // A840..A87F; Phags-pa
3422             0xA880,   // A880..A8DF; Saurashtra
3423             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3424             0xA900,   // A900..A92F; Kayah Li
3425             0xA930,   // A930..A95F; Rejang
3426             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3427             0xA980,   // A980..A9DF; Javanese
3428             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3429             0xAA00,   // AA00..AA5F; Cham
3430             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3431             0xAA80,   // AA80..AADF; Tai Viet
3432             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3433             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3434             0xAB30,   // AB30..AB6F; Latin Extended-E
3435             0xAB70,   // AB70..ABBF; Cherokee Supplement
3436             0xABC0,   // ABC0..ABFF; Meetei Mayek
3437             0xAC00,   // AC00..D7AF; Hangul Syllables
3438             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3439             0xD800,   // D800..DB7F; High Surrogates
3440             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3441             0xDC00,   // DC00..DFFF; Low Surrogates
3442             0xE000,   // E000..F8FF; Private Use Area
3443             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3444             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3445             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3446             0xFE00,   // FE00..FE0F; Variation Selectors
3447             0xFE10,   // FE10..FE1F; Vertical Forms
3448             0xFE20,   // FE20..FE2F; Combining Half Marks
3449             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3450             0xFE50,   // FE50..FE6F; Small Form Variants
3451             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3452             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3453             0xFFF0,   // FFF0..FFFF; Specials
3454             0x10000,  // 10000..1007F; Linear B Syllabary
3455             0x10080,  // 10080..100FF; Linear B Ideograms
3456             0x10100,  // 10100..1013F; Aegean Numbers
3457             0x10140,  // 10140..1018F; Ancient Greek Numbers
3458             0x10190,  // 10190..101CF; Ancient Symbols
3459             0x101D0,  // 101D0..101FF; Phaistos Disc
3460             0x10200,  //               unassigned
3461             0x10280,  // 10280..1029F; Lycian
3462             0x102A0,  // 102A0..102DF; Carian
3463             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3464             0x10300,  // 10300..1032F; Old Italic
3465             0x10330,  // 10330..1034F; Gothic
3466             0x10350,  // 10350..1037F; Old Permic
3467             0x10380,  // 10380..1039F; Ugaritic
3468             0x103A0,  // 103A0..103DF; Old Persian
3469             0x103E0,  //               unassigned
3470             0x10400,  // 10400..1044F; Deseret
3471             0x10450,  // 10450..1047F; Shavian
3472             0x10480,  // 10480..104AF; Osmanya
3473             0x104B0,  // 104B0..104FF; Osage
3474             0x10500,  // 10500..1052F; Elbasan
3475             0x10530,  // 10530..1056F; Caucasian Albanian
3476             0x10570,  //               unassigned
3477             0x10600,  // 10600..1077F; Linear A
3478             0x10780,  //               unassigned
3479             0x10800,  // 10800..1083F; Cypriot Syllabary
3480             0x10840,  // 10840..1085F; Imperial Aramaic
3481             0x10860,  // 10860..1087F; Palmyrene
3482             0x10880,  // 10880..108AF; Nabataean
3483             0x108B0,  //               unassigned
3484             0x108E0,  // 108E0..108FF; Hatran
3485             0x10900,  // 10900..1091F; Phoenician
3486             0x10920,  // 10920..1093F; Lydian
3487             0x10940,  //               unassigned
3488             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3489             0x109A0,  // 109A0..109FF; Meroitic Cursive
3490             0x10A00,  // 10A00..10A5F; Kharoshthi
3491             0x10A60,  // 10A60..10A7F; Old South Arabian
3492             0x10A80,  // 10A80..10A9F; Old North Arabian
3493             0x10AA0,  //               unassigned
3494             0x10AC0,  // 10AC0..10AFF; Manichaean
3495             0x10B00,  // 10B00..10B3F; Avestan
3496             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3497             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3498             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3499             0x10BB0,  //               unassigned
3500             0x10C00,  // 10C00..10C4F; Old Turkic
3501             0x10C50,  //               unassigned
3502             0x10C80,  // 10C80..10CFF; Old Hungarian
3503             0x10D00,  // 10D00..10D3F; Hanifi Rohingya
3504             0x10D40,  //               unassigned
3505             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3506             0x10E80,  //               unassigned
3507             0x10F00,  // 10F00..10F2F; Old Sogdian
3508             0x10F30,  // 10F30..10F6F; Sogdian
3509             0x10F70,  //               unassigned
3510             0x10FE0,  // 10FE0..10FFF; Elymaic
3511             0x11000,  // 11000..1107F; Brahmi
3512             0x11080,  // 11080..110CF; Kaithi
3513             0x110D0,  // 110D0..110FF; Sora Sompeng
3514             0x11100,  // 11100..1114F; Chakma
3515             0x11150,  // 11150..1117F; Mahajani
3516             0x11180,  // 11180..111DF; Sharada
3517             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3518             0x11200,  // 11200..1124F; Khojki
3519             0x11250,  //               unassigned
3520             0x11280,  // 11280..112AF; Multani
3521             0x112B0,  // 112B0..112FF; Khudawadi
3522             0x11300,  // 11300..1137F; Grantha
3523             0x11380,  //               unassigned
3524             0x11400,  // 11400..1147F; Newa
3525             0x11480,  // 11480..114DF; Tirhuta
3526             0x114E0,  //               unassigned
3527             0x11580,  // 11580..115FF; Siddham
3528             0x11600,  // 11600..1165F; Modi
3529             0x11660,  // 11660..1167F; Mongolian Supplement
3530             0x11680,  // 11680..116CF; Takri
3531             0x116D0,  //               unassigned
3532             0x11700,  // 11700..1173F; Ahom
3533             0x11740,  //               unassigned
3534             0x11800,  // 11800..1184F; Dogra
3535             0x11850,  //               unassigned
3536             0x118A0,  // 118A0..118FF; Warang Citi
3537             0x11900,  //               unassigned
3538             0x119A0,  // 119A0..119FF; Nandinagari
3539             0x11A00,  // 11A00..11A4F; Zanabazar Square
3540             0x11A50,  // 11A50..11AAF; Soyombo
3541             0x11AB0,  //               unassigned
3542             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3543             0x11B00,  //               unassigned
3544             0x11C00,  // 11C00..11C6F; Bhaiksuki
3545             0x11C70,  // 11C70..11CBF; Marchen
3546             0x11CC0,  //               unassigned
3547             0x11D00,  // 11D00..11D5F; Masaram Gondi
3548             0x11D60,  // 11D60..11DAF; Gunjala Gondi
3549             0x11DB0,  //               unassigned
3550             0x11EE0,  // 11EE0..11EFF; Makasar
3551             0x11F00,  //               unassigned
3552             0x11FC0,  // 11FC0..11FFF; Tamil Supplement
3553             0x12000,  // 12000..123FF; Cuneiform
3554             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3555             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3556             0x12550,  //               unassigned
3557             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3558             0x13430,  // 13430..1343F; Egyptian Hieroglyph Format Controls
3559             0x13440,  //               unassigned
3560             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3561             0x14680,  //               unassigned
3562             0x16800,  // 16800..16A3F; Bamum Supplement
3563             0x16A40,  // 16A40..16A6F; Mro
3564             0x16A70,  //               unassigned
3565             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3566             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3567             0x16B90,  //               unassigned
3568             0x16E40,  // 16E40..16E9F; Medefaidrin
3569             0x16EA0,  //               unassigned
3570             0x16F00,  // 16F00..16F9F; Miao
3571             0x16FA0,  //               unassigned
3572             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3573             0x17000,  // 17000..187FF; Tangut
3574             0x18800,  // 18800..18AFF; Tangut Components
3575             0x18B00,  //               unassigned
3576             0x1B000,  // 1B000..1B0FF; Kana Supplement
3577             0x1B100,  // 1B100..1B12F; Kana Extended-A
3578             0x1B130,  // 1B130..1B16F; Small Kana Extension
3579             0x1B170,  // 1B170..1B2FF; Nushu
3580             0x1B300,  //               unassigned
3581             0x1BC00,  // 1BC00..1BC9F; Duployan
3582             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3583             0x1BCB0,  //               unassigned
3584             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3585             0x1D100,  // 1D100..1D1FF; Musical Symbols
3586             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3587             0x1D250,  //               unassigned
3588             0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
3589             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3590             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3591             0x1D380,  //               unassigned
3592             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3593             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3594             0x1DAB0,  //               unassigned
3595             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3596             0x1E030,  //               unassigned
3597             0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
3598             0x1E150,  //               unassigned
3599             0x1E2C0,  // 1E2C0..1E2FF; Wancho
3600             0x1E300,  //               unassigned
3601             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3602             0x1E8E0,  //               unassigned
3603             0x1E900,  // 1E900..1E95F; Adlam
3604             0x1E960,  //               unassigned
3605             0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
3606             0x1ECC0,  //               unassigned
3607             0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
3608             0x1ED50,  //               unassigned
3609             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3610             0x1EF00,  //               unassigned
3611             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3612             0x1F030,  // 1F030..1F09F; Domino Tiles
3613             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3614             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3615             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3616             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3617             0x1F600,  // 1F600..1F64F; Emoticons
3618             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3619             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3620             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3621             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3622             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3623             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3624             0x1FA00,  // 1FA00..1FA6F; Chess Symbols
3625             0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
3626             0x1FB00,  //               unassigned
3627             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3628             0x2A6E0,  //               unassigned
3629             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3630             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3631             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3632             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3633             0x2EBF0,  //               unassigned
3634             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3635             0x2FA20,  //               unassigned
3636             0xE0000,  // E0000..E007F; Tags
3637             0xE0080,  //               unassigned
3638             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3639             0xE01F0,  //               unassigned
3640             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3641             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
3642         };
3643 
3644         private static final UnicodeBlock[] blocks = {
3645             BASIC_LATIN,
3646             LATIN_1_SUPPLEMENT,
3647             LATIN_EXTENDED_A,
3648             LATIN_EXTENDED_B,
3649             IPA_EXTENSIONS,
3650             SPACING_MODIFIER_LETTERS,
3651             COMBINING_DIACRITICAL_MARKS,
3652             GREEK,
3653             CYRILLIC,
3654             CYRILLIC_SUPPLEMENTARY,
3655             ARMENIAN,
3656             HEBREW,
3657             ARABIC,
3658             SYRIAC,
3659             ARABIC_SUPPLEMENT,
3660             THAANA,
3661             NKO,
3662             SAMARITAN,
3663             MANDAIC,
3664             SYRIAC_SUPPLEMENT,
3665             null,
3666             ARABIC_EXTENDED_A,
3667             DEVANAGARI,
3668             BENGALI,
3669             GURMUKHI,
3670             GUJARATI,
3671             ORIYA,
3672             TAMIL,
3673             TELUGU,
3674             KANNADA,
3675             MALAYALAM,
3676             SINHALA,
3677             THAI,
3678             LAO,
3679             TIBETAN,
3680             MYANMAR,
3681             GEORGIAN,
3682             HANGUL_JAMO,
3683             ETHIOPIC,
3684             ETHIOPIC_SUPPLEMENT,
3685             CHEROKEE,
3686             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3687             OGHAM,
3688             RUNIC,
3689             TAGALOG,
3690             HANUNOO,
3691             BUHID,
3692             TAGBANWA,
3693             KHMER,
3694             MONGOLIAN,
3695             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3696             LIMBU,
3697             TAI_LE,
3698             NEW_TAI_LUE,
3699             KHMER_SYMBOLS,
3700             BUGINESE,
3701             TAI_THAM,
3702             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3703             BALINESE,
3704             SUNDANESE,
3705             BATAK,
3706             LEPCHA,
3707             OL_CHIKI,
3708             CYRILLIC_EXTENDED_C,
3709             GEORGIAN_EXTENDED,
3710             SUNDANESE_SUPPLEMENT,
3711             VEDIC_EXTENSIONS,
3712             PHONETIC_EXTENSIONS,
3713             PHONETIC_EXTENSIONS_SUPPLEMENT,
3714             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3715             LATIN_EXTENDED_ADDITIONAL,
3716             GREEK_EXTENDED,
3717             GENERAL_PUNCTUATION,
3718             SUPERSCRIPTS_AND_SUBSCRIPTS,
3719             CURRENCY_SYMBOLS,
3720             COMBINING_MARKS_FOR_SYMBOLS,
3721             LETTERLIKE_SYMBOLS,
3722             NUMBER_FORMS,
3723             ARROWS,
3724             MATHEMATICAL_OPERATORS,
3725             MISCELLANEOUS_TECHNICAL,
3726             CONTROL_PICTURES,
3727             OPTICAL_CHARACTER_RECOGNITION,
3728             ENCLOSED_ALPHANUMERICS,
3729             BOX_DRAWING,
3730             BLOCK_ELEMENTS,
3731             GEOMETRIC_SHAPES,
3732             MISCELLANEOUS_SYMBOLS,
3733             DINGBATS,
3734             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3735             SUPPLEMENTAL_ARROWS_A,
3736             BRAILLE_PATTERNS,
3737             SUPPLEMENTAL_ARROWS_B,
3738             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3739             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3740             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3741             GLAGOLITIC,
3742             LATIN_EXTENDED_C,
3743             COPTIC,
3744             GEORGIAN_SUPPLEMENT,
3745             TIFINAGH,
3746             ETHIOPIC_EXTENDED,
3747             CYRILLIC_EXTENDED_A,
3748             SUPPLEMENTAL_PUNCTUATION,
3749             CJK_RADICALS_SUPPLEMENT,
3750             KANGXI_RADICALS,
3751             null,
3752             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3753             CJK_SYMBOLS_AND_PUNCTUATION,
3754             HIRAGANA,
3755             KATAKANA,
3756             BOPOMOFO,
3757             HANGUL_COMPATIBILITY_JAMO,
3758             KANBUN,
3759             BOPOMOFO_EXTENDED,
3760             CJK_STROKES,
3761             KATAKANA_PHONETIC_EXTENSIONS,
3762             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3763             CJK_COMPATIBILITY,
3764             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3765             YIJING_HEXAGRAM_SYMBOLS,
3766             CJK_UNIFIED_IDEOGRAPHS,
3767             YI_SYLLABLES,
3768             YI_RADICALS,
3769             LISU,
3770             VAI,
3771             CYRILLIC_EXTENDED_B,
3772             BAMUM,
3773             MODIFIER_TONE_LETTERS,
3774             LATIN_EXTENDED_D,
3775             SYLOTI_NAGRI,
3776             COMMON_INDIC_NUMBER_FORMS,
3777             PHAGS_PA,
3778             SAURASHTRA,
3779             DEVANAGARI_EXTENDED,
3780             KAYAH_LI,
3781             REJANG,
3782             HANGUL_JAMO_EXTENDED_A,
3783             JAVANESE,
3784             MYANMAR_EXTENDED_B,
3785             CHAM,
3786             MYANMAR_EXTENDED_A,
3787             TAI_VIET,
3788             MEETEI_MAYEK_EXTENSIONS,
3789             ETHIOPIC_EXTENDED_A,
3790             LATIN_EXTENDED_E,
3791             CHEROKEE_SUPPLEMENT,
3792             MEETEI_MAYEK,
3793             HANGUL_SYLLABLES,
3794             HANGUL_JAMO_EXTENDED_B,
3795             HIGH_SURROGATES,
3796             HIGH_PRIVATE_USE_SURROGATES,
3797             LOW_SURROGATES,
3798             PRIVATE_USE_AREA,
3799             CJK_COMPATIBILITY_IDEOGRAPHS,
3800             ALPHABETIC_PRESENTATION_FORMS,
3801             ARABIC_PRESENTATION_FORMS_A,
3802             VARIATION_SELECTORS,
3803             VERTICAL_FORMS,
3804             COMBINING_HALF_MARKS,
3805             CJK_COMPATIBILITY_FORMS,
3806             SMALL_FORM_VARIANTS,
3807             ARABIC_PRESENTATION_FORMS_B,
3808             HALFWIDTH_AND_FULLWIDTH_FORMS,
3809             SPECIALS,
3810             LINEAR_B_SYLLABARY,
3811             LINEAR_B_IDEOGRAMS,
3812             AEGEAN_NUMBERS,
3813             ANCIENT_GREEK_NUMBERS,
3814             ANCIENT_SYMBOLS,
3815             PHAISTOS_DISC,
3816             null,
3817             LYCIAN,
3818             CARIAN,
3819             COPTIC_EPACT_NUMBERS,
3820             OLD_ITALIC,
3821             GOTHIC,
3822             OLD_PERMIC,
3823             UGARITIC,
3824             OLD_PERSIAN,
3825             null,
3826             DESERET,
3827             SHAVIAN,
3828             OSMANYA,
3829             OSAGE,
3830             ELBASAN,
3831             CAUCASIAN_ALBANIAN,
3832             null,
3833             LINEAR_A,
3834             null,
3835             CYPRIOT_SYLLABARY,
3836             IMPERIAL_ARAMAIC,
3837             PALMYRENE,
3838             NABATAEAN,
3839             null,
3840             HATRAN,
3841             PHOENICIAN,
3842             LYDIAN,
3843             null,
3844             MEROITIC_HIEROGLYPHS,
3845             MEROITIC_CURSIVE,
3846             KHAROSHTHI,
3847             OLD_SOUTH_ARABIAN,
3848             OLD_NORTH_ARABIAN,
3849             null,
3850             MANICHAEAN,
3851             AVESTAN,
3852             INSCRIPTIONAL_PARTHIAN,
3853             INSCRIPTIONAL_PAHLAVI,
3854             PSALTER_PAHLAVI,
3855             null,
3856             OLD_TURKIC,
3857             null,
3858             OLD_HUNGARIAN,
3859             HANIFI_ROHINGYA,
3860             null,
3861             RUMI_NUMERAL_SYMBOLS,
3862             null,
3863             OLD_SOGDIAN,
3864             SOGDIAN,
3865             null,
3866             ELYMAIC,
3867             BRAHMI,
3868             KAITHI,
3869             SORA_SOMPENG,
3870             CHAKMA,
3871             MAHAJANI,
3872             SHARADA,
3873             SINHALA_ARCHAIC_NUMBERS,
3874             KHOJKI,
3875             null,
3876             MULTANI,
3877             KHUDAWADI,
3878             GRANTHA,
3879             null,
3880             NEWA,
3881             TIRHUTA,
3882             null,
3883             SIDDHAM,
3884             MODI,
3885             MONGOLIAN_SUPPLEMENT,
3886             TAKRI,
3887             null,
3888             AHOM,
3889             null,
3890             DOGRA,
3891             null,
3892             WARANG_CITI,
3893             null,
3894             NANDINAGARI,
3895             ZANABAZAR_SQUARE,
3896             SOYOMBO,
3897             null,
3898             PAU_CIN_HAU,
3899             null,
3900             BHAIKSUKI,
3901             MARCHEN,
3902             null,
3903             MASARAM_GONDI,
3904             GUNJALA_GONDI,
3905             null,
3906             MAKASAR,
3907             null,
3908             TAMIL_SUPPLEMENT,
3909             CUNEIFORM,
3910             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3911             EARLY_DYNASTIC_CUNEIFORM,
3912             null,
3913             EGYPTIAN_HIEROGLYPHS,
3914             EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
3915             null,
3916             ANATOLIAN_HIEROGLYPHS,
3917             null,
3918             BAMUM_SUPPLEMENT,
3919             MRO,
3920             null,
3921             BASSA_VAH,
3922             PAHAWH_HMONG,
3923             null,
3924             MEDEFAIDRIN,
3925             null,
3926             MIAO,
3927             null,
3928             IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
3929             TANGUT,
3930             TANGUT_COMPONENTS,
3931             null,
3932             KANA_SUPPLEMENT,
3933             KANA_EXTENDED_A,
3934             SMALL_KANA_EXTENSION,
3935             NUSHU,
3936             null,
3937             DUPLOYAN,
3938             SHORTHAND_FORMAT_CONTROLS,
3939             null,
3940             BYZANTINE_MUSICAL_SYMBOLS,
3941             MUSICAL_SYMBOLS,
3942             ANCIENT_GREEK_MUSICAL_NOTATION,
3943             null,
3944             MAYAN_NUMERALS,
3945             TAI_XUAN_JING_SYMBOLS,
3946             COUNTING_ROD_NUMERALS,
3947             null,
3948             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3949             SUTTON_SIGNWRITING,
3950             null,
3951             GLAGOLITIC_SUPPLEMENT,
3952             null,
3953             NYIAKENG_PUACHUE_HMONG,
3954             null,
3955             WANCHO,
3956             null,
3957             MENDE_KIKAKUI,
3958             null,
3959             ADLAM,
3960             null,
3961             INDIC_SIYAQ_NUMBERS,
3962             null,
3963             OTTOMAN_SIYAQ_NUMBERS,
3964             null,
3965             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3966             null,
3967             MAHJONG_TILES,
3968             DOMINO_TILES,
3969             PLAYING_CARDS,
3970             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3971             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3972             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3973             EMOTICONS,
3974             ORNAMENTAL_DINGBATS,
3975             TRANSPORT_AND_MAP_SYMBOLS,
3976             ALCHEMICAL_SYMBOLS,
3977             GEOMETRIC_SHAPES_EXTENDED,
3978             SUPPLEMENTAL_ARROWS_C,
3979             SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
3980             CHESS_SYMBOLS,
3981             SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
3982             null,
3983             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3984             null,
3985             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3986             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3987             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
3988             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
3989             null,
3990             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3991             null,
3992             TAGS,
3993             null,
3994             VARIATION_SELECTORS_SUPPLEMENT,
3995             null,
3996             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3997             SUPPLEMENTARY_PRIVATE_USE_AREA_B,
3998         };
3999 
4000 
4001         /**
4002          * Returns the object representing the Unicode block containing the
4003          * given character, or {@code null} if the character is not a
4004          * member of a defined block.
4005          *
4006          * <p><b>Note:</b> This method cannot handle
4007          * <a href="Character.html#supplementary"> supplementary
4008          * characters</a>.  To support all Unicode characters, including
4009          * supplementary characters, use the {@link #of(int)} method.
4010          *
4011          * @param   c  The character in question
4012          * @return  The {@code UnicodeBlock} instance representing the
4013          *          Unicode block of which this character is a member, or
4014          *          {@code null} if the character is not a member of any
4015          *          Unicode block
4016          */
4017         public static UnicodeBlock of(char c) {
4018             return of((int)c);
4019         }
4020 
4021         /**
4022          * Returns the object representing the Unicode block
4023          * containing the given character (Unicode code point), or
4024          * {@code null} if the character is not a member of a
4025          * defined block.
4026          *
4027          * @param   codePoint the character (Unicode code point) in question.
4028          * @return  The {@code UnicodeBlock} instance representing the
4029          *          Unicode block of which this character is a member, or
4030          *          {@code null} if the character is not a member of any
4031          *          Unicode block
4032          * @throws  IllegalArgumentException if the specified
4033          * {@code codePoint} is an invalid Unicode code point.
4034          * @see Character#isValidCodePoint(int)
4035          * @since   1.5
4036          */
4037         public static UnicodeBlock of(int codePoint) {
4038             if (!isValidCodePoint(codePoint)) {
4039                 throw new IllegalArgumentException(
4040                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
4041             }
4042 
4043             int top, bottom, current;
4044             bottom = 0;
4045             top = blockStarts.length;
4046             current = top/2;
4047 
4048             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4049             while (top - bottom > 1) {
4050                 if (codePoint >= blockStarts[current]) {
4051                     bottom = current;
4052                 } else {
4053                     top = current;
4054                 }
4055                 current = (top + bottom) / 2;
4056             }
4057             return blocks[current];
4058         }
4059 
4060         /**
4061          * Returns the UnicodeBlock with the given name. Block
4062          * names are determined by The Unicode Standard. The file
4063          * {@code Blocks-<version>.txt} defines blocks for a particular
4064          * version of the standard. The {@link Character} class specifies
4065          * the version of the standard that it supports.
4066          * <p>
4067          * This method accepts block names in the following forms:
4068          * <ol>
4069          * <li> Canonical block names as defined by the Unicode Standard.
4070          * For example, the standard defines a "Basic Latin" block. Therefore, this
4071          * method accepts "Basic Latin" as a valid block name. The documentation of
4072          * each UnicodeBlock provides the canonical name.
4073          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4074          * is a valid block name for the "Basic Latin" block.
4075          * <li>The text representation of each constant UnicodeBlock identifier.
4076          * For example, this method will return the {@link #BASIC_LATIN} block if
4077          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4078          * hyphens in the canonical name with underscores.
4079          * </ol>
4080          * Finally, character case is ignored for all of the valid block name forms.
4081          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4082          * The en_US locale's case mapping rules are used to provide case-insensitive
4083          * string comparisons for block name validation.
4084          * <p>
4085          * If the Unicode Standard changes block names, both the previous and
4086          * current names will be accepted.
4087          *
4088          * @param blockName A {@code UnicodeBlock} name.
4089          * @return The {@code UnicodeBlock} instance identified
4090          *         by {@code blockName}
4091          * @throws IllegalArgumentException if {@code blockName} is an
4092          *         invalid name
4093          * @throws NullPointerException if {@code blockName} is null
4094          * @since 1.5
4095          */
4096         public static final UnicodeBlock forName(String blockName) {
4097             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4098             if (block == null) {
4099                 throw new IllegalArgumentException("Not a valid block name: "
4100                             + blockName);
4101             }
4102             return block;
4103         }
4104     }
4105 
4106 
4107     /**
4108      * A family of character subsets representing the character scripts
4109      * defined in the <a href="http://www.unicode.org/reports/tr24/">
4110      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4111      * character is assigned to a single Unicode script, either a specific
4112      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4113      * one of the following three special values,
4114      * {@link Character.UnicodeScript#INHERITED Inherited},
4115      * {@link Character.UnicodeScript#COMMON Common} or
4116      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4117      *
4118      * @since 1.7
4119      */
4120     public static enum UnicodeScript {
4121         /**
4122          * Unicode script "Common".
4123          */
4124         COMMON,
4125 
4126         /**
4127          * Unicode script "Latin".
4128          */
4129         LATIN,
4130 
4131         /**
4132          * Unicode script "Greek".
4133          */
4134         GREEK,
4135 
4136         /**
4137          * Unicode script "Cyrillic".
4138          */
4139         CYRILLIC,
4140 
4141         /**
4142          * Unicode script "Armenian".
4143          */
4144         ARMENIAN,
4145 
4146         /**
4147          * Unicode script "Hebrew".
4148          */
4149         HEBREW,
4150 
4151         /**
4152          * Unicode script "Arabic".
4153          */
4154         ARABIC,
4155 
4156         /**
4157          * Unicode script "Syriac".
4158          */
4159         SYRIAC,
4160 
4161         /**
4162          * Unicode script "Thaana".
4163          */
4164         THAANA,
4165 
4166         /**
4167          * Unicode script "Devanagari".
4168          */
4169         DEVANAGARI,
4170 
4171         /**
4172          * Unicode script "Bengali".
4173          */
4174         BENGALI,
4175 
4176         /**
4177          * Unicode script "Gurmukhi".
4178          */
4179         GURMUKHI,
4180 
4181         /**
4182          * Unicode script "Gujarati".
4183          */
4184         GUJARATI,
4185 
4186         /**
4187          * Unicode script "Oriya".
4188          */
4189         ORIYA,
4190 
4191         /**
4192          * Unicode script "Tamil".
4193          */
4194         TAMIL,
4195 
4196         /**
4197          * Unicode script "Telugu".
4198          */
4199         TELUGU,
4200 
4201         /**
4202          * Unicode script "Kannada".
4203          */
4204         KANNADA,
4205 
4206         /**
4207          * Unicode script "Malayalam".
4208          */
4209         MALAYALAM,
4210 
4211         /**
4212          * Unicode script "Sinhala".
4213          */
4214         SINHALA,
4215 
4216         /**
4217          * Unicode script "Thai".
4218          */
4219         THAI,
4220 
4221         /**
4222          * Unicode script "Lao".
4223          */
4224         LAO,
4225 
4226         /**
4227          * Unicode script "Tibetan".
4228          */
4229         TIBETAN,
4230 
4231         /**
4232          * Unicode script "Myanmar".
4233          */
4234         MYANMAR,
4235 
4236         /**
4237          * Unicode script "Georgian".
4238          */
4239         GEORGIAN,
4240 
4241         /**
4242          * Unicode script "Hangul".
4243          */
4244         HANGUL,
4245 
4246         /**
4247          * Unicode script "Ethiopic".
4248          */
4249         ETHIOPIC,
4250 
4251         /**
4252          * Unicode script "Cherokee".
4253          */
4254         CHEROKEE,
4255 
4256         /**
4257          * Unicode script "Canadian_Aboriginal".
4258          */
4259         CANADIAN_ABORIGINAL,
4260 
4261         /**
4262          * Unicode script "Ogham".
4263          */
4264         OGHAM,
4265 
4266         /**
4267          * Unicode script "Runic".
4268          */
4269         RUNIC,
4270 
4271         /**
4272          * Unicode script "Khmer".
4273          */
4274         KHMER,
4275 
4276         /**
4277          * Unicode script "Mongolian".
4278          */
4279         MONGOLIAN,
4280 
4281         /**
4282          * Unicode script "Hiragana".
4283          */
4284         HIRAGANA,
4285 
4286         /**
4287          * Unicode script "Katakana".
4288          */
4289         KATAKANA,
4290 
4291         /**
4292          * Unicode script "Bopomofo".
4293          */
4294         BOPOMOFO,
4295 
4296         /**
4297          * Unicode script "Han".
4298          */
4299         HAN,
4300 
4301         /**
4302          * Unicode script "Yi".
4303          */
4304         YI,
4305 
4306         /**
4307          * Unicode script "Old_Italic".
4308          */
4309         OLD_ITALIC,
4310 
4311         /**
4312          * Unicode script "Gothic".
4313          */
4314         GOTHIC,
4315 
4316         /**
4317          * Unicode script "Deseret".
4318          */
4319         DESERET,
4320 
4321         /**
4322          * Unicode script "Inherited".
4323          */
4324         INHERITED,
4325 
4326         /**
4327          * Unicode script "Tagalog".
4328          */
4329         TAGALOG,
4330 
4331         /**
4332          * Unicode script "Hanunoo".
4333          */
4334         HANUNOO,
4335 
4336         /**
4337          * Unicode script "Buhid".
4338          */
4339         BUHID,
4340 
4341         /**
4342          * Unicode script "Tagbanwa".
4343          */
4344         TAGBANWA,
4345 
4346         /**
4347          * Unicode script "Limbu".
4348          */
4349         LIMBU,
4350 
4351         /**
4352          * Unicode script "Tai_Le".
4353          */
4354         TAI_LE,
4355 
4356         /**
4357          * Unicode script "Linear_B".
4358          */
4359         LINEAR_B,
4360 
4361         /**
4362          * Unicode script "Ugaritic".
4363          */
4364         UGARITIC,
4365 
4366         /**
4367          * Unicode script "Shavian".
4368          */
4369         SHAVIAN,
4370 
4371         /**
4372          * Unicode script "Osmanya".
4373          */
4374         OSMANYA,
4375 
4376         /**
4377          * Unicode script "Cypriot".
4378          */
4379         CYPRIOT,
4380 
4381         /**
4382          * Unicode script "Braille".
4383          */
4384         BRAILLE,
4385 
4386         /**
4387          * Unicode script "Buginese".
4388          */
4389         BUGINESE,
4390 
4391         /**
4392          * Unicode script "Coptic".
4393          */
4394         COPTIC,
4395 
4396         /**
4397          * Unicode script "New_Tai_Lue".
4398          */
4399         NEW_TAI_LUE,
4400 
4401         /**
4402          * Unicode script "Glagolitic".
4403          */
4404         GLAGOLITIC,
4405 
4406         /**
4407          * Unicode script "Tifinagh".
4408          */
4409         TIFINAGH,
4410 
4411         /**
4412          * Unicode script "Syloti_Nagri".
4413          */
4414         SYLOTI_NAGRI,
4415 
4416         /**
4417          * Unicode script "Old_Persian".
4418          */
4419         OLD_PERSIAN,
4420 
4421         /**
4422          * Unicode script "Kharoshthi".
4423          */
4424         KHAROSHTHI,
4425 
4426         /**
4427          * Unicode script "Balinese".
4428          */
4429         BALINESE,
4430 
4431         /**
4432          * Unicode script "Cuneiform".
4433          */
4434         CUNEIFORM,
4435 
4436         /**
4437          * Unicode script "Phoenician".
4438          */
4439         PHOENICIAN,
4440 
4441         /**
4442          * Unicode script "Phags_Pa".
4443          */
4444         PHAGS_PA,
4445 
4446         /**
4447          * Unicode script "Nko".
4448          */
4449         NKO,
4450 
4451         /**
4452          * Unicode script "Sundanese".
4453          */
4454         SUNDANESE,
4455 
4456         /**
4457          * Unicode script "Batak".
4458          */
4459         BATAK,
4460 
4461         /**
4462          * Unicode script "Lepcha".
4463          */
4464         LEPCHA,
4465 
4466         /**
4467          * Unicode script "Ol_Chiki".
4468          */
4469         OL_CHIKI,
4470 
4471         /**
4472          * Unicode script "Vai".
4473          */
4474         VAI,
4475 
4476         /**
4477          * Unicode script "Saurashtra".
4478          */
4479         SAURASHTRA,
4480 
4481         /**
4482          * Unicode script "Kayah_Li".
4483          */
4484         KAYAH_LI,
4485 
4486         /**
4487          * Unicode script "Rejang".
4488          */
4489         REJANG,
4490 
4491         /**
4492          * Unicode script "Lycian".
4493          */
4494         LYCIAN,
4495 
4496         /**
4497          * Unicode script "Carian".
4498          */
4499         CARIAN,
4500 
4501         /**
4502          * Unicode script "Lydian".
4503          */
4504         LYDIAN,
4505 
4506         /**
4507          * Unicode script "Cham".
4508          */
4509         CHAM,
4510 
4511         /**
4512          * Unicode script "Tai_Tham".
4513          */
4514         TAI_THAM,
4515 
4516         /**
4517          * Unicode script "Tai_Viet".
4518          */
4519         TAI_VIET,
4520 
4521         /**
4522          * Unicode script "Avestan".
4523          */
4524         AVESTAN,
4525 
4526         /**
4527          * Unicode script "Egyptian_Hieroglyphs".
4528          */
4529         EGYPTIAN_HIEROGLYPHS,
4530 
4531         /**
4532          * Unicode script "Samaritan".
4533          */
4534         SAMARITAN,
4535 
4536         /**
4537          * Unicode script "Mandaic".
4538          */
4539         MANDAIC,
4540 
4541         /**
4542          * Unicode script "Lisu".
4543          */
4544         LISU,
4545 
4546         /**
4547          * Unicode script "Bamum".
4548          */
4549         BAMUM,
4550 
4551         /**
4552          * Unicode script "Javanese".
4553          */
4554         JAVANESE,
4555 
4556         /**
4557          * Unicode script "Meetei_Mayek".
4558          */
4559         MEETEI_MAYEK,
4560 
4561         /**
4562          * Unicode script "Imperial_Aramaic".
4563          */
4564         IMPERIAL_ARAMAIC,
4565 
4566         /**
4567          * Unicode script "Old_South_Arabian".
4568          */
4569         OLD_SOUTH_ARABIAN,
4570 
4571         /**
4572          * Unicode script "Inscriptional_Parthian".
4573          */
4574         INSCRIPTIONAL_PARTHIAN,
4575 
4576         /**
4577          * Unicode script "Inscriptional_Pahlavi".
4578          */
4579         INSCRIPTIONAL_PAHLAVI,
4580 
4581         /**
4582          * Unicode script "Old_Turkic".
4583          */
4584         OLD_TURKIC,
4585 
4586         /**
4587          * Unicode script "Brahmi".
4588          */
4589         BRAHMI,
4590 
4591         /**
4592          * Unicode script "Kaithi".
4593          */
4594         KAITHI,
4595 
4596         /**
4597          * Unicode script "Meroitic Hieroglyphs".
4598          * @since 1.8
4599          */
4600         MEROITIC_HIEROGLYPHS,
4601 
4602         /**
4603          * Unicode script "Meroitic Cursive".
4604          * @since 1.8
4605          */
4606         MEROITIC_CURSIVE,
4607 
4608         /**
4609          * Unicode script "Sora Sompeng".
4610          * @since 1.8
4611          */
4612         SORA_SOMPENG,
4613 
4614         /**
4615          * Unicode script "Chakma".
4616          * @since 1.8
4617          */
4618         CHAKMA,
4619 
4620         /**
4621          * Unicode script "Sharada".
4622          * @since 1.8
4623          */
4624         SHARADA,
4625 
4626         /**
4627          * Unicode script "Takri".
4628          * @since 1.8
4629          */
4630         TAKRI,
4631 
4632         /**
4633          * Unicode script "Miao".
4634          * @since 1.8
4635          */
4636         MIAO,
4637 
4638         /**
4639          * Unicode script "Caucasian Albanian".
4640          * @since 9
4641          */
4642         CAUCASIAN_ALBANIAN,
4643 
4644         /**
4645          * Unicode script "Bassa Vah".
4646          * @since 9
4647          */
4648         BASSA_VAH,
4649 
4650         /**
4651          * Unicode script "Duployan".
4652          * @since 9
4653          */
4654         DUPLOYAN,
4655 
4656         /**
4657          * Unicode script "Elbasan".
4658          * @since 9
4659          */
4660         ELBASAN,
4661 
4662         /**
4663          * Unicode script "Grantha".
4664          * @since 9
4665          */
4666         GRANTHA,
4667 
4668         /**
4669          * Unicode script "Pahawh Hmong".
4670          * @since 9
4671          */
4672         PAHAWH_HMONG,
4673 
4674         /**
4675          * Unicode script "Khojki".
4676          * @since 9
4677          */
4678         KHOJKI,
4679 
4680         /**
4681          * Unicode script "Linear A".
4682          * @since 9
4683          */
4684         LINEAR_A,
4685 
4686         /**
4687          * Unicode script "Mahajani".
4688          * @since 9
4689          */
4690         MAHAJANI,
4691 
4692         /**
4693          * Unicode script "Manichaean".
4694          * @since 9
4695          */
4696         MANICHAEAN,
4697 
4698         /**
4699          * Unicode script "Mende Kikakui".
4700          * @since 9
4701          */
4702         MENDE_KIKAKUI,
4703 
4704         /**
4705          * Unicode script "Modi".
4706          * @since 9
4707          */
4708         MODI,
4709 
4710         /**
4711          * Unicode script "Mro".
4712          * @since 9
4713          */
4714         MRO,
4715 
4716         /**
4717          * Unicode script "Old North Arabian".
4718          * @since 9
4719          */
4720         OLD_NORTH_ARABIAN,
4721 
4722         /**
4723          * Unicode script "Nabataean".
4724          * @since 9
4725          */
4726         NABATAEAN,
4727 
4728         /**
4729          * Unicode script "Palmyrene".
4730          * @since 9
4731          */
4732         PALMYRENE,
4733 
4734         /**
4735          * Unicode script "Pau Cin Hau".
4736          * @since 9
4737          */
4738         PAU_CIN_HAU,
4739 
4740         /**
4741          * Unicode script "Old Permic".
4742          * @since 9
4743          */
4744         OLD_PERMIC,
4745 
4746         /**
4747          * Unicode script "Psalter Pahlavi".
4748          * @since 9
4749          */
4750         PSALTER_PAHLAVI,
4751 
4752         /**
4753          * Unicode script "Siddham".
4754          * @since 9
4755          */
4756         SIDDHAM,
4757 
4758         /**
4759          * Unicode script "Khudawadi".
4760          * @since 9
4761          */
4762         KHUDAWADI,
4763 
4764         /**
4765          * Unicode script "Tirhuta".
4766          * @since 9
4767          */
4768         TIRHUTA,
4769 
4770         /**
4771          * Unicode script "Warang Citi".
4772          * @since 9
4773          */
4774         WARANG_CITI,
4775 
4776          /**
4777          * Unicode script "Ahom".
4778          * @since 9
4779          */
4780         AHOM,
4781 
4782         /**
4783          * Unicode script "Anatolian Hieroglyphs".
4784          * @since 9
4785          */
4786         ANATOLIAN_HIEROGLYPHS,
4787 
4788         /**
4789          * Unicode script "Hatran".
4790          * @since 9
4791          */
4792         HATRAN,
4793 
4794         /**
4795          * Unicode script "Multani".
4796          * @since 9
4797          */
4798         MULTANI,
4799 
4800         /**
4801          * Unicode script "Old Hungarian".
4802          * @since 9
4803          */
4804         OLD_HUNGARIAN,
4805 
4806         /**
4807          * Unicode script "SignWriting".
4808          * @since 9
4809          */
4810         SIGNWRITING,
4811 
4812         /**
4813          * Unicode script "Adlam".
4814          * @since 11
4815          */
4816         ADLAM,
4817 
4818         /**
4819          * Unicode script "Bhaiksuki".
4820          * @since 11
4821          */
4822         BHAIKSUKI,
4823 
4824         /**
4825          * Unicode script "Marchen".
4826          * @since 11
4827          */
4828         MARCHEN,
4829 
4830         /**
4831          * Unicode script "Newa".
4832          * @since 11
4833          */
4834         NEWA,
4835 
4836         /**
4837          * Unicode script "Osage".
4838          * @since 11
4839          */
4840         OSAGE,
4841 
4842         /**
4843          * Unicode script "Tangut".
4844          * @since 11
4845          */
4846         TANGUT,
4847 
4848         /**
4849          * Unicode script "Masaram Gondi".
4850          * @since 11
4851          */
4852         MASARAM_GONDI,
4853 
4854         /**
4855          * Unicode script "Nushu".
4856          * @since 11
4857          */
4858         NUSHU,
4859 
4860         /**
4861          * Unicode script "Soyombo".
4862          * @since 11
4863          */
4864         SOYOMBO,
4865 
4866         /**
4867          * Unicode script "Zanabazar Square".
4868          * @since 11
4869          */
4870         ZANABAZAR_SQUARE,
4871 
4872         /**
4873          * Unicode script "Hanifi Rohingya".
4874          * @since 12
4875          */
4876         HANIFI_ROHINGYA,
4877 
4878         /**
4879          * Unicode script "Old Sogdian".
4880          * @since 12
4881          */
4882         OLD_SOGDIAN,
4883 
4884         /**
4885          * Unicode script "Sogdian".
4886          * @since 12
4887          */
4888         SOGDIAN,
4889 
4890         /**
4891          * Unicode script "Dogra".
4892          * @since 12
4893          */
4894         DOGRA,
4895 
4896         /**
4897          * Unicode script "Gunjala Gondi".
4898          * @since 12
4899          */
4900         GUNJALA_GONDI,
4901 
4902         /**
4903          * Unicode script "Makasar".
4904          * @since 12
4905          */
4906         MAKASAR,
4907 
4908         /**
4909          * Unicode script "Medefaidrin".
4910          * @since 12
4911          */
4912         MEDEFAIDRIN,
4913 
4914         /**
4915          * Unicode script "Elymaic".
4916          * @since 13
4917          */
4918         ELYMAIC,
4919 
4920         /**
4921          * Unicode script "Nandinagari".
4922          * @since 13
4923          */
4924         NANDINAGARI,
4925 
4926         /**
4927          * Unicode script "Nyiakeng Puachue Hmong".
4928          * @since 13
4929          */
4930         NYIAKENG_PUACHUE_HMONG,
4931 
4932         /**
4933          * Unicode script "Wancho".
4934          * @since 13
4935          */
4936         WANCHO,
4937 
4938         /**
4939          * Unicode script "Unknown".
4940          */
4941         UNKNOWN;
4942 
4943         private static final int[] scriptStarts = {
4944             0x0000,   // 0000..0040; COMMON
4945             0x0041,   // 0041..005A; LATIN
4946             0x005B,   // 005B..0060; COMMON
4947             0x0061,   // 0061..007A; LATIN
4948             0x007B,   // 007B..00A9; COMMON
4949             0x00AA,   // 00AA      ; LATIN
4950             0x00AB,   // 00AB..00B9; COMMON
4951             0x00BA,   // 00BA      ; LATIN
4952             0x00BB,   // 00BB..00BF; COMMON
4953             0x00C0,   // 00C0..00D6; LATIN
4954             0x00D7,   // 00D7      ; COMMON
4955             0x00D8,   // 00D8..00F6; LATIN
4956             0x00F7,   // 00F7      ; COMMON
4957             0x00F8,   // 00F8..02B8; LATIN
4958             0x02B9,   // 02B9..02DF; COMMON
4959             0x02E0,   // 02E0..02E4; LATIN
4960             0x02E5,   // 02E5..02E9; COMMON
4961             0x02EA,   // 02EA..02EB; BOPOMOFO
4962             0x02EC,   // 02EC..02FF; COMMON
4963             0x0300,   // 0300..036F; INHERITED
4964             0x0370,   // 0370..0373; GREEK
4965             0x0374,   // 0374      ; COMMON
4966             0x0375,   // 0375..0377; GREEK
4967             0x0378,   // 0378..0379; UNKNOWN
4968             0x037A,   // 037A..037D; GREEK
4969             0x037E,   // 037E      ; COMMON
4970             0x037F,   // 037F      ; GREEK
4971             0x0380,   // 0380..0383; UNKNOWN
4972             0x0384,   // 0384      ; GREEK
4973             0x0385,   // 0385      ; COMMON
4974             0x0386,   // 0386      ; GREEK
4975             0x0387,   // 0387      ; COMMON
4976             0x0388,   // 0388..038A; GREEK
4977             0x038B,   // 038B      ; UNKNOWN
4978             0x038C,   // 038C      ; GREEK
4979             0x038D,   // 038D      ; UNKNOWN
4980             0x038E,   // 038E..03A1; GREEK
4981             0x03A2,   // 03A2      ; UNKNOWN
4982             0x03A3,   // 03A3..03E1; GREEK
4983             0x03E2,   // 03E2..03EF; COPTIC
4984             0x03F0,   // 03F0..03FF; GREEK
4985             0x0400,   // 0400..0484; CYRILLIC
4986             0x0485,   // 0485..0486; INHERITED
4987             0x0487,   // 0487..052F; CYRILLIC
4988             0x0530,   // 0530      ; UNKNOWN
4989             0x0531,   // 0531..0556; ARMENIAN
4990             0x0557,   // 0557..0558; UNKNOWN
4991             0x0559,   // 0559..0588; ARMENIAN
4992             0x0589,   // 0589      ; COMMON
4993             0x058A,   // 058A      ; ARMENIAN
4994             0x058B,   // 058B..058C; UNKNOWN
4995             0x058D,   // 058D..058F; ARMENIAN
4996             0x0590,   // 0590      ; UNKNOWN
4997             0x0591,   // 0591..05C7; HEBREW
4998             0x05C8,   // 05C8..05CF; UNKNOWN
4999             0x05D0,   // 05D0..05EA; HEBREW
5000             0x05EB,   // 05EB..05EE; UNKNOWN
5001             0x05EF,   // 05EF..05F4; HEBREW
5002             0x05F5,   // 05F5..05FF; UNKNOWN
5003             0x0600,   // 0600..0604; ARABIC
5004             0x0605,   // 0605      ; COMMON
5005             0x0606,   // 0606..060B; ARABIC
5006             0x060C,   // 060C      ; COMMON
5007             0x060D,   // 060D..061A; ARABIC
5008             0x061B,   // 061B      ; COMMON
5009             0x061C,   // 061C      ; ARABIC
5010             0x061D,   // 061D      ; UNKNOWN
5011             0x061E,   // 061E      ; ARABIC
5012             0x061F,   // 061F      ; COMMON
5013             0x0620,   // 0620..063F; ARABIC
5014             0x0640,   // 0640      ; COMMON
5015             0x0641,   // 0641..064A; ARABIC
5016             0x064B,   // 064B..0655; INHERITED
5017             0x0656,   // 0656..066F; ARABIC
5018             0x0670,   // 0670      ; INHERITED
5019             0x0671,   // 0671..06DC; ARABIC
5020             0x06DD,   // 06DD      ; COMMON
5021             0x06DE,   // 06DE..06FF; ARABIC
5022             0x0700,   // 0700..070D; SYRIAC
5023             0x070E,   // 070E      ; UNKNOWN
5024             0x070F,   // 070F..074A; SYRIAC
5025             0x074B,   // 074B..074C; UNKNOWN
5026             0x074D,   // 074D..074F; SYRIAC
5027             0x0750,   // 0750..077F; ARABIC
5028             0x0780,   // 0780..07B1; THAANA
5029             0x07B2,   // 07B2..07BF; UNKNOWN
5030             0x07C0,   // 07C0..07FA; NKO
5031             0x07FB,   // 07FB..07FC; UNKNOWN
5032             0x07FD,   // 07FD..07FF; NKO
5033             0x0800,   // 0800..082D; SAMARITAN
5034             0x082E,   // 082E..082F; UNKNOWN
5035             0x0830,   // 0830..083E; SAMARITAN
5036             0x083F,   // 083F      ; UNKNOWN
5037             0x0840,   // 0840..085B; MANDAIC
5038             0x085C,   // 085C..085D; UNKNOWN
5039             0x085E,   // 085E      ; MANDAIC
5040             0x085F,   // 085F      ; UNKNOWN
5041             0x0860,   // 0860..086A; SYRIAC
5042             0x086B,   // 086B..089F; UNKNOWN
5043             0x08A0,   // 08A0..08B4; ARABIC
5044             0x08B5,   // 08B5      ; UNKNOWN
5045             0x08B6,   // 08B6..08BD; ARABIC
5046             0x08BE,   // 08BE..08D2; UNKNOWN
5047             0x08D3,   // 08D3..08E1; ARABIC
5048             0x08E2,   // 08E2      ; COMMON
5049             0x08E3,   // 08E3..08FF; ARABIC
5050             0x0900,   // 0900..0950; DEVANAGARI
5051             0x0951,   // 0951..0954; INHERITED
5052             0x0955,   // 0955..0963; DEVANAGARI
5053             0x0964,   // 0964..0965; COMMON
5054             0x0966,   // 0966..097F; DEVANAGARI
5055             0x0980,   // 0980..0983; BENGALI
5056             0x0984,   // 0984      ; UNKNOWN
5057             0x0985,   // 0985..098C; BENGALI
5058             0x098D,   // 098D..098E; UNKNOWN
5059             0x098F,   // 098F..0990; BENGALI
5060             0x0991,   // 0991..0992; UNKNOWN
5061             0x0993,   // 0993..09A8; BENGALI
5062             0x09A9,   // 09A9      ; UNKNOWN
5063             0x09AA,   // 09AA..09B0; BENGALI
5064             0x09B1,   // 09B1      ; UNKNOWN
5065             0x09B2,   // 09B2      ; BENGALI
5066             0x09B3,   // 09B3..09B5; UNKNOWN
5067             0x09B6,   // 09B6..09B9; BENGALI
5068             0x09BA,   // 09BA..09BB; UNKNOWN
5069             0x09BC,   // 09BC..09C4; BENGALI
5070             0x09C5,   // 09C5..09C6; UNKNOWN
5071             0x09C7,   // 09C7..09C8; BENGALI
5072             0x09C9,   // 09C9..09CA; UNKNOWN
5073             0x09CB,   // 09CB..09CE; BENGALI
5074             0x09CF,   // 09CF..09D6; UNKNOWN
5075             0x09D7,   // 09D7      ; BENGALI
5076             0x09D8,   // 09D8..09DB; UNKNOWN
5077             0x09DC,   // 09DC..09DD; BENGALI
5078             0x09DE,   // 09DE      ; UNKNOWN
5079             0x09DF,   // 09DF..09E3; BENGALI
5080             0x09E4,   // 09E4..09E5; UNKNOWN
5081             0x09E6,   // 09E6..09FE; BENGALI
5082             0x09FF,   // 09FF..0A00; UNKNOWN
5083             0x0A01,   // 0A01..0A03; GURMUKHI
5084             0x0A04,   // 0A04      ; UNKNOWN
5085             0x0A05,   // 0A05..0A0A; GURMUKHI
5086             0x0A0B,   // 0A0B..0A0E; UNKNOWN
5087             0x0A0F,   // 0A0F..0A10; GURMUKHI
5088             0x0A11,   // 0A11..0A12; UNKNOWN
5089             0x0A13,   // 0A13..0A28; GURMUKHI
5090             0x0A29,   // 0A29      ; UNKNOWN
5091             0x0A2A,   // 0A2A..0A30; GURMUKHI
5092             0x0A31,   // 0A31      ; UNKNOWN
5093             0x0A32,   // 0A32..0A33; GURMUKHI
5094             0x0A34,   // 0A34      ; UNKNOWN
5095             0x0A35,   // 0A35..0A36; GURMUKHI
5096             0x0A37,   // 0A37      ; UNKNOWN
5097             0x0A38,   // 0A38..0A39; GURMUKHI
5098             0x0A3A,   // 0A3A..0A3B; UNKNOWN
5099             0x0A3C,   // 0A3C      ; GURMUKHI
5100             0x0A3D,   // 0A3D      ; UNKNOWN
5101             0x0A3E,   // 0A3E..0A42; GURMUKHI
5102             0x0A43,   // 0A43..0A46; UNKNOWN
5103             0x0A47,   // 0A47..0A48; GURMUKHI
5104             0x0A49,   // 0A49..0A4A; UNKNOWN
5105             0x0A4B,   // 0A4B..0A4D; GURMUKHI
5106             0x0A4E,   // 0A4E..0A50; UNKNOWN
5107             0x0A51,   // 0A51      ; GURMUKHI
5108             0x0A52,   // 0A52..0A58; UNKNOWN
5109             0x0A59,   // 0A59..0A5C; GURMUKHI
5110             0x0A5D,   // 0A5D      ; UNKNOWN
5111             0x0A5E,   // 0A5E      ; GURMUKHI
5112             0x0A5F,   // 0A5F..0A65; UNKNOWN
5113             0x0A66,   // 0A66..0A76; GURMUKHI
5114             0x0A77,   // 0A77..0A80; UNKNOWN
5115             0x0A81,   // 0A81..0A83; GUJARATI
5116             0x0A84,   // 0A84      ; UNKNOWN
5117             0x0A85,   // 0A85..0A8D; GUJARATI
5118             0x0A8E,   // 0A8E      ; UNKNOWN
5119             0x0A8F,   // 0A8F..0A91; GUJARATI
5120             0x0A92,   // 0A92      ; UNKNOWN
5121             0x0A93,   // 0A93..0AA8; GUJARATI
5122             0x0AA9,   // 0AA9      ; UNKNOWN
5123             0x0AAA,   // 0AAA..0AB0; GUJARATI
5124             0x0AB1,   // 0AB1      ; UNKNOWN
5125             0x0AB2,   // 0AB2..0AB3; GUJARATI
5126             0x0AB4,   // 0AB4      ; UNKNOWN
5127             0x0AB5,   // 0AB5..0AB9; GUJARATI
5128             0x0ABA,   // 0ABA..0ABB; UNKNOWN
5129             0x0ABC,   // 0ABC..0AC5; GUJARATI
5130             0x0AC6,   // 0AC6      ; UNKNOWN
5131             0x0AC7,   // 0AC7..0AC9; GUJARATI
5132             0x0ACA,   // 0ACA      ; UNKNOWN
5133             0x0ACB,   // 0ACB..0ACD; GUJARATI
5134             0x0ACE,   // 0ACE..0ACF; UNKNOWN
5135             0x0AD0,   // 0AD0      ; GUJARATI
5136             0x0AD1,   // 0AD1..0ADF; UNKNOWN
5137             0x0AE0,   // 0AE0..0AE3; GUJARATI
5138             0x0AE4,   // 0AE4..0AE5; UNKNOWN
5139             0x0AE6,   // 0AE6..0AF1; GUJARATI
5140             0x0AF2,   // 0AF2..0AF8; UNKNOWN
5141             0x0AF9,   // 0AF9..0AFF; GUJARATI
5142             0x0B00,   // 0B00      ; UNKNOWN
5143             0x0B01,   // 0B01..0B03; ORIYA
5144             0x0B04,   // 0B04      ; UNKNOWN
5145             0x0B05,   // 0B05..0B0C; ORIYA
5146             0x0B0D,   // 0B0D..0B0E; UNKNOWN
5147             0x0B0F,   // 0B0F..0B10; ORIYA
5148             0x0B11,   // 0B11..0B12; UNKNOWN
5149             0x0B13,   // 0B13..0B28; ORIYA
5150             0x0B29,   // 0B29      ; UNKNOWN
5151             0x0B2A,   // 0B2A..0B30; ORIYA
5152             0x0B31,   // 0B31      ; UNKNOWN
5153             0x0B32,   // 0B32..0B33; ORIYA
5154             0x0B34,   // 0B34      ; UNKNOWN
5155             0x0B35,   // 0B35..0B39; ORIYA
5156             0x0B3A,   // 0B3A..0B3B; UNKNOWN
5157             0x0B3C,   // 0B3C..0B44; ORIYA
5158             0x0B45,   // 0B45..0B46; UNKNOWN
5159             0x0B47,   // 0B47..0B48; ORIYA
5160             0x0B49,   // 0B49..0B4A; UNKNOWN
5161             0x0B4B,   // 0B4B..0B4D; ORIYA
5162             0x0B4E,   // 0B4E..0B55; UNKNOWN
5163             0x0B56,   // 0B56..0B57; ORIYA
5164             0x0B58,   // 0B58..0B5B; UNKNOWN
5165             0x0B5C,   // 0B5C..0B5D; ORIYA
5166             0x0B5E,   // 0B5E      ; UNKNOWN
5167             0x0B5F,   // 0B5F..0B63; ORIYA
5168             0x0B64,   // 0B64..0B65; UNKNOWN
5169             0x0B66,   // 0B66..0B77; ORIYA
5170             0x0B78,   // 0B78..0B81; UNKNOWN
5171             0x0B82,   // 0B82..0B83; TAMIL
5172             0x0B84,   // 0B84      ; UNKNOWN
5173             0x0B85,   // 0B85..0B8A; TAMIL
5174             0x0B8B,   // 0B8B..0B8D; UNKNOWN
5175             0x0B8E,   // 0B8E..0B90; TAMIL
5176             0x0B91,   // 0B91      ; UNKNOWN
5177             0x0B92,   // 0B92..0B95; TAMIL
5178             0x0B96,   // 0B96..0B98; UNKNOWN
5179             0x0B99,   // 0B99..0B9A; TAMIL
5180             0x0B9B,   // 0B9B      ; UNKNOWN
5181             0x0B9C,   // 0B9C      ; TAMIL
5182             0x0B9D,   // 0B9D      ; UNKNOWN
5183             0x0B9E,   // 0B9E..0B9F; TAMIL
5184             0x0BA0,   // 0BA0..0BA2; UNKNOWN
5185             0x0BA3,   // 0BA3..0BA4; TAMIL
5186             0x0BA5,   // 0BA5..0BA7; UNKNOWN
5187             0x0BA8,   // 0BA8..0BAA; TAMIL
5188             0x0BAB,   // 0BAB..0BAD; UNKNOWN
5189             0x0BAE,   // 0BAE..0BB9; TAMIL
5190             0x0BBA,   // 0BBA..0BBD; UNKNOWN
5191             0x0BBE,   // 0BBE..0BC2; TAMIL
5192             0x0BC3,   // 0BC3..0BC5; UNKNOWN
5193             0x0BC6,   // 0BC6..0BC8; TAMIL
5194             0x0BC9,   // 0BC9      ; UNKNOWN
5195             0x0BCA,   // 0BCA..0BCD; TAMIL
5196             0x0BCE,   // 0BCE..0BCF; UNKNOWN
5197             0x0BD0,   // 0BD0      ; TAMIL
5198             0x0BD1,   // 0BD1..0BD6; UNKNOWN
5199             0x0BD7,   // 0BD7      ; TAMIL
5200             0x0BD8,   // 0BD8..0BE5; UNKNOWN
5201             0x0BE6,   // 0BE6..0BFA; TAMIL
5202             0x0BFB,   // 0BFB..0BFF; UNKNOWN
5203             0x0C00,   // 0C00..0C0C; TELUGU
5204             0x0C0D,   // 0C0D      ; UNKNOWN
5205             0x0C0E,   // 0C0E..0C10; TELUGU
5206             0x0C11,   // 0C11      ; UNKNOWN
5207             0x0C12,   // 0C12..0C28; TELUGU
5208             0x0C29,   // 0C29      ; UNKNOWN
5209             0x0C2A,   // 0C2A..0C39; TELUGU
5210             0x0C3A,   // 0C3A..0C3C; UNKNOWN
5211             0x0C3D,   // 0C3D..0C44; TELUGU
5212             0x0C45,   // 0C45      ; UNKNOWN
5213             0x0C46,   // 0C46..0C48; TELUGU
5214             0x0C49,   // 0C49      ; UNKNOWN
5215             0x0C4A,   // 0C4A..0C4D; TELUGU
5216             0x0C4E,   // 0C4E..0C54; UNKNOWN
5217             0x0C55,   // 0C55..0C56; TELUGU
5218             0x0C57,   // 0C57      ; UNKNOWN
5219             0x0C58,   // 0C58..0C5A; TELUGU
5220             0x0C5B,   // 0C5B..0C5F; UNKNOWN
5221             0x0C60,   // 0C60..0C63; TELUGU
5222             0x0C64,   // 0C64..0C65; UNKNOWN
5223             0x0C66,   // 0C66..0C6F; TELUGU
5224             0x0C70,   // 0C70..0C76; UNKNOWN
5225             0x0C77,   // 0C77..0C7F; TELUGU
5226             0x0C80,   // 0C80..0C8C; KANNADA
5227             0x0C8D,   // 0C8D      ; UNKNOWN
5228             0x0C8E,   // 0C8E..0C90; KANNADA
5229             0x0C91,   // 0C91      ; UNKNOWN
5230             0x0C92,   // 0C92..0CA8; KANNADA
5231             0x0CA9,   // 0CA9      ; UNKNOWN
5232             0x0CAA,   // 0CAA..0CB3; KANNADA
5233             0x0CB4,   // 0CB4      ; UNKNOWN
5234             0x0CB5,   // 0CB5..0CB9; KANNADA
5235             0x0CBA,   // 0CBA..0CBB; UNKNOWN
5236             0x0CBC,   // 0CBC..0CC4; KANNADA
5237             0x0CC5,   // 0CC5      ; UNKNOWN
5238             0x0CC6,   // 0CC6..0CC8; KANNADA
5239             0x0CC9,   // 0CC9      ; UNKNOWN
5240             0x0CCA,   // 0CCA..0CCD; KANNADA
5241             0x0CCE,   // 0CCE..0CD4; UNKNOWN
5242             0x0CD5,   // 0CD5..0CD6; KANNADA
5243             0x0CD7,   // 0CD7..0CDD; UNKNOWN
5244             0x0CDE,   // 0CDE      ; KANNADA
5245             0x0CDF,   // 0CDF      ; UNKNOWN
5246             0x0CE0,   // 0CE0..0CE3; KANNADA
5247             0x0CE4,   // 0CE4..0CE5; UNKNOWN
5248             0x0CE6,   // 0CE6..0CEF; KANNADA
5249             0x0CF0,   // 0CF0      ; UNKNOWN
5250             0x0CF1,   // 0CF1..0CF2; KANNADA
5251             0x0CF3,   // 0CF3..0CFF; UNKNOWN
5252             0x0D00,   // 0D00..0D03; MALAYALAM
5253             0x0D04,   // 0D04      ; UNKNOWN
5254             0x0D05,   // 0D05..0D0C; MALAYALAM
5255             0x0D0D,   // 0D0D      ; UNKNOWN
5256             0x0D0E,   // 0D0E..0D10; MALAYALAM
5257             0x0D11,   // 0D11      ; UNKNOWN
5258             0x0D12,   // 0D12..0D44; MALAYALAM
5259             0x0D45,   // 0D45      ; UNKNOWN
5260             0x0D46,   // 0D46..0D48; MALAYALAM
5261             0x0D49,   // 0D49      ; UNKNOWN
5262             0x0D4A,   // 0D4A..0D4F; MALAYALAM
5263             0x0D50,   // 0D50..0D53; UNKNOWN
5264             0x0D54,   // 0D54..0D63; MALAYALAM
5265             0x0D64,   // 0D64..0D65; UNKNOWN
5266             0x0D66,   // 0D66..0D7F; MALAYALAM
5267             0x0D80,   // 0D80..0D81; UNKNOWN
5268             0x0D82,   // 0D82..0D83; SINHALA
5269             0x0D84,   // 0D84      ; UNKNOWN
5270             0x0D85,   // 0D85..0D96; SINHALA
5271             0x0D97,   // 0D97..0D99; UNKNOWN
5272             0x0D9A,   // 0D9A..0DB1; SINHALA
5273             0x0DB2,   // 0DB2      ; UNKNOWN
5274             0x0DB3,   // 0DB3..0DBB; SINHALA
5275             0x0DBC,   // 0DBC      ; UNKNOWN
5276             0x0DBD,   // 0DBD      ; SINHALA
5277             0x0DBE,   // 0DBE..0DBF; UNKNOWN
5278             0x0DC0,   // 0DC0..0DC6; SINHALA
5279             0x0DC7,   // 0DC7..0DC9; UNKNOWN
5280             0x0DCA,   // 0DCA      ; SINHALA
5281             0x0DCB,   // 0DCB..0DCE; UNKNOWN
5282             0x0DCF,   // 0DCF..0DD4; SINHALA
5283             0x0DD5,   // 0DD5      ; UNKNOWN
5284             0x0DD6,   // 0DD6      ; SINHALA
5285             0x0DD7,   // 0DD7      ; UNKNOWN
5286             0x0DD8,   // 0DD8..0DDF; SINHALA
5287             0x0DE0,   // 0DE0..0DE5; UNKNOWN
5288             0x0DE6,   // 0DE6..0DEF; SINHALA
5289             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5290             0x0DF2,   // 0DF2..0DF4; SINHALA
5291             0x0DF5,   // 0DF5..0E00; UNKNOWN
5292             0x0E01,   // 0E01..0E3A; THAI
5293             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5294             0x0E3F,   // 0E3F      ; COMMON
5295             0x0E40,   // 0E40..0E5B; THAI
5296             0x0E5C,   // 0E5C..0E80; UNKNOWN
5297             0x0E81,   // 0E81..0E82; LAO
5298             0x0E83,   // 0E83      ; UNKNOWN
5299             0x0E84,   // 0E84      ; LAO
5300             0x0E85,   // 0E85      ; UNKNOWN
5301             0x0E86,   // 0E86..0E8A; LAO
5302             0x0E8B,   // 0E8B      ; UNKNOWN
5303             0x0E8C,   // 0E8C..0EA3; LAO
5304             0x0EA4,   // 0EA4      ; UNKNOWN
5305             0x0EA5,   // 0EA5      ; LAO
5306             0x0EA6,   // 0EA6      ; UNKNOWN
5307             0x0EA7,   // 0EA7..0EBD; LAO
5308             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5309             0x0EC0,   // 0EC0..0EC4; LAO
5310             0x0EC5,   // 0EC5      ; UNKNOWN
5311             0x0EC6,   // 0EC6      ; LAO
5312             0x0EC7,   // 0EC7      ; UNKNOWN
5313             0x0EC8,   // 0EC8..0ECD; LAO
5314             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5315             0x0ED0,   // 0ED0..0ED9; LAO
5316             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5317             0x0EDC,   // 0EDC..0EDF; LAO
5318             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5319             0x0F00,   // 0F00..0F47; TIBETAN
5320             0x0F48,   // 0F48      ; UNKNOWN
5321             0x0F49,   // 0F49..0F6C; TIBETAN
5322             0x0F6D,   // 0F6D..0F70; UNKNOWN
5323             0x0F71,   // 0F71..0F97; TIBETAN
5324             0x0F98,   // 0F98      ; UNKNOWN
5325             0x0F99,   // 0F99..0FBC; TIBETAN
5326             0x0FBD,   // 0FBD      ; UNKNOWN
5327             0x0FBE,   // 0FBE..0FCC; TIBETAN
5328             0x0FCD,   // 0FCD      ; UNKNOWN
5329             0x0FCE,   // 0FCE..0FD4; TIBETAN
5330             0x0FD5,   // 0FD5..0FD8; COMMON
5331             0x0FD9,   // 0FD9..0FDA; TIBETAN
5332             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5333             0x1000,   // 1000..109F; MYANMAR
5334             0x10A0,   // 10A0..10C5; GEORGIAN
5335             0x10C6,   // 10C6      ; UNKNOWN
5336             0x10C7,   // 10C7      ; GEORGIAN
5337             0x10C8,   // 10C8..10CC; UNKNOWN
5338             0x10CD,   // 10CD      ; GEORGIAN
5339             0x10CE,   // 10CE..10CF; UNKNOWN
5340             0x10D0,   // 10D0..10FA; GEORGIAN
5341             0x10FB,   // 10FB      ; COMMON
5342             0x10FC,   // 10FC..10FF; GEORGIAN
5343             0x1100,   // 1100..11FF; HANGUL
5344             0x1200,   // 1200..1248; ETHIOPIC
5345             0x1249,   // 1249      ; UNKNOWN
5346             0x124A,   // 124A..124D; ETHIOPIC
5347             0x124E,   // 124E..124F; UNKNOWN
5348             0x1250,   // 1250..1256; ETHIOPIC
5349             0x1257,   // 1257      ; UNKNOWN
5350             0x1258,   // 1258      ; ETHIOPIC
5351             0x1259,   // 1259      ; UNKNOWN
5352             0x125A,   // 125A..125D; ETHIOPIC
5353             0x125E,   // 125E..125F; UNKNOWN
5354             0x1260,   // 1260..1288; ETHIOPIC
5355             0x1289,   // 1289      ; UNKNOWN
5356             0x128A,   // 128A..128D; ETHIOPIC
5357             0x128E,   // 128E..128F; UNKNOWN
5358             0x1290,   // 1290..12B0; ETHIOPIC
5359             0x12B1,   // 12B1      ; UNKNOWN
5360             0x12B2,   // 12B2..12B5; ETHIOPIC
5361             0x12B6,   // 12B6..12B7; UNKNOWN
5362             0x12B8,   // 12B8..12BE; ETHIOPIC
5363             0x12BF,   // 12BF      ; UNKNOWN
5364             0x12C0,   // 12C0      ; ETHIOPIC
5365             0x12C1,   // 12C1      ; UNKNOWN
5366             0x12C2,   // 12C2..12C5; ETHIOPIC
5367             0x12C6,   // 12C6..12C7; UNKNOWN
5368             0x12C8,   // 12C8..12D6; ETHIOPIC
5369             0x12D7,   // 12D7      ; UNKNOWN
5370             0x12D8,   // 12D8..1310; ETHIOPIC
5371             0x1311,   // 1311      ; UNKNOWN
5372             0x1312,   // 1312..1315; ETHIOPIC
5373             0x1316,   // 1316..1317; UNKNOWN
5374             0x1318,   // 1318..135A; ETHIOPIC
5375             0x135B,   // 135B..135C; UNKNOWN
5376             0x135D,   // 135D..137C; ETHIOPIC
5377             0x137D,   // 137D..137F; UNKNOWN
5378             0x1380,   // 1380..1399; ETHIOPIC
5379             0x139A,   // 139A..139F; UNKNOWN
5380             0x13A0,   // 13A0..13F5; CHEROKEE
5381             0x13F6,   // 13F6..13F7; UNKNOWN
5382             0x13F8,   // 13F8..13FD; CHEROKEE
5383             0x13FE,   // 13FE..13FF; UNKNOWN
5384             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5385             0x1680,   // 1680..169C; OGHAM
5386             0x169D,   // 169D..169F; UNKNOWN
5387             0x16A0,   // 16A0..16EA; RUNIC
5388             0x16EB,   // 16EB..16ED; COMMON
5389             0x16EE,   // 16EE..16F8; RUNIC
5390             0x16F9,   // 16F9..16FF; UNKNOWN
5391             0x1700,   // 1700..170C; TAGALOG
5392             0x170D,   // 170D      ; UNKNOWN
5393             0x170E,   // 170E..1714; TAGALOG
5394             0x1715,   // 1715..171F; UNKNOWN
5395             0x1720,   // 1720..1734; HANUNOO
5396             0x1735,   // 1735..1736; COMMON
5397             0x1737,   // 1737..173F; UNKNOWN
5398             0x1740,   // 1740..1753; BUHID
5399             0x1754,   // 1754..175F; UNKNOWN
5400             0x1760,   // 1760..176C; TAGBANWA
5401             0x176D,   // 176D      ; UNKNOWN
5402             0x176E,   // 176E..1770; TAGBANWA
5403             0x1771,   // 1771      ; UNKNOWN
5404             0x1772,   // 1772..1773; TAGBANWA
5405             0x1774,   // 1774..177F; UNKNOWN
5406             0x1780,   // 1780..17DD; KHMER
5407             0x17DE,   // 17DE..17DF; UNKNOWN
5408             0x17E0,   // 17E0..17E9; KHMER
5409             0x17EA,   // 17EA..17EF; UNKNOWN
5410             0x17F0,   // 17F0..17F9; KHMER
5411             0x17FA,   // 17FA..17FF; UNKNOWN
5412             0x1800,   // 1800..1801; MONGOLIAN
5413             0x1802,   // 1802..1803; COMMON
5414             0x1804,   // 1804      ; MONGOLIAN
5415             0x1805,   // 1805      ; COMMON
5416             0x1806,   // 1806..180E; MONGOLIAN
5417             0x180F,   // 180F      ; UNKNOWN
5418             0x1810,   // 1810..1819; MONGOLIAN
5419             0x181A,   // 181A..181F; UNKNOWN
5420             0x1820,   // 1820..1878; MONGOLIAN
5421             0x1879,   // 1879..187F; UNKNOWN
5422             0x1880,   // 1880..18AA; MONGOLIAN
5423             0x18AB,   // 18AB..18AF; UNKNOWN
5424             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5425             0x18F6,   // 18F6..18FF; UNKNOWN
5426             0x1900,   // 1900..191E; LIMBU
5427             0x191F,   // 191F      ; UNKNOWN
5428             0x1920,   // 1920..192B; LIMBU
5429             0x192C,   // 192C..192F; UNKNOWN
5430             0x1930,   // 1930..193B; LIMBU
5431             0x193C,   // 193C..193F; UNKNOWN
5432             0x1940,   // 1940      ; LIMBU
5433             0x1941,   // 1941..1943; UNKNOWN
5434             0x1944,   // 1944..194F; LIMBU
5435             0x1950,   // 1950..196D; TAI_LE
5436             0x196E,   // 196E..196F; UNKNOWN
5437             0x1970,   // 1970..1974; TAI_LE
5438             0x1975,   // 1975..197F; UNKNOWN
5439             0x1980,   // 1980..19AB; NEW_TAI_LUE
5440             0x19AC,   // 19AC..19AF; UNKNOWN
5441             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5442             0x19CA,   // 19CA..19CF; UNKNOWN
5443             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5444             0x19DB,   // 19DB..19DD; UNKNOWN
5445             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5446             0x19E0,   // 19E0..19FF; KHMER
5447             0x1A00,   // 1A00..1A1B; BUGINESE
5448             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5449             0x1A1E,   // 1A1E..1A1F; BUGINESE
5450             0x1A20,   // 1A20..1A5E; TAI_THAM
5451             0x1A5F,   // 1A5F      ; UNKNOWN
5452             0x1A60,   // 1A60..1A7C; TAI_THAM
5453             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5454             0x1A7F,   // 1A7F..1A89; TAI_THAM
5455             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5456             0x1A90,   // 1A90..1A99; TAI_THAM
5457             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5458             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5459             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5460             0x1AB0,   // 1AB0..1ABE; INHERITED
5461             0x1ABF,   // 1ABF..1AFF; UNKNOWN
5462             0x1B00,   // 1B00..1B4B; BALINESE
5463             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5464             0x1B50,   // 1B50..1B7C; BALINESE
5465             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5466             0x1B80,   // 1B80..1BBF; SUNDANESE
5467             0x1BC0,   // 1BC0..1BF3; BATAK
5468             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5469             0x1BFC,   // 1BFC..1BFF; BATAK
5470             0x1C00,   // 1C00..1C37; LEPCHA
5471             0x1C38,   // 1C38..1C3A; UNKNOWN
5472             0x1C3B,   // 1C3B..1C49; LEPCHA
5473             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5474             0x1C4D,   // 1C4D..1C4F; LEPCHA
5475             0x1C50,   // 1C50..1C7F; OL_CHIKI
5476             0x1C80,   // 1C80..1C88; CYRILLIC
5477             0x1C89,   // 1C89..1C8F; UNKNOWN
5478             0x1C90,   // 1C90..1CBA; GEORGIAN
5479             0x1CBB,   // 1CBB..1CBC; UNKNOWN
5480             0x1CBD,   // 1CBD..1CBF; GEORGIAN
5481             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5482             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5483             0x1CD0,   // 1CD0..1CD2; INHERITED
5484             0x1CD3,   // 1CD3      ; COMMON
5485             0x1CD4,   // 1CD4..1CE0; INHERITED
5486             0x1CE1,   // 1CE1      ; COMMON
5487             0x1CE2,   // 1CE2..1CE8; INHERITED
5488             0x1CE9,   // 1CE9..1CEC; COMMON
5489             0x1CED,   // 1CED      ; INHERITED
5490             0x1CEE,   // 1CEE..1CF3; COMMON
5491             0x1CF4,   // 1CF4      ; INHERITED
5492             0x1CF5,   // 1CF5..1CF7; COMMON
5493             0x1CF8,   // 1CF8..1CF9; INHERITED
5494             0x1CFA,   // 1CFA      ; COMMON
5495             0x1CFB,   // 1CFB..1CFF; UNKNOWN
5496             0x1D00,   // 1D00..1D25; LATIN
5497             0x1D26,   // 1D26..1D2A; GREEK
5498             0x1D2B,   // 1D2B      ; CYRILLIC
5499             0x1D2C,   // 1D2C..1D5C; LATIN
5500             0x1D5D,   // 1D5D..1D61; GREEK
5501             0x1D62,   // 1D62..1D65; LATIN
5502             0x1D66,   // 1D66..1D6A; GREEK
5503             0x1D6B,   // 1D6B..1D77; LATIN
5504             0x1D78,   // 1D78      ; CYRILLIC
5505             0x1D79,   // 1D79..1DBE; LATIN
5506             0x1DBF,   // 1DBF      ; GREEK
5507             0x1DC0,   // 1DC0..1DF9; INHERITED
5508             0x1DFA,   // 1DFA      ; UNKNOWN
5509             0x1DFB,   // 1DFB..1DFF; INHERITED
5510             0x1E00,   // 1E00..1EFF; LATIN
5511             0x1F00,   // 1F00..1F15; GREEK
5512             0x1F16,   // 1F16..1F17; UNKNOWN
5513             0x1F18,   // 1F18..1F1D; GREEK
5514             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5515             0x1F20,   // 1F20..1F45; GREEK
5516             0x1F46,   // 1F46..1F47; UNKNOWN
5517             0x1F48,   // 1F48..1F4D; GREEK
5518             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5519             0x1F50,   // 1F50..1F57; GREEK
5520             0x1F58,   // 1F58      ; UNKNOWN
5521             0x1F59,   // 1F59      ; GREEK
5522             0x1F5A,   // 1F5A      ; UNKNOWN
5523             0x1F5B,   // 1F5B      ; GREEK
5524             0x1F5C,   // 1F5C      ; UNKNOWN
5525             0x1F5D,   // 1F5D      ; GREEK
5526             0x1F5E,   // 1F5E      ; UNKNOWN
5527             0x1F5F,   // 1F5F..1F7D; GREEK
5528             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5529             0x1F80,   // 1F80..1FB4; GREEK
5530             0x1FB5,   // 1FB5      ; UNKNOWN
5531             0x1FB6,   // 1FB6..1FC4; GREEK
5532             0x1FC5,   // 1FC5      ; UNKNOWN
5533             0x1FC6,   // 1FC6..1FD3; GREEK
5534             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5535             0x1FD6,   // 1FD6..1FDB; GREEK
5536             0x1FDC,   // 1FDC      ; UNKNOWN
5537             0x1FDD,   // 1FDD..1FEF; GREEK
5538             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5539             0x1FF2,   // 1FF2..1FF4; GREEK
5540             0x1FF5,   // 1FF5      ; UNKNOWN
5541             0x1FF6,   // 1FF6..1FFE; GREEK
5542             0x1FFF,   // 1FFF      ; UNKNOWN
5543             0x2000,   // 2000..200B; COMMON
5544             0x200C,   // 200C..200D; INHERITED
5545             0x200E,   // 200E..2064; COMMON
5546             0x2065,   // 2065      ; UNKNOWN
5547             0x2066,   // 2066..2070; COMMON
5548             0x2071,   // 2071      ; LATIN
5549             0x2072,   // 2072..2073; UNKNOWN
5550             0x2074,   // 2074..207E; COMMON
5551             0x207F,   // 207F      ; LATIN
5552             0x2080,   // 2080..208E; COMMON
5553             0x208F,   // 208F      ; UNKNOWN
5554             0x2090,   // 2090..209C; LATIN
5555             0x209D,   // 209D..209F; UNKNOWN
5556             0x20A0,   // 20A0..20BF; COMMON
5557             0x20C0,   // 20C0..20CF; UNKNOWN
5558             0x20D0,   // 20D0..20F0; INHERITED
5559             0x20F1,   // 20F1..20FF; UNKNOWN
5560             0x2100,   // 2100..2125; COMMON
5561             0x2126,   // 2126      ; GREEK
5562             0x2127,   // 2127..2129; COMMON
5563             0x212A,   // 212A..212B; LATIN
5564             0x212C,   // 212C..2131; COMMON
5565             0x2132,   // 2132      ; LATIN
5566             0x2133,   // 2133..214D; COMMON
5567             0x214E,   // 214E      ; LATIN
5568             0x214F,   // 214F..215F; COMMON
5569             0x2160,   // 2160..2188; LATIN
5570             0x2189,   // 2189..218B; COMMON
5571             0x218C,   // 218C..218F; UNKNOWN
5572             0x2190,   // 2190..2426; COMMON
5573             0x2427,   // 2427..243F; UNKNOWN
5574             0x2440,   // 2440..244A; COMMON
5575             0x244B,   // 244B..245F; UNKNOWN
5576             0x2460,   // 2460..27FF; COMMON
5577             0x2800,   // 2800..28FF; BRAILLE
5578             0x2900,   // 2900..2B73; COMMON
5579             0x2B74,   // 2B74..2B75; UNKNOWN
5580             0x2B76,   // 2B76..2B95; COMMON
5581             0x2B96,   // 2B96..2B97; UNKNOWN
5582             0x2B98,   // 2B98..2BFF; COMMON
5583             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5584             0x2C2F,   // 2C2F      ; UNKNOWN
5585             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5586             0x2C5F,   // 2C5F      ; UNKNOWN
5587             0x2C60,   // 2C60..2C7F; LATIN
5588             0x2C80,   // 2C80..2CF3; COPTIC
5589             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5590             0x2CF9,   // 2CF9..2CFF; COPTIC
5591             0x2D00,   // 2D00..2D25; GEORGIAN
5592             0x2D26,   // 2D26      ; UNKNOWN
5593             0x2D27,   // 2D27      ; GEORGIAN
5594             0x2D28,   // 2D28..2D2C; UNKNOWN
5595             0x2D2D,   // 2D2D      ; GEORGIAN
5596             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5597             0x2D30,   // 2D30..2D67; TIFINAGH
5598             0x2D68,   // 2D68..2D6E; UNKNOWN
5599             0x2D6F,   // 2D6F..2D70; TIFINAGH
5600             0x2D71,   // 2D71..2D7E; UNKNOWN
5601             0x2D7F,   // 2D7F      ; TIFINAGH
5602             0x2D80,   // 2D80..2D96; ETHIOPIC
5603             0x2D97,   // 2D97..2D9F; UNKNOWN
5604             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5605             0x2DA7,   // 2DA7      ; UNKNOWN
5606             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5607             0x2DAF,   // 2DAF      ; UNKNOWN
5608             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5609             0x2DB7,   // 2DB7      ; UNKNOWN
5610             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5611             0x2DBF,   // 2DBF      ; UNKNOWN
5612             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5613             0x2DC7,   // 2DC7      ; UNKNOWN
5614             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5615             0x2DCF,   // 2DCF      ; UNKNOWN
5616             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5617             0x2DD7,   // 2DD7      ; UNKNOWN
5618             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5619             0x2DDF,   // 2DDF      ; UNKNOWN
5620             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5621             0x2E00,   // 2E00..2E4F; COMMON
5622             0x2E50,   // 2E50..2E7F; UNKNOWN
5623             0x2E80,   // 2E80..2E99; HAN
5624             0x2E9A,   // 2E9A      ; UNKNOWN
5625             0x2E9B,   // 2E9B..2EF3; HAN
5626             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5627             0x2F00,   // 2F00..2FD5; HAN
5628             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5629             0x2FF0,   // 2FF0..2FFB; COMMON
5630             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5631             0x3000,   // 3000..3004; COMMON
5632             0x3005,   // 3005      ; HAN
5633             0x3006,   // 3006      ; COMMON
5634             0x3007,   // 3007      ; HAN
5635             0x3008,   // 3008..3020; COMMON
5636             0x3021,   // 3021..3029; HAN
5637             0x302A,   // 302A..302D; INHERITED
5638             0x302E,   // 302E..302F; HANGUL
5639             0x3030,   // 3030..3037; COMMON
5640             0x3038,   // 3038..303B; HAN
5641             0x303C,   // 303C..303F; COMMON
5642             0x3040,   // 3040      ; UNKNOWN
5643             0x3041,   // 3041..3096; HIRAGANA
5644             0x3097,   // 3097..3098; UNKNOWN
5645             0x3099,   // 3099..309A; INHERITED
5646             0x309B,   // 309B..309C; COMMON
5647             0x309D,   // 309D..309F; HIRAGANA
5648             0x30A0,   // 30A0      ; COMMON
5649             0x30A1,   // 30A1..30FA; KATAKANA
5650             0x30FB,   // 30FB..30FC; COMMON
5651             0x30FD,   // 30FD..30FF; KATAKANA
5652             0x3100,   // 3100..3104; UNKNOWN
5653             0x3105,   // 3105..312F; BOPOMOFO
5654             0x3130,   // 3130      ; UNKNOWN
5655             0x3131,   // 3131..318E; HANGUL
5656             0x318F,   // 318F      ; UNKNOWN
5657             0x3190,   // 3190..319F; COMMON
5658             0x31A0,   // 31A0..31BA; BOPOMOFO
5659             0x31BB,   // 31BB..31BF; UNKNOWN
5660             0x31C0,   // 31C0..31E3; COMMON
5661             0x31E4,   // 31E4..31EF; UNKNOWN
5662             0x31F0,   // 31F0..31FF; KATAKANA
5663             0x3200,   // 3200..321E; HANGUL
5664             0x321F,   // 321F      ; UNKNOWN
5665             0x3220,   // 3220..325F; COMMON
5666             0x3260,   // 3260..327E; HANGUL
5667             0x327F,   // 327F..32CF; COMMON
5668             0x32D0,   // 32D0..32FE; KATAKANA
5669             0x32FF,   // 32FF      ; COMMON
5670             0x3300,   // 3300..3357; KATAKANA
5671             0x3358,   // 3358..33FF; COMMON
5672             0x3400,   // 3400..4DB5; HAN
5673             0x4DB6,   // 4DB6..4DBF; UNKNOWN
5674             0x4DC0,   // 4DC0..4DFF; COMMON
5675             0x4E00,   // 4E00..9FEF; HAN
5676             0x9FF0,   // 9FF0..9FFF; UNKNOWN
5677             0xA000,   // A000..A48C; YI
5678             0xA48D,   // A48D..A48F; UNKNOWN
5679             0xA490,   // A490..A4C6; YI
5680             0xA4C7,   // A4C7..A4CF; UNKNOWN
5681             0xA4D0,   // A4D0..A4FF; LISU
5682             0xA500,   // A500..A62B; VAI
5683             0xA62C,   // A62C..A63F; UNKNOWN
5684             0xA640,   // A640..A69F; CYRILLIC
5685             0xA6A0,   // A6A0..A6F7; BAMUM
5686             0xA6F8,   // A6F8..A6FF; UNKNOWN
5687             0xA700,   // A700..A721; COMMON
5688             0xA722,   // A722..A787; LATIN
5689             0xA788,   // A788..A78A; COMMON
5690             0xA78B,   // A78B..A7BF; LATIN
5691             0xA7C0,   // A7C0..A7C1; UNKNOWN
5692             0xA7C2,   // A7C2..A7C6; LATIN
5693             0xA7C7,   // A7C7..A7F6; UNKNOWN
5694             0xA7F7,   // A7F7..A7FF; LATIN
5695             0xA800,   // A800..A82B; SYLOTI_NAGRI
5696             0xA82C,   // A82C..A82F; UNKNOWN
5697             0xA830,   // A830..A839; COMMON
5698             0xA83A,   // A83A..A83F; UNKNOWN
5699             0xA840,   // A840..A877; PHAGS_PA
5700             0xA878,   // A878..A87F; UNKNOWN
5701             0xA880,   // A880..A8C5; SAURASHTRA
5702             0xA8C6,   // A8C6..A8CD; UNKNOWN
5703             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5704             0xA8DA,   // A8DA..A8DF; UNKNOWN
5705             0xA8E0,   // A8E0..A8FF; DEVANAGARI
5706             0xA900,   // A900..A92D; KAYAH_LI
5707             0xA92E,   // A92E      ; COMMON
5708             0xA92F,   // A92F      ; KAYAH_LI
5709             0xA930,   // A930..A953; REJANG
5710             0xA954,   // A954..A95E; UNKNOWN
5711             0xA95F,   // A95F      ; REJANG
5712             0xA960,   // A960..A97C; HANGUL
5713             0xA97D,   // A97D..A97F; UNKNOWN
5714             0xA980,   // A980..A9CD; JAVANESE
5715             0xA9CE,   // A9CE      ; UNKNOWN
5716             0xA9CF,   // A9CF      ; COMMON
5717             0xA9D0,   // A9D0..A9D9; JAVANESE
5718             0xA9DA,   // A9DA..A9DD; UNKNOWN
5719             0xA9DE,   // A9DE..A9DF; JAVANESE
5720             0xA9E0,   // A9E0..A9FE; MYANMAR
5721             0xA9FF,   // A9FF      ; UNKNOWN
5722             0xAA00,   // AA00..AA36; CHAM
5723             0xAA37,   // AA37..AA3F; UNKNOWN
5724             0xAA40,   // AA40..AA4D; CHAM
5725             0xAA4E,   // AA4E..AA4F; UNKNOWN
5726             0xAA50,   // AA50..AA59; CHAM
5727             0xAA5A,   // AA5A..AA5B; UNKNOWN
5728             0xAA5C,   // AA5C..AA5F; CHAM
5729             0xAA60,   // AA60..AA7F; MYANMAR
5730             0xAA80,   // AA80..AAC2; TAI_VIET
5731             0xAAC3,   // AAC3..AADA; UNKNOWN
5732             0xAADB,   // AADB..AADF; TAI_VIET
5733             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5734             0xAAF7,   // AAF7..AB00; UNKNOWN
5735             0xAB01,   // AB01..AB06; ETHIOPIC
5736             0xAB07,   // AB07..AB08; UNKNOWN
5737             0xAB09,   // AB09..AB0E; ETHIOPIC
5738             0xAB0F,   // AB0F..AB10; UNKNOWN
5739             0xAB11,   // AB11..AB16; ETHIOPIC
5740             0xAB17,   // AB17..AB1F; UNKNOWN
5741             0xAB20,   // AB20..AB26; ETHIOPIC
5742             0xAB27,   // AB27      ; UNKNOWN
5743             0xAB28,   // AB28..AB2E; ETHIOPIC
5744             0xAB2F,   // AB2F      ; UNKNOWN
5745             0xAB30,   // AB30..AB5A; LATIN
5746             0xAB5B,   // AB5B      ; COMMON
5747             0xAB5C,   // AB5C..AB64; LATIN
5748             0xAB65,   // AB65      ; GREEK
5749             0xAB66,   // AB66..AB67; LATIN
5750             0xAB68,   // AB68..AB6F; UNKNOWN
5751             0xAB70,   // AB70..ABBF; CHEROKEE
5752             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5753             0xABEE,   // ABEE..ABEF; UNKNOWN
5754             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5755             0xABFA,   // ABFA..ABFF; UNKNOWN
5756             0xAC00,   // AC00..D7A3; HANGUL
5757             0xD7A4,   // D7A4..D7AF; UNKNOWN
5758             0xD7B0,   // D7B0..D7C6; HANGUL
5759             0xD7C7,   // D7C7..D7CA; UNKNOWN
5760             0xD7CB,   // D7CB..D7FB; HANGUL
5761             0xD7FC,   // D7FC..F8FF; UNKNOWN
5762             0xF900,   // F900..FA6D; HAN
5763             0xFA6E,   // FA6E..FA6F; UNKNOWN
5764             0xFA70,   // FA70..FAD9; HAN
5765             0xFADA,   // FADA..FAFF; UNKNOWN
5766             0xFB00,   // FB00..FB06; LATIN
5767             0xFB07,   // FB07..FB12; UNKNOWN
5768             0xFB13,   // FB13..FB17; ARMENIAN
5769             0xFB18,   // FB18..FB1C; UNKNOWN
5770             0xFB1D,   // FB1D..FB36; HEBREW
5771             0xFB37,   // FB37      ; UNKNOWN
5772             0xFB38,   // FB38..FB3C; HEBREW
5773             0xFB3D,   // FB3D      ; UNKNOWN
5774             0xFB3E,   // FB3E      ; HEBREW
5775             0xFB3F,   // FB3F      ; UNKNOWN
5776             0xFB40,   // FB40..FB41; HEBREW
5777             0xFB42,   // FB42      ; UNKNOWN
5778             0xFB43,   // FB43..FB44; HEBREW
5779             0xFB45,   // FB45      ; UNKNOWN
5780             0xFB46,   // FB46..FB4F; HEBREW
5781             0xFB50,   // FB50..FBC1; ARABIC
5782             0xFBC2,   // FBC2..FBD2; UNKNOWN
5783             0xFBD3,   // FBD3..FD3D; ARABIC
5784             0xFD3E,   // FD3E..FD3F; COMMON
5785             0xFD40,   // FD40..FD4F; UNKNOWN
5786             0xFD50,   // FD50..FD8F; ARABIC
5787             0xFD90,   // FD90..FD91; UNKNOWN
5788             0xFD92,   // FD92..FDC7; ARABIC
5789             0xFDC8,   // FDC8..FDEF; UNKNOWN
5790             0xFDF0,   // FDF0..FDFD; ARABIC
5791             0xFDFE,   // FDFE..FDFF; UNKNOWN
5792             0xFE00,   // FE00..FE0F; INHERITED
5793             0xFE10,   // FE10..FE19; COMMON
5794             0xFE1A,   // FE1A..FE1F; UNKNOWN
5795             0xFE20,   // FE20..FE2D; INHERITED
5796             0xFE2E,   // FE2E..FE2F; CYRILLIC
5797             0xFE30,   // FE30..FE52; COMMON
5798             0xFE53,   // FE53      ; UNKNOWN
5799             0xFE54,   // FE54..FE66; COMMON
5800             0xFE67,   // FE67      ; UNKNOWN
5801             0xFE68,   // FE68..FE6B; COMMON
5802             0xFE6C,   // FE6C..FE6F; UNKNOWN
5803             0xFE70,   // FE70..FE74; ARABIC
5804             0xFE75,   // FE75      ; UNKNOWN
5805             0xFE76,   // FE76..FEFC; ARABIC
5806             0xFEFD,   // FEFD..FEFE; UNKNOWN
5807             0xFEFF,   // FEFF      ; COMMON
5808             0xFF00,   // FF00      ; UNKNOWN
5809             0xFF01,   // FF01..FF20; COMMON
5810             0xFF21,   // FF21..FF3A; LATIN
5811             0xFF3B,   // FF3B..FF40; COMMON
5812             0xFF41,   // FF41..FF5A; LATIN
5813             0xFF5B,   // FF5B..FF65; COMMON
5814             0xFF66,   // FF66..FF6F; KATAKANA
5815             0xFF70,   // FF70      ; COMMON
5816             0xFF71,   // FF71..FF9D; KATAKANA
5817             0xFF9E,   // FF9E..FF9F; COMMON
5818             0xFFA0,   // FFA0..FFBE; HANGUL
5819             0xFFBF,   // FFBF..FFC1; UNKNOWN
5820             0xFFC2,   // FFC2..FFC7; HANGUL
5821             0xFFC8,   // FFC8..FFC9; UNKNOWN
5822             0xFFCA,   // FFCA..FFCF; HANGUL
5823             0xFFD0,   // FFD0..FFD1; UNKNOWN
5824             0xFFD2,   // FFD2..FFD7; HANGUL
5825             0xFFD8,   // FFD8..FFD9; UNKNOWN
5826             0xFFDA,   // FFDA..FFDC; HANGUL
5827             0xFFDD,   // FFDD..FFDF; UNKNOWN
5828             0xFFE0,   // FFE0..FFE6; COMMON
5829             0xFFE7,   // FFE7      ; UNKNOWN
5830             0xFFE8,   // FFE8..FFEE; COMMON
5831             0xFFEF,   // FFEF..FFF8; UNKNOWN
5832             0xFFF9,   // FFF9..FFFD; COMMON
5833             0xFFFE,   // FFFE..FFFF; UNKNOWN
5834             0x10000,  // 10000..1000B; LINEAR_B
5835             0x1000C,  // 1000C       ; UNKNOWN
5836             0x1000D,  // 1000D..10026; LINEAR_B
5837             0x10027,  // 10027       ; UNKNOWN
5838             0x10028,  // 10028..1003A; LINEAR_B
5839             0x1003B,  // 1003B       ; UNKNOWN
5840             0x1003C,  // 1003C..1003D; LINEAR_B
5841             0x1003E,  // 1003E       ; UNKNOWN
5842             0x1003F,  // 1003F..1004D; LINEAR_B
5843             0x1004E,  // 1004E..1004F; UNKNOWN
5844             0x10050,  // 10050..1005D; LINEAR_B
5845             0x1005E,  // 1005E..1007F; UNKNOWN
5846             0x10080,  // 10080..100FA; LINEAR_B
5847             0x100FB,  // 100FB..100FF; UNKNOWN
5848             0x10100,  // 10100..10102; COMMON
5849             0x10103,  // 10103..10106; UNKNOWN
5850             0x10107,  // 10107..10133; COMMON
5851             0x10134,  // 10134..10136; UNKNOWN
5852             0x10137,  // 10137..1013F; COMMON
5853             0x10140,  // 10140..1018E; GREEK
5854             0x1018F,  // 1018F       ; UNKNOWN
5855             0x10190,  // 10190..1019B; COMMON
5856             0x1019C,  // 1019C..1019F; UNKNOWN
5857             0x101A0,  // 101A0       ; GREEK
5858             0x101A1,  // 101A1..101CF; UNKNOWN
5859             0x101D0,  // 101D0..101FC; COMMON
5860             0x101FD,  // 101FD       ; INHERITED
5861             0x101FE,  // 101FE..1027F; UNKNOWN
5862             0x10280,  // 10280..1029C; LYCIAN
5863             0x1029D,  // 1029D..1029F; UNKNOWN
5864             0x102A0,  // 102A0..102D0; CARIAN
5865             0x102D1,  // 102D1..102DF; UNKNOWN
5866             0x102E0,  // 102E0       ; INHERITED
5867             0x102E1,  // 102E1..102FB; COMMON
5868             0x102FC,  // 102FC..102FF; UNKNOWN
5869             0x10300,  // 10300..10323; OLD_ITALIC
5870             0x10324,  // 10324..1032C; UNKNOWN
5871             0x1032D,  // 1032D..1032F; OLD_ITALIC
5872             0x10330,  // 10330..1034A; GOTHIC
5873             0x1034B,  // 1034B..1034F; UNKNOWN
5874             0x10350,  // 10350..1037A; OLD_PERMIC
5875             0x1037B,  // 1037B..1037F; UNKNOWN
5876             0x10380,  // 10380..1039D; UGARITIC
5877             0x1039E,  // 1039E       ; UNKNOWN
5878             0x1039F,  // 1039F       ; UGARITIC
5879             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5880             0x103C4,  // 103C4..103C7; UNKNOWN
5881             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5882             0x103D6,  // 103D6..103FF; UNKNOWN
5883             0x10400,  // 10400..1044F; DESERET
5884             0x10450,  // 10450..1047F; SHAVIAN
5885             0x10480,  // 10480..1049D; OSMANYA
5886             0x1049E,  // 1049E..1049F; UNKNOWN
5887             0x104A0,  // 104A0..104A9; OSMANYA
5888             0x104AA,  // 104AA..104AF; UNKNOWN
5889             0x104B0,  // 104B0..104D3; OSAGE
5890             0x104D4,  // 104D4..104D7; UNKNOWN
5891             0x104D8,  // 104D8..104FB; OSAGE
5892             0x104FC,  // 104FC..104FF; UNKNOWN
5893             0x10500,  // 10500..10527; ELBASAN
5894             0x10528,  // 10528..1052F; UNKNOWN
5895             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5896             0x10564,  // 10564..1056E; UNKNOWN
5897             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5898             0x10570,  // 10570..105FF; UNKNOWN
5899             0x10600,  // 10600..10736; LINEAR_A
5900             0x10737,  // 10737..1073F; UNKNOWN
5901             0x10740,  // 10740..10755; LINEAR_A
5902             0x10756,  // 10756..1075F; UNKNOWN
5903             0x10760,  // 10760..10767; LINEAR_A
5904             0x10768,  // 10768..107FF; UNKNOWN
5905             0x10800,  // 10800..10805; CYPRIOT
5906             0x10806,  // 10806..10807; UNKNOWN
5907             0x10808,  // 10808       ; CYPRIOT
5908             0x10809,  // 10809       ; UNKNOWN
5909             0x1080A,  // 1080A..10835; CYPRIOT
5910             0x10836,  // 10836       ; UNKNOWN
5911             0x10837,  // 10837..10838; CYPRIOT
5912             0x10839,  // 10839..1083B; UNKNOWN
5913             0x1083C,  // 1083C       ; CYPRIOT
5914             0x1083D,  // 1083D..1083E; UNKNOWN
5915             0x1083F,  // 1083F       ; CYPRIOT
5916             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5917             0x10856,  // 10856       ; UNKNOWN
5918             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5919             0x10860,  // 10860..1087F; PALMYRENE
5920             0x10880,  // 10880..1089E; NABATAEAN
5921             0x1089F,  // 1089F..108A6; UNKNOWN
5922             0x108A7,  // 108A7..108AF; NABATAEAN
5923             0x108B0,  // 108B0..108DF; UNKNOWN
5924             0x108E0,  // 108E0..108F2; HATRAN
5925             0x108F3,  // 108F3       ; UNKNOWN
5926             0x108F4,  // 108F4..108F5; HATRAN
5927             0x108F6,  // 108F6..108FA; UNKNOWN
5928             0x108FB,  // 108FB..108FF; HATRAN
5929             0x10900,  // 10900..1091B; PHOENICIAN
5930             0x1091C,  // 1091C..1091E; UNKNOWN
5931             0x1091F,  // 1091F       ; PHOENICIAN
5932             0x10920,  // 10920..10939; LYDIAN
5933             0x1093A,  // 1093A..1093E; UNKNOWN
5934             0x1093F,  // 1093F       ; LYDIAN
5935             0x10940,  // 10940..1097F; UNKNOWN
5936             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5937             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5938             0x109B8,  // 109B8..109BB; UNKNOWN
5939             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
5940             0x109D0,  // 109D0..109D1; UNKNOWN
5941             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
5942             0x10A00,  // 10A00..10A03; KHAROSHTHI
5943             0x10A04,  // 10A04       ; UNKNOWN
5944             0x10A05,  // 10A05..10A06; KHAROSHTHI
5945             0x10A07,  // 10A07..10A0B; UNKNOWN
5946             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5947             0x10A14,  // 10A14       ; UNKNOWN
5948             0x10A15,  // 10A15..10A17; KHAROSHTHI
5949             0x10A18,  // 10A18       ; UNKNOWN
5950             0x10A19,  // 10A19..10A35; KHAROSHTHI
5951             0x10A36,  // 10A36..10A37; UNKNOWN
5952             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5953             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5954             0x10A3F,  // 10A3F..10A48; KHAROSHTHI
5955             0x10A49,  // 10A49..10A4F; UNKNOWN
5956             0x10A50,  // 10A50..10A58; KHAROSHTHI
5957             0x10A59,  // 10A59..10A5F; UNKNOWN
5958             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5959             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5960             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5961             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5962             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5963             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5964             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5965             0x10B00,  // 10B00..10B35; AVESTAN
5966             0x10B36,  // 10B36..10B38; UNKNOWN
5967             0x10B39,  // 10B39..10B3F; AVESTAN
5968             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5969             0x10B56,  // 10B56..10B57; UNKNOWN
5970             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5971             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5972             0x10B73,  // 10B73..10B77; UNKNOWN
5973             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5974             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5975             0x10B92,  // 10B92..10B98; UNKNOWN
5976             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5977             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5978             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5979             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5980             0x10C00,  // 10C00..10C48; OLD_TURKIC
5981             0x10C49,  // 10C49..10C7F; UNKNOWN
5982             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
5983             0x10CB3,  // 10CB3..10CBF; UNKNOWN
5984             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
5985             0x10CF3,  // 10CF3..10CF9; UNKNOWN
5986             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
5987             0x10D00,  // 10D00..10D27; HANIFI_ROHINGYA
5988             0x10D28,  // 10D28..10D2F; UNKNOWN
5989             0x10D30,  // 10D30..10D39; HANIFI_ROHINGYA
5990             0x10D3A,  // 10D3A..10E5F; UNKNOWN
5991             0x10E60,  // 10E60..10E7E; ARABIC
5992             0x10E7F,  // 10E7F..10EFF; UNKNOWN
5993             0x10F00,  // 10F00..10F27; OLD_SOGDIAN
5994             0x10F28,  // 10F28..10F2F; UNKNOWN
5995             0x10F30,  // 10F30..10F59; SOGDIAN
5996             0x10F5A,  // 10F5A..10FDF; UNKNOWN
5997             0x10FE0,  // 10FE0..10FF6; ELYMAIC
5998             0x10FF7,  // 10FF7..10FFF; UNKNOWN
5999             0x11000,  // 11000..1104D; BRAHMI
6000             0x1104E,  // 1104E..11051; UNKNOWN
6001             0x11052,  // 11052..1106F; BRAHMI
6002             0x11070,  // 11070..1107E; UNKNOWN
6003             0x1107F,  // 1107F       ; BRAHMI
6004             0x11080,  // 11080..110C1; KAITHI
6005             0x110C2,  // 110C2..110CC; UNKNOWN
6006             0x110CD,  // 110CD       ; KAITHI
6007             0x110CE,  // 110CE..110CF; UNKNOWN
6008             0x110D0,  // 110D0..110E8; SORA_SOMPENG
6009             0x110E9,  // 110E9..110EF; UNKNOWN
6010             0x110F0,  // 110F0..110F9; SORA_SOMPENG
6011             0x110FA,  // 110FA..110FF; UNKNOWN
6012             0x11100,  // 11100..11134; CHAKMA
6013             0x11135,  // 11135       ; UNKNOWN
6014             0x11136,  // 11136..11146; CHAKMA
6015             0x11147,  // 11147..1114F; UNKNOWN
6016             0x11150,  // 11150..11176; MAHAJANI
6017             0x11177,  // 11177..1117F; UNKNOWN
6018             0x11180,  // 11180..111CD; SHARADA
6019             0x111CE,  // 111CE..111CF; UNKNOWN
6020             0x111D0,  // 111D0..111DF; SHARADA
6021             0x111E0,  // 111E0       ; UNKNOWN
6022             0x111E1,  // 111E1..111F4; SINHALA
6023             0x111F5,  // 111F5..111FF; UNKNOWN
6024             0x11200,  // 11200..11211; KHOJKI
6025             0x11212,  // 11212       ; UNKNOWN
6026             0x11213,  // 11213..1123E; KHOJKI
6027             0x1123F,  // 1123F..1127F; UNKNOWN
6028             0x11280,  // 11280..11286; MULTANI
6029             0x11287,  // 11287       ; UNKNOWN
6030             0x11288,  // 11288       ; MULTANI
6031             0x11289,  // 11289       ; UNKNOWN
6032             0x1128A,  // 1128A..1128D; MULTANI
6033             0x1128E,  // 1128E       ; UNKNOWN
6034             0x1128F,  // 1128F..1129D; MULTANI
6035             0x1129E,  // 1129E       ; UNKNOWN
6036             0x1129F,  // 1129F..112A9; MULTANI
6037             0x112AA,  // 112AA..112AF; UNKNOWN
6038             0x112B0,  // 112B0..112EA; KHUDAWADI
6039             0x112EB,  // 112EB..112EF; UNKNOWN
6040             0x112F0,  // 112F0..112F9; KHUDAWADI
6041             0x112FA,  // 112FA..112FF; UNKNOWN
6042             0x11300,  // 11300..11303; GRANTHA
6043             0x11304,  // 11304       ; UNKNOWN
6044             0x11305,  // 11305..1130C; GRANTHA
6045             0x1130D,  // 1130D..1130E; UNKNOWN
6046             0x1130F,  // 1130F..11310; GRANTHA
6047             0x11311,  // 11311..11312; UNKNOWN
6048             0x11313,  // 11313..11328; GRANTHA
6049             0x11329,  // 11329       ; UNKNOWN
6050             0x1132A,  // 1132A..11330; GRANTHA
6051             0x11331,  // 11331       ; UNKNOWN
6052             0x11332,  // 11332..11333; GRANTHA
6053             0x11334,  // 11334       ; UNKNOWN
6054             0x11335,  // 11335..11339; GRANTHA
6055             0x1133A,  // 1133A       ; UNKNOWN
6056             0x1133B,  // 1133B       ; INHERITED
6057             0x1133C,  // 1133C..11344; GRANTHA
6058             0x11345,  // 11345..11346; UNKNOWN
6059             0x11347,  // 11347..11348; GRANTHA
6060             0x11349,  // 11349..1134A; UNKNOWN
6061             0x1134B,  // 1134B..1134D; GRANTHA
6062             0x1134E,  // 1134E..1134F; UNKNOWN
6063             0x11350,  // 11350       ; GRANTHA
6064             0x11351,  // 11351..11356; UNKNOWN
6065             0x11357,  // 11357       ; GRANTHA
6066             0x11358,  // 11358..1135C; UNKNOWN
6067             0x1135D,  // 1135D..11363; GRANTHA
6068             0x11364,  // 11364..11365; UNKNOWN
6069             0x11366,  // 11366..1136C; GRANTHA
6070             0x1136D,  // 1136D..1136F; UNKNOWN
6071             0x11370,  // 11370..11374; GRANTHA
6072             0x11375,  // 11375..113FF; UNKNOWN
6073             0x11400,  // 11400..11459; NEWA
6074             0x1145A,  // 1145A       ; UNKNOWN
6075             0x1145B,  // 1145B       ; NEWA
6076             0x1145C,  // 1145C       ; UNKNOWN
6077             0x1145D,  // 1145D..1145F; NEWA
6078             0x11460,  // 11460..1147F; UNKNOWN
6079             0x11480,  // 11480..114C7; TIRHUTA
6080             0x114C8,  // 114C8..114CF; UNKNOWN
6081             0x114D0,  // 114D0..114D9; TIRHUTA
6082             0x114DA,  // 114DA..1157F; UNKNOWN
6083             0x11580,  // 11580..115B5; SIDDHAM
6084             0x115B6,  // 115B6..115B7; UNKNOWN
6085             0x115B8,  // 115B8..115DD; SIDDHAM
6086             0x115DE,  // 115DE..115FF; UNKNOWN
6087             0x11600,  // 11600..11644; MODI
6088             0x11645,  // 11645..1164F; UNKNOWN
6089             0x11650,  // 11650..11659; MODI
6090             0x1165A,  // 1165A..1165F; UNKNOWN
6091             0x11660,  // 11660..1166C; MONGOLIAN
6092             0x1166D,  // 1166D..1167F; UNKNOWN
6093             0x11680,  // 11680..116B8; TAKRI
6094             0x116B9,  // 116B9..116BF; UNKNOWN
6095             0x116C0,  // 116C0..116C9; TAKRI
6096             0x116CA,  // 116CA..116FF; UNKNOWN
6097             0x11700,  // 11700..1171A; AHOM
6098             0x1171B,  // 1171B..1171C; UNKNOWN
6099             0x1171D,  // 1171D..1172B; AHOM
6100             0x1172C,  // 1172C..1172F; UNKNOWN
6101             0x11730,  // 11730..1173F; AHOM
6102             0x11740,  // 11740..117FF; UNKNOWN
6103             0x11800,  // 11800..1183B; DOGRA
6104             0x1183C,  // 1183C..1189F; UNKNOWN
6105             0x118A0,  // 118A0..118F2; WARANG_CITI
6106             0x118F3,  // 118F3..118FE; UNKNOWN
6107             0x118FF,  // 118FF       ; WARANG_CITI
6108             0x11900,  // 11900..1199F; UNKNOWN
6109             0x119A0,  // 119A0..119A7; NANDINAGARI
6110             0x119A8,  // 119A8..119A9; UNKNOWN
6111             0x119AA,  // 119AA..119D7; NANDINAGARI
6112             0x119D8,  // 119D8..119D9; UNKNOWN
6113             0x119DA,  // 119DA..119E4; NANDINAGARI
6114             0x119E5,  // 119E5..119FF; UNKNOWN
6115             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
6116             0x11A48,  // 11A48..11A4F; UNKNOWN
6117             0x11A50,  // 11A50..11AA2; SOYOMBO
6118             0x11AA3,  // 11AA3..11ABF; UNKNOWN
6119             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
6120             0x11AF9,  // 11AF9..11BFF; UNKNOWN
6121             0x11C00,  // 11C00..11C08; BHAIKSUKI
6122             0x11C09,  // 11C09       ; UNKNOWN
6123             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
6124             0x11C37,  // 11C37       ; UNKNOWN
6125             0x11C38,  // 11C38..11C45; BHAIKSUKI
6126             0x11C46,  // 11C46..11C4F; UNKNOWN
6127             0x11C50,  // 11C50..11C6C; BHAIKSUKI
6128             0x11C6D,  // 11C6D..11C6F; UNKNOWN
6129             0x11C70,  // 11C70..11C8F; MARCHEN
6130             0x11C90,  // 11C90..11C91; UNKNOWN
6131             0x11C92,  // 11C92..11CA7; MARCHEN
6132             0x11CA8,  // 11CA8       ; UNKNOWN
6133             0x11CA9,  // 11CA9..11CB6; MARCHEN
6134             0x11CB7,  // 11CB7..11CFF; UNKNOWN
6135             0x11D00,  // 11D00..11D06; MASARAM_GONDI
6136             0x11D07,  // 11D07       ; UNKNOWN
6137             0x11D08,  // 11D08..11D09; MASARAM_GONDI
6138             0x11D0A,  // 11D0A       ; UNKNOWN
6139             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
6140             0x11D37,  // 11D37..11D39; UNKNOWN
6141             0x11D3A,  // 11D3A       ; MASARAM_GONDI
6142             0x11D3B,  // 11D3B       ; UNKNOWN
6143             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
6144             0x11D3E,  // 11D3E       ; UNKNOWN
6145             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
6146             0x11D48,  // 11D48..11D4F; UNKNOWN
6147             0x11D50,  // 11D50..11D59; MASARAM_GONDI
6148             0x11D5A,  // 11D5A..11D5F; UNKNOWN
6149             0x11D60,  // 11D60..11D65; GUNJALA_GONDI
6150             0x11D66,  // 11D66       ; UNKNOWN
6151             0x11D67,  // 11D67..11D68; GUNJALA_GONDI
6152             0x11D69,  // 11D69       ; UNKNOWN
6153             0x11D6A,  // 11D6A..11D8E; GUNJALA_GONDI
6154             0x11D8F,  // 11D8F       ; UNKNOWN
6155             0x11D90,  // 11D90..11D91; GUNJALA_GONDI
6156             0x11D92,  // 11D92       ; UNKNOWN
6157             0x11D93,  // 11D93..11D98; GUNJALA_GONDI
6158             0x11D99,  // 11D99..11D9F; UNKNOWN
6159             0x11DA0,  // 11DA0..11DA9; GUNJALA_GONDI
6160             0x11DAA,  // 11DAA..11EDF; UNKNOWN
6161             0x11EE0,  // 11EE0..11EF8; MAKASAR
6162             0x11EF9,  // 11EF9..11FBF; UNKNOWN
6163             0x11FC0,  // 11FC0..11FF1; TAMIL
6164             0x11FF2,  // 11FF2..11FFE; UNKNOWN
6165             0x11FFF,  // 11FFF       ; TAMIL
6166             0x12000,  // 12000..12399; CUNEIFORM
6167             0x1239A,  // 1239A..123FF; UNKNOWN
6168             0x12400,  // 12400..1246E; CUNEIFORM
6169             0x1246F,  // 1246F       ; UNKNOWN
6170             0x12470,  // 12470..12474; CUNEIFORM
6171             0x12475,  // 12475..1247F; UNKNOWN
6172             0x12480,  // 12480..12543; CUNEIFORM
6173             0x12544,  // 12544..12FFF; UNKNOWN
6174             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
6175             0x1342F,  // 1342F       ; UNKNOWN
6176             0x13430,  // 13430..13438; EGYPTIAN_HIEROGLYPHS
6177             0x13439,  // 13439..143FF; UNKNOWN
6178             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
6179             0x14647,  // 14647..167FF; UNKNOWN
6180             0x16800,  // 16800..16A38; BAMUM
6181             0x16A39,  // 16A39..16A3F; UNKNOWN
6182             0x16A40,  // 16A40..16A5E; MRO
6183             0x16A5F,  // 16A5F       ; UNKNOWN
6184             0x16A60,  // 16A60..16A69; MRO
6185             0x16A6A,  // 16A6A..16A6D; UNKNOWN
6186             0x16A6E,  // 16A6E..16A6F; MRO
6187             0x16A70,  // 16A70..16ACF; UNKNOWN
6188             0x16AD0,  // 16AD0..16AED; BASSA_VAH
6189             0x16AEE,  // 16AEE..16AEF; UNKNOWN
6190             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
6191             0x16AF6,  // 16AF6..16AFF; UNKNOWN
6192             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
6193             0x16B46,  // 16B46..16B4F; UNKNOWN
6194             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
6195             0x16B5A,  // 16B5A       ; UNKNOWN
6196             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
6197             0x16B62,  // 16B62       ; UNKNOWN
6198             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
6199             0x16B78,  // 16B78..16B7C; UNKNOWN
6200             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
6201             0x16B90,  // 16B90..16E3F; UNKNOWN
6202             0x16E40,  // 16E40..16E9A; MEDEFAIDRIN
6203             0x16E9B,  // 16E9B..16EFF; UNKNOWN
6204             0x16F00,  // 16F00..16F4A; MIAO
6205             0x16F4B,  // 16F4B..16F4E; UNKNOWN
6206             0x16F4F,  // 16F4F..16F87; MIAO
6207             0x16F88,  // 16F88..16F8E; UNKNOWN
6208             0x16F8F,  // 16F8F..16F9F; MIAO
6209             0x16FA0,  // 16FA0..16FDF; UNKNOWN
6210             0x16FE0,  // 16FE0       ; TANGUT
6211             0x16FE1,  // 16FE1       ; NUSHU
6212             0x16FE2,  // 16FE2..16FE3; COMMON
6213             0x16FE4,  // 16FE4..16FFF; UNKNOWN
6214             0x17000,  // 17000..187F7; TANGUT
6215             0x187F8,  // 187F8..187FF; UNKNOWN
6216             0x18800,  // 18800..18AF2; TANGUT
6217             0x18AF3,  // 18AF3..1AFFF; UNKNOWN
6218             0x1B000,  // 1B000       ; KATAKANA
6219             0x1B001,  // 1B001..1B11E; HIRAGANA
6220             0x1B11F,  // 1B11F..1B14F; UNKNOWN
6221             0x1B150,  // 1B150..1B152; HIRAGANA
6222             0x1B153,  // 1B153..1B163; UNKNOWN
6223             0x1B164,  // 1B164..1B167; KATAKANA
6224             0x1B168,  // 1B168..1B16F; UNKNOWN
6225             0x1B170,  // 1B170..1B2FB; NUSHU
6226             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
6227             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
6228             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
6229             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
6230             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
6231             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
6232             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
6233             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
6234             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
6235             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
6236             0x1BCA0,  // 1BCA0..1BCA3; COMMON
6237             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
6238             0x1D000,  // 1D000..1D0F5; COMMON
6239             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
6240             0x1D100,  // 1D100..1D126; COMMON
6241             0x1D127,  // 1D127..1D128; UNKNOWN
6242             0x1D129,  // 1D129..1D166; COMMON
6243             0x1D167,  // 1D167..1D169; INHERITED
6244             0x1D16A,  // 1D16A..1D17A; COMMON
6245             0x1D17B,  // 1D17B..1D182; INHERITED
6246             0x1D183,  // 1D183..1D184; COMMON
6247             0x1D185,  // 1D185..1D18B; INHERITED
6248             0x1D18C,  // 1D18C..1D1A9; COMMON
6249             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
6250             0x1D1AE,  // 1D1AE..1D1E8; COMMON
6251             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
6252             0x1D200,  // 1D200..1D245; GREEK
6253             0x1D246,  // 1D246..1D2DF; UNKNOWN
6254             0x1D2E0,  // 1D2E0..1D2F3; COMMON
6255             0x1D2F4,  // 1D2F4..1D2FF; UNKNOWN
6256             0x1D300,  // 1D300..1D356; COMMON
6257             0x1D357,  // 1D357..1D35F; UNKNOWN
6258             0x1D360,  // 1D360..1D378; COMMON
6259             0x1D379,  // 1D379..1D3FF; UNKNOWN
6260             0x1D400,  // 1D400..1D454; COMMON
6261             0x1D455,  // 1D455       ; UNKNOWN
6262             0x1D456,  // 1D456..1D49C; COMMON
6263             0x1D49D,  // 1D49D       ; UNKNOWN
6264             0x1D49E,  // 1D49E..1D49F; COMMON
6265             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
6266             0x1D4A2,  // 1D4A2       ; COMMON
6267             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
6268             0x1D4A5,  // 1D4A5..1D4A6; COMMON
6269             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
6270             0x1D4A9,  // 1D4A9..1D4AC; COMMON
6271             0x1D4AD,  // 1D4AD       ; UNKNOWN
6272             0x1D4AE,  // 1D4AE..1D4B9; COMMON
6273             0x1D4BA,  // 1D4BA       ; UNKNOWN
6274             0x1D4BB,  // 1D4BB       ; COMMON
6275             0x1D4BC,  // 1D4BC       ; UNKNOWN
6276             0x1D4BD,  // 1D4BD..1D4C3; COMMON
6277             0x1D4C4,  // 1D4C4       ; UNKNOWN
6278             0x1D4C5,  // 1D4C5..1D505; COMMON
6279             0x1D506,  // 1D506       ; UNKNOWN
6280             0x1D507,  // 1D507..1D50A; COMMON
6281             0x1D50B,  // 1D50B..1D50C; UNKNOWN
6282             0x1D50D,  // 1D50D..1D514; COMMON
6283             0x1D515,  // 1D515       ; UNKNOWN
6284             0x1D516,  // 1D516..1D51C; COMMON
6285             0x1D51D,  // 1D51D       ; UNKNOWN
6286             0x1D51E,  // 1D51E..1D539; COMMON
6287             0x1D53A,  // 1D53A       ; UNKNOWN
6288             0x1D53B,  // 1D53B..1D53E; COMMON
6289             0x1D53F,  // 1D53F       ; UNKNOWN
6290             0x1D540,  // 1D540..1D544; COMMON
6291             0x1D545,  // 1D545       ; UNKNOWN
6292             0x1D546,  // 1D546       ; COMMON
6293             0x1D547,  // 1D547..1D549; UNKNOWN
6294             0x1D54A,  // 1D54A..1D550; COMMON
6295             0x1D551,  // 1D551       ; UNKNOWN
6296             0x1D552,  // 1D552..1D6A5; COMMON
6297             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
6298             0x1D6A8,  // 1D6A8..1D7CB; COMMON
6299             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
6300             0x1D7CE,  // 1D7CE..1D7FF; COMMON
6301             0x1D800,  // 1D800..1DA8B; SIGNWRITING
6302             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
6303             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
6304             0x1DAA0,  // 1DAA0       ; UNKNOWN
6305             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
6306             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
6307             0x1E000,  // 1E000..1E006; GLAGOLITIC
6308             0x1E007,  // 1E007       ; UNKNOWN
6309             0x1E008,  // 1E008..1E018; GLAGOLITIC
6310             0x1E019,  // 1E019..1E01A; UNKNOWN
6311             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
6312             0x1E022,  // 1E022       ; UNKNOWN
6313             0x1E023,  // 1E023..1E024; GLAGOLITIC
6314             0x1E025,  // 1E025       ; UNKNOWN
6315             0x1E026,  // 1E026..1E02A; GLAGOLITIC
6316             0x1E02B,  // 1E02B..1E0FF; UNKNOWN
6317             0x1E100,  // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
6318             0x1E12D,  // 1E12D..1E12F; UNKNOWN
6319             0x1E130,  // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
6320             0x1E13E,  // 1E13E..1E13F; UNKNOWN
6321             0x1E140,  // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
6322             0x1E14A,  // 1E14A..1E14D; UNKNOWN
6323             0x1E14E,  // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
6324             0x1E150,  // 1E150..1E2BF; UNKNOWN
6325             0x1E2C0,  // 1E2C0..1E2F9; WANCHO
6326             0x1E2FA,  // 1E2FA..1E2FE; UNKNOWN
6327             0x1E2FF,  // 1E2FF       ; WANCHO
6328             0x1E300,  // 1E300..1E7FF; UNKNOWN
6329             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6330             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6331             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6332             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6333             0x1E900,  // 1E900..1E94B; ADLAM
6334             0x1E94C,  // 1E94C..1E94F; UNKNOWN
6335             0x1E950,  // 1E950..1E959; ADLAM
6336             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6337             0x1E95E,  // 1E95E..1E95F; ADLAM
6338             0x1E960,  // 1E960..1EC70; UNKNOWN
6339             0x1EC71,  // 1EC71..1ECB4; COMMON
6340             0x1ECB5,  // 1ECB5..1ED00; UNKNOWN
6341             0x1ED01,  // 1ED01..1ED3D; COMMON
6342             0x1ED3E,  // 1ED3E..1EDFF; UNKNOWN
6343             0x1EE00,  // 1EE00..1EE03; ARABIC
6344             0x1EE04,  // 1EE04       ; UNKNOWN
6345             0x1EE05,  // 1EE05..1EE1F; ARABIC
6346             0x1EE20,  // 1EE20       ; UNKNOWN
6347             0x1EE21,  // 1EE21..1EE22; ARABIC
6348             0x1EE23,  // 1EE23       ; UNKNOWN
6349             0x1EE24,  // 1EE24       ; ARABIC
6350             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6351             0x1EE27,  // 1EE27       ; ARABIC
6352             0x1EE28,  // 1EE28       ; UNKNOWN
6353             0x1EE29,  // 1EE29..1EE32; ARABIC
6354             0x1EE33,  // 1EE33       ; UNKNOWN
6355             0x1EE34,  // 1EE34..1EE37; ARABIC
6356             0x1EE38,  // 1EE38       ; UNKNOWN
6357             0x1EE39,  // 1EE39       ; ARABIC
6358             0x1EE3A,  // 1EE3A       ; UNKNOWN
6359             0x1EE3B,  // 1EE3B       ; ARABIC
6360             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6361             0x1EE42,  // 1EE42       ; ARABIC
6362             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6363             0x1EE47,  // 1EE47       ; ARABIC
6364             0x1EE48,  // 1EE48       ; UNKNOWN
6365             0x1EE49,  // 1EE49       ; ARABIC
6366             0x1EE4A,  // 1EE4A       ; UNKNOWN
6367             0x1EE4B,  // 1EE4B       ; ARABIC
6368             0x1EE4C,  // 1EE4C       ; UNKNOWN
6369             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6370             0x1EE50,  // 1EE50       ; UNKNOWN
6371             0x1EE51,  // 1EE51..1EE52; ARABIC
6372             0x1EE53,  // 1EE53       ; UNKNOWN
6373             0x1EE54,  // 1EE54       ; ARABIC
6374             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6375             0x1EE57,  // 1EE57       ; ARABIC
6376             0x1EE58,  // 1EE58       ; UNKNOWN
6377             0x1EE59,  // 1EE59       ; ARABIC
6378             0x1EE5A,  // 1EE5A       ; UNKNOWN
6379             0x1EE5B,  // 1EE5B       ; ARABIC
6380             0x1EE5C,  // 1EE5C       ; UNKNOWN
6381             0x1EE5D,  // 1EE5D       ; ARABIC
6382             0x1EE5E,  // 1EE5E       ; UNKNOWN
6383             0x1EE5F,  // 1EE5F       ; ARABIC
6384             0x1EE60,  // 1EE60       ; UNKNOWN
6385             0x1EE61,  // 1EE61..1EE62; ARABIC
6386             0x1EE63,  // 1EE63       ; UNKNOWN
6387             0x1EE64,  // 1EE64       ; ARABIC
6388             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6389             0x1EE67,  // 1EE67..1EE6A; ARABIC
6390             0x1EE6B,  // 1EE6B       ; UNKNOWN
6391             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6392             0x1EE73,  // 1EE73       ; UNKNOWN
6393             0x1EE74,  // 1EE74..1EE77; ARABIC
6394             0x1EE78,  // 1EE78       ; UNKNOWN
6395             0x1EE79,  // 1EE79..1EE7C; ARABIC
6396             0x1EE7D,  // 1EE7D       ; UNKNOWN
6397             0x1EE7E,  // 1EE7E       ; ARABIC
6398             0x1EE7F,  // 1EE7F       ; UNKNOWN
6399             0x1EE80,  // 1EE80..1EE89; ARABIC
6400             0x1EE8A,  // 1EE8A       ; UNKNOWN
6401             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6402             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6403             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6404             0x1EEA4,  // 1EEA4       ; UNKNOWN
6405             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6406             0x1EEAA,  // 1EEAA       ; UNKNOWN
6407             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6408             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6409             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6410             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6411             0x1F000,  // 1F000..1F02B; COMMON
6412             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6413             0x1F030,  // 1F030..1F093; COMMON
6414             0x1F094,  // 1F094..1F09F; UNKNOWN
6415             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6416             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6417             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6418             0x1F0C0,  // 1F0C0       ; UNKNOWN
6419             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6420             0x1F0D0,  // 1F0D0       ; UNKNOWN
6421             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6422             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6423             0x1F100,  // 1F100..1F10C; COMMON
6424             0x1F10D,  // 1F10D..1F10F; UNKNOWN
6425             0x1F110,  // 1F110..1F16C; COMMON
6426             0x1F16D,  // 1F16D..1F16F; UNKNOWN
6427             0x1F170,  // 1F170..1F1AC; COMMON
6428             0x1F1AD,  // 1F1AD..1F1E5; UNKNOWN
6429             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6430             0x1F200,  // 1F200       ; HIRAGANA
6431             0x1F201,  // 1F201..1F202; COMMON
6432             0x1F203,  // 1F203..1F20F; UNKNOWN
6433             0x1F210,  // 1F210..1F23B; COMMON
6434             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6435             0x1F240,  // 1F240..1F248; COMMON
6436             0x1F249,  // 1F249..1F24F; UNKNOWN
6437             0x1F250,  // 1F250..1F251; COMMON
6438             0x1F252,  // 1F252..1F25F; UNKNOWN
6439             0x1F260,  // 1F260..1F265; COMMON
6440             0x1F266,  // 1F266..1F2FF; UNKNOWN
6441             0x1F300,  // 1F300..1F6D5; COMMON
6442             0x1F6D6,  // 1F6D6..1F6DF; UNKNOWN
6443             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6444             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6445             0x1F6F0,  // 1F6F0..1F6FA; COMMON
6446             0x1F6FB,  // 1F6FB..1F6FF; UNKNOWN
6447             0x1F700,  // 1F700..1F773; COMMON
6448             0x1F774,  // 1F774..1F77F; UNKNOWN
6449             0x1F780,  // 1F780..1F7D8; COMMON
6450             0x1F7D9,  // 1F7D9..1F7DF; UNKNOWN
6451             0x1F7E0,  // 1F7E0..1F7EB; COMMON
6452             0x1F7EC,  // 1F7EC..1F7FF; UNKNOWN
6453             0x1F800,  // 1F800..1F80B; COMMON
6454             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6455             0x1F810,  // 1F810..1F847; COMMON
6456             0x1F848,  // 1F848..1F84F; UNKNOWN
6457             0x1F850,  // 1F850..1F859; COMMON
6458             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6459             0x1F860,  // 1F860..1F887; COMMON
6460             0x1F888,  // 1F888..1F88F; UNKNOWN
6461             0x1F890,  // 1F890..1F8AD; COMMON
6462             0x1F8AE,  // 1F8AE..1F8FF; UNKNOWN
6463             0x1F900,  // 1F900..1F90B; COMMON
6464             0x1F90C,  // 1F90C       ; UNKNOWN
6465             0x1F90D,  // 1F90D..1F971; COMMON
6466             0x1F972,  // 1F972       ; UNKNOWN
6467             0x1F973,  // 1F973..1F976; COMMON
6468             0x1F977,  // 1F977..1F979; UNKNOWN
6469             0x1F97A,  // 1F97A..1F9A2; COMMON
6470             0x1F9A3,  // 1F9A3..1F9A4; UNKNOWN
6471             0x1F9A5,  // 1F9A5..1F9AA; COMMON
6472             0x1F9AB,  // 1F9AB..1F9AD; UNKNOWN
6473             0x1F9AE,  // 1F9AE..1F9CA; COMMON
6474             0x1F9CB,  // 1F9CB..1F9CC; UNKNOWN
6475             0x1F9CD,  // 1F9CD..1FA53; COMMON
6476             0x1FA54,  // 1FA54..1FA5F; UNKNOWN
6477             0x1FA60,  // 1FA60..1FA6D; COMMON
6478             0x1FA6E,  // 1FA6E..1FA6F; UNKNOWN
6479             0x1FA70,  // 1FA70..1FA73; COMMON
6480             0x1FA74,  // 1FA74..1FA77; UNKNOWN
6481             0x1FA78,  // 1FA78..1FA7A; COMMON
6482             0x1FA7B,  // 1FA7B..1FA7F; UNKNOWN
6483             0x1FA80,  // 1FA80..1FA82; COMMON
6484             0x1FA83,  // 1FA83..1FA8F; UNKNOWN
6485             0x1FA90,  // 1FA90..1FA95; COMMON
6486             0x1FA96,  // 1FA96..1FFFF; UNKNOWN
6487             0x20000,  // 20000..2A6D6; HAN
6488             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
6489             0x2A700,  // 2A700..2B734; HAN
6490             0x2B735,  // 2B735..2B73F; UNKNOWN
6491             0x2B740,  // 2B740..2B81D; HAN
6492             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6493             0x2B820,  // 2B820..2CEA1; HAN
6494             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6495             0x2CEB0,  // 2CEB0..2EBE0; HAN
6496             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6497             0x2F800,  // 2F800..2FA1D; HAN
6498             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
6499             0xE0001,  // E0001       ; COMMON
6500             0xE0002,  // E0002..E001F; UNKNOWN
6501             0xE0020,  // E0020..E007F; COMMON
6502             0xE0080,  // E0080..E00FF; UNKNOWN
6503             0xE0100,  // E0100..E01EF; INHERITED
6504             0xE01F0,  // E01F0..10FFFF; UNKNOWN
6505         };
6506 
6507         private static final UnicodeScript[] scripts = {
6508             COMMON,                   // 0000..0040
6509             LATIN,                    // 0041..005A
6510             COMMON,                   // 005B..0060
6511             LATIN,                    // 0061..007A
6512             COMMON,                   // 007B..00A9
6513             LATIN,                    // 00AA
6514             COMMON,                   // 00AB..00B9
6515             LATIN,                    // 00BA
6516             COMMON,                   // 00BB..00BF
6517             LATIN,                    // 00C0..00D6
6518             COMMON,                   // 00D7
6519             LATIN,                    // 00D8..00F6
6520             COMMON,                   // 00F7
6521             LATIN,                    // 00F8..02B8
6522             COMMON,                   // 02B9..02DF
6523             LATIN,                    // 02E0..02E4
6524             COMMON,                   // 02E5..02E9
6525             BOPOMOFO,                 // 02EA..02EB
6526             COMMON,                   // 02EC..02FF
6527             INHERITED,                // 0300..036F
6528             GREEK,                    // 0370..0373
6529             COMMON,                   // 0374
6530             GREEK,                    // 0375..0377
6531             UNKNOWN,                  // 0378..0379
6532             GREEK,                    // 037A..037D
6533             COMMON,                   // 037E
6534             GREEK,                    // 037F
6535             UNKNOWN,                  // 0380..0383
6536             GREEK,                    // 0384
6537             COMMON,                   // 0385
6538             GREEK,                    // 0386
6539             COMMON,                   // 0387
6540             GREEK,                    // 0388..038A
6541             UNKNOWN,                  // 038B
6542             GREEK,                    // 038C
6543             UNKNOWN,                  // 038D
6544             GREEK,                    // 038E..03A1
6545             UNKNOWN,                  // 03A2
6546             GREEK,                    // 03A3..03E1
6547             COPTIC,                   // 03E2..03EF
6548             GREEK,                    // 03F0..03FF
6549             CYRILLIC,                 // 0400..0484
6550             INHERITED,                // 0485..0486
6551             CYRILLIC,                 // 0487..052F
6552             UNKNOWN,                  // 0530
6553             ARMENIAN,                 // 0531..0556
6554             UNKNOWN,                  // 0557..0558
6555             ARMENIAN,                 // 0559..0588
6556             COMMON,                   // 0589
6557             ARMENIAN,                 // 058A
6558             UNKNOWN,                  // 058B..058C
6559             ARMENIAN,                 // 058D..058F
6560             UNKNOWN,                  // 0590
6561             HEBREW,                   // 0591..05C7
6562             UNKNOWN,                  // 05C8..05CF
6563             HEBREW,                   // 05D0..05EA
6564             UNKNOWN,                  // 05EB..05EE
6565             HEBREW,                   // 05EF..05F4
6566             UNKNOWN,                  // 05F5..05FF
6567             ARABIC,                   // 0600..0604
6568             COMMON,                   // 0605
6569             ARABIC,                   // 0606..060B
6570             COMMON,                   // 060C
6571             ARABIC,                   // 060D..061A
6572             COMMON,                   // 061B
6573             ARABIC,                   // 061C
6574             UNKNOWN,                  // 061D
6575             ARABIC,                   // 061E
6576             COMMON,                   // 061F
6577             ARABIC,                   // 0620..063F
6578             COMMON,                   // 0640
6579             ARABIC,                   // 0641..064A
6580             INHERITED,                // 064B..0655
6581             ARABIC,                   // 0656..066F
6582             INHERITED,                // 0670
6583             ARABIC,                   // 0671..06DC
6584             COMMON,                   // 06DD
6585             ARABIC,                   // 06DE..06FF
6586             SYRIAC,                   // 0700..070D
6587             UNKNOWN,                  // 070E
6588             SYRIAC,                   // 070F..074A
6589             UNKNOWN,                  // 074B..074C
6590             SYRIAC,                   // 074D..074F
6591             ARABIC,                   // 0750..077F
6592             THAANA,                   // 0780..07B1
6593             UNKNOWN,                  // 07B2..07BF
6594             NKO,                      // 07C0..07FA
6595             UNKNOWN,                  // 07FB..07FC
6596             NKO,                      // 07FD..07FF
6597             SAMARITAN,                // 0800..082D
6598             UNKNOWN,                  // 082E..082F
6599             SAMARITAN,                // 0830..083E
6600             UNKNOWN,                  // 083F
6601             MANDAIC,                  // 0840..085B
6602             UNKNOWN,                  // 085C..085D
6603             MANDAIC,                  // 085E
6604             UNKNOWN,                  // 085F
6605             SYRIAC,                   // 0860..086A
6606             UNKNOWN,                  // 086B..089F
6607             ARABIC,                   // 08A0..08B4
6608             UNKNOWN,                  // 08B5
6609             ARABIC,                   // 08B6..08BD
6610             UNKNOWN,                  // 08BE..08D2
6611             ARABIC,                   // 08D3..08E1
6612             COMMON,                   // 08E2
6613             ARABIC,                   // 08E3..08FF
6614             DEVANAGARI,               // 0900..0950
6615             INHERITED,                // 0951..0954
6616             DEVANAGARI,               // 0955..0963
6617             COMMON,                   // 0964..0965
6618             DEVANAGARI,               // 0966..097F
6619             BENGALI,                  // 0980..0983
6620             UNKNOWN,                  // 0984
6621             BENGALI,                  // 0985..098C
6622             UNKNOWN,                  // 098D..098E
6623             BENGALI,                  // 098F..0990
6624             UNKNOWN,                  // 0991..0992
6625             BENGALI,                  // 0993..09A8
6626             UNKNOWN,                  // 09A9
6627             BENGALI,                  // 09AA..09B0
6628             UNKNOWN,                  // 09B1
6629             BENGALI,                  // 09B2
6630             UNKNOWN,                  // 09B3..09B5
6631             BENGALI,                  // 09B6..09B9
6632             UNKNOWN,                  // 09BA..09BB
6633             BENGALI,                  // 09BC..09C4
6634             UNKNOWN,                  // 09C5..09C6
6635             BENGALI,                  // 09C7..09C8
6636             UNKNOWN,                  // 09C9..09CA
6637             BENGALI,                  // 09CB..09CE
6638             UNKNOWN,                  // 09CF..09D6
6639             BENGALI,                  // 09D7
6640             UNKNOWN,                  // 09D8..09DB
6641             BENGALI,                  // 09DC..09DD
6642             UNKNOWN,                  // 09DE
6643             BENGALI,                  // 09DF..09E3
6644             UNKNOWN,                  // 09E4..09E5
6645             BENGALI,                  // 09E6..09FE
6646             UNKNOWN,                  // 09FF..0A00
6647             GURMUKHI,                 // 0A01..0A03
6648             UNKNOWN,                  // 0A04
6649             GURMUKHI,                 // 0A05..0A0A
6650             UNKNOWN,                  // 0A0B..0A0E
6651             GURMUKHI,                 // 0A0F..0A10
6652             UNKNOWN,                  // 0A11..0A12
6653             GURMUKHI,                 // 0A13..0A28
6654             UNKNOWN,                  // 0A29
6655             GURMUKHI,                 // 0A2A..0A30
6656             UNKNOWN,                  // 0A31
6657             GURMUKHI,                 // 0A32..0A33
6658             UNKNOWN,                  // 0A34
6659             GURMUKHI,                 // 0A35..0A36
6660             UNKNOWN,                  // 0A37
6661             GURMUKHI,                 // 0A38..0A39
6662             UNKNOWN,                  // 0A3A..0A3B
6663             GURMUKHI,                 // 0A3C
6664             UNKNOWN,                  // 0A3D
6665             GURMUKHI,                 // 0A3E..0A42
6666             UNKNOWN,                  // 0A43..0A46
6667             GURMUKHI,                 // 0A47..0A48
6668             UNKNOWN,                  // 0A49..0A4A
6669             GURMUKHI,                 // 0A4B..0A4D
6670             UNKNOWN,                  // 0A4E..0A50
6671             GURMUKHI,                 // 0A51
6672             UNKNOWN,                  // 0A52..0A58
6673             GURMUKHI,                 // 0A59..0A5C
6674             UNKNOWN,                  // 0A5D
6675             GURMUKHI,                 // 0A5E
6676             UNKNOWN,                  // 0A5F..0A65
6677             GURMUKHI,                 // 0A66..0A76
6678             UNKNOWN,                  // 0A77..0A80
6679             GUJARATI,                 // 0A81..0A83
6680             UNKNOWN,                  // 0A84
6681             GUJARATI,                 // 0A85..0A8D
6682             UNKNOWN,                  // 0A8E
6683             GUJARATI,                 // 0A8F..0A91
6684             UNKNOWN,                  // 0A92
6685             GUJARATI,                 // 0A93..0AA8
6686             UNKNOWN,                  // 0AA9
6687             GUJARATI,                 // 0AAA..0AB0
6688             UNKNOWN,                  // 0AB1
6689             GUJARATI,                 // 0AB2..0AB3
6690             UNKNOWN,                  // 0AB4
6691             GUJARATI,                 // 0AB5..0AB9
6692             UNKNOWN,                  // 0ABA..0ABB
6693             GUJARATI,                 // 0ABC..0AC5
6694             UNKNOWN,                  // 0AC6
6695             GUJARATI,                 // 0AC7..0AC9
6696             UNKNOWN,                  // 0ACA
6697             GUJARATI,                 // 0ACB..0ACD
6698             UNKNOWN,                  // 0ACE..0ACF
6699             GUJARATI,                 // 0AD0
6700             UNKNOWN,                  // 0AD1..0ADF
6701             GUJARATI,                 // 0AE0..0AE3
6702             UNKNOWN,                  // 0AE4..0AE5
6703             GUJARATI,                 // 0AE6..0AF1
6704             UNKNOWN,                  // 0AF2..0AF8
6705             GUJARATI,                 // 0AF9..0AFF
6706             UNKNOWN,                  // 0B00
6707             ORIYA,                    // 0B01..0B03
6708             UNKNOWN,                  // 0B04
6709             ORIYA,                    // 0B05..0B0C
6710             UNKNOWN,                  // 0B0D..0B0E
6711             ORIYA,                    // 0B0F..0B10
6712             UNKNOWN,                  // 0B11..0B12
6713             ORIYA,                    // 0B13..0B28
6714             UNKNOWN,                  // 0B29
6715             ORIYA,                    // 0B2A..0B30
6716             UNKNOWN,                  // 0B31
6717             ORIYA,                    // 0B32..0B33
6718             UNKNOWN,                  // 0B34
6719             ORIYA,                    // 0B35..0B39
6720             UNKNOWN,                  // 0B3A..0B3B
6721             ORIYA,                    // 0B3C..0B44
6722             UNKNOWN,                  // 0B45..0B46
6723             ORIYA,                    // 0B47..0B48
6724             UNKNOWN,                  // 0B49..0B4A
6725             ORIYA,                    // 0B4B..0B4D
6726             UNKNOWN,                  // 0B4E..0B55
6727             ORIYA,                    // 0B56..0B57
6728             UNKNOWN,                  // 0B58..0B5B
6729             ORIYA,                    // 0B5C..0B5D
6730             UNKNOWN,                  // 0B5E
6731             ORIYA,                    // 0B5F..0B63
6732             UNKNOWN,                  // 0B64..0B65
6733             ORIYA,                    // 0B66..0B77
6734             UNKNOWN,                  // 0B78..0B81
6735             TAMIL,                    // 0B82..0B83
6736             UNKNOWN,                  // 0B84
6737             TAMIL,                    // 0B85..0B8A
6738             UNKNOWN,                  // 0B8B..0B8D
6739             TAMIL,                    // 0B8E..0B90
6740             UNKNOWN,                  // 0B91
6741             TAMIL,                    // 0B92..0B95
6742             UNKNOWN,                  // 0B96..0B98
6743             TAMIL,                    // 0B99..0B9A
6744             UNKNOWN,                  // 0B9B
6745             TAMIL,                    // 0B9C
6746             UNKNOWN,                  // 0B9D
6747             TAMIL,                    // 0B9E..0B9F
6748             UNKNOWN,                  // 0BA0..0BA2
6749             TAMIL,                    // 0BA3..0BA4
6750             UNKNOWN,                  // 0BA5..0BA7
6751             TAMIL,                    // 0BA8..0BAA
6752             UNKNOWN,                  // 0BAB..0BAD
6753             TAMIL,                    // 0BAE..0BB9
6754             UNKNOWN,                  // 0BBA..0BBD
6755             TAMIL,                    // 0BBE..0BC2
6756             UNKNOWN,                  // 0BC3..0BC5
6757             TAMIL,                    // 0BC6..0BC8
6758             UNKNOWN,                  // 0BC9
6759             TAMIL,                    // 0BCA..0BCD
6760             UNKNOWN,                  // 0BCE..0BCF
6761             TAMIL,                    // 0BD0
6762             UNKNOWN,                  // 0BD1..0BD6
6763             TAMIL,                    // 0BD7
6764             UNKNOWN,                  // 0BD8..0BE5
6765             TAMIL,                    // 0BE6..0BFA
6766             UNKNOWN,                  // 0BFB..0BFF
6767             TELUGU,                   // 0C00..0C0C
6768             UNKNOWN,                  // 0C0D
6769             TELUGU,                   // 0C0E..0C10
6770             UNKNOWN,                  // 0C11
6771             TELUGU,                   // 0C12..0C28
6772             UNKNOWN,                  // 0C29
6773             TELUGU,                   // 0C2A..0C39
6774             UNKNOWN,                  // 0C3A..0C3C
6775             TELUGU,                   // 0C3D..0C44
6776             UNKNOWN,                  // 0C45
6777             TELUGU,                   // 0C46..0C48
6778             UNKNOWN,                  // 0C49
6779             TELUGU,                   // 0C4A..0C4D
6780             UNKNOWN,                  // 0C4E..0C54
6781             TELUGU,                   // 0C55..0C56
6782             UNKNOWN,                  // 0C57
6783             TELUGU,                   // 0C58..0C5A
6784             UNKNOWN,                  // 0C5B..0C5F
6785             TELUGU,                   // 0C60..0C63
6786             UNKNOWN,                  // 0C64..0C65
6787             TELUGU,                   // 0C66..0C6F
6788             UNKNOWN,                  // 0C70..0C76
6789             TELUGU,                   // 0C77..0C7F
6790             KANNADA,                  // 0C80..0C8C
6791             UNKNOWN,                  // 0C8D
6792             KANNADA,                  // 0C8E..0C90
6793             UNKNOWN,                  // 0C91
6794             KANNADA,                  // 0C92..0CA8
6795             UNKNOWN,                  // 0CA9
6796             KANNADA,                  // 0CAA..0CB3
6797             UNKNOWN,                  // 0CB4
6798             KANNADA,                  // 0CB5..0CB9
6799             UNKNOWN,                  // 0CBA..0CBB
6800             KANNADA,                  // 0CBC..0CC4
6801             UNKNOWN,                  // 0CC5
6802             KANNADA,                  // 0CC6..0CC8
6803             UNKNOWN,                  // 0CC9
6804             KANNADA,                  // 0CCA..0CCD
6805             UNKNOWN,                  // 0CCE..0CD4
6806             KANNADA,                  // 0CD5..0CD6
6807             UNKNOWN,                  // 0CD7..0CDD
6808             KANNADA,                  // 0CDE
6809             UNKNOWN,                  // 0CDF
6810             KANNADA,                  // 0CE0..0CE3
6811             UNKNOWN,                  // 0CE4..0CE5
6812             KANNADA,                  // 0CE6..0CEF
6813             UNKNOWN,                  // 0CF0
6814             KANNADA,                  // 0CF1..0CF2
6815             UNKNOWN,                  // 0CF3..0CFF
6816             MALAYALAM,                // 0D00..0D03
6817             UNKNOWN,                  // 0D04
6818             MALAYALAM,                // 0D05..0D0C
6819             UNKNOWN,                  // 0D0D
6820             MALAYALAM,                // 0D0E..0D10
6821             UNKNOWN,                  // 0D11
6822             MALAYALAM,                // 0D12..0D44
6823             UNKNOWN,                  // 0D45
6824             MALAYALAM,                // 0D46..0D48
6825             UNKNOWN,                  // 0D49
6826             MALAYALAM,                // 0D4A..0D4F
6827             UNKNOWN,                  // 0D50..0D53
6828             MALAYALAM,                // 0D54..0D63
6829             UNKNOWN,                  // 0D64..0D65
6830             MALAYALAM,                // 0D66..0D7F
6831             UNKNOWN,                  // 0D80..0D81
6832             SINHALA,                  // 0D82..0D83
6833             UNKNOWN,                  // 0D84
6834             SINHALA,                  // 0D85..0D96
6835             UNKNOWN,                  // 0D97..0D99
6836             SINHALA,                  // 0D9A..0DB1
6837             UNKNOWN,                  // 0DB2
6838             SINHALA,                  // 0DB3..0DBB
6839             UNKNOWN,                  // 0DBC
6840             SINHALA,                  // 0DBD
6841             UNKNOWN,                  // 0DBE..0DBF
6842             SINHALA,                  // 0DC0..0DC6
6843             UNKNOWN,                  // 0DC7..0DC9
6844             SINHALA,                  // 0DCA
6845             UNKNOWN,                  // 0DCB..0DCE
6846             SINHALA,                  // 0DCF..0DD4
6847             UNKNOWN,                  // 0DD5
6848             SINHALA,                  // 0DD6
6849             UNKNOWN,                  // 0DD7
6850             SINHALA,                  // 0DD8..0DDF
6851             UNKNOWN,                  // 0DE0..0DE5
6852             SINHALA,                  // 0DE6..0DEF
6853             UNKNOWN,                  // 0DF0..0DF1
6854             SINHALA,                  // 0DF2..0DF4
6855             UNKNOWN,                  // 0DF5..0E00
6856             THAI,                     // 0E01..0E3A
6857             UNKNOWN,                  // 0E3B..0E3E
6858             COMMON,                   // 0E3F
6859             THAI,                     // 0E40..0E5B
6860             UNKNOWN,                  // 0E5C..0E80
6861             LAO,                      // 0E81..0E82
6862             UNKNOWN,                  // 0E83
6863             LAO,                      // 0E84
6864             UNKNOWN,                  // 0E85
6865             LAO,                      // 0E86..0E8A
6866             UNKNOWN,                  // 0E8B
6867             LAO,                      // 0E8C..0EA3
6868             UNKNOWN,                  // 0EA4
6869             LAO,                      // 0EA5
6870             UNKNOWN,                  // 0EA6
6871             LAO,                      // 0EA7..0EBD
6872             UNKNOWN,                  // 0EBE..0EBF
6873             LAO,                      // 0EC0..0EC4
6874             UNKNOWN,                  // 0EC5
6875             LAO,                      // 0EC6
6876             UNKNOWN,                  // 0EC7
6877             LAO,                      // 0EC8..0ECD
6878             UNKNOWN,                  // 0ECE..0ECF
6879             LAO,                      // 0ED0..0ED9
6880             UNKNOWN,                  // 0EDA..0EDB
6881             LAO,                      // 0EDC..0EDF
6882             UNKNOWN,                  // 0EE0..0EFF
6883             TIBETAN,                  // 0F00..0F47
6884             UNKNOWN,                  // 0F48
6885             TIBETAN,                  // 0F49..0F6C
6886             UNKNOWN,                  // 0F6D..0F70
6887             TIBETAN,                  // 0F71..0F97
6888             UNKNOWN,                  // 0F98
6889             TIBETAN,                  // 0F99..0FBC
6890             UNKNOWN,                  // 0FBD
6891             TIBETAN,                  // 0FBE..0FCC
6892             UNKNOWN,                  // 0FCD
6893             TIBETAN,                  // 0FCE..0FD4
6894             COMMON,                   // 0FD5..0FD8
6895             TIBETAN,                  // 0FD9..0FDA
6896             UNKNOWN,                  // 0FDB..0FFF
6897             MYANMAR,                  // 1000..109F
6898             GEORGIAN,                 // 10A0..10C5
6899             UNKNOWN,                  // 10C6
6900             GEORGIAN,                 // 10C7
6901             UNKNOWN,                  // 10C8..10CC
6902             GEORGIAN,                 // 10CD
6903             UNKNOWN,                  // 10CE..10CF
6904             GEORGIAN,                 // 10D0..10FA
6905             COMMON,                   // 10FB
6906             GEORGIAN,                 // 10FC..10FF
6907             HANGUL,                   // 1100..11FF
6908             ETHIOPIC,                 // 1200..1248
6909             UNKNOWN,                  // 1249
6910             ETHIOPIC,                 // 124A..124D
6911             UNKNOWN,                  // 124E..124F
6912             ETHIOPIC,                 // 1250..1256
6913             UNKNOWN,                  // 1257
6914             ETHIOPIC,                 // 1258
6915             UNKNOWN,                  // 1259
6916             ETHIOPIC,                 // 125A..125D
6917             UNKNOWN,                  // 125E..125F
6918             ETHIOPIC,                 // 1260..1288
6919             UNKNOWN,                  // 1289
6920             ETHIOPIC,                 // 128A..128D
6921             UNKNOWN,                  // 128E..128F
6922             ETHIOPIC,                 // 1290..12B0
6923             UNKNOWN,                  // 12B1
6924             ETHIOPIC,                 // 12B2..12B5
6925             UNKNOWN,                  // 12B6..12B7
6926             ETHIOPIC,                 // 12B8..12BE
6927             UNKNOWN,                  // 12BF
6928             ETHIOPIC,                 // 12C0
6929             UNKNOWN,                  // 12C1
6930             ETHIOPIC,                 // 12C2..12C5
6931             UNKNOWN,                  // 12C6..12C7
6932             ETHIOPIC,                 // 12C8..12D6
6933             UNKNOWN,                  // 12D7
6934             ETHIOPIC,                 // 12D8..1310
6935             UNKNOWN,                  // 1311
6936             ETHIOPIC,                 // 1312..1315
6937             UNKNOWN,                  // 1316..1317
6938             ETHIOPIC,                 // 1318..135A
6939             UNKNOWN,                  // 135B..135C
6940             ETHIOPIC,                 // 135D..137C
6941             UNKNOWN,                  // 137D..137F
6942             ETHIOPIC,                 // 1380..1399
6943             UNKNOWN,                  // 139A..139F
6944             CHEROKEE,                 // 13A0..13F5
6945             UNKNOWN,                  // 13F6..13F7
6946             CHEROKEE,                 // 13F8..13FD
6947             UNKNOWN,                  // 13FE..13FF
6948             CANADIAN_ABORIGINAL,      // 1400..167F
6949             OGHAM,                    // 1680..169C
6950             UNKNOWN,                  // 169D..169F
6951             RUNIC,                    // 16A0..16EA
6952             COMMON,                   // 16EB..16ED
6953             RUNIC,                    // 16EE..16F8
6954             UNKNOWN,                  // 16F9..16FF
6955             TAGALOG,                  // 1700..170C
6956             UNKNOWN,                  // 170D
6957             TAGALOG,                  // 170E..1714
6958             UNKNOWN,                  // 1715..171F
6959             HANUNOO,                  // 1720..1734
6960             COMMON,                   // 1735..1736
6961             UNKNOWN,                  // 1737..173F
6962             BUHID,                    // 1740..1753
6963             UNKNOWN,                  // 1754..175F
6964             TAGBANWA,                 // 1760..176C
6965             UNKNOWN,                  // 176D
6966             TAGBANWA,                 // 176E..1770
6967             UNKNOWN,                  // 1771
6968             TAGBANWA,                 // 1772..1773
6969             UNKNOWN,                  // 1774..177F
6970             KHMER,                    // 1780..17DD
6971             UNKNOWN,                  // 17DE..17DF
6972             KHMER,                    // 17E0..17E9
6973             UNKNOWN,                  // 17EA..17EF
6974             KHMER,                    // 17F0..17F9
6975             UNKNOWN,                  // 17FA..17FF
6976             MONGOLIAN,                // 1800..1801
6977             COMMON,                   // 1802..1803
6978             MONGOLIAN,                // 1804
6979             COMMON,                   // 1805
6980             MONGOLIAN,                // 1806..180E
6981             UNKNOWN,                  // 180F
6982             MONGOLIAN,                // 1810..1819
6983             UNKNOWN,                  // 181A..181F
6984             MONGOLIAN,                // 1820..1878
6985             UNKNOWN,                  // 1879..187F
6986             MONGOLIAN,                // 1880..18AA
6987             UNKNOWN,                  // 18AB..18AF
6988             CANADIAN_ABORIGINAL,      // 18B0..18F5
6989             UNKNOWN,                  // 18F6..18FF
6990             LIMBU,                    // 1900..191E
6991             UNKNOWN,                  // 191F
6992             LIMBU,                    // 1920..192B
6993             UNKNOWN,                  // 192C..192F
6994             LIMBU,                    // 1930..193B
6995             UNKNOWN,                  // 193C..193F
6996             LIMBU,                    // 1940
6997             UNKNOWN,                  // 1941..1943
6998             LIMBU,                    // 1944..194F
6999             TAI_LE,                   // 1950..196D
7000             UNKNOWN,                  // 196E..196F
7001             TAI_LE,                   // 1970..1974
7002             UNKNOWN,                  // 1975..197F
7003             NEW_TAI_LUE,              // 1980..19AB
7004             UNKNOWN,                  // 19AC..19AF
7005             NEW_TAI_LUE,              // 19B0..19C9
7006             UNKNOWN,                  // 19CA..19CF
7007             NEW_TAI_LUE,              // 19D0..19DA
7008             UNKNOWN,                  // 19DB..19DD
7009             NEW_TAI_LUE,              // 19DE..19DF
7010             KHMER,                    // 19E0..19FF
7011             BUGINESE,                 // 1A00..1A1B
7012             UNKNOWN,                  // 1A1C..1A1D
7013             BUGINESE,                 // 1A1E..1A1F
7014             TAI_THAM,                 // 1A20..1A5E
7015             UNKNOWN,                  // 1A5F
7016             TAI_THAM,                 // 1A60..1A7C
7017             UNKNOWN,                  // 1A7D..1A7E
7018             TAI_THAM,                 // 1A7F..1A89
7019             UNKNOWN,                  // 1A8A..1A8F
7020             TAI_THAM,                 // 1A90..1A99
7021             UNKNOWN,                  // 1A9A..1A9F
7022             TAI_THAM,                 // 1AA0..1AAD
7023             UNKNOWN,                  // 1AAE..1AAF
7024             INHERITED,                // 1AB0..1ABE
7025             UNKNOWN,                  // 1ABF..1AFF
7026             BALINESE,                 // 1B00..1B4B
7027             UNKNOWN,                  // 1B4C..1B4F
7028             BALINESE,                 // 1B50..1B7C
7029             UNKNOWN,                  // 1B7D..1B7F
7030             SUNDANESE,                // 1B80..1BBF
7031             BATAK,                    // 1BC0..1BF3
7032             UNKNOWN,                  // 1BF4..1BFB
7033             BATAK,                    // 1BFC..1BFF
7034             LEPCHA,                   // 1C00..1C37
7035             UNKNOWN,                  // 1C38..1C3A
7036             LEPCHA,                   // 1C3B..1C49
7037             UNKNOWN,                  // 1C4A..1C4C
7038             LEPCHA,                   // 1C4D..1C4F
7039             OL_CHIKI,                 // 1C50..1C7F
7040             CYRILLIC,                 // 1C80..1C88
7041             UNKNOWN,                  // 1C89..1C8F
7042             GEORGIAN,                 // 1C90..1CBA
7043             UNKNOWN,                  // 1CBB..1CBC
7044             GEORGIAN,                 // 1CBD..1CBF
7045             SUNDANESE,                // 1CC0..1CC7
7046             UNKNOWN,                  // 1CC8..1CCF
7047             INHERITED,                // 1CD0..1CD2
7048             COMMON,                   // 1CD3
7049             INHERITED,                // 1CD4..1CE0
7050             COMMON,                   // 1CE1
7051             INHERITED,                // 1CE2..1CE8
7052             COMMON,                   // 1CE9..1CEC
7053             INHERITED,                // 1CED
7054             COMMON,                   // 1CEE..1CF3
7055             INHERITED,                // 1CF4
7056             COMMON,                   // 1CF5..1CF7
7057             INHERITED,                // 1CF8..1CF9
7058             COMMON,                   // 1CFA
7059             UNKNOWN,                  // 1CFB..1CFF
7060             LATIN,                    // 1D00..1D25
7061             GREEK,                    // 1D26..1D2A
7062             CYRILLIC,                 // 1D2B
7063             LATIN,                    // 1D2C..1D5C
7064             GREEK,                    // 1D5D..1D61
7065             LATIN,                    // 1D62..1D65
7066             GREEK,                    // 1D66..1D6A
7067             LATIN,                    // 1D6B..1D77
7068             CYRILLIC,                 // 1D78
7069             LATIN,                    // 1D79..1DBE
7070             GREEK,                    // 1DBF
7071             INHERITED,                // 1DC0..1DF9
7072             UNKNOWN,                  // 1DFA
7073             INHERITED,                // 1DFB..1DFF
7074             LATIN,                    // 1E00..1EFF
7075             GREEK,                    // 1F00..1F15
7076             UNKNOWN,                  // 1F16..1F17
7077             GREEK,                    // 1F18..1F1D
7078             UNKNOWN,                  // 1F1E..1F1F
7079             GREEK,                    // 1F20..1F45
7080             UNKNOWN,                  // 1F46..1F47
7081             GREEK,                    // 1F48..1F4D
7082             UNKNOWN,                  // 1F4E..1F4F
7083             GREEK,                    // 1F50..1F57
7084             UNKNOWN,                  // 1F58
7085             GREEK,                    // 1F59
7086             UNKNOWN,                  // 1F5A
7087             GREEK,                    // 1F5B
7088             UNKNOWN,                  // 1F5C
7089             GREEK,                    // 1F5D
7090             UNKNOWN,                  // 1F5E
7091             GREEK,                    // 1F5F..1F7D
7092             UNKNOWN,                  // 1F7E..1F7F
7093             GREEK,                    // 1F80..1FB4
7094             UNKNOWN,                  // 1FB5
7095             GREEK,                    // 1FB6..1FC4
7096             UNKNOWN,                  // 1FC5
7097             GREEK,                    // 1FC6..1FD3
7098             UNKNOWN,                  // 1FD4..1FD5
7099             GREEK,                    // 1FD6..1FDB
7100             UNKNOWN,                  // 1FDC
7101             GREEK,                    // 1FDD..1FEF
7102             UNKNOWN,                  // 1FF0..1FF1
7103             GREEK,                    // 1FF2..1FF4
7104             UNKNOWN,                  // 1FF5
7105             GREEK,                    // 1FF6..1FFE
7106             UNKNOWN,                  // 1FFF
7107             COMMON,                   // 2000..200B
7108             INHERITED,                // 200C..200D
7109             COMMON,                   // 200E..2064
7110             UNKNOWN,                  // 2065
7111             COMMON,                   // 2066..2070
7112             LATIN,                    // 2071
7113             UNKNOWN,                  // 2072..2073
7114             COMMON,                   // 2074..207E
7115             LATIN,                    // 207F
7116             COMMON,                   // 2080..208E
7117             UNKNOWN,                  // 208F
7118             LATIN,                    // 2090..209C
7119             UNKNOWN,                  // 209D..209F
7120             COMMON,                   // 20A0..20BF
7121             UNKNOWN,                  // 20C0..20CF
7122             INHERITED,                // 20D0..20F0
7123             UNKNOWN,                  // 20F1..20FF
7124             COMMON,                   // 2100..2125
7125             GREEK,                    // 2126
7126             COMMON,                   // 2127..2129
7127             LATIN,                    // 212A..212B
7128             COMMON,                   // 212C..2131
7129             LATIN,                    // 2132
7130             COMMON,                   // 2133..214D
7131             LATIN,                    // 214E
7132             COMMON,                   // 214F..215F
7133             LATIN,                    // 2160..2188
7134             COMMON,                   // 2189..218B
7135             UNKNOWN,                  // 218C..218F
7136             COMMON,                   // 2190..2426
7137             UNKNOWN,                  // 2427..243F
7138             COMMON,                   // 2440..244A
7139             UNKNOWN,                  // 244B..245F
7140             COMMON,                   // 2460..27FF
7141             BRAILLE,                  // 2800..28FF
7142             COMMON,                   // 2900..2B73
7143             UNKNOWN,                  // 2B74..2B75
7144             COMMON,                   // 2B76..2B95
7145             UNKNOWN,                  // 2B96..2B97
7146             COMMON,                   // 2B98..2BFF
7147             GLAGOLITIC,               // 2C00..2C2E
7148             UNKNOWN,                  // 2C2F
7149             GLAGOLITIC,               // 2C30..2C5E
7150             UNKNOWN,                  // 2C5F
7151             LATIN,                    // 2C60..2C7F
7152             COPTIC,                   // 2C80..2CF3
7153             UNKNOWN,                  // 2CF4..2CF8
7154             COPTIC,                   // 2CF9..2CFF
7155             GEORGIAN,                 // 2D00..2D25
7156             UNKNOWN,                  // 2D26
7157             GEORGIAN,                 // 2D27
7158             UNKNOWN,                  // 2D28..2D2C
7159             GEORGIAN,                 // 2D2D
7160             UNKNOWN,                  // 2D2E..2D2F
7161             TIFINAGH,                 // 2D30..2D67
7162             UNKNOWN,                  // 2D68..2D6E
7163             TIFINAGH,                 // 2D6F..2D70
7164             UNKNOWN,                  // 2D71..2D7E
7165             TIFINAGH,                 // 2D7F
7166             ETHIOPIC,                 // 2D80..2D96
7167             UNKNOWN,                  // 2D97..2D9F
7168             ETHIOPIC,                 // 2DA0..2DA6
7169             UNKNOWN,                  // 2DA7
7170             ETHIOPIC,                 // 2DA8..2DAE
7171             UNKNOWN,                  // 2DAF
7172             ETHIOPIC,                 // 2DB0..2DB6
7173             UNKNOWN,                  // 2DB7
7174             ETHIOPIC,                 // 2DB8..2DBE
7175             UNKNOWN,                  // 2DBF
7176             ETHIOPIC,                 // 2DC0..2DC6
7177             UNKNOWN,                  // 2DC7
7178             ETHIOPIC,                 // 2DC8..2DCE
7179             UNKNOWN,                  // 2DCF
7180             ETHIOPIC,                 // 2DD0..2DD6
7181             UNKNOWN,                  // 2DD7
7182             ETHIOPIC,                 // 2DD8..2DDE
7183             UNKNOWN,                  // 2DDF
7184             CYRILLIC,                 // 2DE0..2DFF
7185             COMMON,                   // 2E00..2E4F
7186             UNKNOWN,                  // 2E50..2E7F
7187             HAN,                      // 2E80..2E99
7188             UNKNOWN,                  // 2E9A
7189             HAN,                      // 2E9B..2EF3
7190             UNKNOWN,                  // 2EF4..2EFF
7191             HAN,                      // 2F00..2FD5
7192             UNKNOWN,                  // 2FD6..2FEF
7193             COMMON,                   // 2FF0..2FFB
7194             UNKNOWN,                  // 2FFC..2FFF
7195             COMMON,                   // 3000..3004
7196             HAN,                      // 3005
7197             COMMON,                   // 3006
7198             HAN,                      // 3007
7199             COMMON,                   // 3008..3020
7200             HAN,                      // 3021..3029
7201             INHERITED,                // 302A..302D
7202             HANGUL,                   // 302E..302F
7203             COMMON,                   // 3030..3037
7204             HAN,                      // 3038..303B
7205             COMMON,                   // 303C..303F
7206             UNKNOWN,                  // 3040
7207             HIRAGANA,                 // 3041..3096
7208             UNKNOWN,                  // 3097..3098
7209             INHERITED,                // 3099..309A
7210             COMMON,                   // 309B..309C
7211             HIRAGANA,                 // 309D..309F
7212             COMMON,                   // 30A0
7213             KATAKANA,                 // 30A1..30FA
7214             COMMON,                   // 30FB..30FC
7215             KATAKANA,                 // 30FD..30FF
7216             UNKNOWN,                  // 3100..3104
7217             BOPOMOFO,                 // 3105..312F
7218             UNKNOWN,                  // 3130
7219             HANGUL,                   // 3131..318E
7220             UNKNOWN,                  // 318F
7221             COMMON,                   // 3190..319F
7222             BOPOMOFO,                 // 31A0..31BA
7223             UNKNOWN,                  // 31BB..31BF
7224             COMMON,                   // 31C0..31E3
7225             UNKNOWN,                  // 31E4..31EF
7226             KATAKANA,                 // 31F0..31FF
7227             HANGUL,                   // 3200..321E
7228             UNKNOWN,                  // 321F
7229             COMMON,                   // 3220..325F
7230             HANGUL,                   // 3260..327E
7231             COMMON,                   // 327F..32CF
7232             KATAKANA,                 // 32D0..32FE
7233             COMMON,                   // 32FF
7234             KATAKANA,                 // 3300..3357
7235             COMMON,                   // 3358..33FF
7236             HAN,                      // 3400..4DB5
7237             UNKNOWN,                  // 4DB6..4DBF
7238             COMMON,                   // 4DC0..4DFF
7239             HAN,                      // 4E00..9FEF
7240             UNKNOWN,                  // 9FF0..9FFF
7241             YI,                       // A000..A48C
7242             UNKNOWN,                  // A48D..A48F
7243             YI,                       // A490..A4C6
7244             UNKNOWN,                  // A4C7..A4CF
7245             LISU,                     // A4D0..A4FF
7246             VAI,                      // A500..A62B
7247             UNKNOWN,                  // A62C..A63F
7248             CYRILLIC,                 // A640..A69F
7249             BAMUM,                    // A6A0..A6F7
7250             UNKNOWN,                  // A6F8..A6FF
7251             COMMON,                   // A700..A721
7252             LATIN,                    // A722..A787
7253             COMMON,                   // A788..A78A
7254             LATIN,                    // A78B..A7BF
7255             UNKNOWN,                  // A7C0..A7C1
7256             LATIN,                    // A7C2..A7C6
7257             UNKNOWN,                  // A7C7..A7F6
7258             LATIN,                    // A7F7..A7FF
7259             SYLOTI_NAGRI,             // A800..A82B
7260             UNKNOWN,                  // A82C..A82F
7261             COMMON,                   // A830..A839
7262             UNKNOWN,                  // A83A..A83F
7263             PHAGS_PA,                 // A840..A877
7264             UNKNOWN,                  // A878..A87F
7265             SAURASHTRA,               // A880..A8C5
7266             UNKNOWN,                  // A8C6..A8CD
7267             SAURASHTRA,               // A8CE..A8D9
7268             UNKNOWN,                  // A8DA..A8DF
7269             DEVANAGARI,               // A8E0..A8FF
7270             KAYAH_LI,                 // A900..A92D
7271             COMMON,                   // A92E
7272             KAYAH_LI,                 // A92F
7273             REJANG,                   // A930..A953
7274             UNKNOWN,                  // A954..A95E
7275             REJANG,                   // A95F
7276             HANGUL,                   // A960..A97C
7277             UNKNOWN,                  // A97D..A97F
7278             JAVANESE,                 // A980..A9CD
7279             UNKNOWN,                  // A9CE
7280             COMMON,                   // A9CF
7281             JAVANESE,                 // A9D0..A9D9
7282             UNKNOWN,                  // A9DA..A9DD
7283             JAVANESE,                 // A9DE..A9DF
7284             MYANMAR,                  // A9E0..A9FE
7285             UNKNOWN,                  // A9FF
7286             CHAM,                     // AA00..AA36
7287             UNKNOWN,                  // AA37..AA3F
7288             CHAM,                     // AA40..AA4D
7289             UNKNOWN,                  // AA4E..AA4F
7290             CHAM,                     // AA50..AA59
7291             UNKNOWN,                  // AA5A..AA5B
7292             CHAM,                     // AA5C..AA5F
7293             MYANMAR,                  // AA60..AA7F
7294             TAI_VIET,                 // AA80..AAC2
7295             UNKNOWN,                  // AAC3..AADA
7296             TAI_VIET,                 // AADB..AADF
7297             MEETEI_MAYEK,             // AAE0..AAF6
7298             UNKNOWN,                  // AAF7..AB00
7299             ETHIOPIC,                 // AB01..AB06
7300             UNKNOWN,                  // AB07..AB08
7301             ETHIOPIC,                 // AB09..AB0E
7302             UNKNOWN,                  // AB0F..AB10
7303             ETHIOPIC,                 // AB11..AB16
7304             UNKNOWN,                  // AB17..AB1F
7305             ETHIOPIC,                 // AB20..AB26
7306             UNKNOWN,                  // AB27
7307             ETHIOPIC,                 // AB28..AB2E
7308             UNKNOWN,                  // AB2F
7309             LATIN,                    // AB30..AB5A
7310             COMMON,                   // AB5B
7311             LATIN,                    // AB5C..AB64
7312             GREEK,                    // AB65
7313             LATIN,                    // AB66..AB67
7314             UNKNOWN,                  // AB68..AB6F
7315             CHEROKEE,                 // AB70..ABBF
7316             MEETEI_MAYEK,             // ABC0..ABED
7317             UNKNOWN,                  // ABEE..ABEF
7318             MEETEI_MAYEK,             // ABF0..ABF9
7319             UNKNOWN,                  // ABFA..ABFF
7320             HANGUL,                   // AC00..D7A3
7321             UNKNOWN,                  // D7A4..D7AF
7322             HANGUL,                   // D7B0..D7C6
7323             UNKNOWN,                  // D7C7..D7CA
7324             HANGUL,                   // D7CB..D7FB
7325             UNKNOWN,                  // D7FC..F8FF
7326             HAN,                      // F900..FA6D
7327             UNKNOWN,                  // FA6E..FA6F
7328             HAN,                      // FA70..FAD9
7329             UNKNOWN,                  // FADA..FAFF
7330             LATIN,                    // FB00..FB06
7331             UNKNOWN,                  // FB07..FB12
7332             ARMENIAN,                 // FB13..FB17
7333             UNKNOWN,                  // FB18..FB1C
7334             HEBREW,                   // FB1D..FB36
7335             UNKNOWN,                  // FB37
7336             HEBREW,                   // FB38..FB3C
7337             UNKNOWN,                  // FB3D
7338             HEBREW,                   // FB3E
7339             UNKNOWN,                  // FB3F
7340             HEBREW,                   // FB40..FB41
7341             UNKNOWN,                  // FB42
7342             HEBREW,                   // FB43..FB44
7343             UNKNOWN,                  // FB45
7344             HEBREW,                   // FB46..FB4F
7345             ARABIC,                   // FB50..FBC1
7346             UNKNOWN,                  // FBC2..FBD2
7347             ARABIC,                   // FBD3..FD3D
7348             COMMON,                   // FD3E..FD3F
7349             UNKNOWN,                  // FD40..FD4F
7350             ARABIC,                   // FD50..FD8F
7351             UNKNOWN,                  // FD90..FD91
7352             ARABIC,                   // FD92..FDC7
7353             UNKNOWN,                  // FDC8..FDEF
7354             ARABIC,                   // FDF0..FDFD
7355             UNKNOWN,                  // FDFE..FDFF
7356             INHERITED,                // FE00..FE0F
7357             COMMON,                   // FE10..FE19
7358             UNKNOWN,                  // FE1A..FE1F
7359             INHERITED,                // FE20..FE2D
7360             CYRILLIC,                 // FE2E..FE2F
7361             COMMON,                   // FE30..FE52
7362             UNKNOWN,                  // FE53
7363             COMMON,                   // FE54..FE66
7364             UNKNOWN,                  // FE67
7365             COMMON,                   // FE68..FE6B
7366             UNKNOWN,                  // FE6C..FE6F
7367             ARABIC,                   // FE70..FE74
7368             UNKNOWN,                  // FE75
7369             ARABIC,                   // FE76..FEFC
7370             UNKNOWN,                  // FEFD..FEFE
7371             COMMON,                   // FEFF
7372             UNKNOWN,                  // FF00
7373             COMMON,                   // FF01..FF20
7374             LATIN,                    // FF21..FF3A
7375             COMMON,                   // FF3B..FF40
7376             LATIN,                    // FF41..FF5A
7377             COMMON,                   // FF5B..FF65
7378             KATAKANA,                 // FF66..FF6F
7379             COMMON,                   // FF70
7380             KATAKANA,                 // FF71..FF9D
7381             COMMON,                   // FF9E..FF9F
7382             HANGUL,                   // FFA0..FFBE
7383             UNKNOWN,                  // FFBF..FFC1
7384             HANGUL,                   // FFC2..FFC7
7385             UNKNOWN,                  // FFC8..FFC9
7386             HANGUL,                   // FFCA..FFCF
7387             UNKNOWN,                  // FFD0..FFD1
7388             HANGUL,                   // FFD2..FFD7
7389             UNKNOWN,                  // FFD8..FFD9
7390             HANGUL,                   // FFDA..FFDC
7391             UNKNOWN,                  // FFDD..FFDF
7392             COMMON,                   // FFE0..FFE6
7393             UNKNOWN,                  // FFE7
7394             COMMON,                   // FFE8..FFEE
7395             UNKNOWN,                  // FFEF..FFF8
7396             COMMON,                   // FFF9..FFFD
7397             UNKNOWN,                  // FFFE..FFFF
7398             LINEAR_B,                 // 10000..1000B
7399             UNKNOWN,                  // 1000C
7400             LINEAR_B,                 // 1000D..10026
7401             UNKNOWN,                  // 10027
7402             LINEAR_B,                 // 10028..1003A
7403             UNKNOWN,                  // 1003B
7404             LINEAR_B,                 // 1003C..1003D
7405             UNKNOWN,                  // 1003E
7406             LINEAR_B,                 // 1003F..1004D
7407             UNKNOWN,                  // 1004E..1004F
7408             LINEAR_B,                 // 10050..1005D
7409             UNKNOWN,                  // 1005E..1007F
7410             LINEAR_B,                 // 10080..100FA
7411             UNKNOWN,                  // 100FB..100FF
7412             COMMON,                   // 10100..10102
7413             UNKNOWN,                  // 10103..10106
7414             COMMON,                   // 10107..10133
7415             UNKNOWN,                  // 10134..10136
7416             COMMON,                   // 10137..1013F
7417             GREEK,                    // 10140..1018E
7418             UNKNOWN,                  // 1018F
7419             COMMON,                   // 10190..1019B
7420             UNKNOWN,                  // 1019C..1019F
7421             GREEK,                    // 101A0
7422             UNKNOWN,                  // 101A1..101CF
7423             COMMON,                   // 101D0..101FC
7424             INHERITED,                // 101FD
7425             UNKNOWN,                  // 101FE..1027F
7426             LYCIAN,                   // 10280..1029C
7427             UNKNOWN,                  // 1029D..1029F
7428             CARIAN,                   // 102A0..102D0
7429             UNKNOWN,                  // 102D1..102DF
7430             INHERITED,                // 102E0
7431             COMMON,                   // 102E1..102FB
7432             UNKNOWN,                  // 102FC..102FF
7433             OLD_ITALIC,               // 10300..10323
7434             UNKNOWN,                  // 10324..1032C
7435             OLD_ITALIC,               // 1032D..1032F
7436             GOTHIC,                   // 10330..1034A
7437             UNKNOWN,                  // 1034B..1034F
7438             OLD_PERMIC,               // 10350..1037A
7439             UNKNOWN,                  // 1037B..1037F
7440             UGARITIC,                 // 10380..1039D
7441             UNKNOWN,                  // 1039E
7442             UGARITIC,                 // 1039F
7443             OLD_PERSIAN,              // 103A0..103C3
7444             UNKNOWN,                  // 103C4..103C7
7445             OLD_PERSIAN,              // 103C8..103D5
7446             UNKNOWN,                  // 103D6..103FF
7447             DESERET,                  // 10400..1044F
7448             SHAVIAN,                  // 10450..1047F
7449             OSMANYA,                  // 10480..1049D
7450             UNKNOWN,                  // 1049E..1049F
7451             OSMANYA,                  // 104A0..104A9
7452             UNKNOWN,                  // 104AA..104AF
7453             OSAGE,                    // 104B0..104D3
7454             UNKNOWN,                  // 104D4..104D7
7455             OSAGE,                    // 104D8..104FB
7456             UNKNOWN,                  // 104FC..104FF
7457             ELBASAN,                  // 10500..10527
7458             UNKNOWN,                  // 10528..1052F
7459             CAUCASIAN_ALBANIAN,       // 10530..10563
7460             UNKNOWN,                  // 10564..1056E
7461             CAUCASIAN_ALBANIAN,       // 1056F
7462             UNKNOWN,                  // 10570..105FF
7463             LINEAR_A,                 // 10600..10736
7464             UNKNOWN,                  // 10737..1073F
7465             LINEAR_A,                 // 10740..10755
7466             UNKNOWN,                  // 10756..1075F
7467             LINEAR_A,                 // 10760..10767
7468             UNKNOWN,                  // 10768..107FF
7469             CYPRIOT,                  // 10800..10805
7470             UNKNOWN,                  // 10806..10807
7471             CYPRIOT,                  // 10808
7472             UNKNOWN,                  // 10809
7473             CYPRIOT,                  // 1080A..10835
7474             UNKNOWN,                  // 10836
7475             CYPRIOT,                  // 10837..10838
7476             UNKNOWN,                  // 10839..1083B
7477             CYPRIOT,                  // 1083C
7478             UNKNOWN,                  // 1083D..1083E
7479             CYPRIOT,                  // 1083F
7480             IMPERIAL_ARAMAIC,         // 10840..10855
7481             UNKNOWN,                  // 10856
7482             IMPERIAL_ARAMAIC,         // 10857..1085F
7483             PALMYRENE,                // 10860..1087F
7484             NABATAEAN,                // 10880..1089E
7485             UNKNOWN,                  // 1089F..108A6
7486             NABATAEAN,                // 108A7..108AF
7487             UNKNOWN,                  // 108B0..108DF
7488             HATRAN,                   // 108E0..108F2
7489             UNKNOWN,                  // 108F3
7490             HATRAN,                   // 108F4..108F5
7491             UNKNOWN,                  // 108F6..108FA
7492             HATRAN,                   // 108FB..108FF
7493             PHOENICIAN,               // 10900..1091B
7494             UNKNOWN,                  // 1091C..1091E
7495             PHOENICIAN,               // 1091F
7496             LYDIAN,                   // 10920..10939
7497             UNKNOWN,                  // 1093A..1093E
7498             LYDIAN,                   // 1093F
7499             UNKNOWN,                  // 10940..1097F
7500             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7501             MEROITIC_CURSIVE,         // 109A0..109B7
7502             UNKNOWN,                  // 109B8..109BB
7503             MEROITIC_CURSIVE,         // 109BC..109CF
7504             UNKNOWN,                  // 109D0..109D1
7505             MEROITIC_CURSIVE,         // 109D2..109FF
7506             KHAROSHTHI,               // 10A00..10A03
7507             UNKNOWN,                  // 10A04
7508             KHAROSHTHI,               // 10A05..10A06
7509             UNKNOWN,                  // 10A07..10A0B
7510             KHAROSHTHI,               // 10A0C..10A13
7511             UNKNOWN,                  // 10A14
7512             KHAROSHTHI,               // 10A15..10A17
7513             UNKNOWN,                  // 10A18
7514             KHAROSHTHI,               // 10A19..10A35
7515             UNKNOWN,                  // 10A36..10A37
7516             KHAROSHTHI,               // 10A38..10A3A
7517             UNKNOWN,                  // 10A3B..10A3E
7518             KHAROSHTHI,               // 10A3F..10A48
7519             UNKNOWN,                  // 10A49..10A4F
7520             KHAROSHTHI,               // 10A50..10A58
7521             UNKNOWN,                  // 10A59..10A5F
7522             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7523             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7524             UNKNOWN,                  // 10AA0..10ABF
7525             MANICHAEAN,               // 10AC0..10AE6
7526             UNKNOWN,                  // 10AE7..10AEA
7527             MANICHAEAN,               // 10AEB..10AF6
7528             UNKNOWN,                  // 10AF7..10AFF
7529             AVESTAN,                  // 10B00..10B35
7530             UNKNOWN,                  // 10B36..10B38
7531             AVESTAN,                  // 10B39..10B3F
7532             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7533             UNKNOWN,                  // 10B56..10B57
7534             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7535             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7536             UNKNOWN,                  // 10B73..10B77
7537             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7538             PSALTER_PAHLAVI,          // 10B80..10B91
7539             UNKNOWN,                  // 10B92..10B98
7540             PSALTER_PAHLAVI,          // 10B99..10B9C
7541             UNKNOWN,                  // 10B9D..10BA8
7542             PSALTER_PAHLAVI,          // 10BA9..10BAF
7543             UNKNOWN,                  // 10BB0..10BFF
7544             OLD_TURKIC,               // 10C00..10C48
7545             UNKNOWN,                  // 10C49..10C7F
7546             OLD_HUNGARIAN,            // 10C80..10CB2
7547             UNKNOWN,                  // 10CB3..10CBF
7548             OLD_HUNGARIAN,            // 10CC0..10CF2
7549             UNKNOWN,                  // 10CF3..10CF9
7550             OLD_HUNGARIAN,            // 10CFA..10CFF
7551             HANIFI_ROHINGYA,          // 10D00..10D27
7552             UNKNOWN,                  // 10D28..10D2F
7553             HANIFI_ROHINGYA,          // 10D30..10D39
7554             UNKNOWN,                  // 10D3A..10E5F
7555             ARABIC,                   // 10E60..10E7E
7556             UNKNOWN,                  // 10E7F..10EFF
7557             OLD_SOGDIAN,              // 10F00..10F27
7558             UNKNOWN,                  // 10F28..10F2F
7559             SOGDIAN,                  // 10F30..10F59
7560             UNKNOWN,                  // 10F5A..10FDF
7561             ELYMAIC,                  // 10FE0..10FF6
7562             UNKNOWN,                  // 10FF7..10FFF
7563             BRAHMI,                   // 11000..1104D
7564             UNKNOWN,                  // 1104E..11051
7565             BRAHMI,                   // 11052..1106F
7566             UNKNOWN,                  // 11070..1107E
7567             BRAHMI,                   // 1107F
7568             KAITHI,                   // 11080..110C1
7569             UNKNOWN,                  // 110C2..110CC
7570             KAITHI,                   // 110CD
7571             UNKNOWN,                  // 110CE..110CF
7572             SORA_SOMPENG,             // 110D0..110E8
7573             UNKNOWN,                  // 110E9..110EF
7574             SORA_SOMPENG,             // 110F0..110F9
7575             UNKNOWN,                  // 110FA..110FF
7576             CHAKMA,                   // 11100..11134
7577             UNKNOWN,                  // 11135
7578             CHAKMA,                   // 11136..11146
7579             UNKNOWN,                  // 11147..1114F
7580             MAHAJANI,                 // 11150..11176
7581             UNKNOWN,                  // 11177..1117F
7582             SHARADA,                  // 11180..111CD
7583             UNKNOWN,                  // 111CE..111CF
7584             SHARADA,                  // 111D0..111DF
7585             UNKNOWN,                  // 111E0
7586             SINHALA,                  // 111E1..111F4
7587             UNKNOWN,                  // 111F5..111FF
7588             KHOJKI,                   // 11200..11211
7589             UNKNOWN,                  // 11212
7590             KHOJKI,                   // 11213..1123E
7591             UNKNOWN,                  // 1123F..1127F
7592             MULTANI,                  // 11280..11286
7593             UNKNOWN,                  // 11287
7594             MULTANI,                  // 11288
7595             UNKNOWN,                  // 11289
7596             MULTANI,                  // 1128A..1128D
7597             UNKNOWN,                  // 1128E
7598             MULTANI,                  // 1128F..1129D
7599             UNKNOWN,                  // 1129E
7600             MULTANI,                  // 1129F..112A9
7601             UNKNOWN,                  // 112AA..112AF
7602             KHUDAWADI,                // 112B0..112EA
7603             UNKNOWN,                  // 112EB..112EF
7604             KHUDAWADI,                // 112F0..112F9
7605             UNKNOWN,                  // 112FA..112FF
7606             GRANTHA,                  // 11300..11303
7607             UNKNOWN,                  // 11304
7608             GRANTHA,                  // 11305..1130C
7609             UNKNOWN,                  // 1130D..1130E
7610             GRANTHA,                  // 1130F..11310
7611             UNKNOWN,                  // 11311..11312
7612             GRANTHA,                  // 11313..11328
7613             UNKNOWN,                  // 11329
7614             GRANTHA,                  // 1132A..11330
7615             UNKNOWN,                  // 11331
7616             GRANTHA,                  // 11332..11333
7617             UNKNOWN,                  // 11334
7618             GRANTHA,                  // 11335..11339
7619             UNKNOWN,                  // 1133A
7620             INHERITED,                // 1133B
7621             GRANTHA,                  // 1133C..11344
7622             UNKNOWN,                  // 11345..11346
7623             GRANTHA,                  // 11347..11348
7624             UNKNOWN,                  // 11349..1134A
7625             GRANTHA,                  // 1134B..1134D
7626             UNKNOWN,                  // 1134E..1134F
7627             GRANTHA,                  // 11350
7628             UNKNOWN,                  // 11351..11356
7629             GRANTHA,                  // 11357
7630             UNKNOWN,                  // 11358..1135C
7631             GRANTHA,                  // 1135D..11363
7632             UNKNOWN,                  // 11364..11365
7633             GRANTHA,                  // 11366..1136C
7634             UNKNOWN,                  // 1136D..1136F
7635             GRANTHA,                  // 11370..11374
7636             UNKNOWN,                  // 11375..113FF
7637             NEWA,                     // 11400..11459
7638             UNKNOWN,                  // 1145A
7639             NEWA,                     // 1145B
7640             UNKNOWN,                  // 1145C
7641             NEWA,                     // 1145D..1145F
7642             UNKNOWN,                  // 11460..1147F
7643             TIRHUTA,                  // 11480..114C7
7644             UNKNOWN,                  // 114C8..114CF
7645             TIRHUTA,                  // 114D0..114D9
7646             UNKNOWN,                  // 114DA..1157F
7647             SIDDHAM,                  // 11580..115B5
7648             UNKNOWN,                  // 115B6..115B7
7649             SIDDHAM,                  // 115B8..115DD
7650             UNKNOWN,                  // 115DE..115FF
7651             MODI,                     // 11600..11644
7652             UNKNOWN,                  // 11645..1164F
7653             MODI,                     // 11650..11659
7654             UNKNOWN,                  // 1165A..1165F
7655             MONGOLIAN,                // 11660..1166C
7656             UNKNOWN,                  // 1166D..1167F
7657             TAKRI,                    // 11680..116B8
7658             UNKNOWN,                  // 116B9..116BF
7659             TAKRI,                    // 116C0..116C9
7660             UNKNOWN,                  // 116CA..116FF
7661             AHOM,                     // 11700..1171A
7662             UNKNOWN,                  // 1171B..1171C
7663             AHOM,                     // 1171D..1172B
7664             UNKNOWN,                  // 1172C..1172F
7665             AHOM,                     // 11730..1173F
7666             UNKNOWN,                  // 11740..117FF
7667             DOGRA,                    // 11800..1183B
7668             UNKNOWN,                  // 1183C..1189F
7669             WARANG_CITI,              // 118A0..118F2
7670             UNKNOWN,                  // 118F3..118FE
7671             WARANG_CITI,              // 118FF
7672             UNKNOWN,                  // 11900..1199F
7673             NANDINAGARI,              // 119A0..119A7
7674             UNKNOWN,                  // 119A8..119A9
7675             NANDINAGARI,              // 119AA..119D7
7676             UNKNOWN,                  // 119D8..119D9
7677             NANDINAGARI,              // 119DA..119E4
7678             UNKNOWN,                  // 119E5..119FF
7679             ZANABAZAR_SQUARE,         // 11A00..11A47
7680             UNKNOWN,                  // 11A48..11A4F
7681             SOYOMBO,                  // 11A50..11AA2
7682             UNKNOWN,                  // 11AA3..11ABF
7683             PAU_CIN_HAU,              // 11AC0..11AF8
7684             UNKNOWN,                  // 11AF9..11BFF
7685             BHAIKSUKI,                // 11C00..11C08
7686             UNKNOWN,                  // 11C09
7687             BHAIKSUKI,                // 11C0A..11C36
7688             UNKNOWN,                  // 11C37
7689             BHAIKSUKI,                // 11C38..11C45
7690             UNKNOWN,                  // 11C46..11C4F
7691             BHAIKSUKI,                // 11C50..11C6C
7692             UNKNOWN,                  // 11C6D..11C6F
7693             MARCHEN,                  // 11C70..11C8F
7694             UNKNOWN,                  // 11C90..11C91
7695             MARCHEN,                  // 11C92..11CA7
7696             UNKNOWN,                  // 11CA8
7697             MARCHEN,                  // 11CA9..11CB6
7698             UNKNOWN,                  // 11CB7..11CFF
7699             MASARAM_GONDI,            // 11D00..11D06
7700             UNKNOWN,                  // 11D07
7701             MASARAM_GONDI,            // 11D08..11D09
7702             UNKNOWN,                  // 11D0A
7703             MASARAM_GONDI,            // 11D0B..11D36
7704             UNKNOWN,                  // 11D37..11D39
7705             MASARAM_GONDI,            // 11D3A
7706             UNKNOWN,                  // 11D3B
7707             MASARAM_GONDI,            // 11D3C..11D3D
7708             UNKNOWN,                  // 11D3E
7709             MASARAM_GONDI,            // 11D3F..11D47
7710             UNKNOWN,                  // 11D48..11D4F
7711             MASARAM_GONDI,            // 11D50..11D59
7712             UNKNOWN,                  // 11D5A..11D5F
7713             GUNJALA_GONDI,            // 11D60..11D65
7714             UNKNOWN,                  // 11D66
7715             GUNJALA_GONDI,            // 11D67..11D68
7716             UNKNOWN,                  // 11D69
7717             GUNJALA_GONDI,            // 11D6A..11D8E
7718             UNKNOWN,                  // 11D8F
7719             GUNJALA_GONDI,            // 11D90..11D91
7720             UNKNOWN,                  // 11D92
7721             GUNJALA_GONDI,            // 11D93..11D98
7722             UNKNOWN,                  // 11D99..11D9F
7723             GUNJALA_GONDI,            // 11DA0..11DA9
7724             UNKNOWN,                  // 11DAA..11EDF
7725             MAKASAR,                  // 11EE0..11EF8
7726             UNKNOWN,                  // 11EF9..11FBF
7727             TAMIL,                    // 11FC0..11FF1
7728             UNKNOWN,                  // 11FF2..11FFE
7729             TAMIL,                    // 11FFF
7730             CUNEIFORM,                // 12000..12399
7731             UNKNOWN,                  // 1239A..123FF
7732             CUNEIFORM,                // 12400..1246E
7733             UNKNOWN,                  // 1246F
7734             CUNEIFORM,                // 12470..12474
7735             UNKNOWN,                  // 12475..1247F
7736             CUNEIFORM,                // 12480..12543
7737             UNKNOWN,                  // 12544..12FFF
7738             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
7739             UNKNOWN,                  // 1342F
7740             EGYPTIAN_HIEROGLYPHS,     // 13430..13438
7741             UNKNOWN,                  // 13439..143FF
7742             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
7743             UNKNOWN,                  // 14647..167FF
7744             BAMUM,                    // 16800..16A38
7745             UNKNOWN,                  // 16A39..16A3F
7746             MRO,                      // 16A40..16A5E
7747             UNKNOWN,                  // 16A5F
7748             MRO,                      // 16A60..16A69
7749             UNKNOWN,                  // 16A6A..16A6D
7750             MRO,                      // 16A6E..16A6F
7751             UNKNOWN,                  // 16A70..16ACF
7752             BASSA_VAH,                // 16AD0..16AED
7753             UNKNOWN,                  // 16AEE..16AEF
7754             BASSA_VAH,                // 16AF0..16AF5
7755             UNKNOWN,                  // 16AF6..16AFF
7756             PAHAWH_HMONG,             // 16B00..16B45
7757             UNKNOWN,                  // 16B46..16B4F
7758             PAHAWH_HMONG,             // 16B50..16B59
7759             UNKNOWN,                  // 16B5A
7760             PAHAWH_HMONG,             // 16B5B..16B61
7761             UNKNOWN,                  // 16B62
7762             PAHAWH_HMONG,             // 16B63..16B77
7763             UNKNOWN,                  // 16B78..16B7C
7764             PAHAWH_HMONG,             // 16B7D..16B8F
7765             UNKNOWN,                  // 16B90..16E3F
7766             MEDEFAIDRIN,              // 16E40..16E9A
7767             UNKNOWN,                  // 16E9B..16EFF
7768             MIAO,                     // 16F00..16F4A
7769             UNKNOWN,                  // 16F4B..16F4E
7770             MIAO,                     // 16F4F..16F87
7771             UNKNOWN,                  // 16F88..16F8E
7772             MIAO,                     // 16F8F..16F9F
7773             UNKNOWN,                  // 16FA0..16FDF
7774             TANGUT,                   // 16FE0
7775             NUSHU,                    // 16FE1
7776             COMMON,                   // 16FE2..16FE3
7777             UNKNOWN,                  // 16FE4..16FFF
7778             TANGUT,                   // 17000..187F7
7779             UNKNOWN,                  // 187F8..187FF
7780             TANGUT,                   // 18800..18AF2
7781             UNKNOWN,                  // 18AF3..1AFFF
7782             KATAKANA,                 // 1B000
7783             HIRAGANA,                 // 1B001..1B11E
7784             UNKNOWN,                  // 1B11F..1B14F
7785             HIRAGANA,                 // 1B150..1B152
7786             UNKNOWN,                  // 1B153..1B163
7787             KATAKANA,                 // 1B164..1B167
7788             UNKNOWN,                  // 1B168..1B16F
7789             NUSHU,                    // 1B170..1B2FB
7790             UNKNOWN,                  // 1B2FC..1BBFF
7791             DUPLOYAN,                 // 1BC00..1BC6A
7792             UNKNOWN,                  // 1BC6B..1BC6F
7793             DUPLOYAN,                 // 1BC70..1BC7C
7794             UNKNOWN,                  // 1BC7D..1BC7F
7795             DUPLOYAN,                 // 1BC80..1BC88
7796             UNKNOWN,                  // 1BC89..1BC8F
7797             DUPLOYAN,                 // 1BC90..1BC99
7798             UNKNOWN,                  // 1BC9A..1BC9B
7799             DUPLOYAN,                 // 1BC9C..1BC9F
7800             COMMON,                   // 1BCA0..1BCA3
7801             UNKNOWN,                  // 1BCA4..1CFFF
7802             COMMON,                   // 1D000..1D0F5
7803             UNKNOWN,                  // 1D0F6..1D0FF
7804             COMMON,                   // 1D100..1D126
7805             UNKNOWN,                  // 1D127..1D128
7806             COMMON,                   // 1D129..1D166
7807             INHERITED,                // 1D167..1D169
7808             COMMON,                   // 1D16A..1D17A
7809             INHERITED,                // 1D17B..1D182
7810             COMMON,                   // 1D183..1D184
7811             INHERITED,                // 1D185..1D18B
7812             COMMON,                   // 1D18C..1D1A9
7813             INHERITED,                // 1D1AA..1D1AD
7814             COMMON,                   // 1D1AE..1D1E8
7815             UNKNOWN,                  // 1D1E9..1D1FF
7816             GREEK,                    // 1D200..1D245
7817             UNKNOWN,                  // 1D246..1D2DF
7818             COMMON,                   // 1D2E0..1D2F3
7819             UNKNOWN,                  // 1D2F4..1D2FF
7820             COMMON,                   // 1D300..1D356
7821             UNKNOWN,                  // 1D357..1D35F
7822             COMMON,                   // 1D360..1D378
7823             UNKNOWN,                  // 1D379..1D3FF
7824             COMMON,                   // 1D400..1D454
7825             UNKNOWN,                  // 1D455
7826             COMMON,                   // 1D456..1D49C
7827             UNKNOWN,                  // 1D49D
7828             COMMON,                   // 1D49E..1D49F
7829             UNKNOWN,                  // 1D4A0..1D4A1
7830             COMMON,                   // 1D4A2
7831             UNKNOWN,                  // 1D4A3..1D4A4
7832             COMMON,                   // 1D4A5..1D4A6
7833             UNKNOWN,                  // 1D4A7..1D4A8
7834             COMMON,                   // 1D4A9..1D4AC
7835             UNKNOWN,                  // 1D4AD
7836             COMMON,                   // 1D4AE..1D4B9
7837             UNKNOWN,                  // 1D4BA
7838             COMMON,                   // 1D4BB
7839             UNKNOWN,                  // 1D4BC
7840             COMMON,                   // 1D4BD..1D4C3
7841             UNKNOWN,                  // 1D4C4
7842             COMMON,                   // 1D4C5..1D505
7843             UNKNOWN,                  // 1D506
7844             COMMON,                   // 1D507..1D50A
7845             UNKNOWN,                  // 1D50B..1D50C
7846             COMMON,                   // 1D50D..1D514
7847             UNKNOWN,                  // 1D515
7848             COMMON,                   // 1D516..1D51C
7849             UNKNOWN,                  // 1D51D
7850             COMMON,                   // 1D51E..1D539
7851             UNKNOWN,                  // 1D53A
7852             COMMON,                   // 1D53B..1D53E
7853             UNKNOWN,                  // 1D53F
7854             COMMON,                   // 1D540..1D544
7855             UNKNOWN,                  // 1D545
7856             COMMON,                   // 1D546
7857             UNKNOWN,                  // 1D547..1D549
7858             COMMON,                   // 1D54A..1D550
7859             UNKNOWN,                  // 1D551
7860             COMMON,                   // 1D552..1D6A5
7861             UNKNOWN,                  // 1D6A6..1D6A7
7862             COMMON,                   // 1D6A8..1D7CB
7863             UNKNOWN,                  // 1D7CC..1D7CD
7864             COMMON,                   // 1D7CE..1D7FF
7865             SIGNWRITING,              // 1D800..1DA8B
7866             UNKNOWN,                  // 1DA8C..1DA9A
7867             SIGNWRITING,              // 1DA9B..1DA9F
7868             UNKNOWN,                  // 1DAA0
7869             SIGNWRITING,              // 1DAA1..1DAAF
7870             UNKNOWN,                  // 1DAB0..1DFFF
7871             GLAGOLITIC,               // 1E000..1E006
7872             UNKNOWN,                  // 1E007
7873             GLAGOLITIC,               // 1E008..1E018
7874             UNKNOWN,                  // 1E019..1E01A
7875             GLAGOLITIC,               // 1E01B..1E021
7876             UNKNOWN,                  // 1E022
7877             GLAGOLITIC,               // 1E023..1E024
7878             UNKNOWN,                  // 1E025
7879             GLAGOLITIC,               // 1E026..1E02A
7880             UNKNOWN,                  // 1E02B..1E0FF
7881             NYIAKENG_PUACHUE_HMONG,   // 1E100..1E12C
7882             UNKNOWN,                  // 1E12D..1E12F
7883             NYIAKENG_PUACHUE_HMONG,   // 1E130..1E13D
7884             UNKNOWN,                  // 1E13E..1E13F
7885             NYIAKENG_PUACHUE_HMONG,   // 1E140..1E149
7886             UNKNOWN,                  // 1E14A..1E14D
7887             NYIAKENG_PUACHUE_HMONG,   // 1E14E..1E14F
7888             UNKNOWN,                  // 1E150..1E2BF
7889             WANCHO,                   // 1E2C0..1E2F9
7890             UNKNOWN,                  // 1E2FA..1E2FE
7891             WANCHO,                   // 1E2FF
7892             UNKNOWN,                  // 1E300..1E7FF
7893             MENDE_KIKAKUI,            // 1E800..1E8C4
7894             UNKNOWN,                  // 1E8C5..1E8C6
7895             MENDE_KIKAKUI,            // 1E8C7..1E8D6
7896             UNKNOWN,                  // 1E8D7..1E8FF
7897             ADLAM,                    // 1E900..1E94B
7898             UNKNOWN,                  // 1E94C..1E94F
7899             ADLAM,                    // 1E950..1E959
7900             UNKNOWN,                  // 1E95A..1E95D
7901             ADLAM,                    // 1E95E..1E95F
7902             UNKNOWN,                  // 1E960..1EC70
7903             COMMON,                   // 1EC71..1ECB4
7904             UNKNOWN,                  // 1ECB5..1ED00
7905             COMMON,                   // 1ED01..1ED3D
7906             UNKNOWN,                  // 1ED3E..1EDFF
7907             ARABIC,                   // 1EE00..1EE03
7908             UNKNOWN,                  // 1EE04
7909             ARABIC,                   // 1EE05..1EE1F
7910             UNKNOWN,                  // 1EE20
7911             ARABIC,                   // 1EE21..1EE22
7912             UNKNOWN,                  // 1EE23
7913             ARABIC,                   // 1EE24
7914             UNKNOWN,                  // 1EE25..1EE26
7915             ARABIC,                   // 1EE27
7916             UNKNOWN,                  // 1EE28
7917             ARABIC,                   // 1EE29..1EE32
7918             UNKNOWN,                  // 1EE33
7919             ARABIC,                   // 1EE34..1EE37
7920             UNKNOWN,                  // 1EE38
7921             ARABIC,                   // 1EE39
7922             UNKNOWN,                  // 1EE3A
7923             ARABIC,                   // 1EE3B
7924             UNKNOWN,                  // 1EE3C..1EE41
7925             ARABIC,                   // 1EE42
7926             UNKNOWN,                  // 1EE43..1EE46
7927             ARABIC,                   // 1EE47
7928             UNKNOWN,                  // 1EE48
7929             ARABIC,                   // 1EE49
7930             UNKNOWN,                  // 1EE4A
7931             ARABIC,                   // 1EE4B
7932             UNKNOWN,                  // 1EE4C
7933             ARABIC,                   // 1EE4D..1EE4F
7934             UNKNOWN,                  // 1EE50
7935             ARABIC,                   // 1EE51..1EE52
7936             UNKNOWN,                  // 1EE53
7937             ARABIC,                   // 1EE54
7938             UNKNOWN,                  // 1EE55..1EE56
7939             ARABIC,                   // 1EE57
7940             UNKNOWN,                  // 1EE58
7941             ARABIC,                   // 1EE59
7942             UNKNOWN,                  // 1EE5A
7943             ARABIC,                   // 1EE5B
7944             UNKNOWN,                  // 1EE5C
7945             ARABIC,                   // 1EE5D
7946             UNKNOWN,                  // 1EE5E
7947             ARABIC,                   // 1EE5F
7948             UNKNOWN,                  // 1EE60
7949             ARABIC,                   // 1EE61..1EE62
7950             UNKNOWN,                  // 1EE63
7951             ARABIC,                   // 1EE64
7952             UNKNOWN,                  // 1EE65..1EE66
7953             ARABIC,                   // 1EE67..1EE6A
7954             UNKNOWN,                  // 1EE6B
7955             ARABIC,                   // 1EE6C..1EE72
7956             UNKNOWN,                  // 1EE73
7957             ARABIC,                   // 1EE74..1EE77
7958             UNKNOWN,                  // 1EE78
7959             ARABIC,                   // 1EE79..1EE7C
7960             UNKNOWN,                  // 1EE7D
7961             ARABIC,                   // 1EE7E
7962             UNKNOWN,                  // 1EE7F
7963             ARABIC,                   // 1EE80..1EE89
7964             UNKNOWN,                  // 1EE8A
7965             ARABIC,                   // 1EE8B..1EE9B
7966             UNKNOWN,                  // 1EE9C..1EEA0
7967             ARABIC,                   // 1EEA1..1EEA3
7968             UNKNOWN,                  // 1EEA4
7969             ARABIC,                   // 1EEA5..1EEA9
7970             UNKNOWN,                  // 1EEAA
7971             ARABIC,                   // 1EEAB..1EEBB
7972             UNKNOWN,                  // 1EEBC..1EEEF
7973             ARABIC,                   // 1EEF0..1EEF1
7974             UNKNOWN,                  // 1EEF2..1EFFF
7975             COMMON,                   // 1F000..1F02B
7976             UNKNOWN,                  // 1F02C..1F02F
7977             COMMON,                   // 1F030..1F093
7978             UNKNOWN,                  // 1F094..1F09F
7979             COMMON,                   // 1F0A0..1F0AE
7980             UNKNOWN,                  // 1F0AF..1F0B0
7981             COMMON,                   // 1F0B1..1F0BF
7982             UNKNOWN,                  // 1F0C0
7983             COMMON,                   // 1F0C1..1F0CF
7984             UNKNOWN,                  // 1F0D0
7985             COMMON,                   // 1F0D1..1F0F5
7986             UNKNOWN,                  // 1F0F6..1F0FF
7987             COMMON,                   // 1F100..1F10C
7988             UNKNOWN,                  // 1F10D..1F10F
7989             COMMON,                   // 1F110..1F16C
7990             UNKNOWN,                  // 1F16D..1F16F
7991             COMMON,                   // 1F170..1F1AC
7992             UNKNOWN,                  // 1F1AD..1F1E5
7993             COMMON,                   // 1F1E6..1F1FF
7994             HIRAGANA,                 // 1F200
7995             COMMON,                   // 1F201..1F202
7996             UNKNOWN,                  // 1F203..1F20F
7997             COMMON,                   // 1F210..1F23B
7998             UNKNOWN,                  // 1F23C..1F23F
7999             COMMON,                   // 1F240..1F248
8000             UNKNOWN,                  // 1F249..1F24F
8001             COMMON,                   // 1F250..1F251
8002             UNKNOWN,                  // 1F252..1F25F
8003             COMMON,                   // 1F260..1F265
8004             UNKNOWN,                  // 1F266..1F2FF
8005             COMMON,                   // 1F300..1F6D5
8006             UNKNOWN,                  // 1F6D6..1F6DF
8007             COMMON,                   // 1F6E0..1F6EC
8008             UNKNOWN,                  // 1F6ED..1F6EF
8009             COMMON,                   // 1F6F0..1F6FA
8010             UNKNOWN,                  // 1F6FB..1F6FF
8011             COMMON,                   // 1F700..1F773
8012             UNKNOWN,                  // 1F774..1F77F
8013             COMMON,                   // 1F780..1F7D8
8014             UNKNOWN,                  // 1F7D9..1F7DF
8015             COMMON,                   // 1F7E0..1F7EB
8016             UNKNOWN,                  // 1F7EC..1F7FF
8017             COMMON,                   // 1F800..1F80B
8018             UNKNOWN,                  // 1F80C..1F80F
8019             COMMON,                   // 1F810..1F847
8020             UNKNOWN,                  // 1F848..1F84F
8021             COMMON,                   // 1F850..1F859
8022             UNKNOWN,                  // 1F85A..1F85F
8023             COMMON,                   // 1F860..1F887
8024             UNKNOWN,                  // 1F888..1F88F
8025             COMMON,                   // 1F890..1F8AD
8026             UNKNOWN,                  // 1F8AE..1F8FF
8027             COMMON,                   // 1F900..1F90B
8028             UNKNOWN,                  // 1F90C
8029             COMMON,                   // 1F90D..1F971
8030             UNKNOWN,                  // 1F972
8031             COMMON,                   // 1F973..1F976
8032             UNKNOWN,                  // 1F977..1F979
8033             COMMON,                   // 1F97A..1F9A2
8034             UNKNOWN,                  // 1F9A3..1F9A4
8035             COMMON,                   // 1F9A5..1F9AA
8036             UNKNOWN,                  // 1F9AB..1F9AD
8037             COMMON,                   // 1F9AE..1F9CA
8038             UNKNOWN,                  // 1F9CB..1F9CC
8039             COMMON,                   // 1F9CD..1FA53
8040             UNKNOWN,                  // 1FA54..1FA5F
8041             COMMON,                   // 1FA60..1FA6D
8042             UNKNOWN,                  // 1FA6E..1FA6F
8043             COMMON,                   // 1FA70..1FA73
8044             UNKNOWN,                  // 1FA74..1FA77
8045             COMMON,                   // 1FA78..1FA7A
8046             UNKNOWN,                  // 1FA7B..1FA7F
8047             COMMON,                   // 1FA80..1FA82
8048             UNKNOWN,                  // 1FA83..1FA8F
8049             COMMON,                   // 1FA90..1FA95
8050             UNKNOWN,                  // 1FA96..1FFFF
8051             HAN,                      // 20000..2A6D6
8052             UNKNOWN,                  // 2A6D7..2A6FF
8053             HAN,                      // 2A700..2B734
8054             UNKNOWN,                  // 2B735..2B73F
8055             HAN,                      // 2B740..2B81D
8056             UNKNOWN,                  // 2B81E..2B81F
8057             HAN,                      // 2B820..2CEA1
8058             UNKNOWN,                  // 2CEA2..2CEAF
8059             HAN,                      // 2CEB0..2EBE0
8060             UNKNOWN,                  // 2EBE1..2F7FF
8061             HAN,                      // 2F800..2FA1D
8062             UNKNOWN,                  // 2FA1E..E0000
8063             COMMON,                   // E0001
8064             UNKNOWN,                  // E0002..E001F
8065             COMMON,                   // E0020..E007F
8066             UNKNOWN,                  // E0080..E00FF
8067             INHERITED,                // E0100..E01EF
8068             UNKNOWN,                  // E01F0..10FFFF
8069         };
8070 
8071         private static HashMap<String, Character.UnicodeScript> aliases;
8072         static {
8073             aliases = new HashMap<>((int)(153 / 0.75f + 1.0f));
8074             aliases.put("ADLM", ADLAM);
8075             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
8076             aliases.put("AHOM", AHOM);
8077             aliases.put("ARAB", ARABIC);
8078             aliases.put("ARMI", IMPERIAL_ARAMAIC);
8079             aliases.put("ARMN", ARMENIAN);
8080             aliases.put("AVST", AVESTAN);
8081             aliases.put("BALI", BALINESE);
8082             aliases.put("BAMU", BAMUM);
8083             aliases.put("BASS", BASSA_VAH);
8084             aliases.put("BATK", BATAK);
8085             aliases.put("BENG", BENGALI);
8086             aliases.put("BHKS", BHAIKSUKI);
8087             aliases.put("BOPO", BOPOMOFO);
8088             aliases.put("BRAH", BRAHMI);
8089             aliases.put("BRAI", BRAILLE);
8090             aliases.put("BUGI", BUGINESE);
8091             aliases.put("BUHD", BUHID);
8092             aliases.put("CAKM", CHAKMA);
8093             aliases.put("CANS", CANADIAN_ABORIGINAL);
8094             aliases.put("CARI", CARIAN);
8095             aliases.put("CHAM", CHAM);
8096             aliases.put("CHER", CHEROKEE);
8097             aliases.put("COPT", COPTIC);
8098             aliases.put("CPRT", CYPRIOT);
8099             aliases.put("CYRL", CYRILLIC);
8100             aliases.put("DEVA", DEVANAGARI);
8101             aliases.put("DOGR", DOGRA);
8102             aliases.put("DSRT", DESERET);
8103             aliases.put("DUPL", DUPLOYAN);
8104             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
8105             aliases.put("ELBA", ELBASAN);
8106             aliases.put("ELYM", ELYMAIC);
8107             aliases.put("ETHI", ETHIOPIC);
8108             aliases.put("GEOR", GEORGIAN);
8109             aliases.put("GLAG", GLAGOLITIC);
8110             aliases.put("GONM", MASARAM_GONDI);
8111             aliases.put("GOTH", GOTHIC);
8112             aliases.put("GONG", GUNJALA_GONDI);
8113             aliases.put("GRAN", GRANTHA);
8114             aliases.put("GREK", GREEK);
8115             aliases.put("GUJR", GUJARATI);
8116             aliases.put("GURU", GURMUKHI);
8117             aliases.put("HANG", HANGUL);
8118             aliases.put("HANI", HAN);
8119             aliases.put("HANO", HANUNOO);
8120             aliases.put("HATR", HATRAN);
8121             aliases.put("HEBR", HEBREW);
8122             aliases.put("HIRA", HIRAGANA);
8123             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
8124             aliases.put("HMNG", PAHAWH_HMONG);
8125             aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
8126             // it appears we don't have the KATAKANA_OR_HIRAGANA
8127             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
8128             aliases.put("HUNG", OLD_HUNGARIAN);
8129             aliases.put("ITAL", OLD_ITALIC);
8130             aliases.put("JAVA", JAVANESE);
8131             aliases.put("KALI", KAYAH_LI);
8132             aliases.put("KANA", KATAKANA);
8133             aliases.put("KHAR", KHAROSHTHI);
8134             aliases.put("KHMR", KHMER);
8135             aliases.put("KHOJ", KHOJKI);
8136             aliases.put("KNDA", KANNADA);
8137             aliases.put("KTHI", KAITHI);
8138             aliases.put("LANA", TAI_THAM);
8139             aliases.put("LAOO", LAO);
8140             aliases.put("LATN", LATIN);
8141             aliases.put("LEPC", LEPCHA);
8142             aliases.put("LIMB", LIMBU);
8143             aliases.put("LINA", LINEAR_A);
8144             aliases.put("LINB", LINEAR_B);
8145             aliases.put("LISU", LISU);
8146             aliases.put("LYCI", LYCIAN);
8147             aliases.put("LYDI", LYDIAN);
8148             aliases.put("MAHJ", MAHAJANI);
8149             aliases.put("MAKA", MAKASAR);
8150             aliases.put("MARC", MARCHEN);
8151             aliases.put("MAND", MANDAIC);
8152             aliases.put("MANI", MANICHAEAN);
8153             aliases.put("MEDF", MEDEFAIDRIN);
8154             aliases.put("MEND", MENDE_KIKAKUI);
8155             aliases.put("MERC", MEROITIC_CURSIVE);
8156             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
8157             aliases.put("MLYM", MALAYALAM);
8158             aliases.put("MODI", MODI);
8159             aliases.put("MONG", MONGOLIAN);
8160             aliases.put("MROO", MRO);
8161             aliases.put("MTEI", MEETEI_MAYEK);
8162             aliases.put("MULT", MULTANI);
8163             aliases.put("MYMR", MYANMAR);
8164             aliases.put("NAND", NANDINAGARI);
8165             aliases.put("NARB", OLD_NORTH_ARABIAN);
8166             aliases.put("NBAT", NABATAEAN);
8167             aliases.put("NEWA", NEWA);
8168             aliases.put("NKOO", NKO);
8169             aliases.put("NSHU", NUSHU);
8170             aliases.put("OGAM", OGHAM);
8171             aliases.put("OLCK", OL_CHIKI);
8172             aliases.put("ORKH", OLD_TURKIC);
8173             aliases.put("ORYA", ORIYA);
8174             aliases.put("OSGE", OSAGE);
8175             aliases.put("OSMA", OSMANYA);
8176             aliases.put("PALM", PALMYRENE);
8177             aliases.put("PAUC", PAU_CIN_HAU);
8178             aliases.put("PERM", OLD_PERMIC);
8179             aliases.put("PHAG", PHAGS_PA);
8180             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8181             aliases.put("PHLP", PSALTER_PAHLAVI);
8182             aliases.put("PHNX", PHOENICIAN);
8183             aliases.put("PLRD", MIAO);
8184             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8185             aliases.put("RJNG", REJANG);
8186             aliases.put("ROHG", HANIFI_ROHINGYA);
8187             aliases.put("RUNR", RUNIC);
8188             aliases.put("SAMR", SAMARITAN);
8189             aliases.put("SARB", OLD_SOUTH_ARABIAN);
8190             aliases.put("SAUR", SAURASHTRA);
8191             aliases.put("SGNW", SIGNWRITING);
8192             aliases.put("SHAW", SHAVIAN);
8193             aliases.put("SHRD", SHARADA);
8194             aliases.put("SIDD", SIDDHAM);
8195             aliases.put("SIND", KHUDAWADI);
8196             aliases.put("SINH", SINHALA);
8197             aliases.put("SOGD", SOGDIAN);
8198             aliases.put("SOGO", OLD_SOGDIAN);
8199             aliases.put("SORA", SORA_SOMPENG);
8200             aliases.put("SOYO", SOYOMBO);
8201             aliases.put("SUND", SUNDANESE);
8202             aliases.put("SYLO", SYLOTI_NAGRI);
8203             aliases.put("SYRC", SYRIAC);
8204             aliases.put("TAGB", TAGBANWA);
8205             aliases.put("TAKR", TAKRI);
8206             aliases.put("TALE", TAI_LE);
8207             aliases.put("TALU", NEW_TAI_LUE);
8208             aliases.put("TAML", TAMIL);
8209             aliases.put("TANG", TANGUT);
8210             aliases.put("TAVT", TAI_VIET);
8211             aliases.put("TELU", TELUGU);
8212             aliases.put("TFNG", TIFINAGH);
8213             aliases.put("TGLG", TAGALOG);
8214             aliases.put("THAA", THAANA);
8215             aliases.put("THAI", THAI);
8216             aliases.put("TIBT", TIBETAN);
8217             aliases.put("TIRH", TIRHUTA);
8218             aliases.put("UGAR", UGARITIC);
8219             aliases.put("VAII", VAI);
8220             aliases.put("WARA", WARANG_CITI);
8221             aliases.put("WCHO", WANCHO);
8222             aliases.put("XPEO", OLD_PERSIAN);
8223             aliases.put("XSUX", CUNEIFORM);
8224             aliases.put("YIII", YI);
8225             aliases.put("ZANB", ZANABAZAR_SQUARE);
8226             aliases.put("ZINH", INHERITED);
8227             aliases.put("ZYYY", COMMON);
8228             aliases.put("ZZZZ", UNKNOWN);
8229         }
8230 
8231         /**
8232          * Looks up the Unicode script to which the given character
8233          * (Unicode code point) is assigned.
8234          *
8235          * @param   codePoint the character (Unicode code point) to classify.
8236          * @return  the {@code UnicodeScript} constant for the script that
8237          *          the character is assigned to.
8238          *
8239          * @throws  IllegalArgumentException if {@code codePoint} is not a
8240          *          valid Unicode code point.
8241          * @see Character#isValidCodePoint(int)
8242          *
8243          */
8244         public static UnicodeScript of(int codePoint) {
8245             if (!isValidCodePoint(codePoint)) {
8246                 String msg = String.format("Not a valid Unicode code point: 0x%X", codePoint);
8247                 throw new IllegalArgumentException(msg);
8248             }
8249             // Only UNASSIGNED short-circuits; SURROGATE and PRIVATE_USE use the table.
8250             if (getType(codePoint) == UNASSIGNED) {
8251                 return UNKNOWN;
8252             }
8253             // A miss yields -(insertionPoint) - 1; step back to the range start.
8254             int slot = Arrays.binarySearch(scriptStarts, codePoint);
8255             return scripts[slot >= 0 ? slot : -slot - 2];
8256         }
8257 
8258         /**
8259          * Resolves a Unicode script name, or a script name alias, to the
8260          * matching {@code UnicodeScript} constant. Script names and their
8261          * aliases are determined by The Unicode Standard; the files
8262          * {@code Scripts<version>.txt} and {@code PropertyValueAliases<version>.txt}
8263          * define them for a particular version of the standard. The
8264          * {@link Character} class specifies the version of the standard
8265          * that it supports.
8266          * <p>
8267          * Lookup is case-insensitive for every valid script name: the
8268          * argument is upper-cased with the English locale's case mapping
8269          * rules before it is compared.
8270          *
8271          * @param scriptName a {@code UnicodeScript} name or alias.
8272          * @return the {@code UnicodeScript} constant identified
8273          *         by {@code scriptName}
8274          * @throws IllegalArgumentException if {@code scriptName} is an
8275          *         invalid name
8276          * @throws NullPointerException if {@code scriptName} is null
8277          */
8278         public static final UnicodeScript forName(String scriptName) {
8279             // Throws NullPointerException for a null name, as documented.
8280             String key = scriptName.toUpperCase(Locale.ENGLISH);
8281             UnicodeScript byAlias = aliases.get(key);
8282             // Not an alias: fall back to the constant names; valueOf
8283             // rejects an unknown name with IllegalArgumentException.
8284             return (byAlias != null) ? byAlias : valueOf(key);
8285         }
8286     }
8287 
8288     /**
8289      * The primitive {@code char} wrapped by this {@code Character}.
8290      *
8291      * @serial
8292      */
8293     private final char value;
8294 
8295     /** Pinned to the serialVersionUID used by JDK 1.0.2, for interoperability. */
8296     @java.io.Serial
8297     private static final long serialVersionUID = 3786198910865385080L;
8298 
8299     /**
8300      * Constructs a newly allocated {@code Character} object that
8301      * wraps the specified {@code char} value.
8302      *
8303      * @param  value   the value to be represented by the
8304      *                  {@code Character} object.
8305      *
8306      * @deprecated
8307      * It is rarely appropriate to use this constructor. The static factory
8308      * {@link #valueOf(char)} is generally a better choice, as it is
8309      * likely to yield significantly better space and time performance.
8310      */
8311     @Deprecated(since="9")
8312     public Character(char value) {
8313         this.value = value;
8314     }
8315 
8316     private static class CharacterCache { // holds the boxed instances for chars 0..127
8317         private CharacterCache(){} // static holder only; never instantiated
8318         // cache[i] boxes (char) i; Character.valueOf indexes it directly.
8319         static final Character[] cache;
8320         static Character[] archivedCache; // non-final: presumably preset by the archive call below
8321
8322         static {
8323             int size = 127 + 1; // one slot per char value in 0..127
8324
8325             // Load and use the archived cache if it exists
8326             VM.initializeFromArchive(CharacterCache.class);
8327             if (archivedCache == null || archivedCache.length != size) { // none, or wrong size
8328                 Character[] c = new Character[size];
8329                 for (int i = 0; i < size; i++) {
8330                     c[i] = new Character((char) i);
8331                 }
8332                 archivedCache = c;
8333             }
8334             cache = archivedCache;
8335         }
8336     }
8337 
8338     /**
8339      * Returns a {@code Character} instance representing the specified
8340      * {@code char} value.
8341      * If a new {@code Character} instance is not required, this method
8342      * should generally be used in preference to the constructor
8343      * {@link #Character(char)}, as this method is likely to yield
8344      * significantly better space and time performance by caching
8345      * frequently requested values.
8346      * <p>
8347      * This method will always cache values in the range {@code
8348      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
8349      * cache other values outside of this range.
8350      *
8351      * @param  c a char value.
8352      * @return a {@code Character} instance representing {@code c}.
8353      * @since  1.5
8354      */
8355     @HotSpotIntrinsicCandidate
8356     public static Character valueOf(char c) {
8357         if (c <= 127) { // must cache
8358             return CharacterCache.cache[(int)c];
8359         }
8360         return new Character(c); // outside the cached range: fresh instance per call
8361     }
8362 
8363     /**
8364      * Returns the {@code char} value wrapped by this {@code Character} object.
8365      * @return  the primitive {@code char} value represented by
8366      *          this object.
8367      */
8368     @HotSpotIntrinsicCandidate
8369     public char charValue() {
8370         return value;
8371     }
8372 
8373     /**
8374      * Returns a hash code for this {@code Character}; equal to the result
8375      * of invoking {@code charValue()}.
8376      *
8377      * @return a hash code value for this {@code Character}
8378      */
8379     @Override
8380     public int hashCode() {
8381         return Character.hashCode(value); // delegates to the static hashCode(char)
8382     }
8383 
8384     /**
8385      * Returns a hash code for a {@code char} value; compatible with
8386      * {@code Character.hashCode()}.
8387      *
8388      * @since 1.8
8389      *
8390      * @param value The {@code char} for which to return a hash code.
8391      * @return a hash code value for a {@code char} value.
8392      */
8393     public static int hashCode(char value) {
8394         return (int)value; // the hash is simply the char's numeric value
8395     }
8396 
8397     /**
8398      * Compares this object against the specified object.
8399      * The result is {@code true} if and only if the argument is not
8400      * {@code null} and is a {@code Character} object that
8401      * represents the same {@code char} value as this object.
8402      *
8403      * @param   obj   the object to compare with.
8404      * @return  {@code true} if {@code obj} is a {@code Character} that
8405      *          wraps the same {@code char} value; {@code false} otherwise.
8406      */
8407     @Override
8408     public boolean equals(Object obj) {
8409         // instanceof is false for null, so no separate null check is needed
8410         return obj instanceof Character
8411             && value == ((Character) obj).charValue();
8412     }
8413 
8414     /**
8415      * Returns a {@code String} object representing this
8416      * {@code Character}'s value.  The result is a string of
8417      * length 1 whose sole component is the primitive
8418      * {@code char} value represented by this
8419      * {@code Character} object.
8420      *
8421      * @return  a string representation of this object.
8422      */
8423     @Override
8424     public String toString() {
8425         return String.valueOf(value);
8426     }
8427 
8428     /**
8429      * Returns a {@code String} object representing the
8430      * specified {@code char}.  The result is a string of length
8431      * 1 consisting solely of the specified {@code char}.
8432      *
8433      * @apiNote This method cannot handle <a
8434      * href="#supplementary"> supplementary characters</a>. To support
8435      * all Unicode characters, including supplementary characters, use
8436      * the {@link #toString(int)} method.
8437      *
8438      * @param c the {@code char} to be converted
8439      * @return the string representation of the specified {@code char}
8440      * @since 1.4
8441      */
8442     public static String toString(char c) {
8443         return String.valueOf(c); // one-char string; no surrogate handling here
8444     }
8445 
8446     /**
8447      * Returns a {@code String} object representing the
8448      * specified character (Unicode code point).  The result is a string of
8449      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8450      *
8451      * @param codePoint the {@code codePoint} to be converted
8452      * @return the string representation of the specified {@code codePoint}
8453      * @throws IllegalArgumentException if the specified
8454      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8455      *      valid Unicode code point}.
8456      * @since 11
8457      */
8458     public static String toString(int codePoint) {
8459         return String.valueOfCodePoint(codePoint); // conversion is delegated to String
8460     }
8461 
8462     /**
8463      * Determines whether the specified code point is a valid
8464      * <a href="http://www.unicode.org/glossary/#code_point">
8465      * Unicode code point value</a>.
8466      *
8467      * @param  codePoint the Unicode code point to be tested
8468      * @return {@code true} if the specified code point value is between
8469      *         {@link #MIN_CODE_POINT} and
8470      *         {@link #MAX_CODE_POINT} inclusive;
8471      *         {@code false} otherwise.
8472      * @since  1.5
8473      */
8474     public static boolean isValidCodePoint(int codePoint) {
8475         // Optimized form of:
8476         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8477         int plane = codePoint >>> 16; // unsigned shift: a negative input becomes a large plane
8478         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8479     }
8480 
8481     /**
8482      * Determines whether the specified character (Unicode code point)
8483      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8484      * Such code points can be represented using a single {@code char}.
8485      *
8486      * @param  codePoint the character (Unicode code point) to be tested
8487      * @return {@code true} if the specified code point is between
8488      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8489      *         {@code false} otherwise.
8490      * @since  1.7
8491      */
8492     public static boolean isBmpCodePoint(int codePoint) {
8493         return codePoint >>> 16 == 0;
8494         // Optimized form of:
8495         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8496         // The logical shift (>>>) is used consistently to facilitate
8497         // additional runtime optimizations.
8498     }
8499 
8500     /**
8501      * Determines whether the specified character (Unicode code point)
8502      * is in the <a href="#supplementary">supplementary character</a> range.
8503      *
8504      * @param  codePoint the character (Unicode code point) to be tested
8505      * @return {@code true} if the specified code point is between
8506      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8507      *         {@link #MAX_CODE_POINT} inclusive;
8508      *         {@code false} otherwise.
8509      * @since  1.5
8510      */
8511     public static boolean isSupplementaryCodePoint(int codePoint) {
8512         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8513             && codePoint <  MAX_CODE_POINT + 1; // same as codePoint <= MAX_CODE_POINT
8514     }
8515 
8516     /**
8517      * Determines if the given {@code char} value is a
8518      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8519      * Unicode high-surrogate code unit</a>
8520      * (also known as <i>leading-surrogate code unit</i>).
8521      *
8522      * <p>Such values do not represent characters by themselves,
8523      * but are used in the representation of
8524      * <a href="#supplementary">supplementary characters</a>
8525      * in the UTF-16 encoding.
8526      *
8527      * @param  ch the {@code char} value to be tested.
8528      * @return {@code true} if the {@code char} value is between
8529      *         {@link #MIN_HIGH_SURROGATE} and
8530      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8531      *         {@code false} otherwise.
8532      * @see    Character#isLowSurrogate(char)
8533      * @see    Character.UnicodeBlock#of(int)
8534      * @since  1.5
8535      */
8536     public static boolean isHighSurrogate(char ch) {
8537         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8538         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); // exclusive upper bound
8539     }
8540 
8541     /**
8542      * Determines if the given {@code char} value is a
8543      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8544      * Unicode low-surrogate code unit</a>
8545      * (also known as <i>trailing-surrogate code unit</i>).
8546      *
8547      * <p>Such values do not represent characters by themselves,
8548      * but are used in the representation of
8549      * <a href="#supplementary">supplementary characters</a>
8550      * in the UTF-16 encoding.
8551      *
8552      * @param  ch the {@code char} value to be tested.
8553      * @return {@code true} if the {@code char} value is between
8554      *         {@link #MIN_LOW_SURROGATE} and
8555      *         {@link #MAX_LOW_SURROGATE} inclusive;
8556      *         {@code false} otherwise.
8557      * @see    Character#isHighSurrogate(char)
8558      * @since  1.5
8559      */
8560     public static boolean isLowSurrogate(char ch) {
8561         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); // exclusive upper bound
8562     }
8563 
8564     /**
8565      * Determines if the given {@code char} value is a Unicode
8566      * <i>surrogate code unit</i>.
8567      *
8568      * <p>Such values do not represent characters by themselves,
8569      * but are used in the representation of
8570      * <a href="#supplementary">supplementary characters</a>
8571      * in the UTF-16 encoding.
8572      *
8573      * <p>A char value is a surrogate code unit if and only if it is either
8574      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8575      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8576      *
8577      * @param  ch the {@code char} value to be tested.
8578      * @return {@code true} if the {@code char} value is between
8579      *         {@link #MIN_SURROGATE} and
8580      *         {@link #MAX_SURROGATE} inclusive;
8581      *         {@code false} otherwise.
8582      * @since  1.7
8583      */
8584     public static boolean isSurrogate(char ch) {
8585         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); // exclusive upper bound
8586     }
8587 
8588     /**
8589      * Determines whether the specified pair of {@code char}
8590      * values is a valid
8591      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8592      * Unicode surrogate pair</a>.
8593      *
8594      * <p>This method is equivalent to the expression:
8595      * <blockquote><pre>{@code
8596      * isHighSurrogate(high) && isLowSurrogate(low)
8597      * }</pre></blockquote>
8598      *
8599      * @param  high the high-surrogate code value to be tested
8600      * @param  low the low-surrogate code value to be tested
8601      * @return {@code true} if the specified high and
8602      * low-surrogate code values represent a valid surrogate pair;
8603      * {@code false} otherwise.
8604      * @since  1.5
8605      */
8606     public static boolean isSurrogatePair(char high, char low) {
8607         return isHighSurrogate(high) && isLowSurrogate(low);
8608     }
8609 
8610     /**
8611      * Reports how many {@code char} values are needed to represent
8612      * the specified character (Unicode code point) in UTF-16. A
8613      * value equal to or greater than 0x10000 needs 2 {@code char}
8614      * values; every smaller value is reported as needing 1.
8615      *
8616      * <p>This method doesn't validate the specified character to be a
8617      * valid Unicode code point. The caller must validate the
8618      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8619      * if necessary.
8620      *
8621      * @param   codePoint the character (Unicode code point) to be tested.
8622      * @return  2 if {@code codePoint} is 0x10000 or greater; 1 otherwise.
8623      * @see     Character#isSupplementaryCodePoint(int)
8624      * @since   1.5
8625      */
8626     public static int charCount(int codePoint) {
8627         return (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) ? 1 : 2;
8628     }
8629 
8630     /**
8631      * Converts the specified surrogate pair to its supplementary code
8632      * point value. This method does not validate the specified
8633      * surrogate pair. The caller must validate it using {@link
8634      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8635      *
8636      * @param  high the high-surrogate code unit
8637      * @param  low the low-surrogate code unit
8638      * @return the supplementary code point composed from the
8639      *         specified surrogate pair.
8640      * @since  1.5
8641      */
8642     public static int toCodePoint(char high, char low) {
8643         // Optimized form of:
8644         // return ((high - MIN_HIGH_SURROGATE) << 10)
8645         //         + (low - MIN_LOW_SURROGATE)
8646         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8647         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT // variable part + one grouped offset
8648                                        - (MIN_HIGH_SURROGATE << 10)
8649                                        - MIN_LOW_SURROGATE);
8650     }
8651 
8652     /**
8653      * Reads the code point that starts at the given index of the
8654      * {@code CharSequence}. A supplementary code point is returned
8655      * only when all of the following hold: the {@code char} at
8656      * {@code index} is in the high-surrogate range, the following
8657      * index is less than the length of the {@code CharSequence},
8658      * and the {@code char} at the following index is in the
8659      * low-surrogate range. In that case the result is the code
8660      * point corresponding to this surrogate pair. Otherwise,
8661      * the {@code char} value at the given index is returned.
8662      *
8663      * @param seq a sequence of {@code char} values (Unicode code
8664      * units)
8665      * @param index the index of the {@code char} value (Unicode
8666      * code unit) in {@code seq} at which to start reading
8667      * @return the Unicode code point at the given index
8668      * @throws NullPointerException if {@code seq} is null.
8669      * @throws IndexOutOfBoundsException if the value
8670      * {@code index} is negative or not less than
8671      * {@link CharSequence#length() seq.length()}.
8672      * @since  1.5
8673      */
8674     public static int codePointAt(CharSequence seq, int index) {
8675         final char lead = seq.charAt(index);
8676         final int next = index + 1;
8677         if (!isHighSurrogate(lead) || next >= seq.length()) {
8678             return lead;  // not the start of a pair, or nothing follows
8679         }
8680         final char trail = seq.charAt(next);
8681         // an unpaired high surrogate is returned as-is
8682         return isLowSurrogate(trail) ? toCodePoint(lead, trail) : lead;
8683     }
8684 
8685     /**
8686      * Returns the code point at the given index of the
8687      * {@code char} array. If the {@code char} value at
8688      * the given index in the {@code char} array is in the
8689      * high-surrogate range, the following index is less than the
8690      * length of the {@code char} array, and the
8691      * {@code char} value at the following index is in the
8692      * low-surrogate range, then the supplementary code point
8693      * corresponding to this surrogate pair is returned. Otherwise,
8694      * the {@code char} value at the given index is returned.
8695      *
8696      * @param a the {@code char} array
8697      * @param index the index to the {@code char} values (Unicode
8698      * code units) in the {@code char} array to be converted
8699      * @return the Unicode code point at the given index
8700      * @throws NullPointerException if {@code a} is null.
8701      * @throws IndexOutOfBoundsException if the value
8702      * {@code index} is negative or not less than
8703      * the length of the {@code char} array.
8704      * @since  1.5
8705      */
8706     public static int codePointAt(char[] a, int index) {
8707         return codePointAtImpl(a, index, a.length); // whole array usable: limit is a.length
8708     }
8709 
8710     /**
8711      * Reads the code point that starts at the given index of the
8712      * {@code char} array, using only array elements whose index
8713      * is less than {@code limit}. A supplementary code point is
8714      * returned only when all of the following hold: the
8715      * {@code char} at {@code index} is in the high-surrogate range,
8716      * the following index is less than the {@code limit}, and the
8717      * {@code char} value at the following index is in the
8718      * low-surrogate range. In that case the result is the code point
8719      * corresponding to this surrogate pair. Otherwise,
8720      * the {@code char} value at the given index is returned.
8721      *
8722      * @param a the {@code char} array
8723      * @param index the index of the {@code char} value (Unicode
8724      * code unit) in the {@code char} array at which to start reading
8725      * @param limit the index after the last array element that
8726      * can be used in the {@code char} array
8727      * @return the Unicode code point at the given index
8728      * @throws NullPointerException if {@code a} is null.
8729      * @throws IndexOutOfBoundsException if the {@code index}
8730      * argument is negative or not less than the {@code limit}
8731      * argument, or if the {@code limit} argument is negative or
8732      * greater than the length of the {@code char} array.
8733      * @since  1.5
8734      */
8735     public static int codePointAt(char[] a, int index, int limit) {
8736         if (index < limit && limit >= 0 && limit <= a.length) {
8737             return codePointAtImpl(a, index, limit);
8738         }
8739         throw new IndexOutOfBoundsException();
8740     }
8741 
8742     // throws ArrayIndexOutOfBoundsException if index out of bounds
8743     static int codePointAtImpl(char[] a, int index, int limit) {
8744         char c1 = a[index];
8745         if (isHighSurrogate(c1) && ++index < limit) {
8746             char c2 = a[index];
8747             if (isLowSurrogate(c2)) {
8748                 return toCodePoint(c1, c2);
8749             }
8750         }
8751         return c1;
8752     }
8753 
8754     /**
8755      * Returns the code point preceding the given index of the
8756      * {@code CharSequence}. If the {@code char} value at
8757      * {@code (index - 1)} in the {@code CharSequence} is in
8758      * the low-surrogate range, {@code (index - 2)} is not
8759      * negative, and the {@code char} value at {@code (index - 2)}
8760      * in the {@code CharSequence} is in the
8761      * high-surrogate range, then the supplementary code point
8762      * corresponding to this surrogate pair is returned. Otherwise,
8763      * the {@code char} value at {@code (index - 1)} is
8764      * returned.
8765      *
8766      * @param seq the {@code CharSequence} instance
8767      * @param index the index following the code point that should be returned
8768      * @return the Unicode code point value before the given index.
8769      * @throws NullPointerException if {@code seq} is null.
8770      * @throws IndexOutOfBoundsException if the {@code index}
8771      * argument is less than 1 or greater than {@link
8772      * CharSequence#length() seq.length()}.
8773      * @since  1.5
8774      */
8775     public static int codePointBefore(CharSequence seq, int index) {
8776         char c2 = seq.charAt(--index);
8777         if (isLowSurrogate(c2) && index > 0) {
8778             char c1 = seq.charAt(--index);
8779             if (isHighSurrogate(c1)) {
8780                 return toCodePoint(c1, c2);
8781             }
8782         }
8783         return c2;
8784     }
8785 
8786     /**
8787      * Returns the code point preceding the given index of the
8788      * {@code char} array. If the {@code char} value at
8789      * {@code (index - 1)} in the {@code char} array is in
8790      * the low-surrogate range, {@code (index - 2)} is not
8791      * negative, and the {@code char} value at {@code (index - 2)}
8792      * in the {@code char} array is in the
8793      * high-surrogate range, then the supplementary code point
8794      * corresponding to this surrogate pair is returned. Otherwise,
8795      * the {@code char} value at {@code (index - 1)} is
8796      * returned.
8797      *
8798      * @param a the {@code char} array
8799      * @param index the index following the code point that should be returned
8800      * @return the Unicode code point value before the given index.
8801      * @throws NullPointerException if {@code a} is null.
8802      * @throws IndexOutOfBoundsException if the {@code index}
8803      * argument is less than 1 or greater than the length of the
     * {@code char} array.
8805      * @since  1.5
8806      */
8807     public static int codePointBefore(char[] a, int index) {
8808         return codePointBeforeImpl(a, index, 0);
8809     }
8810 
8811     /**
8812      * Returns the code point preceding the given index of the
8813      * {@code char} array, where only array elements with
8814      * {@code index} greater than or equal to {@code start}
8815      * can be used. If the {@code char} value at {@code (index - 1)}
8816      * in the {@code char} array is in the
8817      * low-surrogate range, {@code (index - 2)} is not less than
8818      * {@code start}, and the {@code char} value at
8819      * {@code (index - 2)} in the {@code char} array is in
8820      * the high-surrogate range, then the supplementary code point
8821      * corresponding to this surrogate pair is returned. Otherwise,
8822      * the {@code char} value at {@code (index - 1)} is
8823      * returned.
8824      *
8825      * @param a the {@code char} array
8826      * @param index the index following the code point that should be returned
8827      * @param start the index of the first array element in the
8828      * {@code char} array
8829      * @return the Unicode code point value before the given index.
8830      * @throws NullPointerException if {@code a} is null.
8831      * @throws IndexOutOfBoundsException if the {@code index}
8832      * argument is not greater than the {@code start} argument or
8833      * is greater than the length of the {@code char} array, or
8834      * if the {@code start} argument is negative or not less than
8835      * the length of the {@code char} array.
8836      * @since  1.5
8837      */
8838     public static int codePointBefore(char[] a, int index, int start) {
8839         if (index <= start || start < 0 || start >= a.length) {
8840             throw new IndexOutOfBoundsException();
8841         }
8842         return codePointBeforeImpl(a, index, start);
8843     }
8844 
8845     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8846     static int codePointBeforeImpl(char[] a, int index, int start) {
8847         char c2 = a[--index];
8848         if (isLowSurrogate(c2) && index > start) {
8849             char c1 = a[--index];
8850             if (isHighSurrogate(c1)) {
8851                 return toCodePoint(c1, c2);
8852             }
8853         }
8854         return c2;
8855     }
8856 
8857     /**
8858      * Returns the leading surrogate (a
8859      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8860      * high surrogate code unit</a>) of the
8861      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8862      * surrogate pair</a>
8863      * representing the specified supplementary character (Unicode
8864      * code point) in the UTF-16 encoding.  If the specified character
8865      * is not a
8866      * <a href="Character.html#supplementary">supplementary character</a>,
8867      * an unspecified {@code char} is returned.
8868      *
8869      * <p>If
8870      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8871      * is {@code true}, then
8872      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8873      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8874      * are also always {@code true}.
8875      *
8876      * @param   codePoint a supplementary character (Unicode code point)
8877      * @return  the leading surrogate code unit used to represent the
8878      *          character in the UTF-16 encoding
8879      * @since   1.7
8880      */
8881     public static char highSurrogate(int codePoint) {
8882         return (char) ((codePoint >>> 10)
8883             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8884     }
8885 
8886     /**
8887      * Returns the trailing surrogate (a
8888      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8889      * low surrogate code unit</a>) of the
8890      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8891      * surrogate pair</a>
8892      * representing the specified supplementary character (Unicode
8893      * code point) in the UTF-16 encoding.  If the specified character
8894      * is not a
8895      * <a href="Character.html#supplementary">supplementary character</a>,
8896      * an unspecified {@code char} is returned.
8897      *
8898      * <p>If
8899      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8900      * is {@code true}, then
8901      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8902      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8903      * are also always {@code true}.
8904      *
8905      * @param   codePoint a supplementary character (Unicode code point)
8906      * @return  the trailing surrogate code unit used to represent the
8907      *          character in the UTF-16 encoding
8908      * @since   1.7
8909      */
8910     public static char lowSurrogate(int codePoint) {
8911         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8912     }
8913 
8914     /**
8915      * Converts the specified character (Unicode code point) to its
8916      * UTF-16 representation. If the specified code point is a BMP
8917      * (Basic Multilingual Plane or Plane 0) value, the same value is
8918      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8919      * specified code point is a supplementary character, its
8920      * surrogate values are stored in {@code dst[dstIndex]}
8921      * (high-surrogate) and {@code dst[dstIndex+1]}
8922      * (low-surrogate), and 2 is returned.
8923      *
8924      * @param  codePoint the character (Unicode code point) to be converted.
8925      * @param  dst an array of {@code char} in which the
8926      * {@code codePoint}'s UTF-16 value is stored.
8927      * @param dstIndex the start index into the {@code dst}
8928      * array where the converted value is stored.
8929      * @return 1 if the code point is a BMP code point, 2 if the
8930      * code point is a supplementary code point.
8931      * @throws IllegalArgumentException if the specified
8932      * {@code codePoint} is not a valid Unicode code point.
8933      * @throws NullPointerException if the specified {@code dst} is null.
8934      * @throws IndexOutOfBoundsException if {@code dstIndex}
8935      * is negative or not less than {@code dst.length}, or if
8936      * {@code dst} at {@code dstIndex} doesn't have enough
8937      * array element(s) to store the resulting {@code char}
8938      * value(s). (If {@code dstIndex} is equal to
8939      * {@code dst.length-1} and the specified
8940      * {@code codePoint} is a supplementary character, the
8941      * high-surrogate value is not stored in
8942      * {@code dst[dstIndex]}.)
8943      * @since  1.5
8944      */
8945     public static int toChars(int codePoint, char[] dst, int dstIndex) {
8946         if (isBmpCodePoint(codePoint)) {
8947             dst[dstIndex] = (char) codePoint;
8948             return 1;
8949         } else if (isValidCodePoint(codePoint)) {
8950             toSurrogates(codePoint, dst, dstIndex);
8951             return 2;
8952         } else {
8953             throw new IllegalArgumentException(
8954                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8955         }
8956     }
8957 
8958     /**
8959      * Converts the specified character (Unicode code point) to its
8960      * UTF-16 representation stored in a {@code char} array. If
8961      * the specified code point is a BMP (Basic Multilingual Plane or
8962      * Plane 0) value, the resulting {@code char} array has
8963      * the same value as {@code codePoint}. If the specified code
8964      * point is a supplementary code point, the resulting
8965      * {@code char} array has the corresponding surrogate pair.
8966      *
8967      * @param  codePoint a Unicode code point
8968      * @return a {@code char} array having
8969      *         {@code codePoint}'s UTF-16 representation.
8970      * @throws IllegalArgumentException if the specified
8971      * {@code codePoint} is not a valid Unicode code point.
8972      * @since  1.5
8973      */
8974     public static char[] toChars(int codePoint) {
8975         if (isBmpCodePoint(codePoint)) {
8976             return new char[] { (char) codePoint };
8977         } else if (isValidCodePoint(codePoint)) {
8978             char[] result = new char[2];
8979             toSurrogates(codePoint, result, 0);
8980             return result;
8981         } else {
8982             throw new IllegalArgumentException(
8983                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8984         }
8985     }
8986 
8987     static void toSurrogates(int codePoint, char[] dst, int index) {
8988         // We write elements "backwards" to guarantee all-or-nothing
8989         dst[index+1] = lowSurrogate(codePoint);
8990         dst[index] = highSurrogate(codePoint);
8991     }
8992 
8993     /**
8994      * Returns the number of Unicode code points in the text range of
8995      * the specified char sequence. The text range begins at the
8996      * specified {@code beginIndex} and extends to the
8997      * {@code char} at index {@code endIndex - 1}. Thus the
8998      * length (in {@code char}s) of the text range is
8999      * {@code endIndex-beginIndex}. Unpaired surrogates within
9000      * the text range count as one code point each.
9001      *
9002      * @param seq the char sequence
9003      * @param beginIndex the index to the first {@code char} of
9004      * the text range.
9005      * @param endIndex the index after the last {@code char} of
9006      * the text range.
9007      * @return the number of Unicode code points in the specified text
9008      * range
9009      * @throws NullPointerException if {@code seq} is null.
9010      * @throws IndexOutOfBoundsException if the
9011      * {@code beginIndex} is negative, or {@code endIndex}
9012      * is larger than the length of the given sequence, or
9013      * {@code beginIndex} is larger than {@code endIndex}.
9014      * @since  1.5
9015      */
9016     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9017         int length = seq.length();
9018         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
9019             throw new IndexOutOfBoundsException();
9020         }
9021         int n = endIndex - beginIndex;
9022         for (int i = beginIndex; i < endIndex; ) {
9023             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9024                 isLowSurrogate(seq.charAt(i))) {
9025                 n--;
9026                 i++;
9027             }
9028         }
9029         return n;
9030     }
9031 
9032     /**
9033      * Returns the number of Unicode code points in a subarray of the
9034      * {@code char} array argument. The {@code offset}
9035      * argument is the index of the first {@code char} of the
9036      * subarray and the {@code count} argument specifies the
9037      * length of the subarray in {@code char}s. Unpaired
9038      * surrogates within the subarray count as one code point each.
9039      *
9040      * @param a the {@code char} array
9041      * @param offset the index of the first {@code char} in the
9042      * given {@code char} array
9043      * @param count the length of the subarray in {@code char}s
9044      * @return the number of Unicode code points in the specified subarray
9045      * @throws NullPointerException if {@code a} is null.
9046      * @throws IndexOutOfBoundsException if {@code offset} or
9047      * {@code count} is negative, or if {@code offset +
9048      * count} is larger than the length of the given array.
9049      * @since  1.5
9050      */
9051     public static int codePointCount(char[] a, int offset, int count) {
9052         if (count > a.length - offset || offset < 0 || count < 0) {
9053             throw new IndexOutOfBoundsException();
9054         }
9055         return codePointCountImpl(a, offset, count);
9056     }
9057 
9058     static int codePointCountImpl(char[] a, int offset, int count) {
9059         int endIndex = offset + count;
9060         int n = count;
9061         for (int i = offset; i < endIndex; ) {
9062             if (isHighSurrogate(a[i++]) && i < endIndex &&
9063                 isLowSurrogate(a[i])) {
9064                 n--;
9065                 i++;
9066             }
9067         }
9068         return n;
9069     }
9070 
9071     /**
9072      * Returns the index within the given char sequence that is offset
9073      * from the given {@code index} by {@code codePointOffset}
9074      * code points. Unpaired surrogates within the text range given by
9075      * {@code index} and {@code codePointOffset} count as
9076      * one code point each.
9077      *
9078      * @param seq the char sequence
9079      * @param index the index to be offset
9080      * @param codePointOffset the offset in code points
9081      * @return the index within the char sequence
9082      * @throws NullPointerException if {@code seq} is null.
9083      * @throws IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
9085      *   or if {@code codePointOffset} is positive and the
9086      *   subsequence starting with {@code index} has fewer than
9087      *   {@code codePointOffset} code points, or if
9088      *   {@code codePointOffset} is negative and the subsequence
9089      *   before {@code index} has fewer than the absolute value
9090      *   of {@code codePointOffset} code points.
9091      * @since 1.5
9092      */
9093     public static int offsetByCodePoints(CharSequence seq, int index,
9094                                          int codePointOffset) {
9095         int length = seq.length();
9096         if (index < 0 || index > length) {
9097             throw new IndexOutOfBoundsException();
9098         }
9099 
9100         int x = index;
9101         if (codePointOffset >= 0) {
9102             int i;
9103             for (i = 0; x < length && i < codePointOffset; i++) {
9104                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
9105                     isLowSurrogate(seq.charAt(x))) {
9106                     x++;
9107                 }
9108             }
9109             if (i < codePointOffset) {
9110                 throw new IndexOutOfBoundsException();
9111             }
9112         } else {
9113             int i;
9114             for (i = codePointOffset; x > 0 && i < 0; i++) {
9115                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
9116                     isHighSurrogate(seq.charAt(x-1))) {
9117                     x--;
9118                 }
9119             }
9120             if (i < 0) {
9121                 throw new IndexOutOfBoundsException();
9122             }
9123         }
9124         return x;
9125     }
9126 
9127     /**
9128      * Returns the index within the given {@code char} subarray
9129      * that is offset from the given {@code index} by
9130      * {@code codePointOffset} code points. The
9131      * {@code start} and {@code count} arguments specify a
9132      * subarray of the {@code char} array. Unpaired surrogates
9133      * within the text range given by {@code index} and
9134      * {@code codePointOffset} count as one code point each.
9135      *
9136      * @param a the {@code char} array
9137      * @param start the index of the first {@code char} of the
9138      * subarray
9139      * @param count the length of the subarray in {@code char}s
9140      * @param index the index to be offset
9141      * @param codePointOffset the offset in code points
9142      * @return the index within the subarray
9143      * @throws NullPointerException if {@code a} is null.
9144      * @throws IndexOutOfBoundsException
9145      *   if {@code start} or {@code count} is negative,
9146      *   or if {@code start + count} is larger than the length of
9147      *   the given array,
9148      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
9150      *   or if {@code codePointOffset} is positive and the text range
9151      *   starting with {@code index} and ending with {@code start + count - 1}
9152      *   has fewer than {@code codePointOffset} code
9153      *   points,
9154      *   or if {@code codePointOffset} is negative and the text range
9155      *   starting with {@code start} and ending with {@code index - 1}
9156      *   has fewer than the absolute value of
9157      *   {@code codePointOffset} code points.
9158      * @since 1.5
9159      */
9160     public static int offsetByCodePoints(char[] a, int start, int count,
9161                                          int index, int codePointOffset) {
9162         if (count > a.length-start || start < 0 || count < 0
9163             || index < start || index > start+count) {
9164             throw new IndexOutOfBoundsException();
9165         }
9166         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
9167     }
9168 
9169     static int offsetByCodePointsImpl(char[]a, int start, int count,
9170                                       int index, int codePointOffset) {
9171         int x = index;
9172         if (codePointOffset >= 0) {
9173             int limit = start + count;
9174             int i;
9175             for (i = 0; x < limit && i < codePointOffset; i++) {
9176                 if (isHighSurrogate(a[x++]) && x < limit &&
9177                     isLowSurrogate(a[x])) {
9178                     x++;
9179                 }
9180             }
9181             if (i < codePointOffset) {
9182                 throw new IndexOutOfBoundsException();
9183             }
9184         } else {
9185             int i;
9186             for (i = codePointOffset; x > start && i < 0; i++) {
9187                 if (isLowSurrogate(a[--x]) && x > start &&
9188                     isHighSurrogate(a[x-1])) {
9189                     x--;
9190                 }
9191             }
9192             if (i < 0) {
9193                 throw new IndexOutOfBoundsException();
9194             }
9195         }
9196         return x;
9197     }
9198 
9199     /**
9200      * Determines if the specified character is a lowercase character.
9201      * <p>
9202      * A character is lowercase if its general category type, provided
9203      * by {@code Character.getType(ch)}, is
9204      * {@code LOWERCASE_LETTER}, or it has contributory property
9205      * Other_Lowercase as defined by the Unicode Standard.
9206      * <p>
9207      * The following are examples of lowercase characters:
9208      * <blockquote><pre>
9209      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9210      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
9211      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
9212      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
9213      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
9214      * </pre></blockquote>
9215      * <p> Many other Unicode characters are lowercase too.
9216      *
9217      * <p><b>Note:</b> This method cannot handle <a
9218      * href="#supplementary"> supplementary characters</a>. To support
9219      * all Unicode characters, including supplementary characters, use
9220      * the {@link #isLowerCase(int)} method.
9221      *
9222      * @param   ch   the character to be tested.
9223      * @return  {@code true} if the character is lowercase;
9224      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
9226      * @see     Character#isTitleCase(char)
9227      * @see     Character#toLowerCase(char)
9228      * @see     Character#getType(char)
9229      */
9230     public static boolean isLowerCase(char ch) {
9231         return isLowerCase((int)ch);
9232     }
9233 
9234     /**
9235      * Determines if the specified character (Unicode code point) is a
9236      * lowercase character.
9237      * <p>
9238      * A character is lowercase if its general category type, provided
9239      * by {@link Character#getType getType(codePoint)}, is
9240      * {@code LOWERCASE_LETTER}, or it has contributory property
9241      * Other_Lowercase as defined by the Unicode Standard.
9242      * <p>
9243      * The following are examples of lowercase characters:
9244      * <blockquote><pre>
9245      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9246      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
9247      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
9248      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
9249      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
9250      * </pre></blockquote>
9251      * <p> Many other Unicode characters are lowercase too.
9252      *
9253      * @param   codePoint the character (Unicode code point) to be tested.
9254      * @return  {@code true} if the character is lowercase;
9255      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
9257      * @see     Character#isTitleCase(int)
9258      * @see     Character#toLowerCase(int)
9259      * @see     Character#getType(int)
9260      * @since   1.5
9261      */
9262     public static boolean isLowerCase(int codePoint) {
9263         return CharacterData.of(codePoint).isLowerCase(codePoint) ||
9264                CharacterData.of(codePoint).isOtherLowercase(codePoint);
9265     }
9266 
9267     /**
9268      * Determines if the specified character is an uppercase character.
9269      * <p>
9270      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
9272      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9273      * <p>
9274      * The following are examples of uppercase characters:
9275      * <blockquote><pre>
9276      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9277      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
9278      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
9279      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
9280      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
9281      * </pre></blockquote>
9282      * <p> Many other Unicode characters are uppercase too.
9283      *
9284      * <p><b>Note:</b> This method cannot handle <a
9285      * href="#supplementary"> supplementary characters</a>. To support
9286      * all Unicode characters, including supplementary characters, use
9287      * the {@link #isUpperCase(int)} method.
9288      *
9289      * @param   ch   the character to be tested.
9290      * @return  {@code true} if the character is uppercase;
9291      *          {@code false} otherwise.
9292      * @see     Character#isLowerCase(char)
9293      * @see     Character#isTitleCase(char)
9294      * @see     Character#toUpperCase(char)
9295      * @see     Character#getType(char)
9296      * @since   1.0
9297      */
9298     public static boolean isUpperCase(char ch) {
9299         return isUpperCase((int)ch);
9300     }
9301 
9302     /**
9303      * Determines if the specified character (Unicode code point) is an uppercase character.
9304      * <p>
9305      * A character is uppercase if its general category type, provided by
9306      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
9307      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9308      * <p>
9309      * The following are examples of uppercase characters:
9310      * <blockquote><pre>
9311      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9312      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
9313      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
9314      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
9315      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
9316      * </pre></blockquote>
9317      * <p> Many other Unicode characters are uppercase too.
9318      *
9319      * @param   codePoint the character (Unicode code point) to be tested.
9320      * @return  {@code true} if the character is uppercase;
9321      *          {@code false} otherwise.
9322      * @see     Character#isLowerCase(int)
9323      * @see     Character#isTitleCase(int)
9324      * @see     Character#toUpperCase(int)
9325      * @see     Character#getType(int)
9326      * @since   1.5
9327      */
9328     public static boolean isUpperCase(int codePoint) {
9329         return CharacterData.of(codePoint).isUpperCase(codePoint) ||
9330                CharacterData.of(codePoint).isOtherUppercase(codePoint);
9331     }
9332 
9333     /**
9334      * Determines if the specified character is a titlecase character.
9335      * <p>
9336      * A character is a titlecase character if its general
9337      * category type, provided by {@code Character.getType(ch)},
9338      * is {@code TITLECASE_LETTER}.
9339      * <p>
9340      * Some characters look like pairs of Latin letters. For example, there
9341      * is an uppercase letter that looks like "LJ" and has a corresponding
9342      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9343      * is the appropriate form to use when rendering a word in lowercase
9344      * with initial capitals, as for a book title.
9345      * <p>
9346      * These are some of the Unicode characters for which this method returns
9347      * {@code true}:
9348      * <ul>
9349      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9350      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9351      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9352      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9353      * </ul>
9354      * <p> Many other Unicode characters are titlecase too.
9355      *
9356      * <p><b>Note:</b> This method cannot handle <a
9357      * href="#supplementary"> supplementary characters</a>. To support
9358      * all Unicode characters, including supplementary characters, use
9359      * the {@link #isTitleCase(int)} method.
9360      *
9361      * @param   ch   the character to be tested.
9362      * @return  {@code true} if the character is titlecase;
9363      *          {@code false} otherwise.
9364      * @see     Character#isLowerCase(char)
9365      * @see     Character#isUpperCase(char)
9366      * @see     Character#toTitleCase(char)
9367      * @see     Character#getType(char)
9368      * @since   1.0.2
9369      */
9370     public static boolean isTitleCase(char ch) {
9371         return isTitleCase((int)ch);
9372     }
9373 
9374     /**
9375      * Determines if the specified character (Unicode code point) is a titlecase character.
9376      * <p>
9377      * A character is a titlecase character if its general
9378      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9379      * is {@code TITLECASE_LETTER}.
9380      * <p>
9381      * Some characters look like pairs of Latin letters. For example, there
9382      * is an uppercase letter that looks like "LJ" and has a corresponding
9383      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9384      * is the appropriate form to use when rendering a word in lowercase
9385      * with initial capitals, as for a book title.
9386      * <p>
9387      * These are some of the Unicode characters for which this method returns
9388      * {@code true}:
9389      * <ul>
9390      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9391      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9392      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9393      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9394      * </ul>
9395      * <p> Many other Unicode characters are titlecase too.
9396      *
9397      * @param   codePoint the character (Unicode code point) to be tested.
9398      * @return  {@code true} if the character is titlecase;
9399      *          {@code false} otherwise.
9400      * @see     Character#isLowerCase(int)
9401      * @see     Character#isUpperCase(int)
9402      * @see     Character#toTitleCase(int)
9403      * @see     Character#getType(int)
9404      * @since   1.5
9405      */
9406     public static boolean isTitleCase(int codePoint) {
9407         return getType(codePoint) == Character.TITLECASE_LETTER;
9408     }
9409 
9410     /**
9411      * Determines if the specified character is a digit.
9412      * <p>
9413      * A character is a digit if its general category type, provided
9414      * by {@code Character.getType(ch)}, is
9415      * {@code DECIMAL_DIGIT_NUMBER}.
9416      * <p>
9417      * Some Unicode character ranges that contain digits:
9418      * <ul>
9419      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9420      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9421      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9422      *     Arabic-Indic digits
9423      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9424      *     Extended Arabic-Indic digits
9425      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9426      *     Devanagari digits
9427      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9428      *     Fullwidth digits
9429      * </ul>
9430      *
9431      * Many other character ranges contain digits as well.
9432      *
9433      * <p><b>Note:</b> This method cannot handle <a
9434      * href="#supplementary"> supplementary characters</a>. To support
9435      * all Unicode characters, including supplementary characters, use
9436      * the {@link #isDigit(int)} method.
9437      *
9438      * @param   ch   the character to be tested.
9439      * @return  {@code true} if the character is a digit;
9440      *          {@code false} otherwise.
9441      * @see     Character#digit(char, int)
9442      * @see     Character#forDigit(int, int)
9443      * @see     Character#getType(char)
9444      */
9445     public static boolean isDigit(char ch) {
9446         return isDigit((int)ch);
9447     }
9448 
9449     /**
9450      * Determines if the specified character (Unicode code point) is a digit.
9451      * <p>
9452      * A character is a digit if its general category type, provided
9453      * by {@link Character#getType(int) getType(codePoint)}, is
9454      * {@code DECIMAL_DIGIT_NUMBER}.
9455      * <p>
9456      * Some Unicode character ranges that contain digits:
9457      * <ul>
9458      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9459      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9460      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9461      *     Arabic-Indic digits
9462      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9463      *     Extended Arabic-Indic digits
9464      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9465      *     Devanagari digits
9466      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9467      *     Fullwidth digits
9468      * </ul>
9469      *
9470      * Many other character ranges contain digits as well.
9471      *
9472      * @param   codePoint the character (Unicode code point) to be tested.
9473      * @return  {@code true} if the character is a digit;
9474      *          {@code false} otherwise.
9475      * @see     Character#forDigit(int, int)
9476      * @see     Character#getType(int)
9477      * @since   1.5
9478      */
9479     public static boolean isDigit(int codePoint) {
9480         return CharacterData.of(codePoint).isDigit(codePoint);
9481     }
9482 
9483     /**
9484      * Determines if a character is defined in Unicode.
9485      * <p>
9486      * A character is defined if at least one of the following is true:
9487      * <ul>
9488      * <li>It has an entry in the UnicodeData file.
9489      * <li>It has a value in a range defined by the UnicodeData file.
9490      * </ul>
9491      *
9492      * <p><b>Note:</b> This method cannot handle <a
9493      * href="#supplementary"> supplementary characters</a>. To support
9494      * all Unicode characters, including supplementary characters, use
9495      * the {@link #isDefined(int)} method.
9496      *
9497      * @param   ch   the character to be tested
9498      * @return  {@code true} if the character has a defined meaning
9499      *          in Unicode; {@code false} otherwise.
9500      * @see     Character#isDigit(char)
9501      * @see     Character#isLetter(char)
9502      * @see     Character#isLetterOrDigit(char)
9503      * @see     Character#isLowerCase(char)
9504      * @see     Character#isTitleCase(char)
9505      * @see     Character#isUpperCase(char)
9506      * @since   1.0.2
9507      */
9508     public static boolean isDefined(char ch) {
9509         return isDefined((int)ch);
9510     }
9511 
9512     /**
9513      * Determines if a character (Unicode code point) is defined in Unicode.
9514      * <p>
9515      * A character is defined if at least one of the following is true:
9516      * <ul>
9517      * <li>It has an entry in the UnicodeData file.
9518      * <li>It has a value in a range defined by the UnicodeData file.
9519      * </ul>
9520      *
9521      * @param   codePoint the character (Unicode code point) to be tested.
9522      * @return  {@code true} if the character has a defined meaning
9523      *          in Unicode; {@code false} otherwise.
9524      * @see     Character#isDigit(int)
9525      * @see     Character#isLetter(int)
9526      * @see     Character#isLetterOrDigit(int)
9527      * @see     Character#isLowerCase(int)
9528      * @see     Character#isTitleCase(int)
9529      * @see     Character#isUpperCase(int)
9530      * @since   1.5
9531      */
9532     public static boolean isDefined(int codePoint) {
9533         return getType(codePoint) != Character.UNASSIGNED;
9534     }
9535 
9536     /**
9537      * Determines if the specified character is a letter.
9538      * <p>
9539      * A character is considered to be a letter if its general
9540      * category type, provided by {@code Character.getType(ch)},
9541      * is any of the following:
9542      * <ul>
9543      * <li> {@code UPPERCASE_LETTER}
9544      * <li> {@code LOWERCASE_LETTER}
9545      * <li> {@code TITLECASE_LETTER}
9546      * <li> {@code MODIFIER_LETTER}
9547      * <li> {@code OTHER_LETTER}
9548      * </ul>
9549      *
9550      * Not all letters have case. Many characters are
9551      * letters but are neither uppercase nor lowercase nor titlecase.
9552      *
9553      * <p><b>Note:</b> This method cannot handle <a
9554      * href="#supplementary"> supplementary characters</a>. To support
9555      * all Unicode characters, including supplementary characters, use
9556      * the {@link #isLetter(int)} method.
9557      *
9558      * @param   ch   the character to be tested.
9559      * @return  {@code true} if the character is a letter;
9560      *          {@code false} otherwise.
9561      * @see     Character#isDigit(char)
9562      * @see     Character#isJavaIdentifierStart(char)
9563      * @see     Character#isJavaLetter(char)
9564      * @see     Character#isJavaLetterOrDigit(char)
9565      * @see     Character#isLetterOrDigit(char)
9566      * @see     Character#isLowerCase(char)
9567      * @see     Character#isTitleCase(char)
9568      * @see     Character#isUnicodeIdentifierStart(char)
9569      * @see     Character#isUpperCase(char)
9570      */
9571     public static boolean isLetter(char ch) {
9572         return isLetter((int)ch);
9573     }
9574 
9575     /**
9576      * Determines if the specified character (Unicode code point) is a letter.
9577      * <p>
9578      * A character is considered to be a letter if its general
9579      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9580      * is any of the following:
9581      * <ul>
9582      * <li> {@code UPPERCASE_LETTER}
9583      * <li> {@code LOWERCASE_LETTER}
9584      * <li> {@code TITLECASE_LETTER}
9585      * <li> {@code MODIFIER_LETTER}
9586      * <li> {@code OTHER_LETTER}
9587      * </ul>
9588      *
9589      * Not all letters have case. Many characters are
9590      * letters but are neither uppercase nor lowercase nor titlecase.
9591      *
9592      * @param   codePoint the character (Unicode code point) to be tested.
9593      * @return  {@code true} if the character is a letter;
9594      *          {@code false} otherwise.
9595      * @see     Character#isDigit(int)
9596      * @see     Character#isJavaIdentifierStart(int)
9597      * @see     Character#isLetterOrDigit(int)
9598      * @see     Character#isLowerCase(int)
9599      * @see     Character#isTitleCase(int)
9600      * @see     Character#isUnicodeIdentifierStart(int)
9601      * @see     Character#isUpperCase(int)
9602      * @since   1.5
9603      */
9604     public static boolean isLetter(int codePoint) {
9605         return ((((1 << Character.UPPERCASE_LETTER) |
9606             (1 << Character.LOWERCASE_LETTER) |
9607             (1 << Character.TITLECASE_LETTER) |
9608             (1 << Character.MODIFIER_LETTER) |
9609             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9610             != 0;
9611     }
9612 
9613     /**
9614      * Determines if the specified character is a letter or digit.
9615      * <p>
9616      * A character is considered to be a letter or digit if either
9617      * {@code Character.isLetter(char ch)} or
9618      * {@code Character.isDigit(char ch)} returns
9619      * {@code true} for the character.
9620      *
9621      * <p><b>Note:</b> This method cannot handle <a
9622      * href="#supplementary"> supplementary characters</a>. To support
9623      * all Unicode characters, including supplementary characters, use
9624      * the {@link #isLetterOrDigit(int)} method.
9625      *
9626      * @param   ch   the character to be tested.
9627      * @return  {@code true} if the character is a letter or digit;
9628      *          {@code false} otherwise.
9629      * @see     Character#isDigit(char)
9630      * @see     Character#isJavaIdentifierPart(char)
9631      * @see     Character#isJavaLetter(char)
9632      * @see     Character#isJavaLetterOrDigit(char)
9633      * @see     Character#isLetter(char)
9634      * @see     Character#isUnicodeIdentifierPart(char)
9635      * @since   1.0.2
9636      */
9637     public static boolean isLetterOrDigit(char ch) {
9638         return isLetterOrDigit((int)ch);
9639     }
9640 
9641     /**
9642      * Determines if the specified character (Unicode code point) is a letter or digit.
9643      * <p>
9644      * A character is considered to be a letter or digit if either
9645      * {@link #isLetter(int) isLetter(codePoint)} or
9646      * {@link #isDigit(int) isDigit(codePoint)} returns
9647      * {@code true} for the character.
9648      *
9649      * @param   codePoint the character (Unicode code point) to be tested.
9650      * @return  {@code true} if the character is a letter or digit;
9651      *          {@code false} otherwise.
9652      * @see     Character#isDigit(int)
9653      * @see     Character#isJavaIdentifierPart(int)
9654      * @see     Character#isLetter(int)
9655      * @see     Character#isUnicodeIdentifierPart(int)
9656      * @since   1.5
9657      */
9658     public static boolean isLetterOrDigit(int codePoint) {
9659         return ((((1 << Character.UPPERCASE_LETTER) |
9660             (1 << Character.LOWERCASE_LETTER) |
9661             (1 << Character.TITLECASE_LETTER) |
9662             (1 << Character.MODIFIER_LETTER) |
9663             (1 << Character.OTHER_LETTER) |
9664             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9665             != 0;
9666     }
9667 
9668     /**
9669      * Determines if the specified character is permissible as the first
9670      * character in a Java identifier.
9671      * <p>
9672      * A character may start a Java identifier if and only if
9673      * one of the following conditions is true:
9674      * <ul>
9675      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9676      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9677      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9678      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9679      * </ul>
9680      *
9681      * @param   ch the character to be tested.
9682      * @return  {@code true} if the character may start a Java
9683      *          identifier; {@code false} otherwise.
9684      * @see     Character#isJavaLetterOrDigit(char)
9685      * @see     Character#isJavaIdentifierStart(char)
9686      * @see     Character#isJavaIdentifierPart(char)
9687      * @see     Character#isLetter(char)
9688      * @see     Character#isLetterOrDigit(char)
9689      * @see     Character#isUnicodeIdentifierStart(char)
9690      * @since   1.0.2
9691      * @deprecated Replaced by isJavaIdentifierStart(char).
9692      */
9693     @Deprecated(since="1.1")
9694     public static boolean isJavaLetter(char ch) {
9695         return isJavaIdentifierStart(ch);
9696     }
9697 
9698     /**
9699      * Determines if the specified character may be part of a Java
9700      * identifier as other than the first character.
9701      * <p>
9702      * A character may be part of a Java identifier if and only if one
9703      * of the following conditions is true:
9704      * <ul>
9705      * <li>  it is a letter
9706      * <li>  it is a currency symbol (such as {@code '$'})
9707      * <li>  it is a connecting punctuation character (such as {@code '_'})
9708      * <li>  it is a digit
9709      * <li>  it is a numeric letter (such as a Roman numeral character)
9710      * <li>  it is a combining mark
9711      * <li>  it is a non-spacing mark
9712      * <li> {@code isIdentifierIgnorable} returns
9713      * {@code true} for the character.
9714      * </ul>
9715      *
9716      * @param   ch the character to be tested.
9717      * @return  {@code true} if the character may be part of a
9718      *          Java identifier; {@code false} otherwise.
9719      * @see     Character#isJavaLetter(char)
9720      * @see     Character#isJavaIdentifierStart(char)
9721      * @see     Character#isJavaIdentifierPart(char)
9722      * @see     Character#isLetter(char)
9723      * @see     Character#isLetterOrDigit(char)
9724      * @see     Character#isUnicodeIdentifierPart(char)
9725      * @see     Character#isIdentifierIgnorable(char)
9726      * @since   1.0.2
9727      * @deprecated Replaced by isJavaIdentifierPart(char).
9728      */
9729     @Deprecated(since="1.1")
9730     public static boolean isJavaLetterOrDigit(char ch) {
9731         return isJavaIdentifierPart(ch);
9732     }
9733 
9734     /**
9735      * Determines if the specified character (Unicode code point) is alphabetic.
9736      * <p>
9737      * A character is considered to be alphabetic if its general category type,
9738      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9739      * the following:
9740      * <ul>
9741      * <li> {@code UPPERCASE_LETTER}
9742      * <li> {@code LOWERCASE_LETTER}
9743      * <li> {@code TITLECASE_LETTER}
9744      * <li> {@code MODIFIER_LETTER}
9745      * <li> {@code OTHER_LETTER}
9746      * <li> {@code LETTER_NUMBER}
9747      * </ul>
9748      * or it has contributory property Other_Alphabetic as defined by the
9749      * Unicode Standard.
9750      *
9751      * @param   codePoint the character (Unicode code point) to be tested.
9752      * @return  {@code true} if the character is a Unicode alphabet
9753      *          character, {@code false} otherwise.
9754      * @since   1.7
9755      */
9756     public static boolean isAlphabetic(int codePoint) {
9757         return (((((1 << Character.UPPERCASE_LETTER) |
9758             (1 << Character.LOWERCASE_LETTER) |
9759             (1 << Character.TITLECASE_LETTER) |
9760             (1 << Character.MODIFIER_LETTER) |
9761             (1 << Character.OTHER_LETTER) |
9762             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9763             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9764     }
9765 
9766     /**
9767      * Determines if the specified character (Unicode code point) is a CJKV
9768      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9769      * the Unicode Standard.
9770      *
9771      * @param   codePoint the character (Unicode code point) to be tested.
9772      * @return  {@code true} if the character is a Unicode ideograph
9773      *          character, {@code false} otherwise.
9774      * @since   1.7
9775      */
9776     public static boolean isIdeographic(int codePoint) {
9777         return CharacterData.of(codePoint).isIdeographic(codePoint);
9778     }
9779 
9780     /**
9781      * Determines if the specified character is
9782      * permissible as the first character in a Java identifier.
9783      * <p>
9784      * A character may start a Java identifier if and only if
9785      * one of the following conditions is true:
9786      * <ul>
9787      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9788      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9789      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9790      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9791      * </ul>
9792      *
9793      * <p><b>Note:</b> This method cannot handle <a
9794      * href="#supplementary"> supplementary characters</a>. To support
9795      * all Unicode characters, including supplementary characters, use
9796      * the {@link #isJavaIdentifierStart(int)} method.
9797      *
9798      * @param   ch the character to be tested.
9799      * @return  {@code true} if the character may start a Java identifier;
9800      *          {@code false} otherwise.
9801      * @see     Character#isJavaIdentifierPart(char)
9802      * @see     Character#isLetter(char)
9803      * @see     Character#isUnicodeIdentifierStart(char)
9804      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9805      * @since   1.1
9806      */
9807     public static boolean isJavaIdentifierStart(char ch) {
9808         return isJavaIdentifierStart((int)ch);
9809     }
9810 
9811     /**
9812      * Determines if the character (Unicode code point) is
9813      * permissible as the first character in a Java identifier.
9814      * <p>
9815      * A character may start a Java identifier if and only if
9816      * one of the following conditions is true:
9817      * <ul>
9818      * <li> {@link #isLetter(int) isLetter(codePoint)}
9819      *      returns {@code true}
9820      * <li> {@link #getType(int) getType(codePoint)}
9821      *      returns {@code LETTER_NUMBER}
9822      * <li> the referenced character is a currency symbol (such as {@code '$'})
9823      * <li> the referenced character is a connecting punctuation character
9824      *      (such as {@code '_'}).
9825      * </ul>
9826      *
9827      * @param   codePoint the character (Unicode code point) to be tested.
9828      * @return  {@code true} if the character may start a Java identifier;
9829      *          {@code false} otherwise.
9830      * @see     Character#isJavaIdentifierPart(int)
9831      * @see     Character#isLetter(int)
9832      * @see     Character#isUnicodeIdentifierStart(int)
9833      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9834      * @since   1.5
9835      */
9836     public static boolean isJavaIdentifierStart(int codePoint) {
9837         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
9838     }
9839 
9840     /**
9841      * Determines if the specified character may be part of a Java
9842      * identifier as other than the first character.
9843      * <p>
9844      * A character may be part of a Java identifier if any of the following
9845      * conditions are true:
9846      * <ul>
9847      * <li>  it is a letter
9848      * <li>  it is a currency symbol (such as {@code '$'})
9849      * <li>  it is a connecting punctuation character (such as {@code '_'})
9850      * <li>  it is a digit
9851      * <li>  it is a numeric letter (such as a Roman numeral character)
9852      * <li>  it is a combining mark
9853      * <li>  it is a non-spacing mark
9854      * <li> {@code isIdentifierIgnorable} returns
9855      * {@code true} for the character
9856      * </ul>
9857      *
9858      * <p><b>Note:</b> This method cannot handle <a
9859      * href="#supplementary"> supplementary characters</a>. To support
9860      * all Unicode characters, including supplementary characters, use
9861      * the {@link #isJavaIdentifierPart(int)} method.
9862      *
9863      * @param   ch      the character to be tested.
9864      * @return {@code true} if the character may be part of a
9865      *          Java identifier; {@code false} otherwise.
9866      * @see     Character#isIdentifierIgnorable(char)
9867      * @see     Character#isJavaIdentifierStart(char)
9868      * @see     Character#isLetterOrDigit(char)
9869      * @see     Character#isUnicodeIdentifierPart(char)
9870      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9871      * @since   1.1
9872      */
9873     public static boolean isJavaIdentifierPart(char ch) {
9874         return isJavaIdentifierPart((int)ch);
9875     }
9876 
9877     /**
9878      * Determines if the character (Unicode code point) may be part of a Java
9879      * identifier as other than the first character.
9880      * <p>
9881      * A character may be part of a Java identifier if any of the following
9882      * conditions are true:
9883      * <ul>
9884      * <li>  it is a letter
9885      * <li>  it is a currency symbol (such as {@code '$'})
9886      * <li>  it is a connecting punctuation character (such as {@code '_'})
9887      * <li>  it is a digit
9888      * <li>  it is a numeric letter (such as a Roman numeral character)
9889      * <li>  it is a combining mark
9890      * <li>  it is a non-spacing mark
9891      * <li> {@link #isIdentifierIgnorable(int)
9892      * isIdentifierIgnorable(codePoint)} returns {@code true} for
9893      * the code point
9894      * </ul>
9895      *
9896      * @param   codePoint the character (Unicode code point) to be tested.
9897      * @return {@code true} if the character may be part of a
9898      *          Java identifier; {@code false} otherwise.
9899      * @see     Character#isIdentifierIgnorable(int)
9900      * @see     Character#isJavaIdentifierStart(int)
9901      * @see     Character#isLetterOrDigit(int)
9902      * @see     Character#isUnicodeIdentifierPart(int)
9903      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9904      * @since   1.5
9905      */
9906     public static boolean isJavaIdentifierPart(int codePoint) {
9907         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
9908     }
9909 
9910     /**
9911      * Determines if the specified character is permissible as the
9912      * first character in a Unicode identifier.
9913      * <p>
9914      * A character may start a Unicode identifier if and only if
9915      * one of the following conditions is true:
9916      * <ul>
9917      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9918      * <li> {@link #getType(char) getType(ch)} returns
9919      *      {@code LETTER_NUMBER}.
9920      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
9921      *      {@code Other_ID_Start}</a> character.
9922      * </ul>
9923      * <p>
9924      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
9925      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
9926      * with the following profile of UAX31:
9927      * <pre>
9928      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
9929      * </pre>
9930      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
9931      * compatibility.
9932      *
9933      * <p><b>Note:</b> This method cannot handle <a
9934      * href="#supplementary"> supplementary characters</a>. To support
9935      * all Unicode characters, including supplementary characters, use
9936      * the {@link #isUnicodeIdentifierStart(int)} method.
9937      *
9938      * @param   ch      the character to be tested.
9939      * @return  {@code true} if the character may start a Unicode
9940      *          identifier; {@code false} otherwise.
9941      * @see     Character#isJavaIdentifierStart(char)
9942      * @see     Character#isLetter(char)
9943      * @see     Character#isUnicodeIdentifierPart(char)
9944      * @since   1.1
9945      */
9946     public static boolean isUnicodeIdentifierStart(char ch) {
9947         return isUnicodeIdentifierStart((int)ch);
9948     }
9949 
9950     /**
9951      * Determines if the specified character (Unicode code point) is permissible as the
9952      * first character in a Unicode identifier.
9953      * <p>
9954      * A character may start a Unicode identifier if and only if
9955      * one of the following conditions is true:
9956      * <ul>
9957      * <li> {@link #isLetter(int) isLetter(codePoint)}
9958      *      returns {@code true}
9959      * <li> {@link #getType(int) getType(codePoint)}
9960      *      returns {@code LETTER_NUMBER}.
9961      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
9962      *      {@code Other_ID_Start}</a> character.
9963      * </ul>
9964      * <p>
9965      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
9966      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
9967      * with the following profile of UAX31:
9968      * <pre>
9969      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
9970      * </pre>
9971      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
9972      * compatibility.
9973      *
9974      * @param   codePoint the character (Unicode code point) to be tested.
9975      * @return  {@code true} if the character may start a Unicode
9976      *          identifier; {@code false} otherwise.
9977      * @see     Character#isJavaIdentifierStart(int)
9978      * @see     Character#isLetter(int)
9979      * @see     Character#isUnicodeIdentifierPart(int)
9980      * @since   1.5
9981      */
9982     public static boolean isUnicodeIdentifierStart(int codePoint) {
9983         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
9984     }
9985 
9986     /**
9987      * Determines if the specified character may be part of a Unicode
9988      * identifier as other than the first character.
9989      * <p>
9990      * A character may be part of a Unicode identifier if and only if
9991      * one of the following statements is true:
9992      * <ul>
9993      * <li>  it is a letter
9994      * <li>  it is a connecting punctuation character (such as {@code '_'})
9995      * <li>  it is a digit
9996      * <li>  it is a numeric letter (such as a Roman numeral character)
9997      * <li>  it is a combining mark
9998      * <li>  it is a non-spacing mark
9999      * <li> {@code isIdentifierIgnorable} returns
10000      * {@code true} for this character.
10001      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10002      *      {@code Other_ID_Start}</a> character.
10003      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10004      *      {@code Other_ID_Continue}</a> character.
10005      * </ul>
10006      * <p>
10007      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10008      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10009      * with the following profile of UAX31:
10010      * <pre>
10011      * Continue := Start + ID_Continue + ignorable
10012      * Medial := empty
10013      * ignorable := isIdentifierIgnorable(char) returns true for the character
10014      * </pre>
10015      * {@code ignorable} is added to {@code Continue} for backward
10016      * compatibility.
10017      *
10018      * <p><b>Note:</b> This method cannot handle <a
10019      * href="#supplementary"> supplementary characters</a>. To support
10020      * all Unicode characters, including supplementary characters, use
10021      * the {@link #isUnicodeIdentifierPart(int)} method.
10022      *
10023      * @param   ch      the character to be tested.
10024      * @return  {@code true} if the character may be part of a
10025      *          Unicode identifier; {@code false} otherwise.
10026      * @see     Character#isIdentifierIgnorable(char)
10027      * @see     Character#isJavaIdentifierPart(char)
10028      * @see     Character#isLetterOrDigit(char)
10029      * @see     Character#isUnicodeIdentifierStart(char)
10030      * @since   1.1
10031      */
10032     public static boolean isUnicodeIdentifierPart(char ch) {
10033         return isUnicodeIdentifierPart((int)ch);
10034     }
10035 
10036     /**
10037      * Determines if the specified character (Unicode code point) may be part of a Unicode
10038      * identifier as other than the first character.
10039      * <p>
10040      * A character may be part of a Unicode identifier if and only if
10041      * one of the following statements is true:
10042      * <ul>
10043      * <li>  it is a letter
10044      * <li>  it is a connecting punctuation character (such as {@code '_'})
10045      * <li>  it is a digit
10046      * <li>  it is a numeric letter (such as a Roman numeral character)
10047      * <li>  it is a combining mark
10048      * <li>  it is a non-spacing mark
10049      * <li> {@code isIdentifierIgnorable} returns
10050      * {@code true} for this character.
10051      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10052      *      {@code Other_ID_Start}</a> character.
10053      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10054      *      {@code Other_ID_Continue}</a> character.
10055      * </ul>
10056      * <p>
10057      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10058      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10059      * with the following profile of UAX31:
10060      * <pre>
10061      * Continue := Start + ID_Continue + ignorable
10062      * Medial := empty
10063      * ignorable := isIdentifierIgnorable(int) returns true for the character
10064      * </pre>
10065      * {@code ignorable} is added to {@code Continue} for backward
10066      * compatibility.
10067      *
10068      * @param   codePoint the character (Unicode code point) to be tested.
10069      * @return  {@code true} if the character may be part of a
10070      *          Unicode identifier; {@code false} otherwise.
10071      * @see     Character#isIdentifierIgnorable(int)
10072      * @see     Character#isJavaIdentifierPart(int)
10073      * @see     Character#isLetterOrDigit(int)
10074      * @see     Character#isUnicodeIdentifierStart(int)
10075      * @since   1.5
10076      */
10077     public static boolean isUnicodeIdentifierPart(int codePoint) {
10078         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
10079     }
10080 
10081     /**
10082      * Determines if the specified character should be regarded as
10083      * an ignorable character in a Java identifier or a Unicode identifier.
10084      * <p>
10085      * The following Unicode characters are ignorable in a Java identifier
10086      * or a Unicode identifier:
10087      * <ul>
10088      * <li>ISO control characters that are not whitespace
10089      * <ul>
10090      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10091      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10092      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10093      * </ul>
10094      *
10095      * <li>all characters that have the {@code FORMAT} general
10096      * category value
10097      * </ul>
10098      *
10099      * <p><b>Note:</b> This method cannot handle <a
10100      * href="#supplementary"> supplementary characters</a>. To support
10101      * all Unicode characters, including supplementary characters, use
10102      * the {@link #isIdentifierIgnorable(int)} method.
10103      *
10104      * @param   ch      the character to be tested.
10105      * @return  {@code true} if the character is an ignorable control
10106      *          character that may be part of a Java or Unicode identifier;
10107      *           {@code false} otherwise.
10108      * @see     Character#isJavaIdentifierPart(char)
10109      * @see     Character#isUnicodeIdentifierPart(char)
10110      * @since   1.1
10111      */
10112     public static boolean isIdentifierIgnorable(char ch) {
10113         return isIdentifierIgnorable((int)ch);
10114     }
10115 
10116     /**
10117      * Determines if the specified character (Unicode code point) should be regarded as
10118      * an ignorable character in a Java identifier or a Unicode identifier.
10119      * <p>
10120      * The following Unicode characters are ignorable in a Java identifier
10121      * or a Unicode identifier:
10122      * <ul>
10123      * <li>ISO control characters that are not whitespace
10124      * <ul>
10125      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10126      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10127      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10128      * </ul>
10129      *
10130      * <li>all characters that have the {@code FORMAT} general
10131      * category value
10132      * </ul>
10133      *
10134      * @param   codePoint the character (Unicode code point) to be tested.
10135      * @return  {@code true} if the character is an ignorable control
10136      *          character that may be part of a Java or Unicode identifier;
10137      *          {@code false} otherwise.
10138      * @see     Character#isJavaIdentifierPart(int)
10139      * @see     Character#isUnicodeIdentifierPart(int)
10140      * @since   1.5
10141      */
10142     public static boolean isIdentifierIgnorable(int codePoint) {
10143         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
10144     }
10145 
10146     /**
10147      * Converts the character argument to lowercase using case
10148      * mapping information from the UnicodeData file.
10149      * <p>
10150      * Note that
10151      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
10152      * does not always return {@code true} for some ranges of
10153      * characters, particularly those that are symbols or ideographs.
10154      *
10155      * <p>In general, {@link String#toLowerCase()} should be used to map
10156      * characters to lowercase. {@code String} case mapping methods
10157      * have several benefits over {@code Character} case mapping methods.
10158      * {@code String} case mapping methods can perform locale-sensitive
10159      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10160      * the {@code Character} case mapping methods cannot.
10161      *
10162      * <p><b>Note:</b> This method cannot handle <a
10163      * href="#supplementary"> supplementary characters</a>. To support
10164      * all Unicode characters, including supplementary characters, use
10165      * the {@link #toLowerCase(int)} method.
10166      *
10167      * @param   ch   the character to be converted.
10168      * @return  the lowercase equivalent of the character, if any;
10169      *          otherwise, the character itself.
10170      * @see     Character#isLowerCase(char)
10171      * @see     String#toLowerCase()
10172      */
10173     public static char toLowerCase(char ch) {
10174         return (char)toLowerCase((int)ch);
10175     }
10176 
10177     /**
10178      * Converts the character (Unicode code point) argument to
10179      * lowercase using case mapping information from the UnicodeData
10180      * file.
10181      *
10182      * <p> Note that
10183      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
10184      * does not always return {@code true} for some ranges of
10185      * characters, particularly those that are symbols or ideographs.
10186      *
10187      * <p>In general, {@link String#toLowerCase()} should be used to map
10188      * characters to lowercase. {@code String} case mapping methods
10189      * have several benefits over {@code Character} case mapping methods.
10190      * {@code String} case mapping methods can perform locale-sensitive
10191      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10192      * the {@code Character} case mapping methods cannot.
10193      *
10194      * @param   codePoint   the character (Unicode code point) to be converted.
10195      * @return  the lowercase equivalent of the character (Unicode code
10196      *          point), if any; otherwise, the character itself.
10197      * @see     Character#isLowerCase(int)
10198      * @see     String#toLowerCase()
10199      *
10200      * @since   1.5
10201      */
10202     public static int toLowerCase(int codePoint) {
10203         return CharacterData.of(codePoint).toLowerCase(codePoint);
10204     }
10205 
10206     /**
10207      * Converts the character argument to uppercase using case mapping
10208      * information from the UnicodeData file.
10209      * <p>
10210      * Note that
10211      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
10212      * does not always return {@code true} for some ranges of
10213      * characters, particularly those that are symbols or ideographs.
10214      *
10215      * <p>In general, {@link String#toUpperCase()} should be used to map
10216      * characters to uppercase. {@code String} case mapping methods
10217      * have several benefits over {@code Character} case mapping methods.
10218      * {@code String} case mapping methods can perform locale-sensitive
10219      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10220      * the {@code Character} case mapping methods cannot.
10221      *
10222      * <p><b>Note:</b> This method cannot handle <a
10223      * href="#supplementary"> supplementary characters</a>. To support
10224      * all Unicode characters, including supplementary characters, use
10225      * the {@link #toUpperCase(int)} method.
10226      *
10227      * @param   ch   the character to be converted.
10228      * @return  the uppercase equivalent of the character, if any;
10229      *          otherwise, the character itself.
10230      * @see     Character#isUpperCase(char)
10231      * @see     String#toUpperCase()
10232      */
10233     public static char toUpperCase(char ch) {
10234         return (char)toUpperCase((int)ch);
10235     }
10236 
10237     /**
10238      * Converts the character (Unicode code point) argument to
10239      * uppercase using case mapping information from the UnicodeData
10240      * file.
10241      *
10242      * <p>Note that
10243      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
10244      * does not always return {@code true} for some ranges of
10245      * characters, particularly those that are symbols or ideographs.
10246      *
10247      * <p>In general, {@link String#toUpperCase()} should be used to map
10248      * characters to uppercase. {@code String} case mapping methods
10249      * have several benefits over {@code Character} case mapping methods.
10250      * {@code String} case mapping methods can perform locale-sensitive
10251      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10252      * the {@code Character} case mapping methods cannot.
10253      *
10254      * @param   codePoint   the character (Unicode code point) to be converted.
10255      * @return  the uppercase equivalent of the character, if any;
10256      *          otherwise, the character itself.
10257      * @see     Character#isUpperCase(int)
10258      * @see     String#toUpperCase()
10259      *
10260      * @since   1.5
10261      */
10262     public static int toUpperCase(int codePoint) {
10263         return CharacterData.of(codePoint).toUpperCase(codePoint);
10264     }
10265 
10266     /**
10267      * Converts the character argument to titlecase using case mapping
10268      * information from the UnicodeData file. If a character has no
10269      * explicit titlecase mapping and is not itself a titlecase char
10270      * according to UnicodeData, then the uppercase mapping is
10271      * returned as an equivalent titlecase mapping. If the
10272      * {@code char} argument is already a titlecase
10273      * {@code char}, the same {@code char} value will be
10274      * returned.
10275      * <p>
10276      * Note that
10277      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
10278      * does not always return {@code true} for some ranges of
10279      * characters.
10280      *
10281      * <p><b>Note:</b> This method cannot handle <a
10282      * href="#supplementary"> supplementary characters</a>. To support
10283      * all Unicode characters, including supplementary characters, use
10284      * the {@link #toTitleCase(int)} method.
10285      *
10286      * @param   ch   the character to be converted.
10287      * @return  the titlecase equivalent of the character, if any;
10288      *          otherwise, the character itself.
10289      * @see     Character#isTitleCase(char)
10290      * @see     Character#toLowerCase(char)
10291      * @see     Character#toUpperCase(char)
10292      * @since   1.0.2
10293      */
10294     public static char toTitleCase(char ch) {
10295         return (char)toTitleCase((int)ch);
10296     }
10297 
10298     /**
10299      * Converts the character (Unicode code point) argument to titlecase using case mapping
10300      * information from the UnicodeData file. If a character has no
10301      * explicit titlecase mapping and is not itself a titlecase char
10302      * according to UnicodeData, then the uppercase mapping is
10303      * returned as an equivalent titlecase mapping. If the
10304      * character argument is already a titlecase
10305      * character, the same character value will be
10306      * returned.
10307      *
10308      * <p>Note that
10309      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
10310      * does not always return {@code true} for some ranges of
10311      * characters.
10312      *
10313      * @param   codePoint   the character (Unicode code point) to be converted.
10314      * @return  the titlecase equivalent of the character, if any;
10315      *          otherwise, the character itself.
10316      * @see     Character#isTitleCase(int)
10317      * @see     Character#toLowerCase(int)
10318      * @see     Character#toUpperCase(int)
10319      * @since   1.5
10320      */
10321     public static int toTitleCase(int codePoint) {
10322         return CharacterData.of(codePoint).toTitleCase(codePoint);
10323     }
10324 
10325     /**
10326      * Returns the numeric value of the character {@code ch} in the
10327      * specified radix.
10328      * <p>
10329      * If the radix is not in the range {@code MIN_RADIX} &le;
10330      * {@code radix} &le; {@code MAX_RADIX} or if the
10331      * value of {@code ch} is not a valid digit in the specified
10332      * radix, {@code -1} is returned. A character is a valid digit
10333      * if at least one of the following is true:
10334      * <ul>
10335      * <li>The method {@code isDigit} is {@code true} of the character
10336      *     and the Unicode decimal digit value of the character (or its
10337      *     single-character decomposition) is less than the specified radix.
10338      *     In this case the decimal digit value is returned.
10339      * <li>The character is one of the uppercase Latin letters
10340      *     {@code 'A'} through {@code 'Z'} and its code is less than
10341      *     {@code radix + 'A' - 10}.
10342      *     In this case, {@code ch - 'A' + 10}
10343      *     is returned.
10344      * <li>The character is one of the lowercase Latin letters
10345      *     {@code 'a'} through {@code 'z'} and its code is less than
10346      *     {@code radix + 'a' - 10}.
10347      *     In this case, {@code ch - 'a' + 10}
10348      *     is returned.
10349      * <li>The character is one of the fullwidth uppercase Latin letters A
10350      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10351      *     and its code is less than
10352      *     {@code radix + '\u005CuFF21' - 10}.
10353      *     In this case, {@code ch - '\u005CuFF21' + 10}
10354      *     is returned.
10355      * <li>The character is one of the fullwidth lowercase Latin letters a
10356      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10357      *     and its code is less than
10358      *     {@code radix + '\u005CuFF41' - 10}.
10359      *     In this case, {@code ch - '\u005CuFF41' + 10}
10360      *     is returned.
10361      * </ul>
10362      *
10363      * <p><b>Note:</b> This method cannot handle <a
10364      * href="#supplementary"> supplementary characters</a>. To support
10365      * all Unicode characters, including supplementary characters, use
10366      * the {@link #digit(int, int)} method.
10367      *
10368      * @param   ch      the character to be converted.
10369      * @param   radix   the radix.
10370      * @return  the numeric value represented by the character in the
10371      *          specified radix.
10372      * @see     Character#forDigit(int, int)
10373      * @see     Character#isDigit(char)
10374      */
10375     public static int digit(char ch, int radix) {
10376         return digit((int)ch, radix);
10377     }
10378 
10379     /**
10380      * Returns the numeric value of the specified character (Unicode
10381      * code point) in the specified radix.
10382      *
10383      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
10384      * {@code radix} &le; {@code MAX_RADIX} or if the
10385      * character is not a valid digit in the specified
10386      * radix, {@code -1} is returned. A character is a valid digit
10387      * if at least one of the following is true:
10388      * <ul>
10389      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
10390      *     and the Unicode decimal digit value of the character (or its
10391      *     single-character decomposition) is less than the specified radix.
10392      *     In this case the decimal digit value is returned.
10393      * <li>The character is one of the uppercase Latin letters
10394      *     {@code 'A'} through {@code 'Z'} and its code is less than
10395      *     {@code radix + 'A' - 10}.
10396      *     In this case, {@code codePoint - 'A' + 10}
10397      *     is returned.
10398      * <li>The character is one of the lowercase Latin letters
10399      *     {@code 'a'} through {@code 'z'} and its code is less than
10400      *     {@code radix + 'a' - 10}.
10401      *     In this case, {@code codePoint - 'a' + 10}
10402      *     is returned.
10403      * <li>The character is one of the fullwidth uppercase Latin letters A
10404      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10405      *     and its code is less than
10406      *     {@code radix + '\u005CuFF21' - 10}.
10407      *     In this case,
10408      *     {@code codePoint - '\u005CuFF21' + 10}
10409      *     is returned.
10410      * <li>The character is one of the fullwidth lowercase Latin letters a
10411      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10412      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
10414      *     In this case,
10415      *     {@code codePoint - '\u005CuFF41' + 10}
10416      *     is returned.
10417      * </ul>
10418      *
10419      * @param   codePoint the character (Unicode code point) to be converted.
10420      * @param   radix   the radix.
10421      * @return  the numeric value represented by the character in the
10422      *          specified radix.
10423      * @see     Character#forDigit(int, int)
10424      * @see     Character#isDigit(int)
10425      * @since   1.5
10426      */
10427     public static int digit(int codePoint, int radix) {
10428         return CharacterData.of(codePoint).digit(codePoint, radix);
10429     }
10430 
10431     /**
10432      * Returns the {@code int} value that the specified Unicode
10433      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
10436      * <p>
10437      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10438      * {@code '\u005Cu005A'}), lowercase
10439      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10440      * full width variant ({@code '\u005CuFF21'} through
10441      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10442      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10443      * through 35. This is independent of the Unicode specification,
10444      * which does not assign numeric values to these {@code char}
10445      * values.
10446      * <p>
10447      * If the character does not have a numeric value, then -1 is returned.
10448      * If the character has a numeric value that cannot be represented as a
10449      * nonnegative integer (for example, a fractional value), then -2
10450      * is returned.
10451      *
10452      * <p><b>Note:</b> This method cannot handle <a
10453      * href="#supplementary"> supplementary characters</a>. To support
10454      * all Unicode characters, including supplementary characters, use
10455      * the {@link #getNumericValue(int)} method.
10456      *
10457      * @param   ch      the character to be converted.
10458      * @return  the numeric value of the character, as a nonnegative {@code int}
10459      *          value; -2 if the character has a numeric value but the value
10460      *          can not be represented as a nonnegative {@code int} value;
10461      *          -1 if the character has no numeric value.
10462      * @see     Character#forDigit(int, int)
10463      * @see     Character#isDigit(char)
10464      * @since   1.1
10465      */
10466     public static int getNumericValue(char ch) {
10467         return getNumericValue((int)ch);
10468     }
10469 
10470     /**
10471      * Returns the {@code int} value that the specified
10472      * character (Unicode code point) represents. For example, the character
10473      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
10474      * an {@code int} with a value of 50.
10475      * <p>
10476      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10477      * {@code '\u005Cu005A'}), lowercase
10478      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10479      * full width variant ({@code '\u005CuFF21'} through
10480      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10481      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10482      * through 35. This is independent of the Unicode specification,
10483      * which does not assign numeric values to these {@code char}
10484      * values.
10485      * <p>
10486      * If the character does not have a numeric value, then -1 is returned.
10487      * If the character has a numeric value that cannot be represented as a
10488      * nonnegative integer (for example, a fractional value), then -2
10489      * is returned.
10490      *
10491      * @param   codePoint the character (Unicode code point) to be converted.
10492      * @return  the numeric value of the character, as a nonnegative {@code int}
10493      *          value; -2 if the character has a numeric value but the value
10494      *          can not be represented as a nonnegative {@code int} value;
10495      *          -1 if the character has no numeric value.
10496      * @see     Character#forDigit(int, int)
10497      * @see     Character#isDigit(int)
10498      * @since   1.5
10499      */
10500     public static int getNumericValue(int codePoint) {
10501         return CharacterData.of(codePoint).getNumericValue(codePoint);
10502     }
10503 
10504     /**
10505      * Determines if the specified character is ISO-LATIN-1 white space.
10506      * This method returns {@code true} for the following five
10507      * characters only:
10508      * <table class="striped">
10509      * <caption style="display:none">truechars</caption>
10510      * <thead>
10511      * <tr><th scope="col">Character
10512      *     <th scope="col">Code
10513      *     <th scope="col">Name
10514      * </thead>
10515      * <tbody>
10516      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
10517      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
10518      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
     *     <td>{@code LINE FEED}</td></tr>
10520      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
10521      *     <td>{@code FORM FEED}</td></tr>
10522      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
10523      *     <td>{@code CARRIAGE RETURN}</td></tr>
10524      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
10525      *     <td>{@code SPACE}</td></tr>
10526      * </tbody>
10527      * </table>
10528      *
10529      * @param      ch   the character to be tested.
10530      * @return     {@code true} if the character is ISO-LATIN-1 white
10531      *             space; {@code false} otherwise.
10532      * @see        Character#isSpaceChar(char)
10533      * @see        Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
10535      */
10536     @Deprecated(since="1.1")
10537     public static boolean isSpace(char ch) {
10538         return (ch <= 0x0020) &&
10539             (((((1L << 0x0009) |
10540             (1L << 0x000A) |
10541             (1L << 0x000C) |
10542             (1L << 0x000D) |
10543             (1L << 0x0020)) >> ch) & 1L) != 0);
10544     }
10545 
10546 
10547     /**
10548      * Determines if the specified character is a Unicode space character.
10549      * A character is considered to be a space character if and only if
10550      * it is specified to be a space character by the Unicode Standard. This
10551      * method returns true if the character's general category type is any of
10552      * the following:
10553      * <ul>
10554      * <li> {@code SPACE_SEPARATOR}
10555      * <li> {@code LINE_SEPARATOR}
10556      * <li> {@code PARAGRAPH_SEPARATOR}
10557      * </ul>
10558      *
10559      * <p><b>Note:</b> This method cannot handle <a
10560      * href="#supplementary"> supplementary characters</a>. To support
10561      * all Unicode characters, including supplementary characters, use
10562      * the {@link #isSpaceChar(int)} method.
10563      *
10564      * @param   ch      the character to be tested.
10565      * @return  {@code true} if the character is a space character;
10566      *          {@code false} otherwise.
10567      * @see     Character#isWhitespace(char)
10568      * @since   1.1
10569      */
10570     public static boolean isSpaceChar(char ch) {
10571         return isSpaceChar((int)ch);
10572     }
10573 
10574     /**
10575      * Determines if the specified character (Unicode code point) is a
10576      * Unicode space character.  A character is considered to be a
10577      * space character if and only if it is specified to be a space
10578      * character by the Unicode Standard. This method returns true if
10579      * the character's general category type is any of the following:
10580      *
10581      * <ul>
10582      * <li> {@link #SPACE_SEPARATOR}
10583      * <li> {@link #LINE_SEPARATOR}
10584      * <li> {@link #PARAGRAPH_SEPARATOR}
10585      * </ul>
10586      *
10587      * @param   codePoint the character (Unicode code point) to be tested.
10588      * @return  {@code true} if the character is a space character;
10589      *          {@code false} otherwise.
10590      * @see     Character#isWhitespace(int)
10591      * @since   1.5
10592      */
10593     public static boolean isSpaceChar(int codePoint) {
10594         return ((((1 << Character.SPACE_SEPARATOR) |
10595                   (1 << Character.LINE_SEPARATOR) |
10596                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10597             != 0;
10598     }
10599 
10600     /**
10601      * Determines if the specified character is white space according to Java.
10602      * A character is a Java whitespace character if and only if it satisfies
10603      * one of the following criteria:
10604      * <ul>
10605      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10606      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10607      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10608      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10609      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10610      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10611      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10612      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10613      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10614      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10615      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10616      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10617      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10618      * </ul>
10619      *
10620      * <p><b>Note:</b> This method cannot handle <a
10621      * href="#supplementary"> supplementary characters</a>. To support
10622      * all Unicode characters, including supplementary characters, use
10623      * the {@link #isWhitespace(int)} method.
10624      *
10625      * @param   ch the character to be tested.
10626      * @return  {@code true} if the character is a Java whitespace
10627      *          character; {@code false} otherwise.
10628      * @see     Character#isSpaceChar(char)
10629      * @since   1.1
10630      */
10631     public static boolean isWhitespace(char ch) {
10632         return isWhitespace((int)ch);
10633     }
10634 
10635     /**
10636      * Determines if the specified character (Unicode code point) is
10637      * white space according to Java.  A character is a Java
10638      * whitespace character if and only if it satisfies one of the
10639      * following criteria:
10640      * <ul>
10641      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10642      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10643      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10644      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10645      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10646      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10647      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10648      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10649      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10650      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10651      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10652      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10653      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10654      * </ul>
10655      *
10656      * @param   codePoint the character (Unicode code point) to be tested.
10657      * @return  {@code true} if the character is a Java whitespace
10658      *          character; {@code false} otherwise.
10659      * @see     Character#isSpaceChar(int)
10660      * @since   1.5
10661      */
10662     public static boolean isWhitespace(int codePoint) {
10663         return CharacterData.of(codePoint).isWhitespace(codePoint);
10664     }
10665 
10666     /**
10667      * Determines if the specified character is an ISO control
10668      * character.  A character is considered to be an ISO control
10669      * character if its code is in the range {@code '\u005Cu0000'}
10670      * through {@code '\u005Cu001F'} or in the range
10671      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10672      *
10673      * <p><b>Note:</b> This method cannot handle <a
10674      * href="#supplementary"> supplementary characters</a>. To support
10675      * all Unicode characters, including supplementary characters, use
10676      * the {@link #isISOControl(int)} method.
10677      *
10678      * @param   ch      the character to be tested.
10679      * @return  {@code true} if the character is an ISO control character;
10680      *          {@code false} otherwise.
10681      *
10682      * @see     Character#isSpaceChar(char)
10683      * @see     Character#isWhitespace(char)
10684      * @since   1.1
10685      */
10686     public static boolean isISOControl(char ch) {
10687         return isISOControl((int)ch);
10688     }
10689 
10690     /**
10691      * Determines if the referenced character (Unicode code point) is an ISO control
10692      * character.  A character is considered to be an ISO control
10693      * character if its code is in the range {@code '\u005Cu0000'}
10694      * through {@code '\u005Cu001F'} or in the range
10695      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10696      *
10697      * @param   codePoint the character (Unicode code point) to be tested.
10698      * @return  {@code true} if the character is an ISO control character;
10699      *          {@code false} otherwise.
10700      * @see     Character#isSpaceChar(int)
10701      * @see     Character#isWhitespace(int)
10702      * @since   1.5
10703      */
10704     public static boolean isISOControl(int codePoint) {
10705         // Optimized form of:
10706         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
10707         //     (codePoint >= 0x7F && codePoint <= 0x9F);
10708         return codePoint <= 0x9F &&
10709             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10710     }
10711 
10712     /**
10713      * Returns a value indicating a character's general category.
10714      *
10715      * <p><b>Note:</b> This method cannot handle <a
10716      * href="#supplementary"> supplementary characters</a>. To support
10717      * all Unicode characters, including supplementary characters, use
10718      * the {@link #getType(int)} method.
10719      *
10720      * @param   ch      the character to be tested.
10721      * @return  a value of type {@code int} representing the
10722      *          character's general category.
10723      * @see     Character#COMBINING_SPACING_MARK
10724      * @see     Character#CONNECTOR_PUNCTUATION
10725      * @see     Character#CONTROL
10726      * @see     Character#CURRENCY_SYMBOL
10727      * @see     Character#DASH_PUNCTUATION
10728      * @see     Character#DECIMAL_DIGIT_NUMBER
10729      * @see     Character#ENCLOSING_MARK
10730      * @see     Character#END_PUNCTUATION
10731      * @see     Character#FINAL_QUOTE_PUNCTUATION
10732      * @see     Character#FORMAT
10733      * @see     Character#INITIAL_QUOTE_PUNCTUATION
10734      * @see     Character#LETTER_NUMBER
10735      * @see     Character#LINE_SEPARATOR
10736      * @see     Character#LOWERCASE_LETTER
10737      * @see     Character#MATH_SYMBOL
10738      * @see     Character#MODIFIER_LETTER
10739      * @see     Character#MODIFIER_SYMBOL
10740      * @see     Character#NON_SPACING_MARK
10741      * @see     Character#OTHER_LETTER
10742      * @see     Character#OTHER_NUMBER
10743      * @see     Character#OTHER_PUNCTUATION
10744      * @see     Character#OTHER_SYMBOL
10745      * @see     Character#PARAGRAPH_SEPARATOR
10746      * @see     Character#PRIVATE_USE
10747      * @see     Character#SPACE_SEPARATOR
10748      * @see     Character#START_PUNCTUATION
10749      * @see     Character#SURROGATE
10750      * @see     Character#TITLECASE_LETTER
10751      * @see     Character#UNASSIGNED
10752      * @see     Character#UPPERCASE_LETTER
10753      * @since   1.1
10754      */
10755     public static int getType(char ch) {
10756         return getType((int)ch);
10757     }
10758 
10759     /**
10760      * Returns a value indicating a character's general category.
10761      *
10762      * @param   codePoint the character (Unicode code point) to be tested.
10763      * @return  a value of type {@code int} representing the
10764      *          character's general category.
10765      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10766      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10767      * @see     Character#CONTROL CONTROL
10768      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10769      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
10770      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10771      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
10772      * @see     Character#END_PUNCTUATION END_PUNCTUATION
10773      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10774      * @see     Character#FORMAT FORMAT
10775      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10776      * @see     Character#LETTER_NUMBER LETTER_NUMBER
10777      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
10778      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
10779      * @see     Character#MATH_SYMBOL MATH_SYMBOL
10780      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
10781      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10782      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
10783      * @see     Character#OTHER_LETTER OTHER_LETTER
10784      * @see     Character#OTHER_NUMBER OTHER_NUMBER
10785      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10786      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
10787      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10788      * @see     Character#PRIVATE_USE PRIVATE_USE
10789      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
10790      * @see     Character#START_PUNCTUATION START_PUNCTUATION
10791      * @see     Character#SURROGATE SURROGATE
10792      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
10793      * @see     Character#UNASSIGNED UNASSIGNED
10794      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
10795      * @since   1.5
10796      */
10797     public static int getType(int codePoint) {
10798         return CharacterData.of(codePoint).getType(codePoint);
10799     }
10800 
10801     /**
10802      * Determines the character representation for a specific digit in
10803      * the specified radix. If the value of {@code radix} is not a
10804      * valid radix, or the value of {@code digit} is not a valid
10805      * digit in the specified radix, the null character
10806      * ({@code '\u005Cu0000'}) is returned.
10807      * <p>
10808      * The {@code radix} argument is valid if it is greater than or
10809      * equal to {@code MIN_RADIX} and less than or equal to
10810      * {@code MAX_RADIX}. The {@code digit} argument is valid if
10811      * {@code 0 <= digit < radix}.
10812      * <p>
10813      * If the digit is less than 10, then
10814      * {@code '0' + digit} is returned. Otherwise, the value
10815      * {@code 'a' + digit - 10} is returned.
10816      *
10817      * @param   digit   the number to convert to a character.
10818      * @param   radix   the radix.
10819      * @return  the {@code char} representation of the specified digit
10820      *          in the specified radix.
10821      * @see     Character#MIN_RADIX
10822      * @see     Character#MAX_RADIX
10823      * @see     Character#digit(char, int)
10824      */
10825     public static char forDigit(int digit, int radix) {
10826         if ((digit >= radix) || (digit < 0)) {
10827             return '\0';
10828         }
10829         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10830             return '\0';
10831         }
10832         if (digit < 10) {
10833             return (char)('0' + digit);
10834         }
10835         return (char)('a' - 10 + digit);
10836     }
10837 
10838     /**
10839      * Returns the Unicode directionality property for the given
10840      * character.  Character directionality is used to calculate the
10841      * visual ordering of text. The directionality value of undefined
10842      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10843      *
10844      * <p><b>Note:</b> This method cannot handle <a
10845      * href="#supplementary"> supplementary characters</a>. To support
10846      * all Unicode characters, including supplementary characters, use
10847      * the {@link #getDirectionality(int)} method.
10848      *
10849      * @param  ch {@code char} for which the directionality property
10850      *            is requested.
10851      * @return the directionality property of the {@code char} value.
10852      *
10853      * @see Character#DIRECTIONALITY_UNDEFINED
10854      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10855      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10856      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10857      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10858      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10859      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10860      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10861      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10862      * @see Character#DIRECTIONALITY_NONSPACING_MARK
10863      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10864      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10865      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10866      * @see Character#DIRECTIONALITY_WHITESPACE
10867      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10868      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10869      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10870      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10871      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10872      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10873      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10874      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10875      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10876      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10877      * @since 1.4
10878      */
10879     public static byte getDirectionality(char ch) {
10880         return getDirectionality((int)ch);
10881     }
10882 
10883     /**
10884      * Returns the Unicode directionality property for the given
10885      * character (Unicode code point).  Character directionality is
     * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is
     * {@link #DIRECTIONALITY_UNDEFINED}.
10889      *
10890      * @param   codePoint the character (Unicode code point) for which
10891      *          the directionality property is requested.
10892      * @return the directionality property of the character.
10893      *
10894      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10895      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10896      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10897      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10898      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10899      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10900      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10901      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10902      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10903      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10904      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10905      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10906      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10907      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10908      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10909      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10910      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10911      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10912      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10913      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10914      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10915      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10916      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10917      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10918      * @since    1.5
10919      */
10920     public static byte getDirectionality(int codePoint) {
10921         return CharacterData.of(codePoint).getDirectionality(codePoint);
10922     }
10923 
10924     /**
10925      * Determines whether the character is mirrored according to the
10926      * Unicode specification.  Mirrored characters should have their
10927      * glyphs horizontally mirrored when displayed in text that is
10928      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
10929      * PARENTHESIS is semantically defined to be an <i>opening
10930      * parenthesis</i>.  This will appear as a "(" in text that is
10931      * left-to-right but as a ")" in text that is right-to-left.
10932      *
10933      * <p><b>Note:</b> This method cannot handle <a
10934      * href="#supplementary"> supplementary characters</a>. To support
10935      * all Unicode characters, including supplementary characters, use
10936      * the {@link #isMirrored(int)} method.
10937      *
10938      * @param  ch {@code char} for which the mirrored property is requested
10939      * @return {@code true} if the char is mirrored, {@code false}
10940      *         if the {@code char} is not mirrored or is not defined.
10941      * @since 1.4
10942      */
10943     public static boolean isMirrored(char ch) {
10944         return isMirrored((int)ch);
10945     }
10946 
10947     /**
10948      * Determines whether the specified character (Unicode code point)
10949      * is mirrored according to the Unicode specification.  Mirrored
10950      * characters should have their glyphs horizontally mirrored when
10951      * displayed in text that is right-to-left.  For example,
10952      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10953      * defined to be an <i>opening parenthesis</i>.  This will appear
10954      * as a "(" in text that is left-to-right but as a ")" in text
10955      * that is right-to-left.
10956      *
10957      * @param   codePoint the character (Unicode code point) to be tested.
10958      * @return  {@code true} if the character is mirrored, {@code false}
10959      *          if the character is not mirrored or is not defined.
10960      * @since   1.5
10961      */
10962     public static boolean isMirrored(int codePoint) {
10963         return CharacterData.of(codePoint).isMirrored(codePoint);
10964     }
10965 
10966     /**
10967      * Compares two {@code Character} objects numerically.
10968      *
10969      * @param   anotherCharacter   the {@code Character} to be compared.
     *
10971      * @return  the value {@code 0} if the argument {@code Character}
10972      *          is equal to this {@code Character}; a value less than
10973      *          {@code 0} if this {@code Character} is numerically less
10974      *          than the {@code Character} argument; and a value greater than
10975      *          {@code 0} if this {@code Character} is numerically greater
10976      *          than the {@code Character} argument (unsigned comparison).
10977      *          Note that this is strictly a numerical comparison; it is not
10978      *          locale-dependent.
10979      * @since   1.2
10980      */
10981     public int compareTo(Character anotherCharacter) {
10982         return compare(this.value, anotherCharacter.value);
10983     }
10984 
10985     /**
10986      * Compares two {@code char} values numerically.
10987      * The value returned is identical to what would be returned by:
10988      * <pre>
10989      *    Character.valueOf(x).compareTo(Character.valueOf(y))
10990      * </pre>
10991      *
10992      * @param  x the first {@code char} to compare
10993      * @param  y the second {@code char} to compare
10994      * @return the value {@code 0} if {@code x == y};
10995      *         a value less than {@code 0} if {@code x < y}; and
10996      *         a value greater than {@code 0} if {@code x > y}
10997      * @since 1.7
10998      */
10999     public static int compare(char x, char y) {
11000         return x - y;
11001     }
11002 
11003     /**
11004      * Converts the character (Unicode code point) argument to uppercase using
11005      * information from the UnicodeData file.
11006      *
11007      * @param   codePoint   the character (Unicode code point) to be converted.
11008      * @return  either the uppercase equivalent of the character, if
11009      *          any, or an error flag ({@code Character.ERROR})
11010      *          that indicates that a 1:M {@code char} mapping exists.
11011      * @see     Character#isLowerCase(char)
11012      * @see     Character#isUpperCase(char)
11013      * @see     Character#toLowerCase(char)
11014      * @see     Character#toTitleCase(char)
11015      * @since 1.4
11016      */
11017     static int toUpperCaseEx(int codePoint) {
11018         assert isValidCodePoint(codePoint);
11019         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
11020     }
11021 
11022     /**
11023      * Converts the character (Unicode code point) argument to uppercase using case
11024      * mapping information from the SpecialCasing file in the Unicode
11025      * specification. If a character has no explicit uppercase
11026      * mapping, then the {@code char} itself is returned in the
11027      * {@code char[]}.
11028      *
11029      * @param   codePoint   the character (Unicode code point) to be converted.
11030      * @return a {@code char[]} with the uppercased character.
11031      * @since 1.4
11032      */
11033     static char[] toUpperCaseCharArray(int codePoint) {
11034         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
11035         assert isBmpCodePoint(codePoint);
11036         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
11037     }
11038 
11039     /**
11040      * The number of bits used to represent a {@code char} value in unsigned
11041      * binary form, constant {@code 16}.
11042      *
11043      * @since 1.5
11044      */
    public static final int SIZE = 16;  // width of a char in bits

    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, derived as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
11054 
11055     /**
11056      * Returns the value obtained by reversing the order of the bytes in the
11057      * specified {@code char} value.
11058      *
11059      * @param ch The {@code char} of which to reverse the byte order.
11060      * @return the value obtained by reversing (or, equivalently, swapping)
11061      *     the bytes in the specified {@code char} value.
11062      * @since 1.5
11063      */
11064     @HotSpotIntrinsicCandidate
11065     public static char reverseBytes(char ch) {
11066         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
11067     }
11068 
11069     /**
11070      * Returns the Unicode name of the specified character
11071      * {@code codePoint}, or null if the code point is
11072      * {@link #UNASSIGNED unassigned}.
11073      * <p>
11074      * Note: if the specified character is not assigned a name by
11075      * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
11078      *
11079      * <blockquote>{@code
11080      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11081      *     + " "
11082      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11083      *
11084      * }</blockquote>
11085      *
11086      * @param  codePoint the character (Unicode code point)
11087      *
11088      * @return the Unicode name of the specified character, or null if
11089      *         the code point is unassigned.
11090      *
11091      * @throws IllegalArgumentException if the specified
11092      *            {@code codePoint} is not a valid Unicode
11093      *            code point.
11094      *
11095      * @since 1.7
11096      */
11097     public static String getName(int codePoint) {
11098         if (!isValidCodePoint(codePoint)) {
11099             throw new IllegalArgumentException(
11100                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
11101         }
11102         String name = CharacterName.getInstance().getName(codePoint);
11103         if (name != null)
11104             return name;
11105         if (getType(codePoint) == UNASSIGNED)
11106             return null;
11107         UnicodeBlock block = UnicodeBlock.of(codePoint);
11108         if (block != null)
11109             return block.toString().replace('_', ' ') + " "
11110                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11111         // should never come here
11112         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11113     }
11114 
11115     /**
11116      * Returns the code point value of the Unicode character specified by
11117      * the given Unicode character name.
11118      * <p>
11119      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
11120      * file (part of the Unicode Character Database maintained by the Unicode
     * Consortium), its name is defined as the result of the expression:
11122      *
11123      * <blockquote>{@code
11124      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11125      *     + " "
11126      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11127      *
11128      * }</blockquote>
11129      * <p>
     * Matching of {@code name} is case-insensitive, and any leading and
     * trailing whitespace characters are removed before matching.
11132      *
11133      * @param  name the Unicode character name
11134      *
11135      * @return the code point value of the character specified by its name.
11136      *
11137      * @throws IllegalArgumentException if the specified {@code name}
11138      *         is not a valid Unicode character name.
11139      * @throws NullPointerException if {@code name} is {@code null}
11140      *
11141      * @since 9
11142      */
11143     public static int codePointOf(String name) {
11144         name = name.trim().toUpperCase(Locale.ROOT);
11145         int cp = CharacterName.getInstance().getCodePoint(name);
11146         if (cp != -1)
11147             return cp;
11148         try {
11149             int off = name.lastIndexOf(' ');
11150             if (off != -1) {
11151                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
11152                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
11153                     return cp;
11154             }
11155         } catch (Exception x) {}
11156         throw new IllegalArgumentException("Unrecognized character name :" + name);
11157     }
11158 }