1 /*
   2  * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 10.0.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  *
  59  * <h3><a id="unicode">Unicode Character Representations</a></h3>
  60  *
  61  * <p>The {@code char} data type (and therefore the value that a
  62  * {@code Character} object encapsulates) are based on the
  63  * original Unicode specification, which defined characters as
  64  * fixed-width 16-bit entities. The Unicode Standard has since been
  65  * changed to allow for characters whose representation requires more
  66  * than 16 bits.  The range of legal <em>code point</em>s is now
  67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  68  * (Refer to the <a
  69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  71  * Standard.)
  72  *
  73  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
  74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  75  * <a id="supplementary">Characters</a> whose code points are greater
  76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  77  * platform uses the UTF-16 representation in {@code char} arrays and
  78  * in the {@code String} and {@code StringBuffer} classes. In
  79  * this representation, supplementary characters are represented as a pair
  80  * of {@code char} values, the first from the <em>high-surrogates</em>
 * range (&#92;uD800-&#92;uDBFF), the second from the
 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  83  *
  84  * <p>A {@code char} value, therefore, represents Basic
  85  * Multilingual Plane (BMP) code points, including the surrogate
  86  * code points, or code units of the UTF-16 encoding. An
  87  * {@code int} value represents all Unicode code points,
  88  * including supplementary code points. The lower (least significant)
  89  * 21 bits of {@code int} are used to represent Unicode code
  90  * points and the upper (most significant) 11 bits must be zero.
  91  * Unless otherwise specified, the behavior with respect to
  92  * supplementary characters and surrogate {@code char} values is
  93  * as follows:
  94  *
  95  * <ul>
  96  * <li>The methods that only accept a {@code char} value cannot support
  97  * supplementary characters. They treat {@code char} values from the
  98  * surrogate ranges as undefined characters. For example,
  99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 100  * this specific value if followed by any low-surrogate value in a string
 101  * would represent a letter.
 102  *
 103  * <li>The methods that accept an {@code int} value support all
 104  * Unicode characters, including supplementary characters. For
 105  * example, {@code Character.isLetter(0x2F81A)} returns
 106  * {@code true} because the code point value represents a letter
 107  * (a CJK ideograph).
 108  * </ul>
 109  *
 110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 111  * used for character values in the range between U+0000 and U+10FFFF,
 112  * and <em>Unicode code unit</em> is used for 16-bit
 113  * {@code char} values that are code units of the <em>UTF-16</em>
 114  * encoding. For more information on Unicode terminology, refer to the
 115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 116  *
 117  * @author  Lee Boynton
 118  * @author  Guy Steele
 119  * @author  Akira Tanaka
 120  * @author  Martin Buchholz
 121  * @author  Ulf Zibis
 122  * @since   1.0
 123  */
 124 public final
 125 class Character implements java.io.Serializable, Comparable<Character> {
 126     /**
 127      * The minimum radix available for conversion to and from strings.
 128      * The constant value of this field is the smallest value permitted
 129      * for the radix argument in radix-conversion methods such as the
 130      * {@code digit} method, the {@code forDigit} method, and the
 131      * {@code toString} method of class {@code Integer}.
 132      *
 133      * @see     Character#digit(char, int)
 134      * @see     Character#forDigit(int, int)
 135      * @see     Integer#toString(int, int)
 136      * @see     Integer#valueOf(String)
 137      */
 138     public static final int MIN_RADIX = 2;
 139 
 140     /**
 141      * The maximum radix available for conversion to and from strings.
 142      * The constant value of this field is the largest value permitted
 143      * for the radix argument in radix-conversion methods such as the
 144      * {@code digit} method, the {@code forDigit} method, and the
 145      * {@code toString} method of class {@code Integer}.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * {@code char}, {@code '\u005Cu0000'}.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * {@code char}, {@code '\u005CuFFFF'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The {@code Class} instance representing the primitive type
 172      * {@code char}.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
    // NOTE: FORMAT is 16 and PRIVATE_USE is 18, so the value 17 is not used
    // by the general category constants declared around this one.
    // NOTE(review): presumably the gap is intentional and the values must
    // not be renumbered, since they are public constants - confirm.
    /**
     * General category "Co" in the Unicode specification.
     * @since   1.1
     */
    public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined {@code char}
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
     * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * Weak bidirectional character type "LRI" in the Unicode specification.
 496      * @since 9
 497      */
 498     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 499 
 500     /**
 501      * Weak bidirectional character type "RLI" in the Unicode specification.
 502      * @since 9
 503      */
 504     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 505 
 506     /**
 507      * Weak bidirectional character type "FSI" in the Unicode specification.
 508      * @since 9
 509      */
 510     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 511 
 512     /**
 513      * Weak bidirectional character type "PDI" in the Unicode specification.
 514      * @since 9
 515      */
 516     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 517 
 518     /**
 519      * The minimum value of a
 520      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 521      * Unicode high-surrogate code unit</a>
 522      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 523      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a
 531      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 532      * Unicode high-surrogate code unit</a>
 533      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 534      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 535      *
 536      * @since 1.5
 537      */
 538     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 539 
 540     /**
 541      * The minimum value of a
 542      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 543      * Unicode low-surrogate code unit</a>
 544      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 545      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 546      *
 547      * @since 1.5
 548      */
 549     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 550 
 551     /**
 552      * The maximum value of a
 553      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 554      * Unicode low-surrogate code unit</a>
 555      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 556      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 557      *
 558      * @since 1.5
 559      */
 560     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 561 
 562     /**
 563      * The minimum value of a Unicode surrogate code unit in the
 564      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 569 
 570     /**
 571      * The maximum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 577 
 578     /**
 579      * The minimum value of a
 580      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 581      * Unicode supplementary code point</a>, constant {@code U+10000}.
 582      *
 583      * @since 1.5
 584      */
 585     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 586 
 587     /**
 588      * The minimum value of a
 589      * <a href="http://www.unicode.org/glossary/#code_point">
 590      * Unicode code point</a>, constant {@code U+0000}.
 591      *
 592      * @since 1.5
 593      */
 594     public static final int MIN_CODE_POINT = 0x000000;
 595 
 596     /**
 597      * The maximum value of a
 598      * <a href="http://www.unicode.org/glossary/#code_point">
 599      * Unicode code point</a>, constant {@code U+10FFFF}.
 600      *
 601      * @since 1.5
 602      */
 603     public static final int MAX_CODE_POINT = 0X10FFFF;
 604 
 605 
 606     /**
 607      * Instances of this class represent particular subsets of the Unicode
 608      * character set.  The only family of subsets defined in the
 609      * {@code Character} class is {@link Character.UnicodeBlock}.
 610      * Other portions of the Java API may define other subsets for their
 611      * own purposes.
 612      *
 613      * @since 1.2
 614      */
 615     public static class Subset  {
 616 
 617         private String name;
 618 
 619         /**
 620          * Constructs a new {@code Subset} instance.
 621          *
 622          * @param  name  The name of this subset
 623          * @throws NullPointerException if name is {@code null}
 624          */
 625         protected Subset(String name) {
 626             if (name == null) {
 627                 throw new NullPointerException("name");
 628             }
 629             this.name = name;
 630         }
 631 
 632         /**
 633          * Compares two {@code Subset} objects for equality.
 634          * This method returns {@code true} if and only if
 635          * {@code this} and the argument refer to the same
 636          * object; since this method is {@code final}, this
 637          * guarantee holds for all subclasses.
 638          */
 639         public final boolean equals(Object obj) {
 640             return (this == obj);
 641         }
 642 
 643         /**
 644          * Returns the standard hash code as defined by the
 645          * {@link Object#hashCode} method.  This method
 646          * is {@code final} in order to ensure that the
 647          * {@code equals} and {@code hashCode} methods will
 648          * be consistent in all subclasses.
 649          */
 650         public final int hashCode() {
 651             return super.hashCode();
 652         }
 653 
 654         /**
 655          * Returns the name of this subset.
 656          */
 657         public final String toString() {
 658             return name;
 659         }
 660     }
 661 
 662     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 663     // for the latest specification of Unicode Blocks.
 664 
 665     /**
 666      * A family of character subsets representing the character blocks in the
 667      * Unicode specification. Character blocks generally define characters
 668      * used for a specific script or purpose. A character is contained by
 669      * at most one Unicode block.
 670      *
 671      * @since 1.2
 672      */
 673     public static final class UnicodeBlock extends Subset {
        /**
         * Lookup table from a block name to its {@code UnicodeBlock}.  The
         * constructors of this class put every identifier name and every
         * alias into it.
         * <p>
         * The initial capacity is computed from:
         * 638  - the expected number of entries
         * 0.75 - the default load factor of HashMap
         * presumably so that the table never has to be resized while the
         * block constants are being registered.
         */
        private static Map<String, UnicodeBlock> map =
                new HashMap<>((int)(638 / 0.75f + 1.0f));
 680 
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         *
         * @param idName the identifier name; it is handed to the
         *               {@code Subset} constructor and is also the key
         *               under which this block is stored in {@code map}
         */
        private UnicodeBlock(String idName) {
            super(idName);
            // Register this block under its identifier name.
            map.put(idName, this);
        }
 689 
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias name.
         *
         * @param idName the identifier name, registered by the
         *               single-argument constructor
         * @param alias  an additional name under which this block is
         *               stored in {@code map}
         */
        private UnicodeBlock(String idName, String alias) {
            this(idName);
            // The alias maps to the same instance as the identifier name.
            map.put(alias, this);
        }
 698 
 699         /**
 700          * Creates a UnicodeBlock with the given identifier name and
 701          * alias names.
 702          */
 703         private UnicodeBlock(String idName, String... aliases) {
 704             this(idName);
 705             for (String alias : aliases)
 706                 map.put(alias, this);
 707         }
 708 
 709         /**
 710          * Constant for the "Basic Latin" Unicode character block.
 711          * @since 1.2
 712          */
 713         public static final UnicodeBlock  BASIC_LATIN =
 714             new UnicodeBlock("BASIC_LATIN",
 715                              "BASIC LATIN",
 716                              "BASICLATIN");
 717 
 718         /**
 719          * Constant for the "Latin-1 Supplement" Unicode character block.
 720          * @since 1.2
 721          */
 722         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 723             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 724                              "LATIN-1 SUPPLEMENT",
 725                              "LATIN-1SUPPLEMENT");
 726 
 727         /**
 728          * Constant for the "Latin Extended-A" Unicode character block.
 729          * @since 1.2
 730          */
 731         public static final UnicodeBlock LATIN_EXTENDED_A =
 732             new UnicodeBlock("LATIN_EXTENDED_A",
 733                              "LATIN EXTENDED-A",
 734                              "LATINEXTENDED-A");
 735 
 736         /**
 737          * Constant for the "Latin Extended-B" Unicode character block.
 738          * @since 1.2
 739          */
 740         public static final UnicodeBlock LATIN_EXTENDED_B =
 741             new UnicodeBlock("LATIN_EXTENDED_B",
 742                              "LATIN EXTENDED-B",
 743                              "LATINEXTENDED-B");
 744 
 745         /**
 746          * Constant for the "IPA Extensions" Unicode character block.
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock IPA_EXTENSIONS =
 750             new UnicodeBlock("IPA_EXTENSIONS",
 751                              "IPA EXTENSIONS",
 752                              "IPAEXTENSIONS");
 753 
 754         /**
 755          * Constant for the "Spacing Modifier Letters" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 759             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 760                              "SPACING MODIFIER LETTERS",
 761                              "SPACINGMODIFIERLETTERS");
 762 
 763         /**
 764          * Constant for the "Combining Diacritical Marks" Unicode character block.
 765          * @since 1.2
 766          */
 767         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 768             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 769                              "COMBINING DIACRITICAL MARKS",
 770                              "COMBININGDIACRITICALMARKS");
 771 
 772         /**
 773          * Constant for the "Greek and Coptic" Unicode character block.
 774          * <p>
 775          * This block was previously known as the "Greek" block.
 776          *
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock GREEK =
 780             new UnicodeBlock("GREEK",
 781                              "GREEK AND COPTIC",
 782                              "GREEKANDCOPTIC");
 783 
 784         /**
 785          * Constant for the "Cyrillic" Unicode character block.
 786          * @since 1.2
 787          */
 788         public static final UnicodeBlock CYRILLIC =
 789             new UnicodeBlock("CYRILLIC");
 790 
 791         /**
 792          * Constant for the "Armenian" Unicode character block.
 793          * @since 1.2
 794          */
 795         public static final UnicodeBlock ARMENIAN =
 796             new UnicodeBlock("ARMENIAN");
 797 
 798         /**
 799          * Constant for the "Hebrew" Unicode character block.
 800          * @since 1.2
 801          */
 802         public static final UnicodeBlock HEBREW =
 803             new UnicodeBlock("HEBREW");
 804 
 805         /**
 806          * Constant for the "Arabic" Unicode character block.
 807          * @since 1.2
 808          */
 809         public static final UnicodeBlock ARABIC =
 810             new UnicodeBlock("ARABIC");
 811 
 812         /**
 813          * Constant for the "Devanagari" Unicode character block.
 814          * @since 1.2
 815          */
 816         public static final UnicodeBlock DEVANAGARI =
 817             new UnicodeBlock("DEVANAGARI");
 818 
 819         /**
 820          * Constant for the "Bengali" Unicode character block.
 821          * @since 1.2
 822          */
 823         public static final UnicodeBlock BENGALI =
 824             new UnicodeBlock("BENGALI");
 825 
 826         /**
 827          * Constant for the "Gurmukhi" Unicode character block.
 828          * @since 1.2
 829          */
 830         public static final UnicodeBlock GURMUKHI =
 831             new UnicodeBlock("GURMUKHI");
 832 
 833         /**
 834          * Constant for the "Gujarati" Unicode character block.
 835          * @since 1.2
 836          */
 837         public static final UnicodeBlock GUJARATI =
 838             new UnicodeBlock("GUJARATI");
 839 
 840         /**
 841          * Constant for the "Oriya" Unicode character block.
 842          * @since 1.2
 843          */
 844         public static final UnicodeBlock ORIYA =
 845             new UnicodeBlock("ORIYA");
 846 
 847         /**
 848          * Constant for the "Tamil" Unicode character block.
 849          * @since 1.2
 850          */
 851         public static final UnicodeBlock TAMIL =
 852             new UnicodeBlock("TAMIL");
 853 
 854         /**
 855          * Constant for the "Telugu" Unicode character block.
 856          * @since 1.2
 857          */
 858         public static final UnicodeBlock TELUGU =
 859             new UnicodeBlock("TELUGU");
 860 
 861         /**
 862          * Constant for the "Kannada" Unicode character block.
 863          * @since 1.2
 864          */
 865         public static final UnicodeBlock KANNADA =
 866             new UnicodeBlock("KANNADA");
 867 
 868         /**
 869          * Constant for the "Malayalam" Unicode character block.
 870          * @since 1.2
 871          */
 872         public static final UnicodeBlock MALAYALAM =
 873             new UnicodeBlock("MALAYALAM");
 874 
 875         /**
 876          * Constant for the "Thai" Unicode character block.
 877          * @since 1.2
 878          */
 879         public static final UnicodeBlock THAI =
 880             new UnicodeBlock("THAI");
 881 
 882         /**
 883          * Constant for the "Lao" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LAO =
 887             new UnicodeBlock("LAO");
 888 
 889         /**
 890          * Constant for the "Tibetan" Unicode character block.
 891          * @since 1.2
 892          */
 893         public static final UnicodeBlock TIBETAN =
 894             new UnicodeBlock("TIBETAN");
 895 
 896         /**
 897          * Constant for the "Georgian" Unicode character block.
 898          * @since 1.2
 899          */
 900         public static final UnicodeBlock GEORGIAN =
 901             new UnicodeBlock("GEORGIAN");
 902 
 903         /**
 904          * Constant for the "Hangul Jamo" Unicode character block.
 905          * @since 1.2
 906          */
 907         public static final UnicodeBlock HANGUL_JAMO =
 908             new UnicodeBlock("HANGUL_JAMO",
 909                              "HANGUL JAMO",
 910                              "HANGULJAMO");
 911 
 912         /**
 913          * Constant for the "Latin Extended Additional" Unicode character block.
 914          * @since 1.2
 915          */
 916         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 917             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 918                              "LATIN EXTENDED ADDITIONAL",
 919                              "LATINEXTENDEDADDITIONAL");
 920 
 921         /**
 922          * Constant for the "Greek Extended" Unicode character block.
 923          * @since 1.2
 924          */
 925         public static final UnicodeBlock GREEK_EXTENDED =
 926             new UnicodeBlock("GREEK_EXTENDED",
 927                              "GREEK EXTENDED",
 928                              "GREEKEXTENDED");
 929 
 930         /**
 931          * Constant for the "General Punctuation" Unicode character block.
 932          * @since 1.2
 933          */
 934         public static final UnicodeBlock GENERAL_PUNCTUATION =
 935             new UnicodeBlock("GENERAL_PUNCTUATION",
 936                              "GENERAL PUNCTUATION",
 937                              "GENERALPUNCTUATION");
 938 
 939         /**
 940          * Constant for the "Superscripts and Subscripts" Unicode character
 941          * block.
 942          * @since 1.2
 943          */
 944         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 945             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 946                              "SUPERSCRIPTS AND SUBSCRIPTS",
 947                              "SUPERSCRIPTSANDSUBSCRIPTS");
 948 
 949         /**
 950          * Constant for the "Currency Symbols" Unicode character block.
 951          * @since 1.2
 952          */
 953         public static final UnicodeBlock CURRENCY_SYMBOLS =
 954             new UnicodeBlock("CURRENCY_SYMBOLS",
 955                              "CURRENCY SYMBOLS",
 956                              "CURRENCYSYMBOLS");
 957 
 958         /**
 959          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 960          * character block.
 961          * <p>
 962          * This block was previously known as "Combining Marks for Symbols".
 963          * @since 1.2
 964          */
 965         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 966             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", // matches the former name
 967                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS", // current name
 968                              "COMBININGDIACRITICALMARKSFORSYMBOLS", // current, no spaces
 969                              "COMBINING MARKS FOR SYMBOLS", // former name
 970                              "COMBININGMARKSFORSYMBOLS"); // former, no spaces
 971 
 972         /**
 973          * Constant for the "Letterlike Symbols" Unicode character block.
 974          * @since 1.2
 975          */
 976         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 977             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 978                              "LETTERLIKE SYMBOLS",
 979                              "LETTERLIKESYMBOLS");
 980 
 981         /**
 982          * Constant for the "Number Forms" Unicode character block.
 983          * @since 1.2
 984          */
 985         public static final UnicodeBlock NUMBER_FORMS =
 986             new UnicodeBlock("NUMBER_FORMS",
 987                              "NUMBER FORMS",
 988                              "NUMBERFORMS");
 989 
 990         /**
 991          * Constant for the "Arrows" Unicode character block.
 992          * @since 1.2
 993          */
 994         public static final UnicodeBlock ARROWS =
 995             new UnicodeBlock("ARROWS");
 996 
 997         /**
 998          * Constant for the "Mathematical Operators" Unicode character block.
 999          * @since 1.2
1000          */
1001         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1002             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1003                              "MATHEMATICAL OPERATORS",
1004                              "MATHEMATICALOPERATORS");
1005 
1006         /**
1007          * Constant for the "Miscellaneous Technical" Unicode character block.
1008          * @since 1.2
1009          */
1010         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1011             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1012                              "MISCELLANEOUS TECHNICAL",
1013                              "MISCELLANEOUSTECHNICAL");
1014 
1015         /**
1016          * Constant for the "Control Pictures" Unicode character block.
1017          * @since 1.2
1018          */
1019         public static final UnicodeBlock CONTROL_PICTURES =
1020             new UnicodeBlock("CONTROL_PICTURES",
1021                              "CONTROL PICTURES",
1022                              "CONTROLPICTURES");
1023 
1024         /**
1025          * Constant for the "Optical Character Recognition" Unicode character block.
1026          * @since 1.2
1027          */
1028         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1029             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1030                              "OPTICAL CHARACTER RECOGNITION",
1031                              "OPTICALCHARACTERRECOGNITION");
1032 
1033         /**
1034          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1035          * @since 1.2
1036          */
1037         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1038             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1039                              "ENCLOSED ALPHANUMERICS",
1040                              "ENCLOSEDALPHANUMERICS");
1041 
1042         /**
1043          * Constant for the "Box Drawing" Unicode character block.
1044          * @since 1.2
1045          */
1046         public static final UnicodeBlock BOX_DRAWING =
1047             new UnicodeBlock("BOX_DRAWING",
1048                              "BOX DRAWING",
1049                              "BOXDRAWING");
1050 
1051         /**
1052          * Constant for the "Block Elements" Unicode character block.
1053          * @since 1.2
1054          */
1055         public static final UnicodeBlock BLOCK_ELEMENTS =
1056             new UnicodeBlock("BLOCK_ELEMENTS",
1057                              "BLOCK ELEMENTS",
1058                              "BLOCKELEMENTS");
1059 
1060         /**
1061          * Constant for the "Geometric Shapes" Unicode character block.
1062          * @since 1.2
1063          */
1064         public static final UnicodeBlock GEOMETRIC_SHAPES =
1065             new UnicodeBlock("GEOMETRIC_SHAPES",
1066                              "GEOMETRIC SHAPES",
1067                              "GEOMETRICSHAPES");
1068 
1069         /**
1070          * Constant for the "Miscellaneous Symbols" Unicode character block.
1071          * @since 1.2
1072          */
1073         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1074             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1075                              "MISCELLANEOUS SYMBOLS",
1076                              "MISCELLANEOUSSYMBOLS");
1077 
1078         /**
1079          * Constant for the "Dingbats" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock DINGBATS =
1083             new UnicodeBlock("DINGBATS");
1084 
1085         /**
1086          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1090             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1091                              "CJK SYMBOLS AND PUNCTUATION",
1092                              "CJKSYMBOLSANDPUNCTUATION");
1093 
1094         /**
1095          * Constant for the "Hiragana" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock HIRAGANA =
1099             new UnicodeBlock("HIRAGANA");
1100 
1101         /**
1102          * Constant for the "Katakana" Unicode character block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock KATAKANA =
1106             new UnicodeBlock("KATAKANA");
1107 
1108         /**
1109          * Constant for the "Bopomofo" Unicode character block.
1110          * @since 1.2
1111          */
1112         public static final UnicodeBlock BOPOMOFO =
1113             new UnicodeBlock("BOPOMOFO");
1114 
1115         /**
1116          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1117          * @since 1.2
1118          */
1119         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1120             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1121                              "HANGUL COMPATIBILITY JAMO",
1122                              "HANGULCOMPATIBILITYJAMO");
1123 
1124         /**
1125          * Constant for the "Kanbun" Unicode character block.
1126          * @since 1.2
1127          */
1128         public static final UnicodeBlock KANBUN =
1129             new UnicodeBlock("KANBUN");
1130 
1131         /**
1132          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1133          * @since 1.2
1134          */
1135         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1136             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1137                              "ENCLOSED CJK LETTERS AND MONTHS",
1138                              "ENCLOSEDCJKLETTERSANDMONTHS");
1139 
1140         /**
1141          * Constant for the "CJK Compatibility" Unicode character block.
1142          * @since 1.2
1143          */
1144         public static final UnicodeBlock CJK_COMPATIBILITY =
1145             new UnicodeBlock("CJK_COMPATIBILITY",
1146                              "CJK COMPATIBILITY",
1147                              "CJKCOMPATIBILITY");
1148 
1149         /**
1150          * Constant for the "CJK Unified Ideographs" Unicode character block.
1151          * @since 1.2
1152          */
1153         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1154             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1155                              "CJK UNIFIED IDEOGRAPHS",
1156                              "CJKUNIFIEDIDEOGRAPHS");
1157 
1158         /**
1159          * Constant for the "Hangul Syllables" Unicode character block.
1160          * @since 1.2
1161          */
1162         public static final UnicodeBlock HANGUL_SYLLABLES =
1163             new UnicodeBlock("HANGUL_SYLLABLES",
1164                              "HANGUL SYLLABLES",
1165                              "HANGULSYLLABLES");
1166 
1167         /**
1168          * Constant for the "Private Use Area" Unicode character block.
1169          * @since 1.2
1170          */
1171         public static final UnicodeBlock PRIVATE_USE_AREA =
1172             new UnicodeBlock("PRIVATE_USE_AREA",
1173                              "PRIVATE USE AREA",
1174                              "PRIVATEUSEAREA");
1175 
1176         /**
1177          * Constant for the "CJK Compatibility Ideographs" Unicode character
1178          * block.
1179          * @since 1.2
1180          */
1181         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1182             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1183                              "CJK COMPATIBILITY IDEOGRAPHS",
1184                              "CJKCOMPATIBILITYIDEOGRAPHS");
1185 
1186         /**
1187          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1188          * @since 1.2
1189          */
1190         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1191             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1192                              "ALPHABETIC PRESENTATION FORMS",
1193                              "ALPHABETICPRESENTATIONFORMS");
1194 
1195         /**
1196          * Constant for the "Arabic Presentation Forms-A" Unicode character
1197          * block.
1198          * @since 1.2
1199          */
1200         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1201             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", // ' ' and '-' written as '_'
1202                              "ARABIC PRESENTATION FORMS-A", // hyphen as in the block name
1203                              "ARABICPRESENTATIONFORMS-A"); // spaces dropped, hyphen kept
1204 
1205         /**
1206          * Constant for the "Combining Half Marks" Unicode character block.
1207          * @since 1.2
1208          */
1209         public static final UnicodeBlock COMBINING_HALF_MARKS =
1210             new UnicodeBlock("COMBINING_HALF_MARKS",
1211                              "COMBINING HALF MARKS",
1212                              "COMBININGHALFMARKS");
1213 
1214         /**
1215          * Constant for the "CJK Compatibility Forms" Unicode character block.
1216          * @since 1.2
1217          */
1218         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1219             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1220                              "CJK COMPATIBILITY FORMS",
1221                              "CJKCOMPATIBILITYFORMS");
1222 
1223         /**
1224          * Constant for the "Small Form Variants" Unicode character block.
1225          * @since 1.2
1226          */
1227         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1228             new UnicodeBlock("SMALL_FORM_VARIANTS",
1229                              "SMALL FORM VARIANTS",
1230                              "SMALLFORMVARIANTS");
1231 
1232         /**
1233          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1234          * @since 1.2
1235          */
1236         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1237             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", // ' ' and '-' written as '_'
1238                              "ARABIC PRESENTATION FORMS-B", // hyphen as in the block name
1239                              "ARABICPRESENTATIONFORMS-B"); // spaces dropped, hyphen kept
1240 
1241         /**
1242          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1243          * block.
1244          * @since 1.2
1245          */
1246         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1247             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1248                              "HALFWIDTH AND FULLWIDTH FORMS",
1249                              "HALFWIDTHANDFULLWIDTHFORMS");
1250 
1251         /**
1252          * Constant for the "Specials" Unicode character block.
1253          * @since 1.2
1254          */
1255         public static final UnicodeBlock SPECIALS =
1256             new UnicodeBlock("SPECIALS");
1257 
1258         /**
1259          * Constant for the legacy surrogates area.
1260          * @deprecated Instead of {@code SURROGATES_AREA}, use
1261          * {@link #HIGH_SURROGATES}, {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1262          * {@link #LOW_SURROGATES}. These constants match the block definitions
1263          * of the Unicode Standard. The {@link #of(char)} and {@link #of(int)}
1264          * methods return the standard constants.
1265          */
1266         @Deprecated(since="1.5")
1267         public static final UnicodeBlock SURROGATES_AREA =
1268             new UnicodeBlock("SURROGATES_AREA");
1269 
1270         /**
1271          * Constant for the "Syriac" Unicode character block.
1272          * @since 1.4
1273          */
1274         public static final UnicodeBlock SYRIAC =
1275             new UnicodeBlock("SYRIAC");
1276 
1277         /**
1278          * Constant for the "Thaana" Unicode character block.
1279          * @since 1.4
1280          */
1281         public static final UnicodeBlock THAANA =
1282             new UnicodeBlock("THAANA");
1283 
1284         /**
1285          * Constant for the "Sinhala" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock SINHALA =
1289             new UnicodeBlock("SINHALA");
1290 
1291         /**
1292          * Constant for the "Myanmar" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock MYANMAR =
1296             new UnicodeBlock("MYANMAR");
1297 
1298         /**
1299          * Constant for the "Ethiopic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock ETHIOPIC =
1303             new UnicodeBlock("ETHIOPIC");
1304 
1305         /**
1306          * Constant for the "Cherokee" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock CHEROKEE =
1310             new UnicodeBlock("CHEROKEE");
1311 
1312         /**
1313          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1317             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1318                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1319                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1320 
1321         /**
1322          * Constant for the "Ogham" Unicode character block.
1323          * @since 1.4
1324          */
1325         public static final UnicodeBlock OGHAM =
1326             new UnicodeBlock("OGHAM");
1327 
1328         /**
1329          * Constant for the "Runic" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock RUNIC =
1333             new UnicodeBlock("RUNIC");
1334 
1335         /**
1336          * Constant for the "Khmer" Unicode character block.
1337          * @since 1.4
1338          */
1339         public static final UnicodeBlock KHMER =
1340             new UnicodeBlock("KHMER");
1341 
1342         /**
1343          * Constant for the "Mongolian" Unicode character block.
1344          * @since 1.4
1345          */
1346         public static final UnicodeBlock MONGOLIAN =
1347             new UnicodeBlock("MONGOLIAN");
1348 
1349         /**
1350          * Constant for the "Braille Patterns" Unicode character block.
1351          * @since 1.4
1352          */
1353         public static final UnicodeBlock BRAILLE_PATTERNS =
1354             new UnicodeBlock("BRAILLE_PATTERNS",
1355                              "BRAILLE PATTERNS",
1356                              "BRAILLEPATTERNS");
1357 
1358         /**
1359          * Constant for the "CJK Radicals Supplement" Unicode character block.
1360          * @since 1.4
1361          */
1362         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1363             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1364                              "CJK RADICALS SUPPLEMENT",
1365                              "CJKRADICALSSUPPLEMENT");
1366 
1367         /**
1368          * Constant for the "Kangxi Radicals" Unicode character block.
1369          * @since 1.4
1370          */
1371         public static final UnicodeBlock KANGXI_RADICALS =
1372             new UnicodeBlock("KANGXI_RADICALS",
1373                              "KANGXI RADICALS",
1374                              "KANGXIRADICALS");
1375 
1376         /**
1377          * Constant for the "Ideographic Description Characters" Unicode character block.
1378          * @since 1.4
1379          */
1380         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1381             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1382                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1383                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1384 
1385         /**
1386          * Constant for the "Bopomofo Extended" Unicode character block.
1387          * @since 1.4
1388          */
1389         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1390             new UnicodeBlock("BOPOMOFO_EXTENDED",
1391                              "BOPOMOFO EXTENDED",
1392                              "BOPOMOFOEXTENDED");
1393 
1394         /**
1395          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1396          * @since 1.4
1397          */
1398         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1399             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1400                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1401                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1402 
1403         /**
1404          * Constant for the "Yi Syllables" Unicode character block.
1405          * @since 1.4
1406          */
1407         public static final UnicodeBlock YI_SYLLABLES =
1408             new UnicodeBlock("YI_SYLLABLES",
1409                              "YI SYLLABLES",
1410                              "YISYLLABLES");
1411 
1412         /**
1413          * Constant for the "Yi Radicals" Unicode character block.
1414          * @since 1.4
1415          */
1416         public static final UnicodeBlock YI_RADICALS =
1417             new UnicodeBlock("YI_RADICALS",
1418                              "YI RADICALS",
1419                              "YIRADICALS");
1420 
1421         /**
1422          * Constant for the "Cyrillic Supplement" Unicode character block.
1423          * <p>This block was previously known as the "Cyrillic Supplementary" block.
1424          * @since 1.5
1425          */
1426         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1427             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", // matches the former name
1428                              "CYRILLIC SUPPLEMENTARY", // former name
1429                              "CYRILLICSUPPLEMENTARY", // former, no spaces
1430                              "CYRILLIC SUPPLEMENT", // current name
1431                              "CYRILLICSUPPLEMENT"); // current, no spaces
1432 
1433         /**
1434          * Constant for the "Tagalog" Unicode character block.
1435          * @since 1.5
1436          */
1437         public static final UnicodeBlock TAGALOG =
1438             new UnicodeBlock("TAGALOG");
1439 
1440         /**
1441          * Constant for the "Hanunoo" Unicode character block.
1442          * @since 1.5
1443          */
1444         public static final UnicodeBlock HANUNOO =
1445             new UnicodeBlock("HANUNOO");
1446 
1447         /**
1448          * Constant for the "Buhid" Unicode character block.
1449          * @since 1.5
1450          */
1451         public static final UnicodeBlock BUHID =
1452             new UnicodeBlock("BUHID");
1453 
1454         /**
1455          * Constant for the "Tagbanwa" Unicode character block.
1456          * @since 1.5
1457          */
1458         public static final UnicodeBlock TAGBANWA =
1459             new UnicodeBlock("TAGBANWA");
1460 
1461         /**
1462          * Constant for the "Limbu" Unicode character block.
1463          * @since 1.5
1464          */
1465         public static final UnicodeBlock LIMBU =
1466             new UnicodeBlock("LIMBU");
1467 
1468         /**
1469          * Constant for the "Tai Le" Unicode character block.
1470          * @since 1.5
1471          */
1472         public static final UnicodeBlock TAI_LE =
1473             new UnicodeBlock("TAI_LE",
1474                              "TAI LE",
1475                              "TAILE");
1476 
1477         /**
1478          * Constant for the "Khmer Symbols" Unicode character block.
1479          * @since 1.5
1480          */
1481         public static final UnicodeBlock KHMER_SYMBOLS =
1482             new UnicodeBlock("KHMER_SYMBOLS",
1483                              "KHMER SYMBOLS",
1484                              "KHMERSYMBOLS");
1485 
1486         /**
1487          * Constant for the "Phonetic Extensions" Unicode character block.
1488          * @since 1.5
1489          */
1490         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1491             new UnicodeBlock("PHONETIC_EXTENSIONS",
1492                              "PHONETIC EXTENSIONS",
1493                              "PHONETICEXTENSIONS");
1494 
1495         /**
1496          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1497          * @since 1.5
1498          */
1499         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1500             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", // ' ' and '-' written as '_'
1501                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", // hyphen as in the block name
1502                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); // spaces dropped, hyphen kept
1503 
1504         /**
1505          * Constant for the "Supplemental Arrows-A" Unicode character block.
1506          * @since 1.5
1507          */
1508         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1509             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", // ' ' and '-' written as '_'
1510                              "SUPPLEMENTAL ARROWS-A", // hyphen as in the block name
1511                              "SUPPLEMENTALARROWS-A"); // spaces dropped, hyphen kept
1512 
1513         /**
1514          * Constant for the "Supplemental Arrows-B" Unicode character block.
1515          * @since 1.5
1516          */
1517         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1518             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", // ' ' and '-' written as '_'
1519                              "SUPPLEMENTAL ARROWS-B", // hyphen as in the block name
1520                              "SUPPLEMENTALARROWS-B"); // spaces dropped, hyphen kept
1521 
1522         /**
1523          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1524          * character block.
1525          * @since 1.5
1526          */
1527         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1528             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", // ' ' and '-' written as '_'
1529                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", // hyphen as in the block name
1530                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); // spaces dropped, hyphen kept
1531 
1532         /**
1533          * Constant for the "Supplemental Mathematical Operators" Unicode
1534          * character block.
1535          * @since 1.5
1536          */
1537         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1538             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1539                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1540                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1541 
1542         /**
1543          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1544          * block.
1545          * @since 1.5
1546          */
1547         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1548             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1549                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1550                              "MISCELLANEOUSSYMBOLSANDARROWS");
1551 
1552         /**
1553          * Constant for the "Katakana Phonetic Extensions" Unicode character
1554          * block.
1555          * @since 1.5
1556          */
1557         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1558             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1559                              "KATAKANA PHONETIC EXTENSIONS",
1560                              "KATAKANAPHONETICEXTENSIONS");
1561 
1562         /**
1563          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1564          * @since 1.5
1565          */
1566         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1567             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1568                              "YIJING HEXAGRAM SYMBOLS",
1569                              "YIJINGHEXAGRAMSYMBOLS");
1570 
1571         /**
1572          * Constant for the "Variation Selectors" Unicode character block.
1573          * @since 1.5
1574          */
1575         public static final UnicodeBlock VARIATION_SELECTORS =
1576             new UnicodeBlock("VARIATION_SELECTORS",
1577                              "VARIATION SELECTORS",
1578                              "VARIATIONSELECTORS");
1579 
1580         /**
1581          * Constant for the "Linear B Syllabary" Unicode character block.
1582          * @since 1.5
1583          */
1584         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1585             new UnicodeBlock("LINEAR_B_SYLLABARY",
1586                              "LINEAR B SYLLABARY",
1587                              "LINEARBSYLLABARY");
1588 
1589         /**
1590          * Constant for the "Linear B Ideograms" Unicode character block.
1591          * @since 1.5
1592          */
1593         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1594             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1595                              "LINEAR B IDEOGRAMS",
1596                              "LINEARBIDEOGRAMS");
1597 
1598         /**
1599          * Constant for the "Aegean Numbers" Unicode character block.
1600          * @since 1.5
1601          */
1602         public static final UnicodeBlock AEGEAN_NUMBERS =
1603             new UnicodeBlock("AEGEAN_NUMBERS",
1604                              "AEGEAN NUMBERS",
1605                              "AEGEANNUMBERS");
1606 
1607         /**
1608          * Constant for the "Old Italic" Unicode character block.
1609          * @since 1.5
1610          */
1611         public static final UnicodeBlock OLD_ITALIC =
1612             new UnicodeBlock("OLD_ITALIC",
1613                              "OLD ITALIC",
1614                              "OLDITALIC");
1615 
1616         /**
1617          * Constant for the "Gothic" Unicode character block.
1618          * @since 1.5
1619          */
1620         public static final UnicodeBlock GOTHIC =
1621             new UnicodeBlock("GOTHIC");
1622 
1623         /**
1624          * Constant for the "Ugaritic" Unicode character block.
1625          * @since 1.5
1626          */
1627         public static final UnicodeBlock UGARITIC =
1628             new UnicodeBlock("UGARITIC");
1629 
1630         /**
1631          * Constant for the "Deseret" Unicode character block.
1632          * @since 1.5
1633          */
1634         public static final UnicodeBlock DESERET =
1635             new UnicodeBlock("DESERET");
1636 
1637         /**
1638          * Constant for the "Shavian" Unicode character block.
1639          * @since 1.5
1640          */
1641         public static final UnicodeBlock SHAVIAN =
1642             new UnicodeBlock("SHAVIAN");
1643 
1644         /**
1645          * Represents the "Osmanya" Unicode character block.
1646          * @since 1.5
1647          */
1648         public static final UnicodeBlock OSMANYA
1649                 = new UnicodeBlock("OSMANYA");
1650 
1651         /**
1652          * Represents the "Cypriot Syllabary" Unicode character block.
1653          * @since 1.5
1654          */
1655         public static final UnicodeBlock CYPRIOT_SYLLABARY
1656                 = new UnicodeBlock("CYPRIOT_SYLLABARY",
1657                                    "CYPRIOT SYLLABARY",
1658                                    "CYPRIOTSYLLABARY");
1659 
1660         /**
1661          * Represents the "Byzantine Musical Symbols" Unicode character block.
1662          * @since 1.5
1663          */
1664         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1665                 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1666                                    "BYZANTINE MUSICAL SYMBOLS",
1667                                    "BYZANTINEMUSICALSYMBOLS");
1668 
1669         /**
1670          * Represents the "Musical Symbols" Unicode character block.
1671          * @since 1.5
1672          */
1673         public static final UnicodeBlock MUSICAL_SYMBOLS
1674                 = new UnicodeBlock("MUSICAL_SYMBOLS",
1675                                    "MUSICAL SYMBOLS",
1676                                    "MUSICALSYMBOLS");
1677 
1678         /**
1679          * Represents the "Tai Xuan Jing Symbols" Unicode character block.
1680          * @since 1.5
1681          */
1682         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1683                 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1684                                    "TAI XUAN JING SYMBOLS",
1685                                    "TAIXUANJINGSYMBOLS");
1686 
1687         /**
1688          * Represents the "Mathematical Alphanumeric Symbols" Unicode
1689          * character block.
1690          * @since 1.5
1691          */
1692         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1693                 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1694                                    "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1695                                    "MATHEMATICALALPHANUMERICSYMBOLS");
1696 
1697         /**
1698          * Represents the "CJK Unified Ideographs Extension B" Unicode
1699          * character block.
1700          * @since 1.5
1701          */
1702         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1703                 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1704                                    "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1705                                    "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1706 
1707         /**
1708          * Represents the "CJK Compatibility Ideographs Supplement" Unicode character block.
1709          * @since 1.5
1710          */
1711         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1712                 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1713                                    "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1714                                    "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1715 
1716         /**
1717          * Represents the "Tags" Unicode character block.
1718          * @since 1.5
1719          */
1720         public static final UnicodeBlock TAGS
1721                 = new UnicodeBlock("TAGS");
1722 
1723         /**
1724          * Represents the "Variation Selectors Supplement" Unicode character
1725          * block.
1726          * @since 1.5
1727          */
1728         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1729                 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1730                                    "VARIATION SELECTORS SUPPLEMENT",
1731                                    "VARIATIONSELECTORSSUPPLEMENT");
1732 
1733         /**
1734          * Represents the "Supplementary Private Use Area-A" Unicode character
1735          * block.
1736          * @since 1.5
1737          */
1738         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1739                 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1740                                    "SUPPLEMENTARY PRIVATE USE AREA-A",
1741                                    "SUPPLEMENTARYPRIVATEUSEAREA-A");
1742 
1743         /**
1744          * Represents the "Supplementary Private Use Area-B" Unicode character
1745          * block.
1746          * @since 1.5
1747          */
1748         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1749                 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1750                                    "SUPPLEMENTARY PRIVATE USE AREA-B",
1751                                    "SUPPLEMENTARYPRIVATEUSEAREA-B");
1752 
1753         /**
1754          * Represents the "High Surrogates" Unicode character block.
1755          * The block spans the code point values of the high surrogate
1756          * range, U+D800 through U+DB7F.
1757          *
1758          * @since 1.5
1759          */
1760         public static final UnicodeBlock HIGH_SURROGATES
1761                 = new UnicodeBlock("HIGH_SURROGATES",
1762                                    "HIGH SURROGATES",
1763                                    "HIGHSURROGATES");
1764 
1765         /**
1766          * Represents the "High Private Use Surrogates" Unicode character
1767          * block.
1768          * The block spans the code point values of the private use high
1769          * surrogate range, U+DB80 through U+DBFF.
1770          *
1771          * @since 1.5
1772          */
1773         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1774                 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1775                                    "HIGH PRIVATE USE SURROGATES",
1776                                    "HIGHPRIVATEUSESURROGATES");
1777 
1778         /**
1779          * Represents the "Low Surrogates" Unicode character block.
1780          * The block spans the code point values of the low surrogate
1781          * range, U+DC00 through U+DFFF.
1782          *
1783          * @since 1.5
1784          */
1785         public static final UnicodeBlock LOW_SURROGATES
1786                 = new UnicodeBlock("LOW_SURROGATES",
1787                                    "LOW SURROGATES",
1788                                    "LOWSURROGATES");
1789 
1790         /**
1791          * Represents the "Arabic Supplement" Unicode character block.
1792          * @since 1.7
1793          */
1794         public static final UnicodeBlock ARABIC_SUPPLEMENT
1795                 = new UnicodeBlock("ARABIC_SUPPLEMENT",
1796                                    "ARABIC SUPPLEMENT",
1797                                    "ARABICSUPPLEMENT");
1798 
1799         /**
1800          * Represents the "NKo" Unicode character block.
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock NKO
1804                 = new UnicodeBlock("NKO");
1805 
1806         /**
1807          * Represents the "Samaritan" Unicode character block.
1808          * @since 1.7
1809          */
1810         public static final UnicodeBlock SAMARITAN
1811                 = new UnicodeBlock("SAMARITAN");
1812 
1813         /**
1814          * Represents the "Mandaic" Unicode character block.
1815          * @since 1.7
1816          */
1817         public static final UnicodeBlock MANDAIC
1818                 = new UnicodeBlock("MANDAIC");
1819 
1820         /**
1821          * Represents the "Ethiopic Supplement" Unicode character block.
1822          * @since 1.7
1823          */
1824         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT
1825                 = new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1826                                    "ETHIOPIC SUPPLEMENT",
1827                                    "ETHIOPICSUPPLEMENT");
1828 
1829         /**
1830          * Represents the "Unified Canadian Aboriginal Syllabics Extended"
1831          * Unicode character block.
1832          * @since 1.7
1833          */
1834         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
1835                 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1836                                    "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1837                                    "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1838 
1839         /**
1840          * Represents the "New Tai Lue" Unicode character block.
1841          * @since 1.7
1842          */
1843         public static final UnicodeBlock NEW_TAI_LUE
1844                 = new UnicodeBlock("NEW_TAI_LUE",
1845                                    "NEW TAI LUE",
1846                                    "NEWTAILUE");
1847 
1848         /**
1849          * Represents the "Buginese" Unicode character block.
1850          * @since 1.7
1851          */
1852         public static final UnicodeBlock BUGINESE
1853                 = new UnicodeBlock("BUGINESE");
1854 
1855         /**
1856          * Represents the "Tai Tham" Unicode character block.
1857          * @since 1.7
1858          */
1859         public static final UnicodeBlock TAI_THAM
1860                 = new UnicodeBlock("TAI_THAM",
1861                                    "TAI THAM",
1862                                    "TAITHAM");
1863 
1864         /**
1865          * Represents the "Balinese" Unicode character block.
1866          * @since 1.7
1867          */
1868         public static final UnicodeBlock BALINESE
1869                 = new UnicodeBlock("BALINESE");
1870 
1871         /**
1872          * Represents the "Sundanese" Unicode character block.
1873          * @since 1.7
1874          */
1875         public static final UnicodeBlock SUNDANESE
1876                 = new UnicodeBlock("SUNDANESE");
1877 
1878         /**
1879          * Represents the "Batak" Unicode character block.
1880          * @since 1.7
1881          */
1882         public static final UnicodeBlock BATAK
1883                 = new UnicodeBlock("BATAK");
1884 
1885         /**
1886          * Represents the "Lepcha" Unicode character block.
1887          * @since 1.7
1888          */
1889         public static final UnicodeBlock LEPCHA
1890                 = new UnicodeBlock("LEPCHA");
1891 
1892         /**
1893          * Represents the "Ol Chiki" Unicode character block.
1894          * @since 1.7
1895          */
1896         public static final UnicodeBlock OL_CHIKI
1897                 = new UnicodeBlock("OL_CHIKI",
1898                                    "OL CHIKI",
1899                                    "OLCHIKI");
1900 
1901         /**
1902          * Represents the "Vedic Extensions" Unicode character block.
1903          * @since 1.7
1904          */
1905         public static final UnicodeBlock VEDIC_EXTENSIONS
1906                 = new UnicodeBlock("VEDIC_EXTENSIONS",
1907                                    "VEDIC EXTENSIONS",
1908                                    "VEDICEXTENSIONS");
1909 
1910         /**
1911          * Represents the "Phonetic Extensions Supplement" Unicode character
1912          * block.
1913          * @since 1.7
1914          */
1915         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT
1916                 = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1917                                    "PHONETIC EXTENSIONS SUPPLEMENT",
1918                                    "PHONETICEXTENSIONSSUPPLEMENT");
1919 
1920         /**
1921          * Represents the "Combining Diacritical Marks Supplement" Unicode
1922          * character block.
1923          * @since 1.7
1924          */
1925         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
1926                 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1927                                    "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1928                                    "COMBININGDIACRITICALMARKSSUPPLEMENT");
1929 
1930         /**
1931          * Represents the "Glagolitic" Unicode character block.
1932          * @since 1.7
1933          */
1934         public static final UnicodeBlock GLAGOLITIC
1935                 = new UnicodeBlock("GLAGOLITIC");
1936 
1937         /**
1938          * Represents the "Latin Extended-C" Unicode character block.
1939          * @since 1.7
1940          */
1941         public static final UnicodeBlock LATIN_EXTENDED_C
1942                 = new UnicodeBlock("LATIN_EXTENDED_C",
1943                                    "LATIN EXTENDED-C",
1944                                    "LATINEXTENDED-C");
1945 
1946         /**
1947          * Represents the "Coptic" Unicode character block.
1948          * @since 1.7
1949          */
1950         public static final UnicodeBlock COPTIC
1951                 = new UnicodeBlock("COPTIC");
1952 
1953         /**
1954          * Represents the "Georgian Supplement" Unicode character block.
1955          * @since 1.7
1956          */
1957         public static final UnicodeBlock GEORGIAN_SUPPLEMENT
1958                 = new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1959                                    "GEORGIAN SUPPLEMENT",
1960                                    "GEORGIANSUPPLEMENT");
1961 
1962         /**
1963          * Represents the "Tifinagh" Unicode character block.
1964          * @since 1.7
1965          */
1966         public static final UnicodeBlock TIFINAGH
1967                 = new UnicodeBlock("TIFINAGH");
1968 
1969         /**
1970          * Represents the "Ethiopic Extended" Unicode character block.
1971          * @since 1.7
1972          */
1973         public static final UnicodeBlock ETHIOPIC_EXTENDED
1974                 = new UnicodeBlock("ETHIOPIC_EXTENDED",
1975                                    "ETHIOPIC EXTENDED",
1976                                    "ETHIOPICEXTENDED");
1977 
1978         /**
1979          * Represents the "Cyrillic Extended-A" Unicode character block.
1980          * @since 1.7
1981          */
1982         public static final UnicodeBlock CYRILLIC_EXTENDED_A
1983                 = new UnicodeBlock("CYRILLIC_EXTENDED_A",
1984                                    "CYRILLIC EXTENDED-A",
1985                                    "CYRILLICEXTENDED-A");
1986 
1987         /**
1988          * Represents the "Supplemental Punctuation" Unicode character block.
1989          * @since 1.7
1990          */
1991         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION
1992                 = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1993                                    "SUPPLEMENTAL PUNCTUATION",
1994                                    "SUPPLEMENTALPUNCTUATION");
1995 
1996         /**
1997          * Represents the "CJK Strokes" Unicode character block.
1998          * @since 1.7
1999          */
2000         public static final UnicodeBlock CJK_STROKES
2001                 = new UnicodeBlock("CJK_STROKES",
2002                                    "CJK STROKES",
2003                                    "CJKSTROKES");
2004 
2005         /**
2006          * Represents the "Lisu" Unicode character block.
2007          * @since 1.7
2008          */
2009         public static final UnicodeBlock LISU
2010                 = new UnicodeBlock("LISU");
2011 
2012         /**
2013          * Represents the "Vai" Unicode character block.
2014          * @since 1.7
2015          */
2016         public static final UnicodeBlock VAI
2017                 = new UnicodeBlock("VAI");
2018 
2019         /**
2020          * Represents the "Cyrillic Extended-B" Unicode character block.
2021          * @since 1.7
2022          */
2023         public static final UnicodeBlock CYRILLIC_EXTENDED_B
2024                 = new UnicodeBlock("CYRILLIC_EXTENDED_B",
2025                                    "CYRILLIC EXTENDED-B",
2026                                    "CYRILLICEXTENDED-B");
2027 
2028         /**
2029          * Represents the "Bamum" Unicode character block.
2030          * @since 1.7
2031          */
2032         public static final UnicodeBlock BAMUM
2033                 = new UnicodeBlock("BAMUM");
2034 
2035         /**
2036          * Represents the "Modifier Tone Letters" Unicode character block.
2037          * @since 1.7
2038          */
2039         public static final UnicodeBlock MODIFIER_TONE_LETTERS
2040                 = new UnicodeBlock("MODIFIER_TONE_LETTERS",
2041                                    "MODIFIER TONE LETTERS",
2042                                    "MODIFIERTONELETTERS");
2043 
2044         /**
2045          * Represents the "Latin Extended-D" Unicode character block.
2046          * @since 1.7
2047          */
2048         public static final UnicodeBlock LATIN_EXTENDED_D
2049                 = new UnicodeBlock("LATIN_EXTENDED_D",
2050                                    "LATIN EXTENDED-D",
2051                                    "LATINEXTENDED-D");
2052 
2053         /**
2054          * Represents the "Syloti Nagri" Unicode character block.
2055          * @since 1.7
2056          */
2057         public static final UnicodeBlock SYLOTI_NAGRI
2058                 = new UnicodeBlock("SYLOTI_NAGRI",
2059                                    "SYLOTI NAGRI",
2060                                    "SYLOTINAGRI");
2061 
2062         /**
2063          * Represents the "Common Indic Number Forms" Unicode character block.
2064          * @since 1.7
2065          */
2066         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS
2067                 = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2068                                    "COMMON INDIC NUMBER FORMS",
2069                                    "COMMONINDICNUMBERFORMS");
2070 
2071         /**
2072          * Represents the "Phags-pa" Unicode character block.
2073          * @since 1.7
2074          */
2075         public static final UnicodeBlock PHAGS_PA
2076                 = new UnicodeBlock("PHAGS_PA",
2077                                    "PHAGS-PA");
2078 
2079         /**
2080          * Represents the "Saurashtra" Unicode character block.
2081          * @since 1.7
2082          */
2083         public static final UnicodeBlock SAURASHTRA
2084                 = new UnicodeBlock("SAURASHTRA");
2085 
2086         /**
2087          * Represents the "Devanagari Extended" Unicode character block.
2088          * @since 1.7
2089          */
2090         public static final UnicodeBlock DEVANAGARI_EXTENDED
2091                 = new UnicodeBlock("DEVANAGARI_EXTENDED",
2092                                    "DEVANAGARI EXTENDED",
2093                                    "DEVANAGARIEXTENDED");
2094 
2095         /**
2096          * Represents the "Kayah Li" Unicode character block.
2097          * @since 1.7
2098          */
2099         public static final UnicodeBlock KAYAH_LI
2100                 = new UnicodeBlock("KAYAH_LI",
2101                                    "KAYAH LI",
2102                                    "KAYAHLI");
2103 
2104         /**
2105          * Represents the "Rejang" Unicode character block.
2106          * @since 1.7
2107          */
2108         public static final UnicodeBlock REJANG
2109                 = new UnicodeBlock("REJANG");
2110 
2111         /**
2112          * Represents the "Hangul Jamo Extended-A" Unicode character block.
2113          * @since 1.7
2114          */
2115         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A
2116                 = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2117                                    "HANGUL JAMO EXTENDED-A",
2118                                    "HANGULJAMOEXTENDED-A");
2119 
2120         /**
2121          * Represents the "Javanese" Unicode character block.
2122          * @since 1.7
2123          */
2124         public static final UnicodeBlock JAVANESE
2125                 = new UnicodeBlock("JAVANESE");
2126 
2127         /**
2128          * Represents the "Cham" Unicode character block.
2129          * @since 1.7
2130          */
2131         public static final UnicodeBlock CHAM
2132                 = new UnicodeBlock("CHAM");
2133 
2134         /**
2135          * Represents the "Myanmar Extended-A" Unicode character block.
2136          * @since 1.7
2137          */
2138         public static final UnicodeBlock MYANMAR_EXTENDED_A
2139                 = new UnicodeBlock("MYANMAR_EXTENDED_A",
2140                                    "MYANMAR EXTENDED-A",
2141                                    "MYANMAREXTENDED-A");
2142 
2143         /**
2144          * Represents the "Tai Viet" Unicode character block.
2145          * @since 1.7
2146          */
2147         public static final UnicodeBlock TAI_VIET
2148                 = new UnicodeBlock("TAI_VIET",
2149                                    "TAI VIET",
2150                                    "TAIVIET");
2151 
2152         /**
2153          * Represents the "Ethiopic Extended-A" Unicode character block.
2154          * @since 1.7
2155          */
2156         public static final UnicodeBlock ETHIOPIC_EXTENDED_A
2157                 = new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2158                                    "ETHIOPIC EXTENDED-A",
2159                                    "ETHIOPICEXTENDED-A");
2160 
2161         /**
2162          * Represents the "Meetei Mayek" Unicode character block.
2163          * @since 1.7
2164          */
2165         public static final UnicodeBlock MEETEI_MAYEK
2166                 = new UnicodeBlock("MEETEI_MAYEK",
2167                                    "MEETEI MAYEK",
2168                                    "MEETEIMAYEK");
2169 
2170         /**
2171          * Represents the "Hangul Jamo Extended-B" Unicode character block.
2172          * @since 1.7
2173          */
2174         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B
2175                 = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2176                                    "HANGUL JAMO EXTENDED-B",
2177                                    "HANGULJAMOEXTENDED-B");
2178 
2179         /**
2180          * Represents the "Vertical Forms" Unicode character block.
2181          * @since 1.7
2182          */
2183         public static final UnicodeBlock VERTICAL_FORMS
2184                 = new UnicodeBlock("VERTICAL_FORMS",
2185                                    "VERTICAL FORMS",
2186                                    "VERTICALFORMS");
2187 
2188         /**
2189          * Represents the "Ancient Greek Numbers" Unicode character block.
2190          * @since 1.7
2191          */
2192         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS
2193                 = new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2194                                    "ANCIENT GREEK NUMBERS",
2195                                    "ANCIENTGREEKNUMBERS");
2196 
2197         /**
2198          * Represents the "Ancient Symbols" Unicode character block.
2199          * @since 1.7
2200          */
2201         public static final UnicodeBlock ANCIENT_SYMBOLS
2202                 = new UnicodeBlock("ANCIENT_SYMBOLS",
2203                                    "ANCIENT SYMBOLS",
2204                                    "ANCIENTSYMBOLS");
2205 
2206         /**
2207          * Represents the "Phaistos Disc" Unicode character block.
2208          * @since 1.7
2209          */
2210         public static final UnicodeBlock PHAISTOS_DISC
2211                 = new UnicodeBlock("PHAISTOS_DISC",
2212                                    "PHAISTOS DISC",
2213                                    "PHAISTOSDISC");
2214 
2215         /**
2216          * Represents the "Lycian" Unicode character block.
2217          * @since 1.7
2218          */
2219         public static final UnicodeBlock LYCIAN
2220                 = new UnicodeBlock("LYCIAN");
2221 
2222         /**
2223          * Represents the "Carian" Unicode character block.
2224          * @since 1.7
2225          */
2226         public static final UnicodeBlock CARIAN
2227                 = new UnicodeBlock("CARIAN");
2228 
2229         /**
2230          * Represents the "Old Persian" Unicode character block.
2231          * @since 1.7
2232          */
2233         public static final UnicodeBlock OLD_PERSIAN
2234                 = new UnicodeBlock("OLD_PERSIAN",
2235                                    "OLD PERSIAN",
2236                                    "OLDPERSIAN");
2237 
2238         /**
2239          * Represents the "Imperial Aramaic" Unicode character block.
2240          * @since 1.7
2241          */
2242         public static final UnicodeBlock IMPERIAL_ARAMAIC
2243                 = new UnicodeBlock("IMPERIAL_ARAMAIC",
2244                                    "IMPERIAL ARAMAIC",
2245                                    "IMPERIALARAMAIC");
2246 
2247         /**
2248          * Represents the "Phoenician" Unicode character block.
2249          * @since 1.7
2250          */
2251         public static final UnicodeBlock PHOENICIAN
2252                 = new UnicodeBlock("PHOENICIAN");
2253 
2254         /**
2255          * Represents the "Lydian" Unicode character block.
2256          * @since 1.7
2257          */
2258         public static final UnicodeBlock LYDIAN
2259                 = new UnicodeBlock("LYDIAN");
2260 
2261         /**
2262          * Represents the "Kharoshthi" Unicode character block.
2263          * @since 1.7
2264          */
2265         public static final UnicodeBlock KHAROSHTHI
2266                 = new UnicodeBlock("KHAROSHTHI");
2267 
2268         /**
2269          * Represents the "Old South Arabian" Unicode character block.
2270          * @since 1.7
2271          */
2272         public static final UnicodeBlock OLD_SOUTH_ARABIAN
2273                 = new UnicodeBlock("OLD_SOUTH_ARABIAN",
2274                                    "OLD SOUTH ARABIAN",
2275                                    "OLDSOUTHARABIAN");
2276 
2277         /**
2278          * Represents the "Avestan" Unicode character block.
2279          * @since 1.7
2280          */
2281         public static final UnicodeBlock AVESTAN
2282                 = new UnicodeBlock("AVESTAN");
2283 
2284         /**
2285          * Represents the "Inscriptional Parthian" Unicode character block.
2286          * @since 1.7
2287          */
2288         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN
2289                 = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2290                                    "INSCRIPTIONAL PARTHIAN",
2291                                    "INSCRIPTIONALPARTHIAN");
2292 
2293         /**
2294          * Represents the "Inscriptional Pahlavi" Unicode character block.
2295          * @since 1.7
2296          */
2297         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI
2298                 = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2299                                    "INSCRIPTIONAL PAHLAVI",
2300                                    "INSCRIPTIONALPAHLAVI");
2301 
2302         /**
2303          * Represents the "Old Turkic" Unicode character block.
2304          * @since 1.7
2305          */
2306         public static final UnicodeBlock OLD_TURKIC
2307                 = new UnicodeBlock("OLD_TURKIC",
2308                                    "OLD TURKIC",
2309                                    "OLDTURKIC");
2310 
2311         /**
2312          * Represents the "Rumi Numeral Symbols" Unicode character block.
2313          * @since 1.7
2314          */
2315         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS
2316                 = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2317                                    "RUMI NUMERAL SYMBOLS",
2318                                    "RUMINUMERALSYMBOLS");
2319 
2320         /**
2321          * Represents the "Brahmi" Unicode character block.
2322          * @since 1.7
2323          */
2324         public static final UnicodeBlock BRAHMI
2325                 = new UnicodeBlock("BRAHMI");
2326 
2327         /**
2328          * Represents the "Kaithi" Unicode character block.
2329          * @since 1.7
2330          */
2331         public static final UnicodeBlock KAITHI
2332                 = new UnicodeBlock("KAITHI");
2333 
2334         /**
2335          * Represents the "Cuneiform" Unicode character block.
2336          * @since 1.7
2337          */
2338         public static final UnicodeBlock CUNEIFORM
2339                 = new UnicodeBlock("CUNEIFORM");
2340 
2341         /**
2342          * Represents the "Cuneiform Numbers and Punctuation" Unicode
2343          * character block.
2344          * @since 1.7
2345          */
2346         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION
2347                 = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2348                                    "CUNEIFORM NUMBERS AND PUNCTUATION",
2349                                    "CUNEIFORMNUMBERSANDPUNCTUATION");
2350 
2351         /**
2352          * Represents the "Egyptian Hieroglyphs" Unicode character block.
2353          * @since 1.7
2354          */
2355         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS
2356                 = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2357                                    "EGYPTIAN HIEROGLYPHS",
2358                                    "EGYPTIANHIEROGLYPHS");
2359 
2360         /**
2361          * Represents the "Bamum Supplement" Unicode character block.
2362          * @since 1.7
2363          */
2364         public static final UnicodeBlock BAMUM_SUPPLEMENT
2365                 = new UnicodeBlock("BAMUM_SUPPLEMENT",
2366                                    "BAMUM SUPPLEMENT",
2367                                    "BAMUMSUPPLEMENT");
2368 
2369         /**
2370          * Represents the "Kana Supplement" Unicode character block.
2371          * @since 1.7
2372          */
2373         public static final UnicodeBlock KANA_SUPPLEMENT
2374                 = new UnicodeBlock("KANA_SUPPLEMENT",
2375                                    "KANA SUPPLEMENT",
2376                                    "KANASUPPLEMENT");
2377 
2378         /**
2379          * Represents the "Ancient Greek Musical Notation" Unicode character
2380          * block.
2381          * @since 1.7
2382          */
2383         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION
2384                 = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2385                                    "ANCIENT GREEK MUSICAL NOTATION",
2386                                    "ANCIENTGREEKMUSICALNOTATION");
2387 
2388         /**
2389          * Represents the "Counting Rod Numerals" Unicode character block.
2390          * @since 1.7
2391          */
2392         public static final UnicodeBlock COUNTING_ROD_NUMERALS
2393                 = new UnicodeBlock("COUNTING_ROD_NUMERALS",
2394                                    "COUNTING ROD NUMERALS",
2395                                    "COUNTINGRODNUMERALS");
2396 
2397         /**
2398          * Represents the "Mahjong Tiles" Unicode character block.
2399          * @since 1.7
2400          */
2401         public static final UnicodeBlock MAHJONG_TILES
2402                 = new UnicodeBlock("MAHJONG_TILES",
2403                                    "MAHJONG TILES",
2404                                    "MAHJONGTILES");
2405 
2406         /**
2407          * Represents the "Domino Tiles" Unicode character block.
2408          * @since 1.7
2409          */
2410         public static final UnicodeBlock DOMINO_TILES
2411                 = new UnicodeBlock("DOMINO_TILES",
2412                                    "DOMINO TILES",
2413                                    "DOMINOTILES");
2414 
2415         /**
2416          * Represents the "Playing Cards" Unicode character block.
2417          * @since 1.7
2418          */
2419         public static final UnicodeBlock PLAYING_CARDS
2420                 = new UnicodeBlock("PLAYING_CARDS",
2421                                    "PLAYING CARDS",
2422                                    "PLAYINGCARDS");
2423 
2424         /**
2425          * Represents the "Enclosed Alphanumeric Supplement" Unicode character
2426          * block.
2427          * @since 1.7
2428          */
2429         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT
2430                 = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2431                                    "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2432                                    "ENCLOSEDALPHANUMERICSUPPLEMENT");
2433 
2434         /**
2435          * Represents the "Enclosed Ideographic Supplement" Unicode character
2436          * block.
2437          * @since 1.7
2438          */
2439         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
2440                 = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2441                                    "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2442                                    "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2443 
2444         /**
2445          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2446          * character block.
2447          * @since 1.7
2448          */
2449         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2450             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2451                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2452                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2453 
2454         /**
2455          * Constant for the "Emoticons" Unicode character block.
2456          * @since 1.7
2457          */
2458         public static final UnicodeBlock EMOTICONS =
2459             new UnicodeBlock("EMOTICONS");
2460 
2461         /**
2462          * Constant for the "Transport And Map Symbols" Unicode character block.
2463          * @since 1.7
2464          */
2465         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2466             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2467                              "TRANSPORT AND MAP SYMBOLS",
2468                              "TRANSPORTANDMAPSYMBOLS");
2469 
2470         /**
2471          * Constant for the "Alchemical Symbols" Unicode character block.
2472          * @since 1.7
2473          */
2474         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2475             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2476                              "ALCHEMICAL SYMBOLS",
2477                              "ALCHEMICALSYMBOLS");
2478 
2479         /**
2480          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2481          * character block.
2482          * @since 1.7
2483          */
2484         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2485             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2486                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2487                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2488 
2489         /**
2490          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2491          * character block.
2492          * @since 1.7
2493          */
2494         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2495             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2496                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2497                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2498 
2499         /**
2500          * Constant for the "Arabic Extended-A" Unicode character block.
2501          * @since 1.8
2502          */
2503         public static final UnicodeBlock ARABIC_EXTENDED_A =
2504             new UnicodeBlock("ARABIC_EXTENDED_A",   // identifier: same text as the constant's name
2505                              "ARABIC EXTENDED-A",   // upper-cased block name, hyphen kept (presumably a lookup alias; TODO confirm in constructor)
2506                              "ARABICEXTENDED-A");   // the same name with the spaces removed
2507 
2508         /**
2509          * Constant for the "Sundanese Supplement" Unicode character block.
2510          * @since 1.8
2511          */
2512         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2513             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2514                              "SUNDANESE SUPPLEMENT",
2515                              "SUNDANESESUPPLEMENT");
2516 
2517         /**
2518          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2519          * @since 1.8
2520          */
2521         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2522             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2523                              "MEETEI MAYEK EXTENSIONS",
2524                              "MEETEIMAYEKEXTENSIONS");
2525 
2526         /**
2527          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2528          * @since 1.8
2529          */
2530         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2531             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2532                              "MEROITIC HIEROGLYPHS",
2533                              "MEROITICHIEROGLYPHS");
2534 
2535         /**
2536          * Constant for the "Meroitic Cursive" Unicode character block.
2537          * @since 1.8
2538          */
2539         public static final UnicodeBlock MEROITIC_CURSIVE =
2540             new UnicodeBlock("MEROITIC_CURSIVE",
2541                              "MEROITIC CURSIVE",
2542                              "MEROITICCURSIVE");
2543 
2544         /**
2545          * Constant for the "Sora Sompeng" Unicode character block.
2546          * @since 1.8
2547          */
2548         public static final UnicodeBlock SORA_SOMPENG =
2549             new UnicodeBlock("SORA_SOMPENG",
2550                              "SORA SOMPENG",
2551                              "SORASOMPENG");
2552 
2553         /**
2554          * Constant for the "Chakma" Unicode character block.
2555          * @since 1.8
2556          */
2557         public static final UnicodeBlock CHAKMA =
2558             new UnicodeBlock("CHAKMA");   // single-word block name: only the identifier is passed
2559 
2560         /**
2561          * Constant for the "Sharada" Unicode character block.
2562          * @since 1.8
2563          */
2564         public static final UnicodeBlock SHARADA =
2565             new UnicodeBlock("SHARADA");
2566 
2567         /**
2568          * Constant for the "Takri" Unicode character block.
2569          * @since 1.8
2570          */
2571         public static final UnicodeBlock TAKRI =
2572             new UnicodeBlock("TAKRI");
2573 
2574         /**
2575          * Constant for the "Miao" Unicode character block.
2576          * @since 1.8
2577          */
2578         public static final UnicodeBlock MIAO =
2579             new UnicodeBlock("MIAO");
2580 
2581         /**
2582          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2583          * character block.
2584          * @since 1.8
2585          */
2586         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2587             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2588                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2589                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2590 
2591         /**
2592          * Constant for the "Combining Diacritical Marks Extended" Unicode
2593          * character block.
2594          * @since 9
2595          */
2596         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2597             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",   // identifier: same text as the constant's name
2598                              "COMBINING DIACRITICAL MARKS EXTENDED",   // upper-cased block name (presumably a lookup alias; TODO confirm in constructor)
2599                              "COMBININGDIACRITICALMARKSEXTENDED");     // the same name with the spaces removed
2600 
2601         /**
2602          * Constant for the "Myanmar Extended-B" Unicode character block.
2603          * @since 9
2604          */
2605         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2606             new UnicodeBlock("MYANMAR_EXTENDED_B",
2607                              "MYANMAR EXTENDED-B",
2608                              "MYANMAREXTENDED-B");
2609 
2610         /**
2611          * Constant for the "Latin Extended-E" Unicode character block.
2612          * @since 9
2613          */
2614         public static final UnicodeBlock LATIN_EXTENDED_E =
2615             new UnicodeBlock("LATIN_EXTENDED_E",
2616                              "LATIN EXTENDED-E",
2617                              "LATINEXTENDED-E");
2618 
2619         /**
2620          * Constant for the "Coptic Epact Numbers" Unicode character block.
2621          * @since 9
2622          */
2623         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2624             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2625                              "COPTIC EPACT NUMBERS",
2626                              "COPTICEPACTNUMBERS");
2627 
2628         /**
2629          * Constant for the "Old Permic" Unicode character block.
2630          * @since 9
2631          */
2632         public static final UnicodeBlock OLD_PERMIC =
2633             new UnicodeBlock("OLD_PERMIC",
2634                              "OLD PERMIC",
2635                              "OLDPERMIC");
2636 
2637         /**
2638          * Constant for the "Elbasan" Unicode character block.
2639          * @since 9
2640          */
2641         public static final UnicodeBlock ELBASAN =
2642             new UnicodeBlock("ELBASAN");   // single-word block name: only the identifier is passed
2643 
2644         /**
2645          * Constant for the "Caucasian Albanian" Unicode character block.
2646          * @since 9
2647          */
2648         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2649             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2650                              "CAUCASIAN ALBANIAN",
2651                              "CAUCASIANALBANIAN");
2652 
2653         /**
2654          * Constant for the "Linear A" Unicode character block.
2655          * @since 9
2656          */
2657         public static final UnicodeBlock LINEAR_A =
2658             new UnicodeBlock("LINEAR_A",
2659                              "LINEAR A",
2660                              "LINEARA");
2661 
2662         /**
2663          * Constant for the "Palmyrene" Unicode character block.
2664          * @since 9
2665          */
2666         public static final UnicodeBlock PALMYRENE =
2667             new UnicodeBlock("PALMYRENE");
2668 
2669         /**
2670          * Constant for the "Nabataean" Unicode character block.
2671          * @since 9
2672          */
2673         public static final UnicodeBlock NABATAEAN =
2674             new UnicodeBlock("NABATAEAN");
2675 
2676         /**
2677          * Constant for the "Old North Arabian" Unicode character block.
2678          * @since 9
2679          */
2680         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2681             new UnicodeBlock("OLD_NORTH_ARABIAN",
2682                              "OLD NORTH ARABIAN",
2683                              "OLDNORTHARABIAN");
2684 
2685         /**
2686          * Constant for the "Manichaean" Unicode character block.
2687          * @since 9
2688          */
2689         public static final UnicodeBlock MANICHAEAN =
2690             new UnicodeBlock("MANICHAEAN");
2691 
2692         /**
2693          * Constant for the "Psalter Pahlavi" Unicode character block.
2694          * @since 9
2695          */
2696         public static final UnicodeBlock PSALTER_PAHLAVI =
2697             new UnicodeBlock("PSALTER_PAHLAVI",
2698                              "PSALTER PAHLAVI",
2699                              "PSALTERPAHLAVI");
2700 
2701         /**
2702          * Constant for the "Mahajani" Unicode character block.
2703          * @since 9
2704          */
2705         public static final UnicodeBlock MAHAJANI =
2706             new UnicodeBlock("MAHAJANI");
2707 
2708         /**
2709          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2710          * @since 9
2711          */
2712         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2713             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2714                              "SINHALA ARCHAIC NUMBERS",
2715                              "SINHALAARCHAICNUMBERS");
2716 
2717         /**
2718          * Constant for the "Khojki" Unicode character block.
2719          * @since 9
2720          */
2721         public static final UnicodeBlock KHOJKI =
2722             new UnicodeBlock("KHOJKI");
2723 
2724         /**
2725          * Constant for the "Khudawadi" Unicode character block.
2726          * @since 9
2727          */
2728         public static final UnicodeBlock KHUDAWADI =
2729             new UnicodeBlock("KHUDAWADI");
2730 
2731         /**
2732          * Constant for the "Grantha" Unicode character block.
2733          * @since 9
2734          */
2735         public static final UnicodeBlock GRANTHA =
2736             new UnicodeBlock("GRANTHA");
2737 
2738         /**
2739          * Constant for the "Tirhuta" Unicode character block.
2740          * @since 9
2741          */
2742         public static final UnicodeBlock TIRHUTA =
2743             new UnicodeBlock("TIRHUTA");
2744 
2745         /**
2746          * Constant for the "Siddham" Unicode character block.
2747          * @since 9
2748          */
2749         public static final UnicodeBlock SIDDHAM =
2750             new UnicodeBlock("SIDDHAM");
2751 
2752         /**
2753          * Constant for the "Modi" Unicode character block.
2754          * @since 9
2755          */
2756         public static final UnicodeBlock MODI =
2757             new UnicodeBlock("MODI");
2758 
2759         /**
2760          * Constant for the "Warang Citi" Unicode character block.
2761          * @since 9
2762          */
2763         public static final UnicodeBlock WARANG_CITI =
2764             new UnicodeBlock("WARANG_CITI",
2765                              "WARANG CITI",
2766                              "WARANGCITI");
2767 
2768         /**
2769          * Constant for the "Pau Cin Hau" Unicode character block.
2770          * @since 9
2771          */
2772         public static final UnicodeBlock PAU_CIN_HAU =
2773             new UnicodeBlock("PAU_CIN_HAU",
2774                              "PAU CIN HAU",
2775                              "PAUCINHAU");
2776 
2777         /**
2778          * Constant for the "Mro" Unicode character block.
2779          * @since 9
2780          */
2781         public static final UnicodeBlock MRO =
2782             new UnicodeBlock("MRO");
2783 
2784         /**
2785          * Constant for the "Bassa Vah" Unicode character block.
2786          * @since 9
2787          */
2788         public static final UnicodeBlock BASSA_VAH =
2789             new UnicodeBlock("BASSA_VAH",
2790                              "BASSA VAH",
2791                              "BASSAVAH");
2792 
2793         /**
2794          * Constant for the "Pahawh Hmong" Unicode character block.
2795          * @since 9
2796          */
2797         public static final UnicodeBlock PAHAWH_HMONG =
2798             new UnicodeBlock("PAHAWH_HMONG",
2799                              "PAHAWH HMONG",
2800                              "PAHAWHHMONG");
2801 
2802         /**
2803          * Constant for the "Duployan" Unicode character block.
2804          * @since 9
2805          */
2806         public static final UnicodeBlock DUPLOYAN =
2807             new UnicodeBlock("DUPLOYAN");
2808 
2809         /**
2810          * Constant for the "Shorthand Format Controls" Unicode character block.
2811          * @since 9
2812          */
2813         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2814             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2815                              "SHORTHAND FORMAT CONTROLS",
2816                              "SHORTHANDFORMATCONTROLS");
2817 
2818         /**
2819          * Constant for the "Mende Kikakui" Unicode character block.
2820          * @since 9
2821          */
2822         public static final UnicodeBlock MENDE_KIKAKUI =
2823             new UnicodeBlock("MENDE_KIKAKUI",
2824                              "MENDE KIKAKUI",
2825                              "MENDEKIKAKUI");
2826 
2827         /**
2828          * Constant for the "Ornamental Dingbats" Unicode character block.
2829          * @since 9
2830          */
2831         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2832             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2833                              "ORNAMENTAL DINGBATS",
2834                              "ORNAMENTALDINGBATS");
2835 
2836         /**
2837          * Constant for the "Geometric Shapes Extended" Unicode character block.
2838          * @since 9
2839          */
2840         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2841             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2842                              "GEOMETRIC SHAPES EXTENDED",
2843                              "GEOMETRICSHAPESEXTENDED");
2844 
2845         /**
2846          * Constant for the "Supplemental Arrows-C" Unicode character block.
2847          * @since 9
2848          */
2849         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2850             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2851                              "SUPPLEMENTAL ARROWS-C",
2852                              "SUPPLEMENTALARROWS-C");
2853 
2854         /**
2855          * Constant for the "Cherokee Supplement" Unicode character block.
2856          * @since 9
2857          */
2858         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2859             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2860                              "CHEROKEE SUPPLEMENT",
2861                              "CHEROKEESUPPLEMENT");
2862 
2863         /**
2864          * Constant for the "Hatran" Unicode character block.
2865          * @since 9
2866          */
2867         public static final UnicodeBlock HATRAN =
2868             new UnicodeBlock("HATRAN");
2869 
2870         /**
2871          * Constant for the "Old Hungarian" Unicode character block.
2872          * @since 9
2873          */
2874         public static final UnicodeBlock OLD_HUNGARIAN =
2875             new UnicodeBlock("OLD_HUNGARIAN",
2876                              "OLD HUNGARIAN",
2877                              "OLDHUNGARIAN");
2878 
2879         /**
2880          * Constant for the "Multani" Unicode character block.
2881          * @since 9
2882          */
2883         public static final UnicodeBlock MULTANI =
2884             new UnicodeBlock("MULTANI");
2885 
2886         /**
2887          * Constant for the "Ahom" Unicode character block.
2888          * @since 9
2889          */
2890         public static final UnicodeBlock AHOM =
2891             new UnicodeBlock("AHOM");
2892 
2893         /**
2894          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2895          * @since 9
2896          */
2897         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2898             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2899                              "EARLY DYNASTIC CUNEIFORM",
2900                              "EARLYDYNASTICCUNEIFORM");
2901 
2902         /**
2903          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2904          * @since 9
2905          */
2906         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2907             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2908                              "ANATOLIAN HIEROGLYPHS",
2909                              "ANATOLIANHIEROGLYPHS");
2910 
2911         /**
2912          * Constant for the "Sutton SignWriting" Unicode character block.
2913          * @since 9
2914          */
2915         public static final UnicodeBlock SUTTON_SIGNWRITING =
2916             new UnicodeBlock("SUTTON_SIGNWRITING",
2917                              "SUTTON SIGNWRITING",
2918                              "SUTTONSIGNWRITING");
2919 
2920         /**
2921          * Constant for the "Supplemental Symbols and Pictographs" Unicode
2922          * character block.
2923          * @since 9
2924          */
2925         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2926             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2927                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2928                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2929 
2930         /**
2931          * Constant for the "CJK Unified Ideographs Extension E" Unicode
2932          * character block.
2933          * @since 9
2934          */
2935         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2936             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2937                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2938                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2939 
2940         /**
2941          * Constant for the "Syriac Supplement" Unicode
2942          * character block.
2943          * @since 11
2944          */
2945         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2946             new UnicodeBlock("SYRIAC_SUPPLEMENT",   // identifier: same text as the constant's name
2947                              "SYRIAC SUPPLEMENT",   // upper-cased block name (presumably a lookup alias; TODO confirm in constructor)
2948                              "SYRIACSUPPLEMENT");   // the same name with the spaces removed
2949 
2950         /**
2951          * Constant for the "Cyrillic Extended-C" Unicode
2952          * character block.
2953          * @since 11
2954          */
2955         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2956             new UnicodeBlock("CYRILLIC_EXTENDED_C",
2957                              "CYRILLIC EXTENDED-C",
2958                              "CYRILLICEXTENDED-C");
2959 
2960         /**
2961          * Constant for the "Osage" Unicode
2962          * character block.
2963          * @since 11
2964          */
2965         public static final UnicodeBlock OSAGE =
2966             new UnicodeBlock("OSAGE");   // single-word block name: only the identifier is passed
2967 
2968         /**
2969          * Constant for the "Newa" Unicode
2970          * character block.
2971          * @since 11
2972          */
2973         public static final UnicodeBlock NEWA =
2974             new UnicodeBlock("NEWA");
2975 
2976         /**
2977          * Constant for the "Mongolian Supplement" Unicode
2978          * character block.
2979          * @since 11
2980          */
2981         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2982             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2983                              "MONGOLIAN SUPPLEMENT",
2984                              "MONGOLIANSUPPLEMENT");
2985 
2986         /**
2987          * Constant for the "Marchen" Unicode
2988          * character block.
2989          * @since 11
2990          */
2991         public static final UnicodeBlock MARCHEN =
2992             new UnicodeBlock("MARCHEN");
2993 
2994         /**
2995          * Constant for the "Ideographic Symbols and Punctuation" Unicode
2996          * character block.
2997          * @since 11
2998          */
2999         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3000             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3001                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3002                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3003 
3004         /**
3005          * Constant for the "Tangut" Unicode
3006          * character block.
3007          * @since 11
3008          */
3009         public static final UnicodeBlock TANGUT =
3010             new UnicodeBlock("TANGUT");
3011 
3012         /**
3013          * Constant for the "Tangut Components" Unicode
3014          * character block.
3015          * @since 11
3016          */
3017         public static final UnicodeBlock TANGUT_COMPONENTS =
3018             new UnicodeBlock("TANGUT_COMPONENTS",
3019                              "TANGUT COMPONENTS",
3020                              "TANGUTCOMPONENTS");
3021 
3022         /**
3023          * Constant for the "Kana Extended-A" Unicode
3024          * character block.
3025          * @since 11
3026          */
3027         public static final UnicodeBlock KANA_EXTENDED_A =
3028             new UnicodeBlock("KANA_EXTENDED_A",
3029                              "KANA EXTENDED-A",
3030                              "KANAEXTENDED-A");
3031         /**
3032          * Constant for the "Glagolitic Supplement" Unicode
3033          * character block.
3034          * @since 11
3035          */
3036         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3037             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3038                              "GLAGOLITIC SUPPLEMENT",
3039                              "GLAGOLITICSUPPLEMENT");
3040         /**
3041          * Constant for the "Adlam" Unicode
3042          * character block.
3043          * @since 11
3044          */
3045         public static final UnicodeBlock ADLAM =
3046             new UnicodeBlock("ADLAM");
3047 
3048         /**
3049          * Constant for the "Masaram Gondi" Unicode
3050          * character block.
3051          * @since 11
3052          */
3053         public static final UnicodeBlock MASARAM_GONDI =
3054             new UnicodeBlock("MASARAM_GONDI",
3055                              "MASARAM GONDI",
3056                              "MASARAMGONDI");
3057 
3058         /**
3059          * Constant for the "Zanabazar Square" Unicode
3060          * character block.
3061          * @since 11
3062          */
3063         public static final UnicodeBlock ZANABAZAR_SQUARE =
3064             new UnicodeBlock("ZANABAZAR_SQUARE",
3065                              "ZANABAZAR SQUARE",
3066                              "ZANABAZARSQUARE");
3067 
3068         /**
3069          * Constant for the "Nushu" Unicode
3070          * character block.
3071          * @since 11
3072          */
3073         public static final UnicodeBlock NUSHU =
3074             new UnicodeBlock("NUSHU");
3075 
3076         /**
3077          * Constant for the "Soyombo" Unicode
3078          * character block.
3079          * @since 11
3080          */
3081         public static final UnicodeBlock SOYOMBO =
3082             new UnicodeBlock("SOYOMBO");
3083 
3084         /**
3085          * Constant for the "Bhaiksuki" Unicode
3086          * character block.
3087          * @since 11
3088          */
3089         public static final UnicodeBlock BHAIKSUKI =
3090             new UnicodeBlock("BHAIKSUKI");
3091 
3092         /**
3093          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3094          * character block.
3095          * @since 11
3096          */
3097         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3098             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3099                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3100                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3101 
3102         private static final int blockStarts[] = {
3103             0x0000,   // 0000..007F; Basic Latin
3104             0x0080,   // 0080..00FF; Latin-1 Supplement
3105             0x0100,   // 0100..017F; Latin Extended-A
3106             0x0180,   // 0180..024F; Latin Extended-B
3107             0x0250,   // 0250..02AF; IPA Extensions
3108             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3109             0x0300,   // 0300..036F; Combining Diacritical Marks
3110             0x0370,   // 0370..03FF; Greek and Coptic
3111             0x0400,   // 0400..04FF; Cyrillic
3112             0x0500,   // 0500..052F; Cyrillic Supplement
3113             0x0530,   // 0530..058F; Armenian
3114             0x0590,   // 0590..05FF; Hebrew
3115             0x0600,   // 0600..06FF; Arabic
3116             0x0700,   // 0700..074F; Syriac
3117             0x0750,   // 0750..077F; Arabic Supplement
3118             0x0780,   // 0780..07BF; Thaana
3119             0x07C0,   // 07C0..07FF; NKo
3120             0x0800,   // 0800..083F; Samaritan
3121             0x0840,   // 0840..085F; Mandaic
3122             0x0860,   // 0860..086F; Syriac Supplement
3123             0x0870,   //             unassigned
3124             0x08A0,   // 08A0..08FF; Arabic Extended-A
3125             0x0900,   // 0900..097F; Devanagari
3126             0x0980,   // 0980..09FF; Bengali
3127             0x0A00,   // 0A00..0A7F; Gurmukhi
3128             0x0A80,   // 0A80..0AFF; Gujarati
3129             0x0B00,   // 0B00..0B7F; Oriya
3130             0x0B80,   // 0B80..0BFF; Tamil
3131             0x0C00,   // 0C00..0C7F; Telugu
3132             0x0C80,   // 0C80..0CFF; Kannada
3133             0x0D00,   // 0D00..0D7F; Malayalam
3134             0x0D80,   // 0D80..0DFF; Sinhala
3135             0x0E00,   // 0E00..0E7F; Thai
3136             0x0E80,   // 0E80..0EFF; Lao
3137             0x0F00,   // 0F00..0FFF; Tibetan
3138             0x1000,   // 1000..109F; Myanmar
3139             0x10A0,   // 10A0..10FF; Georgian
3140             0x1100,   // 1100..11FF; Hangul Jamo
3141             0x1200,   // 1200..137F; Ethiopic
3142             0x1380,   // 1380..139F; Ethiopic Supplement
3143             0x13A0,   // 13A0..13FF; Cherokee
3144             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3145             0x1680,   // 1680..169F; Ogham
3146             0x16A0,   // 16A0..16FF; Runic
3147             0x1700,   // 1700..171F; Tagalog
3148             0x1720,   // 1720..173F; Hanunoo
3149             0x1740,   // 1740..175F; Buhid
3150             0x1760,   // 1760..177F; Tagbanwa
3151             0x1780,   // 1780..17FF; Khmer
3152             0x1800,   // 1800..18AF; Mongolian
3153             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3154             0x1900,   // 1900..194F; Limbu
3155             0x1950,   // 1950..197F; Tai Le
3156             0x1980,   // 1980..19DF; New Tai Lue
3157             0x19E0,   // 19E0..19FF; Khmer Symbols
3158             0x1A00,   // 1A00..1A1F; Buginese
3159             0x1A20,   // 1A20..1AAF; Tai Tham
3160             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3161             0x1B00,   // 1B00..1B7F; Balinese
3162             0x1B80,   // 1B80..1BBF; Sundanese
3163             0x1BC0,   // 1BC0..1BFF; Batak
3164             0x1C00,   // 1C00..1C4F; Lepcha
3165             0x1C50,   // 1C50..1C7F; Ol Chiki
3166             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3167             0x1C90,   //             unassigned
3168             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3169             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3170             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3171             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3172             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3173             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3174             0x1F00,   // 1F00..1FFF; Greek Extended
3175             0x2000,   // 2000..206F; General Punctuation
3176             0x2070,   // 2070..209F; Superscripts and Subscripts
3177             0x20A0,   // 20A0..20CF; Currency Symbols
3178             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3179             0x2100,   // 2100..214F; Letterlike Symbols
3180             0x2150,   // 2150..218F; Number Forms
3181             0x2190,   // 2190..21FF; Arrows
3182             0x2200,   // 2200..22FF; Mathematical Operators
3183             0x2300,   // 2300..23FF; Miscellaneous Technical
3184             0x2400,   // 2400..243F; Control Pictures
3185             0x2440,   // 2440..245F; Optical Character Recognition
3186             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3187             0x2500,   // 2500..257F; Box Drawing
3188             0x2580,   // 2580..259F; Block Elements
3189             0x25A0,   // 25A0..25FF; Geometric Shapes
3190             0x2600,   // 2600..26FF; Miscellaneous Symbols
3191             0x2700,   // 2700..27BF; Dingbats
3192             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3193             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3194             0x2800,   // 2800..28FF; Braille Patterns
3195             0x2900,   // 2900..297F; Supplemental Arrows-B
3196             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3197             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3198             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3199             0x2C00,   // 2C00..2C5F; Glagolitic
3200             0x2C60,   // 2C60..2C7F; Latin Extended-C
3201             0x2C80,   // 2C80..2CFF; Coptic
3202             0x2D00,   // 2D00..2D2F; Georgian Supplement
3203             0x2D30,   // 2D30..2D7F; Tifinagh
3204             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3205             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3206             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3207             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3208             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3209             0x2FE0,   //             unassigned
3210             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3211             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3212             0x3040,   // 3040..309F; Hiragana
3213             0x30A0,   // 30A0..30FF; Katakana
3214             0x3100,   // 3100..312F; Bopomofo
3215             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3216             0x3190,   // 3190..319F; Kanbun
3217             0x31A0,   // 31A0..31BF; Bopomofo Extended
3218             0x31C0,   // 31C0..31EF; CJK Strokes
3219             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3220             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3221             0x3300,   // 3300..33FF; CJK Compatibility
3222             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3223             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3224             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3225             0xA000,   // A000..A48F; Yi Syllables
3226             0xA490,   // A490..A4CF; Yi Radicals
3227             0xA4D0,   // A4D0..A4FF; Lisu
3228             0xA500,   // A500..A63F; Vai
3229             0xA640,   // A640..A69F; Cyrillic Extended-B
3230             0xA6A0,   // A6A0..A6FF; Bamum
3231             0xA700,   // A700..A71F; Modifier Tone Letters
3232             0xA720,   // A720..A7FF; Latin Extended-D
3233             0xA800,   // A800..A82F; Syloti Nagri
3234             0xA830,   // A830..A83F; Common Indic Number Forms
3235             0xA840,   // A840..A87F; Phags-pa
3236             0xA880,   // A880..A8DF; Saurashtra
3237             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3238             0xA900,   // A900..A92F; Kayah Li
3239             0xA930,   // A930..A95F; Rejang
3240             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3241             0xA980,   // A980..A9DF; Javanese
3242             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3243             0xAA00,   // AA00..AA5F; Cham
3244             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3245             0xAA80,   // AA80..AADF; Tai Viet
3246             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3247             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3248             0xAB30,   // AB30..AB6F; Latin Extended-E
3249             0xAB70,   // AB70..ABBF; Cherokee Supplement
3250             0xABC0,   // ABC0..ABFF; Meetei Mayek
3251             0xAC00,   // AC00..D7AF; Hangul Syllables
3252             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3253             0xD800,   // D800..DB7F; High Surrogates
3254             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3255             0xDC00,   // DC00..DFFF; Low Surrogates
3256             0xE000,   // E000..F8FF; Private Use Area
3257             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3258             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3259             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3260             0xFE00,   // FE00..FE0F; Variation Selectors
3261             0xFE10,   // FE10..FE1F; Vertical Forms
3262             0xFE20,   // FE20..FE2F; Combining Half Marks
3263             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3264             0xFE50,   // FE50..FE6F; Small Form Variants
3265             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3266             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3267             0xFFF0,   // FFF0..FFFF; Specials
3268             0x10000,  // 10000..1007F; Linear B Syllabary
3269             0x10080,  // 10080..100FF; Linear B Ideograms
3270             0x10100,  // 10100..1013F; Aegean Numbers
3271             0x10140,  // 10140..1018F; Ancient Greek Numbers
3272             0x10190,  // 10190..101CF; Ancient Symbols
3273             0x101D0,  // 101D0..101FF; Phaistos Disc
3274             0x10200,  //               unassigned
3275             0x10280,  // 10280..1029F; Lycian
3276             0x102A0,  // 102A0..102DF; Carian
3277             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3278             0x10300,  // 10300..1032F; Old Italic
3279             0x10330,  // 10330..1034F; Gothic
3280             0x10350,  // 10350..1037F; Old Permic
3281             0x10380,  // 10380..1039F; Ugaritic
3282             0x103A0,  // 103A0..103DF; Old Persian
3283             0x103E0,  //               unassigned
3284             0x10400,  // 10400..1044F; Deseret
3285             0x10450,  // 10450..1047F; Shavian
3286             0x10480,  // 10480..104AF; Osmanya
3287             0x104B0,  // 104B0..104FF; Osage
3288             0x10500,  // 10500..1052F; Elbasan
3289             0x10530,  // 10530..1056F; Caucasian Albanian
3290             0x10570,  //               unassigned
3291             0x10600,  // 10600..1077F; Linear A
3292             0x10780,  //               unassigned
3293             0x10800,  // 10800..1083F; Cypriot Syllabary
3294             0x10840,  // 10840..1085F; Imperial Aramaic
3295             0x10860,  // 10860..1087F; Palmyrene
3296             0x10880,  // 10880..108AF; Nabataean
3297             0x108B0,  //               unassigned
3298             0x108E0,  // 108E0..108FF; Hatran
3299             0x10900,  // 10900..1091F; Phoenician
3300             0x10920,  // 10920..1093F; Lydian
3301             0x10940,  //               unassigned
3302             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3303             0x109A0,  // 109A0..109FF; Meroitic Cursive
3304             0x10A00,  // 10A00..10A5F; Kharoshthi
3305             0x10A60,  // 10A60..10A7F; Old South Arabian
3306             0x10A80,  // 10A80..10A9F; Old North Arabian
3307             0x10AA0,  //               unassigned
3308             0x10AC0,  // 10AC0..10AFF; Manichaean
3309             0x10B00,  // 10B00..10B3F; Avestan
3310             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3311             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3312             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3313             0x10BB0,  //               unassigned
3314             0x10C00,  // 10C00..10C4F; Old Turkic
3315             0x10C50,  //               unassigned
3316             0x10C80,  // 10C80..10CFF; Old Hungarian
3317             0x10D00,  //               unassigned
3318             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3319             0x10E80,  //               unassigned
3320             0x11000,  // 11000..1107F; Brahmi
3321             0x11080,  // 11080..110CF; Kaithi
3322             0x110D0,  // 110D0..110FF; Sora Sompeng
3323             0x11100,  // 11100..1114F; Chakma
3324             0x11150,  // 11150..1117F; Mahajani
3325             0x11180,  // 11180..111DF; Sharada
3326             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3327             0x11200,  // 11200..1124F; Khojki
3328             0x11250,  //               unassigned
3329             0x11280,  // 11280..112AF; Multani
3330             0x112B0,  // 112B0..112FF; Khudawadi
3331             0x11300,  // 11300..1137F; Grantha
3332             0x11380,  //               unassigned
3333             0x11400,  // 11400..1147F; Newa
3334             0x11480,  // 11480..114DF; Tirhuta
3335             0x114E0,  //               unassigned
3336             0x11580,  // 11580..115FF; Siddham
3337             0x11600,  // 11600..1165F; Modi
3338             0x11660, //  11660..1167F; Mongolian Supplement
3339             0x11680,  // 11680..116CF; Takri
3340             0x116D0,  //               unassigned
3341             0x11700,  // 11700..1173F; Ahom
3342             0x11740,  //               unassigned
3343             0x118A0,  // 118A0..118FF; Warang Citi
3344             0x11900,  //               unassigned
3345             0x11A00,  // 11A00..11A4F; Zanabazar Square
3346             0x11A50,  // 11A50..11AAF; Soyombo
3347             0x11AB0,  //               unassigned
3348             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3349             0x11B00,  //               unassigned
3350             0x11C00,  // 11C00..11C6F; Bhaiksuki
3351             0x11C70,  // 11C70..11CBF; Marchen
3352             0x11CC0,  //               unassigned
3353             0x11D00,  // 11D00..11D5F; Masaram Gondi
3354             0x11D60,  //               unassigned
3355             0x12000,  // 12000..123FF; Cuneiform
3356             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3357             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3358             0x12550,  //               unassigned
3359             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3360             0x13430,  //               unassigned
3361             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3362             0x14680,  //               unassigned
3363             0x16800,  // 16800..16A3F; Bamum Supplement
3364             0x16A40,  // 16A40..16A6F; Mro
3365             0x16A70,  //               unassigned
3366             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3367             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3368             0x16B90,  //               unassigned
3369             0x16F00,  // 16F00..16F9F; Miao
3370             0x16FA0,  //               unassigned
3371             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3372             0x17000,  // 17000..187FF; Tangut
3373             0x18800,  // 18800..18AFF; Tangut Components
3374             0x18B00,  //               unassigned
3375             0x1B000,  // 1B000..1B0FF; Kana Supplement
3376             0x1B100,  // 1B100..1B12F; Kana Extended-A
3377             0x1B130,  //               unassigned
3378             0x1B170,  // 1B170..1B2FF; Nushu
3379             0x1B300,  //               unassigned
3380             0x1BC00,  // 1BC00..1BC9F; Duployan
3381             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3382             0x1BCB0,  //               unassigned
3383             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3384             0x1D100,  // 1D100..1D1FF; Musical Symbols
3385             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3386             0x1D250,  //               unassigned
3387             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3388             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3389             0x1D380,  //               unassigned
3390             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3391             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3392             0x1DAB0,  //               unassigned
3393             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3394             0x1E030,  //               unassigned
3395             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3396             0x1E8E0,  //               unassigned
3397             0x1E900,  // 1E900..1E95F; Adlam
3398             0x1E960,  //               unassigned
3399             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3400             0x1EF00,  //               unassigned
3401             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3402             0x1F030,  // 1F030..1F09F; Domino Tiles
3403             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3404             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3405             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3406             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3407             0x1F600,  // 1F600..1F64F; Emoticons
3408             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3409             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3410             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3411             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3412             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3413             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3414             0x1FA00,  //               unassigned
3415             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3416             0x2A6E0,  //               unassigned
3417             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3418             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3419             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3420             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3421             0x2EBF0,  //               unassigned
3422             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3423             0x2FA20,  //               unassigned
3424             0xE0000,  // E0000..E007F; Tags
3425             0xE0080,  //               unassigned
3426             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3427             0xE01F0,  //               unassigned
3428             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3429             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
3430         };
3431 
3432         private static final UnicodeBlock[] blocks = { // index-parallel to blockStarts (see of(int)); null = no block starts at that entry
3433             BASIC_LATIN,
3434             LATIN_1_SUPPLEMENT,
3435             LATIN_EXTENDED_A,
3436             LATIN_EXTENDED_B,
3437             IPA_EXTENSIONS,
3438             SPACING_MODIFIER_LETTERS,
3439             COMBINING_DIACRITICAL_MARKS,
3440             GREEK,
3441             CYRILLIC,
3442             CYRILLIC_SUPPLEMENTARY,
3443             ARMENIAN,
3444             HEBREW,
3445             ARABIC,
3446             SYRIAC,
3447             ARABIC_SUPPLEMENT,
3448             THAANA,
3449             NKO,
3450             SAMARITAN,
3451             MANDAIC,
3452             SYRIAC_SUPPLEMENT,
3453             null,
3454             ARABIC_EXTENDED_A,
3455             DEVANAGARI,
3456             BENGALI,
3457             GURMUKHI,
3458             GUJARATI,
3459             ORIYA,
3460             TAMIL,
3461             TELUGU,
3462             KANNADA,
3463             MALAYALAM,
3464             SINHALA,
3465             THAI,
3466             LAO,
3467             TIBETAN,
3468             MYANMAR,
3469             GEORGIAN,
3470             HANGUL_JAMO,
3471             ETHIOPIC,
3472             ETHIOPIC_SUPPLEMENT,
3473             CHEROKEE,
3474             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3475             OGHAM,
3476             RUNIC,
3477             TAGALOG,
3478             HANUNOO,
3479             BUHID,
3480             TAGBANWA,
3481             KHMER,
3482             MONGOLIAN,
3483             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3484             LIMBU,
3485             TAI_LE,
3486             NEW_TAI_LUE,
3487             KHMER_SYMBOLS,
3488             BUGINESE,
3489             TAI_THAM,
3490             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3491             BALINESE,
3492             SUNDANESE,
3493             BATAK,
3494             LEPCHA,
3495             OL_CHIKI,
3496             CYRILLIC_EXTENDED_C,
3497             null,
3498             SUNDANESE_SUPPLEMENT,
3499             VEDIC_EXTENSIONS,
3500             PHONETIC_EXTENSIONS,
3501             PHONETIC_EXTENSIONS_SUPPLEMENT,
3502             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3503             LATIN_EXTENDED_ADDITIONAL,
3504             GREEK_EXTENDED,
3505             GENERAL_PUNCTUATION,
3506             SUPERSCRIPTS_AND_SUBSCRIPTS,
3507             CURRENCY_SYMBOLS,
3508             COMBINING_MARKS_FOR_SYMBOLS,
3509             LETTERLIKE_SYMBOLS,
3510             NUMBER_FORMS,
3511             ARROWS,
3512             MATHEMATICAL_OPERATORS,
3513             MISCELLANEOUS_TECHNICAL,
3514             CONTROL_PICTURES,
3515             OPTICAL_CHARACTER_RECOGNITION,
3516             ENCLOSED_ALPHANUMERICS,
3517             BOX_DRAWING,
3518             BLOCK_ELEMENTS,
3519             GEOMETRIC_SHAPES,
3520             MISCELLANEOUS_SYMBOLS,
3521             DINGBATS,
3522             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3523             SUPPLEMENTAL_ARROWS_A,
3524             BRAILLE_PATTERNS,
3525             SUPPLEMENTAL_ARROWS_B,
3526             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3527             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3528             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3529             GLAGOLITIC,
3530             LATIN_EXTENDED_C,
3531             COPTIC,
3532             GEORGIAN_SUPPLEMENT,
3533             TIFINAGH,
3534             ETHIOPIC_EXTENDED,
3535             CYRILLIC_EXTENDED_A,
3536             SUPPLEMENTAL_PUNCTUATION,
3537             CJK_RADICALS_SUPPLEMENT,
3538             KANGXI_RADICALS,
3539             null,
3540             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3541             CJK_SYMBOLS_AND_PUNCTUATION,
3542             HIRAGANA,
3543             KATAKANA,
3544             BOPOMOFO,
3545             HANGUL_COMPATIBILITY_JAMO,
3546             KANBUN,
3547             BOPOMOFO_EXTENDED,
3548             CJK_STROKES,
3549             KATAKANA_PHONETIC_EXTENSIONS,
3550             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3551             CJK_COMPATIBILITY,
3552             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3553             YIJING_HEXAGRAM_SYMBOLS,
3554             CJK_UNIFIED_IDEOGRAPHS,
3555             YI_SYLLABLES,
3556             YI_RADICALS,
3557             LISU,
3558             VAI,
3559             CYRILLIC_EXTENDED_B,
3560             BAMUM,
3561             MODIFIER_TONE_LETTERS,
3562             LATIN_EXTENDED_D,
3563             SYLOTI_NAGRI,
3564             COMMON_INDIC_NUMBER_FORMS,
3565             PHAGS_PA,
3566             SAURASHTRA,
3567             DEVANAGARI_EXTENDED,
3568             KAYAH_LI,
3569             REJANG,
3570             HANGUL_JAMO_EXTENDED_A,
3571             JAVANESE,
3572             MYANMAR_EXTENDED_B,
3573             CHAM,
3574             MYANMAR_EXTENDED_A,
3575             TAI_VIET,
3576             MEETEI_MAYEK_EXTENSIONS,
3577             ETHIOPIC_EXTENDED_A,
3578             LATIN_EXTENDED_E,
3579             CHEROKEE_SUPPLEMENT,
3580             MEETEI_MAYEK,
3581             HANGUL_SYLLABLES,
3582             HANGUL_JAMO_EXTENDED_B,
3583             HIGH_SURROGATES,
3584             HIGH_PRIVATE_USE_SURROGATES,
3585             LOW_SURROGATES,
3586             PRIVATE_USE_AREA,
3587             CJK_COMPATIBILITY_IDEOGRAPHS,
3588             ALPHABETIC_PRESENTATION_FORMS,
3589             ARABIC_PRESENTATION_FORMS_A,
3590             VARIATION_SELECTORS,
3591             VERTICAL_FORMS,
3592             COMBINING_HALF_MARKS,
3593             CJK_COMPATIBILITY_FORMS,
3594             SMALL_FORM_VARIANTS,
3595             ARABIC_PRESENTATION_FORMS_B,
3596             HALFWIDTH_AND_FULLWIDTH_FORMS,
3597             SPECIALS,
3598             LINEAR_B_SYLLABARY,
3599             LINEAR_B_IDEOGRAMS,
3600             AEGEAN_NUMBERS,
3601             ANCIENT_GREEK_NUMBERS,
3602             ANCIENT_SYMBOLS,
3603             PHAISTOS_DISC,
3604             null,
3605             LYCIAN,
3606             CARIAN,
3607             COPTIC_EPACT_NUMBERS,
3608             OLD_ITALIC,
3609             GOTHIC,
3610             OLD_PERMIC,
3611             UGARITIC,
3612             OLD_PERSIAN,
3613             null,
3614             DESERET,
3615             SHAVIAN,
3616             OSMANYA,
3617             OSAGE,
3618             ELBASAN,
3619             CAUCASIAN_ALBANIAN,
3620             null,
3621             LINEAR_A,
3622             null,
3623             CYPRIOT_SYLLABARY,
3624             IMPERIAL_ARAMAIC,
3625             PALMYRENE,
3626             NABATAEAN,
3627             null,
3628             HATRAN,
3629             PHOENICIAN,
3630             LYDIAN,
3631             null,
3632             MEROITIC_HIEROGLYPHS,
3633             MEROITIC_CURSIVE,
3634             KHAROSHTHI,
3635             OLD_SOUTH_ARABIAN,
3636             OLD_NORTH_ARABIAN,
3637             null,
3638             MANICHAEAN,
3639             AVESTAN,
3640             INSCRIPTIONAL_PARTHIAN,
3641             INSCRIPTIONAL_PAHLAVI,
3642             PSALTER_PAHLAVI,
3643             null,
3644             OLD_TURKIC,
3645             null,
3646             OLD_HUNGARIAN,
3647             null,
3648             RUMI_NUMERAL_SYMBOLS,
3649             null,
3650             BRAHMI,
3651             KAITHI,
3652             SORA_SOMPENG,
3653             CHAKMA,
3654             MAHAJANI,
3655             SHARADA,
3656             SINHALA_ARCHAIC_NUMBERS,
3657             KHOJKI,
3658             null,
3659             MULTANI,
3660             KHUDAWADI,
3661             GRANTHA,
3662             null,
3663             NEWA,
3664             TIRHUTA,
3665             null,
3666             SIDDHAM,
3667             MODI,
3668             MONGOLIAN_SUPPLEMENT,
3669             TAKRI,
3670             null,
3671             AHOM,
3672             null,
3673             WARANG_CITI,
3674             null,
3675             ZANABAZAR_SQUARE,
3676             SOYOMBO,
3677             null,
3678             PAU_CIN_HAU,
3679             null,
3680             BHAIKSUKI,
3681             MARCHEN,
3682             null,
3683             MASARAM_GONDI,
3684             null,
3685             CUNEIFORM,
3686             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3687             EARLY_DYNASTIC_CUNEIFORM,
3688             null,
3689             EGYPTIAN_HIEROGLYPHS,
3690             null,
3691             ANATOLIAN_HIEROGLYPHS,
3692             null,
3693             BAMUM_SUPPLEMENT,
3694             MRO,
3695             null,
3696             BASSA_VAH,
3697             PAHAWH_HMONG,
3698             null,
3699             MIAO,
3700             null,
3701             IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
3702             TANGUT,
3703             TANGUT_COMPONENTS,
3704             null,
3705             KANA_SUPPLEMENT,
3706             KANA_EXTENDED_A,
3707             null,
3708             NUSHU,
3709             null,
3710             DUPLOYAN,
3711             SHORTHAND_FORMAT_CONTROLS,
3712             null,
3713             BYZANTINE_MUSICAL_SYMBOLS,
3714             MUSICAL_SYMBOLS,
3715             ANCIENT_GREEK_MUSICAL_NOTATION,
3716             null,
3717             TAI_XUAN_JING_SYMBOLS,
3718             COUNTING_ROD_NUMERALS,
3719             null,
3720             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3721             SUTTON_SIGNWRITING,
3722             null,
3723             GLAGOLITIC_SUPPLEMENT,
3724             null,
3725             MENDE_KIKAKUI,
3726             null,
3727             ADLAM,
3728             null,
3729             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3730             null,
3731             MAHJONG_TILES,
3732             DOMINO_TILES,
3733             PLAYING_CARDS,
3734             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3735             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3736             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3737             EMOTICONS,
3738             ORNAMENTAL_DINGBATS,
3739             TRANSPORT_AND_MAP_SYMBOLS,
3740             ALCHEMICAL_SYMBOLS,
3741             GEOMETRIC_SHAPES_EXTENDED,
3742             SUPPLEMENTAL_ARROWS_C,
3743             SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
3744             null,
3745             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3746             null,
3747             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3748             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3749             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
3750             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
3751             null,
3752             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3753             null,
3754             TAGS,
3755             null,
3756             VARIATION_SELECTORS_SUPPLEMENT,
3757             null,
3758             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3759             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3760         };
3761 
3762 
3763         /**
3764          * Looks up the Unicode block that contains the given {@code char}
3765          * value. The argument is widened to an {@code int} code point and
3766          * the lookup is delegated to {@link #of(int)}.
3767          *
3768          * <p><b>Note:</b> A single {@code char} cannot represent a
3769          * <a href="Character.html#supplementary">supplementary
3770          * character</a>. To support all Unicode characters, including
3771          * supplementary characters, use the {@link #of(int)} method.
3772          *
3773          * @param   c  the character to look up
3774          * @return  the {@code UnicodeBlock} instance representing the block
3775          *          of which {@code c} is a member, or {@code null} if
3776          *          {@code c} is not a member of any Unicode block
3777          */
3778         public static UnicodeBlock of(char c) {
3779             final int codePoint = c;
3780             return of(codePoint);
3781         }
3782 
3783         /**
3784          * Finds the Unicode block to which the given character (Unicode
3785          * code point) belongs. The result is {@code null} when the code
3786          * point is valid but is not a member of a defined block.
3787          *
3788          * @param   codePoint the character (Unicode code point) to look up
3789          * @return  the {@code UnicodeBlock} instance representing the
3790          *          Unicode block that contains {@code codePoint}, or
3791          *          {@code null} if it is not a member of any Unicode block
3792          * @throws  IllegalArgumentException if {@code codePoint} is not a
3793          *          valid Unicode code point
3794          * @see Character#isValidCodePoint(int)
3795          * @since   1.5
3796          */
3797         public static UnicodeBlock of(int codePoint) {
3798             if (!isValidCodePoint(codePoint)) {
3799                 final String message = String.format(
3800                     "Not a valid Unicode code point: 0x%X", codePoint);
3801                 throw new IllegalArgumentException(message);
3802             }
3803 
3804             // Binary search over blockStarts on the index window
3805             // [low, high): probe the midpoint and drop the half that
3806             // cannot hold the entry for codePoint, until one index is
3807             // left. Once low has moved, codePoint >= blockStarts[low].
3808             // The block for that entry sits at the same index in blocks.
3809             int low = 0;
3810             int high = blockStarts.length;
3811             while (high - low > 1) {
3812                 final int probe = (low + high) / 2;
3813                 if (codePoint < blockStarts[probe]) {
3814                     high = probe;
3815                 } else {
3816                     low = probe;
3817                 }
3818             }
3819             return blocks[low];
3820         }
3821 
3822         /**
3823          * Looks up a {@code UnicodeBlock} by name. Block names are
3824          * determined by The Unicode Standard; the file
3825          * {@code Blocks-<version>.txt} defines the blocks for a particular
3826          * version of the standard, and the {@link Character} class
3827          * specifies the version of the standard that it supports.
3828          * <p>
3829          * A block may be named in any of the following forms:
3830          * <ol>
3831          * <li>The canonical block name as defined by the Unicode Standard.
3832          * For example, the standard defines a "Basic Latin" block, so
3833          * "Basic Latin" is a valid block name. The documentation of each
3834          * UnicodeBlock provides the canonical name.
3835          * <li>The canonical block name with all spaces removed. For
3836          * example, "BasicLatin" names the "Basic Latin" block.
3837          * <li>The text representation of the constant UnicodeBlock
3838          * identifier. For example, "BASIC_LATIN" yields the
3839          * {@link #BASIC_LATIN} block. This form replaces every space and
3840          * hyphen in the canonical name with an underscore.
3841          * </ol>
3842          * Character case is ignored for all of these forms, so
3843          * "BASIC_LATIN" and "basic_latin" are both valid block names. The
3844          * name is converted to upper case with the en_US locale's case
3845          * mapping rules before it is looked up.
3846          * <p>
3847          * If the Unicode Standard changes block names, both the previous
3848          * and current names will be accepted.
3849          *
3850          * @param blockName a {@code UnicodeBlock} name
3851          * @return the {@code UnicodeBlock} identified by {@code blockName}
3852          * @throws IllegalArgumentException if {@code blockName} is not a
3853          *         valid block name
3854          * @throws NullPointerException if {@code blockName} is null
3855          * @since 1.5
3856          */
3857         public static final UnicodeBlock forName(String blockName) {
3858             final String key = blockName.toUpperCase(Locale.US);
3859             final UnicodeBlock found = map.get(key);
3860             if (found != null) {
3861                 return found;
3862             }
3863             throw new IllegalArgumentException(
3864                 "Not a valid block name: " + blockName);
3865         }
3866     }
3867 
3868 
3869     /**
3870      * A family of character subsets representing the character scripts
3871      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3872      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3873      * character is assigned to a single Unicode script, either a specific
3874      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3875      * one of the following three special values,
3876      * {@link Character.UnicodeScript#INHERITED Inherited},
3877      * {@link Character.UnicodeScript#COMMON Common} or
3878      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3879      *
3880      * @since 1.7
3881      */
3882     public static enum UnicodeScript {
3883         /**
3884          * Unicode script "Common".
3885          */
3886         COMMON,
3887 
3888         /**
3889          * Unicode script "Latin".
3890          */
3891         LATIN,
3892 
3893         /**
3894          * Unicode script "Greek".
3895          */
3896         GREEK,
3897 
3898         /**
3899          * Unicode script "Cyrillic".
3900          */
3901         CYRILLIC,
3902 
3903         /**
3904          * Unicode script "Armenian".
3905          */
3906         ARMENIAN,
3907 
3908         /**
3909          * Unicode script "Hebrew".
3910          */
3911         HEBREW,
3912 
3913         /**
3914          * Unicode script "Arabic".
3915          */
3916         ARABIC,
3917 
3918         /**
3919          * Unicode script "Syriac".
3920          */
3921         SYRIAC,
3922 
3923         /**
3924          * Unicode script "Thaana".
3925          */
3926         THAANA,
3927 
3928         /**
3929          * Unicode script "Devanagari".
3930          */
3931         DEVANAGARI,
3932 
3933         /**
3934          * Unicode script "Bengali".
3935          */
3936         BENGALI,
3937 
3938         /**
3939          * Unicode script "Gurmukhi".
3940          */
3941         GURMUKHI,
3942 
3943         /**
3944          * Unicode script "Gujarati".
3945          */
3946         GUJARATI,
3947 
3948         /**
3949          * Unicode script "Oriya".
3950          */
3951         ORIYA,
3952 
3953         /**
3954          * Unicode script "Tamil".
3955          */
3956         TAMIL,
3957 
3958         /**
3959          * Unicode script "Telugu".
3960          */
3961         TELUGU,
3962 
3963         /**
3964          * Unicode script "Kannada".
3965          */
3966         KANNADA,
3967 
3968         /**
3969          * Unicode script "Malayalam".
3970          */
3971         MALAYALAM,
3972 
3973         /**
3974          * Unicode script "Sinhala".
3975          */
3976         SINHALA,
3977 
3978         /**
3979          * Unicode script "Thai".
3980          */
3981         THAI,
3982 
3983         /**
3984          * Unicode script "Lao".
3985          */
3986         LAO,
3987 
3988         /**
3989          * Unicode script "Tibetan".
3990          */
3991         TIBETAN,
3992 
3993         /**
3994          * Unicode script "Myanmar".
3995          */
3996         MYANMAR,
3997 
3998         /**
3999          * Unicode script "Georgian".
4000          */
4001         GEORGIAN,
4002 
4003         /**
4004          * Unicode script "Hangul".
4005          */
4006         HANGUL,
4007 
4008         /**
4009          * Unicode script "Ethiopic".
4010          */
4011         ETHIOPIC,
4012 
4013         /**
4014          * Unicode script "Cherokee".
4015          */
4016         CHEROKEE,
4017 
4018         /**
4019          * Unicode script "Canadian_Aboriginal".
4020          */
4021         CANADIAN_ABORIGINAL,
4022 
4023         /**
4024          * Unicode script "Ogham".
4025          */
4026         OGHAM,
4027 
4028         /**
4029          * Unicode script "Runic".
4030          */
4031         RUNIC,
4032 
4033         /**
4034          * Unicode script "Khmer".
4035          */
4036         KHMER,
4037 
4038         /**
4039          * Unicode script "Mongolian".
4040          */
4041         MONGOLIAN,
4042 
4043         /**
4044          * Unicode script "Hiragana".
4045          */
4046         HIRAGANA,
4047 
4048         /**
4049          * Unicode script "Katakana".
4050          */
4051         KATAKANA,
4052 
4053         /**
4054          * Unicode script "Bopomofo".
4055          */
4056         BOPOMOFO,
4057 
4058         /**
4059          * Unicode script "Han".
4060          */
4061         HAN,
4062 
4063         /**
4064          * Unicode script "Yi".
4065          */
4066         YI,
4067 
4068         /**
4069          * Unicode script "Old_Italic".
4070          */
4071         OLD_ITALIC,
4072 
4073         /**
4074          * Unicode script "Gothic".
4075          */
4076         GOTHIC,
4077 
4078         /**
4079          * Unicode script "Deseret".
4080          */
4081         DESERET,
4082 
4083         /**
4084          * Unicode script "Inherited".
4085          */
4086         INHERITED,
4087 
4088         /**
4089          * Unicode script "Tagalog".
4090          */
4091         TAGALOG,
4092 
4093         /**
4094          * Unicode script "Hanunoo".
4095          */
4096         HANUNOO,
4097 
4098         /**
4099          * Unicode script "Buhid".
4100          */
4101         BUHID,
4102 
4103         /**
4104          * Unicode script "Tagbanwa".
4105          */
4106         TAGBANWA,
4107 
4108         /**
4109          * Unicode script "Limbu".
4110          */
4111         LIMBU,
4112 
4113         /**
4114          * Unicode script "Tai_Le".
4115          */
4116         TAI_LE,
4117 
4118         /**
4119          * Unicode script "Linear_B".
4120          */
4121         LINEAR_B,
4122 
4123         /**
4124          * Unicode script "Ugaritic".
4125          */
4126         UGARITIC,
4127 
4128         /**
4129          * Unicode script "Shavian".
4130          */
4131         SHAVIAN,
4132 
4133         /**
4134          * Unicode script "Osmanya".
4135          */
4136         OSMANYA,
4137 
4138         /**
4139          * Unicode script "Cypriot".
4140          */
4141         CYPRIOT,
4142 
4143         /**
4144          * Unicode script "Braille".
4145          */
4146         BRAILLE,
4147 
4148         /**
4149          * Unicode script "Buginese".
4150          */
4151         BUGINESE,
4152 
4153         /**
4154          * Unicode script "Coptic".
4155          */
4156         COPTIC,
4157 
4158         /**
4159          * Unicode script "New_Tai_Lue".
4160          */
4161         NEW_TAI_LUE,
4162 
4163         /**
4164          * Unicode script "Glagolitic".
4165          */
4166         GLAGOLITIC,
4167 
4168         /**
4169          * Unicode script "Tifinagh".
4170          */
4171         TIFINAGH,
4172 
4173         /**
4174          * Unicode script "Syloti_Nagri".
4175          */
4176         SYLOTI_NAGRI,
4177 
4178         /**
4179          * Unicode script "Old_Persian".
4180          */
4181         OLD_PERSIAN,
4182 
4183         /**
4184          * Unicode script "Kharoshthi".
4185          */
4186         KHAROSHTHI,
4187 
4188         /**
4189          * Unicode script "Balinese".
4190          */
4191         BALINESE,
4192 
4193         /**
4194          * Unicode script "Cuneiform".
4195          */
4196         CUNEIFORM,
4197 
4198         /**
4199          * Unicode script "Phoenician".
4200          */
4201         PHOENICIAN,
4202 
4203         /**
4204          * Unicode script "Phags_Pa".
4205          */
4206         PHAGS_PA,
4207 
4208         /**
4209          * Unicode script "Nko".
4210          */
4211         NKO,
4212 
4213         /**
4214          * Unicode script "Sundanese".
4215          */
4216         SUNDANESE,
4217 
4218         /**
4219          * Unicode script "Batak".
4220          */
4221         BATAK,
4222 
4223         /**
4224          * Unicode script "Lepcha".
4225          */
4226         LEPCHA,
4227 
4228         /**
4229          * Unicode script "Ol_Chiki".
4230          */
4231         OL_CHIKI,
4232 
4233         /**
4234          * Unicode script "Vai".
4235          */
4236         VAI,
4237 
4238         /**
4239          * Unicode script "Saurashtra".
4240          */
4241         SAURASHTRA,
4242 
4243         /**
4244          * Unicode script "Kayah_Li".
4245          */
4246         KAYAH_LI,
4247 
4248         /**
4249          * Unicode script "Rejang".
4250          */
4251         REJANG,
4252 
4253         /**
4254          * Unicode script "Lycian".
4255          */
4256         LYCIAN,
4257 
4258         /**
4259          * Unicode script "Carian".
4260          */
4261         CARIAN,
4262 
4263         /**
4264          * Unicode script "Lydian".
4265          */
4266         LYDIAN,
4267 
4268         /**
4269          * Unicode script "Cham".
4270          */
4271         CHAM,
4272 
4273         /**
4274          * Unicode script "Tai_Tham".
4275          */
4276         TAI_THAM,
4277 
4278         /**
4279          * Unicode script "Tai_Viet".
4280          */
4281         TAI_VIET,
4282 
4283         /**
4284          * Unicode script "Avestan".
4285          */
4286         AVESTAN,
4287 
4288         /**
4289          * Unicode script "Egyptian_Hieroglyphs".
4290          */
4291         EGYPTIAN_HIEROGLYPHS,
4292 
4293         /**
4294          * Unicode script "Samaritan".
4295          */
4296         SAMARITAN,
4297 
4298         /**
4299          * Unicode script "Mandaic".
4300          */
4301         MANDAIC,
4302 
4303         /**
4304          * Unicode script "Lisu".
4305          */
4306         LISU,
4307 
4308         /**
4309          * Unicode script "Bamum".
4310          */
4311         BAMUM,
4312 
4313         /**
4314          * Unicode script "Javanese".
4315          */
4316         JAVANESE,
4317 
4318         /**
4319          * Unicode script "Meetei_Mayek".
4320          */
4321         MEETEI_MAYEK,
4322 
4323         /**
4324          * Unicode script "Imperial_Aramaic".
4325          */
4326         IMPERIAL_ARAMAIC,
4327 
4328         /**
4329          * Unicode script "Old_South_Arabian".
4330          */
4331         OLD_SOUTH_ARABIAN,
4332 
4333         /**
4334          * Unicode script "Inscriptional_Parthian".
4335          */
4336         INSCRIPTIONAL_PARTHIAN,
4337 
4338         /**
4339          * Unicode script "Inscriptional_Pahlavi".
4340          */
4341         INSCRIPTIONAL_PAHLAVI,
4342 
4343         /**
4344          * Unicode script "Old_Turkic".
4345          */
4346         OLD_TURKIC,
4347 
4348         /**
4349          * Unicode script "Brahmi".
4350          */
4351         BRAHMI,
4352 
4353         /**
4354          * Unicode script "Kaithi".
4355          */
4356         KAITHI,
4357 
4358         /**
4359          * Unicode script "Meroitic_Hieroglyphs".
4360          * @since 1.8
4361          */
4362         MEROITIC_HIEROGLYPHS,
4363 
4364         /**
4365          * Unicode script "Meroitic_Cursive".
4366          * @since 1.8
4367          */
4368         MEROITIC_CURSIVE,
4369 
4370         /**
4371          * Unicode script "Sora_Sompeng".
4372          * @since 1.8
4373          */
4374         SORA_SOMPENG,
4375 
4376         /**
4377          * Unicode script "Chakma".
4378          * @since 1.8
4379          */
4380         CHAKMA,
4381 
4382         /**
4383          * Unicode script "Sharada".
4384          * @since 1.8
4385          */
4386         SHARADA,
4387 
4388         /**
4389          * Unicode script "Takri".
4390          * @since 1.8
4391          */
4392         TAKRI,
4393 
4394         /**
4395          * Unicode script "Miao".
4396          * @since 1.8
4397          */
4398         MIAO,
4399 
4400         /**
4401          * Unicode script "Caucasian_Albanian".
4402          * @since 9
4403          */
4404         CAUCASIAN_ALBANIAN,
4405 
4406         /**
4407          * Unicode script "Bassa_Vah".
4408          * @since 9
4409          */
4410         BASSA_VAH,
4411 
4412         /**
4413          * Unicode script "Duployan".
4414          * @since 9
4415          */
4416         DUPLOYAN,
4417 
4418         /**
4419          * Unicode script "Elbasan".
4420          * @since 9
4421          */
4422         ELBASAN,
4423 
4424         /**
4425          * Unicode script "Grantha".
4426          * @since 9
4427          */
4428         GRANTHA,
4429 
4430         /**
4431          * Unicode script "Pahawh_Hmong".
4432          * @since 9
4433          */
4434         PAHAWH_HMONG,
4435 
4436         /**
4437          * Unicode script "Khojki".
4438          * @since 9
4439          */
4440         KHOJKI,
4441 
4442         /**
4443          * Unicode script "Linear_A".
4444          * @since 9
4445          */
4446         LINEAR_A,
4447 
4448         /**
4449          * Unicode script "Mahajani".
4450          * @since 9
4451          */
4452         MAHAJANI,
4453 
4454         /**
4455          * Unicode script "Manichaean".
4456          * @since 9
4457          */
4458         MANICHAEAN,
4459 
4460         /**
4461          * Unicode script "Mende_Kikakui".
4462          * @since 9
4463          */
4464         MENDE_KIKAKUI,
4465 
4466         /**
4467          * Unicode script "Modi".
4468          * @since 9
4469          */
4470         MODI,
4471 
4472         /**
4473          * Unicode script "Mro".
4474          * @since 9
4475          */
4476         MRO,
4477 
4478         /**
4479          * Unicode script "Old_North_Arabian".
4480          * @since 9
4481          */
4482         OLD_NORTH_ARABIAN,
4483 
4484         /**
4485          * Unicode script "Nabataean".
4486          * @since 9
4487          */
4488         NABATAEAN,
4489 
4490         /**
4491          * Unicode script "Palmyrene".
4492          * @since 9
4493          */
4494         PALMYRENE,
4495 
4496         /**
4497          * Unicode script "Pau_Cin_Hau".
4498          * @since 9
4499          */
4500         PAU_CIN_HAU,
4501 
4502         /**
4503          * Unicode script "Old_Permic".
4504          * @since 9
4505          */
4506         OLD_PERMIC,
4507 
4508         /**
4509          * Unicode script "Psalter_Pahlavi".
4510          * @since 9
4511          */
4512         PSALTER_PAHLAVI,
4513 
4514         /**
4515          * Unicode script "Siddham".
4516          * @since 9
4517          */
4518         SIDDHAM,
4519 
4520         /**
4521          * Unicode script "Khudawadi".
4522          * @since 9
4523          */
4524         KHUDAWADI,
4525 
4526         /**
4527          * Unicode script "Tirhuta".
4528          * @since 9
4529          */
4530         TIRHUTA,
4531 
4532         /**
4533          * Unicode script "Warang_Citi".
4534          * @since 9
4535          */
4536         WARANG_CITI,
4537 
4538          /**
4539          * Unicode script "Ahom".
4540          * @since 9
4541          */
4542         AHOM,
4543 
4544         /**
4545          * Unicode script "Anatolian_Hieroglyphs".
4546          * @since 9
4547          */
4548         ANATOLIAN_HIEROGLYPHS,
4549 
4550         /**
4551          * Unicode script "Hatran".
4552          * @since 9
4553          */
4554         HATRAN,
4555 
4556         /**
4557          * Unicode script "Multani".
4558          * @since 9
4559          */
4560         MULTANI,
4561 
4562         /**
4563          * Unicode script "Old_Hungarian".
4564          * @since 9
4565          */
4566         OLD_HUNGARIAN,
4567 
4568         /**
4569          * Unicode script "SignWriting".
4570          * @since 9
4571          */
4572         SIGNWRITING,
4573 
4574         /**
4575           * Unicode script "Adlam".
4576           * @since 11
4577           */
4578         ADLAM,
4579 
4580         /**
4581           * Unicode script "Bhaiksuki".
4582           * @since 11
4583           */
4584         BHAIKSUKI,
4585 
4586         /**
4587           * Unicode script "Marchen".
4588           * @since 11
4589           */
4590         MARCHEN,
4591 
4592         /**
4593           * Unicode script "Newa".
4594           * @since 11
4595           */
4596         NEWA,
4597 
4598         /**
4599           * Unicode script "Osage".
4600           * @since 11
4601           */
4602         OSAGE,
4603 
4604         /**
4605           * Unicode script "Tangut".
4606           * @since 11
4607           */
4608         TANGUT,
4609 
4610         /**
4611           * Unicode script "Masaram_Gondi".
4612           * @since 11
4613           */
4614         MASARAM_GONDI,
4615 
4616         /**
4617           * Unicode script "Nushu".
4618           * @since 11
4619           */
4620         NUSHU,
4621 
4622         /**
4623           * Unicode script "Soyombo".
4624           * @since 11
4625           */
4626         SOYOMBO,
4627 
4628         /**
4629           * Unicode script "Zanabazar_Square".
4630           * @since 11
4631           */
4632         ZANABAZAR_SQUARE,
4633 
4634         /**
4635          * Unicode script "Unknown".
4636          */
4637         UNKNOWN;
4638 
4639         private static final int[] scriptStarts = {
4640             0x0000,   // 0000..0040; COMMON
4641             0x0041,   // 0041..005A; LATIN
4642             0x005B,   // 005B..0060; COMMON
4643             0x0061,   // 0061..007A; LATIN
4644             0x007B,   // 007B..00A9; COMMON
4645             0x00AA,   // 00AA      ; LATIN
4646             0x00AB,   // 00AB..00B9; COMMON
4647             0x00BA,   // 00BA      ; LATIN
4648             0x00BB,   // 00BB..00BF; COMMON
4649             0x00C0,   // 00C0..00D6; LATIN
4650             0x00D7,   // 00D7      ; COMMON
4651             0x00D8,   // 00D8..00F6; LATIN
4652             0x00F7,   // 00F7      ; COMMON
4653             0x00F8,   // 00F8..02B8; LATIN
4654             0x02B9,   // 02B9..02DF; COMMON
4655             0x02E0,   // 02E0..02E4; LATIN
4656             0x02E5,   // 02E5..02E9; COMMON
4657             0x02EA,   // 02EA..02EB; BOPOMOFO
4658             0x02EC,   // 02EC..02FF; COMMON
4659             0x0300,   // 0300..036F; INHERITED
4660             0x0370,   // 0370..0373; GREEK
4661             0x0374,   // 0374      ; COMMON
4662             0x0375,   // 0375..0377; GREEK
4663             0x0378,   // 0378..0379; UNKNOWN
4664             0x037A,   // 037A..037D; GREEK
4665             0x037E,   // 037E      ; COMMON
4666             0x037F,   // 037F      ; GREEK
4667             0x0380,   // 0380..0383; UNKNOWN
4668             0x0384,   // 0384      ; GREEK
4669             0x0385,   // 0385      ; COMMON
4670             0x0386,   // 0386      ; GREEK
4671             0x0387,   // 0387      ; COMMON
4672             0x0388,   // 0388..038A; GREEK
4673             0x038B,   // 038B      ; UNKNOWN
4674             0x038C,   // 038C      ; GREEK
4675             0x038D,   // 038D      ; UNKNOWN
4676             0x038E,   // 038E..03A1; GREEK
4677             0x03A2,   // 03A2      ; UNKNOWN
4678             0x03A3,   // 03A3..03E1; GREEK
4679             0x03E2,   // 03E2..03EF; COPTIC
4680             0x03F0,   // 03F0..03FF; GREEK
4681             0x0400,   // 0400..0484; CYRILLIC
4682             0x0485,   // 0485..0486; INHERITED
4683             0x0487,   // 0487..052F; CYRILLIC
4684             0x0530,   // 0530      ; UNKNOWN
4685             0x0531,   // 0531..0556; ARMENIAN
4686             0x0557,   // 0557..0558; UNKNOWN
4687             0x0559,   // 0559..055F; ARMENIAN
4688             0x0560,   // 0560      ; UNKNOWN
4689             0x0561,   // 0561..0587; ARMENIAN
4690             0x0588,   // 0588      ; UNKNOWN
4691             0x0589,   // 0589      ; COMMON
4692             0x058A,   // 058A      ; ARMENIAN
4693             0x058B,   // 058B..058C; UNKNOWN
4694             0x058D,   // 058D..058F; ARMENIAN
4695             0x0590,   // 0590      ; UNKNOWN
4696             0x0591,   // 0591..05C7; HEBREW
4697             0x05C8,   // 05C8..05CF; UNKNOWN
4698             0x05D0,   // 05D0..05EA; HEBREW
4699             0x05EB,   // 05EB..05EF; UNKNOWN
4700             0x05F0,   // 05F0..05F4; HEBREW
4701             0x05F5,   // 05F5..05FF; UNKNOWN
4702             0x0600,   // 0600..0604; ARABIC
4703             0x0605,   // 0605      ; COMMON
4704             0x0606,   // 0606..060B; ARABIC
4705             0x060C,   // 060C      ; COMMON
4706             0x060D,   // 060D..061A; ARABIC
4707             0x061B,   // 061B      ; COMMON
4708             0x061C,   // 061C      ; ARABIC
4709             0x061D,   // 061D      ; UNKNOWN
4710             0x061E,   // 061E      ; ARABIC
4711             0x061F,   // 061F      ; COMMON
4712             0x0620,   // 0620..063F; ARABIC
4713             0x0640,   // 0640      ; COMMON
4714             0x0641,   // 0641..064A; ARABIC
4715             0x064B,   // 064B..0655; INHERITED
4716             0x0656,   // 0656..066F; ARABIC
4717             0x0670,   // 0670      ; INHERITED
4718             0x0671,   // 0671..06DC; ARABIC
4719             0x06DD,   // 06DD      ; COMMON
4720             0x06DE,   // 06DE..06FF; ARABIC
4721             0x0700,   // 0700..070D; SYRIAC
4722             0x070E,   // 070E      ; UNKNOWN
4723             0x070F,   // 070F..074A; SYRIAC
4724             0x074B,   // 074B..074C; UNKNOWN
4725             0x074D,   // 074D..074F; SYRIAC
4726             0x0750,   // 0750..077F; ARABIC
4727             0x0780,   // 0780..07B1; THAANA
4728             0x07B2,   // 07B2..07BF; UNKNOWN
4729             0x07C0,   // 07C0..07FA; NKO
4730             0x07FB,   // 07FB..07FF; UNKNOWN
4731             0x0800,   // 0800..082D; SAMARITAN
4732             0x082E,   // 082E..082F; UNKNOWN
4733             0x0830,   // 0830..083E; SAMARITAN
4734             0x083F,   // 083F      ; UNKNOWN
4735             0x0840,   // 0840..085B; MANDAIC
4736             0x085C,   // 085C..085D; UNKNOWN
4737             0x085E,   // 085E      ; MANDAIC
4738             0x085F,   // 085F      ; UNKNOWN
4739             0x0860,   // 0860..086A; SYRIAC
4740             0x086B,   // 086B..089F; UNKNOWN
4741             0x08A0,   // 08A0..08B4; ARABIC
4742             0x08B5,   // 08B5      ; UNKNOWN
4743             0x08B6,   // 08B6..08BD; ARABIC
4744             0x08BE,   // 08BE..08D3; UNKNOWN
4745             0x08D4,   // 08D4..08E1; ARABIC
4746             0x08E2,   // 08E2      ; COMMON
4747             0x08E3,   // 08E3..08FF; ARABIC
4748             0x0900,   // 0900..0950; DEVANAGARI
4749             0x0951,   // 0951..0952; INHERITED
4750             0x0953,   // 0953..0963; DEVANAGARI
4751             0x0964,   // 0964..0965; COMMON
4752             0x0966,   // 0966..097F; DEVANAGARI
4753             0x0980,   // 0980..0983; BENGALI
4754             0x0984,   // 0984      ; UNKNOWN
4755             0x0985,   // 0985..098C; BENGALI
4756             0x098D,   // 098D..098E; UNKNOWN
4757             0x098F,   // 098F..0990; BENGALI
4758             0x0991,   // 0991..0992; UNKNOWN
4759             0x0993,   // 0993..09A8; BENGALI
4760             0x09A9,   // 09A9      ; UNKNOWN
4761             0x09AA,   // 09AA..09B0; BENGALI
4762             0x09B1,   // 09B1      ; UNKNOWN
4763             0x09B2,   // 09B2      ; BENGALI
4764             0x09B3,   // 09B3..09B5; UNKNOWN
4765             0x09B6,   // 09B6..09B9; BENGALI
4766             0x09BA,   // 09BA..09BB; UNKNOWN
4767             0x09BC,   // 09BC..09C4; BENGALI
4768             0x09C5,   // 09C5..09C6; UNKNOWN
4769             0x09C7,   // 09C7..09C8; BENGALI
4770             0x09C9,   // 09C9..09CA; UNKNOWN
4771             0x09CB,   // 09CB..09CE; BENGALI
4772             0x09CF,   // 09CF..09D6; UNKNOWN
4773             0x09D7,   // 09D7      ; BENGALI
4774             0x09D8,   // 09D8..09DB; UNKNOWN
4775             0x09DC,   // 09DC..09DD; BENGALI
4776             0x09DE,   // 09DE      ; UNKNOWN
4777             0x09DF,   // 09DF..09E3; BENGALI
4778             0x09E4,   // 09E4..09E5; UNKNOWN
4779             0x09E6,   // 09E6..09FD; BENGALI
4780             0x09FE,   // 09FE..0A00; UNKNOWN
4781             0x0A01,   // 0A01..0A03; GURMUKHI
4782             0x0A04,   // 0A04      ; UNKNOWN
4783             0x0A05,   // 0A05..0A0A; GURMUKHI
4784             0x0A0B,   // 0A0B..0A0E; UNKNOWN
4785             0x0A0F,   // 0A0F..0A10; GURMUKHI
4786             0x0A11,   // 0A11..0A12; UNKNOWN
4787             0x0A13,   // 0A13..0A28; GURMUKHI
4788             0x0A29,   // 0A29      ; UNKNOWN
4789             0x0A2A,   // 0A2A..0A30; GURMUKHI
4790             0x0A31,   // 0A31      ; UNKNOWN
4791             0x0A32,   // 0A32..0A33; GURMUKHI
4792             0x0A34,   // 0A34      ; UNKNOWN
4793             0x0A35,   // 0A35..0A36; GURMUKHI
4794             0x0A37,   // 0A37      ; UNKNOWN
4795             0x0A38,   // 0A38..0A39; GURMUKHI
4796             0x0A3A,   // 0A3A..0A3B; UNKNOWN
4797             0x0A3C,   // 0A3C      ; GURMUKHI
4798             0x0A3D,   // 0A3D      ; UNKNOWN
4799             0x0A3E,   // 0A3E..0A42; GURMUKHI
4800             0x0A43,   // 0A43..0A46; UNKNOWN
4801             0x0A47,   // 0A47..0A48; GURMUKHI
4802             0x0A49,   // 0A49..0A4A; UNKNOWN
4803             0x0A4B,   // 0A4B..0A4D; GURMUKHI
4804             0x0A4E,   // 0A4E..0A50; UNKNOWN
4805             0x0A51,   // 0A51      ; GURMUKHI
4806             0x0A52,   // 0A52..0A58; UNKNOWN
4807             0x0A59,   // 0A59..0A5C; GURMUKHI
4808             0x0A5D,   // 0A5D      ; UNKNOWN
4809             0x0A5E,   // 0A5E      ; GURMUKHI
4810             0x0A5F,   // 0A5F..0A65; UNKNOWN
4811             0x0A66,   // 0A66..0A75; GURMUKHI
4812             0x0A76,   // 0A76..0A80; UNKNOWN
4813             0x0A81,   // 0A81..0A83; GUJARATI
4814             0x0A84,   // 0A84      ; UNKNOWN
4815             0x0A85,   // 0A85..0A8D; GUJARATI
4816             0x0A8E,   // 0A8E      ; UNKNOWN
4817             0x0A8F,   // 0A8F..0A91; GUJARATI
4818             0x0A92,   // 0A92      ; UNKNOWN
4819             0x0A93,   // 0A93..0AA8; GUJARATI
4820             0x0AA9,   // 0AA9      ; UNKNOWN
4821             0x0AAA,   // 0AAA..0AB0; GUJARATI
4822             0x0AB1,   // 0AB1      ; UNKNOWN
4823             0x0AB2,   // 0AB2..0AB3; GUJARATI
4824             0x0AB4,   // 0AB4      ; UNKNOWN
4825             0x0AB5,   // 0AB5..0AB9; GUJARATI
4826             0x0ABA,   // 0ABA..0ABB; UNKNOWN
4827             0x0ABC,   // 0ABC..0AC5; GUJARATI
4828             0x0AC6,   // 0AC6      ; UNKNOWN
4829             0x0AC7,   // 0AC7..0AC9; GUJARATI
4830             0x0ACA,   // 0ACA      ; UNKNOWN
4831             0x0ACB,   // 0ACB..0ACD; GUJARATI
4832             0x0ACE,   // 0ACE..0ACF; UNKNOWN
4833             0x0AD0,   // 0AD0      ; GUJARATI
4834             0x0AD1,   // 0AD1..0ADF; UNKNOWN
4835             0x0AE0,   // 0AE0..0AE3; GUJARATI
4836             0x0AE4,   // 0AE4..0AE5; UNKNOWN
4837             0x0AE6,   // 0AE6..0AF1; GUJARATI
4838             0x0AF2,   // 0AF2..0AF8; UNKNOWN
4839             0x0AF9,   // 0AF9..0AFF; GUJARATI
4840             0x0B00,   // 0B00      ; UNKNOWN
4841             0x0B01,   // 0B01..0B03; ORIYA
4842             0x0B04,   // 0B04      ; UNKNOWN
4843             0x0B05,   // 0B05..0B0C; ORIYA
4844             0x0B0D,   // 0B0D..0B0E; UNKNOWN
4845             0x0B0F,   // 0B0F..0B10; ORIYA
4846             0x0B11,   // 0B11..0B12; UNKNOWN
4847             0x0B13,   // 0B13..0B28; ORIYA
4848             0x0B29,   // 0B29      ; UNKNOWN
4849             0x0B2A,   // 0B2A..0B30; ORIYA
4850             0x0B31,   // 0B31      ; UNKNOWN
4851             0x0B32,   // 0B32..0B33; ORIYA
4852             0x0B34,   // 0B34      ; UNKNOWN
4853             0x0B35,   // 0B35..0B39; ORIYA
4854             0x0B3A,   // 0B3A..0B3B; UNKNOWN
4855             0x0B3C,   // 0B3C..0B44; ORIYA
4856             0x0B45,   // 0B45..0B46; UNKNOWN
4857             0x0B47,   // 0B47..0B48; ORIYA
4858             0x0B49,   // 0B49..0B4A; UNKNOWN
4859             0x0B4B,   // 0B4B..0B4D; ORIYA
4860             0x0B4E,   // 0B4E..0B55; UNKNOWN
4861             0x0B56,   // 0B56..0B57; ORIYA
4862             0x0B58,   // 0B58..0B5B; UNKNOWN
4863             0x0B5C,   // 0B5C..0B5D; ORIYA
4864             0x0B5E,   // 0B5E      ; UNKNOWN
4865             0x0B5F,   // 0B5F..0B63; ORIYA
4866             0x0B64,   // 0B64..0B65; UNKNOWN
4867             0x0B66,   // 0B66..0B77; ORIYA
4868             0x0B78,   // 0B78..0B81; UNKNOWN
4869             0x0B82,   // 0B82..0B83; TAMIL
4870             0x0B84,   // 0B84      ; UNKNOWN
4871             0x0B85,   // 0B85..0B8A; TAMIL
4872             0x0B8B,   // 0B8B..0B8D; UNKNOWN
4873             0x0B8E,   // 0B8E..0B90; TAMIL
4874             0x0B91,   // 0B91      ; UNKNOWN
4875             0x0B92,   // 0B92..0B95; TAMIL
4876             0x0B96,   // 0B96..0B98; UNKNOWN
4877             0x0B99,   // 0B99..0B9A; TAMIL
4878             0x0B9B,   // 0B9B      ; UNKNOWN
4879             0x0B9C,   // 0B9C      ; TAMIL
4880             0x0B9D,   // 0B9D      ; UNKNOWN
4881             0x0B9E,   // 0B9E..0B9F; TAMIL
4882             0x0BA0,   // 0BA0..0BA2; UNKNOWN
4883             0x0BA3,   // 0BA3..0BA4; TAMIL
4884             0x0BA5,   // 0BA5..0BA7; UNKNOWN
4885             0x0BA8,   // 0BA8..0BAA; TAMIL
4886             0x0BAB,   // 0BAB..0BAD; UNKNOWN
4887             0x0BAE,   // 0BAE..0BB9; TAMIL
4888             0x0BBA,   // 0BBA..0BBD; UNKNOWN
4889             0x0BBE,   // 0BBE..0BC2; TAMIL
4890             0x0BC3,   // 0BC3..0BC5; UNKNOWN
4891             0x0BC6,   // 0BC6..0BC8; TAMIL
4892             0x0BC9,   // 0BC9      ; UNKNOWN
4893             0x0BCA,   // 0BCA..0BCD; TAMIL
4894             0x0BCE,   // 0BCE..0BCF; UNKNOWN
4895             0x0BD0,   // 0BD0      ; TAMIL
4896             0x0BD1,   // 0BD1..0BD6; UNKNOWN
4897             0x0BD7,   // 0BD7      ; TAMIL
4898             0x0BD8,   // 0BD8..0BE5; UNKNOWN
4899             0x0BE6,   // 0BE6..0BFA; TAMIL
4900             0x0BFB,   // 0BFB..0BFF; UNKNOWN
4901             0x0C00,   // 0C00..0C03; TELUGU
4902             0x0C04,   // 0C04      ; UNKNOWN
4903             0x0C05,   // 0C05..0C0C; TELUGU
4904             0x0C0D,   // 0C0D      ; UNKNOWN
4905             0x0C0E,   // 0C0E..0C10; TELUGU
4906             0x0C11,   // 0C11      ; UNKNOWN
4907             0x0C12,   // 0C12..0C28; TELUGU
4908             0x0C29,   // 0C29      ; UNKNOWN
4909             0x0C2A,   // 0C2A..0C39; TELUGU
4910             0x0C3A,   // 0C3A..0C3C; UNKNOWN
4911             0x0C3D,   // 0C3D..0C44; TELUGU
4912             0x0C45,   // 0C45      ; UNKNOWN
4913             0x0C46,   // 0C46..0C48; TELUGU
4914             0x0C49,   // 0C49      ; UNKNOWN
4915             0x0C4A,   // 0C4A..0C4D; TELUGU
4916             0x0C4E,   // 0C4E..0C54; UNKNOWN
4917             0x0C55,   // 0C55..0C56; TELUGU
4918             0x0C57,   // 0C57      ; UNKNOWN
4919             0x0C58,   // 0C58..0C5A; TELUGU
4920             0x0C5B,   // 0C5B..0C5F; UNKNOWN
4921             0x0C60,   // 0C60..0C63; TELUGU
4922             0x0C64,   // 0C64..0C65; UNKNOWN
4923             0x0C66,   // 0C66..0C6F; TELUGU
4924             0x0C70,   // 0C70..0C77; UNKNOWN
4925             0x0C78,   // 0C78..0C7F; TELUGU
4926             0x0C80,   // 0C80..0C83; KANNADA
4927             0x0C84,   // 0C84      ; UNKNOWN
4928             0x0C85,   // 0C85..0C8C; KANNADA
4929             0x0C8D,   // 0C8D      ; UNKNOWN
4930             0x0C8E,   // 0C8E..0C90; KANNADA
4931             0x0C91,   // 0C91      ; UNKNOWN
4932             0x0C92,   // 0C92..0CA8; KANNADA
4933             0x0CA9,   // 0CA9      ; UNKNOWN
4934             0x0CAA,   // 0CAA..0CB3; KANNADA
4935             0x0CB4,   // 0CB4      ; UNKNOWN
4936             0x0CB5,   // 0CB5..0CB9; KANNADA
4937             0x0CBA,   // 0CBA..0CBB; UNKNOWN
4938             0x0CBC,   // 0CBC..0CC4; KANNADA
4939             0x0CC5,   // 0CC5      ; UNKNOWN
4940             0x0CC6,   // 0CC6..0CC8; KANNADA
4941             0x0CC9,   // 0CC9      ; UNKNOWN
4942             0x0CCA,   // 0CCA..0CCD; KANNADA
4943             0x0CCE,   // 0CCE..0CD4; UNKNOWN
4944             0x0CD5,   // 0CD5..0CD6; KANNADA
4945             0x0CD7,   // 0CD7..0CDD; UNKNOWN
4946             0x0CDE,   // 0CDE      ; KANNADA
4947             0x0CDF,   // 0CDF      ; UNKNOWN
4948             0x0CE0,   // 0CE0..0CE3; KANNADA
4949             0x0CE4,   // 0CE4..0CE5; UNKNOWN
4950             0x0CE6,   // 0CE6..0CEF; KANNADA
4951             0x0CF0,   // 0CF0      ; UNKNOWN
4952             0x0CF1,   // 0CF1..0CF2; KANNADA
4953             0x0CF3,   // 0CF3..0CFF; UNKNOWN
4954             0x0D00,   // 0D00..0D03; MALAYALAM
4955             0x0D04,   // 0D04      ; UNKNOWN
4956             0x0D05,   // 0D05..0D0C; MALAYALAM
4957             0x0D0D,   // 0D0D      ; UNKNOWN
4958             0x0D0E,   // 0D0E..0D10; MALAYALAM
4959             0x0D11,   // 0D11      ; UNKNOWN
4960             0x0D12,   // 0D12..0D44; MALAYALAM
4961             0x0D45,   // 0D45      ; UNKNOWN
4962             0x0D46,   // 0D46..0D48; MALAYALAM
4963             0x0D49,   // 0D49      ; UNKNOWN
4964             0x0D4A,   // 0D4A..0D4F; MALAYALAM
4965             0x0D50,   // 0D50..0D53; UNKNOWN
4966             0x0D54,   // 0D54..0D63; MALAYALAM
4967             0x0D64,   // 0D64..0D65; UNKNOWN
4968             0x0D66,   // 0D66..0D7F; MALAYALAM
4969             0x0D80,   // 0D80..0D81; UNKNOWN
4970             0x0D82,   // 0D82..0D83; SINHALA
4971             0x0D84,   // 0D84      ; UNKNOWN
4972             0x0D85,   // 0D85..0D96; SINHALA
4973             0x0D97,   // 0D97..0D99; UNKNOWN
4974             0x0D9A,   // 0D9A..0DB1; SINHALA
4975             0x0DB2,   // 0DB2      ; UNKNOWN
4976             0x0DB3,   // 0DB3..0DBB; SINHALA
4977             0x0DBC,   // 0DBC      ; UNKNOWN
4978             0x0DBD,   // 0DBD      ; SINHALA
4979             0x0DBE,   // 0DBE..0DBF; UNKNOWN
4980             0x0DC0,   // 0DC0..0DC6; SINHALA
4981             0x0DC7,   // 0DC7..0DC9; UNKNOWN
4982             0x0DCA,   // 0DCA      ; SINHALA
4983             0x0DCB,   // 0DCB..0DCE; UNKNOWN
4984             0x0DCF,   // 0DCF..0DD4; SINHALA
4985             0x0DD5,   // 0DD5      ; UNKNOWN
4986             0x0DD6,   // 0DD6      ; SINHALA
4987             0x0DD7,   // 0DD7      ; UNKNOWN
4988             0x0DD8,   // 0DD8..0DDF; SINHALA
4989             0x0DE0,   // 0DE0..0DE5; UNKNOWN
4990             0x0DE6,   // 0DE6..0DEF; SINHALA
4991             0x0DF0,   // 0DF0..0DF1; UNKNOWN
4992             0x0DF2,   // 0DF2..0DF4; SINHALA
4993             0x0DF5,   // 0DF5..0E00; UNKNOWN
4994             0x0E01,   // 0E01..0E3A; THAI
4995             0x0E3B,   // 0E3B..0E3E; UNKNOWN
4996             0x0E3F,   // 0E3F      ; COMMON
4997             0x0E40,   // 0E40..0E5B; THAI
4998             0x0E5C,   // 0E5C..0E80; UNKNOWN
4999             0x0E81,   // 0E81..0E82; LAO
5000             0x0E83,   // 0E83      ; UNKNOWN
5001             0x0E84,   // 0E84      ; LAO
5002             0x0E85,   // 0E85..0E86; UNKNOWN
5003             0x0E87,   // 0E87..0E88; LAO
5004             0x0E89,   // 0E89      ; UNKNOWN
5005             0x0E8A,   // 0E8A      ; LAO
5006             0x0E8B,   // 0E8B..0E8C; UNKNOWN
5007             0x0E8D,   // 0E8D      ; LAO
5008             0x0E8E,   // 0E8E..0E93; UNKNOWN
5009             0x0E94,   // 0E94..0E97; LAO
5010             0x0E98,   // 0E98      ; UNKNOWN
5011             0x0E99,   // 0E99..0E9F; LAO
5012             0x0EA0,   // 0EA0      ; UNKNOWN
5013             0x0EA1,   // 0EA1..0EA3; LAO
5014             0x0EA4,   // 0EA4      ; UNKNOWN
5015             0x0EA5,   // 0EA5      ; LAO
5016             0x0EA6,   // 0EA6      ; UNKNOWN
5017             0x0EA7,   // 0EA7      ; LAO
5018             0x0EA8,   // 0EA8..0EA9; UNKNOWN
5019             0x0EAA,   // 0EAA..0EAB; LAO
5020             0x0EAC,   // 0EAC      ; UNKNOWN
5021             0x0EAD,   // 0EAD..0EB9; LAO
5022             0x0EBA,   // 0EBA      ; UNKNOWN
5023             0x0EBB,   // 0EBB..0EBD; LAO
5024             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5025             0x0EC0,   // 0EC0..0EC4; LAO
5026             0x0EC5,   // 0EC5      ; UNKNOWN
5027             0x0EC6,   // 0EC6      ; LAO
5028             0x0EC7,   // 0EC7      ; UNKNOWN
5029             0x0EC8,   // 0EC8..0ECD; LAO
5030             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5031             0x0ED0,   // 0ED0..0ED9; LAO
5032             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5033             0x0EDC,   // 0EDC..0EDF; LAO
5034             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5035             0x0F00,   // 0F00..0F47; TIBETAN
5036             0x0F48,   // 0F48      ; UNKNOWN
5037             0x0F49,   // 0F49..0F6C; TIBETAN
5038             0x0F6D,   // 0F6D..0F70; UNKNOWN
5039             0x0F71,   // 0F71..0F97; TIBETAN
5040             0x0F98,   // 0F98      ; UNKNOWN
5041             0x0F99,   // 0F99..0FBC; TIBETAN
5042             0x0FBD,   // 0FBD      ; UNKNOWN
5043             0x0FBE,   // 0FBE..0FCC; TIBETAN
5044             0x0FCD,   // 0FCD      ; UNKNOWN
5045             0x0FCE,   // 0FCE..0FD4; TIBETAN
5046             0x0FD5,   // 0FD5..0FD8; COMMON
5047             0x0FD9,   // 0FD9..0FDA; TIBETAN
5048             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5049             0x1000,   // 1000..109F; MYANMAR
5050             0x10A0,   // 10A0..10C5; GEORGIAN
5051             0x10C6,   // 10C6      ; UNKNOWN
5052             0x10C7,   // 10C7      ; GEORGIAN
5053             0x10C8,   // 10C8..10CC; UNKNOWN
5054             0x10CD,   // 10CD      ; GEORGIAN
5055             0x10CE,   // 10CE..10CF; UNKNOWN
5056             0x10D0,   // 10D0..10FA; GEORGIAN
5057             0x10FB,   // 10FB      ; COMMON
5058             0x10FC,   // 10FC..10FF; GEORGIAN
5059             0x1100,   // 1100..11FF; HANGUL
5060             0x1200,   // 1200..1248; ETHIOPIC
5061             0x1249,   // 1249      ; UNKNOWN
5062             0x124A,   // 124A..124D; ETHIOPIC
5063             0x124E,   // 124E..124F; UNKNOWN
5064             0x1250,   // 1250..1256; ETHIOPIC
5065             0x1257,   // 1257      ; UNKNOWN
5066             0x1258,   // 1258      ; ETHIOPIC
5067             0x1259,   // 1259      ; UNKNOWN
5068             0x125A,   // 125A..125D; ETHIOPIC
5069             0x125E,   // 125E..125F; UNKNOWN
5070             0x1260,   // 1260..1288; ETHIOPIC
5071             0x1289,   // 1289      ; UNKNOWN
5072             0x128A,   // 128A..128D; ETHIOPIC
5073             0x128E,   // 128E..128F; UNKNOWN
5074             0x1290,   // 1290..12B0; ETHIOPIC
5075             0x12B1,   // 12B1      ; UNKNOWN
5076             0x12B2,   // 12B2..12B5; ETHIOPIC
5077             0x12B6,   // 12B6..12B7; UNKNOWN
5078             0x12B8,   // 12B8..12BE; ETHIOPIC
5079             0x12BF,   // 12BF      ; UNKNOWN
5080             0x12C0,   // 12C0      ; ETHIOPIC
5081             0x12C1,   // 12C1      ; UNKNOWN
5082             0x12C2,   // 12C2..12C5; ETHIOPIC
5083             0x12C6,   // 12C6..12C7; UNKNOWN
5084             0x12C8,   // 12C8..12D6; ETHIOPIC
5085             0x12D7,   // 12D7      ; UNKNOWN
5086             0x12D8,   // 12D8..1310; ETHIOPIC
5087             0x1311,   // 1311      ; UNKNOWN
5088             0x1312,   // 1312..1315; ETHIOPIC
5089             0x1316,   // 1316..1317; UNKNOWN
5090             0x1318,   // 1318..135A; ETHIOPIC
5091             0x135B,   // 135B..135C; UNKNOWN
5092             0x135D,   // 135D..137C; ETHIOPIC
5093             0x137D,   // 137D..137F; UNKNOWN
5094             0x1380,   // 1380..1399; ETHIOPIC
5095             0x139A,   // 139A..139F; UNKNOWN
5096             0x13A0,   // 13A0..13F5; CHEROKEE
5097             0x13F6,   // 13F6..13F7; UNKNOWN
5098             0x13F8,   // 13F8..13FD; CHEROKEE
5099             0x13FE,   // 13FE..13FF; UNKNOWN
5100             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5101             0x1680,   // 1680..169C; OGHAM
5102             0x169D,   // 169D..169F; UNKNOWN
5103             0x16A0,   // 16A0..16EA; RUNIC
5104             0x16EB,   // 16EB..16ED; COMMON
5105             0x16EE,   // 16EE..16F8; RUNIC
5106             0x16F9,   // 16F9..16FF; UNKNOWN
5107             0x1700,   // 1700..170C; TAGALOG
5108             0x170D,   // 170D      ; UNKNOWN
5109             0x170E,   // 170E..1714; TAGALOG
5110             0x1715,   // 1715..171F; UNKNOWN
5111             0x1720,   // 1720..1734; HANUNOO
5112             0x1735,   // 1735..1736; COMMON
5113             0x1737,   // 1737..173F; UNKNOWN
5114             0x1740,   // 1740..1753; BUHID
5115             0x1754,   // 1754..175F; UNKNOWN
5116             0x1760,   // 1760..176C; TAGBANWA
5117             0x176D,   // 176D      ; UNKNOWN
5118             0x176E,   // 176E..1770; TAGBANWA
5119             0x1771,   // 1771      ; UNKNOWN
5120             0x1772,   // 1772..1773; TAGBANWA
5121             0x1774,   // 1774..177F; UNKNOWN
5122             0x1780,   // 1780..17DD; KHMER
5123             0x17DE,   // 17DE..17DF; UNKNOWN
5124             0x17E0,   // 17E0..17E9; KHMER
5125             0x17EA,   // 17EA..17EF; UNKNOWN
5126             0x17F0,   // 17F0..17F9; KHMER
5127             0x17FA,   // 17FA..17FF; UNKNOWN
5128             0x1800,   // 1800..1801; MONGOLIAN
5129             0x1802,   // 1802..1803; COMMON
5130             0x1804,   // 1804      ; MONGOLIAN
5131             0x1805,   // 1805      ; COMMON
5132             0x1806,   // 1806..180E; MONGOLIAN
5133             0x180F,   // 180F      ; UNKNOWN
5134             0x1810,   // 1810..1819; MONGOLIAN
5135             0x181A,   // 181A..181F; UNKNOWN
5136             0x1820,   // 1820..1877; MONGOLIAN
5137             0x1878,   // 1878..187F; UNKNOWN
5138             0x1880,   // 1880..18AA; MONGOLIAN
5139             0x18AB,   // 18AB..18AF; UNKNOWN
5140             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5141             0x18F6,   // 18F6..18FF; UNKNOWN
5142             0x1900,   // 1900..191E; LIMBU
5143             0x191F,   // 191F      ; UNKNOWN
5144             0x1920,   // 1920..192B; LIMBU
5145             0x192C,   // 192C..192F; UNKNOWN
5146             0x1930,   // 1930..193B; LIMBU
5147             0x193C,   // 193C..193F; UNKNOWN
5148             0x1940,   // 1940      ; LIMBU
5149             0x1941,   // 1941..1943; UNKNOWN
5150             0x1944,   // 1944..194F; LIMBU
5151             0x1950,   // 1950..196D; TAI_LE
5152             0x196E,   // 196E..196F; UNKNOWN
5153             0x1970,   // 1970..1974; TAI_LE
5154             0x1975,   // 1975..197F; UNKNOWN
5155             0x1980,   // 1980..19AB; NEW_TAI_LUE
5156             0x19AC,   // 19AC..19AF; UNKNOWN
5157             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5158             0x19CA,   // 19CA..19CF; UNKNOWN
5159             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5160             0x19DB,   // 19DB..19DD; UNKNOWN
5161             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5162             0x19E0,   // 19E0..19FF; KHMER
5163             0x1A00,   // 1A00..1A1B; BUGINESE
5164             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5165             0x1A1E,   // 1A1E..1A1F; BUGINESE
5166             0x1A20,   // 1A20..1A5E; TAI_THAM
5167             0x1A5F,   // 1A5F      ; UNKNOWN
5168             0x1A60,   // 1A60..1A7C; TAI_THAM
5169             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5170             0x1A7F,   // 1A7F..1A89; TAI_THAM
5171             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5172             0x1A90,   // 1A90..1A99; TAI_THAM
5173             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5174             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5175             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5176             0x1AB0,   // 1AB0..1ABE; INHERITED
5177             0x1ABF,   // 1ABF..1AFF; UNKNOWN
5178             0x1B00,   // 1B00..1B4B; BALINESE
5179             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5180             0x1B50,   // 1B50..1B7C; BALINESE
5181             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5182             0x1B80,   // 1B80..1BBF; SUNDANESE
5183             0x1BC0,   // 1BC0..1BF3; BATAK
5184             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5185             0x1BFC,   // 1BFC..1BFF; BATAK
5186             0x1C00,   // 1C00..1C37; LEPCHA
5187             0x1C38,   // 1C38..1C3A; UNKNOWN
5188             0x1C3B,   // 1C3B..1C49; LEPCHA
5189             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5190             0x1C4D,   // 1C4D..1C4F; LEPCHA
5191             0x1C50,   // 1C50..1C7F; OL_CHIKI
5192             0x1C80,   // 1C80..1C88; CYRILLIC
5193             0x1C89,   // 1C89..1CBF; UNKNOWN
5194             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5195             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5196             0x1CD0,   // 1CD0..1CD2; INHERITED
5197             0x1CD3,   // 1CD3      ; COMMON
5198             0x1CD4,   // 1CD4..1CE0; INHERITED
5199             0x1CE1,   // 1CE1      ; COMMON
5200             0x1CE2,   // 1CE2..1CE8; INHERITED
5201             0x1CE9,   // 1CE9..1CEC; COMMON
5202             0x1CED,   // 1CED      ; INHERITED
5203             0x1CEE,   // 1CEE..1CF3; COMMON
5204             0x1CF4,   // 1CF4      ; INHERITED
5205             0x1CF5,   // 1CF5..1CF7; COMMON
5206             0x1CF8,   // 1CF8..1CF9; INHERITED
5207             0x1CFA,   // 1CFA..1CFF; UNKNOWN
5208             0x1D00,   // 1D00..1D25; LATIN
5209             0x1D26,   // 1D26..1D2A; GREEK
5210             0x1D2B,   // 1D2B      ; CYRILLIC
5211             0x1D2C,   // 1D2C..1D5C; LATIN
5212             0x1D5D,   // 1D5D..1D61; GREEK
5213             0x1D62,   // 1D62..1D65; LATIN
5214             0x1D66,   // 1D66..1D6A; GREEK
5215             0x1D6B,   // 1D6B..1D77; LATIN
5216             0x1D78,   // 1D78      ; CYRILLIC
5217             0x1D79,   // 1D79..1DBE; LATIN
5218             0x1DBF,   // 1DBF      ; GREEK
5219             0x1DC0,   // 1DC0..1DF9; INHERITED
5220             0x1DFA,   // 1DFA      ; UNKNOWN
5221             0x1DFB,   // 1DFB..1DFF; INHERITED
5222             0x1E00,   // 1E00..1EFF; LATIN
5223             0x1F00,   // 1F00..1F15; GREEK
5224             0x1F16,   // 1F16..1F17; UNKNOWN
5225             0x1F18,   // 1F18..1F1D; GREEK
5226             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5227             0x1F20,   // 1F20..1F45; GREEK
5228             0x1F46,   // 1F46..1F47; UNKNOWN
5229             0x1F48,   // 1F48..1F4D; GREEK
5230             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5231             0x1F50,   // 1F50..1F57; GREEK
5232             0x1F58,   // 1F58      ; UNKNOWN
5233             0x1F59,   // 1F59      ; GREEK
5234             0x1F5A,   // 1F5A      ; UNKNOWN
5235             0x1F5B,   // 1F5B      ; GREEK
5236             0x1F5C,   // 1F5C      ; UNKNOWN
5237             0x1F5D,   // 1F5D      ; GREEK
5238             0x1F5E,   // 1F5E      ; UNKNOWN
5239             0x1F5F,   // 1F5F..1F7D; GREEK
5240             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5241             0x1F80,   // 1F80..1FB4; GREEK
5242             0x1FB5,   // 1FB5      ; UNKNOWN
5243             0x1FB6,   // 1FB6..1FC4; GREEK
5244             0x1FC5,   // 1FC5      ; UNKNOWN
5245             0x1FC6,   // 1FC6..1FD3; GREEK
5246             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5247             0x1FD6,   // 1FD6..1FDB; GREEK
5248             0x1FDC,   // 1FDC      ; UNKNOWN
5249             0x1FDD,   // 1FDD..1FEF; GREEK
5250             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5251             0x1FF2,   // 1FF2..1FF4; GREEK
5252             0x1FF5,   // 1FF5      ; UNKNOWN
5253             0x1FF6,   // 1FF6..1FFE; GREEK
5254             0x1FFF,   // 1FFF      ; UNKNOWN
5255             0x2000,   // 2000..200B; COMMON
5256             0x200C,   // 200C..200D; INHERITED
5257             0x200E,   // 200E..2064; COMMON
5258             0x2065,   // 2065      ; UNKNOWN
5259             0x2066,   // 2066..2070; COMMON
5260             0x2071,   // 2071      ; LATIN
5261             0x2072,   // 2072..2073; UNKNOWN
5262             0x2074,   // 2074..207E; COMMON
5263             0x207F,   // 207F      ; LATIN
5264             0x2080,   // 2080..208E; COMMON
5265             0x208F,   // 208F      ; UNKNOWN
5266             0x2090,   // 2090..209C; LATIN
5267             0x209D,   // 209D..209F; UNKNOWN
5268             0x20A0,   // 20A0..20BF; COMMON
5269             0x20C0,   // 20C0..20CF; UNKNOWN
5270             0x20D0,   // 20D0..20F0; INHERITED
5271             0x20F1,   // 20F1..20FF; UNKNOWN
5272             0x2100,   // 2100..2125; COMMON
5273             0x2126,   // 2126      ; GREEK
5274             0x2127,   // 2127..2129; COMMON
5275             0x212A,   // 212A..212B; LATIN
5276             0x212C,   // 212C..2131; COMMON
5277             0x2132,   // 2132      ; LATIN
5278             0x2133,   // 2133..214D; COMMON
5279             0x214E,   // 214E      ; LATIN
5280             0x214F,   // 214F..215F; COMMON
5281             0x2160,   // 2160..2188; LATIN
5282             0x2189,   // 2189..218B; COMMON
5283             0x218C,   // 218C..218F; UNKNOWN
5284             0x2190,   // 2190..2426; COMMON
5285             0x2427,   // 2427..243F; UNKNOWN
5286             0x2440,   // 2440..244A; COMMON
5287             0x244B,   // 244B..245F; UNKNOWN
5288             0x2460,   // 2460..27FF; COMMON
5289             0x2800,   // 2800..28FF; BRAILLE
5290             0x2900,   // 2900..2B73; COMMON
5291             0x2B74,   // 2B74..2B75; UNKNOWN
5292             0x2B76,   // 2B76..2B95; COMMON
5293             0x2B96,   // 2B96..2B97; UNKNOWN
5294             0x2B98,   // 2B98..2BB9; COMMON
5295             0x2BBA,   // 2BBA..2BBC; UNKNOWN
5296             0x2BBD,   // 2BBD..2BC8; COMMON
5297             0x2BC9,   // 2BC9      ; UNKNOWN
5298             0x2BCA,   // 2BCA..2BD2; COMMON
5299             0x2BD3,   // 2BD3..2BEB; UNKNOWN
5300             0x2BEC,   // 2BEC..2BEF; COMMON
5301             0x2BF0,   // 2BF0..2BFF; UNKNOWN
5302             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5303             0x2C2F,   // 2C2F      ; UNKNOWN
5304             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5305             0x2C5F,   // 2C5F      ; UNKNOWN
5306             0x2C60,   // 2C60..2C7F; LATIN
5307             0x2C80,   // 2C80..2CF3; COPTIC
5308             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5309             0x2CF9,   // 2CF9..2CFF; COPTIC
5310             0x2D00,   // 2D00..2D25; GEORGIAN
5311             0x2D26,   // 2D26      ; UNKNOWN
5312             0x2D27,   // 2D27      ; GEORGIAN
5313             0x2D28,   // 2D28..2D2C; UNKNOWN
5314             0x2D2D,   // 2D2D      ; GEORGIAN
5315             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5316             0x2D30,   // 2D30..2D67; TIFINAGH
5317             0x2D68,   // 2D68..2D6E; UNKNOWN
5318             0x2D6F,   // 2D6F..2D70; TIFINAGH
5319             0x2D71,   // 2D71..2D7E; UNKNOWN
5320             0x2D7F,   // 2D7F      ; TIFINAGH
5321             0x2D80,   // 2D80..2D96; ETHIOPIC
5322             0x2D97,   // 2D97..2D9F; UNKNOWN
5323             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5324             0x2DA7,   // 2DA7      ; UNKNOWN
5325             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5326             0x2DAF,   // 2DAF      ; UNKNOWN
5327             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5328             0x2DB7,   // 2DB7      ; UNKNOWN
5329             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5330             0x2DBF,   // 2DBF      ; UNKNOWN
5331             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5332             0x2DC7,   // 2DC7      ; UNKNOWN
5333             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5334             0x2DCF,   // 2DCF      ; UNKNOWN
5335             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5336             0x2DD7,   // 2DD7      ; UNKNOWN
5337             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5338             0x2DDF,   // 2DDF      ; UNKNOWN
5339             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5340             0x2E00,   // 2E00..2E49; COMMON
5341             0x2E4A,   // 2E4A..2E7F; UNKNOWN
5342             0x2E80,   // 2E80..2E99; HAN
5343             0x2E9A,   // 2E9A      ; UNKNOWN
5344             0x2E9B,   // 2E9B..2EF3; HAN
5345             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5346             0x2F00,   // 2F00..2FD5; HAN
5347             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5348             0x2FF0,   // 2FF0..2FFB; COMMON
5349             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5350             0x3000,   // 3000..3004; COMMON
5351             0x3005,   // 3005      ; HAN
5352             0x3006,   // 3006      ; COMMON
5353             0x3007,   // 3007      ; HAN
5354             0x3008,   // 3008..3020; COMMON
5355             0x3021,   // 3021..3029; HAN
5356             0x302A,   // 302A..302D; INHERITED
5357             0x302E,   // 302E..302F; HANGUL
5358             0x3030,   // 3030..3037; COMMON
5359             0x3038,   // 3038..303B; HAN
5360             0x303C,   // 303C..303F; COMMON
5361             0x3040,   // 3040      ; UNKNOWN
5362             0x3041,   // 3041..3096; HIRAGANA
5363             0x3097,   // 3097..3098; UNKNOWN
5364             0x3099,   // 3099..309A; INHERITED
5365             0x309B,   // 309B..309C; COMMON
5366             0x309D,   // 309D..309F; HIRAGANA
5367             0x30A0,   // 30A0      ; COMMON
5368             0x30A1,   // 30A1..30FA; KATAKANA
5369             0x30FB,   // 30FB..30FC; COMMON
5370             0x30FD,   // 30FD..30FF; KATAKANA
5371             0x3100,   // 3100..3104; UNKNOWN
5372             0x3105,   // 3105..312E; BOPOMOFO
5373             0x312F,   // 312F..3130; UNKNOWN
5374             0x3131,   // 3131..318E; HANGUL
5375             0x318F,   // 318F      ; UNKNOWN
5376             0x3190,   // 3190..319F; COMMON
5377             0x31A0,   // 31A0..31BA; BOPOMOFO
5378             0x31BB,   // 31BB..31BF; UNKNOWN
5379             0x31C0,   // 31C0..31E3; COMMON
5380             0x31E4,   // 31E4..31EF; UNKNOWN
5381             0x31F0,   // 31F0..31FF; KATAKANA
5382             0x3200,   // 3200..321E; HANGUL
5383             0x321F,   // 321F      ; UNKNOWN
5384             0x3220,   // 3220..325F; COMMON
5385             0x3260,   // 3260..327E; HANGUL
5386             0x327F,   // 327F..32CF; COMMON
5387             0x32D0,   // 32D0..32FE; KATAKANA
5388             0x32FF,   // 32FF      ; UNKNOWN
5389             0x3300,   // 3300..3357; KATAKANA
5390             0x3358,   // 3358..33FF; COMMON
5391             0x3400,   // 3400..4DB5; HAN
5392             0x4DB6,   // 4DB6..4DBF; UNKNOWN
5393             0x4DC0,   // 4DC0..4DFF; COMMON
5394             0x4E00,   // 4E00..9FEA; HAN
5395             0x9FEB,   // 9FEB..9FFF; UNKNOWN
5396             0xA000,   // A000..A48C; YI
5397             0xA48D,   // A48D..A48F; UNKNOWN
5398             0xA490,   // A490..A4C6; YI
5399             0xA4C7,   // A4C7..A4CF; UNKNOWN
5400             0xA4D0,   // A4D0..A4FF; LISU
5401             0xA500,   // A500..A62B; VAI
5402             0xA62C,   // A62C..A63F; UNKNOWN
5403             0xA640,   // A640..A69F; CYRILLIC
5404             0xA6A0,   // A6A0..A6F7; BAMUM
5405             0xA6F8,   // A6F8..A6FF; UNKNOWN
5406             0xA700,   // A700..A721; COMMON
5407             0xA722,   // A722..A787; LATIN
5408             0xA788,   // A788..A78A; COMMON
5409             0xA78B,   // A78B..A7AE; LATIN
5410             0xA7AF,   // A7AF      ; UNKNOWN
5411             0xA7B0,   // A7B0..A7B7; LATIN
5412             0xA7B8,   // A7B8..A7F6; UNKNOWN
5413             0xA7F7,   // A7F7..A7FF; LATIN
5414             0xA800,   // A800..A82B; SYLOTI_NAGRI
5415             0xA82C,   // A82C..A82F; UNKNOWN
5416             0xA830,   // A830..A839; COMMON
5417             0xA83A,   // A83A..A83F; UNKNOWN
5418             0xA840,   // A840..A877; PHAGS_PA
5419             0xA878,   // A878..A87F; UNKNOWN
5420             0xA880,   // A880..A8C5; SAURASHTRA
5421             0xA8C6,   // A8C6..A8CD; UNKNOWN
5422             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5423             0xA8DA,   // A8DA..A8DF; UNKNOWN
5424             0xA8E0,   // A8E0..A8FD; DEVANAGARI
5425             0xA8FE,   // A8FE..A8FF; UNKNOWN
5426             0xA900,   // A900..A92D; KAYAH_LI
5427             0xA92E,   // A92E      ; COMMON
5428             0xA92F,   // A92F      ; KAYAH_LI
5429             0xA930,   // A930..A953; REJANG
5430             0xA954,   // A954..A95E; UNKNOWN
5431             0xA95F,   // A95F      ; REJANG
5432             0xA960,   // A960..A97C; HANGUL
5433             0xA97D,   // A97D..A97F; UNKNOWN
5434             0xA980,   // A980..A9CD; JAVANESE
5435             0xA9CE,   // A9CE      ; UNKNOWN
5436             0xA9CF,   // A9CF      ; COMMON
5437             0xA9D0,   // A9D0..A9D9; JAVANESE
5438             0xA9DA,   // A9DA..A9DD; UNKNOWN
5439             0xA9DE,   // A9DE..A9DF; JAVANESE
5440             0xA9E0,   // A9E0..A9FE; MYANMAR
5441             0xA9FF,   // A9FF      ; UNKNOWN
5442             0xAA00,   // AA00..AA36; CHAM
5443             0xAA37,   // AA37..AA3F; UNKNOWN
5444             0xAA40,   // AA40..AA4D; CHAM
5445             0xAA4E,   // AA4E..AA4F; UNKNOWN
5446             0xAA50,   // AA50..AA59; CHAM
5447             0xAA5A,   // AA5A..AA5B; UNKNOWN
5448             0xAA5C,   // AA5C..AA5F; CHAM
5449             0xAA60,   // AA60..AA7F; MYANMAR
5450             0xAA80,   // AA80..AAC2; TAI_VIET
5451             0xAAC3,   // AAC3..AADA; UNKNOWN
5452             0xAADB,   // AADB..AADF; TAI_VIET
5453             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5454             0xAAF7,   // AAF7..AB00; UNKNOWN
5455             0xAB01,   // AB01..AB06; ETHIOPIC
5456             0xAB07,   // AB07..AB08; UNKNOWN
5457             0xAB09,   // AB09..AB0E; ETHIOPIC
5458             0xAB0F,   // AB0F..AB10; UNKNOWN
5459             0xAB11,   // AB11..AB16; ETHIOPIC
5460             0xAB17,   // AB17..AB1F; UNKNOWN
5461             0xAB20,   // AB20..AB26; ETHIOPIC
5462             0xAB27,   // AB27      ; UNKNOWN
5463             0xAB28,   // AB28..AB2E; ETHIOPIC
5464             0xAB2F,   // AB2F      ; UNKNOWN
5465             0xAB30,   // AB30..AB5A; LATIN
5466             0xAB5B,   // AB5B      ; COMMON
5467             0xAB5C,   // AB5C..AB64; LATIN
5468             0xAB65,   // AB65      ; GREEK
5469             0xAB66,   // AB66..AB6F; UNKNOWN
5470             0xAB70,   // AB70..ABBF; CHEROKEE
5471             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5472             0xABEE,   // ABEE..ABEF; UNKNOWN
5473             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5474             0xABFA,   // ABFA..ABFF; UNKNOWN
5475             0xAC00,   // AC00..D7A3; HANGUL
5476             0xD7A4,   // D7A4..D7AF; UNKNOWN
5477             0xD7B0,   // D7B0..D7C6; HANGUL
5478             0xD7C7,   // D7C7..D7CA; UNKNOWN
5479             0xD7CB,   // D7CB..D7FB; HANGUL
5480             0xD7FC,   // D7FC..F8FF; UNKNOWN
5481             0xF900,   // F900..FA6D; HAN
5482             0xFA6E,   // FA6E..FA6F; UNKNOWN
5483             0xFA70,   // FA70..FAD9; HAN
5484             0xFADA,   // FADA..FAFF; UNKNOWN
5485             0xFB00,   // FB00..FB06; LATIN
5486             0xFB07,   // FB07..FB12; UNKNOWN
5487             0xFB13,   // FB13..FB17; ARMENIAN
5488             0xFB18,   // FB18..FB1C; UNKNOWN
5489             0xFB1D,   // FB1D..FB36; HEBREW
5490             0xFB37,   // FB37      ; UNKNOWN
5491             0xFB38,   // FB38..FB3C; HEBREW
5492             0xFB3D,   // FB3D      ; UNKNOWN
5493             0xFB3E,   // FB3E      ; HEBREW
5494             0xFB3F,   // FB3F      ; UNKNOWN
5495             0xFB40,   // FB40..FB41; HEBREW
5496             0xFB42,   // FB42      ; UNKNOWN
5497             0xFB43,   // FB43..FB44; HEBREW
5498             0xFB45,   // FB45      ; UNKNOWN
5499             0xFB46,   // FB46..FB4F; HEBREW
5500             0xFB50,   // FB50..FBC1; ARABIC
5501             0xFBC2,   // FBC2..FBD2; UNKNOWN
5502             0xFBD3,   // FBD3..FD3D; ARABIC
5503             0xFD3E,   // FD3E..FD3F; COMMON
5504             0xFD40,   // FD40..FD4F; UNKNOWN
5505             0xFD50,   // FD50..FD8F; ARABIC
5506             0xFD90,   // FD90..FD91; UNKNOWN
5507             0xFD92,   // FD92..FDC7; ARABIC
5508             0xFDC8,   // FDC8..FDEF; UNKNOWN
5509             0xFDF0,   // FDF0..FDFD; ARABIC
5510             0xFDFE,   // FDFE..FDFF; UNKNOWN
5511             0xFE00,   // FE00..FE0F; INHERITED
5512             0xFE10,   // FE10..FE19; COMMON
5513             0xFE1A,   // FE1A..FE1F; UNKNOWN
5514             0xFE20,   // FE20..FE2D; INHERITED
5515             0xFE2E,   // FE2E..FE2F; CYRILLIC
5516             0xFE30,   // FE30..FE52; COMMON
5517             0xFE53,   // FE53      ; UNKNOWN
5518             0xFE54,   // FE54..FE66; COMMON
5519             0xFE67,   // FE67      ; UNKNOWN
5520             0xFE68,   // FE68..FE6B; COMMON
5521             0xFE6C,   // FE6C..FE6F; UNKNOWN
5522             0xFE70,   // FE70..FE74; ARABIC
5523             0xFE75,   // FE75      ; UNKNOWN
5524             0xFE76,   // FE76..FEFC; ARABIC
5525             0xFEFD,   // FEFD..FEFE; UNKNOWN
5526             0xFEFF,   // FEFF      ; COMMON
5527             0xFF00,   // FF00      ; UNKNOWN
5528             0xFF01,   // FF01..FF20; COMMON
5529             0xFF21,   // FF21..FF3A; LATIN
5530             0xFF3B,   // FF3B..FF40; COMMON
5531             0xFF41,   // FF41..FF5A; LATIN
5532             0xFF5B,   // FF5B..FF65; COMMON
5533             0xFF66,   // FF66..FF6F; KATAKANA
5534             0xFF70,   // FF70      ; COMMON
5535             0xFF71,   // FF71..FF9D; KATAKANA
5536             0xFF9E,   // FF9E..FF9F; COMMON
5537             0xFFA0,   // FFA0..FFBE; HANGUL
5538             0xFFBF,   // FFBF..FFC1; UNKNOWN
5539             0xFFC2,   // FFC2..FFC7; HANGUL
5540             0xFFC8,   // FFC8..FFC9; UNKNOWN
5541             0xFFCA,   // FFCA..FFCF; HANGUL
5542             0xFFD0,   // FFD0..FFD1; UNKNOWN
5543             0xFFD2,   // FFD2..FFD7; HANGUL
5544             0xFFD8,   // FFD8..FFD9; UNKNOWN
5545             0xFFDA,   // FFDA..FFDC; HANGUL
5546             0xFFDD,   // FFDD..FFDF; UNKNOWN
5547             0xFFE0,   // FFE0..FFE6; COMMON
5548             0xFFE7,   // FFE7      ; UNKNOWN
5549             0xFFE8,   // FFE8..FFEE; COMMON
5550             0xFFEF,   // FFEF..FFF8; UNKNOWN
5551             0xFFF9,   // FFF9..FFFD; COMMON
5552             0xFFFE,   // FFFE..FFFF; UNKNOWN
5553             0x10000,  // 10000..1000B; LINEAR_B
5554             0x1000C,  // 1000C       ; UNKNOWN
5555             0x1000D,  // 1000D..10026; LINEAR_B
5556             0x10027,  // 10027       ; UNKNOWN
5557             0x10028,  // 10028..1003A; LINEAR_B
5558             0x1003B,  // 1003B       ; UNKNOWN
5559             0x1003C,  // 1003C..1003D; LINEAR_B
5560             0x1003E,  // 1003E       ; UNKNOWN
5561             0x1003F,  // 1003F..1004D; LINEAR_B
5562             0x1004E,  // 1004E..1004F; UNKNOWN
5563             0x10050,  // 10050..1005D; LINEAR_B
5564             0x1005E,  // 1005E..1007F; UNKNOWN
5565             0x10080,  // 10080..100FA; LINEAR_B
5566             0x100FB,  // 100FB..100FF; UNKNOWN
5567             0x10100,  // 10100..10102; COMMON
5568             0x10103,  // 10103..10106; UNKNOWN
5569             0x10107,  // 10107..10133; COMMON
5570             0x10134,  // 10134..10136; UNKNOWN
5571             0x10137,  // 10137..1013F; COMMON
5572             0x10140,  // 10140..1018E; GREEK
5573             0x1018F,  // 1018F       ; UNKNOWN
5574             0x10190,  // 10190..1019B; COMMON
5575             0x1019C,  // 1019C..1019F; UNKNOWN
5576             0x101A0,  // 101A0       ; GREEK
5577             0x101A1,  // 101A1..101CF; UNKNOWN
5578             0x101D0,  // 101D0..101FC; COMMON
5579             0x101FD,  // 101FD       ; INHERITED
5580             0x101FE,  // 101FE..1027F; UNKNOWN
5581             0x10280,  // 10280..1029C; LYCIAN
5582             0x1029D,  // 1029D..1029F; UNKNOWN
5583             0x102A0,  // 102A0..102D0; CARIAN
5584             0x102D1,  // 102D1..102DF; UNKNOWN
5585             0x102E0,  // 102E0       ; INHERITED
5586             0x102E1,  // 102E1..102FB; COMMON
5587             0x102FC,  // 102FC..102FF; UNKNOWN
5588             0x10300,  // 10300..10323; OLD_ITALIC
5589             0x10324,  // 10324..1032C; UNKNOWN
5590             0x1032D,  // 1032D..1032F; OLD_ITALIC
5591             0x10330,  // 10330..1034A; GOTHIC
5592             0x1034B,  // 1034B..1034F; UNKNOWN
5593             0x10350,  // 10350..1037A; OLD_PERMIC
5594             0x1037B,  // 1037B..1037F; UNKNOWN
5595             0x10380,  // 10380..1039D; UGARITIC
5596             0x1039E,  // 1039E       ; UNKNOWN
5597             0x1039F,  // 1039F       ; UGARITIC
5598             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5599             0x103C4,  // 103C4..103C7; UNKNOWN
5600             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5601             0x103D6,  // 103D6..103FF; UNKNOWN
5602             0x10400,  // 10400..1044F; DESERET
5603             0x10450,  // 10450..1047F; SHAVIAN
5604             0x10480,  // 10480..1049D; OSMANYA
5605             0x1049E,  // 1049E..1049F; UNKNOWN
5606             0x104A0,  // 104A0..104A9; OSMANYA
5607             0x104AA,  // 104AA..104AF; UNKNOWN
5608             0x104B0,  // 104B0..104D3; OSAGE
5609             0x104D4,  // 104D4..104D7; UNKNOWN
5610             0x104D8,  // 104D8..104FB; OSAGE
5611             0x104FC,  // 104FC..104FF; UNKNOWN
5612             0x10500,  // 10500..10527; ELBASAN
5613             0x10528,  // 10528..1052F; UNKNOWN
5614             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5615             0x10564,  // 10564..1056E; UNKNOWN
5616             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5617             0x10570,  // 10570..105FF; UNKNOWN
5618             0x10600,  // 10600..10736; LINEAR_A
5619             0x10737,  // 10737..1073F; UNKNOWN
5620             0x10740,  // 10740..10755; LINEAR_A
5621             0x10756,  // 10756..1075F; UNKNOWN
5622             0x10760,  // 10760..10767; LINEAR_A
5623             0x10768,  // 10768..107FF; UNKNOWN
5624             0x10800,  // 10800..10805; CYPRIOT
5625             0x10806,  // 10806..10807; UNKNOWN
5626             0x10808,  // 10808       ; CYPRIOT
5627             0x10809,  // 10809       ; UNKNOWN
5628             0x1080A,  // 1080A..10835; CYPRIOT
5629             0x10836,  // 10836       ; UNKNOWN
5630             0x10837,  // 10837..10838; CYPRIOT
5631             0x10839,  // 10839..1083B; UNKNOWN
5632             0x1083C,  // 1083C       ; CYPRIOT
5633             0x1083D,  // 1083D..1083E; UNKNOWN
5634             0x1083F,  // 1083F       ; CYPRIOT
5635             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5636             0x10856,  // 10856       ; UNKNOWN
5637             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5638             0x10860,  // 10860..1087F; PALMYRENE
5639             0x10880,  // 10880..1089E; NABATAEAN
5640             0x1089F,  // 1089F..108A6; UNKNOWN
5641             0x108A7,  // 108A7..108AF; NABATAEAN
5642             0x108B0,  // 108B0..108DF; UNKNOWN
5643             0x108E0,  // 108E0..108F2; HATRAN
5644             0x108F3,  // 108F3       ; UNKNOWN
5645             0x108F4,  // 108F4..108F5; HATRAN
5646             0x108F6,  // 108F6..108FA; UNKNOWN
5647             0x108FB,  // 108FB..108FF; HATRAN
5648             0x10900,  // 10900..1091B; PHOENICIAN
5649             0x1091C,  // 1091C..1091E; UNKNOWN
5650             0x1091F,  // 1091F       ; PHOENICIAN
5651             0x10920,  // 10920..10939; LYDIAN
5652             0x1093A,  // 1093A..1093E; UNKNOWN
5653             0x1093F,  // 1093F       ; LYDIAN
5654             0x10940,  // 10940..1097F; UNKNOWN
5655             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5656             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5657             0x109B8,  // 109B8..109BB; UNKNOWN
5658             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
5659             0x109D0,  // 109D0..109D1; UNKNOWN
5660             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
5661             0x10A00,  // 10A00..10A03; KHAROSHTHI
5662             0x10A04,  // 10A04       ; UNKNOWN
5663             0x10A05,  // 10A05..10A06; KHAROSHTHI
5664             0x10A07,  // 10A07..10A0B; UNKNOWN
5665             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5666             0x10A14,  // 10A14       ; UNKNOWN
5667             0x10A15,  // 10A15..10A17; KHAROSHTHI
5668             0x10A18,  // 10A18       ; UNKNOWN
5669             0x10A19,  // 10A19..10A33; KHAROSHTHI
5670             0x10A34,  // 10A34..10A37; UNKNOWN
5671             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5672             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5673             0x10A3F,  // 10A3F..10A47; KHAROSHTHI
5674             0x10A48,  // 10A48..10A4F; UNKNOWN
5675             0x10A50,  // 10A50..10A58; KHAROSHTHI
5676             0x10A59,  // 10A59..10A5F; UNKNOWN
5677             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5678             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5679             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5680             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5681             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5682             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5683             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5684             0x10B00,  // 10B00..10B35; AVESTAN
5685             0x10B36,  // 10B36..10B38; UNKNOWN
5686             0x10B39,  // 10B39..10B3F; AVESTAN
5687             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5688             0x10B56,  // 10B56..10B57; UNKNOWN
5689             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5690             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5691             0x10B73,  // 10B73..10B77; UNKNOWN
5692             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5693             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5694             0x10B92,  // 10B92..10B98; UNKNOWN
5695             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5696             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5697             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5698             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5699             0x10C00,  // 10C00..10C48; OLD_TURKIC
5700             0x10C49,  // 10C49..10C7F; UNKNOWN
5701             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
5702             0x10CB3,  // 10CB3..10CBF; UNKNOWN
5703             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
5704             0x10CF3,  // 10CF3..10CF9; UNKNOWN
5705             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
5706             0x10D00,  // 10D00..10E5F; UNKNOWN
5707             0x10E60,  // 10E60..10E7E; ARABIC
5708             0x10E7F,  // 10E7F..10FFF; UNKNOWN
5709             0x11000,  // 11000..1104D; BRAHMI
5710             0x1104E,  // 1104E..11051; UNKNOWN
5711             0x11052,  // 11052..1106F; BRAHMI
5712             0x11070,  // 11070..1107E; UNKNOWN
5713             0x1107F,  // 1107F       ; BRAHMI
5714             0x11080,  // 11080..110C1; KAITHI
5715             0x110C2,  // 110C2..110CF; UNKNOWN
5716             0x110D0,  // 110D0..110E8; SORA_SOMPENG
5717             0x110E9,  // 110E9..110EF; UNKNOWN
5718             0x110F0,  // 110F0..110F9; SORA_SOMPENG
5719             0x110FA,  // 110FA..110FF; UNKNOWN
5720             0x11100,  // 11100..11134; CHAKMA
5721             0x11135,  // 11135       ; UNKNOWN
5722             0x11136,  // 11136..11143; CHAKMA
5723             0x11144,  // 11144..1114F; UNKNOWN
5724             0x11150,  // 11150..11176; MAHAJANI
5725             0x11177,  // 11177..1117F; UNKNOWN
5726             0x11180,  // 11180..111CD; SHARADA
5727             0x111CE,  // 111CE..111CF; UNKNOWN
5728             0x111D0,  // 111D0..111DF; SHARADA
5729             0x111E0,  // 111E0       ; UNKNOWN
5730             0x111E1,  // 111E1..111F4; SINHALA
5731             0x111F5,  // 111F5..111FF; UNKNOWN
5732             0x11200,  // 11200..11211; KHOJKI
5733             0x11212,  // 11212       ; UNKNOWN
5734             0x11213,  // 11213..1123E; KHOJKI
5735             0x1123F,  // 1123F..1127F; UNKNOWN
5736             0x11280,  // 11280..11286; MULTANI
5737             0x11287,  // 11287       ; UNKNOWN
5738             0x11288,  // 11288       ; MULTANI
5739             0x11289,  // 11289       ; UNKNOWN
5740             0x1128A,  // 1128A..1128D; MULTANI
5741             0x1128E,  // 1128E       ; UNKNOWN
5742             0x1128F,  // 1128F..1129D; MULTANI
5743             0x1129E,  // 1129E       ; UNKNOWN
5744             0x1129F,  // 1129F..112A9; MULTANI
5745             0x112AA,  // 112AA..112AF; UNKNOWN
5746             0x112B0,  // 112B0..112EA; KHUDAWADI
5747             0x112EB,  // 112EB..112EF; UNKNOWN
5748             0x112F0,  // 112F0..112F9; KHUDAWADI
5749             0x112FA,  // 112FA..112FF; UNKNOWN
5750             0x11300,  // 11300..11303; GRANTHA
5751             0x11304,  // 11304       ; UNKNOWN
5752             0x11305,  // 11305..1130C; GRANTHA
5753             0x1130D,  // 1130D..1130E; UNKNOWN
5754             0x1130F,  // 1130F..11310; GRANTHA
5755             0x11311,  // 11311..11312; UNKNOWN
5756             0x11313,  // 11313..11328; GRANTHA
5757             0x11329,  // 11329       ; UNKNOWN
5758             0x1132A,  // 1132A..11330; GRANTHA
5759             0x11331,  // 11331       ; UNKNOWN
5760             0x11332,  // 11332..11333; GRANTHA
5761             0x11334,  // 11334       ; UNKNOWN
5762             0x11335,  // 11335..11339; GRANTHA
5763             0x1133A,  // 1133A..1133B; UNKNOWN
5764             0x1133C,  // 1133C..11344; GRANTHA
5765             0x11345,  // 11345..11346; UNKNOWN
5766             0x11347,  // 11347..11348; GRANTHA
5767             0x11349,  // 11349..1134A; UNKNOWN
5768             0x1134B,  // 1134B..1134D; GRANTHA
5769             0x1134E,  // 1134E..1134F; UNKNOWN
5770             0x11350,  // 11350       ; GRANTHA
5771             0x11351,  // 11351..11356; UNKNOWN
5772             0x11357,  // 11357       ; GRANTHA
5773             0x11358,  // 11358..1135C; UNKNOWN
5774             0x1135D,  // 1135D..11363; GRANTHA
5775             0x11364,  // 11364..11365; UNKNOWN
5776             0x11366,  // 11366..1136C; GRANTHA
5777             0x1136D,  // 1136D..1136F; UNKNOWN
5778             0x11370,  // 11370..11374; GRANTHA
5779             0x11375,  // 11375..113FF; UNKNOWN
5780             0x11400,  // 11400..11459; NEWA
5781             0x1145A,  // 1145A       ; UNKNOWN
5782             0x1145B,  // 1145B       ; NEWA
5783             0x1145C,  // 1145C       ; UNKNOWN
5784             0x1145D,  // 1145D       ; NEWA
5785             0x1145E,  // 1145E..1147F; UNKNOWN
5786             0x11480,  // 11480..114C7; TIRHUTA
5787             0x114C8,  // 114C8..114CF; UNKNOWN
5788             0x114D0,  // 114D0..114D9; TIRHUTA
5789             0x114DA,  // 114DA..1157F; UNKNOWN
5790             0x11580,  // 11580..115B5; SIDDHAM
5791             0x115B6,  // 115B6..115B7; UNKNOWN
5792             0x115B8,  // 115B8..115DD; SIDDHAM
5793             0x115DE,  // 115DE..115FF; UNKNOWN
5794             0x11600,  // 11600..11644; MODI
5795             0x11645,  // 11645..1164F; UNKNOWN
5796             0x11650,  // 11650..11659; MODI
5797             0x1165A,  // 1165A..1165F; UNKNOWN
5798             0x11660,  // 11660..1166C; MONGOLIAN
5799             0X1166D,  // 1166D..1167F; UNKNOWN
5800             0x11680,  // 11680..116B7; TAKRI
5801             0x116B8,  // 116B8..116BF; UNKNOWN
5802             0x116C0,  // 116C0..116C9; TAKRI
5803             0x116CA,  // 116CA..116FF; UNKNOWN
5804             0x11700,  // 11700..11719; AHOM
5805             0x1171A,  // 1171A..1171C; UNKNOWN
5806             0x1171D,  // 1171D..1172B; AHOM
5807             0x1172C,  // 1172C..1172F; UNKNOWN
5808             0x11730,  // 11730..1173F; AHOM
5809             0x11740,  // 11740..1189F; UNKNOWN
5810             0x118A0,  // 118A0..118F2; WARANG_CITI
5811             0x118F3,  // 118F3..118FE; UNKNOWN
5812             0x118FF,  // 118FF       ; WARANG_CITI
5813             0x11900,  // 11900..119FF; UNKNOWN
5814             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
5815             0X11A48,  // 11A48..11A4F; UNKNOWN
5816             0x11A50,  // 11A50..11A83; SOYOMBO
5817             0x11A84,  // 11A84..11A85; UNKNOWN
5818             0x11A86,  // 11A86..11A9C; SOYOMBO
5819             0x11A9D,  // 11A9D       ; UNKNOWN
5820             0x11A9E,  // 11A9E..11AA2; SOYOMBO
5821             0x11AA3,  // 11AA3..11ABF; UNKNOWN
5822             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
5823             0x11AF9,  // 11AF9..11BFF; UNKNOWN
5824             0x11C00,  // 11C00..11C08; BHAIKSUKI
5825             0x11C09,  // 11C09       ; UNKNOWN
5826             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
5827             0x11C37,  // 11C37       ; UNKNOWN
5828             0x11C38,  // 11C38..11C45; BHAIKSUKI
5829             0x11C46,  // 11C46..11C4F; UNKNOWN
5830             0x11C50,  // 11C50..11C6C; BHAIKSUKI
5831             0x11C6D,  // 11C6D..11C6F; UNKNOWN
5832             0x11C70,  // 11C70..11C8F; MARCHEN
5833             0x11C90,  // 11C90..11C91; UNKNOWN
5834             0x11C92,  // 11C92..11CA7; MARCHEN
5835             0x11CA8,  // 11CA8       ; UNKNOWN
5836             0x11CA9,  // 11CA9..11CB6; MARCHEN
5837             0x11CB7,  // 11CB7..11CFF; UNKNOWN
5838             0x11D00,  // 11D00..11D06; MASARAM_GONDI
5839             0x11D07,  // 11D07       ; UNKNOWN
5840             0x11D08,  // 11D08..11D09; MASARAM_GONDI
5841             0x11D0A,  // 11D0A       ; UNKNOWN
5842             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
5843             0x11D37,  // 11D37..11D39; UNKNOWN
5844             0x11D3A,  // 11D3A       ; MASARAM_GONDI
5845             0x11D3B,  // 11D3B       ; UNKNOWN
5846             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
5847             0x11D3E,  // 11D3E       ; UNKNOWN
5848             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
5849             0x11D48,  // 11D48..11D4F; UNKNOWN
5850             0x11D50,  // 11D50..11D59; MASARAM_GONDI
5851             0x11D5A,  // 11D5A..11FFF; UNKNOWN
5852             0x12000,  // 12000..12399; CUNEIFORM
5853             0x1239A,  // 1239A..123FF; UNKNOWN
5854             0x12400,  // 12400..1246E; CUNEIFORM
5855             0x1246F,  // 1246F       ; UNKNOWN
5856             0x12470,  // 12470..12474; CUNEIFORM
5857             0x12475,  // 12475..1247F; UNKNOWN
5858             0x12480,  // 12480..12543; CUNEIFORM
5859             0x12544,  // 12544..12FFF; UNKNOWN
5860             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
5861             0x1342F,  // 1342F..143FF; UNKNOWN
5862             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
5863             0x14647,  // 14647..167FF; UNKNOWN
5864             0x16800,  // 16800..16A38; BAMUM
5865             0x16A39,  // 16A39..16A3F; UNKNOWN
5866             0x16A40,  // 16A40..16A5E; MRO
5867             0x16A5F,  // 16A5F       ; UNKNOWN
5868             0x16A60,  // 16A60..16A69; MRO
5869             0x16A6A,  // 16A6A..16A6D; UNKNOWN
5870             0x16A6E,  // 16A6E..16A6F; MRO
5871             0x16A70,  // 16A70..16ACF; UNKNOWN
5872             0x16AD0,  // 16AD0..16AED; BASSA_VAH
5873             0x16AEE,  // 16AEE..16AEF; UNKNOWN
5874             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
5875             0x16AF6,  // 16AF6..16AFF; UNKNOWN
5876             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
5877             0x16B46,  // 16B46..16B4F; UNKNOWN
5878             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
5879             0x16B5A,  // 16B5A       ; UNKNOWN
5880             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
5881             0x16B62,  // 16B62       ; UNKNOWN
5882             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
5883             0x16B78,  // 16B78..16B7C; UNKNOWN
5884             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
5885             0x16B90,  // 16B90..16EFF; UNKNOWN
5886             0x16F00,  // 16F00..16F44; MIAO
5887             0x16F45,  // 16F45..16F4F; UNKNOWN
5888             0x16F50,  // 16F50..16F7E; MIAO
5889             0x16F7F,  // 16F7F..16F8E; UNKNOWN
5890             0x16F8F,  // 16F8F..16F9F; MIAO
5891             0x16FA0,  // 16FA0..16FDF; UNKNOWN
5892             0x16FE0,  // 16FE0       ; TANGUT
5893             0x16FE1,  // 16FE1       ; NUSHU
5894             0x16FE2,  // 16FE2..16FFF; UNKNOWN
5895             0x17000,  // 17000..187EC; TANGUT
5896             0x187ED,  // 187ED..187FF; UNKNOWN
5897             0x18800,  // 18800..18AF2; TANGUT
5898             0x18AF3,  // 18AF3..1AFFF; UNKNOWN
5899             0x1B000,  // 1B000       ; KATAKANA
5900             0x1B001,  // 1B001..1B11E; HIRAGANA
5901             0x1B11F,  // 1B11F..1B16F; UNKNOWN
5902             0x1B170,  // 1B170..1B2FB; NUSHU
5903             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
5904             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
5905             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
5906             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
5907             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
5908             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
5909             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
5910             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
5911             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
5912             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
5913             0x1BCA0,  // 1BCA0..1BCA3; COMMON
5914             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
5915             0x1D000,  // 1D000..1D0F5; COMMON
5916             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
5917             0x1D100,  // 1D100..1D126; COMMON
5918             0x1D127,  // 1D127..1D128; UNKNOWN
5919             0x1D129,  // 1D129..1D166; COMMON
5920             0x1D167,  // 1D167..1D169; INHERITED
5921             0x1D16A,  // 1D16A..1D17A; COMMON
5922             0x1D17B,  // 1D17B..1D182; INHERITED
5923             0x1D183,  // 1D183..1D184; COMMON
5924             0x1D185,  // 1D185..1D18B; INHERITED
5925             0x1D18C,  // 1D18C..1D1A9; COMMON
5926             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
5927             0x1D1AE,  // 1D1AE..1D1E8; COMMON
5928             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
5929             0x1D200,  // 1D200..1D245; GREEK
5930             0x1D246,  // 1D246..1D2FF; UNKNOWN
5931             0x1D300,  // 1D300..1D356; COMMON
5932             0x1D357,  // 1D357..1D35F; UNKNOWN
5933             0x1D360,  // 1D360..1D371; COMMON
5934             0x1D372,  // 1D372..1D3FF; UNKNOWN
5935             0x1D400,  // 1D400..1D454; COMMON
5936             0x1D455,  // 1D455       ; UNKNOWN
5937             0x1D456,  // 1D456..1D49C; COMMON
5938             0x1D49D,  // 1D49D       ; UNKNOWN
5939             0x1D49E,  // 1D49E..1D49F; COMMON
5940             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
5941             0x1D4A2,  // 1D4A2       ; COMMON
5942             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
5943             0x1D4A5,  // 1D4A5..1D4A6; COMMON
5944             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
5945             0x1D4A9,  // 1D4A9..1D4AC; COMMON
5946             0x1D4AD,  // 1D4AD       ; UNKNOWN
5947             0x1D4AE,  // 1D4AE..1D4B9; COMMON
5948             0x1D4BA,  // 1D4BA       ; UNKNOWN
5949             0x1D4BB,  // 1D4BB       ; COMMON
5950             0x1D4BC,  // 1D4BC       ; UNKNOWN
5951             0x1D4BD,  // 1D4BD..1D4C3; COMMON
5952             0x1D4C4,  // 1D4C4       ; UNKNOWN
5953             0x1D4C5,  // 1D4C5..1D505; COMMON
5954             0x1D506,  // 1D506       ; UNKNOWN
5955             0x1D507,  // 1D507..1D50A; COMMON
5956             0x1D50B,  // 1D50B..1D50C; UNKNOWN
5957             0x1D50D,  // 1D50D..1D514; COMMON
5958             0x1D515,  // 1D515       ; UNKNOWN
5959             0x1D516,  // 1D516..1D51C; COMMON
5960             0x1D51D,  // 1D51D       ; UNKNOWN
5961             0x1D51E,  // 1D51E..1D539; COMMON
5962             0x1D53A,  // 1D53A       ; UNKNOWN
5963             0x1D53B,  // 1D53B..1D53E; COMMON
5964             0x1D53F,  // 1D53F       ; UNKNOWN
5965             0x1D540,  // 1D540..1D544; COMMON
5966             0x1D545,  // 1D545       ; UNKNOWN
5967             0x1D546,  // 1D546       ; COMMON
5968             0x1D547,  // 1D547..1D549; UNKNOWN
5969             0x1D54A,  // 1D54A..1D550; COMMON
5970             0x1D551,  // 1D551       ; UNKNOWN
5971             0x1D552,  // 1D552..1D6A5; COMMON
5972             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
5973             0x1D6A8,  // 1D6A8..1D7CB; COMMON
5974             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
5975             0x1D7CE,  // 1D7CE..1D7FF; COMMON
5976             0x1D800,  // 1D800..1DA8B; SIGNWRITING
5977             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
5978             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
5979             0x1DAA0,  // 1DAA0       ; UNKNOWN
5980             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
5981             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
5982             0x1E000,  // 1E000..1E006; GLAGOLITIC
5983             0x1E007,  // 1E007       ; UNKNOWN
5984             0x1E008,  // 1E008..1E018; GLAGOLITIC
5985             0x1E019,  // 1E019..1E01A; UNKNOWN
5986             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
5987             0x1E022,  // 1E022       ; UNKNOWN
5988             0x1E023,  // 1E023..1E024; GLAGOLITIC
5989             0x1E025,  // 1E025       ; UNKNOWN
5990             0x1E026,  // 1E026..1E02A; GLAGOLITIC
5991             0x1E02B,  // 1E02B..1E7FF; UNKNOWN
5992             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
5993             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
5994             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
5995             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
5996             0x1E900,  // 1E900..1E94A; ADLAM
5997             0x1E94B,  // 1E94B..1E94F; UNKNOWN
5998             0x1E950,  // 1E950..1E959; ADLAM
5999             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6000             0x1E95E,  // 1E95E..1E95F; ADLAM
6001             0x1E960,  // 1E960..1EDFF; UNKNOWN
6002             0x1EE00,  // 1EE00..1EE03; ARABIC
6003             0x1EE04,  // 1EE04       ; UNKNOWN
6004             0x1EE05,  // 1EE05..1EE1F; ARABIC
6005             0x1EE20,  // 1EE20       ; UNKNOWN
6006             0x1EE21,  // 1EE21..1EE22; ARABIC
6007             0x1EE23,  // 1EE23       ; UNKNOWN
6008             0x1EE24,  // 1EE24       ; ARABIC
6009             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6010             0x1EE27,  // 1EE27       ; ARABIC
6011             0x1EE28,  // 1EE28       ; UNKNOWN
6012             0x1EE29,  // 1EE29..1EE32; ARABIC
6013             0x1EE33,  // 1EE33       ; UNKNOWN
6014             0x1EE34,  // 1EE34..1EE37; ARABIC
6015             0x1EE38,  // 1EE38       ; UNKNOWN
6016             0x1EE39,  // 1EE39       ; ARABIC
6017             0x1EE3A,  // 1EE3A       ; UNKNOWN
6018             0x1EE3B,  // 1EE3B       ; ARABIC
6019             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6020             0x1EE42,  // 1EE42       ; ARABIC
6021             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6022             0x1EE47,  // 1EE47       ; ARABIC
6023             0x1EE48,  // 1EE48       ; UNKNOWN
6024             0x1EE49,  // 1EE49       ; ARABIC
6025             0x1EE4A,  // 1EE4A       ; UNKNOWN
6026             0x1EE4B,  // 1EE4B       ; ARABIC
6027             0x1EE4C,  // 1EE4C       ; UNKNOWN
6028             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6029             0x1EE50,  // 1EE50       ; UNKNOWN
6030             0x1EE51,  // 1EE51..1EE52; ARABIC
6031             0x1EE53,  // 1EE53       ; UNKNOWN
6032             0x1EE54,  // 1EE54       ; ARABIC
6033             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6034             0x1EE57,  // 1EE57       ; ARABIC
6035             0x1EE58,  // 1EE58       ; UNKNOWN
6036             0x1EE59,  // 1EE59       ; ARABIC
6037             0x1EE5A,  // 1EE5A       ; UNKNOWN
6038             0x1EE5B,  // 1EE5B       ; ARABIC
6039             0x1EE5C,  // 1EE5C       ; UNKNOWN
6040             0x1EE5D,  // 1EE5D       ; ARABIC
6041             0x1EE5E,  // 1EE5E       ; UNKNOWN
6042             0x1EE5F,  // 1EE5F       ; ARABIC
6043             0x1EE60,  // 1EE60       ; UNKNOWN
6044             0x1EE61,  // 1EE61..1EE62; ARABIC
6045             0x1EE63,  // 1EE63       ; UNKNOWN
6046             0x1EE64,  // 1EE64       ; ARABIC
6047             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6048             0x1EE67,  // 1EE67..1EE6A; ARABIC
6049             0x1EE6B,  // 1EE6B       ; UNKNOWN
6050             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6051             0x1EE73,  // 1EE73       ; UNKNOWN
6052             0x1EE74,  // 1EE74..1EE77; ARABIC
6053             0x1EE78,  // 1EE78       ; UNKNOWN
6054             0x1EE79,  // 1EE79..1EE7C; ARABIC
6055             0x1EE7D,  // 1EE7D       ; UNKNOWN
6056             0x1EE7E,  // 1EE7E       ; ARABIC
6057             0x1EE7F,  // 1EE7F       ; UNKNOWN
6058             0x1EE80,  // 1EE80..1EE89; ARABIC
6059             0x1EE8A,  // 1EE8A       ; UNKNOWN
6060             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6061             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6062             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6063             0x1EEA4,  // 1EEA4       ; UNKNOWN
6064             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6065             0x1EEAA,  // 1EEAA       ; UNKNOWN
6066             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6067             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6068             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6069             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6070             0x1F000,  // 1F000..1F02B; COMMON
6071             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6072             0x1F030,  // 1F030..1F093; COMMON
6073             0x1F094,  // 1F094..1F09F; UNKNOWN
6074             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6075             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6076             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6077             0x1F0C0,  // 1F0C0       ; UNKNOWN
6078             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6079             0x1F0D0,  // 1F0D0       ; UNKNOWN
6080             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6081             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6082             0x1F100,  // 1F100..1F10C; COMMON
6083             0x1F10D,  // 1F10D..1F10F; UNKNOWN
6084             0x1F110,  // 1F110..1F12E; COMMON
6085             0x1F12F,  // 1F12F       ; UNKNOWN
6086             0x1F130,  // 1F130..1F16B; COMMON
6087             0x1F16C,  // 1F16C..1F16F; UNKNOWN
6088             0x1F170,  // 1F170..1F1AC; COMMON
6089             0x1F1AD,  // 1F1AD..1F1E5; UNKNOWN
6090             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6091             0x1F200,  // 1F200       ; HIRAGANA
6092             0x1F201,  // 1F201..1F202; COMMON
6093             0x1F203,  // 1F203..1F20F; UNKNOWN
6094             0x1F210,  // 1F210..1F23B; COMMON
6095             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6096             0x1F240,  // 1F240..1F248; COMMON
6097             0x1F249,  // 1F249..1F24F; UNKNOWN
6098             0x1F250,  // 1F250..1F251; COMMON
6099             0x1F252,  // 1F252..1F25F; UNKNOWN
6100             0x1F260,  // 1F260..1F265; COMMON
6101             0x1F266,  // 1F266..1F2FF; UNKNOWN
6102             0x1F300,  // 1F300..1F6D4; COMMON
6103             0x1F6D5,  // 1F6D5..1F6DF; UNKNOWN
6104             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6105             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6106             0x1F6F0,  // 1F6F0..1F6F8; COMMON
6107             0x1F6F9,  // 1F6F9..1F6FF; UNKNOWN
6108             0x1F700,  // 1F700..1F773; COMMON
6109             0x1F774,  // 1F774..1F77F; UNKNOWN
6110             0x1F780,  // 1F780..1F7D4; COMMON
6111             0x1F7D5,  // 1F7D5..1F7FF; UNKNOWN
6112             0x1F800,  // 1F800..1F80B; COMMON
6113             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6114             0x1F810,  // 1F810..1F847; COMMON
6115             0x1F848,  // 1F848..1F84F; UNKNOWN
6116             0x1F850,  // 1F850..1F859; COMMON
6117             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6118             0x1F860,  // 1F860..1F887; COMMON
6119             0x1F888,  // 1F888..1F88F; UNKNOWN
6120             0x1F890,  // 1F890..1F8AD; COMMON
6121             0x1F8AE,  // 1F8AE..1F8FF; UNKNOWN
6122             0x1F900,  // 1F900..1F90B; COMMON
6123             0x1F90C,  // 1F90C..1F90F; UNKNOWN
6124             0x1F910,  // 1F910..1F93E; COMMON
6125             0x1F93F,  // 1F93F       ; UNKNOWN
6126             0x1F940,  // 1F940..1F94C; COMMON
6127             0x1F94D,  // 1F94D..1F94F; UNKNOWN
6128             0x1F950,  // 1F950..1F96B; COMMON
6129             0x1F96C,  // 1F96C..1F97F; UNKNOWN
6130             0x1F980,  // 1F980..1F997; COMMON
6131             0x1F998,  // 1F998..1F9BF; UNKNOWN
6132             0x1F9C0,  // 1F9C0       ; COMMON
6133             0x1F9C1,  // 1F9C1..1F9CF; UNKNOWN
6134             0x1F9D0,  // 1F9D0..1F9E6; COMMON
6135             0x1F9E7,  // 1F9E7..1FFFF; UNKNOWN
6136             0x20000,  // 20000..2A6D6; HAN
6137             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
6138             0x2A700,  // 2A700..2B734; HAN
6139             0x2B735,  // 2B735..2B73F; UNKNOWN
6140             0x2B740,  // 2B740..2B81D; HAN
6141             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6142             0x2B820,  // 2B820..2CEA1; HAN
6143             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6144             0x2CEB0,  // 2CEB0..2EBE0; HAN
6145             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6146             0x2F800,  // 2F800..2FA1D; HAN
6147             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
6148             0xE0001,  // E0001       ; COMMON
6149             0xE0002,  // E0002..E001F; UNKNOWN
6150             0xE0020,  // E0020..E007F; COMMON
6151             0xE0080,  // E0080..E00FF; UNKNOWN
6152             0xE0100,  // E0100..E01EF; INHERITED
6153             0xE01F0   // E01F0..10FFFF; UNKNOWN
6154         };
6155 
6156         private static final UnicodeScript[] scripts = {
6157             COMMON,                   // 0000..0040
6158             LATIN,                    // 0041..005A
6159             COMMON,                   // 005B..0060
6160             LATIN,                    // 0061..007A
6161             COMMON,                   // 007B..00A9
6162             LATIN,                    // 00AA
6163             COMMON,                   // 00AB..00B9
6164             LATIN,                    // 00BA
6165             COMMON,                   // 00BB..00BF
6166             LATIN,                    // 00C0..00D6
6167             COMMON,                   // 00D7
6168             LATIN,                    // 00D8..00F6
6169             COMMON,                   // 00F7
6170             LATIN,                    // 00F8..02B8
6171             COMMON,                   // 02B9..02DF
6172             LATIN,                    // 02E0..02E4
6173             COMMON,                   // 02E5..02E9
6174             BOPOMOFO,                 // 02EA..02EB
6175             COMMON,                   // 02EC..02FF
6176             INHERITED,                // 0300..036F
6177             GREEK,                    // 0370..0373
6178             COMMON,                   // 0374
6179             GREEK,                    // 0375..0377
6180             UNKNOWN,                  // 0378..0379
6181             GREEK,                    // 037A..037D
6182             COMMON,                   // 037E
6183             GREEK,                    // 037F
6184             UNKNOWN,                  // 0380..0383
6185             GREEK,                    // 0384
6186             COMMON,                   // 0385
6187             GREEK,                    // 0386
6188             COMMON,                   // 0387
6189             GREEK,                    // 0388..038A
6190             UNKNOWN,                  // 038B
6191             GREEK,                    // 038C
6192             UNKNOWN,                  // 038D
6193             GREEK,                    // 038E..03A1
6194             UNKNOWN,                  // 03A2
6195             GREEK,                    // 03A3..03E1
6196             COPTIC,                   // 03E2..03EF
6197             GREEK,                    // 03F0..03FF
6198             CYRILLIC,                 // 0400..0484
6199             INHERITED,                // 0485..0486
6200             CYRILLIC,                 // 0487..052F
6201             UNKNOWN,                  // 0530
6202             ARMENIAN,                 // 0531..0556
6203             UNKNOWN,                  // 0557..0558
6204             ARMENIAN,                 // 0559..055F
6205             UNKNOWN,                  // 0560
6206             ARMENIAN,                 // 0561..0587
6207             UNKNOWN,                  // 0588
6208             COMMON,                   // 0589
6209             ARMENIAN,                 // 058A
6210             UNKNOWN,                  // 058B..058C
6211             ARMENIAN,                 // 058D..058F
6212             UNKNOWN,                  // 0590
6213             HEBREW,                   // 0591..05C7
6214             UNKNOWN,                  // 05C8..05CF
6215             HEBREW,                   // 05D0..05EA
6216             UNKNOWN,                  // 05EB..05EF
6217             HEBREW,                   // 05F0..05F4
6218             UNKNOWN,                  // 05F5..05FF
6219             ARABIC,                   // 0600..0604
6220             COMMON,                   // 0605
6221             ARABIC,                   // 0606..060B
6222             COMMON,                   // 060C
6223             ARABIC,                   // 060D..061A
6224             COMMON,                   // 061B
6225             ARABIC,                   // 061C
6226             UNKNOWN,                  // 061D
6227             ARABIC,                   // 061E
6228             COMMON,                   // 061F
6229             ARABIC,                   // 0620..063F
6230             COMMON,                   // 0640
6231             ARABIC,                   // 0641..064A
6232             INHERITED,                // 064B..0655
6233             ARABIC,                   // 0656..066F
6234             INHERITED,                // 0670
6235             ARABIC,                   // 0671..06DC
6236             COMMON,                   // 06DD
6237             ARABIC,                   // 06DE..06FF
6238             SYRIAC,                   // 0700..070D
6239             UNKNOWN,                  // 070E
6240             SYRIAC,                   // 070F..074A
6241             UNKNOWN,                  // 074B..074C
6242             SYRIAC,                   // 074D..074F
6243             ARABIC,                   // 0750..077F
6244             THAANA,                   // 0780..07B1
6245             UNKNOWN,                  // 07B2..07BF
6246             NKO,                      // 07C0..07FA
6247             UNKNOWN,                  // 07FB..07FF
6248             SAMARITAN,                // 0800..082D
6249             UNKNOWN,                  // 082E..082F
6250             SAMARITAN,                // 0830..083E
6251             UNKNOWN,                  // 083F
6252             MANDAIC,                  // 0840..085B
6253             UNKNOWN,                  // 085C..085D
6254             MANDAIC,                  // 085E
6255             UNKNOWN,                  // 085F
6256             SYRIAC,                   // 0860..086A
6257             UNKNOWN,                  // 086B..089F
6258             ARABIC,                   // 08A0..08B4
6259             UNKNOWN,                  // 08B5
6260             ARABIC,                   // 08B6..08BD
6261             UNKNOWN,                  // 08BE..08D3
6262             ARABIC,                   // 08D4..08E1
6263             COMMON,                   // 08E2
6264             ARABIC,                   // 08E3..08FF
6265             DEVANAGARI,               // 0900..0950
6266             INHERITED,                // 0951..0952
6267             DEVANAGARI,               // 0953..0963
6268             COMMON,                   // 0964..0965
6269             DEVANAGARI,               // 0966..097F
6270             BENGALI,                  // 0980..0983
6271             UNKNOWN,                  // 0984
6272             BENGALI,                  // 0985..098C
6273             UNKNOWN,                  // 098D..098E
6274             BENGALI,                  // 098F..0990
6275             UNKNOWN,                  // 0991..0992
6276             BENGALI,                  // 0993..09A8
6277             UNKNOWN,                  // 09A9
6278             BENGALI,                  // 09AA..09B0
6279             UNKNOWN,                  // 09B1
6280             BENGALI,                  // 09B2
6281             UNKNOWN,                  // 09B3..09B5
6282             BENGALI,                  // 09B6..09B9
6283             UNKNOWN,                  // 09BA..09BB
6284             BENGALI,                  // 09BC..09C4
6285             UNKNOWN,                  // 09C5..09C6
6286             BENGALI,                  // 09C7..09C8
6287             UNKNOWN,                  // 09C9..09CA
6288             BENGALI,                  // 09CB..09CE
6289             UNKNOWN,                  // 09CF..09D6
6290             BENGALI,                  // 09D7
6291             UNKNOWN,                  // 09D8..09DB
6292             BENGALI,                  // 09DC..09DD
6293             UNKNOWN,                  // 09DE
6294             BENGALI,                  // 09DF..09E3
6295             UNKNOWN,                  // 09E4..09E5
6296             BENGALI,                  // 09E6..09FD
6297             UNKNOWN,                  // 09FE..0A00
6298             GURMUKHI,                 // 0A01..0A03
6299             UNKNOWN,                  // 0A04
6300             GURMUKHI,                 // 0A05..0A0A
6301             UNKNOWN,                  // 0A0B..0A0E
6302             GURMUKHI,                 // 0A0F..0A10
6303             UNKNOWN,                  // 0A11..0A12
6304             GURMUKHI,                 // 0A13..0A28
6305             UNKNOWN,                  // 0A29
6306             GURMUKHI,                 // 0A2A..0A30
6307             UNKNOWN,                  // 0A31
6308             GURMUKHI,                 // 0A32..0A33
6309             UNKNOWN,                  // 0A34
6310             GURMUKHI,                 // 0A35..0A36
6311             UNKNOWN,                  // 0A37
6312             GURMUKHI,                 // 0A38..0A39
6313             UNKNOWN,                  // 0A3A..0A3B
6314             GURMUKHI,                 // 0A3C
6315             UNKNOWN,                  // 0A3D
6316             GURMUKHI,                 // 0A3E..0A42
6317             UNKNOWN,                  // 0A43..0A46
6318             GURMUKHI,                 // 0A47..0A48
6319             UNKNOWN,                  // 0A49..0A4A
6320             GURMUKHI,                 // 0A4B..0A4D
6321             UNKNOWN,                  // 0A4E..0A50
6322             GURMUKHI,                 // 0A51
6323             UNKNOWN,                  // 0A52..0A58
6324             GURMUKHI,                 // 0A59..0A5C
6325             UNKNOWN,                  // 0A5D
6326             GURMUKHI,                 // 0A5E
6327             UNKNOWN,                  // 0A5F..0A65
6328             GURMUKHI,                 // 0A66..0A75
6329             UNKNOWN,                  // 0A76..0A80
6330             GUJARATI,                 // 0A81..0A83
6331             UNKNOWN,                  // 0A84
6332             GUJARATI,                 // 0A85..0A8D
6333             UNKNOWN,                  // 0A8E
6334             GUJARATI,                 // 0A8F..0A91
6335             UNKNOWN,                  // 0A92
6336             GUJARATI,                 // 0A93..0AA8
6337             UNKNOWN,                  // 0AA9
6338             GUJARATI,                 // 0AAA..0AB0
6339             UNKNOWN,                  // 0AB1
6340             GUJARATI,                 // 0AB2..0AB3
6341             UNKNOWN,                  // 0AB4
6342             GUJARATI,                 // 0AB5..0AB9
6343             UNKNOWN,                  // 0ABA..0ABB
6344             GUJARATI,                 // 0ABC..0AC5
6345             UNKNOWN,                  // 0AC6
6346             GUJARATI,                 // 0AC7..0AC9
6347             UNKNOWN,                  // 0ACA
6348             GUJARATI,                 // 0ACB..0ACD
6349             UNKNOWN,                  // 0ACE..0ACF
6350             GUJARATI,                 // 0AD0
6351             UNKNOWN,                  // 0AD1..0ADF
6352             GUJARATI,                 // 0AE0..0AE3
6353             UNKNOWN,                  // 0AE4..0AE5
6354             GUJARATI,                 // 0AE6..0AF1
6355             UNKNOWN,                  // 0AF2..0AF8
6356             GUJARATI,                 // 0AF9..0AFF
6357             UNKNOWN,                  // 0B00
6358             ORIYA,                    // 0B01..0B03
6359             UNKNOWN,                  // 0B04
6360             ORIYA,                    // 0B05..0B0C
6361             UNKNOWN,                  // 0B0D..0B0E
6362             ORIYA,                    // 0B0F..0B10
6363             UNKNOWN,                  // 0B11..0B12
6364             ORIYA,                    // 0B13..0B28
6365             UNKNOWN,                  // 0B29
6366             ORIYA,                    // 0B2A..0B30
6367             UNKNOWN,                  // 0B31
6368             ORIYA,                    // 0B32..0B33
6369             UNKNOWN,                  // 0B34
6370             ORIYA,                    // 0B35..0B39
6371             UNKNOWN,                  // 0B3A..0B3B
6372             ORIYA,                    // 0B3C..0B44
6373             UNKNOWN,                  // 0B45..0B46
6374             ORIYA,                    // 0B47..0B48
6375             UNKNOWN,                  // 0B49..0B4A
6376             ORIYA,                    // 0B4B..0B4D
6377             UNKNOWN,                  // 0B4E..0B55
6378             ORIYA,                    // 0B56..0B57
6379             UNKNOWN,                  // 0B58..0B5B
6380             ORIYA,                    // 0B5C..0B5D
6381             UNKNOWN,                  // 0B5E
6382             ORIYA,                    // 0B5F..0B63
6383             UNKNOWN,                  // 0B64..0B65
6384             ORIYA,                    // 0B66..0B77
6385             UNKNOWN,                  // 0B78..0B81
6386             TAMIL,                    // 0B82..0B83
6387             UNKNOWN,                  // 0B84
6388             TAMIL,                    // 0B85..0B8A
6389             UNKNOWN,                  // 0B8B..0B8D
6390             TAMIL,                    // 0B8E..0B90
6391             UNKNOWN,                  // 0B91
6392             TAMIL,                    // 0B92..0B95
6393             UNKNOWN,                  // 0B96..0B98
6394             TAMIL,                    // 0B99..0B9A
6395             UNKNOWN,                  // 0B9B
6396             TAMIL,                    // 0B9C
6397             UNKNOWN,                  // 0B9D
6398             TAMIL,                    // 0B9E..0B9F
6399             UNKNOWN,                  // 0BA0..0BA2
6400             TAMIL,                    // 0BA3..0BA4
6401             UNKNOWN,                  // 0BA5..0BA7
6402             TAMIL,                    // 0BA8..0BAA
6403             UNKNOWN,                  // 0BAB..0BAD
6404             TAMIL,                    // 0BAE..0BB9
6405             UNKNOWN,                  // 0BBA..0BBD
6406             TAMIL,                    // 0BBE..0BC2
6407             UNKNOWN,                  // 0BC3..0BC5
6408             TAMIL,                    // 0BC6..0BC8
6409             UNKNOWN,                  // 0BC9
6410             TAMIL,                    // 0BCA..0BCD
6411             UNKNOWN,                  // 0BCE..0BCF
6412             TAMIL,                    // 0BD0
6413             UNKNOWN,                  // 0BD1..0BD6
6414             TAMIL,                    // 0BD7
6415             UNKNOWN,                  // 0BD8..0BE5
6416             TAMIL,                    // 0BE6..0BFA
6417             UNKNOWN,                  // 0BFB..0BFF
6418             TELUGU,                   // 0C00..0C03
6419             UNKNOWN,                  // 0C04
6420             TELUGU,                   // 0C05..0C0C
6421             UNKNOWN,                  // 0C0D
6422             TELUGU,                   // 0C0E..0C10
6423             UNKNOWN,                  // 0C11
6424             TELUGU,                   // 0C12..0C28
6425             UNKNOWN,                  // 0C29
6426             TELUGU,                   // 0C2A..0C39
6427             UNKNOWN,                  // 0C3A..0C3C
6428             TELUGU,                   // 0C3D..0C44
6429             UNKNOWN,                  // 0C45
6430             TELUGU,                   // 0C46..0C48
6431             UNKNOWN,                  // 0C49
6432             TELUGU,                   // 0C4A..0C4D
6433             UNKNOWN,                  // 0C4E..0C54
6434             TELUGU,                   // 0C55..0C56
6435             UNKNOWN,                  // 0C57
6436             TELUGU,                   // 0C58..0C5A
6437             UNKNOWN,                  // 0C5B..0C5F
6438             TELUGU,                   // 0C60..0C63
6439             UNKNOWN,                  // 0C64..0C65
6440             TELUGU,                   // 0C66..0C6F
6441             UNKNOWN,                  // 0C70..0C77
6442             TELUGU,                   // 0C78..0C7F
6443             KANNADA,                  // 0C80..0C83
6444             UNKNOWN,                  // 0C84
6445             KANNADA,                  // 0C85..0C8C
6446             UNKNOWN,                  // 0C8D
6447             KANNADA,                  // 0C8E..0C90
6448             UNKNOWN,                  // 0C91
6449             KANNADA,                  // 0C92..0CA8
6450             UNKNOWN,                  // 0CA9
6451             KANNADA,                  // 0CAA..0CB3
6452             UNKNOWN,                  // 0CB4
6453             KANNADA,                  // 0CB5..0CB9
6454             UNKNOWN,                  // 0CBA..0CBB
6455             KANNADA,                  // 0CBC..0CC4
6456             UNKNOWN,                  // 0CC5
6457             KANNADA,                  // 0CC6..0CC8
6458             UNKNOWN,                  // 0CC9
6459             KANNADA,                  // 0CCA..0CCD
6460             UNKNOWN,                  // 0CCE..0CD4
6461             KANNADA,                  // 0CD5..0CD6
6462             UNKNOWN,                  // 0CD7..0CDD
6463             KANNADA,                  // 0CDE
6464             UNKNOWN,                  // 0CDF
6465             KANNADA,                  // 0CE0..0CE3
6466             UNKNOWN,                  // 0CE4..0CE5
6467             KANNADA,                  // 0CE6..0CEF
6468             UNKNOWN,                  // 0CF0
6469             KANNADA,                  // 0CF1..0CF2
6470             UNKNOWN,                  // 0CF3..0CFF
6471             MALAYALAM,                // 0D00..0D03
6472             UNKNOWN,                  // 0D04
6473             MALAYALAM,                // 0D05..0D0C
6474             UNKNOWN,                  // 0D0D
6475             MALAYALAM,                // 0D0E..0D10
6476             UNKNOWN,                  // 0D11
6477             MALAYALAM,                // 0D12..0D44
6478             UNKNOWN,                  // 0D45
6479             MALAYALAM,                // 0D46..0D48
6480             UNKNOWN,                  // 0D49
6481             MALAYALAM,                // 0D4A..0D4F
6482             UNKNOWN,                  // 0D50..0D53
6483             MALAYALAM,                // 0D54..0D63
6484             UNKNOWN,                  // 0D64..0D65
6485             MALAYALAM,                // 0D66..0D7F
6486             UNKNOWN,                  // 0D80..0D81
6487             SINHALA,                  // 0D82..0D83
6488             UNKNOWN,                  // 0D84
6489             SINHALA,                  // 0D85..0D96
6490             UNKNOWN,                  // 0D97..0D99
6491             SINHALA,                  // 0D9A..0DB1
6492             UNKNOWN,                  // 0DB2
6493             SINHALA,                  // 0DB3..0DBB
6494             UNKNOWN,                  // 0DBC
6495             SINHALA,                  // 0DBD
6496             UNKNOWN,                  // 0DBE..0DBF
6497             SINHALA,                  // 0DC0..0DC6
6498             UNKNOWN,                  // 0DC7..0DC9
6499             SINHALA,                  // 0DCA
6500             UNKNOWN,                  // 0DCB..0DCE
6501             SINHALA,                  // 0DCF..0DD4
6502             UNKNOWN,                  // 0DD5
6503             SINHALA,                  // 0DD6
6504             UNKNOWN,                  // 0DD7
6505             SINHALA,                  // 0DD8..0DDF
6506             UNKNOWN,                  // 0DE0..0DE5
6507             SINHALA,                  // 0DE6..0DEF
6508             UNKNOWN,                  // 0DF0..0DF1
6509             SINHALA,                  // 0DF2..0DF4
6510             UNKNOWN,                  // 0DF5..0E00
6511             THAI,                     // 0E01..0E3A
6512             UNKNOWN,                  // 0E3B..0E3E
6513             COMMON,                   // 0E3F
6514             THAI,                     // 0E40..0E5B
6515             UNKNOWN,                  // 0E5C..0E80
6516             LAO,                      // 0E81..0E82
6517             UNKNOWN,                  // 0E83
6518             LAO,                      // 0E84
6519             UNKNOWN,                  // 0E85..0E86
6520             LAO,                      // 0E87..0E88
6521             UNKNOWN,                  // 0E89
6522             LAO,                      // 0E8A
6523             UNKNOWN,                  // 0E8B..0E8C
6524             LAO,                      // 0E8D
6525             UNKNOWN,                  // 0E8E..0E93
6526             LAO,                      // 0E94..0E97
6527             UNKNOWN,                  // 0E98
6528             LAO,                      // 0E99..0E9F
6529             UNKNOWN,                  // 0EA0
6530             LAO,                      // 0EA1..0EA3
6531             UNKNOWN,                  // 0EA4
6532             LAO,                      // 0EA5
6533             UNKNOWN,                  // 0EA6
6534             LAO,                      // 0EA7
6535             UNKNOWN,                  // 0EA8..0EA9
6536             LAO,                      // 0EAA..0EAB
6537             UNKNOWN,                  // 0EAC
6538             LAO,                      // 0EAD..0EB9
6539             UNKNOWN,                  // 0EBA
6540             LAO,                      // 0EBB..0EBD
6541             UNKNOWN,                  // 0EBE..0EBF
6542             LAO,                      // 0EC0..0EC4
6543             UNKNOWN,                  // 0EC5
6544             LAO,                      // 0EC6
6545             UNKNOWN,                  // 0EC7
6546             LAO,                      // 0EC8..0ECD
6547             UNKNOWN,                  // 0ECE..0ECF
6548             LAO,                      // 0ED0..0ED9
6549             UNKNOWN,                  // 0EDA..0EDB
6550             LAO,                      // 0EDC..0EDF
6551             UNKNOWN,                  // 0EE0..0EFF
6552             TIBETAN,                  // 0F00..0F47
6553             UNKNOWN,                  // 0F48
6554             TIBETAN,                  // 0F49..0F6C
6555             UNKNOWN,                  // 0F6D..0F70
6556             TIBETAN,                  // 0F71..0F97
6557             UNKNOWN,                  // 0F98
6558             TIBETAN,                  // 0F99..0FBC
6559             UNKNOWN,                  // 0FBD
6560             TIBETAN,                  // 0FBE..0FCC
6561             UNKNOWN,                  // 0FCD
6562             TIBETAN,                  // 0FCE..0FD4
6563             COMMON,                   // 0FD5..0FD8
6564             TIBETAN,                  // 0FD9..0FDA
6565             UNKNOWN,                  // 0FDB..0FFF
6566             MYANMAR,                  // 1000..109F
6567             GEORGIAN,                 // 10A0..10C5
6568             UNKNOWN,                  // 10C6
6569             GEORGIAN,                 // 10C7
6570             UNKNOWN,                  // 10C8..10CC
6571             GEORGIAN,                 // 10CD
6572             UNKNOWN,                  // 10CE..10CF
6573             GEORGIAN,                 // 10D0..10FA
6574             COMMON,                   // 10FB
6575             GEORGIAN,                 // 10FC..10FF
6576             HANGUL,                   // 1100..11FF
6577             ETHIOPIC,                 // 1200..1248
6578             UNKNOWN,                  // 1249
6579             ETHIOPIC,                 // 124A..124D
6580             UNKNOWN,                  // 124E..124F
6581             ETHIOPIC,                 // 1250..1256
6582             UNKNOWN,                  // 1257
6583             ETHIOPIC,                 // 1258
6584             UNKNOWN,                  // 1259
6585             ETHIOPIC,                 // 125A..125D
6586             UNKNOWN,                  // 125E..125F
6587             ETHIOPIC,                 // 1260..1288
6588             UNKNOWN,                  // 1289
6589             ETHIOPIC,                 // 128A..128D
6590             UNKNOWN,                  // 128E..128F
6591             ETHIOPIC,                 // 1290..12B0
6592             UNKNOWN,                  // 12B1
6593             ETHIOPIC,                 // 12B2..12B5
6594             UNKNOWN,                  // 12B6..12B7
6595             ETHIOPIC,                 // 12B8..12BE
6596             UNKNOWN,                  // 12BF
6597             ETHIOPIC,                 // 12C0
6598             UNKNOWN,                  // 12C1
6599             ETHIOPIC,                 // 12C2..12C5
6600             UNKNOWN,                  // 12C6..12C7
6601             ETHIOPIC,                 // 12C8..12D6
6602             UNKNOWN,                  // 12D7
6603             ETHIOPIC,                 // 12D8..1310
6604             UNKNOWN,                  // 1311
6605             ETHIOPIC,                 // 1312..1315
6606             UNKNOWN,                  // 1316..1317
6607             ETHIOPIC,                 // 1318..135A
6608             UNKNOWN,                  // 135B..135C
6609             ETHIOPIC,                 // 135D..137C
6610             UNKNOWN,                  // 137D..137F
6611             ETHIOPIC,                 // 1380..1399
6612             UNKNOWN,                  // 139A..139F
6613             CHEROKEE,                 // 13A0..13F5
6614             UNKNOWN,                  // 13F6..13F7
6615             CHEROKEE,                 // 13F8..13FD
6616             UNKNOWN,                  // 13FE..13FF
6617             CANADIAN_ABORIGINAL,      // 1400..167F
6618             OGHAM,                    // 1680..169C
6619             UNKNOWN,                  // 169D..169F
6620             RUNIC,                    // 16A0..16EA
6621             COMMON,                   // 16EB..16ED
6622             RUNIC,                    // 16EE..16F8
6623             UNKNOWN,                  // 16F9..16FF
6624             TAGALOG,                  // 1700..170C
6625             UNKNOWN,                  // 170D
6626             TAGALOG,                  // 170E..1714
6627             UNKNOWN,                  // 1715..171F
6628             HANUNOO,                  // 1720..1734
6629             COMMON,                   // 1735..1736
6630             UNKNOWN,                  // 1737..173F
6631             BUHID,                    // 1740..1753
6632             UNKNOWN,                  // 1754..175F
6633             TAGBANWA,                 // 1760..176C
6634             UNKNOWN,                  // 176D
6635             TAGBANWA,                 // 176E..1770
6636             UNKNOWN,                  // 1771
6637             TAGBANWA,                 // 1772..1773
6638             UNKNOWN,                  // 1774..177F
6639             KHMER,                    // 1780..17DD
6640             UNKNOWN,                  // 17DE..17DF
6641             KHMER,                    // 17E0..17E9
6642             UNKNOWN,                  // 17EA..17EF
6643             KHMER,                    // 17F0..17F9
6644             UNKNOWN,                  // 17FA..17FF
6645             MONGOLIAN,                // 1800..1801
6646             COMMON,                   // 1802..1803
6647             MONGOLIAN,                // 1804
6648             COMMON,                   // 1805
6649             MONGOLIAN,                // 1806..180E
6650             UNKNOWN,                  // 180F
6651             MONGOLIAN,                // 1810..1819
6652             UNKNOWN,                  // 181A..181F
6653             MONGOLIAN,                // 1820..1877
6654             UNKNOWN,                  // 1878..187F
6655             MONGOLIAN,                // 1880..18AA
6656             UNKNOWN,                  // 18AB..18AF
6657             CANADIAN_ABORIGINAL,      // 18B0..18F5
6658             UNKNOWN,                  // 18F6..18FF
6659             LIMBU,                    // 1900..191E
6660             UNKNOWN,                  // 191F
6661             LIMBU,                    // 1920..192B
6662             UNKNOWN,                  // 192C..192F
6663             LIMBU,                    // 1930..193B
6664             UNKNOWN,                  // 193C..193F
6665             LIMBU,                    // 1940
6666             UNKNOWN,                  // 1941..1943
6667             LIMBU,                    // 1944..194F
6668             TAI_LE,                   // 1950..196D
6669             UNKNOWN,                  // 196E..196F
6670             TAI_LE,                   // 1970..1974
6671             UNKNOWN,                  // 1975..197F
6672             NEW_TAI_LUE,              // 1980..19AB
6673             UNKNOWN,                  // 19AC..19AF
6674             NEW_TAI_LUE,              // 19B0..19C9
6675             UNKNOWN,                  // 19CA..19CF
6676             NEW_TAI_LUE,              // 19D0..19DA
6677             UNKNOWN,                  // 19DB..19DD
6678             NEW_TAI_LUE,              // 19DE..19DF
6679             KHMER,                    // 19E0..19FF
6680             BUGINESE,                 // 1A00..1A1B
6681             UNKNOWN,                  // 1A1C..1A1D
6682             BUGINESE,                 // 1A1E..1A1F
6683             TAI_THAM,                 // 1A20..1A5E
6684             UNKNOWN,                  // 1A5F
6685             TAI_THAM,                 // 1A60..1A7C
6686             UNKNOWN,                  // 1A7D..1A7E
6687             TAI_THAM,                 // 1A7F..1A89
6688             UNKNOWN,                  // 1A8A..1A8F
6689             TAI_THAM,                 // 1A90..1A99
6690             UNKNOWN,                  // 1A9A..1A9F
6691             TAI_THAM,                 // 1AA0..1AAD
6692             UNKNOWN,                  // 1AAE..1AAF
6693             INHERITED,                // 1AB0..1ABE
6694             UNKNOWN,                  // 1ABF..1AFF
6695             BALINESE,                 // 1B00..1B4B
6696             UNKNOWN,                  // 1B4C..1B4F
6697             BALINESE,                 // 1B50..1B7C
6698             UNKNOWN,                  // 1B7D..1B7F
6699             SUNDANESE,                // 1B80..1BBF
6700             BATAK,                    // 1BC0..1BF3
6701             UNKNOWN,                  // 1BF4..1BFB
6702             BATAK,                    // 1BFC..1BFF
6703             LEPCHA,                   // 1C00..1C37
6704             UNKNOWN,                  // 1C38..1C3A
6705             LEPCHA,                   // 1C3B..1C49
6706             UNKNOWN,                  // 1C4A..1C4C
6707             LEPCHA,                   // 1C4D..1C4F
6708             OL_CHIKI,                 // 1C50..1C7F
6709             CYRILLIC,                 // 1C80..1C88
6710             UNKNOWN,                  // 1C89..1CBF
6711             SUNDANESE,                // 1CC0..1CC7
6712             UNKNOWN,                  // 1CC8..1CCF
6713             INHERITED,                // 1CD0..1CD2
6714             COMMON,                   // 1CD3
6715             INHERITED,                // 1CD4..1CE0
6716             COMMON,                   // 1CE1
6717             INHERITED,                // 1CE2..1CE8
6718             COMMON,                   // 1CE9..1CEC
6719             INHERITED,                // 1CED
6720             COMMON,                   // 1CEE..1CF3
6721             INHERITED,                // 1CF4
6722             COMMON,                   // 1CF5..1CF7
6723             INHERITED,                // 1CF8..1CF9
6724             UNKNOWN,                  // 1CFA..1CFF
6725             LATIN,                    // 1D00..1D25
6726             GREEK,                    // 1D26..1D2A
6727             CYRILLIC,                 // 1D2B
6728             LATIN,                    // 1D2C..1D5C
6729             GREEK,                    // 1D5D..1D61
6730             LATIN,                    // 1D62..1D65
6731             GREEK,                    // 1D66..1D6A
6732             LATIN,                    // 1D6B..1D77
6733             CYRILLIC,                 // 1D78
6734             LATIN,                    // 1D79..1DBE
6735             GREEK,                    // 1DBF
6736             INHERITED,                // 1DC0..1DF9
6737             UNKNOWN,                  // 1DFA
6738             INHERITED,                // 1DFB..1DFF
6739             LATIN,                    // 1E00..1EFF
6740             GREEK,                    // 1F00..1F15
6741             UNKNOWN,                  // 1F16..1F17
6742             GREEK,                    // 1F18..1F1D
6743             UNKNOWN,                  // 1F1E..1F1F
6744             GREEK,                    // 1F20..1F45
6745             UNKNOWN,                  // 1F46..1F47
6746             GREEK,                    // 1F48..1F4D
6747             UNKNOWN,                  // 1F4E..1F4F
6748             GREEK,                    // 1F50..1F57
6749             UNKNOWN,                  // 1F58
6750             GREEK,                    // 1F59
6751             UNKNOWN,                  // 1F5A
6752             GREEK,                    // 1F5B
6753             UNKNOWN,                  // 1F5C
6754             GREEK,                    // 1F5D
6755             UNKNOWN,                  // 1F5E
6756             GREEK,                    // 1F5F..1F7D
6757             UNKNOWN,                  // 1F7E..1F7F
6758             GREEK,                    // 1F80..1FB4
6759             UNKNOWN,                  // 1FB5
6760             GREEK,                    // 1FB6..1FC4
6761             UNKNOWN,                  // 1FC5
6762             GREEK,                    // 1FC6..1FD3
6763             UNKNOWN,                  // 1FD4..1FD5
6764             GREEK,                    // 1FD6..1FDB
6765             UNKNOWN,                  // 1FDC
6766             GREEK,                    // 1FDD..1FEF
6767             UNKNOWN,                  // 1FF0..1FF1
6768             GREEK,                    // 1FF2..1FF4
6769             UNKNOWN,                  // 1FF5
6770             GREEK,                    // 1FF6..1FFE
6771             UNKNOWN,                  // 1FFF
6772             COMMON,                   // 2000..200B
6773             INHERITED,                // 200C..200D
6774             COMMON,                   // 200E..2064
6775             UNKNOWN,                  // 2065
6776             COMMON,                   // 2066..2070
6777             LATIN,                    // 2071
6778             UNKNOWN,                  // 2072..2073
6779             COMMON,                   // 2074..207E
6780             LATIN,                    // 207F
6781             COMMON,                   // 2080..208E
6782             UNKNOWN,                  // 208F
6783             LATIN,                    // 2090..209C
6784             UNKNOWN,                  // 209D..209F
6785             COMMON,                   // 20A0..20BF
6786             UNKNOWN,                  // 20C0..20CF
6787             INHERITED,                // 20D0..20F0
6788             UNKNOWN,                  // 20F1..20FF
6789             COMMON,                   // 2100..2125
6790             GREEK,                    // 2126
6791             COMMON,                   // 2127..2129
6792             LATIN,                    // 212A..212B
6793             COMMON,                   // 212C..2131
6794             LATIN,                    // 2132
6795             COMMON,                   // 2133..214D
6796             LATIN,                    // 214E
6797             COMMON,                   // 214F..215F
6798             LATIN,                    // 2160..2188
6799             COMMON,                   // 2189..218B
6800             UNKNOWN,                  // 218C..218F
6801             COMMON,                   // 2190..2426
6802             UNKNOWN,                  // 2427..243F
6803             COMMON,                   // 2440..244A
6804             UNKNOWN,                  // 244B..245F
6805             COMMON,                   // 2460..27FF
6806             BRAILLE,                  // 2800..28FF
6807             COMMON,                   // 2900..2B73
6808             UNKNOWN,                  // 2B74..2B75
6809             COMMON,                   // 2B76..2B95
6810             UNKNOWN,                  // 2B96..2B97
6811             COMMON,                   // 2B98..2BB9
6812             UNKNOWN,                  // 2BBA..2BBC
6813             COMMON,                   // 2BBD..2BC8
6814             UNKNOWN,                  // 2BC9
6815             COMMON,                   // 2BCA..2BD2
6816             UNKNOWN,                  // 2BD3..2BEB
6817             COMMON,                   // 2BEC..2BEF
6818             UNKNOWN,                  // 2BF0..2BFF
6819             GLAGOLITIC,               // 2C00..2C2E
6820             UNKNOWN,                  // 2C2F
6821             GLAGOLITIC,               // 2C30..2C5E
6822             UNKNOWN,                  // 2C5F
6823             LATIN,                    // 2C60..2C7F
6824             COPTIC,                   // 2C80..2CF3
6825             UNKNOWN,                  // 2CF4..2CF8
6826             COPTIC,                   // 2CF9..2CFF
6827             GEORGIAN,                 // 2D00..2D25
6828             UNKNOWN,                  // 2D26
6829             GEORGIAN,                 // 2D27
6830             UNKNOWN,                  // 2D28..2D2C
6831             GEORGIAN,                 // 2D2D
6832             UNKNOWN,                  // 2D2E..2D2F
6833             TIFINAGH,                 // 2D30..2D67
6834             UNKNOWN,                  // 2D68..2D6E
6835             TIFINAGH,                 // 2D6F..2D70
6836             UNKNOWN,                  // 2D71..2D7E
6837             TIFINAGH,                 // 2D7F
6838             ETHIOPIC,                 // 2D80..2D96
6839             UNKNOWN,                  // 2D97..2D9F
6840             ETHIOPIC,                 // 2DA0..2DA6
6841             UNKNOWN,                  // 2DA7
6842             ETHIOPIC,                 // 2DA8..2DAE
6843             UNKNOWN,                  // 2DAF
6844             ETHIOPIC,                 // 2DB0..2DB6
6845             UNKNOWN,                  // 2DB7
6846             ETHIOPIC,                 // 2DB8..2DBE
6847             UNKNOWN,                  // 2DBF
6848             ETHIOPIC,                 // 2DC0..2DC6
6849             UNKNOWN,                  // 2DC7
6850             ETHIOPIC,                 // 2DC8..2DCE
6851             UNKNOWN,                  // 2DCF
6852             ETHIOPIC,                 // 2DD0..2DD6
6853             UNKNOWN,                  // 2DD7
6854             ETHIOPIC,                 // 2DD8..2DDE
6855             UNKNOWN,                  // 2DDF
6856             CYRILLIC,                 // 2DE0..2DFF
6857             COMMON,                   // 2E00..2E49
6858             UNKNOWN,                  // 2E4A..2E7F
6859             HAN,                      // 2E80..2E99
6860             UNKNOWN,                  // 2E9A
6861             HAN,                      // 2E9B..2EF3
6862             UNKNOWN,                  // 2EF4..2EFF
6863             HAN,                      // 2F00..2FD5
6864             UNKNOWN,                  // 2FD6..2FEF
6865             COMMON,                   // 2FF0..2FFB
6866             UNKNOWN,                  // 2FFC..2FFF
6867             COMMON,                   // 3000..3004
6868             HAN,                      // 3005
6869             COMMON,                   // 3006
6870             HAN,                      // 3007
6871             COMMON,                   // 3008..3020
6872             HAN,                      // 3021..3029
6873             INHERITED,                // 302A..302D
6874             HANGUL,                   // 302E..302F
6875             COMMON,                   // 3030..3037
6876             HAN,                      // 3038..303B
6877             COMMON,                   // 303C..303F
6878             UNKNOWN,                  // 3040
6879             HIRAGANA,                 // 3041..3096
6880             UNKNOWN,                  // 3097..3098
6881             INHERITED,                // 3099..309A
6882             COMMON,                   // 309B..309C
6883             HIRAGANA,                 // 309D..309F
6884             COMMON,                   // 30A0
6885             KATAKANA,                 // 30A1..30FA
6886             COMMON,                   // 30FB..30FC
6887             KATAKANA,                 // 30FD..30FF
6888             UNKNOWN,                  // 3100..3104
6889             BOPOMOFO,                 // 3105..312E
6890             UNKNOWN,                  // 312F..3130
6891             HANGUL,                   // 3131..318E
6892             UNKNOWN,                  // 318F
6893             COMMON,                   // 3190..319F
6894             BOPOMOFO,                 // 31A0..31BA
6895             UNKNOWN,                  // 31BB..31BF
6896             COMMON,                   // 31C0..31E3
6897             UNKNOWN,                  // 31E4..31EF
6898             KATAKANA,                 // 31F0..31FF
6899             HANGUL,                   // 3200..321E
6900             UNKNOWN,                  // 321F
6901             COMMON,                   // 3220..325F
6902             HANGUL,                   // 3260..327E
6903             COMMON,                   // 327F..32CF
6904             KATAKANA,                 // 32D0..32FE
6905             UNKNOWN,                  // 32FF
6906             KATAKANA,                 // 3300..3357
6907             COMMON,                   // 3358..33FF
6908             HAN,                      // 3400..4DB5
6909             UNKNOWN,                  // 4DB6..4DBF
6910             COMMON,                   // 4DC0..4DFF
6911             HAN,                      // 4E00..9FEA
6912             UNKNOWN,                  // 9FEB..9FFF
6913             YI,                       // A000..A48C
6914             UNKNOWN,                  // A48D..A48F
6915             YI,                       // A490..A4C6
6916             UNKNOWN,                  // A4C7..A4CF
6917             LISU,                     // A4D0..A4FF
6918             VAI,                      // A500..A62B
6919             UNKNOWN,                  // A62C..A63F
6920             CYRILLIC,                 // A640..A69F
6921             BAMUM,                    // A6A0..A6F7
6922             UNKNOWN,                  // A6F8..A6FF
6923             COMMON,                   // A700..A721
6924             LATIN,                    // A722..A787
6925             COMMON,                   // A788..A78A
6926             LATIN,                    // A78B..A7AE
6927             UNKNOWN,                  // A7AF
6928             LATIN,                    // A7B0..A7B7
6929             UNKNOWN,                  // A7B8..A7F6
6930             LATIN,                    // A7F7..A7FF
6931             SYLOTI_NAGRI,             // A800..A82B
6932             UNKNOWN,                  // A82C..A82F
6933             COMMON,                   // A830..A839
6934             UNKNOWN,                  // A83A..A83F
6935             PHAGS_PA,                 // A840..A877
6936             UNKNOWN,                  // A878..A87F
6937             SAURASHTRA,               // A880..A8C5
6938             UNKNOWN,                  // A8C6..A8CD
6939             SAURASHTRA,               // A8CE..A8D9
6940             UNKNOWN,                  // A8DA..A8DF
6941             DEVANAGARI,               // A8E0..A8FD
6942             UNKNOWN,                  // A8FE..A8FF
6943             KAYAH_LI,                 // A900..A92D
6944             COMMON,                   // A92E
6945             KAYAH_LI,                 // A92F
6946             REJANG,                   // A930..A953
6947             UNKNOWN,                  // A954..A95E
6948             REJANG,                   // A95F
6949             HANGUL,                   // A960..A97C
6950             UNKNOWN,                  // A97D..A97F
6951             JAVANESE,                 // A980..A9CD
6952             UNKNOWN,                  // A9CE
6953             COMMON,                   // A9CF
6954             JAVANESE,                 // A9D0..A9D9
6955             UNKNOWN,                  // A9DA..A9DD
6956             JAVANESE,                 // A9DE..A9DF
6957             MYANMAR,                  // A9E0..A9FE
6958             UNKNOWN,                  // A9FF
6959             CHAM,                     // AA00..AA36
6960             UNKNOWN,                  // AA37..AA3F
6961             CHAM,                     // AA40..AA4D
6962             UNKNOWN,                  // AA4E..AA4F
6963             CHAM,                     // AA50..AA59
6964             UNKNOWN,                  // AA5A..AA5B
6965             CHAM,                     // AA5C..AA5F
6966             MYANMAR,                  // AA60..AA7F
6967             TAI_VIET,                 // AA80..AAC2
6968             UNKNOWN,                  // AAC3..AADA
6969             TAI_VIET,                 // AADB..AADF
6970             MEETEI_MAYEK,             // AAE0..AAF6
6971             UNKNOWN,                  // AAF7..AB00
6972             ETHIOPIC,                 // AB01..AB06
6973             UNKNOWN,                  // AB07..AB08
6974             ETHIOPIC,                 // AB09..AB0E
6975             UNKNOWN,                  // AB0F..AB10
6976             ETHIOPIC,                 // AB11..AB16
6977             UNKNOWN,                  // AB17..AB1F
6978             ETHIOPIC,                 // AB20..AB26
6979             UNKNOWN,                  // AB27
6980             ETHIOPIC,                 // AB28..AB2E
6981             UNKNOWN,                  // AB2F
6982             LATIN,                    // AB30..AB5A
6983             COMMON,                   // AB5B
6984             LATIN,                    // AB5C..AB64
6985             GREEK,                    // AB65
6986             UNKNOWN,                  // AB66..AB6F
6987             CHEROKEE,                 // AB70..ABBF
6988             MEETEI_MAYEK,             // ABC0..ABED
6989             UNKNOWN,                  // ABEE..ABEF
6990             MEETEI_MAYEK,             // ABF0..ABF9
6991             UNKNOWN,                  // ABFA..ABFF
6992             HANGUL,                   // AC00..D7A3
6993             UNKNOWN,                  // D7A4..D7AF
6994             HANGUL,                   // D7B0..D7C6
6995             UNKNOWN,                  // D7C7..D7CA
6996             HANGUL,                   // D7CB..D7FB
6997             UNKNOWN,                  // D7FC..F8FF
6998             HAN,                      // F900..FA6D
6999             UNKNOWN,                  // FA6E..FA6F
7000             HAN,                      // FA70..FAD9
7001             UNKNOWN,                  // FADA..FAFF
7002             LATIN,                    // FB00..FB06
7003             UNKNOWN,                  // FB07..FB12
7004             ARMENIAN,                 // FB13..FB17
7005             UNKNOWN,                  // FB18..FB1C
7006             HEBREW,                   // FB1D..FB36
7007             UNKNOWN,                  // FB37
7008             HEBREW,                   // FB38..FB3C
7009             UNKNOWN,                  // FB3D
7010             HEBREW,                   // FB3E
7011             UNKNOWN,                  // FB3F
7012             HEBREW,                   // FB40..FB41
7013             UNKNOWN,                  // FB42
7014             HEBREW,                   // FB43..FB44
7015             UNKNOWN,                  // FB45
7016             HEBREW,                   // FB46..FB4F
7017             ARABIC,                   // FB50..FBC1
7018             UNKNOWN,                  // FBC2..FBD2
7019             ARABIC,                   // FBD3..FD3D
7020             COMMON,                   // FD3E..FD3F
7021             UNKNOWN,                  // FD40..FD4F
7022             ARABIC,                   // FD50..FD8F
7023             UNKNOWN,                  // FD90..FD91
7024             ARABIC,                   // FD92..FDC7
7025             UNKNOWN,                  // FDC8..FDEF
7026             ARABIC,                   // FDF0..FDFD
7027             UNKNOWN,                  // FDFE..FDFF
7028             INHERITED,                // FE00..FE0F
7029             COMMON,                   // FE10..FE19
7030             UNKNOWN,                  // FE1A..FE1F
7031             INHERITED,                // FE20..FE2D
7032             CYRILLIC,                 // FE2E..FE2F
7033             COMMON,                   // FE30..FE52
7034             UNKNOWN,                  // FE53
7035             COMMON,                   // FE54..FE66
7036             UNKNOWN,                  // FE67
7037             COMMON,                   // FE68..FE6B
7038             UNKNOWN,                  // FE6C..FE6F
7039             ARABIC,                   // FE70..FE74
7040             UNKNOWN,                  // FE75
7041             ARABIC,                   // FE76..FEFC
7042             UNKNOWN,                  // FEFD..FEFE
7043             COMMON,                   // FEFF
7044             UNKNOWN,                  // FF00
7045             COMMON,                   // FF01..FF20
7046             LATIN,                    // FF21..FF3A
7047             COMMON,                   // FF3B..FF40
7048             LATIN,                    // FF41..FF5A
7049             COMMON,                   // FF5B..FF65
7050             KATAKANA,                 // FF66..FF6F
7051             COMMON,                   // FF70
7052             KATAKANA,                 // FF71..FF9D
7053             COMMON,                   // FF9E..FF9F
7054             HANGUL,                   // FFA0..FFBE
7055             UNKNOWN,                  // FFBF..FFC1
7056             HANGUL,                   // FFC2..FFC7
7057             UNKNOWN,                  // FFC8..FFC9
7058             HANGUL,                   // FFCA..FFCF
7059             UNKNOWN,                  // FFD0..FFD1
7060             HANGUL,                   // FFD2..FFD7
7061             UNKNOWN,                  // FFD8..FFD9
7062             HANGUL,                   // FFDA..FFDC
7063             UNKNOWN,                  // FFDD..FFDF
7064             COMMON,                   // FFE0..FFE6
7065             UNKNOWN,                  // FFE7
7066             COMMON,                   // FFE8..FFEE
7067             UNKNOWN,                  // FFEF..FFF8
7068             COMMON,                   // FFF9..FFFD
7069             UNKNOWN,                  // FFFE..FFFF
7070             LINEAR_B,                 // 10000..1000B
7071             UNKNOWN,                  // 1000C
7072             LINEAR_B,                 // 1000D..10026
7073             UNKNOWN,                  // 10027
7074             LINEAR_B,                 // 10028..1003A
7075             UNKNOWN,                  // 1003B
7076             LINEAR_B,                 // 1003C..1003D
7077             UNKNOWN,                  // 1003E
7078             LINEAR_B,                 // 1003F..1004D
7079             UNKNOWN,                  // 1004E..1004F
7080             LINEAR_B,                 // 10050..1005D
7081             UNKNOWN,                  // 1005E..1007F
7082             LINEAR_B,                 // 10080..100FA
7083             UNKNOWN,                  // 100FB..100FF
7084             COMMON,                   // 10100..10102
7085             UNKNOWN,                  // 10103..10106
7086             COMMON,                   // 10107..10133
7087             UNKNOWN,                  // 10134..10136
7088             COMMON,                   // 10137..1013F
7089             GREEK,                    // 10140..1018E
7090             UNKNOWN,                  // 1018F
7091             COMMON,                   // 10190..1019B
7092             UNKNOWN,                  // 1019C..1019F
7093             GREEK,                    // 101A0
7094             UNKNOWN,                  // 101A1..101CF
7095             COMMON,                   // 101D0..101FC
7096             INHERITED,                // 101FD
7097             UNKNOWN,                  // 101FE..1027F
7098             LYCIAN,                   // 10280..1029C
7099             UNKNOWN,                  // 1029D..1029F
7100             CARIAN,                   // 102A0..102D0
7101             UNKNOWN,                  // 102D1..102DF
7102             INHERITED,                // 102E0
7103             COMMON,                   // 102E1..102FB
7104             UNKNOWN,                  // 102FC..102FF
7105             OLD_ITALIC,               // 10300..10323
7106             UNKNOWN,                  // 10324..1032C
7107             OLD_ITALIC,               // 1032D..1032F
7108             GOTHIC,                   // 10330..1034A
7109             UNKNOWN,                  // 1034B..1034F
7110             OLD_PERMIC,               // 10350..1037A
7111             UNKNOWN,                  // 1037B..1037F
7112             UGARITIC,                 // 10380..1039D
7113             UNKNOWN,                  // 1039E
7114             UGARITIC,                 // 1039F
7115             OLD_PERSIAN,              // 103A0..103C3
7116             UNKNOWN,                  // 103C4..103C7
7117             OLD_PERSIAN,              // 103C8..103D5
7118             UNKNOWN,                  // 103D6..103FF
7119             DESERET,                  // 10400..1044F
7120             SHAVIAN,                  // 10450..1047F
7121             OSMANYA,                  // 10480..1049D
7122             UNKNOWN,                  // 1049E..1049F
7123             OSMANYA,                  // 104A0..104A9
7124             UNKNOWN,                  // 104AA..104AF
7125             OSAGE,                    // 104B0..104D3
7126             UNKNOWN,                  // 104D4..104D7
7127             OSAGE,                    // 104D8..104FB
7128             UNKNOWN,                  // 104FC..104FF
7129             ELBASAN,                  // 10500..10527
7130             UNKNOWN,                  // 10528..1052F
7131             CAUCASIAN_ALBANIAN,       // 10530..10563
7132             UNKNOWN,                  // 10564..1056E
7133             CAUCASIAN_ALBANIAN,       // 1056F
7134             UNKNOWN,                  // 10570..105FF
7135             LINEAR_A,                 // 10600..10736
7136             UNKNOWN,                  // 10737..1073F
7137             LINEAR_A,                 // 10740..10755
7138             UNKNOWN,                  // 10756..1075F
7139             LINEAR_A,                 // 10760..10767
7140             UNKNOWN,                  // 10768..107FF
7141             CYPRIOT,                  // 10800..10805
7142             UNKNOWN,                  // 10806..10807
7143             CYPRIOT,                  // 10808
7144             UNKNOWN,                  // 10809
7145             CYPRIOT,                  // 1080A..10835
7146             UNKNOWN,                  // 10836
7147             CYPRIOT,                  // 10837..10838
7148             UNKNOWN,                  // 10839..1083B
7149             CYPRIOT,                  // 1083C
7150             UNKNOWN,                  // 1083D..1083E
7151             CYPRIOT,                  // 1083F
7152             IMPERIAL_ARAMAIC,         // 10840..10855
7153             UNKNOWN,                  // 10856
7154             IMPERIAL_ARAMAIC,         // 10857..1085F
7155             PALMYRENE,                // 10860..1087F
7156             NABATAEAN,                // 10880..1089E
7157             UNKNOWN,                  // 1089F..108A6
7158             NABATAEAN,                // 108A7..108AF
7159             UNKNOWN,                  // 108B0..108DF
7160             HATRAN,                   // 108E0..108F2
7161             UNKNOWN,                  // 108F3
7162             HATRAN,                   // 108F4..108F5
7163             UNKNOWN,                  // 108F6..108FA
7164             HATRAN,                   // 108FB..108FF
7165             PHOENICIAN,               // 10900..1091B
7166             UNKNOWN,                  // 1091C..1091E
7167             PHOENICIAN,               // 1091F
7168             LYDIAN,                   // 10920..10939
7169             UNKNOWN,                  // 1093A..1093E
7170             LYDIAN,                   // 1093F
7171             UNKNOWN,                  // 10940..1097F
7172             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7173             MEROITIC_CURSIVE,         // 109A0..109B7
7174             UNKNOWN,                  // 109B8..109BB
7175             MEROITIC_CURSIVE,         // 109BC..109CF
7176             UNKNOWN,                  // 109D0..109D1
7177             MEROITIC_CURSIVE,         // 109D2..109FF
7178             KHAROSHTHI,               // 10A00..10A03
7179             UNKNOWN,                  // 10A04
7180             KHAROSHTHI,               // 10A05..10A06
7181             UNKNOWN,                  // 10A07..10A0B
7182             KHAROSHTHI,               // 10A0C..10A13
7183             UNKNOWN,                  // 10A14
7184             KHAROSHTHI,               // 10A15..10A17
7185             UNKNOWN,                  // 10A18
7186             KHAROSHTHI,               // 10A19..10A33
7187             UNKNOWN,                  // 10A34..10A37
7188             KHAROSHTHI,               // 10A38..10A3A
7189             UNKNOWN,                  // 10A3B..10A3E
7190             KHAROSHTHI,               // 10A3F..10A47
7191             UNKNOWN,                  // 10A48..10A4F
7192             KHAROSHTHI,               // 10A50..10A58
7193             UNKNOWN,                  // 10A59..10A5F
7194             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7195             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7196             UNKNOWN,                  // 10AA0..10ABF
7197             MANICHAEAN,               // 10AC0..10AE6
7198             UNKNOWN,                  // 10AE7..10AEA
7199             MANICHAEAN,               // 10AEB..10AF6
7200             UNKNOWN,                  // 10AF7..10AFF
7201             AVESTAN,                  // 10B00..10B35
7202             UNKNOWN,                  // 10B36..10B38
7203             AVESTAN,                  // 10B39..10B3F
7204             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7205             UNKNOWN,                  // 10B56..10B57
7206             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7207             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7208             UNKNOWN,                  // 10B73..10B77
7209             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7210             PSALTER_PAHLAVI,          // 10B80..10B91
7211             UNKNOWN,                  // 10B92..10B98
7212             PSALTER_PAHLAVI,          // 10B99..10B9C
7213             UNKNOWN,                  // 10B9D..10BA8
7214             PSALTER_PAHLAVI,          // 10BA9..10BAF
7215             UNKNOWN,                  // 10BB0..10BFF
7216             OLD_TURKIC,               // 10C00..10C48
7217             UNKNOWN,                  // 10C49..10C7F
7218             OLD_HUNGARIAN,            // 10C80..10CB2
7219             UNKNOWN,                  // 10CB3..10CBF
7220             OLD_HUNGARIAN,            // 10CC0..10CF2
7221             UNKNOWN,                  // 10CF3..10CF9
7222             OLD_HUNGARIAN,            // 10CFA..10CFF
7223             UNKNOWN,                  // 10D00..10E5F
7224             ARABIC,                   // 10E60..10E7E
7225             UNKNOWN,                  // 10E7F..10FFF
7226             BRAHMI,                   // 11000..1104D
7227             UNKNOWN,                  // 1104E..11051
7228             BRAHMI,                   // 11052..1106F
7229             UNKNOWN,                  // 11070..1107E
7230             BRAHMI,                   // 1107F
7231             KAITHI,                   // 11080..110C1
7232             UNKNOWN,                  // 110C2..110CF
7233             SORA_SOMPENG,             // 110D0..110E8
7234             UNKNOWN,                  // 110E9..110EF
7235             SORA_SOMPENG,             // 110F0..110F9
7236             UNKNOWN,                  // 110FA..110FF
7237             CHAKMA,                   // 11100..11134
7238             UNKNOWN,                  // 11135
7239             CHAKMA,                   // 11136..11143
7240             UNKNOWN,                  // 11144..1114F
7241             MAHAJANI,                 // 11150..11176
7242             UNKNOWN,                  // 11177..1117F
7243             SHARADA,                  // 11180..111CD
7244             UNKNOWN,                  // 111CE..111CF
7245             SHARADA,                  // 111D0..111DF
7246             UNKNOWN,                  // 111E0
7247             SINHALA,                  // 111E1..111F4
7248             UNKNOWN,                  // 111F5..111FF
7249             KHOJKI,                   // 11200..11211
7250             UNKNOWN,                  // 11212
7251             KHOJKI,                   // 11213..1123E
7252             UNKNOWN,                  // 1123F..1127F
7253             MULTANI,                  // 11280..11286
7254             UNKNOWN,                  // 11287
7255             MULTANI,                  // 11288
7256             UNKNOWN,                  // 11289
7257             MULTANI,                  // 1128A..1128D
7258             UNKNOWN,                  // 1128E
7259             MULTANI,                  // 1128F..1129D
7260             UNKNOWN,                  // 1129E
7261             MULTANI,                  // 1129F..112A9
7262             UNKNOWN,                  // 112AA..112AF
7263             KHUDAWADI,                // 112B0..112EA
7264             UNKNOWN,                  // 112EB..112EF
7265             KHUDAWADI,                // 112F0..112F9
7266             UNKNOWN,                  // 112FA..112FF
7267             GRANTHA,                  // 11300..11303
7268             UNKNOWN,                  // 11304
7269             GRANTHA,                  // 11305..1130C
7270             UNKNOWN,                  // 1130D..1130E
7271             GRANTHA,                  // 1130F..11310
7272             UNKNOWN,                  // 11311..11312
7273             GRANTHA,                  // 11313..11328
7274             UNKNOWN,                  // 11329
7275             GRANTHA,                  // 1132A..11330
7276             UNKNOWN,                  // 11331
7277             GRANTHA,                  // 11332..11333
7278             UNKNOWN,                  // 11334
7279             GRANTHA,                  // 11335..11339
7280             UNKNOWN,                  // 1133A..1133B
7281             GRANTHA,                  // 1133C..11344
7282             UNKNOWN,                  // 11345..11346
7283             GRANTHA,                  // 11347..11348
7284             UNKNOWN,                  // 11349..1134A
7285             GRANTHA,                  // 1134B..1134D
7286             UNKNOWN,                  // 1134E..1134F
7287             GRANTHA,                  // 11350
7288             UNKNOWN,                  // 11351..11356
7289             GRANTHA,                  // 11357
7290             UNKNOWN,                  // 11358..1135C
7291             GRANTHA,                  // 1135D..11363
7292             UNKNOWN,                  // 11364..11365
7293             GRANTHA,                  // 11366..1136C
7294             UNKNOWN,                  // 1136D..1136F
7295             GRANTHA,                  // 11370..11374
7296             UNKNOWN,                  // 11375..113FF
7297             NEWA,                     // 11400..11459
7298             UNKNOWN,                  // 1145A
7299             NEWA,                     // 1145B
7300             UNKNOWN,                  // 1145C
7301             NEWA,                     // 1145D
7302             UNKNOWN,                  // 1145E..1147F
7303             TIRHUTA,                  // 11480..114C7
7304             UNKNOWN,                  // 114C8..114CF
7305             TIRHUTA,                  // 114D0..114D9
7306             UNKNOWN,                  // 114DA..1157F
7307             SIDDHAM,                  // 11580..115B5
7308             UNKNOWN,                  // 115B6..115B7
7309             SIDDHAM,                  // 115B8..115DD
7310             UNKNOWN,                  // 115DE..115FF
7311             MODI,                     // 11600..11644
7312             UNKNOWN,                  // 11645..1164F
7313             MODI,                     // 11650..11659
7314             UNKNOWN,                  // 1165A..1165F
7315             MONGOLIAN,                // 11660..1166C
7316             UNKNOWN,                  // 1166D..1167F
7317             TAKRI,                    // 11680..116B7
7318             UNKNOWN,                  // 116B8..116BF
7319             TAKRI,                    // 116C0..116C9
7320             UNKNOWN,                  // 116CA..116FF
7321             AHOM,                     // 11700..11719
7322             UNKNOWN,                  // 1171A..1171C
7323             AHOM,                     // 1171D..1172B
7324             UNKNOWN,                  // 1172C..1172F
7325             AHOM,                     // 11730..1173F
7326             UNKNOWN,                  // 11740..1189F
7327             WARANG_CITI,              // 118A0..118F2
7328             UNKNOWN,                  // 118F3..118FE
7329             WARANG_CITI,              // 118FF
7330             UNKNOWN,                  // 11900..119FF
7331             ZANABAZAR_SQUARE,         // 11A00..11A47
7332             UNKNOWN,                  // 11A48..11A4F
7333             SOYOMBO,                  // 11A50..11A83
7334             UNKNOWN,                  // 11A84..11A85
7335             SOYOMBO,                  // 11A86..11A9C
7336             UNKNOWN,                  // 11A9D
7337             SOYOMBO,                  // 11A9E..11AA2
7338             UNKNOWN,                  // 11AA3..11ABF
7339             PAU_CIN_HAU,              // 11AC0..11AF8
7340             UNKNOWN,                  // 11AF9..11BFF
7341             BHAIKSUKI,                // 11C00..11C08
7342             UNKNOWN,                  // 11C09
7343             BHAIKSUKI,                // 11C0A..11C36
7344             UNKNOWN,                  // 11C37
7345             BHAIKSUKI,                // 11C38..11C45
7346             UNKNOWN,                  // 11C46..11C4F
7347             BHAIKSUKI,                // 11C50..11C6C
7348             UNKNOWN,                  // 11C6D..11C6F
7349             MARCHEN,                  // 11C70..11C8F
7350             UNKNOWN,                  // 11C90..11C91
7351             MARCHEN,                  // 11C92..11CA7
7352             UNKNOWN,                  // 11CA8
7353             MARCHEN,                  // 11CA9..11CB6
7354             UNKNOWN,                  // 11CB7..11CFF
7355             MASARAM_GONDI,            // 11D00..11D06
7356             UNKNOWN,                  // 11D07
7357             MASARAM_GONDI,            // 11D08..11D09
7358             UNKNOWN,                  // 11D0A
7359             MASARAM_GONDI,            // 11D0B..11D36
7360             UNKNOWN,                  // 11D37..11D39
7361             MASARAM_GONDI,            // 11D3A
7362             UNKNOWN,                  // 11D3B
7363             MASARAM_GONDI,            // 11D3C..11D3D
7364             UNKNOWN,                  // 11D3E
7365             MASARAM_GONDI,            // 11D3F..11D47
7366             UNKNOWN,                  // 11D48..11D4F
7367             MASARAM_GONDI,            // 11D50..11D59
7368             UNKNOWN,                  // 11D5A..11FFF
7369             CUNEIFORM,                // 12000..12399
7370             UNKNOWN,                  // 1239A..123FF
7371             CUNEIFORM,                // 12400..1246E
7372             UNKNOWN,                  // 1246F
7373             CUNEIFORM,                // 12470..12474
7374             UNKNOWN,                  // 12475..1247F
7375             CUNEIFORM,                // 12480..12543
7376             UNKNOWN,                  // 12544..12FFF
7377             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
7378             UNKNOWN,                  // 1342F..143FF
7379             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
7380             UNKNOWN,                  // 14647..167FF
7381             BAMUM,                    // 16800..16A38
7382             UNKNOWN,                  // 16A39..16A3F
7383             MRO,                      // 16A40..16A5E
7384             UNKNOWN,                  // 16A5F
7385             MRO,                      // 16A60..16A69
7386             UNKNOWN,                  // 16A6A..16A6D
7387             MRO,                      // 16A6E..16A6F
7388             UNKNOWN,                  // 16A70..16ACF
7389             BASSA_VAH,                // 16AD0..16AED
7390             UNKNOWN,                  // 16AEE..16AEF
7391             BASSA_VAH,                // 16AF0..16AF5
7392             UNKNOWN,                  // 16AF6..16AFF
7393             PAHAWH_HMONG,             // 16B00..16B45
7394             UNKNOWN,                  // 16B46..16B4F
7395             PAHAWH_HMONG,             // 16B50..16B59
7396             UNKNOWN,                  // 16B5A
7397             PAHAWH_HMONG,             // 16B5B..16B61
7398             UNKNOWN,                  // 16B62
7399             PAHAWH_HMONG,             // 16B63..16B77
7400             UNKNOWN,                  // 16B78..16B7C
7401             PAHAWH_HMONG,             // 16B7D..16B8F
7402             UNKNOWN,                  // 16B90..16EFF
7403             MIAO,                     // 16F00..16F44
7404             UNKNOWN,                  // 16F45..16F4F
7405             MIAO,                     // 16F50..16F7E
7406             UNKNOWN,                  // 16F7F..16F8E
7407             MIAO,                     // 16F8F..16F9F
7408             UNKNOWN,                  // 16FA0..16FDF
7409             TANGUT,                   // 16FE0
7410             NUSHU,                    // 16FE1
7411             UNKNOWN,                  // 16FE2..16FFF
7412             TANGUT,                   // 17000..187EC
7413             UNKNOWN,                  // 187ED..187FF
7414             TANGUT,                   // 18800..18AF2
7415             UNKNOWN,                  // 18AF3..1AFFF
7416             KATAKANA,                 // 1B000
7417             HIRAGANA,                 // 1B001..1B11E
7418             UNKNOWN,                  // 1B11F..1B16F
7419             NUSHU,                    // 1B170..1B2FB
7420             UNKNOWN,                  // 1B2FC..1BBFF
7421             DUPLOYAN,                 // 1BC00..1BC6A
7422             UNKNOWN,                  // 1BC6B..1BC6F
7423             DUPLOYAN,                 // 1BC70..1BC7C
7424             UNKNOWN,                  // 1BC7D..1BC7F
7425             DUPLOYAN,                 // 1BC80..1BC88
7426             UNKNOWN,                  // 1BC89..1BC8F
7427             DUPLOYAN,                 // 1BC90..1BC99
7428             UNKNOWN,                  // 1BC9A..1BC9B
7429             DUPLOYAN,                 // 1BC9C..1BC9F
7430             COMMON,                   // 1BCA0..1BCA3
7431             UNKNOWN,                  // 1BCA4..1CFFF
7432             COMMON,                   // 1D000..1D0F5
7433             UNKNOWN,                  // 1D0F6..1D0FF
7434             COMMON,                   // 1D100..1D126
7435             UNKNOWN,                  // 1D127..1D128
7436             COMMON,                   // 1D129..1D166
7437             INHERITED,                // 1D167..1D169
7438             COMMON,                   // 1D16A..1D17A
7439             INHERITED,                // 1D17B..1D182
7440             COMMON,                   // 1D183..1D184
7441             INHERITED,                // 1D185..1D18B
7442             COMMON,                   // 1D18C..1D1A9
7443             INHERITED,                // 1D1AA..1D1AD
7444             COMMON,                   // 1D1AE..1D1E8
7445             UNKNOWN,                  // 1D1E9..1D1FF
7446             GREEK,                    // 1D200..1D245
7447             UNKNOWN,                  // 1D246..1D2FF
7448             COMMON,                   // 1D300..1D356
7449             UNKNOWN,                  // 1D357..1D35F
7450             COMMON,                   // 1D360..1D371
7451             UNKNOWN,                  // 1D372..1D3FF
7452             COMMON,                   // 1D400..1D454
7453             UNKNOWN,                  // 1D455
7454             COMMON,                   // 1D456..1D49C
7455             UNKNOWN,                  // 1D49D
7456             COMMON,                   // 1D49E..1D49F
7457             UNKNOWN,                  // 1D4A0..1D4A1
7458             COMMON,                   // 1D4A2
7459             UNKNOWN,                  // 1D4A3..1D4A4
7460             COMMON,                   // 1D4A5..1D4A6
7461             UNKNOWN,                  // 1D4A7..1D4A8
7462             COMMON,                   // 1D4A9..1D4AC
7463             UNKNOWN,                  // 1D4AD
7464             COMMON,                   // 1D4AE..1D4B9
7465             UNKNOWN,                  // 1D4BA
7466             COMMON,                   // 1D4BB
7467             UNKNOWN,                  // 1D4BC
7468             COMMON,                   // 1D4BD..1D4C3
7469             UNKNOWN,                  // 1D4C4
7470             COMMON,                   // 1D4C5..1D505
7471             UNKNOWN,                  // 1D506
7472             COMMON,                   // 1D507..1D50A
7473             UNKNOWN,                  // 1D50B..1D50C
7474             COMMON,                   // 1D50D..1D514
7475             UNKNOWN,                  // 1D515
7476             COMMON,                   // 1D516..1D51C
7477             UNKNOWN,                  // 1D51D
7478             COMMON,                   // 1D51E..1D539
7479             UNKNOWN,                  // 1D53A
7480             COMMON,                   // 1D53B..1D53E
7481             UNKNOWN,                  // 1D53F
7482             COMMON,                   // 1D540..1D544
7483             UNKNOWN,                  // 1D545
7484             COMMON,                   // 1D546
7485             UNKNOWN,                  // 1D547..1D549
7486             COMMON,                   // 1D54A..1D550
7487             UNKNOWN,                  // 1D551
7488             COMMON,                   // 1D552..1D6A5
7489             UNKNOWN,                  // 1D6A6..1D6A7
7490             COMMON,                   // 1D6A8..1D7CB
7491             UNKNOWN,                  // 1D7CC..1D7CD
7492             COMMON,                   // 1D7CE..1D7FF
7493             SIGNWRITING,              // 1D800..1DA8B
7494             UNKNOWN,                  // 1DA8C..1DA9A
7495             SIGNWRITING,              // 1DA9B..1DA9F
7496             UNKNOWN,                  // 1DAA0
7497             SIGNWRITING,              // 1DAA1..1DAAF
7498             UNKNOWN,                  // 1DAB0..1DFFF
7499             GLAGOLITIC,               // 1E000..1E006
7500             UNKNOWN,                  // 1E007
7501             GLAGOLITIC,               // 1E008..1E018
7502             UNKNOWN,                  // 1E019..1E01A
7503             GLAGOLITIC,               // 1E01B..1E021
7504             UNKNOWN,                  // 1E022
7505             GLAGOLITIC,               // 1E023..1E024
7506             UNKNOWN,                  // 1E025
7507             GLAGOLITIC,               // 1E026..1E02A
7508             UNKNOWN,                  // 1E02B..1E7FF
7509             MENDE_KIKAKUI,            // 1E800..1E8C4
7510             UNKNOWN,                  // 1E8C5..1E8C6
7511             MENDE_KIKAKUI,            // 1E8C7..1E8D6
7512             UNKNOWN,                  // 1E8D7..1E8FF
7513             ADLAM,                    // 1E900..1E94A
7514             UNKNOWN,                  // 1E94B..1E94F
7515             ADLAM,                    // 1E950..1E959
7516             UNKNOWN,                  // 1E95A..1E95D
7517             ADLAM,                    // 1E95E..1E95F
7518             UNKNOWN,                  // 1E960..1EDFF
7519             ARABIC,                   // 1EE00..1EE03
7520             UNKNOWN,                  // 1EE04
7521             ARABIC,                   // 1EE05..1EE1F
7522             UNKNOWN,                  // 1EE20
7523             ARABIC,                   // 1EE21..1EE22
7524             UNKNOWN,                  // 1EE23
7525             ARABIC,                   // 1EE24
7526             UNKNOWN,                  // 1EE25..1EE26
7527             ARABIC,                   // 1EE27
7528             UNKNOWN,                  // 1EE28
7529             ARABIC,                   // 1EE29..1EE32
7530             UNKNOWN,                  // 1EE33
7531             ARABIC,                   // 1EE34..1EE37
7532             UNKNOWN,                  // 1EE38
7533             ARABIC,                   // 1EE39
7534             UNKNOWN,                  // 1EE3A
7535             ARABIC,                   // 1EE3B
7536             UNKNOWN,                  // 1EE3C..1EE41
7537             ARABIC,                   // 1EE42
7538             UNKNOWN,                  // 1EE43..1EE46
7539             ARABIC,                   // 1EE47
7540             UNKNOWN,                  // 1EE48
7541             ARABIC,                   // 1EE49
7542             UNKNOWN,                  // 1EE4A
7543             ARABIC,                   // 1EE4B
7544             UNKNOWN,                  // 1EE4C
7545             ARABIC,                   // 1EE4D..1EE4F
7546             UNKNOWN,                  // 1EE50
7547             ARABIC,                   // 1EE51..1EE52
7548             UNKNOWN,                  // 1EE53
7549             ARABIC,                   // 1EE54
7550             UNKNOWN,                  // 1EE55..1EE56
7551             ARABIC,                   // 1EE57
7552             UNKNOWN,                  // 1EE58
7553             ARABIC,                   // 1EE59
7554             UNKNOWN,                  // 1EE5A
7555             ARABIC,                   // 1EE5B
7556             UNKNOWN,                  // 1EE5C
7557             ARABIC,                   // 1EE5D
7558             UNKNOWN,                  // 1EE5E
7559             ARABIC,                   // 1EE5F
7560             UNKNOWN,                  // 1EE60
7561             ARABIC,                   // 1EE61..1EE62
7562             UNKNOWN,                  // 1EE63
7563             ARABIC,                   // 1EE64
7564             UNKNOWN,                  // 1EE65..1EE66
7565             ARABIC,                   // 1EE67..1EE6A
7566             UNKNOWN,                  // 1EE6B
7567             ARABIC,                   // 1EE6C..1EE72
7568             UNKNOWN,                  // 1EE73
7569             ARABIC,                   // 1EE74..1EE77
7570             UNKNOWN,                  // 1EE78
7571             ARABIC,                   // 1EE79..1EE7C
7572             UNKNOWN,                  // 1EE7D
7573             ARABIC,                   // 1EE7E
7574             UNKNOWN,                  // 1EE7F
7575             ARABIC,                   // 1EE80..1EE89
7576             UNKNOWN,                  // 1EE8A
7577             ARABIC,                   // 1EE8B..1EE9B
7578             UNKNOWN,                  // 1EE9C..1EEA0
7579             ARABIC,                   // 1EEA1..1EEA3
7580             UNKNOWN,                  // 1EEA4
7581             ARABIC,                   // 1EEA5..1EEA9
7582             UNKNOWN,                  // 1EEAA
7583             ARABIC,                   // 1EEAB..1EEBB
7584             UNKNOWN,                  // 1EEBC..1EEEF
7585             ARABIC,                   // 1EEF0..1EEF1
7586             UNKNOWN,                  // 1EEF2..1EFFF
7587             COMMON,                   // 1F000..1F02B
7588             UNKNOWN,                  // 1F02C..1F02F
7589             COMMON,                   // 1F030..1F093
7590             UNKNOWN,                  // 1F094..1F09F
7591             COMMON,                   // 1F0A0..1F0AE
7592             UNKNOWN,                  // 1F0AF..1F0B0
7593             COMMON,                   // 1F0B1..1F0BF
7594             UNKNOWN,                  // 1F0C0
7595             COMMON,                   // 1F0C1..1F0CF
7596             UNKNOWN,                  // 1F0D0
7597             COMMON,                   // 1F0D1..1F0F5
7598             UNKNOWN,                  // 1F0F6..1F0FF
7599             COMMON,                   // 1F100..1F10C
7600             UNKNOWN,                  // 1F10D..1F10F
7601             COMMON,                   // 1F110..1F12E
7602             UNKNOWN,                  // 1F12F
7603             COMMON,                   // 1F130..1F16B
7604             UNKNOWN,                  // 1F16C..1F16F
7605             COMMON,                   // 1F170..1F1AC
7606             UNKNOWN,                  // 1F1AD..1F1E5
7607             COMMON,                   // 1F1E6..1F1FF
7608             HIRAGANA,                 // 1F200
7609             COMMON,                   // 1F201..1F202
7610             UNKNOWN,                  // 1F203..1F20F
7611             COMMON,                   // 1F210..1F23B
7612             UNKNOWN,                  // 1F23C..1F23F
7613             COMMON,                   // 1F240..1F248
7614             UNKNOWN,                  // 1F249..1F24F
7615             COMMON,                   // 1F250..1F251
7616             UNKNOWN,                  // 1F252..1F25F
7617             COMMON,                   // 1F260..1F265
7618             UNKNOWN,                  // 1F266..1F2FF
7619             COMMON,                   // 1F300..1F6D4
7620             UNKNOWN,                  // 1F6D5..1F6DF
7621             COMMON,                   // 1F6E0..1F6EC
7622             UNKNOWN,                  // 1F6ED..1F6EF
7623             COMMON,                   // 1F6F0..1F6F8
7624             UNKNOWN,                  // 1F6F9..1F6FF
7625             COMMON,                   // 1F700..1F773
7626             UNKNOWN,                  // 1F774..1F77F
7627             COMMON,                   // 1F780..1F7D4
7628             UNKNOWN,                  // 1F7D5..1F7FF
7629             COMMON,                   // 1F800..1F80B
7630             UNKNOWN,                  // 1F80C..1F80F
7631             COMMON,                   // 1F810..1F847
7632             UNKNOWN,                  // 1F848..1F84F
7633             COMMON,                   // 1F850..1F859
7634             UNKNOWN,                  // 1F85A..1F85F
7635             COMMON,                   // 1F860..1F887
7636             UNKNOWN,                  // 1F888..1F88F
7637             COMMON,                   // 1F890..1F8AD
7638             UNKNOWN,                  // 1F8AE..1F8FF
7639             COMMON,                   // 1F900..1F90B
7640             UNKNOWN,                  // 1F90C..1F90F
7641             COMMON,                   // 1F910..1F93E
7642             UNKNOWN,                  // 1F93F
7643             COMMON,                   // 1F940..1F94C
7644             UNKNOWN,                  // 1F94D..1F94F
7645             COMMON,                   // 1F950..1F96B
7646             UNKNOWN,                  // 1F96C..1F97F
7647             COMMON,                   // 1F980..1F997
7648             UNKNOWN,                  // 1F998..1F9BF
7649             COMMON,                   // 1F9C0
7650             UNKNOWN,                  // 1F9C1..1F9CF
7651             COMMON,                   // 1F9D0..1F9E6
7652             UNKNOWN,                  // 1F9E7..1FFFF
7653             HAN,                      // 20000..2A6D6
7654             UNKNOWN,                  // 2A6D7..2A6FF
7655             HAN,                      // 2A700..2B734
7656             UNKNOWN,                  // 2B735..2B73F
7657             HAN,                      // 2B740..2B81D
7658             UNKNOWN,                  // 2B81E..2B81F
7659             HAN,                      // 2B820..2CEA1
7660             UNKNOWN,                  // 2CEA2..2CEAF
7661             HAN,                      // 2CEB0..2EBE0
7662             UNKNOWN,                  // 2EBE1..2F7FF
7663             HAN,                      // 2F800..2FA1D
7664             UNKNOWN,                  // 2FA1E..E0000
7665             COMMON,                   // E0001
7666             UNKNOWN,                  // E0002..E001F
7667             COMMON,                   // E0020..E007F
7668             UNKNOWN,                  // E0080..E00FF
7669             INHERITED,                // E0100..E01EF
7670             UNKNOWN                   // E01F0..10FFFF
7671         };
7672 
7673         private static HashMap<String, Character.UnicodeScript> aliases;
7674         static {
7675             aliases = new HashMap<>((int)(142 / 0.75f + 1.0f));
7676             aliases.put("ADLM", ADLAM);
7677             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7678             aliases.put("AHOM", AHOM);
7679             aliases.put("ARAB", ARABIC);
7680             aliases.put("ARMI", IMPERIAL_ARAMAIC);
7681             aliases.put("ARMN", ARMENIAN);
7682             aliases.put("AVST", AVESTAN);
7683             aliases.put("BALI", BALINESE);
7684             aliases.put("BAMU", BAMUM);
7685             aliases.put("BASS", BASSA_VAH);
7686             aliases.put("BATK", BATAK);
7687             aliases.put("BENG", BENGALI);
7688             aliases.put("BHKS", BHAIKSUKI);
7689             aliases.put("BOPO", BOPOMOFO);
7690             aliases.put("BRAH", BRAHMI);
7691             aliases.put("BRAI", BRAILLE);
7692             aliases.put("BUGI", BUGINESE);
7693             aliases.put("BUHD", BUHID);
7694             aliases.put("CAKM", CHAKMA);
7695             aliases.put("CANS", CANADIAN_ABORIGINAL);
7696             aliases.put("CARI", CARIAN);
7697             aliases.put("CHAM", CHAM);
7698             aliases.put("CHER", CHEROKEE);
7699             aliases.put("COPT", COPTIC);
7700             aliases.put("CPRT", CYPRIOT);
7701             aliases.put("CYRL", CYRILLIC);
7702             aliases.put("DEVA", DEVANAGARI);
7703             aliases.put("DSRT", DESERET);
7704             aliases.put("DUPL", DUPLOYAN);
7705             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7706             aliases.put("ELBA", ELBASAN);
7707             aliases.put("ETHI", ETHIOPIC);
7708             aliases.put("GEOR", GEORGIAN);
7709             aliases.put("GLAG", GLAGOLITIC);
7710             aliases.put("GONM", MASARAM_GONDI);
7711             aliases.put("GOTH", GOTHIC);
7712             aliases.put("GRAN", GRANTHA);
7713             aliases.put("GREK", GREEK);
7714             aliases.put("GUJR", GUJARATI);
7715             aliases.put("GURU", GURMUKHI);
7716             aliases.put("HANG", HANGUL);
7717             aliases.put("HANI", HAN);
7718             aliases.put("HANO", HANUNOO);
7719             aliases.put("HATR", HATRAN);
7720             aliases.put("HEBR", HEBREW);
7721             aliases.put("HIRA", HIRAGANA);
7722             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
7723             aliases.put("HMNG", PAHAWH_HMONG);
7724             // it appears we don't have the KATAKANA_OR_HIRAGANA
7725             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7726             aliases.put("HUNG", OLD_HUNGARIAN);
7727             aliases.put("ITAL", OLD_ITALIC);
7728             aliases.put("JAVA", JAVANESE);
7729             aliases.put("KALI", KAYAH_LI);
7730             aliases.put("KANA", KATAKANA);
7731             aliases.put("KHAR", KHAROSHTHI);
7732             aliases.put("KHMR", KHMER);
7733             aliases.put("KHOJ", KHOJKI);
7734             aliases.put("KNDA", KANNADA);
7735             aliases.put("KTHI", KAITHI);
7736             aliases.put("LANA", TAI_THAM);
7737             aliases.put("LAOO", LAO);
7738             aliases.put("LATN", LATIN);
7739             aliases.put("LEPC", LEPCHA);
7740             aliases.put("LIMB", LIMBU);
7741             aliases.put("LINA", LINEAR_A);
7742             aliases.put("LINB", LINEAR_B);
7743             aliases.put("LISU", LISU);
7744             aliases.put("LYCI", LYCIAN);
7745             aliases.put("LYDI", LYDIAN);
7746             aliases.put("MAHJ", MAHAJANI);
7747             aliases.put("MARC", MARCHEN);
7748             aliases.put("MAND", MANDAIC);
7749             aliases.put("MANI", MANICHAEAN);
7750             aliases.put("MEND", MENDE_KIKAKUI);
7751             aliases.put("MERC", MEROITIC_CURSIVE);
7752             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7753             aliases.put("MLYM", MALAYALAM);
7754             aliases.put("MODI", MODI);
7755             aliases.put("MONG", MONGOLIAN);
7756             aliases.put("MROO", MRO);
7757             aliases.put("MTEI", MEETEI_MAYEK);
7758             aliases.put("MULT", MULTANI);
7759             aliases.put("MYMR", MYANMAR);
7760             aliases.put("NARB", OLD_NORTH_ARABIAN);
7761             aliases.put("NBAT", NABATAEAN);
7762             aliases.put("NEWA", NEWA);
7763             aliases.put("NKOO", NKO);
7764             aliases.put("NSHU", NUSHU);
7765             aliases.put("OGAM", OGHAM);
7766             aliases.put("OLCK", OL_CHIKI);
7767             aliases.put("ORKH", OLD_TURKIC);
7768             aliases.put("ORYA", ORIYA);
7769             aliases.put("OSGE", OSAGE);
7770             aliases.put("OSMA", OSMANYA);
7771             aliases.put("PALM", PALMYRENE);
7772             aliases.put("PAUC", PAU_CIN_HAU);
7773             aliases.put("PERM", OLD_PERMIC);
7774             aliases.put("PHAG", PHAGS_PA);
7775             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
7776             aliases.put("PHLP", PSALTER_PAHLAVI);
7777             aliases.put("PHNX", PHOENICIAN);
7778             aliases.put("PLRD", MIAO);
7779             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
7780             aliases.put("RJNG", REJANG);
7781             aliases.put("RUNR", RUNIC);
7782             aliases.put("SAMR", SAMARITAN);
7783             aliases.put("SARB", OLD_SOUTH_ARABIAN);
7784             aliases.put("SAUR", SAURASHTRA);
7785             aliases.put("SGNW", SIGNWRITING);
7786             aliases.put("SHAW", SHAVIAN);
7787             aliases.put("SHRD", SHARADA);
7788             aliases.put("SIDD", SIDDHAM);
7789             aliases.put("SIND", KHUDAWADI);
7790             aliases.put("SINH", SINHALA);
7791             aliases.put("SORA", SORA_SOMPENG);
7792             aliases.put("SOYO", SOYOMBO);
7793             aliases.put("SUND", SUNDANESE);
7794             aliases.put("SYLO", SYLOTI_NAGRI);
7795             aliases.put("SYRC", SYRIAC);
7796             aliases.put("TAGB", TAGBANWA);
7797             aliases.put("TAKR", TAKRI);
7798             aliases.put("TALE", TAI_LE);
7799             aliases.put("TALU", NEW_TAI_LUE);
7800             aliases.put("TAML", TAMIL);
7801             aliases.put("TANG", TANGUT);
7802             aliases.put("TAVT", TAI_VIET);
7803             aliases.put("TELU", TELUGU);
7804             aliases.put("TFNG", TIFINAGH);
7805             aliases.put("TGLG", TAGALOG);
7806             aliases.put("THAA", THAANA);
7807             aliases.put("THAI", THAI);
7808             aliases.put("TIBT", TIBETAN);
7809             aliases.put("TIRH", TIRHUTA);
7810             aliases.put("UGAR", UGARITIC);
7811             aliases.put("VAII", VAI);
7812             aliases.put("WARA", WARANG_CITI);
7813             aliases.put("XPEO", OLD_PERSIAN);
7814             aliases.put("XSUX", CUNEIFORM);
7815             aliases.put("YIII", YI);
7816             aliases.put("ZANB", ZANABAZAR_SQUARE);
7817             aliases.put("ZINH", INHERITED);
7818             aliases.put("ZYYY", COMMON);
7819             aliases.put("ZZZZ", UNKNOWN);
7820         }
7821 
7822         /**
7823          * Returns the enum constant representing the Unicode script of which
7824          * the given character (Unicode code point) is assigned to.
7825          *
7826          * @param   codePoint the character (Unicode code point) in question.
7827          * @return  The {@code UnicodeScript} constant representing the
7828          *          Unicode script of which this character is assigned to.
7829          *
7830          * @throws  IllegalArgumentException if the specified
7831          * {@code codePoint} is an invalid Unicode code point.
7832          * @see Character#isValidCodePoint(int)
7833          *
7834          */
7835         public static UnicodeScript of(int codePoint) {
7836             if (!isValidCodePoint(codePoint))
7837                 throw new IllegalArgumentException(
7838                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
7839             int type = getType(codePoint);
7840             // leave SURROGATE and PRIVATE_USE for table lookup
7841             if (type == UNASSIGNED)
7842                 return UNKNOWN;
7843             int index = Arrays.binarySearch(scriptStarts, codePoint);
7844             if (index < 0)
7845                 index = -index - 2;
7846             return scripts[index];
7847         }
7848 
7849         /**
7850          * Returns the UnicodeScript constant with the given Unicode script
7851          * name or the script name alias. Script names and their aliases are
7852          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
7853          * and {@code PropertyValueAliases<version>.txt} define script names
7854          * and the script name aliases for a particular version of the
7855          * standard. The {@link Character} class specifies the version of
7856          * the standard that it supports.
7857          * <p>
7858          * Character case is ignored for all of the valid script names.
7859          * The en_US locale's case mapping rules are used to provide
7860          * case-insensitive string comparisons for script name validation.
7861          *
7862          * @param scriptName A {@code UnicodeScript} name.
7863          * @return The {@code UnicodeScript} constant identified
7864          *         by {@code scriptName}
7865          * @throws IllegalArgumentException if {@code scriptName} is an
7866          *         invalid name
7867          * @throws NullPointerException if {@code scriptName} is null
7868          */
7869         public static final UnicodeScript forName(String scriptName) {
7870             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
7871                                  //.replace(' ', '_'));
7872             UnicodeScript sc = aliases.get(scriptName);
7873             if (sc != null)
7874                 return sc;
7875             return valueOf(scriptName);
7876         }
7877     }
7878 
    /**
     * The {@code char} value held by this {@code Character}; assigned once
     * by the constructor.
     *
     * @serial
     */
    private final char value;

    /** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;
7888 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     *
     * @param  value   the value to be represented by the
     *                 {@code Character} object.
     *
     * @deprecated
     * It is rarely appropriate to use this constructor. The static factory
     * {@link #valueOf(char)} is generally a better choice, as it is
     * likely to yield significantly better space and time performance.
     */
    @Deprecated(since="9")
    public Character(char value) {
        this.value = value;
    }
7905 
7906     private static class CharacterCache {
7907         private CharacterCache(){}
7908 
7909         static final Character cache[] = new Character[127 + 1];
7910 
7911         static {
7912             for (int i = 0; i < cache.length; i++)
7913                 cache[i] = new Character((char)i);
7914         }
7915     }
7916 
7917     /**
7918      * Returns a {@code Character} instance representing the specified
7919      * {@code char} value.
7920      * If a new {@code Character} instance is not required, this method
7921      * should generally be used in preference to the constructor
7922      * {@link #Character(char)}, as this method is likely to yield
7923      * significantly better space and time performance by caching
7924      * frequently requested values.
7925      *
7926      * This method will always cache values in the range {@code
7927      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
7928      * cache other values outside of this range.
7929      *
7930      * @param  c a char value.
7931      * @return a {@code Character} instance representing {@code c}.
7932      * @since  1.5
7933      */
7934     @HotSpotIntrinsicCandidate
7935     public static Character valueOf(char c) {
7936         if (c <= 127) { // must cache
7937             return CharacterCache.cache[(int)c];
7938         }
7939         return new Character(c);
7940     }
7941 
    /**
     * Returns the value of this {@code Character} object.
     *
     * @return  the primitive {@code char} value represented by
     *          this object.
     */
    @HotSpotIntrinsicCandidate
    public char charValue() {
        return value;
    }
7951 
    /**
     * Returns a hash code for this {@code Character}; equal to the result
     * of invoking {@code charValue()}.
     *
     * @return a hash code value for this {@code Character}
     */
    @Override
    public int hashCode() {
        // Delegates to the static overload so both variants always agree.
        return Character.hashCode(value);
    }
7962 
7963     /**
7964      * Returns a hash code for a {@code char} value; compatible with
7965      * {@code Character.hashCode()}.
7966      *
7967      * @since 1.8
7968      *
7969      * @param value The {@code char} for which to return a hash code.
7970      * @return a hash code value for a {@code char} value.
7971      */
7972     public static int hashCode(char value) {
7973         return (int)value;
7974     }
7975 
7976     /**
7977      * Compares this object against the specified object.
7978      * The result is {@code true} if and only if the argument is not
7979      * {@code null} and is a {@code Character} object that
7980      * represents the same {@code char} value as this object.
7981      *
7982      * @param   obj   the object to compare with.
7983      * @return  {@code true} if the objects are the same;
7984      *          {@code false} otherwise.
7985      */
7986     public boolean equals(Object obj) {
7987         if (obj instanceof Character) {
7988             return value == ((Character)obj).charValue();
7989         }
7990         return false;
7991     }
7992 
7993     /**
7994      * Returns a {@code String} object representing this
7995      * {@code Character}'s value.  The result is a string of
7996      * length 1 whose sole component is the primitive
7997      * {@code char} value represented by this
7998      * {@code Character} object.
7999      *
8000      * @return  a string representation of this object.
8001      */
8002     public String toString() {
8003         char buf[] = {value};
8004         return String.valueOf(buf);
8005     }
8006 
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     *
     * @apiNote This method cannot handle <a
     * href="#supplementary">supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #toString(int)} method.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }
8024 
    /**
     * Returns a {@code String} object representing the
     * specified character (Unicode code point).  The result is a string of
     * length 1 or 2, consisting solely of the specified {@code codePoint}.
     *
     * <p>The conversion is delegated to {@code String.valueOfCodePoint}.
     *
     * @param codePoint the {@code codePoint} to be converted
     * @return the string representation of the specified {@code codePoint}
     * @throws IllegalArgumentException if the specified
     *      {@code codePoint} is not a {@linkplain #isValidCodePoint
     *      valid Unicode code point}.
     * @since 11
     */
    public static String toString(int codePoint) {
        return String.valueOfCodePoint(codePoint);
    }
8040 
8041     /**
8042      * Determines whether the specified code point is a valid
8043      * <a href="http://www.unicode.org/glossary/#code_point">
8044      * Unicode code point value</a>.
8045      *
8046      * @param  codePoint the Unicode code point to be tested
8047      * @return {@code true} if the specified code point value is between
8048      *         {@link #MIN_CODE_POINT} and
8049      *         {@link #MAX_CODE_POINT} inclusive;
8050      *         {@code false} otherwise.
8051      * @since  1.5
8052      */
8053     public static boolean isValidCodePoint(int codePoint) {
8054         // Optimized form of:
8055         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8056         int plane = codePoint >>> 16;
8057         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8058     }
8059 
8060     /**
8061      * Determines whether the specified character (Unicode code point)
8062      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8063      * Such code points can be represented using a single {@code char}.
8064      *
8065      * @param  codePoint the character (Unicode code point) to be tested
8066      * @return {@code true} if the specified code point is between
8067      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8068      *         {@code false} otherwise.
8069      * @since  1.7
8070      */
8071     public static boolean isBmpCodePoint(int codePoint) {
8072         return codePoint >>> 16 == 0;
8073         // Optimized form of:
8074         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8075         // We consistently use logical shift (>>>) to facilitate
8076         // additional runtime optimizations.
8077     }
8078 
8079     /**
8080      * Determines whether the specified character (Unicode code point)
8081      * is in the <a href="#supplementary">supplementary character</a> range.
8082      *
8083      * @param  codePoint the character (Unicode code point) to be tested
8084      * @return {@code true} if the specified code point is between
8085      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8086      *         {@link #MAX_CODE_POINT} inclusive;
8087      *         {@code false} otherwise.
8088      * @since  1.5
8089      */
8090     public static boolean isSupplementaryCodePoint(int codePoint) {
8091         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8092             && codePoint <  MAX_CODE_POINT + 1;
8093     }
8094 
8095     /**
8096      * Determines if the given {@code char} value is a
8097      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8098      * Unicode high-surrogate code unit</a>
8099      * (also known as <i>leading-surrogate code unit</i>).
8100      *
8101      * <p>Such values do not represent characters by themselves,
8102      * but are used in the representation of
8103      * <a href="#supplementary">supplementary characters</a>
8104      * in the UTF-16 encoding.
8105      *
8106      * @param  ch the {@code char} value to be tested.
8107      * @return {@code true} if the {@code char} value is between
8108      *         {@link #MIN_HIGH_SURROGATE} and
8109      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8110      *         {@code false} otherwise.
8111      * @see    Character#isLowSurrogate(char)
8112      * @see    Character.UnicodeBlock#of(int)
8113      * @since  1.5
8114      */
8115     public static boolean isHighSurrogate(char ch) {
8116         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8117         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8118     }
8119 
8120     /**
8121      * Determines if the given {@code char} value is a
8122      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8123      * Unicode low-surrogate code unit</a>
8124      * (also known as <i>trailing-surrogate code unit</i>).
8125      *
8126      * <p>Such values do not represent characters by themselves,
8127      * but are used in the representation of
8128      * <a href="#supplementary">supplementary characters</a>
8129      * in the UTF-16 encoding.
8130      *
8131      * @param  ch the {@code char} value to be tested.
8132      * @return {@code true} if the {@code char} value is between
8133      *         {@link #MIN_LOW_SURROGATE} and
8134      *         {@link #MAX_LOW_SURROGATE} inclusive;
8135      *         {@code false} otherwise.
8136      * @see    Character#isHighSurrogate(char)
8137      * @since  1.5
8138      */
8139     public static boolean isLowSurrogate(char ch) {
8140         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8141     }
8142 
8143     /**
8144      * Determines if the given {@code char} value is a Unicode
8145      * <i>surrogate code unit</i>.
8146      *
8147      * <p>Such values do not represent characters by themselves,
8148      * but are used in the representation of
8149      * <a href="#supplementary">supplementary characters</a>
8150      * in the UTF-16 encoding.
8151      *
8152      * <p>A char value is a surrogate code unit if and only if it is either
8153      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8154      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8155      *
8156      * @param  ch the {@code char} value to be tested.
8157      * @return {@code true} if the {@code char} value is between
8158      *         {@link #MIN_SURROGATE} and
8159      *         {@link #MAX_SURROGATE} inclusive;
8160      *         {@code false} otherwise.
8161      * @since  1.7
8162      */
8163     public static boolean isSurrogate(char ch) {
8164         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8165     }
8166 
8167     /**
8168      * Determines whether the specified pair of {@code char}
8169      * values is a valid
8170      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8171      * Unicode surrogate pair</a>.
8172 
8173      * <p>This method is equivalent to the expression:
8174      * <blockquote><pre>{@code
8175      * isHighSurrogate(high) && isLowSurrogate(low)
8176      * }</pre></blockquote>
8177      *
8178      * @param  high the high-surrogate code value to be tested
8179      * @param  low the low-surrogate code value to be tested
8180      * @return {@code true} if the specified high and
8181      * low-surrogate code values represent a valid surrogate pair;
8182      * {@code false} otherwise.
8183      * @since  1.5
8184      */
8185     public static boolean isSurrogatePair(char high, char low) {
8186         return isHighSurrogate(high) && isLowSurrogate(low);
8187     }
8188 
8189     /**
8190      * Determines the number of {@code char} values needed to
8191      * represent the specified character (Unicode code point). If the
8192      * specified character is equal to or greater than 0x10000, then
8193      * the method returns 2. Otherwise, the method returns 1.
8194      *
8195      * <p>This method doesn't validate the specified character to be a
8196      * valid Unicode code point. The caller must validate the
8197      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8198      * if necessary.
8199      *
8200      * @param   codePoint the character (Unicode code point) to be tested.
8201      * @return  2 if the character is a valid supplementary character; 1 otherwise.
8202      * @see     Character#isSupplementaryCodePoint(int)
8203      * @since   1.5
8204      */
8205     public static int charCount(int codePoint) {
8206         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8207     }
8208 
8209     /**
8210      * Converts the specified surrogate pair to its supplementary code
8211      * point value. This method does not validate the specified
8212      * surrogate pair. The caller must validate it using {@link
8213      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8214      *
8215      * @param  high the high-surrogate code unit
8216      * @param  low the low-surrogate code unit
8217      * @return the supplementary code point composed from the
8218      *         specified surrogate pair.
8219      * @since  1.5
8220      */
8221     public static int toCodePoint(char high, char low) {
8222         // Optimized form of:
8223         // return ((high - MIN_HIGH_SURROGATE) << 10)
8224         //         + (low - MIN_LOW_SURROGATE)
8225         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8226         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8227                                        - (MIN_HIGH_SURROGATE << 10)
8228                                        - MIN_LOW_SURROGATE);
8229     }
8230 
8231     /**
8232      * Returns the code point at the given index of the
8233      * {@code CharSequence}. If the {@code char} value at
8234      * the given index in the {@code CharSequence} is in the
8235      * high-surrogate range, the following index is less than the
8236      * length of the {@code CharSequence}, and the
8237      * {@code char} value at the following index is in the
8238      * low-surrogate range, then the supplementary code point
8239      * corresponding to this surrogate pair is returned. Otherwise,
8240      * the {@code char} value at the given index is returned.
8241      *
8242      * @param seq a sequence of {@code char} values (Unicode code
8243      * units)
8244      * @param index the index to the {@code char} values (Unicode
8245      * code units) in {@code seq} to be converted
8246      * @return the Unicode code point at the given index
8247      * @throws NullPointerException if {@code seq} is null.
8248      * @throws IndexOutOfBoundsException if the value
8249      * {@code index} is negative or not less than
8250      * {@link CharSequence#length() seq.length()}.
8251      * @since  1.5
8252      */
8253     public static int codePointAt(CharSequence seq, int index) {
8254         char c1 = seq.charAt(index);
8255         if (isHighSurrogate(c1) && ++index < seq.length()) {
8256             char c2 = seq.charAt(index);
8257             if (isLowSurrogate(c2)) {
8258                 return toCodePoint(c1, c2);
8259             }
8260         }
8261         return c1;
8262     }
8263 
8264     /**
8265      * Returns the code point at the given index of the
8266      * {@code char} array. If the {@code char} value at
8267      * the given index in the {@code char} array is in the
8268      * high-surrogate range, the following index is less than the
8269      * length of the {@code char} array, and the
8270      * {@code char} value at the following index is in the
8271      * low-surrogate range, then the supplementary code point
8272      * corresponding to this surrogate pair is returned. Otherwise,
8273      * the {@code char} value at the given index is returned.
8274      *
8275      * @param a the {@code char} array
8276      * @param index the index to the {@code char} values (Unicode
8277      * code units) in the {@code char} array to be converted
8278      * @return the Unicode code point at the given index
8279      * @throws NullPointerException if {@code a} is null.
8280      * @throws IndexOutOfBoundsException if the value
8281      * {@code index} is negative or not less than
8282      * the length of the {@code char} array.
8283      * @since  1.5
8284      */
8285     public static int codePointAt(char[] a, int index) {
8286         return codePointAtImpl(a, index, a.length);
8287     }
8288 
8289     /**
8290      * Returns the code point at the given index of the
8291      * {@code char} array, where only array elements with
8292      * {@code index} less than {@code limit} can be used. If
8293      * the {@code char} value at the given index in the
8294      * {@code char} array is in the high-surrogate range, the
8295      * following index is less than the {@code limit}, and the
8296      * {@code char} value at the following index is in the
8297      * low-surrogate range, then the supplementary code point
8298      * corresponding to this surrogate pair is returned. Otherwise,
8299      * the {@code char} value at the given index is returned.
8300      *
8301      * @param a the {@code char} array
8302      * @param index the index to the {@code char} values (Unicode
8303      * code units) in the {@code char} array to be converted
8304      * @param limit the index after the last array element that
8305      * can be used in the {@code char} array
8306      * @return the Unicode code point at the given index
8307      * @throws NullPointerException if {@code a} is null.
8308      * @throws IndexOutOfBoundsException if the {@code index}
8309      * argument is negative or not less than the {@code limit}
8310      * argument, or if the {@code limit} argument is negative or
8311      * greater than the length of the {@code char} array.
8312      * @since  1.5
8313      */
8314     public static int codePointAt(char[] a, int index, int limit) {
8315         if (index >= limit || limit < 0 || limit > a.length) {
8316             throw new IndexOutOfBoundsException();
8317         }
8318         return codePointAtImpl(a, index, limit);
8319     }
8320 
8321     // throws ArrayIndexOutOfBoundsException if index out of bounds
8322     static int codePointAtImpl(char[] a, int index, int limit) {
8323         char c1 = a[index];
8324         if (isHighSurrogate(c1) && ++index < limit) {
8325             char c2 = a[index];
8326             if (isLowSurrogate(c2)) {
8327                 return toCodePoint(c1, c2);
8328             }
8329         }
8330         return c1;
8331     }
8332 
8333     /**
8334      * Returns the code point preceding the given index of the
8335      * {@code CharSequence}. If the {@code char} value at
8336      * {@code (index - 1)} in the {@code CharSequence} is in
8337      * the low-surrogate range, {@code (index - 2)} is not
8338      * negative, and the {@code char} value at {@code (index - 2)}
8339      * in the {@code CharSequence} is in the
8340      * high-surrogate range, then the supplementary code point
8341      * corresponding to this surrogate pair is returned. Otherwise,
8342      * the {@code char} value at {@code (index - 1)} is
8343      * returned.
8344      *
8345      * @param seq the {@code CharSequence} instance
8346      * @param index the index following the code point that should be returned
8347      * @return the Unicode code point value before the given index.
8348      * @throws NullPointerException if {@code seq} is null.
8349      * @throws IndexOutOfBoundsException if the {@code index}
8350      * argument is less than 1 or greater than {@link
8351      * CharSequence#length() seq.length()}.
8352      * @since  1.5
8353      */
8354     public static int codePointBefore(CharSequence seq, int index) {
8355         char c2 = seq.charAt(--index);
8356         if (isLowSurrogate(c2) && index > 0) {
8357             char c1 = seq.charAt(--index);
8358             if (isHighSurrogate(c1)) {
8359                 return toCodePoint(c1, c2);
8360             }
8361         }
8362         return c2;
8363     }
8364 
    /**
     * Returns the code point preceding the given index of the
     * {@code char} array. If the {@code char} value at
     * {@code (index - 1)} in the {@code char} array is in
     * the low-surrogate range, {@code (index - 2)} is not
     * negative, and the {@code char} value at {@code (index - 2)}
     * in the {@code char} array is in the
     * high-surrogate range, then the supplementary code point
     * corresponding to this surrogate pair is returned. Otherwise,
     * the {@code char} value at {@code (index - 1)} is
     * returned.
     *
     * @param a the {@code char} array
     * @param index the index following the code point that should be returned
     * @return the Unicode code point value before the given index.
     * @throws NullPointerException if {@code a} is null.
     * @throws IndexOutOfBoundsException if the {@code index}
     * argument is less than 1 or greater than the length of the
     * {@code char} array
     * @since  1.5
     */
    public static int codePointBefore(char[] a, int index) {
        // The whole array is usable, so the lower bound passed on is 0.
        return codePointBeforeImpl(a, index, 0);
    }
8389 
8390     /**
8391      * Returns the code point preceding the given index of the
8392      * {@code char} array, where only array elements with
8393      * {@code index} greater than or equal to {@code start}
8394      * can be used. If the {@code char} value at {@code (index - 1)}
8395      * in the {@code char} array is in the
8396      * low-surrogate range, {@code (index - 2)} is not less than
8397      * {@code start}, and the {@code char} value at
8398      * {@code (index - 2)} in the {@code char} array is in
8399      * the high-surrogate range, then the supplementary code point
8400      * corresponding to this surrogate pair is returned. Otherwise,
8401      * the {@code char} value at {@code (index - 1)} is
8402      * returned.
8403      *
8404      * @param a the {@code char} array
8405      * @param index the index following the code point that should be returned
8406      * @param start the index of the first array element in the
8407      * {@code char} array
8408      * @return the Unicode code point value before the given index.
8409      * @throws NullPointerException if {@code a} is null.
8410      * @throws IndexOutOfBoundsException if the {@code index}
8411      * argument is not greater than the {@code start} argument or
8412      * is greater than the length of the {@code char} array, or
8413      * if the {@code start} argument is negative or not less than
8414      * the length of the {@code char} array.
8415      * @since  1.5
8416      */
8417     public static int codePointBefore(char[] a, int index, int start) {
8418         if (index <= start || start < 0 || start >= a.length) {
8419             throw new IndexOutOfBoundsException();
8420         }
8421         return codePointBeforeImpl(a, index, start);
8422     }
8423 
8424     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8425     static int codePointBeforeImpl(char[] a, int index, int start) {
8426         char c2 = a[--index];
8427         if (isLowSurrogate(c2) && index > start) {
8428             char c1 = a[--index];
8429             if (isHighSurrogate(c1)) {
8430                 return toCodePoint(c1, c2);
8431             }
8432         }
8433         return c2;
8434     }
8435 
8436     /**
8437      * Returns the leading surrogate (a
8438      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8439      * high surrogate code unit</a>) of the
8440      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8441      * surrogate pair</a>
8442      * representing the specified supplementary character (Unicode
8443      * code point) in the UTF-16 encoding.  If the specified character
8444      * is not a
8445      * <a href="Character.html#supplementary">supplementary character</a>,
8446      * an unspecified {@code char} is returned.
8447      *
8448      * <p>If
8449      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8450      * is {@code true}, then
8451      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8452      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8453      * are also always {@code true}.
8454      *
8455      * @param   codePoint a supplementary character (Unicode code point)
8456      * @return  the leading surrogate code unit used to represent the
8457      *          character in the UTF-16 encoding
8458      * @since   1.7
8459      */
8460     public static char highSurrogate(int codePoint) {
8461         return (char) ((codePoint >>> 10)
8462             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8463     }
8464 
8465     /**
8466      * Returns the trailing surrogate (a
8467      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8468      * low surrogate code unit</a>) of the
8469      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8470      * surrogate pair</a>
8471      * representing the specified supplementary character (Unicode
8472      * code point) in the UTF-16 encoding.  If the specified character
8473      * is not a
8474      * <a href="Character.html#supplementary">supplementary character</a>,
8475      * an unspecified {@code char} is returned.
8476      *
8477      * <p>If
8478      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8479      * is {@code true}, then
8480      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8481      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8482      * are also always {@code true}.
8483      *
8484      * @param   codePoint a supplementary character (Unicode code point)
8485      * @return  the trailing surrogate code unit used to represent the
8486      *          character in the UTF-16 encoding
8487      * @since   1.7
8488      */
8489     public static char lowSurrogate(int codePoint) {
8490         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8491     }
8492 
8493     /**
8494      * Converts the specified character (Unicode code point) to its
8495      * UTF-16 representation. If the specified code point is a BMP
8496      * (Basic Multilingual Plane or Plane 0) value, the same value is
8497      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8498      * specified code point is a supplementary character, its
8499      * surrogate values are stored in {@code dst[dstIndex]}
8500      * (high-surrogate) and {@code dst[dstIndex+1]}
8501      * (low-surrogate), and 2 is returned.
8502      *
8503      * @param  codePoint the character (Unicode code point) to be converted.
8504      * @param  dst an array of {@code char} in which the
8505      * {@code codePoint}'s UTF-16 value is stored.
8506      * @param dstIndex the start index into the {@code dst}
8507      * array where the converted value is stored.
8508      * @return 1 if the code point is a BMP code point, 2 if the
8509      * code point is a supplementary code point.
8510      * @throws IllegalArgumentException if the specified
8511      * {@code codePoint} is not a valid Unicode code point.
8512      * @throws NullPointerException if the specified {@code dst} is null.
8513      * @throws IndexOutOfBoundsException if {@code dstIndex}
8514      * is negative or not less than {@code dst.length}, or if
8515      * {@code dst} at {@code dstIndex} doesn't have enough
8516      * array element(s) to store the resulting {@code char}
8517      * value(s). (If {@code dstIndex} is equal to
8518      * {@code dst.length-1} and the specified
8519      * {@code codePoint} is a supplementary character, the
8520      * high-surrogate value is not stored in
8521      * {@code dst[dstIndex]}.)
8522      * @since  1.5
8523      */
8524     public static int toChars(int codePoint, char[] dst, int dstIndex) {
8525         if (isBmpCodePoint(codePoint)) {
8526             dst[dstIndex] = (char) codePoint;
8527             return 1;
8528         } else if (isValidCodePoint(codePoint)) {
8529             toSurrogates(codePoint, dst, dstIndex);
8530             return 2;
8531         } else {
8532             throw new IllegalArgumentException(
8533                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8534         }
8535     }
8536 
8537     /**
8538      * Converts the specified character (Unicode code point) to its
8539      * UTF-16 representation stored in a {@code char} array. If
8540      * the specified code point is a BMP (Basic Multilingual Plane or
8541      * Plane 0) value, the resulting {@code char} array has
8542      * the same value as {@code codePoint}. If the specified code
8543      * point is a supplementary code point, the resulting
8544      * {@code char} array has the corresponding surrogate pair.
8545      *
8546      * @param  codePoint a Unicode code point
8547      * @return a {@code char} array having
8548      *         {@code codePoint}'s UTF-16 representation.
8549      * @throws IllegalArgumentException if the specified
8550      * {@code codePoint} is not a valid Unicode code point.
8551      * @since  1.5
8552      */
8553     public static char[] toChars(int codePoint) {
8554         if (isBmpCodePoint(codePoint)) {
8555             return new char[] { (char) codePoint };
8556         } else if (isValidCodePoint(codePoint)) {
8557             char[] result = new char[2];
8558             toSurrogates(codePoint, result, 0);
8559             return result;
8560         } else {
8561             throw new IllegalArgumentException(
8562                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8563         }
8564     }
8565 
8566     static void toSurrogates(int codePoint, char[] dst, int index) {
8567         // We write elements "backwards" to guarantee all-or-nothing
8568         dst[index+1] = lowSurrogate(codePoint);
8569         dst[index] = highSurrogate(codePoint);
8570     }
8571 
8572     /**
8573      * Returns the number of Unicode code points in the text range of
8574      * the specified char sequence. The text range begins at the
8575      * specified {@code beginIndex} and extends to the
8576      * {@code char} at index {@code endIndex - 1}. Thus the
8577      * length (in {@code char}s) of the text range is
8578      * {@code endIndex-beginIndex}. Unpaired surrogates within
8579      * the text range count as one code point each.
8580      *
8581      * @param seq the char sequence
8582      * @param beginIndex the index to the first {@code char} of
8583      * the text range.
8584      * @param endIndex the index after the last {@code char} of
8585      * the text range.
8586      * @return the number of Unicode code points in the specified text
8587      * range
8588      * @throws NullPointerException if {@code seq} is null.
8589      * @throws IndexOutOfBoundsException if the
8590      * {@code beginIndex} is negative, or {@code endIndex}
8591      * is larger than the length of the given sequence, or
8592      * {@code beginIndex} is larger than {@code endIndex}.
8593      * @since  1.5
8594      */
8595     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
8596         int length = seq.length();
8597         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
8598             throw new IndexOutOfBoundsException();
8599         }
8600         int n = endIndex - beginIndex;
8601         for (int i = beginIndex; i < endIndex; ) {
8602             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
8603                 isLowSurrogate(seq.charAt(i))) {
8604                 n--;
8605                 i++;
8606             }
8607         }
8608         return n;
8609     }
8610 
8611     /**
8612      * Returns the number of Unicode code points in a subarray of the
8613      * {@code char} array argument. The {@code offset}
8614      * argument is the index of the first {@code char} of the
8615      * subarray and the {@code count} argument specifies the
8616      * length of the subarray in {@code char}s. Unpaired
8617      * surrogates within the subarray count as one code point each.
8618      *
8619      * @param a the {@code char} array
8620      * @param offset the index of the first {@code char} in the
8621      * given {@code char} array
8622      * @param count the length of the subarray in {@code char}s
8623      * @return the number of Unicode code points in the specified subarray
8624      * @throws NullPointerException if {@code a} is null.
8625      * @throws IndexOutOfBoundsException if {@code offset} or
8626      * {@code count} is negative, or if {@code offset +
8627      * count} is larger than the length of the given array.
8628      * @since  1.5
8629      */
8630     public static int codePointCount(char[] a, int offset, int count) {
8631         if (count > a.length - offset || offset < 0 || count < 0) {
8632             throw new IndexOutOfBoundsException();
8633         }
8634         return codePointCountImpl(a, offset, count);
8635     }
8636 
8637     static int codePointCountImpl(char[] a, int offset, int count) {
8638         int endIndex = offset + count;
8639         int n = count;
8640         for (int i = offset; i < endIndex; ) {
8641             if (isHighSurrogate(a[i++]) && i < endIndex &&
8642                 isLowSurrogate(a[i])) {
8643                 n--;
8644                 i++;
8645             }
8646         }
8647         return n;
8648     }
8649 
8650     /**
8651      * Returns the index within the given char sequence that is offset
8652      * from the given {@code index} by {@code codePointOffset}
8653      * code points. Unpaired surrogates within the text range given by
8654      * {@code index} and {@code codePointOffset} count as
8655      * one code point each.
8656      *
8657      * @param seq the char sequence
8658      * @param index the index to be offset
8659      * @param codePointOffset the offset in code points
8660      * @return the index within the char sequence
8661      * @throws NullPointerException if {@code seq} is null.
8662      * @throws IndexOutOfBoundsException if {@code index}
8663      *   is negative or larger then the length of the char sequence,
8664      *   or if {@code codePointOffset} is positive and the
8665      *   subsequence starting with {@code index} has fewer than
8666      *   {@code codePointOffset} code points, or if
8667      *   {@code codePointOffset} is negative and the subsequence
8668      *   before {@code index} has fewer than the absolute value
8669      *   of {@code codePointOffset} code points.
8670      * @since 1.5
8671      */
8672     public static int offsetByCodePoints(CharSequence seq, int index,
8673                                          int codePointOffset) {
8674         int length = seq.length();
8675         if (index < 0 || index > length) {
8676             throw new IndexOutOfBoundsException();
8677         }
8678 
8679         int x = index;
8680         if (codePointOffset >= 0) {
8681             int i;
8682             for (i = 0; x < length && i < codePointOffset; i++) {
8683                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8684                     isLowSurrogate(seq.charAt(x))) {
8685                     x++;
8686                 }
8687             }
8688             if (i < codePointOffset) {
8689                 throw new IndexOutOfBoundsException();
8690             }
8691         } else {
8692             int i;
8693             for (i = codePointOffset; x > 0 && i < 0; i++) {
8694                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8695                     isHighSurrogate(seq.charAt(x-1))) {
8696                     x--;
8697                 }
8698             }
8699             if (i < 0) {
8700                 throw new IndexOutOfBoundsException();
8701             }
8702         }
8703         return x;
8704     }
8705 
8706     /**
8707      * Returns the index within the given {@code char} subarray
8708      * that is offset from the given {@code index} by
8709      * {@code codePointOffset} code points. The
8710      * {@code start} and {@code count} arguments specify a
8711      * subarray of the {@code char} array. Unpaired surrogates
8712      * within the text range given by {@code index} and
8713      * {@code codePointOffset} count as one code point each.
8714      *
8715      * @param a the {@code char} array
8716      * @param start the index of the first {@code char} of the
8717      * subarray
8718      * @param count the length of the subarray in {@code char}s
8719      * @param index the index to be offset
8720      * @param codePointOffset the offset in code points
8721      * @return the index within the subarray
8722      * @throws NullPointerException if {@code a} is null.
8723      * @throws IndexOutOfBoundsException
8724      *   if {@code start} or {@code count} is negative,
8725      *   or if {@code start + count} is larger than the length of
8726      *   the given array,
8727      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
8729      *   or if {@code codePointOffset} is positive and the text range
8730      *   starting with {@code index} and ending with {@code start + count - 1}
8731      *   has fewer than {@code codePointOffset} code
8732      *   points,
8733      *   or if {@code codePointOffset} is negative and the text range
8734      *   starting with {@code start} and ending with {@code index - 1}
8735      *   has fewer than the absolute value of
8736      *   {@code codePointOffset} code points.
8737      * @since 1.5
8738      */
8739     public static int offsetByCodePoints(char[] a, int start, int count,
8740                                          int index, int codePointOffset) {
8741         if (count > a.length-start || start < 0 || count < 0
8742             || index < start || index > start+count) {
8743             throw new IndexOutOfBoundsException();
8744         }
8745         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8746     }
8747 
8748     static int offsetByCodePointsImpl(char[]a, int start, int count,
8749                                       int index, int codePointOffset) {
8750         int x = index;
8751         if (codePointOffset >= 0) {
8752             int limit = start + count;
8753             int i;
8754             for (i = 0; x < limit && i < codePointOffset; i++) {
8755                 if (isHighSurrogate(a[x++]) && x < limit &&
8756                     isLowSurrogate(a[x])) {
8757                     x++;
8758                 }
8759             }
8760             if (i < codePointOffset) {
8761                 throw new IndexOutOfBoundsException();
8762             }
8763         } else {
8764             int i;
8765             for (i = codePointOffset; x > start && i < 0; i++) {
8766                 if (isLowSurrogate(a[--x]) && x > start &&
8767                     isHighSurrogate(a[x-1])) {
8768                     x--;
8769                 }
8770             }
8771             if (i < 0) {
8772                 throw new IndexOutOfBoundsException();
8773             }
8774         }
8775         return x;
8776     }
8777 
8778     /**
8779      * Determines if the specified character is a lowercase character.
8780      * <p>
8781      * A character is lowercase if its general category type, provided
8782      * by {@code Character.getType(ch)}, is
8783      * {@code LOWERCASE_LETTER}, or it has contributory property
8784      * Other_Lowercase as defined by the Unicode Standard.
8785      * <p>
8786      * The following are examples of lowercase characters:
8787      * <blockquote><pre>
8788      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8789      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8790      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8791      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8792      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8793      * </pre></blockquote>
8794      * <p> Many other Unicode characters are lowercase too.
8795      *
8796      * <p><b>Note:</b> This method cannot handle <a
8797      * href="#supplementary"> supplementary characters</a>. To support
8798      * all Unicode characters, including supplementary characters, use
8799      * the {@link #isLowerCase(int)} method.
8800      *
8801      * @param   ch   the character to be tested.
8802      * @return  {@code true} if the character is lowercase;
8803      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
8805      * @see     Character#isTitleCase(char)
8806      * @see     Character#toLowerCase(char)
8807      * @see     Character#getType(char)
8808      */
8809     public static boolean isLowerCase(char ch) {
8810         return isLowerCase((int)ch);
8811     }
8812 
8813     /**
8814      * Determines if the specified character (Unicode code point) is a
8815      * lowercase character.
8816      * <p>
8817      * A character is lowercase if its general category type, provided
8818      * by {@link Character#getType getType(codePoint)}, is
8819      * {@code LOWERCASE_LETTER}, or it has contributory property
8820      * Other_Lowercase as defined by the Unicode Standard.
8821      * <p>
8822      * The following are examples of lowercase characters:
8823      * <blockquote><pre>
8824      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8825      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8826      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8827      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8828      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8829      * </pre></blockquote>
8830      * <p> Many other Unicode characters are lowercase too.
8831      *
8832      * @param   codePoint the character (Unicode code point) to be tested.
8833      * @return  {@code true} if the character is lowercase;
8834      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
8836      * @see     Character#isTitleCase(int)
8837      * @see     Character#toLowerCase(int)
8838      * @see     Character#getType(int)
8839      * @since   1.5
8840      */
8841     public static boolean isLowerCase(int codePoint) {
8842         return getType(codePoint) == Character.LOWERCASE_LETTER ||
8843                CharacterData.of(codePoint).isOtherLowercase(codePoint);
8844     }
8845 
8846     /**
8847      * Determines if the specified character is an uppercase character.
8848      * <p>
8849      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
8851      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8852      * <p>
8853      * The following are examples of uppercase characters:
8854      * <blockquote><pre>
8855      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8856      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8857      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8858      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8859      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8860      * </pre></blockquote>
8861      * <p> Many other Unicode characters are uppercase too.
8862      *
8863      * <p><b>Note:</b> This method cannot handle <a
8864      * href="#supplementary"> supplementary characters</a>. To support
8865      * all Unicode characters, including supplementary characters, use
8866      * the {@link #isUpperCase(int)} method.
8867      *
8868      * @param   ch   the character to be tested.
8869      * @return  {@code true} if the character is uppercase;
8870      *          {@code false} otherwise.
8871      * @see     Character#isLowerCase(char)
8872      * @see     Character#isTitleCase(char)
8873      * @see     Character#toUpperCase(char)
8874      * @see     Character#getType(char)
8875      * @since   1.0
8876      */
8877     public static boolean isUpperCase(char ch) {
8878         return isUpperCase((int)ch);
8879     }
8880 
8881     /**
8882      * Determines if the specified character (Unicode code point) is an uppercase character.
8883      * <p>
8884      * A character is uppercase if its general category type, provided by
8885      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
8886      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8887      * <p>
8888      * The following are examples of uppercase characters:
8889      * <blockquote><pre>
8890      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8891      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8892      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8893      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8894      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8895      * </pre></blockquote>
8896      * <p> Many other Unicode characters are uppercase too.
8897      *
8898      * @param   codePoint the character (Unicode code point) to be tested.
8899      * @return  {@code true} if the character is uppercase;
8900      *          {@code false} otherwise.
8901      * @see     Character#isLowerCase(int)
8902      * @see     Character#isTitleCase(int)
8903      * @see     Character#toUpperCase(int)
8904      * @see     Character#getType(int)
8905      * @since   1.5
8906      */
8907     public static boolean isUpperCase(int codePoint) {
8908         return getType(codePoint) == Character.UPPERCASE_LETTER ||
8909                CharacterData.of(codePoint).isOtherUppercase(codePoint);
8910     }
8911 
8912     /**
8913      * Determines if the specified character is a titlecase character.
8914      * <p>
8915      * A character is a titlecase character if its general
8916      * category type, provided by {@code Character.getType(ch)},
8917      * is {@code TITLECASE_LETTER}.
8918      * <p>
8919      * Some characters look like pairs of Latin letters. For example, there
8920      * is an uppercase letter that looks like "LJ" and has a corresponding
8921      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8922      * is the appropriate form to use when rendering a word in lowercase
8923      * with initial capitals, as for a book title.
8924      * <p>
8925      * These are some of the Unicode characters for which this method returns
8926      * {@code true}:
8927      * <ul>
8928      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8929      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8930      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8931      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8932      * </ul>
8933      * <p> Many other Unicode characters are titlecase too.
8934      *
8935      * <p><b>Note:</b> This method cannot handle <a
8936      * href="#supplementary"> supplementary characters</a>. To support
8937      * all Unicode characters, including supplementary characters, use
8938      * the {@link #isTitleCase(int)} method.
8939      *
8940      * @param   ch   the character to be tested.
8941      * @return  {@code true} if the character is titlecase;
8942      *          {@code false} otherwise.
8943      * @see     Character#isLowerCase(char)
8944      * @see     Character#isUpperCase(char)
8945      * @see     Character#toTitleCase(char)
8946      * @see     Character#getType(char)
8947      * @since   1.0.2
8948      */
8949     public static boolean isTitleCase(char ch) {
8950         return isTitleCase((int)ch);
8951     }
8952 
8953     /**
8954      * Determines if the specified character (Unicode code point) is a titlecase character.
8955      * <p>
8956      * A character is a titlecase character if its general
8957      * category type, provided by {@link Character#getType(int) getType(codePoint)},
8958      * is {@code TITLECASE_LETTER}.
8959      * <p>
8960      * Some characters look like pairs of Latin letters. For example, there
8961      * is an uppercase letter that looks like "LJ" and has a corresponding
8962      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8963      * is the appropriate form to use when rendering a word in lowercase
8964      * with initial capitals, as for a book title.
8965      * <p>
8966      * These are some of the Unicode characters for which this method returns
8967      * {@code true}:
8968      * <ul>
8969      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8970      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8971      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8972      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8973      * </ul>
8974      * <p> Many other Unicode characters are titlecase too.
8975      *
8976      * @param   codePoint the character (Unicode code point) to be tested.
8977      * @return  {@code true} if the character is titlecase;
8978      *          {@code false} otherwise.
8979      * @see     Character#isLowerCase(int)
8980      * @see     Character#isUpperCase(int)
8981      * @see     Character#toTitleCase(int)
8982      * @see     Character#getType(int)
8983      * @since   1.5
8984      */
8985     public static boolean isTitleCase(int codePoint) {
8986         return getType(codePoint) == Character.TITLECASE_LETTER;
8987     }
8988 
8989     /**
8990      * Determines if the specified character is a digit.
8991      * <p>
8992      * A character is a digit if its general category type, provided
8993      * by {@code Character.getType(ch)}, is
8994      * {@code DECIMAL_DIGIT_NUMBER}.
8995      * <p>
8996      * Some Unicode character ranges that contain digits:
8997      * <ul>
8998      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
8999      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9000      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9001      *     Arabic-Indic digits
9002      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9003      *     Extended Arabic-Indic digits
9004      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9005      *     Devanagari digits
9006      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9007      *     Fullwidth digits
9008      * </ul>
9009      *
9010      * Many other character ranges contain digits as well.
9011      *
9012      * <p><b>Note:</b> This method cannot handle <a
9013      * href="#supplementary"> supplementary characters</a>. To support
9014      * all Unicode characters, including supplementary characters, use
9015      * the {@link #isDigit(int)} method.
9016      *
9017      * @param   ch   the character to be tested.
9018      * @return  {@code true} if the character is a digit;
9019      *          {@code false} otherwise.
9020      * @see     Character#digit(char, int)
9021      * @see     Character#forDigit(int, int)
9022      * @see     Character#getType(char)
9023      */
9024     public static boolean isDigit(char ch) {
9025         return isDigit((int)ch);
9026     }
9027 
9028     /**
9029      * Determines if the specified character (Unicode code point) is a digit.
9030      * <p>
9031      * A character is a digit if its general category type, provided
9032      * by {@link Character#getType(int) getType(codePoint)}, is
9033      * {@code DECIMAL_DIGIT_NUMBER}.
9034      * <p>
9035      * Some Unicode character ranges that contain digits:
9036      * <ul>
9037      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9038      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9039      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9040      *     Arabic-Indic digits
9041      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9042      *     Extended Arabic-Indic digits
9043      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9044      *     Devanagari digits
9045      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9046      *     Fullwidth digits
9047      * </ul>
9048      *
9049      * Many other character ranges contain digits as well.
9050      *
9051      * @param   codePoint the character (Unicode code point) to be tested.
9052      * @return  {@code true} if the character is a digit;
9053      *          {@code false} otherwise.
9054      * @see     Character#forDigit(int, int)
9055      * @see     Character#getType(int)
9056      * @since   1.5
9057      */
9058     public static boolean isDigit(int codePoint) {
9059         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
9060     }
9061 
9062     /**
9063      * Determines if a character is defined in Unicode.
9064      * <p>
9065      * A character is defined if at least one of the following is true:
9066      * <ul>
9067      * <li>It has an entry in the UnicodeData file.
9068      * <li>It has a value in a range defined by the UnicodeData file.
9069      * </ul>
9070      *
9071      * <p><b>Note:</b> This method cannot handle <a
9072      * href="#supplementary"> supplementary characters</a>. To support
9073      * all Unicode characters, including supplementary characters, use
9074      * the {@link #isDefined(int)} method.
9075      *
9076      * @param   ch   the character to be tested
9077      * @return  {@code true} if the character has a defined meaning
9078      *          in Unicode; {@code false} otherwise.
9079      * @see     Character#isDigit(char)
9080      * @see     Character#isLetter(char)
9081      * @see     Character#isLetterOrDigit(char)
9082      * @see     Character#isLowerCase(char)
9083      * @see     Character#isTitleCase(char)
9084      * @see     Character#isUpperCase(char)
9085      * @since   1.0.2
9086      */
9087     public static boolean isDefined(char ch) {
9088         return isDefined((int)ch);
9089     }
9090 
9091     /**
9092      * Determines if a character (Unicode code point) is defined in Unicode.
9093      * <p>
9094      * A character is defined if at least one of the following is true:
9095      * <ul>
9096      * <li>It has an entry in the UnicodeData file.
9097      * <li>It has a value in a range defined by the UnicodeData file.
9098      * </ul>
9099      *
9100      * @param   codePoint the character (Unicode code point) to be tested.
9101      * @return  {@code true} if the character has a defined meaning
9102      *          in Unicode; {@code false} otherwise.
9103      * @see     Character#isDigit(int)
9104      * @see     Character#isLetter(int)
9105      * @see     Character#isLetterOrDigit(int)
9106      * @see     Character#isLowerCase(int)
9107      * @see     Character#isTitleCase(int)
9108      * @see     Character#isUpperCase(int)
9109      * @since   1.5
9110      */
9111     public static boolean isDefined(int codePoint) {
9112         return getType(codePoint) != Character.UNASSIGNED;
9113     }
9114 
9115     /**
9116      * Determines if the specified character is a letter.
9117      * <p>
9118      * A character is considered to be a letter if its general
9119      * category type, provided by {@code Character.getType(ch)},
9120      * is any of the following:
9121      * <ul>
9122      * <li> {@code UPPERCASE_LETTER}
9123      * <li> {@code LOWERCASE_LETTER}
9124      * <li> {@code TITLECASE_LETTER}
9125      * <li> {@code MODIFIER_LETTER}
9126      * <li> {@code OTHER_LETTER}
9127      * </ul>
9128      *
9129      * Not all letters have case. Many characters are
9130      * letters but are neither uppercase nor lowercase nor titlecase.
9131      *
9132      * <p><b>Note:</b> This method cannot handle <a
9133      * href="#supplementary"> supplementary characters</a>. To support
9134      * all Unicode characters, including supplementary characters, use
9135      * the {@link #isLetter(int)} method.
9136      *
9137      * @param   ch   the character to be tested.
9138      * @return  {@code true} if the character is a letter;
9139      *          {@code false} otherwise.
9140      * @see     Character#isDigit(char)
9141      * @see     Character#isJavaIdentifierStart(char)
9142      * @see     Character#isJavaLetter(char)
9143      * @see     Character#isJavaLetterOrDigit(char)
9144      * @see     Character#isLetterOrDigit(char)
9145      * @see     Character#isLowerCase(char)
9146      * @see     Character#isTitleCase(char)
9147      * @see     Character#isUnicodeIdentifierStart(char)
9148      * @see     Character#isUpperCase(char)
9149      */
9150     public static boolean isLetter(char ch) {
9151         return isLetter((int)ch);
9152     }
9153 
9154     /**
9155      * Determines if the specified character (Unicode code point) is a letter.
9156      * <p>
9157      * A character is considered to be a letter if its general
9158      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9159      * is any of the following:
9160      * <ul>
9161      * <li> {@code UPPERCASE_LETTER}
9162      * <li> {@code LOWERCASE_LETTER}
9163      * <li> {@code TITLECASE_LETTER}
9164      * <li> {@code MODIFIER_LETTER}
9165      * <li> {@code OTHER_LETTER}
9166      * </ul>
9167      *
9168      * Not all letters have case. Many characters are
9169      * letters but are neither uppercase nor lowercase nor titlecase.
9170      *
9171      * @param   codePoint the character (Unicode code point) to be tested.
9172      * @return  {@code true} if the character is a letter;
9173      *          {@code false} otherwise.
9174      * @see     Character#isDigit(int)
9175      * @see     Character#isJavaIdentifierStart(int)
9176      * @see     Character#isLetterOrDigit(int)
9177      * @see     Character#isLowerCase(int)
9178      * @see     Character#isTitleCase(int)
9179      * @see     Character#isUnicodeIdentifierStart(int)
9180      * @see     Character#isUpperCase(int)
9181      * @since   1.5
9182      */
9183     public static boolean isLetter(int codePoint) {
9184         return ((((1 << Character.UPPERCASE_LETTER) |
9185             (1 << Character.LOWERCASE_LETTER) |
9186             (1 << Character.TITLECASE_LETTER) |
9187             (1 << Character.MODIFIER_LETTER) |
9188             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9189             != 0;
9190     }
9191 
9192     /**
9193      * Determines if the specified character is a letter or digit.
9194      * <p>
9195      * A character is considered to be a letter or digit if either
9196      * {@code Character.isLetter(char ch)} or
9197      * {@code Character.isDigit(char ch)} returns
9198      * {@code true} for the character.
9199      *
9200      * <p><b>Note:</b> This method cannot handle <a
9201      * href="#supplementary"> supplementary characters</a>. To support
9202      * all Unicode characters, including supplementary characters, use
9203      * the {@link #isLetterOrDigit(int)} method.
9204      *
9205      * @param   ch   the character to be tested.
9206      * @return  {@code true} if the character is a letter or digit;
9207      *          {@code false} otherwise.
9208      * @see     Character#isDigit(char)
9209      * @see     Character#isJavaIdentifierPart(char)
9210      * @see     Character#isJavaLetter(char)
9211      * @see     Character#isJavaLetterOrDigit(char)
9212      * @see     Character#isLetter(char)
9213      * @see     Character#isUnicodeIdentifierPart(char)
9214      * @since   1.0.2
9215      */
9216     public static boolean isLetterOrDigit(char ch) {
9217         return isLetterOrDigit((int)ch);
9218     }
9219 
9220     /**
9221      * Determines if the specified character (Unicode code point) is a letter or digit.
9222      * <p>
9223      * A character is considered to be a letter or digit if either
9224      * {@link #isLetter(int) isLetter(codePoint)} or
9225      * {@link #isDigit(int) isDigit(codePoint)} returns
9226      * {@code true} for the character.
9227      *
9228      * @param   codePoint the character (Unicode code point) to be tested.
9229      * @return  {@code true} if the character is a letter or digit;
9230      *          {@code false} otherwise.
9231      * @see     Character#isDigit(int)
9232      * @see     Character#isJavaIdentifierPart(int)
9233      * @see     Character#isLetter(int)
9234      * @see     Character#isUnicodeIdentifierPart(int)
9235      * @since   1.5
9236      */
9237     public static boolean isLetterOrDigit(int codePoint) {
9238         return ((((1 << Character.UPPERCASE_LETTER) |
9239             (1 << Character.LOWERCASE_LETTER) |
9240             (1 << Character.TITLECASE_LETTER) |
9241             (1 << Character.MODIFIER_LETTER) |
9242             (1 << Character.OTHER_LETTER) |
9243             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9244             != 0;
9245     }
9246 
9247     /**
9248      * Determines if the specified character is permissible as the first
9249      * character in a Java identifier.
9250      * <p>
9251      * A character may start a Java identifier if and only if
9252      * one of the following is true:
9253      * <ul>
9254      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9255      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9256      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9257      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9258      * </ul>
9259      *
9260      * @param   ch the character to be tested.
9261      * @return  {@code true} if the character may start a Java
9262      *          identifier; {@code false} otherwise.
9263      * @see     Character#isJavaLetterOrDigit(char)
9264      * @see     Character#isJavaIdentifierStart(char)
9265      * @see     Character#isJavaIdentifierPart(char)
9266      * @see     Character#isLetter(char)
9267      * @see     Character#isLetterOrDigit(char)
9268      * @see     Character#isUnicodeIdentifierStart(char)
9269      * @since   1.0.2
9270      * @deprecated Replaced by isJavaIdentifierStart(char).
9271      */
9272     @Deprecated(since="1.1")
9273     public static boolean isJavaLetter(char ch) {
9274         return isJavaIdentifierStart(ch);
9275     }
9276 
9277     /**
9278      * Determines if the specified character may be part of a Java
9279      * identifier as other than the first character.
9280      * <p>
9281      * A character may be part of a Java identifier if and only if any
9282      * of the following are true:
9283      * <ul>
9284      * <li>  it is a letter
9285      * <li>  it is a currency symbol (such as {@code '$'})
9286      * <li>  it is a connecting punctuation character (such as {@code '_'})
9287      * <li>  it is a digit
9288      * <li>  it is a numeric letter (such as a Roman numeral character)
9289      * <li>  it is a combining mark
9290      * <li>  it is a non-spacing mark
9291      * <li> {@code isIdentifierIgnorable} returns
9292      * {@code true} for the character.
9293      * </ul>
9294      *
9295      * @param   ch the character to be tested.
9296      * @return  {@code true} if the character may be part of a
9297      *          Java identifier; {@code false} otherwise.
9298      * @see     Character#isJavaLetter(char)
9299      * @see     Character#isJavaIdentifierStart(char)
9300      * @see     Character#isJavaIdentifierPart(char)
9301      * @see     Character#isLetter(char)
9302      * @see     Character#isLetterOrDigit(char)
9303      * @see     Character#isUnicodeIdentifierPart(char)
9304      * @see     Character#isIdentifierIgnorable(char)
9305      * @since   1.0.2
9306      * @deprecated Replaced by isJavaIdentifierPart(char).
9307      */
9308     @Deprecated(since="1.1")
9309     public static boolean isJavaLetterOrDigit(char ch) {
9310         return isJavaIdentifierPart(ch);
9311     }
9312 
9313     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
9315      * <p>
9316      * A character is considered to be alphabetic if its general category type,
9317      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9318      * the following:
9319      * <ul>
9320      * <li> {@code UPPERCASE_LETTER}
9321      * <li> {@code LOWERCASE_LETTER}
9322      * <li> {@code TITLECASE_LETTER}
9323      * <li> {@code MODIFIER_LETTER}
9324      * <li> {@code OTHER_LETTER}
9325      * <li> {@code LETTER_NUMBER}
9326      * </ul>
9327      * or it has contributory property Other_Alphabetic as defined by the
9328      * Unicode Standard.
9329      *
9330      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character is a Unicode alphabetic
9332      *          character, {@code false} otherwise.
9333      * @since   1.7
9334      */
9335     public static boolean isAlphabetic(int codePoint) {
9336         return (((((1 << Character.UPPERCASE_LETTER) |
9337             (1 << Character.LOWERCASE_LETTER) |
9338             (1 << Character.TITLECASE_LETTER) |
9339             (1 << Character.MODIFIER_LETTER) |
9340             (1 << Character.OTHER_LETTER) |
9341             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9342             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9343     }
9344 
9345     /**
9346      * Determines if the specified character (Unicode code point) is a CJKV
9347      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9348      * the Unicode Standard.
9349      *
9350      * @param   codePoint the character (Unicode code point) to be tested.
9351      * @return  {@code true} if the character is a Unicode ideograph
9352      *          character, {@code false} otherwise.
9353      * @since   1.7
9354      */
9355     public static boolean isIdeographic(int codePoint) {
9356         return CharacterData.of(codePoint).isIdeographic(codePoint);
9357     }
9358 
9359     /**
9360      * Determines if the specified character is
9361      * permissible as the first character in a Java identifier.
9362      * <p>
9363      * A character may start a Java identifier if and only if
9364      * one of the following conditions is true:
9365      * <ul>
9366      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9367      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9368      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9369      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9370      * </ul>
9371      *
9372      * <p><b>Note:</b> This method cannot handle <a
9373      * href="#supplementary"> supplementary characters</a>. To support
9374      * all Unicode characters, including supplementary characters, use
9375      * the {@link #isJavaIdentifierStart(int)} method.
9376      *
9377      * @param   ch the character to be tested.
9378      * @return  {@code true} if the character may start a Java identifier;
9379      *          {@code false} otherwise.
9380      * @see     Character#isJavaIdentifierPart(char)
9381      * @see     Character#isLetter(char)
9382      * @see     Character#isUnicodeIdentifierStart(char)
9383      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9384      * @since   1.1
9385      */
9386     public static boolean isJavaIdentifierStart(char ch) {
9387         return isJavaIdentifierStart((int)ch); // widen to int and reuse the code point overload
9388     }
9389 
9390     /**
9391      * Determines if the character (Unicode code point) is
9392      * permissible as the first character in a Java identifier.
9393      * <p>
9394      * A character may start a Java identifier if and only if
9395      * one of the following conditions is true:
9396      * <ul>
9397      * <li> {@link #isLetter(int) isLetter(codePoint)}
9398      *      returns {@code true}
9399      * <li> {@link #getType(int) getType(codePoint)}
9400      *      returns {@code LETTER_NUMBER}
9401      * <li> the referenced character is a currency symbol (such as {@code '$'})
9402      * <li> the referenced character is a connecting punctuation character
9403      *      (such as {@code '_'}).
9404      * </ul>
9405      *
9406      * @param   codePoint the character (Unicode code point) to be tested.
9407      * @return  {@code true} if the character may start a Java identifier;
9408      *          {@code false} otherwise.
9409      * @see     Character#isJavaIdentifierPart(int)
9410      * @see     Character#isLetter(int)
9411      * @see     Character#isUnicodeIdentifierStart(int)
9412      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9413      * @since   1.5
9414      */
9415     public static boolean isJavaIdentifierStart(int codePoint) {
9416         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); // answered by the CharacterData object that of(codePoint) returns
9417     }
9418 
9419     /**
9420      * Determines if the specified character may be part of a Java
9421      * identifier as other than the first character.
9422      * <p>
9423      * A character may be part of a Java identifier if any of the following
9424      * are true:
9425      * <ul>
9426      * <li>  it is a letter
9427      * <li>  it is a currency symbol (such as {@code '$'})
9428      * <li>  it is a connecting punctuation character (such as {@code '_'})
9429      * <li>  it is a digit
9430      * <li>  it is a numeric letter (such as a Roman numeral character)
9431      * <li>  it is a combining mark
9432      * <li>  it is a non-spacing mark
9433      * <li> {@code isIdentifierIgnorable} returns
9434      * {@code true} for the character
9435      * </ul>
9436      *
9437      * <p><b>Note:</b> This method cannot handle <a
9438      * href="#supplementary"> supplementary characters</a>. To support
9439      * all Unicode characters, including supplementary characters, use
9440      * the {@link #isJavaIdentifierPart(int)} method.
9441      *
9442      * @param   ch      the character to be tested.
9443      * @return {@code true} if the character may be part of a
9444      *          Java identifier; {@code false} otherwise.
9445      * @see     Character#isIdentifierIgnorable(char)
9446      * @see     Character#isJavaIdentifierStart(char)
9447      * @see     Character#isLetterOrDigit(char)
9448      * @see     Character#isUnicodeIdentifierPart(char)
9449      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9450      * @since   1.1
9451      */
9452     public static boolean isJavaIdentifierPart(char ch) {
9453         return isJavaIdentifierPart((int)ch); // widen to int and reuse the code point overload
9454     }
9455 
9456     /**
9457      * Determines if the character (Unicode code point) may be part of a Java
9458      * identifier as other than the first character.
9459      * <p>
9460      * A character may be part of a Java identifier if any of the following
9461      * are true:
9462      * <ul>
9463      * <li>  it is a letter
9464      * <li>  it is a currency symbol (such as {@code '$'})
9465      * <li>  it is a connecting punctuation character (such as {@code '_'})
9466      * <li>  it is a digit
9467      * <li>  it is a numeric letter (such as a Roman numeral character)
9468      * <li>  it is a combining mark
9469      * <li>  it is a non-spacing mark
9470      * <li> {@link #isIdentifierIgnorable(int)
9471      * isIdentifierIgnorable(codePoint)} returns {@code true} for
9472      * the character
9473      * </ul>
9474      *
9475      * @param   codePoint the character (Unicode code point) to be tested.
9476      * @return {@code true} if the character may be part of a
9477      *          Java identifier; {@code false} otherwise.
9478      * @see     Character#isIdentifierIgnorable(int)
9479      * @see     Character#isJavaIdentifierStart(int)
9480      * @see     Character#isLetterOrDigit(int)
9481      * @see     Character#isUnicodeIdentifierPart(int)
9482      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9483      * @since   1.5
9484      */
9485     public static boolean isJavaIdentifierPart(int codePoint) {
9486         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); // answered by the CharacterData object that of(codePoint) returns
9487     }
9488 
9489     /**
9490      * Determines if the specified character is permissible as the
9491      * first character in a Unicode identifier.
9492      * <p>
9493      * A character may start a Unicode identifier if and only if
9494      * one of the following conditions is true:
9495      * <ul>
9496      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9497      * <li> {@link #getType(char) getType(ch)} returns
9498      *      {@code LETTER_NUMBER}.
9499      * </ul>
9500      *
9501      * <p><b>Note:</b> This method cannot handle <a
9502      * href="#supplementary"> supplementary characters</a>. To support
9503      * all Unicode characters, including supplementary characters, use
9504      * the {@link #isUnicodeIdentifierStart(int)} method.
9505      *
9506      * @param   ch      the character to be tested.
9507      * @return  {@code true} if the character may start a Unicode
9508      *          identifier; {@code false} otherwise.
9509      * @see     Character#isJavaIdentifierStart(char)
9510      * @see     Character#isLetter(char)
9511      * @see     Character#isUnicodeIdentifierPart(char)
9512      * @since   1.1
9513      */
9514     public static boolean isUnicodeIdentifierStart(char ch) {
9515         return isUnicodeIdentifierStart((int)ch); // widen to int and reuse the code point overload
9516     }
9517 
9518     /**
9519      * Determines if the specified character (Unicode code point) is permissible as the
9520      * first character in a Unicode identifier.
9521      * <p>
9522      * A character may start a Unicode identifier if and only if
9523      * one of the following conditions is true:
9524      * <ul>
9525      * <li> {@link #isLetter(int) isLetter(codePoint)}
9526      *      returns {@code true}
9527      * <li> {@link #getType(int) getType(codePoint)}
9528      *      returns {@code LETTER_NUMBER}.
9529      * </ul>
9530      * @param   codePoint the character (Unicode code point) to be tested.
9531      * @return  {@code true} if the character may start a Unicode
9532      *          identifier; {@code false} otherwise.
9533      * @see     Character#isJavaIdentifierStart(int)
9534      * @see     Character#isLetter(int)
9535      * @see     Character#isUnicodeIdentifierPart(int)
9536      * @since   1.5
9537      */
9538     public static boolean isUnicodeIdentifierStart(int codePoint) {
9539         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); // answered by the CharacterData object that of(codePoint) returns
9540     }
9541 
9542     /**
9543      * Determines if the specified character may be part of a Unicode
9544      * identifier as other than the first character.
9545      * <p>
9546      * A character may be part of a Unicode identifier if and only if
9547      * one of the following statements is true:
9548      * <ul>
9549      * <li>  it is a letter
9550      * <li>  it is a connecting punctuation character (such as {@code '_'})
9551      * <li>  it is a digit
9552      * <li>  it is a numeric letter (such as a Roman numeral character)
9553      * <li>  it is a combining mark
9554      * <li>  it is a non-spacing mark
9555      * <li> {@code isIdentifierIgnorable} returns
9556      * {@code true} for this character.
9557      * </ul>
9558      *
9559      * <p><b>Note:</b> This method cannot handle <a
9560      * href="#supplementary"> supplementary characters</a>. To support
9561      * all Unicode characters, including supplementary characters, use
9562      * the {@link #isUnicodeIdentifierPart(int)} method.
9563      *
9564      * @param   ch      the character to be tested.
9565      * @return  {@code true} if the character may be part of a
9566      *          Unicode identifier; {@code false} otherwise.
9567      * @see     Character#isIdentifierIgnorable(char)
9568      * @see     Character#isJavaIdentifierPart(char)
9569      * @see     Character#isLetterOrDigit(char)
9570      * @see     Character#isUnicodeIdentifierStart(char)
9571      * @since   1.1
9572      */
9573     public static boolean isUnicodeIdentifierPart(char ch) {
9574         return isUnicodeIdentifierPart((int)ch); // widen to int and reuse the code point overload
9575     }
9576 
9577     /**
9578      * Determines if the specified character (Unicode code point) may be part of a Unicode
9579      * identifier as other than the first character.
9580      * <p>
9581      * A character may be part of a Unicode identifier if and only if
9582      * one of the following statements is true:
9583      * <ul>
9584      * <li>  it is a letter
9585      * <li>  it is a connecting punctuation character (such as {@code '_'})
9586      * <li>  it is a digit
9587      * <li>  it is a numeric letter (such as a Roman numeral character)
9588      * <li>  it is a combining mark
9589      * <li>  it is a non-spacing mark
9590      * <li> {@code isIdentifierIgnorable} returns
9591      * {@code true} for this character.
9592      * </ul>
9593      * @param   codePoint the character (Unicode code point) to be tested.
9594      * @return  {@code true} if the character may be part of a
9595      *          Unicode identifier; {@code false} otherwise.
9596      * @see     Character#isIdentifierIgnorable(int)
9597      * @see     Character#isJavaIdentifierPart(int)
9598      * @see     Character#isLetterOrDigit(int)
9599      * @see     Character#isUnicodeIdentifierStart(int)
9600      * @since   1.5
9601      */
9602     public static boolean isUnicodeIdentifierPart(int codePoint) {
9603         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); // answered by the CharacterData object that of(codePoint) returns
9604     }
9605 
9606     /**
9607      * Determines if the specified character should be regarded as
9608      * an ignorable character in a Java identifier or a Unicode identifier.
9609      * <p>
9610      * The following Unicode characters are ignorable in a Java identifier
9611      * or a Unicode identifier:
9612      * <ul>
9613      * <li>ISO control characters that are not whitespace
9614      * <ul>
9615      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9616      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9617      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9618      * </ul>
9619      *
9620      * <li>all characters that have the {@code FORMAT} general
9621      * category value
9622      * </ul>
9623      *
9624      * <p><b>Note:</b> This method cannot handle <a
9625      * href="#supplementary"> supplementary characters</a>. To support
9626      * all Unicode characters, including supplementary characters, use
9627      * the {@link #isIdentifierIgnorable(int)} method.
9628      *
9629      * @param   ch      the character to be tested.
9630      * @return  {@code true} if the character is an ignorable control
9631      *          character that may be part of a Java or Unicode identifier;
9632      *           {@code false} otherwise.
9633      * @see     Character#isJavaIdentifierPart(char)
9634      * @see     Character#isUnicodeIdentifierPart(char)
9635      * @since   1.1
9636      */
9637     public static boolean isIdentifierIgnorable(char ch) {
9638         return isIdentifierIgnorable((int)ch); // widen to int and reuse the code point overload
9639     }
9640 
9641     /**
9642      * Determines if the specified character (Unicode code point) should be regarded as
9643      * an ignorable character in a Java identifier or a Unicode identifier.
9644      * <p>
9645      * The following Unicode characters are ignorable in a Java identifier
9646      * or a Unicode identifier:
9647      * <ul>
9648      * <li>ISO control characters that are not whitespace
9649      * <ul>
9650      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9651      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9652      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9653      * </ul>
9654      *
9655      * <li>all characters that have the {@code FORMAT} general
9656      * category value
9657      * </ul>
9658      *
9659      * @param   codePoint the character (Unicode code point) to be tested.
9660      * @return  {@code true} if the character is an ignorable control
9661      *          character that may be part of a Java or Unicode identifier;
9662      *          {@code false} otherwise.
9663      * @see     Character#isJavaIdentifierPart(int)
9664      * @see     Character#isUnicodeIdentifierPart(int)
9665      * @since   1.5
9666      */
9667     public static boolean isIdentifierIgnorable(int codePoint) {
9668         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); // answered by the CharacterData object that of(codePoint) returns
9669     }
9670 
9671     /**
9672      * Converts the character argument to lowercase using case
9673      * mapping information from the UnicodeData file.
9674      * <p>
9675      * Note that
9676      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
9677      * does not always return {@code true} for some ranges of
9678      * characters, particularly those that are symbols or ideographs.
9679      *
9680      * <p>In general, {@link String#toLowerCase()} should be used to map
9681      * characters to lowercase. {@code String} case mapping methods
9682      * have several benefits over {@code Character} case mapping methods.
9683      * {@code String} case mapping methods can perform locale-sensitive
9684      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9685      * the {@code Character} case mapping methods cannot.
9686      *
9687      * <p><b>Note:</b> This method cannot handle <a
9688      * href="#supplementary"> supplementary characters</a>. To support
9689      * all Unicode characters, including supplementary characters, use
9690      * the {@link #toLowerCase(int)} method.
9691      *
9692      * @param   ch   the character to be converted.
9693      * @return  the lowercase equivalent of the character, if any;
9694      *          otherwise, the character itself.
9695      * @see     Character#isLowerCase(char)
9696      * @see     String#toLowerCase()
9697      */
9698     public static char toLowerCase(char ch) {
9699         return (char)toLowerCase((int)ch); // map through the int overload, then narrow the result back to char
9700     }
9701 
9702     /**
9703      * Converts the character (Unicode code point) argument to
9704      * lowercase using case mapping information from the UnicodeData
9705      * file.
9706      *
9707      * <p> Note that
9708      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9709      * does not always return {@code true} for some ranges of
9710      * characters, particularly those that are symbols or ideographs.
9711      *
9712      * <p>In general, {@link String#toLowerCase()} should be used to map
9713      * characters to lowercase. {@code String} case mapping methods
9714      * have several benefits over {@code Character} case mapping methods.
9715      * {@code String} case mapping methods can perform locale-sensitive
9716      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9717      * the {@code Character} case mapping methods cannot.
9718      *
9719      * @param   codePoint   the character (Unicode code point) to be converted.
9720      * @return  the lowercase equivalent of the character (Unicode code
9721      *          point), if any; otherwise, the character itself.
9722      * @see     Character#isLowerCase(int)
9723      * @see     String#toLowerCase()
9724      *
9725      * @since   1.5
9726      */
9727     public static int toLowerCase(int codePoint) {
9728         return CharacterData.of(codePoint).toLowerCase(codePoint); // mapping comes from the CharacterData object that of(codePoint) returns
9729     }
9730 
9731     /**
9732      * Converts the character argument to uppercase using case mapping
9733      * information from the UnicodeData file.
9734      * <p>
9735      * Note that
9736      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9737      * does not always return {@code true} for some ranges of
9738      * characters, particularly those that are symbols or ideographs.
9739      *
9740      * <p>In general, {@link String#toUpperCase()} should be used to map
9741      * characters to uppercase. {@code String} case mapping methods
9742      * have several benefits over {@code Character} case mapping methods.
9743      * {@code String} case mapping methods can perform locale-sensitive
9744      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9745      * the {@code Character} case mapping methods cannot.
9746      *
9747      * <p><b>Note:</b> This method cannot handle <a
9748      * href="#supplementary"> supplementary characters</a>. To support
9749      * all Unicode characters, including supplementary characters, use
9750      * the {@link #toUpperCase(int)} method.
9751      *
9752      * @param   ch   the character to be converted.
9753      * @return  the uppercase equivalent of the character, if any;
9754      *          otherwise, the character itself.
9755      * @see     Character#isUpperCase(char)
9756      * @see     String#toUpperCase()
9757      */
9758     public static char toUpperCase(char ch) {
9759         return (char)toUpperCase((int)ch); // map through the int overload, then narrow the result back to char
9760     }
9761 
9762     /**
9763      * Converts the character (Unicode code point) argument to
9764      * uppercase using case mapping information from the UnicodeData
9765      * file.
9766      *
9767      * <p>Note that
9768      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
9769      * does not always return {@code true} for some ranges of
9770      * characters, particularly those that are symbols or ideographs.
9771      *
9772      * <p>In general, {@link String#toUpperCase()} should be used to map
9773      * characters to uppercase. {@code String} case mapping methods
9774      * have several benefits over {@code Character} case mapping methods.
9775      * {@code String} case mapping methods can perform locale-sensitive
9776      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9777      * the {@code Character} case mapping methods cannot.
9778      *
9779      * @param   codePoint   the character (Unicode code point) to be converted.
9780      * @return  the uppercase equivalent of the character, if any;
9781      *          otherwise, the character itself.
9782      * @see     Character#isUpperCase(int)
9783      * @see     String#toUpperCase()
9784      *
9785      * @since   1.5
9786      */
9787     public static int toUpperCase(int codePoint) {
9788         return CharacterData.of(codePoint).toUpperCase(codePoint); // mapping comes from the CharacterData object that of(codePoint) returns
9789     }
9790 
9791     /**
9792      * Converts the character argument to titlecase using case mapping
9793      * information from the UnicodeData file. If a character has no
9794      * explicit titlecase mapping and is not itself a titlecase char
9795      * according to UnicodeData, then the uppercase mapping is
9796      * returned as an equivalent titlecase mapping. If the
9797      * {@code char} argument is already a titlecase
9798      * {@code char}, the same {@code char} value will be
9799      * returned.
9800      * <p>
9801      * Note that
9802      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
9803      * does not always return {@code true} for some ranges of
9804      * characters.
9805      *
9806      * <p><b>Note:</b> This method cannot handle <a
9807      * href="#supplementary"> supplementary characters</a>. To support
9808      * all Unicode characters, including supplementary characters, use
9809      * the {@link #toTitleCase(int)} method.
9810      *
9811      * @param   ch   the character to be converted.
9812      * @return  the titlecase equivalent of the character, if any;
9813      *          otherwise, the character itself.
9814      * @see     Character#isTitleCase(char)
9815      * @see     Character#toLowerCase(char)
9816      * @see     Character#toUpperCase(char)
9817      * @since   1.0.2
9818      */
9819     public static char toTitleCase(char ch) {
9820         return (char)toTitleCase((int)ch); // map through the int overload, then narrow the result back to char
9821     }
9822 
9823     /**
9824      * Converts the character (Unicode code point) argument to titlecase using case mapping
9825      * information from the UnicodeData file. If a character has no
9826      * explicit titlecase mapping and is not itself a titlecase char
9827      * according to UnicodeData, then the uppercase mapping is
9828      * returned as an equivalent titlecase mapping. If the
9829      * character argument is already a titlecase
9830      * character, the same character value will be
9831      * returned.
9832      *
9833      * <p>Note that
9834      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
9835      * does not always return {@code true} for some ranges of
9836      * characters.
9837      *
9838      * @param   codePoint   the character (Unicode code point) to be converted.
9839      * @return  the titlecase equivalent of the character, if any;
9840      *          otherwise, the character itself.
9841      * @see     Character#isTitleCase(int)
9842      * @see     Character#toLowerCase(int)
9843      * @see     Character#toUpperCase(int)
9844      * @since   1.5
9845      */
9846     public static int toTitleCase(int codePoint) {
9847         return CharacterData.of(codePoint).toTitleCase(codePoint); // mapping comes from the CharacterData object that of(codePoint) returns
9848     }
9849 
9850     /**
9851      * Returns the numeric value of the character {@code ch} in the
9852      * specified radix.
9853      * <p>
9854      * If the radix is not in the range {@code MIN_RADIX} &le;
9855      * {@code radix} &le; {@code MAX_RADIX} or if the
9856      * value of {@code ch} is not a valid digit in the specified
9857      * radix, {@code -1} is returned. A character is a valid digit
9858      * if at least one of the following is true:
9859      * <ul>
9860      * <li>The method {@code isDigit} is {@code true} of the character
9861      *     and the Unicode decimal digit value of the character (or its
9862      *     single-character decomposition) is less than the specified radix.
9863      *     In this case the decimal digit value is returned.
9864      * <li>The character is one of the uppercase Latin letters
9865      *     {@code 'A'} through {@code 'Z'} and its code is less than
9866      *     {@code radix + 'A' - 10}.
9867      *     In this case, {@code ch - 'A' + 10}
9868      *     is returned.
9869      * <li>The character is one of the lowercase Latin letters
9870      *     {@code 'a'} through {@code 'z'} and its code is less than
9871      *     {@code radix + 'a' - 10}.
9872      *     In this case, {@code ch - 'a' + 10}
9873      *     is returned.
9874      * <li>The character is one of the fullwidth uppercase Latin letters A
9875      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9876      *     and its code is less than
9877      *     {@code radix + '\u005CuFF21' - 10}.
9878      *     In this case, {@code ch - '\u005CuFF21' + 10}
9879      *     is returned.
9880      * <li>The character is one of the fullwidth lowercase Latin letters a
9881      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9882      *     and its code is less than
9883      *     {@code radix + '\u005CuFF41' - 10}.
9884      *     In this case, {@code ch - '\u005CuFF41' + 10}
9885      *     is returned.
9886      * </ul>
9887      *
9888      * <p><b>Note:</b> This method cannot handle <a
9889      * href="#supplementary"> supplementary characters</a>. To support
9890      * all Unicode characters, including supplementary characters, use
9891      * the {@link #digit(int, int)} method.
9892      *
9893      * @param   ch      the character to be converted.
9894      * @param   radix   the radix.
9895      * @return  the numeric value represented by the character in the
9896      *          specified radix.
9897      * @see     Character#forDigit(int, int)
9898      * @see     Character#isDigit(char)
9899      */
9900     public static int digit(char ch, int radix) {
9901         return digit((int)ch, radix); // widen ch to int and reuse the code point overload; radix is passed through
9902     }
9903 
9904     /**
9905      * Returns the numeric value of the specified character (Unicode
9906      * code point) in the specified radix.
9907      *
9908      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
9909      * {@code radix} &le; {@code MAX_RADIX} or if the
9910      * character is not a valid digit in the specified
9911      * radix, {@code -1} is returned. A character is a valid digit
9912      * if at least one of the following is true:
9913      * <ul>
9914      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
9915      *     and the Unicode decimal digit value of the character (or its
9916      *     single-character decomposition) is less than the specified radix.
9917      *     In this case the decimal digit value is returned.
9918      * <li>The character is one of the uppercase Latin letters
9919      *     {@code 'A'} through {@code 'Z'} and its code is less than
9920      *     {@code radix + 'A' - 10}.
9921      *     In this case, {@code codePoint - 'A' + 10}
9922      *     is returned.
9923      * <li>The character is one of the lowercase Latin letters
9924      *     {@code 'a'} through {@code 'z'} and its code is less than
9925      *     {@code radix + 'a' - 10}.
9926      *     In this case, {@code codePoint - 'a' + 10}
9927      *     is returned.
9928      * <li>The character is one of the fullwidth uppercase Latin letters A
9929      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9930      *     and its code is less than
9931      *     {@code radix + '\u005CuFF21' - 10}.
9932      *     In this case,
9933      *     {@code codePoint - '\u005CuFF21' + 10}
9934      *     is returned.
9935      * <li>The character is one of the fullwidth lowercase Latin letters a
9936      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9937      *     and its code is less than
9938      *     {@code radix + '\u005CuFF41' - 10}.
9939      *     In this case,
9940      *     {@code codePoint - '\u005CuFF41' + 10}
9941      *     is returned.
9942      * </ul>
9943      *
9944      * @param   codePoint the character (Unicode code point) to be converted.
9945      * @param   radix   the radix.
9946      * @return  the numeric value represented by the character in the
9947      *          specified radix.
9948      * @see     Character#forDigit(int, int)
9949      * @see     Character#isDigit(int)
9950      * @since   1.5
9951      */
9952     public static int digit(int codePoint, int radix) {
9953         return CharacterData.of(codePoint).digit(codePoint, radix); // computed by the CharacterData object that of(codePoint) returns
9954     }
9955 
9956     /**
9957      * Returns the {@code int} value that the specified Unicode
9958      * character represents. For example, the character
9959      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9960      * an {@code int} with a value of 50.
9961      * <p>
9962      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9963      * {@code '\u005Cu005A'}), lowercase
9964      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9965      * full width variant ({@code '\u005CuFF21'} through
9966      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9967      * {@code '\u005CuFF5A'}) forms have numeric values from 10
9968      * through 35. This is independent of the Unicode specification,
9969      * which does not assign numeric values to these {@code char}
9970      * values.
9971      * <p>
9972      * If the character does not have a numeric value, then -1 is returned.
9973      * If the character has a numeric value that cannot be represented as a
9974      * nonnegative integer (for example, a fractional value), then -2
9975      * is returned.
9976      *
9977      * <p><b>Note:</b> This method cannot handle <a
9978      * href="#supplementary"> supplementary characters</a>. To support
9979      * all Unicode characters, including supplementary characters, use
9980      * the {@link #getNumericValue(int)} method.
9981      *
9982      * @param   ch      the character to be converted.
9983      * @return  the numeric value of the character, as a nonnegative {@code int}
9984      *          value; -2 if the character has a numeric value but the value
9985      *          cannot be represented as a nonnegative {@code int} value;
9986      *          -1 if the character has no numeric value.
9987      * @see     Character#forDigit(int, int)
9988      * @see     Character#isDigit(char)
9989      * @since   1.1
9990      */
9991     public static int getNumericValue(char ch) {
9992         return getNumericValue((int)ch); // widen to int and reuse the code point overload
9993     }
9994 
9995     /**
9996      * Returns the {@code int} value that the specified
9997      * character (Unicode code point) represents. For example, the character
9998      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9999      * an {@code int} with a value of 50.
10000      * <p>
10001      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10002      * {@code '\u005Cu005A'}), lowercase
10003      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10004      * full width variant ({@code '\u005CuFF21'} through
10005      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10006      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10007      * through 35. This is independent of the Unicode specification,
10008      * which does not assign numeric values to these {@code char}
10009      * values.
10010      * <p>
10011      * If the character does not have a numeric value, then -1 is returned.
10012      * If the character has a numeric value that cannot be represented as a
10013      * nonnegative integer (for example, a fractional value), then -2
10014      * is returned.
10015      *
10016      * @param   codePoint the character (Unicode code point) to be converted.
10017      * @return  the numeric value of the character, as a nonnegative {@code int}
10018      *          value; -2 if the character has a numeric value but the value
10019      *          cannot be represented as a nonnegative {@code int} value;
10020      *          -1 if the character has no numeric value.
10021      * @see     Character#forDigit(int, int)
10022      * @see     Character#isDigit(int)
10023      * @since   1.5
10024      */
10025     public static int getNumericValue(int codePoint) {
10026         return CharacterData.of(codePoint).getNumericValue(codePoint); // computed by the CharacterData object that of(codePoint) returns
10027     }
10028 
10029     /**
10030      * Determines if the specified character is ISO-LATIN-1 white space.
10031      * This method returns {@code true} for the following five
10032      * characters only:
10033      * <table class="striped">
10034      * <caption style="display:none">truechars</caption>
10035      * <thead>
10036      * <tr><th scope="col">Character
10037      *     <th scope="col">Code
10038      *     <th scope="col">Name
10039      * </thead>
10040      * <tbody>
10041      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
10042      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
10043      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
10044      *     <td>{@code NEW LINE}</td></tr>
10045      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
10046      *     <td>{@code FORM FEED}</td></tr>
10047      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
10048      *     <td>{@code CARRIAGE RETURN}</td></tr>
10049      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
10050      *     <td>{@code SPACE}</td></tr>
10051      * </tbody>
10052      * </table>
10053      *
10054      * @param      ch   the character to be tested.
10055      * @return     {@code true} if the character is ISO-LATIN-1 white
10056      *             space; {@code false} otherwise.
10057      * @see        Character#isSpaceChar(char)
10058      * @see        Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
10060      */
10061     @Deprecated(since="1.1")
10062     public static boolean isSpace(char ch) {
10063         return (ch <= 0x0020) &&
10064             (((((1L << 0x0009) |
10065             (1L << 0x000A) |
10066             (1L << 0x000C) |
10067             (1L << 0x000D) |
10068             (1L << 0x0020)) >> ch) & 1L) != 0);
10069     }
10070 
10071 
10072     /**
10073      * Determines if the specified character is a Unicode space character.
10074      * A character is considered to be a space character if and only if
10075      * it is specified to be a space character by the Unicode Standard. This
10076      * method returns true if the character's general category type is any of
10077      * the following:
10078      * <ul>
10079      * <li> {@code SPACE_SEPARATOR}
10080      * <li> {@code LINE_SEPARATOR}
10081      * <li> {@code PARAGRAPH_SEPARATOR}
10082      * </ul>
10083      *
10084      * <p><b>Note:</b> This method cannot handle <a
10085      * href="#supplementary"> supplementary characters</a>. To support
10086      * all Unicode characters, including supplementary characters, use
10087      * the {@link #isSpaceChar(int)} method.
10088      *
10089      * @param   ch      the character to be tested.
10090      * @return  {@code true} if the character is a space character;
10091      *          {@code false} otherwise.
10092      * @see     Character#isWhitespace(char)
10093      * @since   1.1
10094      */
10095     public static boolean isSpaceChar(char ch) {
10096         return isSpaceChar((int)ch);
10097     }
10098 
10099     /**
10100      * Determines if the specified character (Unicode code point) is a
10101      * Unicode space character.  A character is considered to be a
10102      * space character if and only if it is specified to be a space
10103      * character by the Unicode Standard. This method returns true if
10104      * the character's general category type is any of the following:
10105      *
10106      * <ul>
10107      * <li> {@link #SPACE_SEPARATOR}
10108      * <li> {@link #LINE_SEPARATOR}
10109      * <li> {@link #PARAGRAPH_SEPARATOR}
10110      * </ul>
10111      *
10112      * @param   codePoint the character (Unicode code point) to be tested.
10113      * @return  {@code true} if the character is a space character;
10114      *          {@code false} otherwise.
10115      * @see     Character#isWhitespace(int)
10116      * @since   1.5
10117      */
10118     public static boolean isSpaceChar(int codePoint) {
10119         return ((((1 << Character.SPACE_SEPARATOR) |
10120                   (1 << Character.LINE_SEPARATOR) |
10121                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10122             != 0;
10123     }
10124 
10125     /**
10126      * Determines if the specified character is white space according to Java.
10127      * A character is a Java whitespace character if and only if it satisfies
10128      * one of the following criteria:
10129      * <ul>
10130      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10131      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10132      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10133      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10134      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10135      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10136      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10137      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10138      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10139      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10140      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10141      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10142      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10143      * </ul>
10144      *
10145      * <p><b>Note:</b> This method cannot handle <a
10146      * href="#supplementary"> supplementary characters</a>. To support
10147      * all Unicode characters, including supplementary characters, use
10148      * the {@link #isWhitespace(int)} method.
10149      *
10150      * @param   ch the character to be tested.
10151      * @return  {@code true} if the character is a Java whitespace
10152      *          character; {@code false} otherwise.
10153      * @see     Character#isSpaceChar(char)
10154      * @since   1.1
10155      */
10156     public static boolean isWhitespace(char ch) {
10157         return isWhitespace((int)ch);
10158     }
10159 
10160     /**
10161      * Determines if the specified character (Unicode code point) is
10162      * white space according to Java.  A character is a Java
10163      * whitespace character if and only if it satisfies one of the
10164      * following criteria:
10165      * <ul>
10166      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10167      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10168      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10169      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10170      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10171      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10172      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10173      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10174      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10175      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10176      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10177      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10178      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10179      * </ul>
10180      *
10181      * @param   codePoint the character (Unicode code point) to be tested.
10182      * @return  {@code true} if the character is a Java whitespace
10183      *          character; {@code false} otherwise.
10184      * @see     Character#isSpaceChar(int)
10185      * @since   1.5
10186      */
10187     public static boolean isWhitespace(int codePoint) {
10188         return CharacterData.of(codePoint).isWhitespace(codePoint);
10189     }
10190 
10191     /**
10192      * Determines if the specified character is an ISO control
10193      * character.  A character is considered to be an ISO control
10194      * character if its code is in the range {@code '\u005Cu0000'}
10195      * through {@code '\u005Cu001F'} or in the range
10196      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10197      *
10198      * <p><b>Note:</b> This method cannot handle <a
10199      * href="#supplementary"> supplementary characters</a>. To support
10200      * all Unicode characters, including supplementary characters, use
10201      * the {@link #isISOControl(int)} method.
10202      *
10203      * @param   ch      the character to be tested.
10204      * @return  {@code true} if the character is an ISO control character;
10205      *          {@code false} otherwise.
10206      *
10207      * @see     Character#isSpaceChar(char)
10208      * @see     Character#isWhitespace(char)
10209      * @since   1.1
10210      */
10211     public static boolean isISOControl(char ch) {
10212         return isISOControl((int)ch);
10213     }
10214 
10215     /**
10216      * Determines if the referenced character (Unicode code point) is an ISO control
10217      * character.  A character is considered to be an ISO control
10218      * character if its code is in the range {@code '\u005Cu0000'}
10219      * through {@code '\u005Cu001F'} or in the range
10220      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10221      *
10222      * @param   codePoint the character (Unicode code point) to be tested.
10223      * @return  {@code true} if the character is an ISO control character;
10224      *          {@code false} otherwise.
10225      * @see     Character#isSpaceChar(int)
10226      * @see     Character#isWhitespace(int)
10227      * @since   1.5
10228      */
10229     public static boolean isISOControl(int codePoint) {
10230         // Optimized form of:
10231         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
10232         //     (codePoint >= 0x7F && codePoint <= 0x9F);
10233         return codePoint <= 0x9F &&
10234             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10235     }
10236 
10237     /**
10238      * Returns a value indicating a character's general category.
10239      *
10240      * <p><b>Note:</b> This method cannot handle <a
10241      * href="#supplementary"> supplementary characters</a>. To support
10242      * all Unicode characters, including supplementary characters, use
10243      * the {@link #getType(int)} method.
10244      *
10245      * @param   ch      the character to be tested.
10246      * @return  a value of type {@code int} representing the
10247      *          character's general category.
10248      * @see     Character#COMBINING_SPACING_MARK
10249      * @see     Character#CONNECTOR_PUNCTUATION
10250      * @see     Character#CONTROL
10251      * @see     Character#CURRENCY_SYMBOL
10252      * @see     Character#DASH_PUNCTUATION
10253      * @see     Character#DECIMAL_DIGIT_NUMBER
10254      * @see     Character#ENCLOSING_MARK
10255      * @see     Character#END_PUNCTUATION
10256      * @see     Character#FINAL_QUOTE_PUNCTUATION
10257      * @see     Character#FORMAT
10258      * @see     Character#INITIAL_QUOTE_PUNCTUATION
10259      * @see     Character#LETTER_NUMBER
10260      * @see     Character#LINE_SEPARATOR
10261      * @see     Character#LOWERCASE_LETTER
10262      * @see     Character#MATH_SYMBOL
10263      * @see     Character#MODIFIER_LETTER
10264      * @see     Character#MODIFIER_SYMBOL
10265      * @see     Character#NON_SPACING_MARK
10266      * @see     Character#OTHER_LETTER
10267      * @see     Character#OTHER_NUMBER
10268      * @see     Character#OTHER_PUNCTUATION
10269      * @see     Character#OTHER_SYMBOL
10270      * @see     Character#PARAGRAPH_SEPARATOR
10271      * @see     Character#PRIVATE_USE
10272      * @see     Character#SPACE_SEPARATOR
10273      * @see     Character#START_PUNCTUATION
10274      * @see     Character#SURROGATE
10275      * @see     Character#TITLECASE_LETTER
10276      * @see     Character#UNASSIGNED
10277      * @see     Character#UPPERCASE_LETTER
10278      * @since   1.1
10279      */
10280     public static int getType(char ch) {
10281         return getType((int)ch);
10282     }
10283 
10284     /**
10285      * Returns a value indicating a character's general category.
10286      *
10287      * @param   codePoint the character (Unicode code point) to be tested.
10288      * @return  a value of type {@code int} representing the
10289      *          character's general category.
10290      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10291      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10292      * @see     Character#CONTROL CONTROL
10293      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10294      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
10295      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10296      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
10297      * @see     Character#END_PUNCTUATION END_PUNCTUATION
10298      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10299      * @see     Character#FORMAT FORMAT
10300      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10301      * @see     Character#LETTER_NUMBER LETTER_NUMBER
10302      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
10303      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
10304      * @see     Character#MATH_SYMBOL MATH_SYMBOL
10305      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
10306      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10307      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
10308      * @see     Character#OTHER_LETTER OTHER_LETTER
10309      * @see     Character#OTHER_NUMBER OTHER_NUMBER
10310      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10311      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
10312      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10313      * @see     Character#PRIVATE_USE PRIVATE_USE
10314      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
10315      * @see     Character#START_PUNCTUATION START_PUNCTUATION
10316      * @see     Character#SURROGATE SURROGATE
10317      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
10318      * @see     Character#UNASSIGNED UNASSIGNED
10319      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
10320      * @since   1.5
10321      */
10322     public static int getType(int codePoint) {
10323         return CharacterData.of(codePoint).getType(codePoint);
10324     }
10325 
10326     /**
10327      * Determines the character representation for a specific digit in
10328      * the specified radix. If the value of {@code radix} is not a
10329      * valid radix, or the value of {@code digit} is not a valid
10330      * digit in the specified radix, the null character
10331      * ({@code '\u005Cu0000'}) is returned.
10332      * <p>
10333      * The {@code radix} argument is valid if it is greater than or
10334      * equal to {@code MIN_RADIX} and less than or equal to
10335      * {@code MAX_RADIX}. The {@code digit} argument is valid if
10336      * {@code 0 <= digit < radix}.
10337      * <p>
10338      * If the digit is less than 10, then
10339      * {@code '0' + digit} is returned. Otherwise, the value
10340      * {@code 'a' + digit - 10} is returned.
10341      *
10342      * @param   digit   the number to convert to a character.
10343      * @param   radix   the radix.
10344      * @return  the {@code char} representation of the specified digit
10345      *          in the specified radix.
10346      * @see     Character#MIN_RADIX
10347      * @see     Character#MAX_RADIX
10348      * @see     Character#digit(char, int)
10349      */
10350     public static char forDigit(int digit, int radix) {
10351         if ((digit >= radix) || (digit < 0)) {
10352             return '\0';
10353         }
10354         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10355             return '\0';
10356         }
10357         if (digit < 10) {
10358             return (char)('0' + digit);
10359         }
10360         return (char)('a' - 10 + digit);
10361     }
10362 
10363     /**
10364      * Returns the Unicode directionality property for the given
10365      * character.  Character directionality is used to calculate the
10366      * visual ordering of text. The directionality value of undefined
10367      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10368      *
10369      * <p><b>Note:</b> This method cannot handle <a
10370      * href="#supplementary"> supplementary characters</a>. To support
10371      * all Unicode characters, including supplementary characters, use
10372      * the {@link #getDirectionality(int)} method.
10373      *
10374      * @param  ch {@code char} for which the directionality property
10375      *            is requested.
10376      * @return the directionality property of the {@code char} value.
10377      *
10378      * @see Character#DIRECTIONALITY_UNDEFINED
10379      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10380      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10381      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10382      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10383      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10384      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10385      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10386      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10387      * @see Character#DIRECTIONALITY_NONSPACING_MARK
10388      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10389      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10390      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10391      * @see Character#DIRECTIONALITY_WHITESPACE
10392      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10393      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10394      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10395      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10396      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10397      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10398      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10399      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10400      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10401      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10402      * @since 1.4
10403      */
10404     public static byte getDirectionality(char ch) {
10405         return getDirectionality((int)ch);
10406     }
10407 
10408     /**
10409      * Returns the Unicode directionality property for the given
10410      * character (Unicode code point).  Character directionality is
10411      * used to calculate the visual ordering of text. The
10412      * directionality value of undefined character is {@link
10413      * #DIRECTIONALITY_UNDEFINED}.
10414      *
10415      * @param   codePoint the character (Unicode code point) for which
10416      *          the directionality property is requested.
10417      * @return the directionality property of the character.
10418      *
10419      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10420      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10421      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10422      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10423      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10424      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10425      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10426      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10427      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10428      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10429      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10430      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10431      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10432      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10433      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10434      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10435      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10436      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10437      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10438      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10439      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10440      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10441      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10442      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10443      * @since    1.5
10444      */
10445     public static byte getDirectionality(int codePoint) {
10446         return CharacterData.of(codePoint).getDirectionality(codePoint);
10447     }
10448 
10449     /**
10450      * Determines whether the character is mirrored according to the
10451      * Unicode specification.  Mirrored characters should have their
10452      * glyphs horizontally mirrored when displayed in text that is
10453      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
10454      * PARENTHESIS is semantically defined to be an <i>opening
10455      * parenthesis</i>.  This will appear as a "(" in text that is
10456      * left-to-right but as a ")" in text that is right-to-left.
10457      *
10458      * <p><b>Note:</b> This method cannot handle <a
10459      * href="#supplementary"> supplementary characters</a>. To support
10460      * all Unicode characters, including supplementary characters, use
10461      * the {@link #isMirrored(int)} method.
10462      *
10463      * @param  ch {@code char} for which the mirrored property is requested
10464      * @return {@code true} if the char is mirrored, {@code false}
10465      *         if the {@code char} is not mirrored or is not defined.
10466      * @since 1.4
10467      */
10468     public static boolean isMirrored(char ch) {
10469         return isMirrored((int)ch);
10470     }
10471 
10472     /**
10473      * Determines whether the specified character (Unicode code point)
10474      * is mirrored according to the Unicode specification.  Mirrored
10475      * characters should have their glyphs horizontally mirrored when
10476      * displayed in text that is right-to-left.  For example,
10477      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10478      * defined to be an <i>opening parenthesis</i>.  This will appear
10479      * as a "(" in text that is left-to-right but as a ")" in text
10480      * that is right-to-left.
10481      *
10482      * @param   codePoint the character (Unicode code point) to be tested.
10483      * @return  {@code true} if the character is mirrored, {@code false}
10484      *          if the character is not mirrored or is not defined.
10485      * @since   1.5
10486      */
10487     public static boolean isMirrored(int codePoint) {
10488         return CharacterData.of(codePoint).isMirrored(codePoint);
10489     }
10490 
10491     /**
10492      * Compares two {@code Character} objects numerically.
10493      *
10494      * @param   anotherCharacter   the {@code Character} to be compared.
10495 
10496      * @return  the value {@code 0} if the argument {@code Character}
10497      *          is equal to this {@code Character}; a value less than
10498      *          {@code 0} if this {@code Character} is numerically less
10499      *          than the {@code Character} argument; and a value greater than
10500      *          {@code 0} if this {@code Character} is numerically greater
10501      *          than the {@code Character} argument (unsigned comparison).
10502      *          Note that this is strictly a numerical comparison; it is not
10503      *          locale-dependent.
10504      * @since   1.2
10505      */
10506     public int compareTo(Character anotherCharacter) {
10507         return compare(this.value, anotherCharacter.value);
10508     }
10509 
10510     /**
10511      * Compares two {@code char} values numerically.
10512      * The value returned is identical to what would be returned by:
10513      * <pre>
10514      *    Character.valueOf(x).compareTo(Character.valueOf(y))
10515      * </pre>
10516      *
10517      * @param  x the first {@code char} to compare
10518      * @param  y the second {@code char} to compare
10519      * @return the value {@code 0} if {@code x == y};
10520      *         a value less than {@code 0} if {@code x < y}; and
10521      *         a value greater than {@code 0} if {@code x > y}
10522      * @since 1.7
10523      */
10524     public static int compare(char x, char y) {
10525         return x - y;
10526     }
10527 
10528     /**
10529      * Converts the character (Unicode code point) argument to uppercase using
10530      * information from the UnicodeData file.
10531      *
10532      * @param   codePoint   the character (Unicode code point) to be converted.
10533      * @return  either the uppercase equivalent of the character, if
10534      *          any, or an error flag ({@code Character.ERROR})
10535      *          that indicates that a 1:M {@code char} mapping exists.
10536      * @see     Character#isLowerCase(char)
10537      * @see     Character#isUpperCase(char)
10538      * @see     Character#toLowerCase(char)
10539      * @see     Character#toTitleCase(char)
10540      * @since 1.4
10541      */
10542     static int toUpperCaseEx(int codePoint) {
10543         assert isValidCodePoint(codePoint);
10544         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
10545     }
10546 
10547     /**
10548      * Converts the character (Unicode code point) argument to uppercase using case
10549      * mapping information from the SpecialCasing file in the Unicode
10550      * specification. If a character has no explicit uppercase
10551      * mapping, then the {@code char} itself is returned in the
10552      * {@code char[]}.
10553      *
10554      * @param   codePoint   the character (Unicode code point) to be converted.
10555      * @return a {@code char[]} with the uppercased character.
10556      * @since 1.4
10557      */
10558     static char[] toUpperCaseCharArray(int codePoint) {
10559         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
10560         assert isBmpCodePoint(codePoint);
10561         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
10562     }
10563 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;

    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. The value is derived from {@link #SIZE} divided by
     * {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
10579 
10580     /**
10581      * Returns the value obtained by reversing the order of the bytes in the
10582      * specified {@code char} value.
10583      *
10584      * @param ch The {@code char} of which to reverse the byte order.
10585      * @return the value obtained by reversing (or, equivalently, swapping)
10586      *     the bytes in the specified {@code char} value.
10587      * @since 1.5
10588      */
10589     @HotSpotIntrinsicCandidate
10590     public static char reverseBytes(char ch) {
10591         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
10592     }
10593 
10594     /**
10595      * Returns the Unicode name of the specified character
10596      * {@code codePoint}, or null if the code point is
10597      * {@link #UNASSIGNED unassigned}.
10598      * <p>
10599      * Note: if the specified character is not assigned a name by
10600      * the <i>UnicodeData</i> file (part of the Unicode Character
10601      * Database maintained by the Unicode Consortium), the returned
10602      * name is the same as the result of expression.
10603      *
10604      * <blockquote>{@code
10605      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10606      *     + " "
10607      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10608      *
10609      * }</blockquote>
10610      *
10611      * @param  codePoint the character (Unicode code point)
10612      *
10613      * @return the Unicode name of the specified character, or null if
10614      *         the code point is unassigned.
10615      *
10616      * @throws IllegalArgumentException if the specified
10617      *            {@code codePoint} is not a valid Unicode
10618      *            code point.
10619      *
10620      * @since 1.7
10621      */
10622     public static String getName(int codePoint) {
10623         if (!isValidCodePoint(codePoint)) {
10624             throw new IllegalArgumentException(
10625                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10626         }
10627         String name = CharacterName.getInstance().getName(codePoint);
10628         if (name != null)
10629             return name;
10630         if (getType(codePoint) == UNASSIGNED)
10631             return null;
10632         UnicodeBlock block = UnicodeBlock.of(codePoint);
10633         if (block != null)
10634             return block.toString().replace('_', ' ') + " "
10635                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10636         // should never come here
10637         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10638     }
10639 
10640     /**
10641      * Returns the code point value of the Unicode character specified by
10642      * the given Unicode character name.
10643      * <p>
10644      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
10645      * file (part of the Unicode Character Database maintained by the Unicode
10646      * Consortium), its name is defined as the result of expression
10647      *
10648      * <blockquote>{@code
10649      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10650      *     + " "
10651      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10652      *
10653      * }</blockquote>
10654      * <p>
10655      * The {@code name} matching is case insensitive, with any leading and
10656      * trailing whitespace character removed.
10657      *
10658      * @param  name the Unicode character name
10659      *
10660      * @return the code point value of the character specified by its name.
10661      *
10662      * @throws IllegalArgumentException if the specified {@code name}
10663      *         is not a valid Unicode character name.
10664      * @throws NullPointerException if {@code name} is {@code null}
10665      *
10666      * @since 9
10667      */
10668     public static int codePointOf(String name) {
10669         name = name.trim().toUpperCase(Locale.ROOT);
10670         int cp = CharacterName.getInstance().getCodePoint(name);
10671         if (cp != -1)
10672             return cp;
10673         try {
10674             int off = name.lastIndexOf(' ');
10675             if (off != -1) {
10676                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
10677                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
10678                     return cp;
10679             }
10680         } catch (Exception x) {}
10681         throw new IllegalArgumentException("Unrecognized character name :" + name);
10682     }
10683 }