1 /*
   2  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 7.0.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  *
  59  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  60  *
  61  * <p>The {@code char} data type (and therefore the value that a
  62  * {@code Character} object encapsulates) are based on the
  63  * original Unicode specification, which defined characters as
  64  * fixed-width 16-bit entities. The Unicode Standard has since been
  65  * changed to allow for characters whose representation requires more
  66  * than 16 bits.  The range of legal <em>code point</em>s is now
  67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  68  * (Refer to the <a
  69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  71  * Standard.)
  72  *
  73  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  75  * <a name="supplementary">Characters</a> whose code points are greater
  76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  77  * platform uses the UTF-16 representation in {@code char} arrays and
  78  * in the {@code String} and {@code StringBuffer} classes. In
  79  * this representation, supplementary characters are represented as a pair
  80  * of {@code char} values, the first from the <em>high-surrogates</em>
  81  * range, (\uD800-\uDBFF), the second from the
  82  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  83  *
  84  * <p>A {@code char} value, therefore, represents Basic
  85  * Multilingual Plane (BMP) code points, including the surrogate
  86  * code points, or code units of the UTF-16 encoding. An
  87  * {@code int} value represents all Unicode code points,
  88  * including supplementary code points. The lower (least significant)
  89  * 21 bits of {@code int} are used to represent Unicode code
  90  * points and the upper (most significant) 11 bits must be zero.
  91  * Unless otherwise specified, the behavior with respect to
  92  * supplementary characters and surrogate {@code char} values is
  93  * as follows:
  94  *
  95  * <ul>
  96  * <li>The methods that only accept a {@code char} value cannot support
  97  * supplementary characters. They treat {@code char} values from the
  98  * surrogate ranges as undefined characters. For example,
  99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 100  * this specific value if followed by any low-surrogate value in a string
 101  * would represent a letter.
 102  *
 103  * <li>The methods that accept an {@code int} value support all
 104  * Unicode characters, including supplementary characters. For
 105  * example, {@code Character.isLetter(0x2F81A)} returns
 106  * {@code true} because the code point value represents a letter
 107  * (a CJK ideograph).
 108  * </ul>
 109  *
 110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 111  * used for character values in the range between U+0000 and U+10FFFF,
 112  * and <em>Unicode code unit</em> is used for 16-bit
 113  * {@code char} values that are code units of the <em>UTF-16</em>
 114  * encoding. For more information on Unicode terminology, refer to the
 115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 116  *
 117  * @author  Lee Boynton
 118  * @author  Guy Steele
 119  * @author  Akira Tanaka
 120  * @author  Martin Buchholz
 121  * @author  Ulf Zibis
 122  * @since   1.0
 123  */
 124 public final
 125 class Character implements java.io.Serializable, Comparable<Character> {
 126     /**
 127      * The minimum radix available for conversion to and from strings.
 128      * The constant value of this field is the smallest value permitted
 129      * for the radix argument in radix-conversion methods such as the
 130      * {@code digit} method, the {@code forDigit} method, and the
 131      * {@code toString} method of class {@code Integer}.
 132      *
 133      * @see     Character#digit(char, int)
 134      * @see     Character#forDigit(int, int)
 135      * @see     Integer#toString(int, int)
 136      * @see     Integer#valueOf(String)
 137      */
 138     public static final int MIN_RADIX = 2;
 139 
 140     /**
 141      * The maximum radix available for conversion to and from strings.
 142      * The constant value of this field is the largest value permitted
 143      * for the radix argument in radix-conversion methods such as the
 144      * {@code digit} method, the {@code forDigit} method, and the
 145      * {@code toString} method of class {@code Integer}.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * {@code char}, {@code '\u005Cu0000'}.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * {@code char}, {@code '\u005CuFFFF'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The {@code Class} instance representing the primitive type
 172      * {@code char}.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
 289     /**
 290      * General category "Co" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined {@code char}
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
  393      * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * Weak bidirectional character type "LRI" in the Unicode specification.
  496      * @since 9
 497      */
 498     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 499 
 500     /**
 501      * Weak bidirectional character type "RLI" in the Unicode specification.
  502      * @since 9
 503      */
 504     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 505 
 506     /**
 507      * Weak bidirectional character type "FSI" in the Unicode specification.
  508      * @since 9
 509      */
 510     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 511 
 512     /**
 513      * Weak bidirectional character type "PDI" in the Unicode specification.
  514      * @since 9
 515      */
 516     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 517 
 518     /**
 519      * The minimum value of a
 520      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 521      * Unicode high-surrogate code unit</a>
 522      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 523      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a
 531      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 532      * Unicode high-surrogate code unit</a>
 533      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 534      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 535      *
 536      * @since 1.5
 537      */
 538     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 539 
 540     /**
 541      * The minimum value of a
 542      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 543      * Unicode low-surrogate code unit</a>
 544      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 545      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 546      *
 547      * @since 1.5
 548      */
 549     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 550 
 551     /**
 552      * The maximum value of a
 553      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 554      * Unicode low-surrogate code unit</a>
 555      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 556      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 557      *
 558      * @since 1.5
 559      */
 560     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 561 
 562     /**
 563      * The minimum value of a Unicode surrogate code unit in the
 564      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 569 
 570     /**
 571      * The maximum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 577 
 578     /**
 579      * The minimum value of a
 580      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 581      * Unicode supplementary code point</a>, constant {@code U+10000}.
 582      *
 583      * @since 1.5
 584      */
 585     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 586 
 587     /**
 588      * The minimum value of a
 589      * <a href="http://www.unicode.org/glossary/#code_point">
 590      * Unicode code point</a>, constant {@code U+0000}.
 591      *
 592      * @since 1.5
 593      */
 594     public static final int MIN_CODE_POINT = 0x000000;
 595 
 596     /**
 597      * The maximum value of a
 598      * <a href="http://www.unicode.org/glossary/#code_point">
 599      * Unicode code point</a>, constant {@code U+10FFFF}.
 600      *
 601      * @since 1.5
 602      */
 603     public static final int MAX_CODE_POINT = 0X10FFFF;
 604 
 605 
 606     /**
 607      * Instances of this class represent particular subsets of the Unicode
 608      * character set.  The only family of subsets defined in the
 609      * {@code Character} class is {@link Character.UnicodeBlock}.
 610      * Other portions of the Java API may define other subsets for their
 611      * own purposes.
 612      *
 613      * @since 1.2
 614      */
 615     public static class Subset  {
 616 
 617         private String name;
 618 
 619         /**
 620          * Constructs a new {@code Subset} instance.
 621          *
 622          * @param  name  The name of this subset
 623          * @exception NullPointerException if name is {@code null}
 624          */
 625         protected Subset(String name) {
 626             if (name == null) {
 627                 throw new NullPointerException("name");
 628             }
 629             this.name = name;
 630         }
 631 
 632         /**
 633          * Compares two {@code Subset} objects for equality.
 634          * This method returns {@code true} if and only if
 635          * {@code this} and the argument refer to the same
 636          * object; since this method is {@code final}, this
 637          * guarantee holds for all subclasses.
 638          */
 639         public final boolean equals(Object obj) {
 640             return (this == obj);
 641         }
 642 
 643         /**
 644          * Returns the standard hash code as defined by the
 645          * {@link Object#hashCode} method.  This method
 646          * is {@code final} in order to ensure that the
 647          * {@code equals} and {@code hashCode} methods will
 648          * be consistent in all subclasses.
 649          */
 650         public final int hashCode() {
 651             return super.hashCode();
 652         }
 653 
 654         /**
 655          * Returns the name of this subset.
 656          */
 657         public final String toString() {
 658             return name;
 659         }
 660     }
 661 
 662     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 663     // for the latest specification of Unicode Blocks.
 664 
 665     /**
 666      * A family of character subsets representing the character blocks in the
 667      * Unicode specification. Character blocks generally define characters
 668      * used for a specific script or purpose. A character is contained by
 669      * at most one Unicode block.
 670      *
 671      * @since 1.2
 672      */
 673     public static final class UnicodeBlock extends Subset {
 674         /**
 675          * 510  - the expected number of entities
 676          * 0.75 - the default load factor of HashMap
 677          */
 678         private static Map<String, UnicodeBlock> map =
 679                 new HashMap<>((int)(510 / 0.75f + 1.0f));
 680 
 681         /**
 682          * Creates a UnicodeBlock with the given identifier name.
 683          * This name must be the same as the block identifier.
 684          */
 685         private UnicodeBlock(String idName) {
 686             super(idName);
 687             map.put(idName, this);
 688         }
 689 
 690         /**
 691          * Creates a UnicodeBlock with the given identifier name and
 692          * alias name.
 693          */
 694         private UnicodeBlock(String idName, String alias) {
 695             this(idName);
 696             map.put(alias, this);
 697         }
 698 
 699         /**
 700          * Creates a UnicodeBlock with the given identifier name and
 701          * alias names.
 702          */
 703         private UnicodeBlock(String idName, String... aliases) {
 704             this(idName);
 705             for (String alias : aliases)
 706                 map.put(alias, this);
 707         }
 708 
 709         /**
 710          * Constant for the "Basic Latin" Unicode character block.
 711          * @since 1.2
 712          */
 713         public static final UnicodeBlock  BASIC_LATIN =
 714             new UnicodeBlock("BASIC_LATIN",
 715                              "BASIC LATIN",
 716                              "BASICLATIN");
 717 
 718         /**
 719          * Constant for the "Latin-1 Supplement" Unicode character block.
 720          * @since 1.2
 721          */
 722         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 723             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 724                              "LATIN-1 SUPPLEMENT",
 725                              "LATIN-1SUPPLEMENT");
 726 
 727         /**
 728          * Constant for the "Latin Extended-A" Unicode character block.
 729          * @since 1.2
 730          */
 731         public static final UnicodeBlock LATIN_EXTENDED_A =
 732             new UnicodeBlock("LATIN_EXTENDED_A",
 733                              "LATIN EXTENDED-A",
 734                              "LATINEXTENDED-A");
 735 
 736         /**
 737          * Constant for the "Latin Extended-B" Unicode character block.
 738          * @since 1.2
 739          */
 740         public static final UnicodeBlock LATIN_EXTENDED_B =
 741             new UnicodeBlock("LATIN_EXTENDED_B",
 742                              "LATIN EXTENDED-B",
 743                              "LATINEXTENDED-B");
 744 
 745         /**
 746          * Constant for the "IPA Extensions" Unicode character block.
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock IPA_EXTENSIONS =
 750             new UnicodeBlock("IPA_EXTENSIONS",
 751                              "IPA EXTENSIONS",
 752                              "IPAEXTENSIONS");
 753 
 754         /**
 755          * Constant for the "Spacing Modifier Letters" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 759             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 760                              "SPACING MODIFIER LETTERS",
 761                              "SPACINGMODIFIERLETTERS");
 762 
 763         /**
 764          * Constant for the "Combining Diacritical Marks" Unicode character block.
 765          * @since 1.2
 766          */
 767         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 768             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 769                              "COMBINING DIACRITICAL MARKS",
 770                              "COMBININGDIACRITICALMARKS");
 771 
 772         /**
 773          * Constant for the "Greek and Coptic" Unicode character block.
 774          * <p>
 775          * This block was previously known as the "Greek" block.
 776          *
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock GREEK =
 780             new UnicodeBlock("GREEK",
 781                              "GREEK AND COPTIC",
 782                              "GREEKANDCOPTIC");
 783 
 784         /**
 785          * Constant for the "Cyrillic" Unicode character block.
 786          * @since 1.2
 787          */
 788         public static final UnicodeBlock CYRILLIC =
 789             new UnicodeBlock("CYRILLIC");
 790 
 791         /**
 792          * Constant for the "Armenian" Unicode character block.
 793          * @since 1.2
 794          */
 795         public static final UnicodeBlock ARMENIAN =
 796             new UnicodeBlock("ARMENIAN");
 797 
 798         /**
 799          * Constant for the "Hebrew" Unicode character block.
 800          * @since 1.2
 801          */
 802         public static final UnicodeBlock HEBREW =
 803             new UnicodeBlock("HEBREW");
 804 
 805         /**
 806          * Constant for the "Arabic" Unicode character block.
 807          * @since 1.2
 808          */
 809         public static final UnicodeBlock ARABIC =
 810             new UnicodeBlock("ARABIC");
 811 
 812         /**
 813          * Constant for the "Devanagari" Unicode character block.
 814          * @since 1.2
 815          */
 816         public static final UnicodeBlock DEVANAGARI =
 817             new UnicodeBlock("DEVANAGARI");
 818 
 819         /**
 820          * Constant for the "Bengali" Unicode character block.
 821          * @since 1.2
 822          */
 823         public static final UnicodeBlock BENGALI =
 824             new UnicodeBlock("BENGALI");
 825 
 826         /**
 827          * Constant for the "Gurmukhi" Unicode character block.
 828          * @since 1.2
 829          */
 830         public static final UnicodeBlock GURMUKHI =
 831             new UnicodeBlock("GURMUKHI");
 832 
 833         /**
 834          * Constant for the "Gujarati" Unicode character block.
 835          * @since 1.2
 836          */
 837         public static final UnicodeBlock GUJARATI =
 838             new UnicodeBlock("GUJARATI");
 839 
 840         /**
 841          * Constant for the "Oriya" Unicode character block.
 842          * @since 1.2
 843          */
 844         public static final UnicodeBlock ORIYA =
 845             new UnicodeBlock("ORIYA");
 846 
 847         /**
 848          * Constant for the "Tamil" Unicode character block.
 849          * @since 1.2
 850          */
 851         public static final UnicodeBlock TAMIL =
 852             new UnicodeBlock("TAMIL");
 853 
 854         /**
 855          * Constant for the "Telugu" Unicode character block.
 856          * @since 1.2
 857          */
 858         public static final UnicodeBlock TELUGU =
 859             new UnicodeBlock("TELUGU");
 860 
 861         /**
 862          * Constant for the "Kannada" Unicode character block.
 863          * @since 1.2
 864          */
 865         public static final UnicodeBlock KANNADA =
 866             new UnicodeBlock("KANNADA");
 867 
 868         /**
 869          * Constant for the "Malayalam" Unicode character block.
 870          * @since 1.2
 871          */
 872         public static final UnicodeBlock MALAYALAM =
 873             new UnicodeBlock("MALAYALAM");
 874 
 875         /**
 876          * Constant for the "Thai" Unicode character block.
 877          * @since 1.2
 878          */
 879         public static final UnicodeBlock THAI =
 880             new UnicodeBlock("THAI");
 881 
 882         /**
 883          * Constant for the "Lao" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LAO =
 887             new UnicodeBlock("LAO");
 888 
 889         /**
 890          * Constant for the "Tibetan" Unicode character block.
 891          * @since 1.2
 892          */
 893         public static final UnicodeBlock TIBETAN =
 894             new UnicodeBlock("TIBETAN");
 895 
 896         /**
 897          * Constant for the "Georgian" Unicode character block.
 898          * @since 1.2
 899          */
 900         public static final UnicodeBlock GEORGIAN =
 901             new UnicodeBlock("GEORGIAN");
 902 
 903         /**
 904          * Constant for the "Hangul Jamo" Unicode character block.
 905          * @since 1.2
 906          */
 907         public static final UnicodeBlock HANGUL_JAMO =
 908             new UnicodeBlock("HANGUL_JAMO",
 909                              "HANGUL JAMO",
 910                              "HANGULJAMO");
 911 
 912         /**
 913          * Constant for the "Latin Extended Additional" Unicode character block.
 914          * @since 1.2
 915          */
 916         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 917             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 918                              "LATIN EXTENDED ADDITIONAL",
 919                              "LATINEXTENDEDADDITIONAL");
 920 
 921         /**
 922          * Constant for the "Greek Extended" Unicode character block.
 923          * @since 1.2
 924          */
 925         public static final UnicodeBlock GREEK_EXTENDED =
 926             new UnicodeBlock("GREEK_EXTENDED",
 927                              "GREEK EXTENDED",
 928                              "GREEKEXTENDED");
 929 
 930         /**
 931          * Constant for the "General Punctuation" Unicode character block.
 932          * @since 1.2
 933          */
 934         public static final UnicodeBlock GENERAL_PUNCTUATION =
 935             new UnicodeBlock("GENERAL_PUNCTUATION",
 936                              "GENERAL PUNCTUATION",
 937                              "GENERALPUNCTUATION");
 938 
 939         /**
 940          * Constant for the "Superscripts and Subscripts" Unicode character
 941          * block.
 942          * @since 1.2
 943          */
 944         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 945             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 946                              "SUPERSCRIPTS AND SUBSCRIPTS",
 947                              "SUPERSCRIPTSANDSUBSCRIPTS");
 948 
 949         /**
 950          * Constant for the "Currency Symbols" Unicode character block.
 951          * @since 1.2
 952          */
 953         public static final UnicodeBlock CURRENCY_SYMBOLS =
 954             new UnicodeBlock("CURRENCY_SYMBOLS",
 955                              "CURRENCY SYMBOLS",
 956                              "CURRENCYSYMBOLS");
 957 
 958         /**
 959          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 960          * character block, previously known as "Combining Marks for Symbols".
 961          * <p>
 962          * The field keeps the earlier name; the constructor call lists both names.
 963          * @since 1.2
 964          */
 965         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 966             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 967                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 968                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 969                              "COMBINING MARKS FOR SYMBOLS",
 970                              "COMBININGMARKSFORSYMBOLS");
 971 
 972         /**
 973          * Constant for the "Letterlike Symbols" Unicode character block.
 974          * @since 1.2
 975          */
 976         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 977             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 978                              "LETTERLIKE SYMBOLS",
 979                              "LETTERLIKESYMBOLS");
 980 
 981         /**
 982          * Constant for the "Number Forms" Unicode character block.
 983          * @since 1.2
 984          */
 985         public static final UnicodeBlock NUMBER_FORMS =
 986             new UnicodeBlock("NUMBER_FORMS",
 987                              "NUMBER FORMS",
 988                              "NUMBERFORMS");
 989 
 990         /**
 991          * Constant for the "Arrows" Unicode character block.
 992          * @since 1.2
 993          */
 994         public static final UnicodeBlock ARROWS =
 995             new UnicodeBlock("ARROWS");
 996 
 997         /**
 998          * Constant for the "Mathematical Operators" Unicode character block.
 999          * @since 1.2
1000          */
1001         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1002             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1003                              "MATHEMATICAL OPERATORS",
1004                              "MATHEMATICALOPERATORS");
1005 
1006         /**
1007          * Constant for the "Miscellaneous Technical" Unicode character block.
1008          * @since 1.2
1009          */
1010         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1011             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1012                              "MISCELLANEOUS TECHNICAL",
1013                              "MISCELLANEOUSTECHNICAL");
1014 
1015         /**
1016          * Constant for the "Control Pictures" Unicode character block.
1017          * @since 1.2
1018          */
1019         public static final UnicodeBlock CONTROL_PICTURES =
1020             new UnicodeBlock("CONTROL_PICTURES",
1021                              "CONTROL PICTURES",
1022                              "CONTROLPICTURES");
1023 
1024         /**
1025          * Constant for the "Optical Character Recognition" Unicode character block.
1026          * @since 1.2
1027          */
1028         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1029             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1030                              "OPTICAL CHARACTER RECOGNITION",
1031                              "OPTICALCHARACTERRECOGNITION");
1032 
1033         /**
1034          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1035          * @since 1.2
1036          */
1037         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1038             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1039                              "ENCLOSED ALPHANUMERICS",
1040                              "ENCLOSEDALPHANUMERICS");
1041 
1042         /**
1043          * Constant for the "Box Drawing" Unicode character block.
1044          * @since 1.2
1045          */
1046         public static final UnicodeBlock BOX_DRAWING =
1047             new UnicodeBlock("BOX_DRAWING",
1048                              "BOX DRAWING",
1049                              "BOXDRAWING");
1050 
1051         /**
1052          * Constant for the "Block Elements" Unicode character block.
1053          * @since 1.2
1054          */
1055         public static final UnicodeBlock BLOCK_ELEMENTS =
1056             new UnicodeBlock("BLOCK_ELEMENTS",
1057                              "BLOCK ELEMENTS",
1058                              "BLOCKELEMENTS");
1059 
1060         /**
1061          * Constant for the "Geometric Shapes" Unicode character block.
1062          * @since 1.2
1063          */
1064         public static final UnicodeBlock GEOMETRIC_SHAPES =
1065             new UnicodeBlock("GEOMETRIC_SHAPES",
1066                              "GEOMETRIC SHAPES",
1067                              "GEOMETRICSHAPES");
1068 
1069         /**
1070          * Constant for the "Miscellaneous Symbols" Unicode character block.
1071          * @since 1.2
1072          */
1073         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1074             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1075                              "MISCELLANEOUS SYMBOLS",
1076                              "MISCELLANEOUSSYMBOLS");
1077 
1078         /**
1079          * Constant for the "Dingbats" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock DINGBATS =
1083             new UnicodeBlock("DINGBATS");
1084 
1085         /**
1086          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1090             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1091                              "CJK SYMBOLS AND PUNCTUATION",
1092                              "CJKSYMBOLSANDPUNCTUATION");
1093 
1094         /**
1095          * Constant for the "Hiragana" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock HIRAGANA =
1099             new UnicodeBlock("HIRAGANA");
1100 
1101         /**
1102          * Constant for the "Katakana" Unicode character block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock KATAKANA =
1106             new UnicodeBlock("KATAKANA");
1107 
1108         /**
1109          * Constant for the "Bopomofo" Unicode character block.
1110          * @since 1.2
1111          */
1112         public static final UnicodeBlock BOPOMOFO =
1113             new UnicodeBlock("BOPOMOFO");
1114 
1115         /**
1116          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1117          * @since 1.2
1118          */
1119         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1120             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1121                              "HANGUL COMPATIBILITY JAMO",
1122                              "HANGULCOMPATIBILITYJAMO");
1123 
1124         /**
1125          * Constant for the "Kanbun" Unicode character block.
1126          * @since 1.2
1127          */
1128         public static final UnicodeBlock KANBUN =
1129             new UnicodeBlock("KANBUN");
1130 
1131         /**
1132          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1133          * @since 1.2
1134          */
1135         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1136             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1137                              "ENCLOSED CJK LETTERS AND MONTHS",
1138                              "ENCLOSEDCJKLETTERSANDMONTHS");
1139 
1140         /**
1141          * Constant for the "CJK Compatibility" Unicode character block.
1142          * @since 1.2
1143          */
1144         public static final UnicodeBlock CJK_COMPATIBILITY =
1145             new UnicodeBlock("CJK_COMPATIBILITY",
1146                              "CJK COMPATIBILITY",
1147                              "CJKCOMPATIBILITY");
1148 
1149         /**
1150          * Constant for the "CJK Unified Ideographs" Unicode character block.
1151          * @since 1.2
1152          */
1153         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1154             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1155                              "CJK UNIFIED IDEOGRAPHS",
1156                              "CJKUNIFIEDIDEOGRAPHS");
1157 
1158         /**
1159          * Constant for the "Hangul Syllables" Unicode character block.
1160          * @since 1.2
1161          */
1162         public static final UnicodeBlock HANGUL_SYLLABLES =
1163             new UnicodeBlock("HANGUL_SYLLABLES",
1164                              "HANGUL SYLLABLES",
1165                              "HANGULSYLLABLES");
1166 
1167         /**
1168          * Constant for the "Private Use Area" Unicode character block.
1169          * @since 1.2
1170          */
1171         public static final UnicodeBlock PRIVATE_USE_AREA =
1172             new UnicodeBlock("PRIVATE_USE_AREA",
1173                              "PRIVATE USE AREA",
1174                              "PRIVATEUSEAREA");
1175 
1176         /**
1177          * Constant for the "CJK Compatibility Ideographs" Unicode character
1178          * block.
1179          * @since 1.2
1180          */
1181         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1182             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1183                              "CJK COMPATIBILITY IDEOGRAPHS",
1184                              "CJKCOMPATIBILITYIDEOGRAPHS");
1185 
1186         /**
1187          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1188          * @since 1.2
1189          */
1190         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1191             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1192                              "ALPHABETIC PRESENTATION FORMS",
1193                              "ALPHABETICPRESENTATIONFORMS");
1194 
1195         /**
1196          * Constant for the "Arabic Presentation Forms-A" Unicode character
1197          * block.
1198          * @since 1.2
1199          */
1200         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1201             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1202                              "ARABIC PRESENTATION FORMS-A",
1203                              "ARABICPRESENTATIONFORMS-A");
1204 
1205         /**
1206          * Constant for the "Combining Half Marks" Unicode character block.
1207          * @since 1.2
1208          */
1209         public static final UnicodeBlock COMBINING_HALF_MARKS =
1210             new UnicodeBlock("COMBINING_HALF_MARKS",
1211                              "COMBINING HALF MARKS",
1212                              "COMBININGHALFMARKS");
1213 
1214         /**
1215          * Constant for the "CJK Compatibility Forms" Unicode character block.
1216          * @since 1.2
1217          */
1218         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1219             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1220                              "CJK COMPATIBILITY FORMS",
1221                              "CJKCOMPATIBILITYFORMS");
1222 
1223         /**
1224          * Constant for the "Small Form Variants" Unicode character block.
1225          * @since 1.2
1226          */
1227         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1228             new UnicodeBlock("SMALL_FORM_VARIANTS",
1229                              "SMALL FORM VARIANTS",
1230                              "SMALLFORMVARIANTS");
1231 
1232         /**
1233          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1234          * @since 1.2
1235          */
1236         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1237             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1238                              "ARABIC PRESENTATION FORMS-B",
1239                              "ARABICPRESENTATIONFORMS-B");
1240 
1241         /**
1242          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1243          * block.
1244          * @since 1.2
1245          */
1246         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1247             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1248                              "HALFWIDTH AND FULLWIDTH FORMS",
1249                              "HALFWIDTHANDFULLWIDTHFORMS");
1250 
1251         /**
1252          * Constant for the "Specials" Unicode character block.
1253          * @since 1.2
1254          */
1255         public static final UnicodeBlock SPECIALS =
1256             new UnicodeBlock("SPECIALS");
1257 
1258         /**
1259          * Legacy constant for the surrogates area, superseded as described below.
1260          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1261          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1262          *             These new constants match the block definitions of the
1263          *             Unicode Standard. The {@link #of(char)} and {@link #of(int)}
1264          *             methods return the new constants, not {@code SURROGATES_AREA}.
1265          */
1266         @Deprecated
1267         public static final UnicodeBlock SURROGATES_AREA =
1268             new UnicodeBlock("SURROGATES_AREA");
1269 
1270         /**
1271          * Constant for the "Syriac" Unicode character block.
1272          * @since 1.4
1273          */
1274         public static final UnicodeBlock SYRIAC =
1275             new UnicodeBlock("SYRIAC");
1276 
1277         /**
1278          * Constant for the "Thaana" Unicode character block.
1279          * @since 1.4
1280          */
1281         public static final UnicodeBlock THAANA =
1282             new UnicodeBlock("THAANA");
1283 
1284         /**
1285          * Constant for the "Sinhala" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock SINHALA =
1289             new UnicodeBlock("SINHALA");
1290 
1291         /**
1292          * Constant for the "Myanmar" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock MYANMAR =
1296             new UnicodeBlock("MYANMAR");
1297 
1298         /**
1299          * Constant for the "Ethiopic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock ETHIOPIC =
1303             new UnicodeBlock("ETHIOPIC");
1304 
1305         /**
1306          * Constant for the "Cherokee" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock CHEROKEE =
1310             new UnicodeBlock("CHEROKEE");
1311 
1312         /**
1313          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1317             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1318                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1319                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1320 
1321         /**
1322          * Constant for the "Ogham" Unicode character block.
1323          * @since 1.4
1324          */
1325         public static final UnicodeBlock OGHAM =
1326             new UnicodeBlock("OGHAM");
1327 
1328         /**
1329          * Constant for the "Runic" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock RUNIC =
1333             new UnicodeBlock("RUNIC");
1334 
1335         /**
1336          * Constant for the "Khmer" Unicode character block.
1337          * @since 1.4
1338          */
1339         public static final UnicodeBlock KHMER =
1340             new UnicodeBlock("KHMER");
1341 
1342         /**
1343          * Constant for the "Mongolian" Unicode character block.
1344          * @since 1.4
1345          */
1346         public static final UnicodeBlock MONGOLIAN =
1347             new UnicodeBlock("MONGOLIAN");
1348 
1349         /**
1350          * Constant for the "Braille Patterns" Unicode character block.
1351          * @since 1.4
1352          */
1353         public static final UnicodeBlock BRAILLE_PATTERNS =
1354             new UnicodeBlock("BRAILLE_PATTERNS",
1355                              "BRAILLE PATTERNS",
1356                              "BRAILLEPATTERNS");
1357 
1358         /**
1359          * Constant for the "CJK Radicals Supplement" Unicode character block.
1360          * @since 1.4
1361          */
1362         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1363             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1364                              "CJK RADICALS SUPPLEMENT",
1365                              "CJKRADICALSSUPPLEMENT");
1366 
1367         /**
1368          * Constant for the "Kangxi Radicals" Unicode character block.
1369          * @since 1.4
1370          */
1371         public static final UnicodeBlock KANGXI_RADICALS =
1372             new UnicodeBlock("KANGXI_RADICALS",
1373                              "KANGXI RADICALS",
1374                              "KANGXIRADICALS");
1375 
1376         /**
1377          * Constant for the "Ideographic Description Characters" Unicode character block.
1378          * @since 1.4
1379          */
1380         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1381             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1382                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1383                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1384 
1385         /**
1386          * Constant for the "Bopomofo Extended" Unicode character block.
1387          * @since 1.4
1388          */
1389         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1390             new UnicodeBlock("BOPOMOFO_EXTENDED",
1391                              "BOPOMOFO EXTENDED",
1392                              "BOPOMOFOEXTENDED");
1393 
1394         /**
1395          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1396          * @since 1.4
1397          */
1398         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1399             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1400                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1401                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1402 
1403         /**
1404          * Constant for the "Yi Syllables" Unicode character block.
1405          * @since 1.4
1406          */
1407         public static final UnicodeBlock YI_SYLLABLES =
1408             new UnicodeBlock("YI_SYLLABLES",
1409                              "YI SYLLABLES",
1410                              "YISYLLABLES");
1411 
1412         /**
1413          * Constant for the "Yi Radicals" Unicode character block.
1414          * @since 1.4
1415          */
1416         public static final UnicodeBlock YI_RADICALS =
1417             new UnicodeBlock("YI_RADICALS",
1418                              "YI RADICALS",
1419                              "YIRADICALS");
1420 
1421         /**
1422          * Constant for the "Cyrillic Supplementary" (also "Cyrillic Supplement") Unicode character block.
1423          * @since 1.5
1424          */
1425         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1426             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1427                              "CYRILLIC SUPPLEMENTARY",
1428                              "CYRILLICSUPPLEMENTARY",
1429                              "CYRILLIC SUPPLEMENT",
1430                              "CYRILLICSUPPLEMENT");
1431 
1432         /**
1433          * Constant for the "Tagalog" Unicode character block.
1434          * @since 1.5
1435          */
1436         public static final UnicodeBlock TAGALOG =
1437             new UnicodeBlock("TAGALOG");
1438 
1439         /**
1440          * Constant for the "Hanunoo" Unicode character block.
1441          * @since 1.5
1442          */
1443         public static final UnicodeBlock HANUNOO =
1444             new UnicodeBlock("HANUNOO");
1445 
1446         /**
1447          * Constant for the "Buhid" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock BUHID =
1451             new UnicodeBlock("BUHID");
1452 
1453         /**
1454          * Constant for the "Tagbanwa" Unicode character block.
1455          * @since 1.5
1456          */
1457         public static final UnicodeBlock TAGBANWA =
1458             new UnicodeBlock("TAGBANWA");
1459 
1460         /**
1461          * Constant for the "Limbu" Unicode character block.
1462          * @since 1.5
1463          */
1464         public static final UnicodeBlock LIMBU =
1465             new UnicodeBlock("LIMBU");
1466 
1467         /**
1468          * Constant for the "Tai Le" Unicode character block.
1469          * @since 1.5
1470          */
1471         public static final UnicodeBlock TAI_LE =
1472             new UnicodeBlock("TAI_LE",
1473                              "TAI LE",
1474                              "TAILE");
1475 
1476         /**
1477          * Constant for the "Khmer Symbols" Unicode character block.
1478          * @since 1.5
1479          */
1480         public static final UnicodeBlock KHMER_SYMBOLS =
1481             new UnicodeBlock("KHMER_SYMBOLS",
1482                              "KHMER SYMBOLS",
1483                              "KHMERSYMBOLS");
1484 
1485         /**
1486          * Constant for the "Phonetic Extensions" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1490             new UnicodeBlock("PHONETIC_EXTENSIONS",
1491                              "PHONETIC EXTENSIONS",
1492                              "PHONETICEXTENSIONS");
1493 
1494         /**
1495          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1499             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1500                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1501                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1502 
1503         /**
1504          * Constant for the "Supplemental Arrows-A" Unicode character block.
1505          * @since 1.5
1506          */
1507         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1508             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1509                              "SUPPLEMENTAL ARROWS-A",
1510                              "SUPPLEMENTALARROWS-A");
1511 
1512         /**
1513          * Constant for the "Supplemental Arrows-B" Unicode character block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1517             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1518                              "SUPPLEMENTAL ARROWS-B",
1519                              "SUPPLEMENTALARROWS-B");
1520 
1521         /**
1522          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1523          * character block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1527             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1528                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1529                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1530 
1531         /**
1532          * Constant for the "Supplemental Mathematical Operators" Unicode
1533          * character block.
1534          * @since 1.5
1535          */
1536         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1537             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1538                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1539                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1540 
1541         /**
1542          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1543          * block.
1544          * @since 1.5
1545          */
1546         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1547             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1548                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1549                              "MISCELLANEOUSSYMBOLSANDARROWS");
1550 
1551         /**
1552          * Constant for the "Katakana Phonetic Extensions" Unicode character
1553          * block.
1554          * @since 1.5
1555          */
1556         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1557             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1558                              "KATAKANA PHONETIC EXTENSIONS",
1559                              "KATAKANAPHONETICEXTENSIONS");
1560 
1561         /**
1562          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1563          * @since 1.5
1564          */
1565         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1566             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1567                              "YIJING HEXAGRAM SYMBOLS",
1568                              "YIJINGHEXAGRAMSYMBOLS");
1569 
1570         /**
1571          * Constant for the "Variation Selectors" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock VARIATION_SELECTORS =
1575             new UnicodeBlock("VARIATION_SELECTORS",
1576                              "VARIATION SELECTORS",
1577                              "VARIATIONSELECTORS");
1578 
1579         /**
1580          * Constant for the "Linear B Syllabary" Unicode character block.
1581          * @since 1.5
1582          */
1583         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1584             new UnicodeBlock("LINEAR_B_SYLLABARY",
1585                              "LINEAR B SYLLABARY",
1586                              "LINEARBSYLLABARY");
1587 
1588         /**
1589          * Constant for the "Linear B Ideograms" Unicode character block.
1590          * @since 1.5
1591          */
1592         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1593             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1594                              "LINEAR B IDEOGRAMS",
1595                              "LINEARBIDEOGRAMS");
1596 
1597         /**
1598          * Constant for the "Aegean Numbers" Unicode character block.
1599          * @since 1.5
1600          */
1601         public static final UnicodeBlock AEGEAN_NUMBERS =
1602             new UnicodeBlock("AEGEAN_NUMBERS",
1603                              "AEGEAN NUMBERS",
1604                              "AEGEANNUMBERS");
1605 
1606         /**
1607          * Constant for the "Old Italic" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock OLD_ITALIC =
1611             new UnicodeBlock("OLD_ITALIC",
1612                              "OLD ITALIC",
1613                              "OLDITALIC");
1614 
1615         /**
1616          * Constant for the "Gothic" Unicode character block.
1617          * @since 1.5
1618          */
1619         public static final UnicodeBlock GOTHIC =
1620             new UnicodeBlock("GOTHIC");
1621 
1622         /**
1623          * Constant for the "Ugaritic" Unicode character block.
1624          * @since 1.5
1625          */
1626         public static final UnicodeBlock UGARITIC =
1627             new UnicodeBlock("UGARITIC");
1628 
1629         /**
1630          * Constant for the "Deseret" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock DESERET =
1634             new UnicodeBlock("DESERET");
1635 
1636         /**
1637          * Constant for the "Shavian" Unicode character block.
1638          * @since 1.5
1639          */
1640         public static final UnicodeBlock SHAVIAN =
1641             new UnicodeBlock("SHAVIAN");
1642 
1643         /**
1644          * Constant for the "Osmanya" Unicode character block.
1645          * @since 1.5
1646          */
1647         public static final UnicodeBlock OSMANYA =
1648             new UnicodeBlock("OSMANYA");  // one-word block name, upper-cased; same as the field name
1649 
1650         /**
1651          * Constant for the "Cypriot Syllabary" Unicode character block.
1652          * @since 1.5
1653          */
1654         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1655             new UnicodeBlock("CYPRIOT_SYLLABARY",  // field-name spelling
1656                              "CYPRIOT SYLLABARY",  // block name, upper-cased
1657                              "CYPRIOTSYLLABARY");  // block name without spaces
1658 
1659         /**
1660          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1664             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",  // field-name spelling
1665                              "BYZANTINE MUSICAL SYMBOLS",  // block name, upper-cased
1666                              "BYZANTINEMUSICALSYMBOLS");  // block name without spaces
1667 
1668         /**
1669          * Constant for the "Musical Symbols" Unicode character block.
1670          * @since 1.5
1671          */
1672         public static final UnicodeBlock MUSICAL_SYMBOLS =
1673             new UnicodeBlock("MUSICAL_SYMBOLS",  // field-name spelling
1674                              "MUSICAL SYMBOLS",  // block name, upper-cased
1675                              "MUSICALSYMBOLS");  // block name without spaces
1676 
1677         /**
1678          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1682             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",  // field-name spelling
1683                              "TAI XUAN JING SYMBOLS",  // block name, upper-cased
1684                              "TAIXUANJINGSYMBOLS");  // block name without spaces
1685 
1686         /**
1687          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1688          * character block.
1689          * @since 1.5
1690          */
1691         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1692             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",  // field-name spelling
1693                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",  // block name, upper-cased
1694                              "MATHEMATICALALPHANUMERICSYMBOLS");  // block name without spaces
1695 
1696         /**
1697          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1698          * character block.
1699          * @since 1.5
1700          */
1701         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1702             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // field-name spelling
1703                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // block name, upper-cased
1704                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // block name without spaces
1705 
1706         /**
1707          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1711             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",  // field-name spelling
1712                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",  // block name, upper-cased
1713                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // block name without spaces
1714 
1715         /**
1716          * Constant for the "Tags" Unicode character block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock TAGS =
1720             new UnicodeBlock("TAGS");  // one-word block name, upper-cased; same as the field name
1721 
1722         /**
1723          * Constant for the "Variation Selectors Supplement" Unicode character
1724          * block.
1725          * @since 1.5
1726          */
1727         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1728             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",  // field-name spelling
1729                              "VARIATION SELECTORS SUPPLEMENT",  // block name, upper-cased
1730                              "VARIATIONSELECTORSSUPPLEMENT");  // block name without spaces
1731 
1732         /**
1733          * Constant for the "Supplementary Private Use Area-A" Unicode character
1734          * block.
1735          * @since 1.5
1736          */
1737         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1738             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // field-name spelling
1739                              "SUPPLEMENTARY PRIVATE USE AREA-A",  // block name, upper-cased
1740                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // block name without spaces (hyphen kept)
1741 
1742         /**
1743          * Constant for the "Supplementary Private Use Area-B" Unicode character
1744          * block.
1745          * @since 1.5
1746          */
1747         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1748             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",  // field-name spelling
1749                              "SUPPLEMENTARY PRIVATE USE AREA-B",  // block name, upper-cased
1750                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // block name without spaces (hyphen kept)
1751 
1752         /**
1753          * Constant for the "High Surrogates" Unicode character block.
1754          * This block represents code point values in the high surrogate
1755          * range: U+D800 through U+DB7F.
1756          *
1757          * @since 1.5
1758          */
1759         public static final UnicodeBlock HIGH_SURROGATES =
1760             new UnicodeBlock("HIGH_SURROGATES",  // field-name spelling
1761                              "HIGH SURROGATES",  // block name, upper-cased
1762                              "HIGHSURROGATES");  // block name without spaces
1763 
1764         /**
1765          * Constant for the "High Private Use Surrogates" Unicode character
1766          * block.
1767          * This block represents code point values in the private use high
1768          * surrogate range: U+DB80 through U+DBFF.
1769          *
1770          * @since 1.5
1771          */
1772         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1773             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",  // field-name spelling
1774                              "HIGH PRIVATE USE SURROGATES",  // block name, upper-cased
1775                              "HIGHPRIVATEUSESURROGATES");  // block name without spaces
1776 
1777         /**
1778          * Constant for the "Low Surrogates" Unicode character block.
1779          * This block represents code point values in the low surrogate
1780          * range: U+DC00 through U+DFFF.
1781          *
1782          * @since 1.5
1783          */
1784         public static final UnicodeBlock LOW_SURROGATES =
1785             new UnicodeBlock("LOW_SURROGATES",  // field-name spelling
1786                              "LOW SURROGATES",  // block name, upper-cased
1787                              "LOWSURROGATES");  // block name without spaces
1788 
1789         /**
1790          * Constant for the "Arabic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1794             new UnicodeBlock("ARABIC_SUPPLEMENT",  // field-name spelling
1795                              "ARABIC SUPPLEMENT",  // block name, upper-cased
1796                              "ARABICSUPPLEMENT");  // block name without spaces
1797 
1798         /**
1799          * Constant for the "NKo" Unicode character block.
1800          * @since 1.7
1801          */
1802         public static final UnicodeBlock NKO =
1803             new UnicodeBlock("NKO");  // one-word block name, upper-cased; same as the field name
1804 
1805         /**
1806          * Constant for the "Samaritan" Unicode character block.
1807          * @since 1.7
1808          */
1809         public static final UnicodeBlock SAMARITAN =
1810             new UnicodeBlock("SAMARITAN");  // one-word block name, upper-cased; same as the field name
1811 
1812         /**
1813          * Constant for the "Mandaic" Unicode character block.
1814          * @since 1.7
1815          */
1816         public static final UnicodeBlock MANDAIC =
1817             new UnicodeBlock("MANDAIC");  // one-word block name, upper-cased; same as the field name
1818 
1819         /**
1820          * Constant for the "Ethiopic Supplement" Unicode character block.
1821          * @since 1.7
1822          */
1823         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1824             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",  // field-name spelling
1825                              "ETHIOPIC SUPPLEMENT",  // block name, upper-cased
1826                              "ETHIOPICSUPPLEMENT");  // block name without spaces
1827 
1828         /**
1829          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1830          * Unicode character block.
1831          * @since 1.7
1832          */
1833         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1834             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",  // field-name spelling
1835                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",  // block name, upper-cased
1836                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // block name without spaces
1837 
1838         /**
1839          * Constant for the "New Tai Lue" Unicode character block.
1840          * @since 1.7
1841          */
1842         public static final UnicodeBlock NEW_TAI_LUE =
1843             new UnicodeBlock("NEW_TAI_LUE",  // field-name spelling
1844                              "NEW TAI LUE",  // block name, upper-cased
1845                              "NEWTAILUE");  // block name without spaces
1846 
1847         /**
1848          * Constant for the "Buginese" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BUGINESE =
1852             new UnicodeBlock("BUGINESE");  // one-word block name, upper-cased; same as the field name
1853 
1854         /**
1855          * Constant for the "Tai Tham" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock TAI_THAM =
1859             new UnicodeBlock("TAI_THAM",  // field-name spelling
1860                              "TAI THAM",  // block name, upper-cased
1861                              "TAITHAM");  // block name without spaces
1862 
1863         /**
1864          * Constant for the "Balinese" Unicode character block.
1865          * @since 1.7
1866          */
1867         public static final UnicodeBlock BALINESE =
1868             new UnicodeBlock("BALINESE");  // one-word block name, upper-cased; same as the field name
1869 
1870         /**
1871          * Constant for the "Sundanese" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock SUNDANESE =
1875             new UnicodeBlock("SUNDANESE");  // one-word block name, upper-cased; same as the field name
1876 
1877         /**
1878          * Constant for the "Batak" Unicode character block.
1879          * @since 1.7
1880          */
1881         public static final UnicodeBlock BATAK =
1882             new UnicodeBlock("BATAK");  // one-word block name, upper-cased; same as the field name
1883 
1884         /**
1885          * Constant for the "Lepcha" Unicode character block.
1886          * @since 1.7
1887          */
1888         public static final UnicodeBlock LEPCHA =
1889             new UnicodeBlock("LEPCHA");  // one-word block name, upper-cased; same as the field name
1890 
1891         /**
1892          * Constant for the "Ol Chiki" Unicode character block.
1893          * @since 1.7
1894          */
1895         public static final UnicodeBlock OL_CHIKI =
1896             new UnicodeBlock("OL_CHIKI",  // field-name spelling
1897                              "OL CHIKI",  // block name, upper-cased
1898                              "OLCHIKI");  // block name without spaces
1899 
1900         /**
1901          * Constant for the "Vedic Extensions" Unicode character block.
1902          * @since 1.7
1903          */
1904         public static final UnicodeBlock VEDIC_EXTENSIONS =
1905             new UnicodeBlock("VEDIC_EXTENSIONS",  // field-name spelling
1906                              "VEDIC EXTENSIONS",  // block name, upper-cased
1907                              "VEDICEXTENSIONS");  // block name without spaces
1908 
1909         /**
1910          * Constant for the "Phonetic Extensions Supplement" Unicode character
1911          * block.
1912          * @since 1.7
1913          */
1914         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1915             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",  // field-name spelling
1916                              "PHONETIC EXTENSIONS SUPPLEMENT",  // block name, upper-cased
1917                              "PHONETICEXTENSIONSSUPPLEMENT");  // block name without spaces
1918 
1919         /**
1920          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1921          * character block.
1922          * @since 1.7
1923          */
1924         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1925             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",  // field-name spelling
1926                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",  // block name, upper-cased
1927                              "COMBININGDIACRITICALMARKSSUPPLEMENT");  // block name without spaces
1928 
1929         /**
1930          * Constant for the "Glagolitic" Unicode character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock GLAGOLITIC =
1934             new UnicodeBlock("GLAGOLITIC");  // one-word block name, upper-cased; same as the field name
1935 
1936         /**
1937          * Constant for the "Latin Extended-C" Unicode character block.
1938          * @since 1.7
1939          */
1940         public static final UnicodeBlock LATIN_EXTENDED_C =
1941             new UnicodeBlock("LATIN_EXTENDED_C",  // field-name spelling
1942                              "LATIN EXTENDED-C",  // block name, upper-cased
1943                              "LATINEXTENDED-C");  // block name without spaces (hyphen kept)
1944 
1945         /**
1946          * Constant for the "Coptic" Unicode character block.
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock COPTIC =
1950             new UnicodeBlock("COPTIC");  // one-word block name, upper-cased; same as the field name
1951 
1952         /**
1953          * Constant for the "Georgian Supplement" Unicode character block.
1954          * @since 1.7
1955          */
1956         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1957             new UnicodeBlock("GEORGIAN_SUPPLEMENT",  // field-name spelling
1958                              "GEORGIAN SUPPLEMENT",  // block name, upper-cased
1959                              "GEORGIANSUPPLEMENT");  // block name without spaces
1960 
1961         /**
1962          * Constant for the "Tifinagh" Unicode character block.
1963          * @since 1.7
1964          */
1965         public static final UnicodeBlock TIFINAGH =
1966             new UnicodeBlock("TIFINAGH");  // one-word block name, upper-cased; same as the field name
1967 
1968         /**
1969          * Constant for the "Ethiopic Extended" Unicode character block.
1970          * @since 1.7
1971          */
1972         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1973             new UnicodeBlock("ETHIOPIC_EXTENDED",  // field-name spelling
1974                              "ETHIOPIC EXTENDED",  // block name, upper-cased
1975                              "ETHIOPICEXTENDED");  // block name without spaces
1976 
1977         /**
1978          * Constant for the "Cyrillic Extended-A" Unicode character block.
1979          * @since 1.7
1980          */
1981         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1982             new UnicodeBlock("CYRILLIC_EXTENDED_A",  // field-name spelling
1983                              "CYRILLIC EXTENDED-A",  // block name, upper-cased
1984                              "CYRILLICEXTENDED-A");  // block name without spaces (hyphen kept)
1985 
1986         /**
1987          * Constant for the "Supplemental Punctuation" Unicode character block.
1988          * @since 1.7
1989          */
1990         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1991             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",  // field-name spelling
1992                              "SUPPLEMENTAL PUNCTUATION",  // block name, upper-cased
1993                              "SUPPLEMENTALPUNCTUATION");  // block name without spaces
1994 
1995         /**
1996          * Constant for the "CJK Strokes" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock CJK_STROKES =
2000             new UnicodeBlock("CJK_STROKES",  // field-name spelling
2001                              "CJK STROKES",  // block name, upper-cased
2002                              "CJKSTROKES");  // block name without spaces
2003 
2004         /**
2005          * Constant for the "Lisu" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock LISU =
2009             new UnicodeBlock("LISU");  // one-word block name, upper-cased; same as the field name
2010 
2011         /**
2012          * Constant for the "Vai" Unicode character block.
2013          * @since 1.7
2014          */
2015         public static final UnicodeBlock VAI =
2016             new UnicodeBlock("VAI");  // one-word block name, upper-cased; same as the field name
2017 
2018         /**
2019          * Constant for the "Cyrillic Extended-B" Unicode character block.
2020          * @since 1.7
2021          */
2022         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2023             new UnicodeBlock("CYRILLIC_EXTENDED_B",  // field-name spelling
2024                              "CYRILLIC EXTENDED-B",  // block name, upper-cased
2025                              "CYRILLICEXTENDED-B");  // block name without spaces (hyphen kept)
2026 
2027         /**
2028          * Constant for the "Bamum" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock BAMUM =
2032             new UnicodeBlock("BAMUM");  // one-word block name, upper-cased; same as the field name
2033 
2034         /**
2035          * Constant for the "Modifier Tone Letters" Unicode character block.
2036          * @since 1.7
2037          */
2038         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2039             new UnicodeBlock("MODIFIER_TONE_LETTERS",  // field-name spelling
2040                              "MODIFIER TONE LETTERS",  // block name, upper-cased
2041                              "MODIFIERTONELETTERS");  // block name without spaces
2042 
2043         /**
2044          * Constant for the "Latin Extended-D" Unicode character block.
2045          * @since 1.7
2046          */
2047         public static final UnicodeBlock LATIN_EXTENDED_D =
2048             new UnicodeBlock("LATIN_EXTENDED_D",  // field-name spelling
2049                              "LATIN EXTENDED-D",  // block name, upper-cased
2050                              "LATINEXTENDED-D");  // block name without spaces (hyphen kept)
2051 
2052         /**
2053          * Constant for the "Syloti Nagri" Unicode character block.
2054          * @since 1.7
2055          */
2056         public static final UnicodeBlock SYLOTI_NAGRI =
2057             new UnicodeBlock("SYLOTI_NAGRI",  // field-name spelling
2058                              "SYLOTI NAGRI",  // block name, upper-cased
2059                              "SYLOTINAGRI");  // block name without spaces
2060 
2061         /**
2062          * Constant for the "Common Indic Number Forms" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2066             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",  // field-name spelling
2067                              "COMMON INDIC NUMBER FORMS",  // block name, upper-cased
2068                              "COMMONINDICNUMBERFORMS");  // block name without spaces
2069 
2070         /**
2071          * Constant for the "Phags-pa" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock PHAGS_PA =
2075             new UnicodeBlock("PHAGS_PA",  // field-name spelling
2076                              "PHAGS-PA");  // block name, upper-cased; it contains no spaces to strip
2077 
2078         /**
2079          * Constant for the "Saurashtra" Unicode character block.
2080          * @since 1.7
2081          */
2082         public static final UnicodeBlock SAURASHTRA =
2083             new UnicodeBlock("SAURASHTRA");  // one-word block name, upper-cased; same as the field name
2084 
2085         /**
2086          * Constant for the "Devanagari Extended" Unicode character block.
2087          * @since 1.7
2088          */
2089         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2090             new UnicodeBlock("DEVANAGARI_EXTENDED",  // field-name spelling
2091                              "DEVANAGARI EXTENDED",  // block name, upper-cased
2092                              "DEVANAGARIEXTENDED");  // block name without spaces
2093 
2094         /**
2095          * Constant for the "Kayah Li" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock KAYAH_LI =
2099             new UnicodeBlock("KAYAH_LI",  // field-name spelling
2100                              "KAYAH LI",  // block name, upper-cased
2101                              "KAYAHLI");  // block name without spaces
2102 
2103         /**
2104          * Constant for the "Rejang" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock REJANG =
2108             new UnicodeBlock("REJANG");  // one-word block name, upper-cased; same as the field name
2109 
2110         /**
2111          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2112          * @since 1.7
2113          */
2114         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2115             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // field-name spelling
2116                              "HANGUL JAMO EXTENDED-A",  // block name, upper-cased
2117                              "HANGULJAMOEXTENDED-A");  // block name without spaces (hyphen kept)
2118 
2119         /**
2120          * Constant for the "Javanese" Unicode character block.
2121          * @since 1.7
2122          */
2123         public static final UnicodeBlock JAVANESE =
2124             new UnicodeBlock("JAVANESE");  // one-word block name, upper-cased; same as the field name
2125 
2126         /**
2127          * Constant for the "Cham" Unicode character block.
2128          * @since 1.7
2129          */
2130         public static final UnicodeBlock CHAM =
2131             new UnicodeBlock("CHAM");  // one-word block name, upper-cased; same as the field name
2132 
2133         /**
2134          * Constant for the "Myanmar Extended-A" Unicode character block.
2135          * @since 1.7
2136          */
2137         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2138             new UnicodeBlock("MYANMAR_EXTENDED_A",  // field-name spelling
2139                              "MYANMAR EXTENDED-A",  // block name, upper-cased
2140                              "MYANMAREXTENDED-A");  // block name without spaces (hyphen kept)
2141 
2142         /**
2143          * Constant for the "Tai Viet" Unicode character block.
2144          * @since 1.7
2145          */
2146         public static final UnicodeBlock TAI_VIET =
2147             new UnicodeBlock("TAI_VIET",  // field-name spelling
2148                              "TAI VIET",  // block name, upper-cased
2149                              "TAIVIET");  // block name without spaces
2150 
2151         /**
2152          * Constant for the "Ethiopic Extended-A" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2156             new UnicodeBlock("ETHIOPIC_EXTENDED_A",  // field-name spelling
2157                              "ETHIOPIC EXTENDED-A",  // block name, upper-cased
2158                              "ETHIOPICEXTENDED-A");  // block name without spaces (hyphen kept)
2159 
2160         /**
2161          * Constant for the "Meetei Mayek" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock MEETEI_MAYEK =
2165             new UnicodeBlock("MEETEI_MAYEK",  // field-name spelling
2166                              "MEETEI MAYEK",  // block name, upper-cased
2167                              "MEETEIMAYEK");  // block name without spaces
2168 
2169         /**
2170          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2174             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",  // field-name spelling
2175                              "HANGUL JAMO EXTENDED-B",  // block name, upper-cased
2176                              "HANGULJAMOEXTENDED-B");  // block name without spaces (hyphen kept)
2177 
2178         /**
2179          * Constant for the "Vertical Forms" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock VERTICAL_FORMS =
2183             new UnicodeBlock("VERTICAL_FORMS",  // field-name spelling
2184                              "VERTICAL FORMS",  // block name, upper-cased
2185                              "VERTICALFORMS");  // block name without spaces
2186 
2187         /**
2188          * Constant for the "Ancient Greek Numbers" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2192             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // field-name spelling
2193                              "ANCIENT GREEK NUMBERS",  // block name, upper-cased
2194                              "ANCIENTGREEKNUMBERS");  // block name without spaces
2195 
2196         /**
2197          * Constant for the "Ancient Symbols" Unicode character block.
2198          * @since 1.7
2199          */
2200         public static final UnicodeBlock ANCIENT_SYMBOLS =
2201             new UnicodeBlock("ANCIENT_SYMBOLS",  // field-name spelling
2202                              "ANCIENT SYMBOLS",  // block name, upper-cased
2203                              "ANCIENTSYMBOLS");  // block name without spaces
2204 
2205         /**
2206          * Constant for the "Phaistos Disc" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock PHAISTOS_DISC =
2210             new UnicodeBlock("PHAISTOS_DISC",  // field-name spelling
2211                              "PHAISTOS DISC",  // block name, upper-cased
2212                              "PHAISTOSDISC");  // block name without spaces
2213 
2214         /**
2215          * Constant for the "Lycian" Unicode character block.
2216          * @since 1.7
2217          */
2218         public static final UnicodeBlock LYCIAN =
2219             new UnicodeBlock("LYCIAN");  // one-word block name, upper-cased; same as the field name
2220 
2221         /**
2222          * Constant for the "Carian" Unicode character block.
2223          * @since 1.7
2224          */
2225         public static final UnicodeBlock CARIAN =
2226             new UnicodeBlock("CARIAN");  // one-word block name, upper-cased; same as the field name
2227 
2228         /**
2229          * Constant for the "Old Persian" Unicode character block.
2230          * @since 1.7
2231          */
2232         public static final UnicodeBlock OLD_PERSIAN =
2233             new UnicodeBlock("OLD_PERSIAN",  // field-name spelling
2234                              "OLD PERSIAN",  // block name, upper-cased
2235                              "OLDPERSIAN");  // block name without spaces
2236 
2237         /**
2238          * Constant for the "Imperial Aramaic" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2242             new UnicodeBlock("IMPERIAL_ARAMAIC",  // field-name spelling
2243                              "IMPERIAL ARAMAIC",  // block name, upper-cased
2244                              "IMPERIALARAMAIC");  // block name without spaces
2245 
2246         /**
2247          * Constant for the "Phoenician" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock PHOENICIAN =
2251             new UnicodeBlock("PHOENICIAN");  // one-word block name, upper-cased; same as the field name
2252 
2253         /**
2254          * Constant for the "Lydian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock LYDIAN =
2258             new UnicodeBlock("LYDIAN");  // one-word block name, upper-cased; same as the field name
2259 
2260         /**
2261          * Constant for the "Kharoshthi" Unicode character block.
2262          * @since 1.7
2263          */
2264         public static final UnicodeBlock KHAROSHTHI =
2265             new UnicodeBlock("KHAROSHTHI");  // one-word block name, upper-cased; same as the field name
2266 
2267         /**
2268          * Constant for the "Old South Arabian" Unicode character block.
2269          * @since 1.7
2270          */
2271         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2272             new UnicodeBlock("OLD_SOUTH_ARABIAN",  // field-name spelling
2273                              "OLD SOUTH ARABIAN",  // block name, upper-cased
2274                              "OLDSOUTHARABIAN");  // block name without spaces
2275 
2276         /**
2277          * Constant for the "Avestan" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock AVESTAN =
2281             new UnicodeBlock("AVESTAN");  // one-word block name, upper-cased; same as the field name
2282 
2283         /**
2284          * Constant for the "Inscriptional Parthian" Unicode character block.
2285          * @since 1.7
2286          */
2287         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2288             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",  // field-name spelling
2289                              "INSCRIPTIONAL PARTHIAN",  // block name, upper-cased
2290                              "INSCRIPTIONALPARTHIAN");  // block name without spaces
2291 
2292         /**
2293          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2294          * @since 1.7
2295          */
2296         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2297             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",  // field-name spelling
2298                              "INSCRIPTIONAL PAHLAVI",  // block name, upper-cased
2299                              "INSCRIPTIONALPAHLAVI");  // block name without spaces
2300 
2301         /**
2302          * Constant for the "Old Turkic" Unicode character block.
2303          * @since 1.7
2304          */
2305         public static final UnicodeBlock OLD_TURKIC =
2306             new UnicodeBlock("OLD_TURKIC",  // field-name spelling
2307                              "OLD TURKIC",  // block name, upper-cased
2308                              "OLDTURKIC");  // block name without spaces
2309 
2310         /**
2311          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2312          * @since 1.7
2313          */
2314         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2315             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",  // field-name spelling
2316                              "RUMI NUMERAL SYMBOLS",  // block name, upper-cased
2317                              "RUMINUMERALSYMBOLS");  // block name without spaces
2318 
2319         /**
2320          * Constant for the "Brahmi" Unicode character block.
2321          * @since 1.7
2322          */
2323         public static final UnicodeBlock BRAHMI =
2324             new UnicodeBlock("BRAHMI");  // one-word block name, upper-cased; same as the field name
2325 
2326         /**
2327          * Constant for the "Kaithi" Unicode character block.
2328          * @since 1.7
2329          */
2330         public static final UnicodeBlock KAITHI =
2331             new UnicodeBlock("KAITHI");  // one-word block name, upper-cased; same as the field name
2332 
2333         /**
2334          * Constant for the "Cuneiform" Unicode character block.
2335          * @since 1.7
2336          */
2337         public static final UnicodeBlock CUNEIFORM =
2338             new UnicodeBlock("CUNEIFORM");  // one-word block name, upper-cased; same as the field name
2339 
2340         /**
2341          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2342          * character block.
2343          * @since 1.7
2344          */
2345         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2346             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",  // field-name spelling
2347                              "CUNEIFORM NUMBERS AND PUNCTUATION",  // block name, upper-cased
2348                              "CUNEIFORMNUMBERSANDPUNCTUATION");  // block name without spaces
2349 
2350         /**
2351          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2352          * @since 1.7
2353          */
2354         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2355             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",  // field-name spelling
2356                              "EGYPTIAN HIEROGLYPHS",  // block name, upper-cased
2357                              "EGYPTIANHIEROGLYPHS");  // block name without spaces
2358 
2359         /**
2360          * Constant for the "Bamum Supplement" Unicode character block.
2361          * @since 1.7
2362          */
2363         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2364             new UnicodeBlock("BAMUM_SUPPLEMENT",  // field-name spelling
2365                              "BAMUM SUPPLEMENT",  // block name, upper-cased
2366                              "BAMUMSUPPLEMENT");  // block name without spaces
2367 
2368         /**
2369          * Constant for the "Kana Supplement" Unicode character block.
2370          * @since 1.7
2371          */
2372         public static final UnicodeBlock KANA_SUPPLEMENT =
2373             new UnicodeBlock("KANA_SUPPLEMENT",  // field-name spelling
2374                              "KANA SUPPLEMENT",  // block name, upper-cased
2375                              "KANASUPPLEMENT");  // block name without spaces
2376 
2377         /**
2378          * Constant for the "Ancient Greek Musical Notation" Unicode character
2379          * block.
2380          * @since 1.7
2381          */
2382         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2383             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",  // field-name spelling
2384                              "ANCIENT GREEK MUSICAL NOTATION",  // block name, upper-cased
2385                              "ANCIENTGREEKMUSICALNOTATION");  // block name without spaces
2386 
2387         /**
2388          * Constant for the "Counting Rod Numerals" Unicode character block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2392             new UnicodeBlock("COUNTING_ROD_NUMERALS",  // field-name spelling
2393                              "COUNTING ROD NUMERALS",  // block name, upper-cased
2394                              "COUNTINGRODNUMERALS");  // block name without spaces
2395 
2396         /**
2397          * Constant for the "Mahjong Tiles" Unicode character block.
2398          * @since 1.7
2399          */
2400         public static final UnicodeBlock MAHJONG_TILES =
2401             new UnicodeBlock("MAHJONG_TILES",  // field-name spelling
2402                              "MAHJONG TILES",  // block name, upper-cased
2403                              "MAHJONGTILES");  // block name without spaces
2404 
2405         /**
2406          * Constant for the "Domino Tiles" Unicode character block.
2407          * @since 1.7
2408          */
2409         public static final UnicodeBlock DOMINO_TILES =
2410             new UnicodeBlock("DOMINO_TILES",  // field-name spelling
2411                              "DOMINO TILES",  // block name, upper-cased
2412                              "DOMINOTILES");  // block name without spaces
2413 
2414         /**
2415          * Constant for the "Playing Cards" Unicode character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock PLAYING_CARDS =
2419             new UnicodeBlock("PLAYING_CARDS",  // field-name spelling
2420                              "PLAYING CARDS",  // block name, upper-cased
2421                              "PLAYINGCARDS");  // block name without spaces
2422 
2423         /**
2424          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2425          * block.
2426          * @since 1.7
2427          */
2428         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2429             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",  // field-name spelling
2430                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",  // block name, upper-cased
2431                              "ENCLOSEDALPHANUMERICSUPPLEMENT");  // block name without spaces
2432 
2433         /**
2434          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2435          * block.
2436          * @since 1.7
2437          */
2438         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2439             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",  // field-name spelling
2440                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",  // block name, upper-cased
2441                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");  // block name without spaces
2442 
2443         /**
2444          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2445          * character block (U+1F300..U+1F5FF).
2446          * @since 1.7
2447          */
2448         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2449             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2450                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2451                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2452 
2453         /**
2454          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock EMOTICONS =
2458             new UnicodeBlock("EMOTICONS");
2459 
2460         /**
2461          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
2462          * @since 1.7
2463          */
2464         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2465             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2466                              "TRANSPORT AND MAP SYMBOLS",
2467                              "TRANSPORTANDMAPSYMBOLS");
2468 
2469         /**
2470          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
2471          * @since 1.7
2472          */
2473         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2474             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2475                              "ALCHEMICAL SYMBOLS",
2476                              "ALCHEMICALSYMBOLS");
2477 
2478         /**
2479          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2480          * character block (U+2A700..U+2B73F).
2481          * @since 1.7
2482          */
2483         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2484             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2485                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2486                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2487 
2488         /**
2489          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2490          * character block (U+2B740..U+2B81F).
2491          * @since 1.7
2492          */
2493         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2494             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2495                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2496                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2497 
2498         /**
2499          * Constant for the "Arabic Extended-A" Unicode character block (U+08A0..U+08FF).
2500          * @since 1.8
2501          */
2502         public static final UnicodeBlock ARABIC_EXTENDED_A =
2503             new UnicodeBlock("ARABIC_EXTENDED_A",
2504                              "ARABIC EXTENDED-A",
2505                              "ARABICEXTENDED-A");
2506 
2507         /**
2508          * Constant for the "Sundanese Supplement" Unicode character block (U+1CC0..U+1CCF).
2509          * @since 1.8
2510          */
2511         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2512             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2513                              "SUNDANESE SUPPLEMENT",
2514                              "SUNDANESESUPPLEMENT");
2515 
2516         /**
2517          * Constant for the "Meetei Mayek Extensions" Unicode character block (U+AAE0..U+AAFF).
2518          * @since 1.8
2519          */
2520         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2521             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2522                              "MEETEI MAYEK EXTENSIONS",
2523                              "MEETEIMAYEKEXTENSIONS");
2524 
2525         /**
2526          * Constant for the "Meroitic Hieroglyphs" Unicode character block (U+10980..U+1099F).
2527          * @since 1.8
2528          */
2529         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2530             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2531                              "MEROITIC HIEROGLYPHS",
2532                              "MEROITICHIEROGLYPHS");
2533 
2534         /**
2535          * Constant for the "Meroitic Cursive" Unicode character block (U+109A0..U+109FF).
2536          * @since 1.8
2537          */
2538         public static final UnicodeBlock MEROITIC_CURSIVE =
2539             new UnicodeBlock("MEROITIC_CURSIVE",
2540                              "MEROITIC CURSIVE",
2541                              "MEROITICCURSIVE");
2542 
2543         /**
2544          * Constant for the "Sora Sompeng" Unicode character block (U+110D0..U+110FF).
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock SORA_SOMPENG =
2548             new UnicodeBlock("SORA_SOMPENG",
2549                              "SORA SOMPENG",
2550                              "SORASOMPENG");
2551 
2552         /**
2553          * Constant for the "Chakma" Unicode character block (U+11100..U+1114F).
2554          * @since 1.8
2555          */
2556         public static final UnicodeBlock CHAKMA =
2557             new UnicodeBlock("CHAKMA");
2558 
2559         /**
2560          * Constant for the "Sharada" Unicode character block (U+11180..U+111DF).
2561          * @since 1.8
2562          */
2563         public static final UnicodeBlock SHARADA =
2564             new UnicodeBlock("SHARADA");
2565 
2566         /**
2567          * Constant for the "Takri" Unicode character block (U+11680..U+116CF).
2568          * @since 1.8
2569          */
2570         public static final UnicodeBlock TAKRI =
2571             new UnicodeBlock("TAKRI");
2572 
2573         /**
2574          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
2575          * @since 1.8
2576          */
2577         public static final UnicodeBlock MIAO =
2578             new UnicodeBlock("MIAO");
2579 
2580         /**
2581          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2582          * character block (U+1EE00..U+1EEFF).
2583          * @since 1.8
2584          */
2585         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2586             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2587                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2588                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2589 
2590         /**
2591          * Constant for the "Combining Diacritical Marks Extended" Unicode
2592          * character block (U+1AB0..U+1AFF).
2593          * @since 1.9
2594          */
2595         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2596             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2597                              "COMBINING DIACRITICAL MARKS EXTENDED",
2598                              "COMBININGDIACRITICALMARKSEXTENDED");
2599 
2600         /**
2601          * Constant for the "Myanmar Extended-B" Unicode character block (U+A9E0..U+A9FF).
2602          * @since 1.9
2603          */
2604         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2605             new UnicodeBlock("MYANMAR_EXTENDED_B",
2606                              "MYANMAR EXTENDED-B",
2607                              "MYANMAREXTENDED-B");
2608 
2609         /**
2610          * Constant for the "Latin Extended-E" Unicode character block (U+AB30..U+AB6F).
2611          * @since 1.9
2612          */
2613         public static final UnicodeBlock LATIN_EXTENDED_E =
2614             new UnicodeBlock("LATIN_EXTENDED_E",
2615                              "LATIN EXTENDED-E",
2616                              "LATINEXTENDED-E");
2617 
2618         /**
2619          * Constant for the "Coptic Epact Numbers" Unicode character block (U+102E0..U+102FF).
2620          * @since 1.9
2621          */
2622         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2623             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2624                              "COPTIC EPACT NUMBERS",
2625                              "COPTICEPACTNUMBERS");
2626 
2627         /**
2628          * Constant for the "Old Permic" Unicode character block (U+10350..U+1037F).
2629          * @since 1.9
2630          */
2631         public static final UnicodeBlock OLD_PERMIC =
2632             new UnicodeBlock("OLD_PERMIC",
2633                              "OLD PERMIC",
2634                              "OLDPERMIC");
2635 
2636         /**
2637          * Constant for the "Elbasan" Unicode character block (U+10500..U+1052F).
2638          * @since 1.9
2639          */
2640         public static final UnicodeBlock ELBASAN =
2641             new UnicodeBlock("ELBASAN");
2642 
2643         /**
2644          * Constant for the "Caucasian Albanian" Unicode character block (U+10530..U+1056F).
2645          * @since 1.9
2646          */
2647         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2648             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2649                              "CAUCASIAN ALBANIAN",
2650                              "CAUCASIANALBANIAN");
2651 
2652         /**
2653          * Constant for the "Linear A" Unicode character block (U+10600..U+1077F).
2654          * @since 1.9
2655          */
2656         public static final UnicodeBlock LINEAR_A =
2657             new UnicodeBlock("LINEAR_A",
2658                              "LINEAR A",
2659                              "LINEARA");
2660 
2661         /**
2662          * Constant for the "Palmyrene" Unicode character block (U+10860..U+1087F).
2663          * @since 1.9
2664          */
2665         public static final UnicodeBlock PALMYRENE =
2666             new UnicodeBlock("PALMYRENE");
2667 
2668         /**
2669          * Constant for the "Nabataean" Unicode character block (U+10880..U+108AF).
2670          * @since 1.9
2671          */
2672         public static final UnicodeBlock NABATAEAN =
2673             new UnicodeBlock("NABATAEAN");
2674 
2675         /**
2676          * Constant for the "Old North Arabian" Unicode character block (U+10A80..U+10A9F).
2677          * @since 1.9
2678          */
2679         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2680             new UnicodeBlock("OLD_NORTH_ARABIAN",
2681                              "OLD NORTH ARABIAN",
2682                              "OLDNORTHARABIAN");
2683 
2684         /**
2685          * Constant for the "Manichaean" Unicode character block (U+10AC0..U+10AFF).
2686          * @since 1.9
2687          */
2688         public static final UnicodeBlock MANICHAEAN =
2689             new UnicodeBlock("MANICHAEAN");
2690 
2691         /**
2692          * Constant for the "Psalter Pahlavi" Unicode character block (U+10B80..U+10BAF).
2693          * @since 1.9
2694          */
2695         public static final UnicodeBlock PSALTER_PAHLAVI =
2696             new UnicodeBlock("PSALTER_PAHLAVI",
2697                              "PSALTER PAHLAVI",
2698                              "PSALTERPAHLAVI");
2699 
2700         /**
2701          * Constant for the "Mahajani" Unicode character block (U+11150..U+1117F).
2702          * @since 1.9
2703          */
2704         public static final UnicodeBlock MAHAJANI =
2705             new UnicodeBlock("MAHAJANI");
2706 
2707         /**
2708          * Constant for the "Sinhala Archaic Numbers" Unicode character block (U+111E0..U+111FF).
2709          * @since 1.9
2710          */
2711         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2712             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2713                              "SINHALA ARCHAIC NUMBERS",
2714                              "SINHALAARCHAICNUMBERS");
2715 
2716         /**
2717          * Constant for the "Khojki" Unicode character block (U+11200..U+1124F).
2718          * @since 1.9
2719          */
2720         public static final UnicodeBlock KHOJKI =
2721             new UnicodeBlock("KHOJKI");
2722 
2723         /**
2724          * Constant for the "Khudawadi" Unicode character block (U+112B0..U+112FF).
2725          * @since 1.9
2726          */
2727         public static final UnicodeBlock KHUDAWADI =
2728             new UnicodeBlock("KHUDAWADI");
2729 
2730         /**
2731          * Constant for the "Grantha" Unicode character block (U+11300..U+1137F).
2732          * @since 1.9
2733          */
2734         public static final UnicodeBlock GRANTHA =
2735             new UnicodeBlock("GRANTHA");
2736 
2737         /**
2738          * Constant for the "Tirhuta" Unicode character block (U+11480..U+114DF).
2739          * @since 1.9
2740          */
2741         public static final UnicodeBlock TIRHUTA =
2742             new UnicodeBlock("TIRHUTA");
2743 
2744         /**
2745          * Constant for the "Siddham" Unicode character block (U+11580..U+115FF).
2746          * @since 1.9
2747          */
2748         public static final UnicodeBlock SIDDHAM =
2749             new UnicodeBlock("SIDDHAM");
2750 
2751         /**
2752          * Constant for the "Modi" Unicode character block (U+11600..U+1165F).
2753          * @since 1.9
2754          */
2755         public static final UnicodeBlock MODI =
2756             new UnicodeBlock("MODI");
2757 
2758         /**
2759          * Constant for the "Warang Citi" Unicode character block (U+118A0..U+118FF).
2760          * @since 1.9
2761          */
2762         public static final UnicodeBlock WARANG_CITI =
2763             new UnicodeBlock("WARANG_CITI",
2764                              "WARANG CITI",
2765                              "WARANGCITI");
2766 
2767         /**
2768          * Constant for the "Pau Cin Hau" Unicode character block (U+11AC0..U+11AFF).
2769          * @since 1.9
2770          */
2771         public static final UnicodeBlock PAU_CIN_HAU =
2772             new UnicodeBlock("PAU_CIN_HAU",
2773                              "PAU CIN HAU",
2774                              "PAUCINHAU");
2775 
2776         /**
2777          * Constant for the "Mro" Unicode character block (U+16A40..U+16A6F).
2778          * @since 1.9
2779          */
2780         public static final UnicodeBlock MRO =
2781             new UnicodeBlock("MRO");
2782 
2783         /**
2784          * Constant for the "Bassa Vah" Unicode character block (U+16AD0..U+16AFF).
2785          * @since 1.9
2786          */
2787         public static final UnicodeBlock BASSA_VAH =
2788             new UnicodeBlock("BASSA_VAH",
2789                              "BASSA VAH",
2790                              "BASSAVAH");
2791 
2792         /**
2793          * Constant for the "Pahawh Hmong" Unicode character block (U+16B00..U+16B8F).
2794          * @since 1.9
2795          */
2796         public static final UnicodeBlock PAHAWH_HMONG =
2797             new UnicodeBlock("PAHAWH_HMONG",
2798                              "PAHAWH HMONG",
2799                              "PAHAWHHMONG");
2800 
2801         /**
2802          * Constant for the "Duployan" Unicode character block (U+1BC00..U+1BC9F).
2803          * @since 1.9
2804          */
2805         public static final UnicodeBlock DUPLOYAN =
2806             new UnicodeBlock("DUPLOYAN");
2807 
2808         /**
2809          * Constant for the "Shorthand Format Controls" Unicode character block (U+1BCA0..U+1BCAF).
2810          * @since 1.9
2811          */
2812         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2813             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2814                              "SHORTHAND FORMAT CONTROLS",
2815                              "SHORTHANDFORMATCONTROLS");
2816 
2817         /**
2818          * Constant for the "Mende Kikakui" Unicode character block (U+1E800..U+1E8DF).
2819          * @since 1.9
2820          */
2821         public static final UnicodeBlock MENDE_KIKAKUI =
2822             new UnicodeBlock("MENDE_KIKAKUI",
2823                              "MENDE KIKAKUI",
2824                              "MENDEKIKAKUI");
2825 
2826         /**
2827          * Constant for the "Ornamental Dingbats" Unicode character block (U+1F650..U+1F67F).
2828          * @since 1.9
2829          */
2830         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2831             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2832                              "ORNAMENTAL DINGBATS",
2833                              "ORNAMENTALDINGBATS");
2834 
2835         /**
2836          * Constant for the "Geometric Shapes Extended" Unicode character block (U+1F780..U+1F7FF).
2837          * @since 1.9
2838          */
2839         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2840             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2841                              "GEOMETRIC SHAPES EXTENDED",
2842                              "GEOMETRICSHAPESEXTENDED");
2843 
2844         /**
2845          * Constant for the "Supplemental Arrows-C" Unicode character block (U+1F800..U+1F8FF).
2846          * @since 1.9
2847          */
2848         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2849             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2850                              "SUPPLEMENTAL ARROWS-C",
2851                              "SUPPLEMENTALARROWS-C");
2852 
2853         private static final int blockStarts[] = {
2854             0x0000,   // 0000..007F; Basic Latin
2855             0x0080,   // 0080..00FF; Latin-1 Supplement
2856             0x0100,   // 0100..017F; Latin Extended-A
2857             0x0180,   // 0180..024F; Latin Extended-B
2858             0x0250,   // 0250..02AF; IPA Extensions
2859             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2860             0x0300,   // 0300..036F; Combining Diacritical Marks
2861             0x0370,   // 0370..03FF; Greek and Coptic
2862             0x0400,   // 0400..04FF; Cyrillic
2863             0x0500,   // 0500..052F; Cyrillic Supplement
2864             0x0530,   // 0530..058F; Armenian
2865             0x0590,   // 0590..05FF; Hebrew
2866             0x0600,   // 0600..06FF; Arabic
2867             0x0700,   // 0700..074F; Syriac
2868             0x0750,   // 0750..077F; Arabic Supplement
2869             0x0780,   // 0780..07BF; Thaana
2870             0x07C0,   // 07C0..07FF; NKo
2871             0x0800,   // 0800..083F; Samaritan
2872             0x0840,   // 0840..085F; Mandaic
2873             0x0860,   //             unassigned
2874             0x08A0,   // 08A0..08FF; Arabic Extended-A
2875             0x0900,   // 0900..097F; Devanagari
2876             0x0980,   // 0980..09FF; Bengali
2877             0x0A00,   // 0A00..0A7F; Gurmukhi
2878             0x0A80,   // 0A80..0AFF; Gujarati
2879             0x0B00,   // 0B00..0B7F; Oriya
2880             0x0B80,   // 0B80..0BFF; Tamil
2881             0x0C00,   // 0C00..0C7F; Telugu
2882             0x0C80,   // 0C80..0CFF; Kannada
2883             0x0D00,   // 0D00..0D7F; Malayalam
2884             0x0D80,   // 0D80..0DFF; Sinhala
2885             0x0E00,   // 0E00..0E7F; Thai
2886             0x0E80,   // 0E80..0EFF; Lao
2887             0x0F00,   // 0F00..0FFF; Tibetan
2888             0x1000,   // 1000..109F; Myanmar
2889             0x10A0,   // 10A0..10FF; Georgian
2890             0x1100,   // 1100..11FF; Hangul Jamo
2891             0x1200,   // 1200..137F; Ethiopic
2892             0x1380,   // 1380..139F; Ethiopic Supplement
2893             0x13A0,   // 13A0..13FF; Cherokee
2894             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2895             0x1680,   // 1680..169F; Ogham
2896             0x16A0,   // 16A0..16FF; Runic
2897             0x1700,   // 1700..171F; Tagalog
2898             0x1720,   // 1720..173F; Hanunoo
2899             0x1740,   // 1740..175F; Buhid
2900             0x1760,   // 1760..177F; Tagbanwa
2901             0x1780,   // 1780..17FF; Khmer
2902             0x1800,   // 1800..18AF; Mongolian
2903             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2904             0x1900,   // 1900..194F; Limbu
2905             0x1950,   // 1950..197F; Tai Le
2906             0x1980,   // 1980..19DF; New Tai Lue
2907             0x19E0,   // 19E0..19FF; Khmer Symbols
2908             0x1A00,   // 1A00..1A1F; Buginese
2909             0x1A20,   // 1A20..1AAF; Tai Tham
2910             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
2911             0x1B00,   // 1B00..1B7F; Balinese
2912             0x1B80,   // 1B80..1BBF; Sundanese
2913             0x1BC0,   // 1BC0..1BFF; Batak
2914             0x1C00,   // 1C00..1C4F; Lepcha
2915             0x1C50,   // 1C50..1C7F; Ol Chiki
2916             0x1C80,   //             unassigned
2917             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2918             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2919             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2920             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2921             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2922             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2923             0x1F00,   // 1F00..1FFF; Greek Extended
2924             0x2000,   // 2000..206F; General Punctuation
2925             0x2070,   // 2070..209F; Superscripts and Subscripts
2926             0x20A0,   // 20A0..20CF; Currency Symbols
2927             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2928             0x2100,   // 2100..214F; Letterlike Symbols
2929             0x2150,   // 2150..218F; Number Forms
2930             0x2190,   // 2190..21FF; Arrows
2931             0x2200,   // 2200..22FF; Mathematical Operators
2932             0x2300,   // 2300..23FF; Miscellaneous Technical
2933             0x2400,   // 2400..243F; Control Pictures
2934             0x2440,   // 2440..245F; Optical Character Recognition
2935             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2936             0x2500,   // 2500..257F; Box Drawing
2937             0x2580,   // 2580..259F; Block Elements
2938             0x25A0,   // 25A0..25FF; Geometric Shapes
2939             0x2600,   // 2600..26FF; Miscellaneous Symbols
2940             0x2700,   // 2700..27BF; Dingbats
2941             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2942             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2943             0x2800,   // 2800..28FF; Braille Patterns
2944             0x2900,   // 2900..297F; Supplemental Arrows-B
2945             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2946             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2947             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2948             0x2C00,   // 2C00..2C5F; Glagolitic
2949             0x2C60,   // 2C60..2C7F; Latin Extended-C
2950             0x2C80,   // 2C80..2CFF; Coptic
2951             0x2D00,   // 2D00..2D2F; Georgian Supplement
2952             0x2D30,   // 2D30..2D7F; Tifinagh
2953             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2954             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2955             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2956             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2957             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2958             0x2FE0,   //             unassigned
2959             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2960             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2961             0x3040,   // 3040..309F; Hiragana
2962             0x30A0,   // 30A0..30FF; Katakana
2963             0x3100,   // 3100..312F; Bopomofo
2964             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2965             0x3190,   // 3190..319F; Kanbun
2966             0x31A0,   // 31A0..31BF; Bopomofo Extended
2967             0x31C0,   // 31C0..31EF; CJK Strokes
2968             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2969             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2970             0x3300,   // 3300..33FF; CJK Compatibility
2971             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2972             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2973             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2974             0xA000,   // A000..A48F; Yi Syllables
2975             0xA490,   // A490..A4CF; Yi Radicals
2976             0xA4D0,   // A4D0..A4FF; Lisu
2977             0xA500,   // A500..A63F; Vai
2978             0xA640,   // A640..A69F; Cyrillic Extended-B
2979             0xA6A0,   // A6A0..A6FF; Bamum
2980             0xA700,   // A700..A71F; Modifier Tone Letters
2981             0xA720,   // A720..A7FF; Latin Extended-D
2982             0xA800,   // A800..A82F; Syloti Nagri
2983             0xA830,   // A830..A83F; Common Indic Number Forms
2984             0xA840,   // A840..A87F; Phags-pa
2985             0xA880,   // A880..A8DF; Saurashtra
2986             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2987             0xA900,   // A900..A92F; Kayah Li
2988             0xA930,   // A930..A95F; Rejang
2989             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2990             0xA980,   // A980..A9DF; Javanese
2991             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
2992             0xAA00,   // AA00..AA5F; Cham
2993             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2994             0xAA80,   // AA80..AADF; Tai Viet
2995             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2996             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2997             0xAB30,   // AB30..AB6F; Latin Extended-E
2998             0xAB70,   //             unassigned
2999             0xABC0,   // ABC0..ABFF; Meetei Mayek
3000             0xAC00,   // AC00..D7AF; Hangul Syllables
3001             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3002             0xD800,   // D800..DB7F; High Surrogates
3003             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3004             0xDC00,   // DC00..DFFF; Low Surrogates
3005             0xE000,   // E000..F8FF; Private Use Area
3006             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3007             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3008             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3009             0xFE00,   // FE00..FE0F; Variation Selectors
3010             0xFE10,   // FE10..FE1F; Vertical Forms
3011             0xFE20,   // FE20..FE2F; Combining Half Marks
3012             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3013             0xFE50,   // FE50..FE6F; Small Form Variants
3014             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3015             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3016             0xFFF0,   // FFF0..FFFF; Specials
3017             0x10000,  // 10000..1007F; Linear B Syllabary
3018             0x10080,  // 10080..100FF; Linear B Ideograms
3019             0x10100,  // 10100..1013F; Aegean Numbers
3020             0x10140,  // 10140..1018F; Ancient Greek Numbers
3021             0x10190,  // 10190..101CF; Ancient Symbols
3022             0x101D0,  // 101D0..101FF; Phaistos Disc
3023             0x10200,  //               unassigned
3024             0x10280,  // 10280..1029F; Lycian
3025             0x102A0,  // 102A0..102DF; Carian
3026             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3027             0x10300,  // 10300..1032F; Old Italic
3028             0x10330,  // 10330..1034F; Gothic
3029             0x10350,  // 10350..1037F; Old Permic
3030             0x10380,  // 10380..1039F; Ugaritic
3031             0x103A0,  // 103A0..103DF; Old Persian
3032             0x103E0,  //               unassigned
3033             0x10400,  // 10400..1044F; Deseret
3034             0x10450,  // 10450..1047F; Shavian
3035             0x10480,  // 10480..104AF; Osmanya
3036             0x104B0,  //               unassigned
3037             0x10500,  // 10500..1052F; Elbasan
3038             0x10530,  // 10530..1056F; Caucasian Albanian
3039             0x10570,  //               unassigned
3040             0x10600,  // 10600..1077F; Linear A
3041             0x10780,  //               unassigned
3042             0x10800,  // 10800..1083F; Cypriot Syllabary
3043             0x10840,  // 10840..1085F; Imperial Aramaic
3044             0x10860,  // 10860..1087F; Palmyrene
3045             0x10880,  // 10880..108AF; Nabataean
3046             0x108B0,  //               unassigned
3047             0x10900,  // 10900..1091F; Phoenician
3048             0x10920,  // 10920..1093F; Lydian
3049             0x10940,  //               unassigned
3050             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3051             0x109A0,  // 109A0..109FF; Meroitic Cursive
3052             0x10A00,  // 10A00..10A5F; Kharoshthi
3053             0x10A60,  // 10A60..10A7F; Old South Arabian
3054             0x10A80,  // 10A80..10A9F; Old North Arabian
3055             0x10AA0,  //               unassigned
3056             0x10AC0,  // 10AC0..10AFF; Manichaean
3057             0x10B00,  // 10B00..10B3F; Avestan
3058             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3059             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3060             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3061             0x10BB0,  //               unassigned
3062             0x10C00,  // 10C00..10C4F; Old Turkic
3063             0x10C50,  //               unassigned
3064             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3065             0x10E80,  //               unassigned
3066             0x11000,  // 11000..1107F; Brahmi
3067             0x11080,  // 11080..110CF; Kaithi
3068             0x110D0,  // 110D0..110FF; Sora Sompeng
3069             0x11100,  // 11100..1114F; Chakma
3070             0x11150,  // 11150..1117F; Mahajani
3071             0x11180,  // 11180..111DF; Sharada
3072             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3073             0x11200,  // 11200..1124F; Khojki
3074             0x11250,  //               unassigned
3075             0x112B0,  // 112B0..112FF; Khudawadi
3076             0x11300,  // 11300..1137F; Grantha
3077             0x11380,  //               unassigned
3078             0x11480,  // 11480..114DF; Tirhuta
3079             0x114E0,  //               unassigned
3080             0x11580,  // 11580..115FF; Siddham
3081             0x11600,  // 11600..1165F; Modi
3082             0x11660,  //               unassigned
3083             0x11680,  // 11680..116CF; Takri
3084             0x116D0,  //               unassigned
3085             0x118A0,  // 118A0..118FF; Warang Citi
3086             0x11900,  //               unassigned
3087             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3088             0x11B00,  //               unassigned
3089             0x12000,  // 12000..123FF; Cuneiform
3090             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3091             0x12480,  //               unassigned
3092             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3093             0x13430,  //               unassigned
3094             0x16800,  // 16800..16A3F; Bamum Supplement
3095             0x16A40,  // 16A40..16A6F; Mro
3096             0x16A70,  //               unassigned
3097             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3098             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3099             0x16B90,  //               unassigned
3100             0x16F00,  // 16F00..16F9F; Miao
3101             0x16FA0,  //               unassigned
3102             0x1B000,  // 1B000..1B0FF; Kana Supplement
3103             0x1B100,  //               unassigned
3104             0x1BC00,  // 1BC00..1BC9F; Duployan
3105             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3106             0x1BCB0,  //               unassigned
3107             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3108             0x1D100,  // 1D100..1D1FF; Musical Symbols
3109             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3110             0x1D250,  //               unassigned
3111             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3112             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3113             0x1D380,  //               unassigned
3114             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3115             0x1D800,  //               unassigned
3116             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3117             0x1E8E0,  //               unassigned
3118             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3119             0x1EF00,  //               unassigned
3120             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3121             0x1F030,  // 1F030..1F09F; Domino Tiles
3122             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3123             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3124             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3125             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
3126             0x1F600,  // 1F600..1F64F; Emoticons
3127             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3128             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
3129             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3130             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3131             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3132             0x1F900,  //               unassigned
3133             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3134             0x2A6E0,  //               unassigned
3135             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3136             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3137             0x2B820,  //               unassigned
3138             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3139             0x2FA20,  //               unassigned
3140             0xE0000,  // E0000..E007F; Tags
3141             0xE0080,  //               unassigned
3142             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3143             0xE01F0,  //               unassigned
3144             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3145             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
3146         };
3147 
3148         private static final UnicodeBlock[] blocks = {
            // Lookup table paired index-for-index with blockStarts: of(int)
            // finds the index of the range containing a code point in
            // blockStarts and returns the entry stored at that same index
            // here, so the two arrays must keep the same length and order.
            // A null entry marks a range that belongs to no block (the ranges
            // commented "unassigned" in blockStarts); of(int) returns it as is.
3149             BASIC_LATIN,
3150             LATIN_1_SUPPLEMENT,
3151             LATIN_EXTENDED_A,
3152             LATIN_EXTENDED_B,
3153             IPA_EXTENSIONS,
3154             SPACING_MODIFIER_LETTERS,
3155             COMBINING_DIACRITICAL_MARKS,
3156             GREEK,
3157             CYRILLIC,
3158             CYRILLIC_SUPPLEMENTARY,
3159             ARMENIAN,
3160             HEBREW,
3161             ARABIC,
3162             SYRIAC,
3163             ARABIC_SUPPLEMENT,
3164             THAANA,
3165             NKO,
3166             SAMARITAN,
3167             MANDAIC,
3168             null,
3169             ARABIC_EXTENDED_A,
3170             DEVANAGARI,
3171             BENGALI,
3172             GURMUKHI,
3173             GUJARATI,
3174             ORIYA,
3175             TAMIL,
3176             TELUGU,
3177             KANNADA,
3178             MALAYALAM,
3179             SINHALA,
3180             THAI,
3181             LAO,
3182             TIBETAN,
3183             MYANMAR,
3184             GEORGIAN,
3185             HANGUL_JAMO,
3186             ETHIOPIC,
3187             ETHIOPIC_SUPPLEMENT,
3188             CHEROKEE,
3189             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3190             OGHAM,
3191             RUNIC,
3192             TAGALOG,
3193             HANUNOO,
3194             BUHID,
3195             TAGBANWA,
3196             KHMER,
3197             MONGOLIAN,
3198             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3199             LIMBU,
3200             TAI_LE,
3201             NEW_TAI_LUE,
3202             KHMER_SYMBOLS,
3203             BUGINESE,
3204             TAI_THAM,
3205             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3206             BALINESE,
3207             SUNDANESE,
3208             BATAK,
3209             LEPCHA,
3210             OL_CHIKI,
3211             null,
3212             SUNDANESE_SUPPLEMENT,
3213             VEDIC_EXTENSIONS,
3214             PHONETIC_EXTENSIONS,
3215             PHONETIC_EXTENSIONS_SUPPLEMENT,
3216             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3217             LATIN_EXTENDED_ADDITIONAL,
3218             GREEK_EXTENDED,
3219             GENERAL_PUNCTUATION,
3220             SUPERSCRIPTS_AND_SUBSCRIPTS,
3221             CURRENCY_SYMBOLS,
3222             COMBINING_MARKS_FOR_SYMBOLS,
3223             LETTERLIKE_SYMBOLS,
3224             NUMBER_FORMS,
3225             ARROWS,
3226             MATHEMATICAL_OPERATORS,
3227             MISCELLANEOUS_TECHNICAL,
3228             CONTROL_PICTURES,
3229             OPTICAL_CHARACTER_RECOGNITION,
3230             ENCLOSED_ALPHANUMERICS,
3231             BOX_DRAWING,
3232             BLOCK_ELEMENTS,
3233             GEOMETRIC_SHAPES,
3234             MISCELLANEOUS_SYMBOLS,
3235             DINGBATS,
3236             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3237             SUPPLEMENTAL_ARROWS_A,
3238             BRAILLE_PATTERNS,
3239             SUPPLEMENTAL_ARROWS_B,
3240             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3241             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3242             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3243             GLAGOLITIC,
3244             LATIN_EXTENDED_C,
3245             COPTIC,
3246             GEORGIAN_SUPPLEMENT,
3247             TIFINAGH,
3248             ETHIOPIC_EXTENDED,
3249             CYRILLIC_EXTENDED_A,
3250             SUPPLEMENTAL_PUNCTUATION,
3251             CJK_RADICALS_SUPPLEMENT,
3252             KANGXI_RADICALS,
3253             null,
3254             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3255             CJK_SYMBOLS_AND_PUNCTUATION,
3256             HIRAGANA,
3257             KATAKANA,
3258             BOPOMOFO,
3259             HANGUL_COMPATIBILITY_JAMO,
3260             KANBUN,
3261             BOPOMOFO_EXTENDED,
3262             CJK_STROKES,
3263             KATAKANA_PHONETIC_EXTENSIONS,
3264             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3265             CJK_COMPATIBILITY,
3266             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3267             YIJING_HEXAGRAM_SYMBOLS,
3268             CJK_UNIFIED_IDEOGRAPHS,
3269             YI_SYLLABLES,
3270             YI_RADICALS,
3271             LISU,
3272             VAI,
3273             CYRILLIC_EXTENDED_B,
3274             BAMUM,
3275             MODIFIER_TONE_LETTERS,
3276             LATIN_EXTENDED_D,
3277             SYLOTI_NAGRI,
3278             COMMON_INDIC_NUMBER_FORMS,
3279             PHAGS_PA,
3280             SAURASHTRA,
3281             DEVANAGARI_EXTENDED,
3282             KAYAH_LI,
3283             REJANG,
3284             HANGUL_JAMO_EXTENDED_A,
3285             JAVANESE,
3286             MYANMAR_EXTENDED_B,
3287             CHAM,
3288             MYANMAR_EXTENDED_A,
3289             TAI_VIET,
3290             MEETEI_MAYEK_EXTENSIONS,
3291             ETHIOPIC_EXTENDED_A,
3292             LATIN_EXTENDED_E,
3293             null,
3294             MEETEI_MAYEK,
3295             HANGUL_SYLLABLES,
3296             HANGUL_JAMO_EXTENDED_B,
3297             HIGH_SURROGATES,
3298             HIGH_PRIVATE_USE_SURROGATES,
3299             LOW_SURROGATES,
3300             PRIVATE_USE_AREA,
3301             CJK_COMPATIBILITY_IDEOGRAPHS,
3302             ALPHABETIC_PRESENTATION_FORMS,
3303             ARABIC_PRESENTATION_FORMS_A,
3304             VARIATION_SELECTORS,
3305             VERTICAL_FORMS,
3306             COMBINING_HALF_MARKS,
3307             CJK_COMPATIBILITY_FORMS,
3308             SMALL_FORM_VARIANTS,
3309             ARABIC_PRESENTATION_FORMS_B,
3310             HALFWIDTH_AND_FULLWIDTH_FORMS,
3311             SPECIALS,
3312             LINEAR_B_SYLLABARY,
3313             LINEAR_B_IDEOGRAMS,
3314             AEGEAN_NUMBERS,
3315             ANCIENT_GREEK_NUMBERS,
3316             ANCIENT_SYMBOLS,
3317             PHAISTOS_DISC,
3318             null,
3319             LYCIAN,
3320             CARIAN,
3321             COPTIC_EPACT_NUMBERS,
3322             OLD_ITALIC,
3323             GOTHIC,
3324             OLD_PERMIC,
3325             UGARITIC,
3326             OLD_PERSIAN,
3327             null,
3328             DESERET,
3329             SHAVIAN,
3330             OSMANYA,
3331             null,
3332             ELBASAN,
3333             CAUCASIAN_ALBANIAN,
3334             null,
3335             LINEAR_A,
3336             null,
3337             CYPRIOT_SYLLABARY,
3338             IMPERIAL_ARAMAIC,
3339             PALMYRENE,
3340             NABATAEAN,
3341             null,
3342             PHOENICIAN,
3343             LYDIAN,
3344             null,
3345             MEROITIC_HIEROGLYPHS,
3346             MEROITIC_CURSIVE,
3347             KHAROSHTHI,
3348             OLD_SOUTH_ARABIAN,
3349             OLD_NORTH_ARABIAN,
3350             null,
3351             MANICHAEAN,
3352             AVESTAN,
3353             INSCRIPTIONAL_PARTHIAN,
3354             INSCRIPTIONAL_PAHLAVI,
3355             PSALTER_PAHLAVI,
3356             null,
3357             OLD_TURKIC,
3358             null,
3359             RUMI_NUMERAL_SYMBOLS,
3360             null,
3361             BRAHMI,
3362             KAITHI,
3363             SORA_SOMPENG,
3364             CHAKMA,
3365             MAHAJANI,
3366             SHARADA,
3367             SINHALA_ARCHAIC_NUMBERS,
3368             KHOJKI,
3369             null,
3370             KHUDAWADI,
3371             GRANTHA,
3372             null,
3373             TIRHUTA,
3374             null,
3375             SIDDHAM,
3376             MODI,
3377             null,
3378             TAKRI,
3379             null,
3380             WARANG_CITI,
3381             null,
3382             PAU_CIN_HAU,
3383             null,
3384             CUNEIFORM,
3385             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3386             null,
3387             EGYPTIAN_HIEROGLYPHS,
3388             null,
3389             BAMUM_SUPPLEMENT,
3390             MRO,
3391             null,
3392             BASSA_VAH,
3393             PAHAWH_HMONG,
3394             null,
3395             MIAO,
3396             null,
3397             KANA_SUPPLEMENT,
3398             null,
3399             DUPLOYAN,
3400             SHORTHAND_FORMAT_CONTROLS,
3401             null,
3402             BYZANTINE_MUSICAL_SYMBOLS,
3403             MUSICAL_SYMBOLS,
3404             ANCIENT_GREEK_MUSICAL_NOTATION,
3405             null,
3406             TAI_XUAN_JING_SYMBOLS,
3407             COUNTING_ROD_NUMERALS,
3408             null,
3409             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3410             null,
3411             MENDE_KIKAKUI,
3412             null,
3413             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3414             null,
3415             MAHJONG_TILES,
3416             DOMINO_TILES,
3417             PLAYING_CARDS,
3418             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3419             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3420             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3421             EMOTICONS,
3422             ORNAMENTAL_DINGBATS,
3423             TRANSPORT_AND_MAP_SYMBOLS,
3424             ALCHEMICAL_SYMBOLS,
3425             GEOMETRIC_SHAPES_EXTENDED,
3426             SUPPLEMENTAL_ARROWS_C,
3427             null,
3428             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3429             null,
3430             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3431             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3432             null,
3433             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3434             null,
3435             TAGS,
3436             null,
3437             VARIATION_SELECTORS_SUPPLEMENT,
3438             null,
3439             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3440             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3441         };
3442 
3443 
3444         /**
3445          * Returns the object representing the Unicode block containing the
3446          * given character, or {@code null} if the character is not a
3447          * member of a defined block.
3448          *
3449          * <p><b>Note:</b> This method cannot handle
3450          * <a href="Character.html#supplementary"> supplementary
3451          * characters</a>.  To support all Unicode characters, including
3452          * supplementary characters, use the {@link #of(int)} method.
3453          *
3454          * @param   c  The character in question
3455          * @return  The {@code UnicodeBlock} instance representing the
3456          *          Unicode block of which this character is a member, or
3457          *          {@code null} if the character is not a member of any
3458          *          Unicode block
3459          */
3460         public static UnicodeBlock of(char c) {
3461             return of((int)c);
3462         }
3463 
3464         /**
3465          * Returns the object representing the Unicode block
3466          * containing the given character (Unicode code point), or
3467          * {@code null} if the character is not a member of a
3468          * defined block.
3469          *
3470          * @param   codePoint the character (Unicode code point) in question.
3471          * @return  The {@code UnicodeBlock} instance representing the
3472          *          Unicode block of which this character is a member, or
3473          *          {@code null} if the character is not a member of any
3474          *          Unicode block
3475          * @exception IllegalArgumentException if the specified
3476          * {@code codePoint} is an invalid Unicode code point.
3477          * @see Character#isValidCodePoint(int)
3478          * @since   1.5
3479          */
3480         public static UnicodeBlock of(int codePoint) {
3481             if (!isValidCodePoint(codePoint)) {
3482                 throw new IllegalArgumentException();
3483             }
3484 
3485             int top, bottom, current;
3486             bottom = 0;
3487             top = blockStarts.length;
3488             current = top/2;
3489 
3490             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3491             while (top - bottom > 1) {
3492                 if (codePoint >= blockStarts[current]) {
3493                     bottom = current;
3494                 } else {
3495                     top = current;
3496                 }
3497                 current = (top + bottom) / 2;
3498             }
3499             return blocks[current];
3500         }
3501 
3502         /**
3503          * Returns the UnicodeBlock with the given name. Block
3504          * names are determined by The Unicode Standard. The file
3505          * {@code Blocks-<version>.txt} defines blocks for a particular
3506          * version of the standard. The {@link Character} class specifies
3507          * the version of the standard that it supports.
3508          * <p>
3509          * This method accepts block names in the following forms:
3510          * <ol>
3511          * <li> Canonical block names as defined by the Unicode Standard.
3512          * For example, the standard defines a "Basic Latin" block. Therefore, this
3513          * method accepts "Basic Latin" as a valid block name. The documentation of
3514          * each UnicodeBlock provides the canonical name.
3515          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3516          * is a valid block name for the "Basic Latin" block.
3517          * <li>The text representation of each constant UnicodeBlock identifier.
3518          * For example, this method will return the {@link #BASIC_LATIN} block if
3519          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3520          * hyphens in the canonical name with underscores.
3521          * </ol>
3522          * Finally, character case is ignored for all of the valid block name forms.
3523          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3524          * The en_US locale's case mapping rules are used to provide case-insensitive
3525          * string comparisons for block name validation.
3526          * <p>
3527          * If the Unicode Standard changes block names, both the previous and
3528          * current names will be accepted.
3529          *
3530          * @param blockName A {@code UnicodeBlock} name.
3531          * @return The {@code UnicodeBlock} instance identified
3532          *         by {@code blockName}
3533          * @throws IllegalArgumentException if {@code blockName} is an
3534          *         invalid name
3535          * @throws NullPointerException if {@code blockName} is null
3536          * @since 1.5
3537          */
3538         public static final UnicodeBlock forName(String blockName) {
3539             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3540             if (block == null) {
3541                 throw new IllegalArgumentException();
3542             }
3543             return block;
3544         }
3545     }
3546 
3547 
3548     /**
3549      * A family of character subsets representing the character scripts
3550      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3551      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3552      * character is assigned to a single Unicode script, either a specific
3553      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3554      * one of the following three special values,
3555      * {@link Character.UnicodeScript#INHERITED Inherited},
3556      * {@link Character.UnicodeScript#COMMON Common} or
3557      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3558      *
3559      * @since 1.7
3560      */
3561     public static enum UnicodeScript {
3562         /**
3563          * Unicode script "Common".
3564          */
3565         COMMON,
3566 
3567         /**
3568          * Unicode script "Latin".
3569          */
3570         LATIN,
3571 
3572         /**
3573          * Unicode script "Greek".
3574          */
3575         GREEK,
3576 
3577         /**
3578          * Unicode script "Cyrillic".
3579          */
3580         CYRILLIC,
3581 
3582         /**
3583          * Unicode script "Armenian".
3584          */
3585         ARMENIAN,
3586 
3587         /**
3588          * Unicode script "Hebrew".
3589          */
3590         HEBREW,
3591 
3592         /**
3593          * Unicode script "Arabic".
3594          */
3595         ARABIC,
3596 
3597         /**
3598          * Unicode script "Syriac".
3599          */
3600         SYRIAC,
3601 
3602         /**
3603          * Unicode script "Thaana".
3604          */
3605         THAANA,
3606 
3607         /**
3608          * Unicode script "Devanagari".
3609          */
3610         DEVANAGARI,
3611 
3612         /**
3613          * Unicode script "Bengali".
3614          */
3615         BENGALI,
3616 
3617         /**
3618          * Unicode script "Gurmukhi".
3619          */
3620         GURMUKHI,
3621 
3622         /**
3623          * Unicode script "Gujarati".
3624          */
3625         GUJARATI,
3626 
3627         /**
3628          * Unicode script "Oriya".
3629          */
3630         ORIYA,
3631 
3632         /**
3633          * Unicode script "Tamil".
3634          */
3635         TAMIL,
3636 
3637         /**
3638          * Unicode script "Telugu".
3639          */
3640         TELUGU,
3641 
3642         /**
3643          * Unicode script "Kannada".
3644          */
3645         KANNADA,
3646 
3647         /**
3648          * Unicode script "Malayalam".
3649          */
3650         MALAYALAM,
3651 
3652         /**
3653          * Unicode script "Sinhala".
3654          */
3655         SINHALA,
3656 
3657         /**
3658          * Unicode script "Thai".
3659          */
3660         THAI,
3661 
3662         /**
3663          * Unicode script "Lao".
3664          */
3665         LAO,
3666 
3667         /**
3668          * Unicode script "Tibetan".
3669          */
3670         TIBETAN,
3671 
3672         /**
3673          * Unicode script "Myanmar".
3674          */
3675         MYANMAR,
3676 
3677         /**
3678          * Unicode script "Georgian".
3679          */
3680         GEORGIAN,
3681 
3682         /**
3683          * Unicode script "Hangul".
3684          */
3685         HANGUL,
3686 
3687         /**
3688          * Unicode script "Ethiopic".
3689          */
3690         ETHIOPIC,
3691 
3692         /**
3693          * Unicode script "Cherokee".
3694          */
3695         CHEROKEE,
3696 
3697         /**
3698          * Unicode script "Canadian_Aboriginal".
3699          */
3700         CANADIAN_ABORIGINAL,
3701 
3702         /**
3703          * Unicode script "Ogham".
3704          */
3705         OGHAM,
3706 
3707         /**
3708          * Unicode script "Runic".
3709          */
3710         RUNIC,
3711 
3712         /**
3713          * Unicode script "Khmer".
3714          */
3715         KHMER,
3716 
3717         /**
3718          * Unicode script "Mongolian".
3719          */
3720         MONGOLIAN,
3721 
3722         /**
3723          * Unicode script "Hiragana".
3724          */
3725         HIRAGANA,
3726 
3727         /**
3728          * Unicode script "Katakana".
3729          */
3730         KATAKANA,
3731 
3732         /**
3733          * Unicode script "Bopomofo".
3734          */
3735         BOPOMOFO,
3736 
3737         /**
3738          * Unicode script "Han".
3739          */
3740         HAN,
3741 
3742         /**
3743          * Unicode script "Yi".
3744          */
3745         YI,
3746 
3747         /**
3748          * Unicode script "Old_Italic".
3749          */
3750         OLD_ITALIC,
3751 
3752         /**
3753          * Unicode script "Gothic".
3754          */
3755         GOTHIC,
3756 
3757         /**
3758          * Unicode script "Deseret".
3759          */
3760         DESERET,
3761 
3762         /**
3763          * Unicode script "Inherited".
3764          */
3765         INHERITED,
3766 
3767         /**
3768          * Unicode script "Tagalog".
3769          */
3770         TAGALOG,
3771 
3772         /**
3773          * Unicode script "Hanunoo".
3774          */
3775         HANUNOO,
3776 
3777         /**
3778          * Unicode script "Buhid".
3779          */
3780         BUHID,
3781 
3782         /**
3783          * Unicode script "Tagbanwa".
3784          */
3785         TAGBANWA,
3786 
3787         /**
3788          * Unicode script "Limbu".
3789          */
3790         LIMBU,
3791 
3792         /**
3793          * Unicode script "Tai_Le".
3794          */
3795         TAI_LE,
3796 
3797         /**
3798          * Unicode script "Linear_B".
3799          */
3800         LINEAR_B,
3801 
3802         /**
3803          * Unicode script "Ugaritic".
3804          */
3805         UGARITIC,
3806 
3807         /**
3808          * Unicode script "Shavian".
3809          */
3810         SHAVIAN,
3811 
3812         /**
3813          * Unicode script "Osmanya".
3814          */
3815         OSMANYA,
3816 
3817         /**
3818          * Unicode script "Cypriot".
3819          */
3820         CYPRIOT,
3821 
3822         /**
3823          * Unicode script "Braille".
3824          */
3825         BRAILLE,
3826 
3827         /**
3828          * Unicode script "Buginese".
3829          */
3830         BUGINESE,
3831 
3832         /**
3833          * Unicode script "Coptic".
3834          */
3835         COPTIC,
3836 
3837         /**
3838          * Unicode script "New_Tai_Lue".
3839          */
3840         NEW_TAI_LUE,
3841 
3842         /**
3843          * Unicode script "Glagolitic".
3844          */
3845         GLAGOLITIC,
3846 
3847         /**
3848          * Unicode script "Tifinagh".
3849          */
3850         TIFINAGH,
3851 
3852         /**
3853          * Unicode script "Syloti_Nagri".
3854          */
3855         SYLOTI_NAGRI,
3856 
3857         /**
3858          * Unicode script "Old_Persian".
3859          */
3860         OLD_PERSIAN,
3861 
3862         /**
3863          * Unicode script "Kharoshthi".
3864          */
3865         KHAROSHTHI,
3866 
3867         /**
3868          * Unicode script "Balinese".
3869          */
3870         BALINESE,
3871 
3872         /**
3873          * Unicode script "Cuneiform".
3874          */
3875         CUNEIFORM,
3876 
3877         /**
3878          * Unicode script "Phoenician".
3879          */
3880         PHOENICIAN,
3881 
3882         /**
3883          * Unicode script "Phags_Pa".
3884          */
3885         PHAGS_PA,
3886 
3887         /**
3888          * Unicode script "Nko".
3889          */
3890         NKO,
3891 
3892         /**
3893          * Unicode script "Sundanese".
3894          */
3895         SUNDANESE,
3896 
3897         /**
3898          * Unicode script "Batak".
3899          */
3900         BATAK,
3901 
3902         /**
3903          * Unicode script "Lepcha".
3904          */
3905         LEPCHA,
3906 
3907         /**
3908          * Unicode script "Ol_Chiki".
3909          */
3910         OL_CHIKI,
3911 
3912         /**
3913          * Unicode script "Vai".
3914          */
3915         VAI,
3916 
3917         /**
3918          * Unicode script "Saurashtra".
3919          */
3920         SAURASHTRA,
3921 
3922         /**
3923          * Unicode script "Kayah_Li".
3924          */
3925         KAYAH_LI,
3926 
3927         /**
3928          * Unicode script "Rejang".
3929          */
3930         REJANG,
3931 
3932         /**
3933          * Unicode script "Lycian".
3934          */
3935         LYCIAN,
3936 
3937         /**
3938          * Unicode script "Carian".
3939          */
3940         CARIAN,
3941 
3942         /**
3943          * Unicode script "Lydian".
3944          */
3945         LYDIAN,
3946 
3947         /**
3948          * Unicode script "Cham".
3949          */
3950         CHAM,
3951 
3952         /**
3953          * Unicode script "Tai_Tham".
3954          */
3955         TAI_THAM,
3956 
3957         /**
3958          * Unicode script "Tai_Viet".
3959          */
3960         TAI_VIET,
3961 
3962         /**
3963          * Unicode script "Avestan".
3964          */
3965         AVESTAN,
3966 
3967         /**
3968          * Unicode script "Egyptian_Hieroglyphs".
3969          */
3970         EGYPTIAN_HIEROGLYPHS,
3971 
3972         /**
3973          * Unicode script "Samaritan".
3974          */
3975         SAMARITAN,
3976 
3977         /**
3978          * Unicode script "Mandaic".
3979          */
3980         MANDAIC,
3981 
3982         /**
3983          * Unicode script "Lisu".
3984          */
3985         LISU,
3986 
3987         /**
3988          * Unicode script "Bamum".
3989          */
3990         BAMUM,
3991 
3992         /**
3993          * Unicode script "Javanese".
3994          */
3995         JAVANESE,
3996 
3997         /**
3998          * Unicode script "Meetei_Mayek".
3999          */
4000         MEETEI_MAYEK,
4001 
4002         /**
4003          * Unicode script "Imperial_Aramaic".
4004          */
4005         IMPERIAL_ARAMAIC,
4006 
4007         /**
4008          * Unicode script "Old_South_Arabian".
4009          */
4010         OLD_SOUTH_ARABIAN,
4011 
4012         /**
4013          * Unicode script "Inscriptional_Parthian".
4014          */
4015         INSCRIPTIONAL_PARTHIAN,
4016 
4017         /**
4018          * Unicode script "Inscriptional_Pahlavi".
4019          */
4020         INSCRIPTIONAL_PAHLAVI,
4021 
4022         /**
4023          * Unicode script "Old_Turkic".
4024          */
4025         OLD_TURKIC,
4026 
4027         /**
4028          * Unicode script "Brahmi".
4029          */
4030         BRAHMI,
4031 
4032         /**
4033          * Unicode script "Kaithi".
4034          */
4035         KAITHI,
4036 
4037         /**
4038          * Unicode script "Meroitic Hieroglyphs".
4039          * @since 1.8
4040          */
4041         MEROITIC_HIEROGLYPHS,
4042 
4043         /**
4044          * Unicode script "Meroitic Cursive".
4045          * @since 1.8
4046          */
4047         MEROITIC_CURSIVE,
4048 
4049         /**
4050          * Unicode script "Sora Sompeng".
4051          * @since 1.8
4052          */
4053         SORA_SOMPENG,
4054 
4055         /**
4056          * Unicode script "Chakma".
4057          * @since 1.8
4058          */
4059         CHAKMA,
4060 
4061         /**
4062          * Unicode script "Sharada".
4063          * @since 1.8
4064          */
4065         SHARADA,
4066 
4067         /**
4068          * Unicode script "Takri".
4069          * @since 1.8
4070          */
4071         TAKRI,
4072 
4073         /**
4074          * Unicode script "Miao".
4075          * @since 1.8
4076          */
4077         MIAO,
4078 
4079         /**
4080          * Unicode script "Caucasian Albanian".
4081          * @since 1.9
4082          */
4083         CAUCASIAN_ALBANIAN,
4084 
4085         /**
4086          * Unicode script "Bassa Vah".
4087          * @since 1.9
4088          */
4089         BASSA_VAH,
4090 
4091         /**
4092          * Unicode script "Duployan".
4093          * @since 1.9
4094          */
4095         DUPLOYAN,
4096 
4097         /**
4098          * Unicode script "Elbasan".
4099          * @since 1.9
4100          */
4101         ELBASAN,
4102 
4103         /**
4104          * Unicode script "Grantha".
4105          * @since 1.9
4106          */
4107         GRANTHA,
4108 
4109         /**
4110          * Unicode script "Pahawh Hmong".
4111          * @since 1.9
4112          */
4113         PAHAWH_HMONG,
4114 
4115         /**
4116          * Unicode script "Khojki".
4117          * @since 1.9
4118          */
4119         KHOJKI,
4120 
4121         /**
4122          * Unicode script "Linear A".
4123          * @since 1.9
4124          */
4125         LINEAR_A,
4126 
4127         /**
4128          * Unicode script "Mahajani".
4129          * @since 1.9
4130          */
4131         MAHAJANI,
4132 
4133         /**
4134          * Unicode script "Manichaean".
4135          * @since 1.9
4136          */
4137         MANICHAEAN,
4138 
4139         /**
4140          * Unicode script "Mende Kikakui".
4141          * @since 1.9
4142          */
4143         MENDE_KIKAKUI,
4144 
4145         /**
4146          * Unicode script "Modi".
4147          * @since 1.9
4148          */
4149         MODI,
4150 
4151         /**
4152          * Unicode script "Mro".
4153          * @since 1.9
4154          */
4155         MRO,
4156 
4157         /**
4158          * Unicode script "Old North Arabian".
4159          * @since 1.9
4160          */
4161         OLD_NORTH_ARABIAN,
4162 
4163         /**
4164          * Unicode script "Nabataean".
4165          * @since 1.9
4166          */
4167         NABATAEAN,
4168 
4169         /**
4170          * Unicode script "Palmyrene".
4171          * @since 1.9
4172          */
4173         PALMYRENE,
4174 
4175         /**
4176          * Unicode script "Pau Cin Hau".
4177          * @since 1.9
4178          */
4179         PAU_CIN_HAU,
4180 
4181         /**
4182          * Unicode script "Old Permic".
4183          * @since 1.9
4184          */
4185         OLD_PERMIC,
4186 
4187         /**
4188          * Unicode script "Psalter Pahlavi".
4189          * @since 1.9
4190          */
4191         PSALTER_PAHLAVI,
4192 
4193         /**
4194          * Unicode script "Siddham".
4195          * @since 1.9
4196          */
4197         SIDDHAM,
4198 
4199         /**
4200          * Unicode script "Khudawadi".
4201          * @since 1.9
4202          */
4203         KHUDAWADI,
4204 
4205         /**
4206          * Unicode script "Tirhuta".
4207          * @since 1.9
4208          */
4209         TIRHUTA,
4210 
4211         /**
4212          * Unicode script "Warang Citi".
4213          * @since 1.9
4214          */
4215         WARANG_CITI,
4216 
4217         /**
4218          * Unicode script "Unknown".
4219          */
4220         UNKNOWN;
4221 
4222         private static final int[] scriptStarts = {
4223             0x0000,   // 0000..0040; COMMON
4224             0x0041,   // 0041..005A; LATIN
4225             0x005B,   // 005B..0060; COMMON
4226             0x0061,   // 0061..007A; LATIN
4227             0x007B,   // 007B..00A9; COMMON
4228             0x00AA,   // 00AA      ; LATIN
4229             0x00AB,   // 00AB..00B9; COMMON
4230             0x00BA,   // 00BA      ; LATIN
4231             0x00BB,   // 00BB..00BF; COMMON
4232             0x00C0,   // 00C0..00D6; LATIN
4233             0x00D7,   // 00D7      ; COMMON
4234             0x00D8,   // 00D8..00F6; LATIN
4235             0x00F7,   // 00F7      ; COMMON
4236             0x00F8,   // 00F8..02B8; LATIN
4237             0x02B9,   // 02B9..02DF; COMMON
4238             0x02E0,   // 02E0..02E4; LATIN
4239             0x02E5,   // 02E5..02E9; COMMON
4240             0x02EA,   // 02EA..02EB; BOPOMOFO
4241             0x02EC,   // 02EC..02FF; COMMON
4242             0x0300,   // 0300..036F; INHERITED
4243             0x0370,   // 0370..0373; GREEK
4244             0x0374,   // 0374      ; COMMON
4245             0x0375,   // 0375..0377; GREEK
4246             0x0378,   // 0378..0379; UNKNOWN
4247             0x037A,   // 037A..037D; GREEK
4248             0x037E,   // 037E      ; COMMON
4249             0x037F,   // 037F      ; GREEK
4250             0x0380,   // 0380..0383; UNKNOWN
4251             0x0384,   // 0384      ; GREEK
4252             0x0385,   // 0385      ; COMMON
4253             0x0386,   // 0386      ; GREEK
4254             0x0387,   // 0387      ; COMMON
4255             0x0388,   // 0388..038A; GREEK
4256             0x038B,   // 038B      ; UNKNOWN
4257             0x038C,   // 038C      ; GREEK
4258             0x038D,   // 038D      ; UNKNOWN
4259             0x038E,   // 038E..03A1; GREEK
4260             0x03A2,   // 03A2      ; UNKNOWN
4261             0x03A3,   // 03A3..03E1; GREEK
4262             0x03E2,   // 03E2..03EF; COPTIC
4263             0x03F0,   // 03F0..03FF; GREEK
4264             0x0400,   // 0400..0484; CYRILLIC
4265             0x0485,   // 0485..0486; INHERITED
4266             0x0487,   // 0487..052F; CYRILLIC
4267             0x0530,   // 0530      ; UNKNOWN
4268             0x0531,   // 0531..0556; ARMENIAN
4269             0x0557,   // 0557..0558; UNKNOWN
4270             0x0559,   // 0559..055F; ARMENIAN
4271             0x0560,   // 0560      ; UNKNOWN
4272             0x0561,   // 0561..0587; ARMENIAN
4273             0x0588,   // 0588      ; UNKNOWN
4274             0x0589,   // 0589      ; COMMON
4275             0x058A,   // 058A      ; ARMENIAN
4276             0x058B,   // 058B..058C; UNKNOWN
4277             0x058D,   // 058D..058F; ARMENIAN
4278             0x0590,   // 0590      ; UNKNOWN
4279             0x0591,   // 0591..05C7; HEBREW
4280             0x05C8,   // 05C8..05CF; UNKNOWN
4281             0x05D0,   // 05D0..05EA; HEBREW
4282             0x05EB,   // 05EB..05EF; UNKNOWN
4283             0x05F0,   // 05F0..05F4; HEBREW
4284             0x05F5,   // 05F5..05FF; UNKNOWN
4285             0x0600,   // 0600..0604; ARABIC
4286             0x0605,   // 0605      ; COMMON
4287             0x0606,   // 0606..060B; ARABIC
4288             0x060C,   // 060C      ; COMMON
4289             0x060D,   // 060D..061A; ARABIC
4290             0x061B,   // 061B..061C; COMMON
4291             0x061D,   // 061D      ; UNKNOWN
4292             0x061E,   // 061E      ; ARABIC
4293             0x061F,   // 061F      ; COMMON
4294             0x0620,   // 0620..063F; ARABIC
4295             0x0640,   // 0640      ; COMMON
4296             0x0641,   // 0641..064A; ARABIC
4297             0x064B,   // 064B..0655; INHERITED
4298             0x0656,   // 0656..065F; ARABIC
4299             0x0660,   // 0660..0669; COMMON
4300             0x066A,   // 066A..066F; ARABIC
4301             0x0670,   // 0670      ; INHERITED
4302             0x0671,   // 0671..06DC; ARABIC
4303             0x06DD,   // 06DD      ; COMMON
4304             0x06DE,   // 06DE..06FF; ARABIC
4305             0x0700,   // 0700..070D; SYRIAC
4306             0x070E,   // 070E      ; UNKNOWN
4307             0x070F,   // 070F..074A; SYRIAC
4308             0x074B,   // 074B..074C; UNKNOWN
4309             0x074D,   // 074D..074F; SYRIAC
4310             0x0750,   // 0750..077F; ARABIC
4311             0x0780,   // 0780..07B1; THAANA
4312             0x07B2,   // 07B2..07BF; UNKNOWN
4313             0x07C0,   // 07C0..07FA; NKO
4314             0x07FB,   // 07FB..07FF; UNKNOWN
4315             0x0800,   // 0800..082D; SAMARITAN
4316             0x082E,   // 082E..082F; UNKNOWN
4317             0x0830,   // 0830..083E; SAMARITAN
4318             0x083F,   // 083F      ; UNKNOWN
4319             0x0840,   // 0840..085B; MANDAIC
4320             0x085C,   // 085C..085D; UNKNOWN
4321             0x085E,   // 085E      ; MANDAIC
4322             0x085F,   // 085F..089F; UNKNOWN
4323             0x08A0,   // 08A0..08B2; ARABIC
4324             0x08B3,   // 08B3..08E3; UNKNOWN
4325             0x08E4,   // 08E4..08FF; ARABIC
4326             0x0900,   // 0900..0950; DEVANAGARI
4327             0x0951,   // 0951..0952; INHERITED
4328             0x0953,   // 0953..0963; DEVANAGARI
4329             0x0964,   // 0964..0965; COMMON
4330             0x0966,   // 0966..097F; DEVANAGARI
4331             0x0980,   // 0980..0983; BENGALI
4332             0x0984,   // 0984      ; UNKNOWN
4333             0x0985,   // 0985..098C; BENGALI
4334             0x098D,   // 098D..098E; UNKNOWN
4335             0x098F,   // 098F..0990; BENGALI
4336             0x0991,   // 0991..0992; UNKNOWN
4337             0x0993,   // 0993..09A8; BENGALI
4338             0x09A9,   // 09A9      ; UNKNOWN
4339             0x09AA,   // 09AA..09B0; BENGALI
4340             0x09B1,   // 09B1      ; UNKNOWN
4341             0x09B2,   // 09B2      ; BENGALI
4342             0x09B3,   // 09B3..09B5; UNKNOWN
4343             0x09B6,   // 09B6..09B9; BENGALI
4344             0x09BA,   // 09BA..09BB; UNKNOWN
4345             0x09BC,   // 09BC..09C4; BENGALI
4346             0x09C5,   // 09C5..09C6; UNKNOWN
4347             0x09C7,   // 09C7..09C8; BENGALI
4348             0x09C9,   // 09C9..09CA; UNKNOWN
4349             0x09CB,   // 09CB..09CE; BENGALI
4350             0x09CF,   // 09CF..09D6; UNKNOWN
4351             0x09D7,   // 09D7      ; BENGALI
4352             0x09D8,   // 09D8..09DB; UNKNOWN
4353             0x09DC,   // 09DC..09DD; BENGALI
4354             0x09DE,   // 09DE      ; UNKNOWN
4355             0x09DF,   // 09DF..09E3; BENGALI
4356             0x09E4,   // 09E4..09E5; UNKNOWN
4357             0x09E6,   // 09E6..09FB; BENGALI
4358             0x09FC,   // 09FC..0A00; UNKNOWN
4359             0x0A01,   // 0A01..0A03; GURMUKHI
4360             0x0A04,   // 0A04      ; UNKNOWN
4361             0x0A05,   // 0A05..0A0A; GURMUKHI
4362             0x0A0B,   // 0A0B..0A0E; UNKNOWN
4363             0x0A0F,   // 0A0F..0A10; GURMUKHI
4364             0x0A11,   // 0A11..0A12; UNKNOWN
4365             0x0A13,   // 0A13..0A28; GURMUKHI
4366             0x0A29,   // 0A29      ; UNKNOWN
4367             0x0A2A,   // 0A2A..0A30; GURMUKHI
4368             0x0A31,   // 0A31      ; UNKNOWN
4369             0x0A32,   // 0A32..0A33; GURMUKHI
4370             0x0A34,   // 0A34      ; UNKNOWN
4371             0x0A35,   // 0A35..0A36; GURMUKHI
4372             0x0A37,   // 0A37      ; UNKNOWN
4373             0x0A38,   // 0A38..0A39; GURMUKHI
4374             0x0A3A,   // 0A3A..0A3B; UNKNOWN
4375             0x0A3C,   // 0A3C      ; GURMUKHI
4376             0x0A3D,   // 0A3D      ; UNKNOWN
4377             0x0A3E,   // 0A3E..0A42; GURMUKHI
4378             0x0A43,   // 0A43..0A46; UNKNOWN
4379             0x0A47,   // 0A47..0A48; GURMUKHI
4380             0x0A49,   // 0A49..0A4A; UNKNOWN
4381             0x0A4B,   // 0A4B..0A4D; GURMUKHI
4382             0x0A4E,   // 0A4E..0A50; UNKNOWN
4383             0x0A51,   // 0A51      ; GURMUKHI
4384             0x0A52,   // 0A52..0A58; UNKNOWN
4385             0x0A59,   // 0A59..0A5C; GURMUKHI
4386             0x0A5D,   // 0A5D      ; UNKNOWN
4387             0x0A5E,   // 0A5E      ; GURMUKHI
4388             0x0A5F,   // 0A5F..0A65; UNKNOWN
4389             0x0A66,   // 0A66..0A75; GURMUKHI
4390             0x0A76,   // 0A76..0A80; UNKNOWN
4391             0x0A81,   // 0A81..0A83; GUJARATI
4392             0x0A84,   // 0A84      ; UNKNOWN
4393             0x0A85,   // 0A85..0A8D; GUJARATI
4394             0x0A8E,   // 0A8E      ; UNKNOWN
4395             0x0A8F,   // 0A8F..0A91; GUJARATI
4396             0x0A92,   // 0A92      ; UNKNOWN
4397             0x0A93,   // 0A93..0AA8; GUJARATI
4398             0x0AA9,   // 0AA9      ; UNKNOWN
4399             0x0AAA,   // 0AAA..0AB0; GUJARATI
4400             0x0AB1,   // 0AB1      ; UNKNOWN
4401             0x0AB2,   // 0AB2..0AB3; GUJARATI
4402             0x0AB4,   // 0AB4      ; UNKNOWN
4403             0x0AB5,   // 0AB5..0AB9; GUJARATI
4404             0x0ABA,   // 0ABA..0ABB; UNKNOWN
4405             0x0ABC,   // 0ABC..0AC5; GUJARATI
4406             0x0AC6,   // 0AC6      ; UNKNOWN
4407             0x0AC7,   // 0AC7..0AC9; GUJARATI
4408             0x0ACA,   // 0ACA      ; UNKNOWN
4409             0x0ACB,   // 0ACB..0ACD; GUJARATI
4410             0x0ACE,   // 0ACE..0ACF; UNKNOWN
4411             0x0AD0,   // 0AD0      ; GUJARATI
4412             0x0AD1,   // 0AD1..0ADF; UNKNOWN
4413             0x0AE0,   // 0AE0..0AE3; GUJARATI
4414             0x0AE4,   // 0AE4..0AE5; UNKNOWN
4415             0x0AE6,   // 0AE6..0AF1; GUJARATI
4416             0x0AF2,   // 0AF2..0B00; UNKNOWN
4417             0x0B01,   // 0B01..0B03; ORIYA
4418             0x0B04,   // 0B04      ; UNKNOWN
4419             0x0B05,   // 0B05..0B0C; ORIYA
4420             0x0B0D,   // 0B0D..0B0E; UNKNOWN
4421             0x0B0F,   // 0B0F..0B10; ORIYA
4422             0x0B11,   // 0B11..0B12; UNKNOWN
4423             0x0B13,   // 0B13..0B28; ORIYA
4424             0x0B29,   // 0B29      ; UNKNOWN
4425             0x0B2A,   // 0B2A..0B30; ORIYA
4426             0x0B31,   // 0B31      ; UNKNOWN
4427             0x0B32,   // 0B32..0B33; ORIYA
4428             0x0B34,   // 0B34      ; UNKNOWN
4429             0x0B35,   // 0B35..0B39; ORIYA
4430             0x0B3A,   // 0B3A..0B3B; UNKNOWN
4431             0x0B3C,   // 0B3C..0B44; ORIYA
4432             0x0B45,   // 0B45..0B46; UNKNOWN
4433             0x0B47,   // 0B47..0B48; ORIYA
4434             0x0B49,   // 0B49..0B4A; UNKNOWN
4435             0x0B4B,   // 0B4B..0B4D; ORIYA
4436             0x0B4E,   // 0B4E..0B55; UNKNOWN
4437             0x0B56,   // 0B56..0B57; ORIYA
4438             0x0B58,   // 0B58..0B5B; UNKNOWN
4439             0x0B5C,   // 0B5C..0B5D; ORIYA
4440             0x0B5E,   // 0B5E      ; UNKNOWN
4441             0x0B5F,   // 0B5F..0B63; ORIYA
4442             0x0B64,   // 0B64..0B65; UNKNOWN
4443             0x0B66,   // 0B66..0B77; ORIYA
4444             0x0B78,   // 0B78..0B81; UNKNOWN
4445             0x0B82,   // 0B82..0B83; TAMIL
4446             0x0B84,   // 0B84      ; UNKNOWN
4447             0x0B85,   // 0B85..0B8A; TAMIL
4448             0x0B8B,   // 0B8B..0B8D; UNKNOWN
4449             0x0B8E,   // 0B8E..0B90; TAMIL
4450             0x0B91,   // 0B91      ; UNKNOWN
4451             0x0B92,   // 0B92..0B95; TAMIL
4452             0x0B96,   // 0B96..0B98; UNKNOWN
4453             0x0B99,   // 0B99..0B9A; TAMIL
4454             0x0B9B,   // 0B9B      ; UNKNOWN
4455             0x0B9C,   // 0B9C      ; TAMIL
4456             0x0B9D,   // 0B9D      ; UNKNOWN
4457             0x0B9E,   // 0B9E..0B9F; TAMIL
4458             0x0BA0,   // 0BA0..0BA2; UNKNOWN
4459             0x0BA3,   // 0BA3..0BA4; TAMIL
4460             0x0BA5,   // 0BA5..0BA7; UNKNOWN
4461             0x0BA8,   // 0BA8..0BAA; TAMIL
4462             0x0BAB,   // 0BAB..0BAD; UNKNOWN
4463             0x0BAE,   // 0BAE..0BB9; TAMIL
4464             0x0BBA,   // 0BBA..0BBD; UNKNOWN
4465             0x0BBE,   // 0BBE..0BC2; TAMIL
4466             0x0BC3,   // 0BC3..0BC5; UNKNOWN
4467             0x0BC6,   // 0BC6..0BC8; TAMIL
4468             0x0BC9,   // 0BC9      ; UNKNOWN
4469             0x0BCA,   // 0BCA..0BCD; TAMIL
4470             0x0BCE,   // 0BCE..0BCF; UNKNOWN
4471             0x0BD0,   // 0BD0      ; TAMIL
4472             0x0BD1,   // 0BD1..0BD6; UNKNOWN
4473             0x0BD7,   // 0BD7      ; TAMIL
4474             0x0BD8,   // 0BD8..0BE5; UNKNOWN
4475             0x0BE6,   // 0BE6..0BFA; TAMIL
4476             0x0BFB,   // 0BFB..0BFF; UNKNOWN
4477             0x0C00,   // 0C00..0C03; TELUGU
4478             0x0C04,   // 0C04      ; UNKNOWN
4479             0x0C05,   // 0C05..0C0C; TELUGU
4480             0x0C0D,   // 0C0D      ; UNKNOWN
4481             0x0C0E,   // 0C0E..0C10; TELUGU
4482             0x0C11,   // 0C11      ; UNKNOWN
4483             0x0C12,   // 0C12..0C28; TELUGU
4484             0x0C29,   // 0C29      ; UNKNOWN
4485             0x0C2A,   // 0C2A..0C39; TELUGU
4486             0x0C3A,   // 0C3A..0C3C; UNKNOWN
4487             0x0C3D,   // 0C3D..0C44; TELUGU
4488             0x0C45,   // 0C45      ; UNKNOWN
4489             0x0C46,   // 0C46..0C48; TELUGU
4490             0x0C49,   // 0C49      ; UNKNOWN
4491             0x0C4A,   // 0C4A..0C4D; TELUGU
4492             0x0C4E,   // 0C4E..0C54; UNKNOWN
4493             0x0C55,   // 0C55..0C56; TELUGU
4494             0x0C57,   // 0C57      ; UNKNOWN
4495             0x0C58,   // 0C58..0C59; TELUGU
4496             0x0C5A,   // 0C5A..0C5F; UNKNOWN
4497             0x0C60,   // 0C60..0C63; TELUGU
4498             0x0C64,   // 0C64..0C65; UNKNOWN
4499             0x0C66,   // 0C66..0C6F; TELUGU
4500             0x0C70,   // 0C70..0C77; UNKNOWN
4501             0x0C78,   // 0C78..0C7F; TELUGU
4502             0x0C80,   // 0C80      ; UNKNOWN
4503             0x0C81,   // 0C81..0C83; KANNADA
4504             0x0C84,   // 0C84      ; UNKNOWN
4505             0x0C85,   // 0C85..0C8C; KANNADA
4506             0x0C8D,   // 0C8D      ; UNKNOWN
4507             0x0C8E,   // 0C8E..0C90; KANNADA
4508             0x0C91,   // 0C91      ; UNKNOWN
4509             0x0C92,   // 0C92..0CA8; KANNADA
4510             0x0CA9,   // 0CA9      ; UNKNOWN
4511             0x0CAA,   // 0CAA..0CB3; KANNADA
4512             0x0CB4,   // 0CB4      ; UNKNOWN
4513             0x0CB5,   // 0CB5..0CB9; KANNADA
4514             0x0CBA,   // 0CBA..0CBB; UNKNOWN
4515             0x0CBC,   // 0CBC..0CC4; KANNADA
4516             0x0CC5,   // 0CC5      ; UNKNOWN
4517             0x0CC6,   // 0CC6..0CC8; KANNADA
4518             0x0CC9,   // 0CC9      ; UNKNOWN
4519             0x0CCA,   // 0CCA..0CCD; KANNADA
4520             0x0CCE,   // 0CCE..0CD4; UNKNOWN
4521             0x0CD5,   // 0CD5..0CD6; KANNADA
4522             0x0CD7,   // 0CD7..0CDD; UNKNOWN
4523             0x0CDE,   // 0CDE      ; KANNADA
4524             0x0CDF,   // 0CDF      ; UNKNOWN
4525             0x0CE0,   // 0CE0..0CE3; KANNADA
4526             0x0CE4,   // 0CE4..0CE5; UNKNOWN
4527             0x0CE6,   // 0CE6..0CEF; KANNADA
4528             0x0CF0,   // 0CF0      ; UNKNOWN
4529             0x0CF1,   // 0CF1..0CF2; KANNADA
4530             0x0CF3,   // 0CF3..0D00; UNKNOWN
4531             0x0D01,   // 0D01..0D03; MALAYALAM
4532             0x0D04,   // 0D04      ; UNKNOWN
4533             0x0D05,   // 0D05..0D0C; MALAYALAM
4534             0x0D0D,   // 0D0D      ; UNKNOWN
4535             0x0D0E,   // 0D0E..0D10; MALAYALAM
4536             0x0D11,   // 0D11      ; UNKNOWN
4537             0x0D12,   // 0D12..0D3A; MALAYALAM
4538             0x0D3B,   // 0D3B..0D3C; UNKNOWN
4539             0x0D3D,   // 0D3D..0D44; MALAYALAM
4540             0x0D45,   // 0D45      ; UNKNOWN
4541             0x0D46,   // 0D46..0D48; MALAYALAM
4542             0x0D49,   // 0D49      ; UNKNOWN
4543             0x0D4A,   // 0D4A..0D4E; MALAYALAM
4544             0x0D4F,   // 0D4F..0D56; UNKNOWN
4545             0x0D57,   // 0D57      ; MALAYALAM
4546             0x0D58,   // 0D58..0D5F; UNKNOWN
4547             0x0D60,   // 0D60..0D63; MALAYALAM
4548             0x0D64,   // 0D64..0D65; UNKNOWN
4549             0x0D66,   // 0D66..0D75; MALAYALAM
4550             0x0D76,   // 0D76..0D78; UNKNOWN
4551             0x0D79,   // 0D79..0D7F; MALAYALAM
4552             0x0D80,   // 0D80..0D81; UNKNOWN
4553             0x0D82,   // 0D82..0D83; SINHALA
4554             0x0D84,   // 0D84      ; UNKNOWN
4555             0x0D85,   // 0D85..0D96; SINHALA
4556             0x0D97,   // 0D97..0D99; UNKNOWN
4557             0x0D9A,   // 0D9A..0DB1; SINHALA
4558             0x0DB2,   // 0DB2      ; UNKNOWN
4559             0x0DB3,   // 0DB3..0DBB; SINHALA
4560             0x0DBC,   // 0DBC      ; UNKNOWN
4561             0x0DBD,   // 0DBD      ; SINHALA
4562             0x0DBE,   // 0DBE..0DBF; UNKNOWN
4563             0x0DC0,   // 0DC0..0DC6; SINHALA
4564             0x0DC7,   // 0DC7..0DC9; UNKNOWN
4565             0x0DCA,   // 0DCA      ; SINHALA
4566             0x0DCB,   // 0DCB..0DCE; UNKNOWN
4567             0x0DCF,   // 0DCF..0DD4; SINHALA
4568             0x0DD5,   // 0DD5      ; UNKNOWN
4569             0x0DD6,   // 0DD6      ; SINHALA
4570             0x0DD7,   // 0DD7      ; UNKNOWN
4571             0x0DD8,   // 0DD8..0DDF; SINHALA
4572             0x0DE0,   // 0DE0..0DE5; UNKNOWN
4573             0x0DE6,   // 0DE6..0DEF; SINHALA
4574             0x0DF0,   // 0DF0..0DF1; UNKNOWN
4575             0x0DF2,   // 0DF2..0DF4; SINHALA
4576             0x0DF5,   // 0DF5..0E00; UNKNOWN
4577             0x0E01,   // 0E01..0E3A; THAI
4578             0x0E3B,   // 0E3B..0E3E; UNKNOWN
4579             0x0E3F,   // 0E3F      ; COMMON
4580             0x0E40,   // 0E40..0E5B; THAI
4581             0x0E5C,   // 0E5C..0E80; UNKNOWN
4582             0x0E81,   // 0E81..0E82; LAO
4583             0x0E83,   // 0E83      ; UNKNOWN
4584             0x0E84,   // 0E84      ; LAO
4585             0x0E85,   // 0E85..0E86; UNKNOWN
4586             0x0E87,   // 0E87..0E88; LAO
4587             0x0E89,   // 0E89      ; UNKNOWN
4588             0x0E8A,   // 0E8A      ; LAO
4589             0x0E8B,   // 0E8B..0E8C; UNKNOWN
4590             0x0E8D,   // 0E8D      ; LAO
4591             0x0E8E,   // 0E8E..0E93; UNKNOWN
4592             0x0E94,   // 0E94..0E97; LAO
4593             0x0E98,   // 0E98      ; UNKNOWN
4594             0x0E99,   // 0E99..0E9F; LAO
4595             0x0EA0,   // 0EA0      ; UNKNOWN
4596             0x0EA1,   // 0EA1..0EA3; LAO
4597             0x0EA4,   // 0EA4      ; UNKNOWN
4598             0x0EA5,   // 0EA5      ; LAO
4599             0x0EA6,   // 0EA6      ; UNKNOWN
4600             0x0EA7,   // 0EA7      ; LAO
4601             0x0EA8,   // 0EA8..0EA9; UNKNOWN
4602             0x0EAA,   // 0EAA..0EAB; LAO
4603             0x0EAC,   // 0EAC      ; UNKNOWN
4604             0x0EAD,   // 0EAD..0EB9; LAO
4605             0x0EBA,   // 0EBA      ; UNKNOWN
4606             0x0EBB,   // 0EBB..0EBD; LAO
4607             0x0EBE,   // 0EBE..0EBF; UNKNOWN
4608             0x0EC0,   // 0EC0..0EC4; LAO
4609             0x0EC5,   // 0EC5      ; UNKNOWN
4610             0x0EC6,   // 0EC6      ; LAO
4611             0x0EC7,   // 0EC7      ; UNKNOWN
4612             0x0EC8,   // 0EC8..0ECD; LAO
4613             0x0ECE,   // 0ECE..0ECF; UNKNOWN
4614             0x0ED0,   // 0ED0..0ED9; LAO
4615             0x0EDA,   // 0EDA..0EDB; UNKNOWN
4616             0x0EDC,   // 0EDC..0EDF; LAO
4617             0x0EE0,   // 0EE0..0EFF; UNKNOWN
4618             0x0F00,   // 0F00..0F47; TIBETAN
4619             0x0F48,   // 0F48      ; UNKNOWN
4620             0x0F49,   // 0F49..0F6C; TIBETAN
4621             0x0F6D,   // 0F6D..0F70; UNKNOWN
4622             0x0F71,   // 0F71..0F97; TIBETAN
4623             0x0F98,   // 0F98      ; UNKNOWN
4624             0x0F99,   // 0F99..0FBC; TIBETAN
4625             0x0FBD,   // 0FBD      ; UNKNOWN
4626             0x0FBE,   // 0FBE..0FCC; TIBETAN
4627             0x0FCD,   // 0FCD      ; UNKNOWN
4628             0x0FCE,   // 0FCE..0FD4; TIBETAN
4629             0x0FD5,   // 0FD5..0FD8; COMMON
4630             0x0FD9,   // 0FD9..0FDA; TIBETAN
4631             0x0FDB,   // 0FDB..0FFF; UNKNOWN
4632             0x1000,   // 1000..109F; MYANMAR
4633             0x10A0,   // 10A0..10C5; GEORGIAN
4634             0x10C6,   // 10C6      ; UNKNOWN
4635             0x10C7,   // 10C7      ; GEORGIAN
4636             0x10C8,   // 10C8..10CC; UNKNOWN
4637             0x10CD,   // 10CD      ; GEORGIAN
4638             0x10CE,   // 10CE..10CF; UNKNOWN
4639             0x10D0,   // 10D0..10FA; GEORGIAN
4640             0x10FB,   // 10FB      ; COMMON
4641             0x10FC,   // 10FC..10FF; GEORGIAN
4642             0x1100,   // 1100..11FF; HANGUL
4643             0x1200,   // 1200..1248; ETHIOPIC
4644             0x1249,   // 1249      ; UNKNOWN
4645             0x124A,   // 124A..124D; ETHIOPIC
4646             0x124E,   // 124E..124F; UNKNOWN
4647             0x1250,   // 1250..1256; ETHIOPIC
4648             0x1257,   // 1257      ; UNKNOWN
4649             0x1258,   // 1258      ; ETHIOPIC
4650             0x1259,   // 1259      ; UNKNOWN
4651             0x125A,   // 125A..125D; ETHIOPIC
4652             0x125E,   // 125E..125F; UNKNOWN
4653             0x1260,   // 1260..1288; ETHIOPIC
4654             0x1289,   // 1289      ; UNKNOWN
4655             0x128A,   // 128A..128D; ETHIOPIC
4656             0x128E,   // 128E..128F; UNKNOWN
4657             0x1290,   // 1290..12B0; ETHIOPIC
4658             0x12B1,   // 12B1      ; UNKNOWN
4659             0x12B2,   // 12B2..12B5; ETHIOPIC
4660             0x12B6,   // 12B6..12B7; UNKNOWN
4661             0x12B8,   // 12B8..12BE; ETHIOPIC
4662             0x12BF,   // 12BF      ; UNKNOWN
4663             0x12C0,   // 12C0      ; ETHIOPIC
4664             0x12C1,   // 12C1      ; UNKNOWN
4665             0x12C2,   // 12C2..12C5; ETHIOPIC
4666             0x12C6,   // 12C6..12C7; UNKNOWN
4667             0x12C8,   // 12C8..12D6; ETHIOPIC
4668             0x12D7,   // 12D7      ; UNKNOWN
4669             0x12D8,   // 12D8..1310; ETHIOPIC
4670             0x1311,   // 1311      ; UNKNOWN
4671             0x1312,   // 1312..1315; ETHIOPIC
4672             0x1316,   // 1316..1317; UNKNOWN
4673             0x1318,   // 1318..135A; ETHIOPIC
4674             0x135B,   // 135B..135C; UNKNOWN
4675             0x135D,   // 135D..137C; ETHIOPIC
4676             0x137D,   // 137D..137F; UNKNOWN
4677             0x1380,   // 1380..1399; ETHIOPIC
4678             0x139A,   // 139A..139F; UNKNOWN
4679             0x13A0,   // 13A0..13F4; CHEROKEE
4680             0x13F5,   // 13F5..13FF; UNKNOWN
4681             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
4682             0x1680,   // 1680..169C; OGHAM
4683             0x169D,   // 169D..169F; UNKNOWN
4684             0x16A0,   // 16A0..16EA; RUNIC
4685             0x16EB,   // 16EB..16ED; COMMON
4686             0x16EE,   // 16EE..16F8; RUNIC
4687             0x16F9,   // 16F9..16FF; UNKNOWN
4688             0x1700,   // 1700..170C; TAGALOG
4689             0x170D,   // 170D      ; UNKNOWN
4690             0x170E,   // 170E..1714; TAGALOG
4691             0x1715,   // 1715..171F; UNKNOWN
4692             0x1720,   // 1720..1734; HANUNOO
4693             0x1735,   // 1735..1736; COMMON
4694             0x1737,   // 1737..173F; UNKNOWN
4695             0x1740,   // 1740..1753; BUHID
4696             0x1754,   // 1754..175F; UNKNOWN
4697             0x1760,   // 1760..176C; TAGBANWA
4698             0x176D,   // 176D      ; UNKNOWN
4699             0x176E,   // 176E..1770; TAGBANWA
4700             0x1771,   // 1771      ; UNKNOWN
4701             0x1772,   // 1772..1773; TAGBANWA
4702             0x1774,   // 1774..177F; UNKNOWN
4703             0x1780,   // 1780..17DD; KHMER
4704             0x17DE,   // 17DE..17DF; UNKNOWN
4705             0x17E0,   // 17E0..17E9; KHMER
4706             0x17EA,   // 17EA..17EF; UNKNOWN
4707             0x17F0,   // 17F0..17F9; KHMER
4708             0x17FA,   // 17FA..17FF; UNKNOWN
4709             0x1800,   // 1800..1801; MONGOLIAN
4710             0x1802,   // 1802..1803; COMMON
4711             0x1804,   // 1804      ; MONGOLIAN
4712             0x1805,   // 1805      ; COMMON
4713             0x1806,   // 1806..180E; MONGOLIAN
4714             0x180F,   // 180F      ; UNKNOWN
4715             0x1810,   // 1810..1819; MONGOLIAN
4716             0x181A,   // 181A..181F; UNKNOWN
4717             0x1820,   // 1820..1877; MONGOLIAN
4718             0x1878,   // 1878..187F; UNKNOWN
4719             0x1880,   // 1880..18AA; MONGOLIAN
4720             0x18AB,   // 18AB..18AF; UNKNOWN
4721             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
4722             0x18F6,   // 18F6..18FF; UNKNOWN
4723             0x1900,   // 1900..191E; LIMBU
4724             0x191F,   // 191F      ; UNKNOWN
4725             0x1920,   // 1920..192B; LIMBU
4726             0x192C,   // 192C..192F; UNKNOWN
4727             0x1930,   // 1930..193B; LIMBU
4728             0x193C,   // 193C..193F; UNKNOWN
4729             0x1940,   // 1940      ; LIMBU
4730             0x1941,   // 1941..1943; UNKNOWN
4731             0x1944,   // 1944..194F; LIMBU
4732             0x1950,   // 1950..196D; TAI_LE
4733             0x196E,   // 196E..196F; UNKNOWN
4734             0x1970,   // 1970..1974; TAI_LE
4735             0x1975,   // 1975..197F; UNKNOWN
4736             0x1980,   // 1980..19AB; NEW_TAI_LUE
4737             0x19AC,   // 19AC..19AF; UNKNOWN
4738             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
4739             0x19CA,   // 19CA..19CF; UNKNOWN
4740             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
4741             0x19DB,   // 19DB..19DD; UNKNOWN
4742             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
4743             0x19E0,   // 19E0..19FF; KHMER
4744             0x1A00,   // 1A00..1A1B; BUGINESE
4745             0x1A1C,   // 1A1C..1A1D; UNKNOWN
4746             0x1A1E,   // 1A1E..1A1F; BUGINESE
4747             0x1A20,   // 1A20..1A5E; TAI_THAM
4748             0x1A5F,   // 1A5F      ; UNKNOWN
4749             0x1A60,   // 1A60..1A7C; TAI_THAM
4750             0x1A7D,   // 1A7D..1A7E; UNKNOWN
4751             0x1A7F,   // 1A7F..1A89; TAI_THAM
4752             0x1A8A,   // 1A8A..1A8F; UNKNOWN
4753             0x1A90,   // 1A90..1A99; TAI_THAM
4754             0x1A9A,   // 1A9A..1A9F; UNKNOWN
4755             0x1AA0,   // 1AA0..1AAD; TAI_THAM
4756             0x1AAE,   // 1AAE..1AAF; UNKNOWN
4757             0x1AB0,   // 1AB0..1ABE; INHERITED
4758             0x1ABF,   // 1ABF..1AFF; UNKNOWN
4759             0x1B00,   // 1B00..1B4B; BALINESE
4760             0x1B4C,   // 1B4C..1B4F; UNKNOWN
4761             0x1B50,   // 1B50..1B7C; BALINESE
4762             0x1B7D,   // 1B7D..1B7F; UNKNOWN
4763             0x1B80,   // 1B80..1BBF; SUNDANESE
4764             0x1BC0,   // 1BC0..1BF3; BATAK
4765             0x1BF4,   // 1BF4..1BFB; UNKNOWN
4766             0x1BFC,   // 1BFC..1BFF; BATAK
4767             0x1C00,   // 1C00..1C37; LEPCHA
4768             0x1C38,   // 1C38..1C3A; UNKNOWN
4769             0x1C3B,   // 1C3B..1C49; LEPCHA
4770             0x1C4A,   // 1C4A..1C4C; UNKNOWN
4771             0x1C4D,   // 1C4D..1C4F; LEPCHA
4772             0x1C50,   // 1C50..1C7F; OL_CHIKI
4773             0x1C80,   // 1C80..1CBF; UNKNOWN
4774             0x1CC0,   // 1CC0..1CC7; SUNDANESE
4775             0x1CC8,   // 1CC8..1CCF; UNKNOWN
4776             0x1CD0,   // 1CD0..1CD2; INHERITED
4777             0x1CD3,   // 1CD3      ; COMMON
4778             0x1CD4,   // 1CD4..1CE0; INHERITED
4779             0x1CE1,   // 1CE1      ; COMMON
4780             0x1CE2,   // 1CE2..1CE8; INHERITED
4781             0x1CE9,   // 1CE9..1CEC; COMMON
4782             0x1CED,   // 1CED      ; INHERITED
4783             0x1CEE,   // 1CEE..1CF3; COMMON
4784             0x1CF4,   // 1CF4      ; INHERITED
4785             0x1CF5,   // 1CF5..1CF6; COMMON
4786             0x1CF7,   // 1CF7      ; UNKNOWN
4787             0x1CF8,   // 1CF8..1CF9; INHERITED
4788             0x1CFA,   // 1CFA..1CFF; UNKNOWN
4789             0x1D00,   // 1D00..1D25; LATIN
4790             0x1D26,   // 1D26..1D2A; GREEK
4791             0x1D2B,   // 1D2B      ; CYRILLIC
4792             0x1D2C,   // 1D2C..1D5C; LATIN
4793             0x1D5D,   // 1D5D..1D61; GREEK
4794             0x1D62,   // 1D62..1D65; LATIN
4795             0x1D66,   // 1D66..1D6A; GREEK
4796             0x1D6B,   // 1D6B..1D77; LATIN
4797             0x1D78,   // 1D78      ; CYRILLIC
4798             0x1D79,   // 1D79..1DBE; LATIN
4799             0x1DBF,   // 1DBF      ; GREEK
4800             0x1DC0,   // 1DC0..1DF5; INHERITED
4801             0x1DF6,   // 1DF6..1DFB; UNKNOWN
4802             0x1DFC,   // 1DFC..1DFF; INHERITED
4803             0x1E00,   // 1E00..1EFF; LATIN
4804             0x1F00,   // 1F00..1F15; GREEK
4805             0x1F16,   // 1F16..1F17; UNKNOWN
4806             0x1F18,   // 1F18..1F1D; GREEK
4807             0x1F1E,   // 1F1E..1F1F; UNKNOWN
4808             0x1F20,   // 1F20..1F45; GREEK
4809             0x1F46,   // 1F46..1F47; UNKNOWN
4810             0x1F48,   // 1F48..1F4D; GREEK
4811             0x1F4E,   // 1F4E..1F4F; UNKNOWN
4812             0x1F50,   // 1F50..1F57; GREEK
4813             0x1F58,   // 1F58      ; UNKNOWN
4814             0x1F59,   // 1F59      ; GREEK
4815             0x1F5A,   // 1F5A      ; UNKNOWN
4816             0x1F5B,   // 1F5B      ; GREEK
4817             0x1F5C,   // 1F5C      ; UNKNOWN
4818             0x1F5D,   // 1F5D      ; GREEK
4819             0x1F5E,   // 1F5E      ; UNKNOWN
4820             0x1F5F,   // 1F5F..1F7D; GREEK
4821             0x1F7E,   // 1F7E..1F7F; UNKNOWN
4822             0x1F80,   // 1F80..1FB4; GREEK
4823             0x1FB5,   // 1FB5      ; UNKNOWN
4824             0x1FB6,   // 1FB6..1FC4; GREEK
4825             0x1FC5,   // 1FC5      ; UNKNOWN
4826             0x1FC6,   // 1FC6..1FD3; GREEK
4827             0x1FD4,   // 1FD4..1FD5; UNKNOWN
4828             0x1FD6,   // 1FD6..1FDB; GREEK
4829             0x1FDC,   // 1FDC      ; UNKNOWN
4830             0x1FDD,   // 1FDD..1FEF; GREEK
4831             0x1FF0,   // 1FF0..1FF1; UNKNOWN
4832             0x1FF2,   // 1FF2..1FF4; GREEK
4833             0x1FF5,   // 1FF5      ; UNKNOWN
4834             0x1FF6,   // 1FF6..1FFE; GREEK
4835             0x1FFF,   // 1FFF      ; UNKNOWN
4836             0x2000,   // 2000..200B; COMMON
4837             0x200C,   // 200C..200D; INHERITED
4838             0x200E,   // 200E..2064; COMMON
4839             0x2065,   // 2065      ; UNKNOWN
4840             0x2066,   // 2066..2070; COMMON
4841             0x2071,   // 2071      ; LATIN
4842             0x2072,   // 2072..2073; UNKNOWN
4843             0x2074,   // 2074..207E; COMMON
4844             0x207F,   // 207F      ; LATIN
4845             0x2080,   // 2080..208E; COMMON
4846             0x208F,   // 208F      ; UNKNOWN
4847             0x2090,   // 2090..209C; LATIN
4848             0x209D,   // 209D..209F; UNKNOWN
4849             0x20A0,   // 20A0..20BD; COMMON
4850             0x20BE,   // 20BE..20CF; UNKNOWN
4851             0x20D0,   // 20D0..20F0; INHERITED
4852             0x20F1,   // 20F1..20FF; UNKNOWN
4853             0x2100,   // 2100..2125; COMMON
4854             0x2126,   // 2126      ; GREEK
4855             0x2127,   // 2127..2129; COMMON
4856             0x212A,   // 212A..212B; LATIN
4857             0x212C,   // 212C..2131; COMMON
4858             0x2132,   // 2132      ; LATIN
4859             0x2133,   // 2133..214D; COMMON
4860             0x214E,   // 214E      ; LATIN
4861             0x214F,   // 214F..215F; COMMON
4862             0x2160,   // 2160..2188; LATIN
4863             0x2189,   // 2189      ; COMMON
4864             0x218A,   // 218A..218F; UNKNOWN
4865             0x2190,   // 2190..23FA; COMMON
4866             0x23FB,   // 23FB..23FF; UNKNOWN
4867             0x2400,   // 2400..2426; COMMON
4868             0x2427,   // 2427..243F; UNKNOWN
4869             0x2440,   // 2440..244A; COMMON
4870             0x244B,   // 244B..245F; UNKNOWN
4871             0x2460,   // 2460..27FF; COMMON
4872             0x2800,   // 2800..28FF; BRAILLE
4873             0x2900,   // 2900..2B73; COMMON
4874             0x2B74,   // 2B74..2B75; UNKNOWN
4875             0x2B76,   // 2B76..2B95; COMMON
4876             0x2B96,   // 2B96..2B97; UNKNOWN
4877             0x2B98,   // 2B98..2BB9; COMMON
4878             0x2BBA,   // 2BBA..2BBC; UNKNOWN
4879             0x2BBD,   // 2BBD..2BC8; COMMON
4880             0x2BC9,   // 2BC9      ; UNKNOWN
4881             0x2BCA,   // 2BCA..2BD1; COMMON
4882             0x2BD2,   // 2BD2..2BFF; UNKNOWN
4883             0x2C00,   // 2C00..2C2E; GLAGOLITIC
4884             0x2C2F,   // 2C2F      ; UNKNOWN
4885             0x2C30,   // 2C30..2C5E; GLAGOLITIC
4886             0x2C5F,   // 2C5F      ; UNKNOWN
4887             0x2C60,   // 2C60..2C7F; LATIN
4888             0x2C80,   // 2C80..2CF3; COPTIC
4889             0x2CF4,   // 2CF4..2CF8; UNKNOWN
4890             0x2CF9,   // 2CF9..2CFF; COPTIC
4891             0x2D00,   // 2D00..2D25; GEORGIAN
4892             0x2D26,   // 2D26      ; UNKNOWN
4893             0x2D27,   // 2D27      ; GEORGIAN
4894             0x2D28,   // 2D28..2D2C; UNKNOWN
4895             0x2D2D,   // 2D2D      ; GEORGIAN
4896             0x2D2E,   // 2D2E..2D2F; UNKNOWN
4897             0x2D30,   // 2D30..2D67; TIFINAGH
4898             0x2D68,   // 2D68..2D6E; UNKNOWN
4899             0x2D6F,   // 2D6F..2D70; TIFINAGH
4900             0x2D71,   // 2D71..2D7E; UNKNOWN
4901             0x2D7F,   // 2D7F      ; TIFINAGH
4902             0x2D80,   // 2D80..2D96; ETHIOPIC
4903             0x2D97,   // 2D97..2D9F; UNKNOWN
4904             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
4905             0x2DA7,   // 2DA7      ; UNKNOWN
4906             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
4907             0x2DAF,   // 2DAF      ; UNKNOWN
4908             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
4909             0x2DB7,   // 2DB7      ; UNKNOWN
4910             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
4911             0x2DBF,   // 2DBF      ; UNKNOWN
4912             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
4913             0x2DC7,   // 2DC7      ; UNKNOWN
4914             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
4915             0x2DCF,   // 2DCF      ; UNKNOWN
4916             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
4917             0x2DD7,   // 2DD7      ; UNKNOWN
4918             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
4919             0x2DDF,   // 2DDF      ; UNKNOWN
4920             0x2DE0,   // 2DE0..2DFF; CYRILLIC
4921             0x2E00,   // 2E00..2E42; COMMON
4922             0x2E43,   // 2E43..2E7F; UNKNOWN
4923             0x2E80,   // 2E80..2E99; HAN
4924             0x2E9A,   // 2E9A      ; UNKNOWN
4925             0x2E9B,   // 2E9B..2EF3; HAN
4926             0x2EF4,   // 2EF4..2EFF; UNKNOWN
4927             0x2F00,   // 2F00..2FD5; HAN
4928             0x2FD6,   // 2FD6..2FEF; UNKNOWN
4929             0x2FF0,   // 2FF0..2FFB; COMMON
4930             0x2FFC,   // 2FFC..2FFF; UNKNOWN
4931             0x3000,   // 3000..3004; COMMON
4932             0x3005,   // 3005      ; HAN
4933             0x3006,   // 3006      ; COMMON
4934             0x3007,   // 3007      ; HAN
4935             0x3008,   // 3008..3020; COMMON
4936             0x3021,   // 3021..3029; HAN
4937             0x302A,   // 302A..302D; INHERITED
4938             0x302E,   // 302E..302F; HANGUL
4939             0x3030,   // 3030..3037; COMMON
4940             0x3038,   // 3038..303B; HAN
4941             0x303C,   // 303C..303F; COMMON
4942             0x3040,   // 3040      ; UNKNOWN
4943             0x3041,   // 3041..3096; HIRAGANA
4944             0x3097,   // 3097..3098; UNKNOWN
4945             0x3099,   // 3099..309A; INHERITED
4946             0x309B,   // 309B..309C; COMMON
4947             0x309D,   // 309D..309F; HIRAGANA
4948             0x30A0,   // 30A0      ; COMMON
4949             0x30A1,   // 30A1..30FA; KATAKANA
4950             0x30FB,   // 30FB..30FC; COMMON
4951             0x30FD,   // 30FD..30FF; KATAKANA
4952             0x3100,   // 3100..3104; UNKNOWN
4953             0x3105,   // 3105..312D; BOPOMOFO
4954             0x312E,   // 312E..3130; UNKNOWN
4955             0x3131,   // 3131..318E; HANGUL
4956             0x318F,   // 318F      ; UNKNOWN
4957             0x3190,   // 3190..319F; COMMON
4958             0x31A0,   // 31A0..31BA; BOPOMOFO
4959             0x31BB,   // 31BB..31BF; UNKNOWN
4960             0x31C0,   // 31C0..31E3; COMMON
4961             0x31E4,   // 31E4..31EF; UNKNOWN
4962             0x31F0,   // 31F0..31FF; KATAKANA
4963             0x3200,   // 3200..321E; HANGUL
4964             0x321F,   // 321F      ; UNKNOWN
4965             0x3220,   // 3220..325F; COMMON
4966             0x3260,   // 3260..327E; HANGUL
4967             0x327F,   // 327F..32CF; COMMON
4968             0x32D0,   // 32D0..32FE; KATAKANA
4969             0x32FF,   // 32FF      ; UNKNOWN
4970             0x3300,   // 3300..3357; KATAKANA
4971             0x3358,   // 3358..33FF; COMMON
4972             0x3400,   // 3400..4DB5; HAN
4973             0x4DB6,   // 4DB6..4DBF; UNKNOWN
4974             0x4DC0,   // 4DC0..4DFF; COMMON
4975             0x4E00,   // 4E00..9FCC; HAN
4976             0x9FCD,   // 9FCD..9FFF; UNKNOWN
4977             0xA000,   // A000..A48C; YI
4978             0xA48D,   // A48D..A48F; UNKNOWN
4979             0xA490,   // A490..A4C6; YI
4980             0xA4C7,   // A4C7..A4CF; UNKNOWN
4981             0xA4D0,   // A4D0..A4FF; LISU
4982             0xA500,   // A500..A62B; VAI
4983             0xA62C,   // A62C..A63F; UNKNOWN
4984             0xA640,   // A640..A69D; CYRILLIC
4985             0xA69E,   // A69E      ; UNKNOWN
4986             0xA69F,   // A69F      ; CYRILLIC
4987             0xA6A0,   // A6A0..A6F7; BAMUM
4988             0xA6F8,   // A6F8..A6FF; UNKNOWN
4989             0xA700,   // A700..A721; COMMON
4990             0xA722,   // A722..A787; LATIN
4991             0xA788,   // A788..A78A; COMMON
4992             0xA78B,   // A78B..A78E; LATIN
4993             0xA78F,   // A78F      ; UNKNOWN
4994             0xA790,   // A790..A7AD; LATIN
4995             0xA7AE,   // A7AE..A7AF; UNKNOWN
4996             0xA7B0,   // A7B0..A7B1; LATIN
4997             0xA7B2,   // A7B2..A7F6; UNKNOWN
4998             0xA7F7,   // A7F7..A7FF; LATIN
4999             0xA800,   // A800..A82B; SYLOTI_NAGRI
5000             0xA82C,   // A82C..A82F; UNKNOWN
5001             0xA830,   // A830..A839; COMMON
5002             0xA83A,   // A83A..A83F; UNKNOWN
5003             0xA840,   // A840..A877; PHAGS_PA
5004             0xA878,   // A878..A87F; UNKNOWN
5005             0xA880,   // A880..A8C4; SAURASHTRA
5006             0xA8C5,   // A8C5..A8CD; UNKNOWN
5007             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5008             0xA8DA,   // A8DA..A8DF; UNKNOWN
5009             0xA8E0,   // A8E0..A8FB; DEVANAGARI
5010             0xA8FC,   // A8FC..A8FF; UNKNOWN
5011             0xA900,   // A900..A92D; KAYAH_LI
5012             0xA92E,   // A92E      ; COMMON
5013             0xA92F,   // A92F      ; KAYAH_LI
5014             0xA930,   // A930..A953; REJANG
5015             0xA954,   // A954..A95E; UNKNOWN
5016             0xA95F,   // A95F      ; REJANG
5017             0xA960,   // A960..A97C; HANGUL
5018             0xA97D,   // A97D..A97F; UNKNOWN
5019             0xA980,   // A980..A9CD; JAVANESE
5020             0xA9CE,   // A9CE      ; UNKNOWN
5021             0xA9CF,   // A9CF      ; COMMON
5022             0xA9D0,   // A9D0..A9D9; JAVANESE
5023             0xA9DA,   // A9DA..A9DD; UNKNOWN
5024             0xA9DE,   // A9DE..A9DF; JAVANESE
5025             0xA9E0,   // A9E0..A9FE; MYANMAR
5026             0xA9FF,   // A9FF      ; UNKNOWN
5027             0xAA00,   // AA00..AA36; CHAM
5028             0xAA37,   // AA37..AA3F; UNKNOWN
5029             0xAA40,   // AA40..AA4D; CHAM
5030             0xAA4E,   // AA4E..AA4F; UNKNOWN
5031             0xAA50,   // AA50..AA59; CHAM
5032             0xAA5A,   // AA5A..AA5B; UNKNOWN
5033             0xAA5C,   // AA5C..AA5F; CHAM
5034             0xAA60,   // AA60..AA7F; MYANMAR
5035             0xAA80,   // AA80..AAC2; TAI_VIET
5036             0xAAC3,   // AAC3..AADA; UNKNOWN
5037             0xAADB,   // AADB..AADF; TAI_VIET
5038             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5039             0xAAF7,   // AAF7..AB00; UNKNOWN
5040             0xAB01,   // AB01..AB06; ETHIOPIC
5041             0xAB07,   // AB07..AB08; UNKNOWN
5042             0xAB09,   // AB09..AB0E; ETHIOPIC
5043             0xAB0F,   // AB0F..AB10; UNKNOWN
5044             0xAB11,   // AB11..AB16; ETHIOPIC
5045             0xAB17,   // AB17..AB1F; UNKNOWN
5046             0xAB20,   // AB20..AB26; ETHIOPIC
5047             0xAB27,   // AB27      ; UNKNOWN
5048             0xAB28,   // AB28..AB2E; ETHIOPIC
5049             0xAB2F,   // AB2F      ; UNKNOWN
5050             0xAB30,   // AB30..AB5A; LATIN
5051             0xAB5B,   // AB5B      ; COMMON
5052             0xAB5C,   // AB5C..AB5F; LATIN
5053             0xAB60,   // AB60..AB63; UNKNOWN
5054             0xAB64,   // AB64      ; LATIN
5055             0xAB65,   // AB65      ; GREEK
5056             0xAB66,   // AB66..ABBF; UNKNOWN
5057             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5058             0xABEE,   // ABEE..ABEF; UNKNOWN
5059             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5060             0xABFA,   // ABFA..ABFF; UNKNOWN
5061             0xAC00,   // AC00..D7A3; HANGUL
5062             0xD7A4,   // D7A4..D7AF; UNKNOWN
5063             0xD7B0,   // D7B0..D7C6; HANGUL
5064             0xD7C7,   // D7C7..D7CA; UNKNOWN
5065             0xD7CB,   // D7CB..D7FB; HANGUL
5066             0xD7FC,   // D7FC..F8FF; UNKNOWN
5067             0xF900,   // F900..FA6D; HAN
5068             0xFA6E,   // FA6E..FA6F; UNKNOWN
5069             0xFA70,   // FA70..FAD9; HAN
5070             0xFADA,   // FADA..FAFF; UNKNOWN
5071             0xFB00,   // FB00..FB06; LATIN
5072             0xFB07,   // FB07..FB12; UNKNOWN
5073             0xFB13,   // FB13..FB17; ARMENIAN
5074             0xFB18,   // FB18..FB1C; UNKNOWN
5075             0xFB1D,   // FB1D..FB36; HEBREW
5076             0xFB37,   // FB37      ; UNKNOWN
5077             0xFB38,   // FB38..FB3C; HEBREW
5078             0xFB3D,   // FB3D      ; UNKNOWN
5079             0xFB3E,   // FB3E      ; HEBREW
5080             0xFB3F,   // FB3F      ; UNKNOWN
5081             0xFB40,   // FB40..FB41; HEBREW
5082             0xFB42,   // FB42      ; UNKNOWN
5083             0xFB43,   // FB43..FB44; HEBREW
5084             0xFB45,   // FB45      ; UNKNOWN
5085             0xFB46,   // FB46..FB4F; HEBREW
5086             0xFB50,   // FB50..FBC1; ARABIC
5087             0xFBC2,   // FBC2..FBD2; UNKNOWN
5088             0xFBD3,   // FBD3..FD3D; ARABIC
5089             0xFD3E,   // FD3E..FD3F; COMMON
5090             0xFD40,   // FD40..FD4F; UNKNOWN
5091             0xFD50,   // FD50..FD8F; ARABIC
5092             0xFD90,   // FD90..FD91; UNKNOWN
5093             0xFD92,   // FD92..FDC7; ARABIC
5094             0xFDC8,   // FDC8..FDEF; UNKNOWN
5095             0xFDF0,   // FDF0..FDFD; ARABIC
5096             0xFDFE,   // FDFE..FDFF; UNKNOWN
5097             0xFE00,   // FE00..FE0F; INHERITED
5098             0xFE10,   // FE10..FE19; COMMON
5099             0xFE1A,   // FE1A..FE1F; UNKNOWN
5100             0xFE20,   // FE20..FE2D; INHERITED
5101             0xFE2E,   // FE2E..FE2F; UNKNOWN
5102             0xFE30,   // FE30..FE52; COMMON
5103             0xFE53,   // FE53      ; UNKNOWN
5104             0xFE54,   // FE54..FE66; COMMON
5105             0xFE67,   // FE67      ; UNKNOWN
5106             0xFE68,   // FE68..FE6B; COMMON
5107             0xFE6C,   // FE6C..FE6F; UNKNOWN
5108             0xFE70,   // FE70..FE74; ARABIC
5109             0xFE75,   // FE75      ; UNKNOWN
5110             0xFE76,   // FE76..FEFC; ARABIC
5111             0xFEFD,   // FEFD..FEFE; UNKNOWN
5112             0xFEFF,   // FEFF      ; COMMON
5113             0xFF00,   // FF00      ; UNKNOWN
5114             0xFF01,   // FF01..FF20; COMMON
5115             0xFF21,   // FF21..FF3A; LATIN
5116             0xFF3B,   // FF3B..FF40; COMMON
5117             0xFF41,   // FF41..FF5A; LATIN
5118             0xFF5B,   // FF5B..FF65; COMMON
5119             0xFF66,   // FF66..FF6F; KATAKANA
5120             0xFF70,   // FF70      ; COMMON
5121             0xFF71,   // FF71..FF9D; KATAKANA
5122             0xFF9E,   // FF9E..FF9F; COMMON
5123             0xFFA0,   // FFA0..FFBE; HANGUL
5124             0xFFBF,   // FFBF..FFC1; UNKNOWN
5125             0xFFC2,   // FFC2..FFC7; HANGUL
5126             0xFFC8,   // FFC8..FFC9; UNKNOWN
5127             0xFFCA,   // FFCA..FFCF; HANGUL
5128             0xFFD0,   // FFD0..FFD1; UNKNOWN
5129             0xFFD2,   // FFD2..FFD7; HANGUL
5130             0xFFD8,   // FFD8..FFD9; UNKNOWN
5131             0xFFDA,   // FFDA..FFDC; HANGUL
5132             0xFFDD,   // FFDD..FFDF; UNKNOWN
5133             0xFFE0,   // FFE0..FFE6; COMMON
5134             0xFFE7,   // FFE7      ; UNKNOWN
5135             0xFFE8,   // FFE8..FFEE; COMMON
5136             0xFFEF,   // FFEF..FFF8; UNKNOWN
5137             0xFFF9,   // FFF9..FFFD; COMMON
5138             0xFFFE,   // FFFE..FFFF; UNKNOWN
5139             0x10000,  // 10000..1000B; LINEAR_B
5140             0x1000C,  // 1000C       ; UNKNOWN
5141             0x1000D,  // 1000D..10026; LINEAR_B
5142             0x10027,  // 10027       ; UNKNOWN
5143             0x10028,  // 10028..1003A; LINEAR_B
5144             0x1003B,  // 1003B       ; UNKNOWN
5145             0x1003C,  // 1003C..1003D; LINEAR_B
5146             0x1003E,  // 1003E       ; UNKNOWN
5147             0x1003F,  // 1003F..1004D; LINEAR_B
5148             0x1004E,  // 1004E..1004F; UNKNOWN
5149             0x10050,  // 10050..1005D; LINEAR_B
5150             0x1005E,  // 1005E..1007F; UNKNOWN
5151             0x10080,  // 10080..100FA; LINEAR_B
5152             0x100FB,  // 100FB..100FF; UNKNOWN
5153             0x10100,  // 10100..10102; COMMON
5154             0x10103,  // 10103..10106; UNKNOWN
5155             0x10107,  // 10107..10133; COMMON
5156             0x10134,  // 10134..10136; UNKNOWN
5157             0x10137,  // 10137..1013F; COMMON
5158             0x10140,  // 10140..1018C; GREEK
5159             0x1018D,  // 1018D..1018F; UNKNOWN
5160             0x10190,  // 10190..1019B; COMMON
5161             0x1019C,  // 1019C..1019F; UNKNOWN
5162             0x101A0,  // 101A0       ; GREEK
5163             0x101A1,  // 101A1..101CF; UNKNOWN
5164             0x101D0,  // 101D0..101FC; COMMON
5165             0x101FD,  // 101FD       ; INHERITED
5166             0x101FE,  // 101FE..1027F; UNKNOWN
5167             0x10280,  // 10280..1029C; LYCIAN
5168             0x1029D,  // 1029D..1029F; UNKNOWN
5169             0x102A0,  // 102A0..102D0; CARIAN
5170             0x102D1,  // 102D1..102DF; UNKNOWN
5171             0x102E0,  // 102E0       ; INHERITED
5172             0x102E1,  // 102E1..102FB; COMMON
5173             0x102FC,  // 102FC..102FF; UNKNOWN
5174             0x10300,  // 10300..10323; OLD_ITALIC
5175             0x10324,  // 10324..1032F; UNKNOWN
5176             0x10330,  // 10330..1034A; GOTHIC
5177             0x1034B,  // 1034B..1034F; UNKNOWN
5178             0x10350,  // 10350..1037A; OLD_PERMIC
5179             0x1037B,  // 1037B..1037F; UNKNOWN
5180             0x10380,  // 10380..1039D; UGARITIC
5181             0x1039E,  // 1039E       ; UNKNOWN
5182             0x1039F,  // 1039F       ; UGARITIC
5183             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5184             0x103C4,  // 103C4..103C7; UNKNOWN
5185             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5186             0x103D6,  // 103D6..103FF; UNKNOWN
5187             0x10400,  // 10400..1044F; DESERET
5188             0x10450,  // 10450..1047F; SHAVIAN
5189             0x10480,  // 10480..1049D; OSMANYA
5190             0x1049E,  // 1049E..1049F; UNKNOWN
5191             0x104A0,  // 104A0..104A9; OSMANYA
5192             0x104AA,  // 104AA..104FF; UNKNOWN
5193             0x10500,  // 10500..10527; ELBASAN
5194             0x10528,  // 10528..1052F; UNKNOWN
5195             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5196             0x10564,  // 10564..1056E; UNKNOWN
5197             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5198             0x10570,  // 10570..105FF; UNKNOWN
5199             0x10600,  // 10600..10736; LINEAR_A
5200             0x10737,  // 10737..1073F; UNKNOWN
5201             0x10740,  // 10740..10755; LINEAR_A
5202             0x10756,  // 10756..1075F; UNKNOWN
5203             0x10760,  // 10760..10767; LINEAR_A
5204             0x10768,  // 10768..107FF; UNKNOWN
5205             0x10800,  // 10800..10805; CYPRIOT
5206             0x10806,  // 10806..10807; UNKNOWN
5207             0x10808,  // 10808       ; CYPRIOT
5208             0x10809,  // 10809       ; UNKNOWN
5209             0x1080A,  // 1080A..10835; CYPRIOT
5210             0x10836,  // 10836       ; UNKNOWN
5211             0x10837,  // 10837..10838; CYPRIOT
5212             0x10839,  // 10839..1083B; UNKNOWN
5213             0x1083C,  // 1083C       ; CYPRIOT
5214             0x1083D,  // 1083D..1083E; UNKNOWN
5215             0x1083F,  // 1083F       ; CYPRIOT
5216             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5217             0x10856,  // 10856       ; UNKNOWN
5218             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5219             0x10860,  // 10860..1087F; PALMYRENE
5220             0x10880,  // 10880..1089E; NABATAEAN
5221             0x1089F,  // 1089F..108A6; UNKNOWN
5222             0x108A7,  // 108A7..108AF; NABATAEAN
5223             0x108B0,  // 108B0..108FF; UNKNOWN
5224             0x10900,  // 10900..1091B; PHOENICIAN
5225             0x1091C,  // 1091C..1091E; UNKNOWN
5226             0x1091F,  // 1091F       ; PHOENICIAN
5227             0x10920,  // 10920..10939; LYDIAN
5228             0x1093A,  // 1093A..1093E; UNKNOWN
5229             0x1093F,  // 1093F       ; LYDIAN
5230             0x10940,  // 10940..1097F; UNKNOWN
5231             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5232             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5233             0x109B8,  // 109B8..109BD; UNKNOWN
5234             0x109BE,  // 109BE..109BF; MEROITIC_CURSIVE
5235             0x109C0,  // 109C0..109FF; UNKNOWN
5236             0x10A00,  // 10A00..10A03; KHAROSHTHI
5237             0x10A04,  // 10A04       ; UNKNOWN
5238             0x10A05,  // 10A05..10A06; KHAROSHTHI
5239             0x10A07,  // 10A07..10A0B; UNKNOWN
5240             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5241             0x10A14,  // 10A14       ; UNKNOWN
5242             0x10A15,  // 10A15..10A17; KHAROSHTHI
5243             0x10A18,  // 10A18       ; UNKNOWN
5244             0x10A19,  // 10A19..10A33; KHAROSHTHI
5245             0x10A34,  // 10A34..10A37; UNKNOWN
5246             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5247             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5248             0x10A3F,  // 10A3F..10A47; KHAROSHTHI
5249             0x10A48,  // 10A48..10A4F; UNKNOWN
5250             0x10A50,  // 10A50..10A58; KHAROSHTHI
5251             0x10A59,  // 10A59..10A5F; UNKNOWN
5252             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5253             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5254             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5255             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5256             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5257             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5258             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5259             0x10B00,  // 10B00..10B35; AVESTAN
5260             0x10B36,  // 10B36..10B38; UNKNOWN
5261             0x10B39,  // 10B39..10B3F; AVESTAN
5262             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5263             0x10B56,  // 10B56..10B57; UNKNOWN
5264             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5265             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5266             0x10B73,  // 10B73..10B77; UNKNOWN
5267             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5268             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5269             0x10B92,  // 10B92..10B98; UNKNOWN
5270             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5271             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5272             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5273             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5274             0x10C00,  // 10C00..10C48; OLD_TURKIC
5275             0x10C49,  // 10C49..10E5F; UNKNOWN
5276             0x10E60,  // 10E60..10E7E; ARABIC
5277             0x10E7F,  // 10E7F..10FFF; UNKNOWN
5278             0x11000,  // 11000..1104D; BRAHMI
5279             0x1104E,  // 1104E..11051; UNKNOWN
5280             0x11052,  // 11052..1106F; BRAHMI
5281             0x11070,  // 11070..1107E; UNKNOWN
5282             0x1107F,  // 1107F       ; BRAHMI
5283             0x11080,  // 11080..110C1; KAITHI
5284             0x110C2,  // 110C2..110CF; UNKNOWN
5285             0x110D0,  // 110D0..110E8; SORA_SOMPENG
5286             0x110E9,  // 110E9..110EF; UNKNOWN
5287             0x110F0,  // 110F0..110F9; SORA_SOMPENG
5288             0x110FA,  // 110FA..110FF; UNKNOWN
5289             0x11100,  // 11100..11134; CHAKMA
5290             0x11135,  // 11135       ; UNKNOWN
5291             0x11136,  // 11136..11143; CHAKMA
5292             0x11144,  // 11144..1114F; UNKNOWN
5293             0x11150,  // 11150..11176; MAHAJANI
5294             0x11177,  // 11177..1117F; UNKNOWN
5295             0x11180,  // 11180..111C8; SHARADA
5296             0x111C9,  // 111C9..111CC; UNKNOWN
5297             0x111CD,  // 111CD       ; SHARADA
5298             0x111CE,  // 111CE..111CF; UNKNOWN
5299             0x111D0,  // 111D0..111DA; SHARADA
5300             0x111DB,  // 111DB..111E0; UNKNOWN
5301             0x111E1,  // 111E1..111F4; SINHALA
5302             0x111F5,  // 111F5..111FF; UNKNOWN
5303             0x11200,  // 11200..11211; KHOJKI
5304             0x11212,  // 11212       ; UNKNOWN
5305             0x11213,  // 11213..1123D; KHOJKI
5306             0x1123E,  // 1123E..112AF; UNKNOWN
5307             0x112B0,  // 112B0..112EA; KHUDAWADI
5308             0x112EB,  // 112EB..112EF; UNKNOWN
5309             0x112F0,  // 112F0..112F9; KHUDAWADI
5310             0x112FA,  // 112FA..11300; UNKNOWN
5311             0x11301,  // 11301..11303; GRANTHA
5312             0x11304,  // 11304       ; UNKNOWN
5313             0x11305,  // 11305..1130C; GRANTHA
5314             0x1130D,  // 1130D..1130E; UNKNOWN
5315             0x1130F,  // 1130F..11310; GRANTHA
5316             0x11311,  // 11311..11312; UNKNOWN
5317             0x11313,  // 11313..11328; GRANTHA
5318             0x11329,  // 11329       ; UNKNOWN
5319             0x1132A,  // 1132A..11330; GRANTHA
5320             0x11331,  // 11331       ; UNKNOWN
5321             0x11332,  // 11332..11333; GRANTHA
5322             0x11334,  // 11334       ; UNKNOWN
5323             0x11335,  // 11335..11339; GRANTHA
5324             0x1133A,  // 1133A..1133B; UNKNOWN
5325             0x1133C,  // 1133C..11344; GRANTHA
5326             0x11345,  // 11345..11346; UNKNOWN
5327             0x11347,  // 11347..11348; GRANTHA
5328             0x11349,  // 11349..1134A; UNKNOWN
5329             0x1134B,  // 1134B..1134D; GRANTHA
5330             0x1134E,  // 1134E..11356; UNKNOWN
5331             0x11357,  // 11357       ; GRANTHA
5332             0x11358,  // 11358..1135C; UNKNOWN
5333             0x1135D,  // 1135D..11363; GRANTHA
5334             0x11364,  // 11364..11365; UNKNOWN
5335             0x11366,  // 11366..1136C; GRANTHA
5336             0x1136D,  // 1136D..1136F; UNKNOWN
5337             0x11370,  // 11370..11374; GRANTHA
5338             0x11375,  // 11375..1147F; UNKNOWN
5339             0x11480,  // 11480..114C7; TIRHUTA
5340             0x114C8,  // 114C8..114CF; UNKNOWN
5341             0x114D0,  // 114D0..114D9; TIRHUTA
5342             0x114DA,  // 114DA..1157F; UNKNOWN
5343             0x11580,  // 11580..115B5; SIDDHAM
5344             0x115B6,  // 115B6..115B7; UNKNOWN
5345             0x115B8,  // 115B8..115C9; SIDDHAM
5346             0x115CA,  // 115CA..115FF; UNKNOWN
5347             0x11600,  // 11600..11644; MODI
5348             0x11645,  // 11645..1164F; UNKNOWN
5349             0x11650,  // 11650..11659; MODI
5350             0x1165A,  // 1165A..1167F; UNKNOWN
5351             0x11680,  // 11680..116B7; TAKRI
5352             0x116B8,  // 116B8..116BF; UNKNOWN
5353             0x116C0,  // 116C0..116C9; TAKRI
5354             0x116CA,  // 116CA..1189F; UNKNOWN
5355             0x118A0,  // 118A0..118F2; WARANG_CITI
5356             0x118F3,  // 118F3..118FE; UNKNOWN
5357             0x118FF,  // 118FF       ; WARANG_CITI
5358             0x11900,  // 11900..11ABF; UNKNOWN
5359             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
5360             0x11AF9,  // 11AF9..11FFF; UNKNOWN
5361             0x12000,  // 12000..12398; CUNEIFORM
5362             0x12399,  // 12399..123FF; UNKNOWN
5363             0x12400,  // 12400..1246E; CUNEIFORM
5364             0x1246F,  // 1246F       ; UNKNOWN
5365             0x12470,  // 12470..12474; CUNEIFORM
5366             0x12475,  // 12475..12FFF; UNKNOWN
5367             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
5368             0x1342F,  // 1342F..167FF; UNKNOWN
5369             0x16800,  // 16800..16A38; BAMUM
5370             0x16A39,  // 16A39..16A3F; UNKNOWN
5371             0x16A40,  // 16A40..16A5E; MRO
5372             0x16A5F,  // 16A5F       ; UNKNOWN
5373             0x16A60,  // 16A60..16A69; MRO
5374             0x16A6A,  // 16A6A..16A6D; UNKNOWN
5375             0x16A6E,  // 16A6E..16A6F; MRO
5376             0x16A70,  // 16A70..16ACF; UNKNOWN
5377             0x16AD0,  // 16AD0..16AED; BASSA_VAH
5378             0x16AEE,  // 16AEE..16AEF; UNKNOWN
5379             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
5380             0x16AF6,  // 16AF6..16AFF; UNKNOWN
5381             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
5382             0x16B46,  // 16B46..16B4F; UNKNOWN
5383             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
5384             0x16B5A,  // 16B5A       ; UNKNOWN
5385             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
5386             0x16B62,  // 16B62       ; UNKNOWN
5387             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
5388             0x16B78,  // 16B78..16B7C; UNKNOWN
5389             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
5390             0x16B90,  // 16B90..16EFF; UNKNOWN
5391             0x16F00,  // 16F00..16F44; MIAO
5392             0x16F45,  // 16F45..16F4F; UNKNOWN
5393             0x16F50,  // 16F50..16F7E; MIAO
5394             0x16F7F,  // 16F7F..16F8E; UNKNOWN
5395             0x16F8F,  // 16F8F..16F9F; MIAO
5396             0x16FA0,  // 16FA0..1AFFF; UNKNOWN
5397             0x1B000,  // 1B000       ; KATAKANA
5398             0x1B001,  // 1B001       ; HIRAGANA
5399             0x1B002,  // 1B002..1BBFF; UNKNOWN
5400             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
5401             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
5402             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
5403             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
5404             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
5405             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
5406             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
5407             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
5408             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
5409             0x1BCA0,  // 1BCA0..1BCA3; COMMON
5410             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
5411             0x1D000,  // 1D000..1D0F5; COMMON
5412             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
5413             0x1D100,  // 1D100..1D126; COMMON
5414             0x1D127,  // 1D127..1D128; UNKNOWN
5415             0x1D129,  // 1D129..1D166; COMMON
5416             0x1D167,  // 1D167..1D169; INHERITED
5417             0x1D16A,  // 1D16A..1D17A; COMMON
5418             0x1D17B,  // 1D17B..1D182; INHERITED
5419             0x1D183,  // 1D183..1D184; COMMON
5420             0x1D185,  // 1D185..1D18B; INHERITED
5421             0x1D18C,  // 1D18C..1D1A9; COMMON
5422             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
5423             0x1D1AE,  // 1D1AE..1D1DD; COMMON
5424             0x1D1DE,  // 1D1DE..1D1FF; UNKNOWN
5425             0x1D200,  // 1D200..1D245; GREEK
5426             0x1D246,  // 1D246..1D2FF; UNKNOWN
5427             0x1D300,  // 1D300..1D356; COMMON
5428             0x1D357,  // 1D357..1D35F; UNKNOWN
5429             0x1D360,  // 1D360..1D371; COMMON
5430             0x1D372,  // 1D372..1D3FF; UNKNOWN
5431             0x1D400,  // 1D400..1D454; COMMON
5432             0x1D455,  // 1D455       ; UNKNOWN
5433             0x1D456,  // 1D456..1D49C; COMMON
5434             0x1D49D,  // 1D49D       ; UNKNOWN
5435             0x1D49E,  // 1D49E..1D49F; COMMON
5436             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
5437             0x1D4A2,  // 1D4A2       ; COMMON
5438             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
5439             0x1D4A5,  // 1D4A5..1D4A6; COMMON
5440             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
5441             0x1D4A9,  // 1D4A9..1D4AC; COMMON
5442             0x1D4AD,  // 1D4AD       ; UNKNOWN
5443             0x1D4AE,  // 1D4AE..1D4B9; COMMON
5444             0x1D4BA,  // 1D4BA       ; UNKNOWN
5445             0x1D4BB,  // 1D4BB       ; COMMON
5446             0x1D4BC,  // 1D4BC       ; UNKNOWN
5447             0x1D4BD,  // 1D4BD..1D4C3; COMMON
5448             0x1D4C4,  // 1D4C4       ; UNKNOWN
5449             0x1D4C5,  // 1D4C5..1D505; COMMON
5450             0x1D506,  // 1D506       ; UNKNOWN
5451             0x1D507,  // 1D507..1D50A; COMMON
5452             0x1D50B,  // 1D50B..1D50C; UNKNOWN
5453             0x1D50D,  // 1D50D..1D514; COMMON
5454             0x1D515,  // 1D515       ; UNKNOWN
5455             0x1D516,  // 1D516..1D51C; COMMON
5456             0x1D51D,  // 1D51D       ; UNKNOWN
5457             0x1D51E,  // 1D51E..1D539; COMMON
5458             0x1D53A,  // 1D53A       ; UNKNOWN
5459             0x1D53B,  // 1D53B..1D53E; COMMON
5460             0x1D53F,  // 1D53F       ; UNKNOWN
5461             0x1D540,  // 1D540..1D544; COMMON
5462             0x1D545,  // 1D545       ; UNKNOWN
5463             0x1D546,  // 1D546       ; COMMON
5464             0x1D547,  // 1D547..1D549; UNKNOWN
5465             0x1D54A,  // 1D54A..1D550; COMMON
5466             0x1D551,  // 1D551       ; UNKNOWN
5467             0x1D552,  // 1D552..1D6A5; COMMON
5468             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
5469             0x1D6A8,  // 1D6A8..1D7CB; COMMON
5470             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
5471             0x1D7CE,  // 1D7CE..1D7FF; COMMON
5472             0x1D800,  // 1D800..1E7FF; UNKNOWN
5473             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
5474             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
5475             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
5476             0x1E8D7,  // 1E8D7..1EDFF; UNKNOWN
5477             0x1EE00,  // 1EE00..1EE03; ARABIC
5478             0x1EE04,  // 1EE04       ; UNKNOWN
5479             0x1EE05,  // 1EE05..1EE1F; ARABIC
5480             0x1EE20,  // 1EE20       ; UNKNOWN
5481             0x1EE21,  // 1EE21..1EE22; ARABIC
5482             0x1EE23,  // 1EE23       ; UNKNOWN
5483             0x1EE24,  // 1EE24       ; ARABIC
5484             0x1EE25,  // 1EE25..1EE26; UNKNOWN
5485             0x1EE27,  // 1EE27       ; ARABIC
5486             0x1EE28,  // 1EE28       ; UNKNOWN
5487             0x1EE29,  // 1EE29..1EE32; ARABIC
5488             0x1EE33,  // 1EE33       ; UNKNOWN
5489             0x1EE34,  // 1EE34..1EE37; ARABIC
5490             0x1EE38,  // 1EE38       ; UNKNOWN
5491             0x1EE39,  // 1EE39       ; ARABIC
5492             0x1EE3A,  // 1EE3A       ; UNKNOWN
5493             0x1EE3B,  // 1EE3B       ; ARABIC
5494             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
5495             0x1EE42,  // 1EE42       ; ARABIC
5496             0x1EE43,  // 1EE43..1EE46; UNKNOWN
5497             0x1EE47,  // 1EE47       ; ARABIC
5498             0x1EE48,  // 1EE48       ; UNKNOWN
5499             0x1EE49,  // 1EE49       ; ARABIC
5500             0x1EE4A,  // 1EE4A       ; UNKNOWN
5501             0x1EE4B,  // 1EE4B       ; ARABIC
5502             0x1EE4C,  // 1EE4C       ; UNKNOWN
5503             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
5504             0x1EE50,  // 1EE50       ; UNKNOWN
5505             0x1EE51,  // 1EE51..1EE52; ARABIC
5506             0x1EE53,  // 1EE53       ; UNKNOWN
5507             0x1EE54,  // 1EE54       ; ARABIC
5508             0x1EE55,  // 1EE55..1EE56; UNKNOWN
5509             0x1EE57,  // 1EE57       ; ARABIC
5510             0x1EE58,  // 1EE58       ; UNKNOWN
5511             0x1EE59,  // 1EE59       ; ARABIC
5512             0x1EE5A,  // 1EE5A       ; UNKNOWN
5513             0x1EE5B,  // 1EE5B       ; ARABIC
5514             0x1EE5C,  // 1EE5C       ; UNKNOWN
5515             0x1EE5D,  // 1EE5D       ; ARABIC
5516             0x1EE5E,  // 1EE5E       ; UNKNOWN
5517             0x1EE5F,  // 1EE5F       ; ARABIC
5518             0x1EE60,  // 1EE60       ; UNKNOWN
5519             0x1EE61,  // 1EE61..1EE62; ARABIC
5520             0x1EE63,  // 1EE63       ; UNKNOWN
5521             0x1EE64,  // 1EE64       ; ARABIC
5522             0x1EE65,  // 1EE65..1EE66; UNKNOWN
5523             0x1EE67,  // 1EE67..1EE6A; ARABIC
5524             0x1EE6B,  // 1EE6B       ; UNKNOWN
5525             0x1EE6C,  // 1EE6C..1EE72; ARABIC
5526             0x1EE73,  // 1EE73       ; UNKNOWN
5527             0x1EE74,  // 1EE74..1EE77; ARABIC
5528             0x1EE78,  // 1EE78       ; UNKNOWN
5529             0x1EE79,  // 1EE79..1EE7C; ARABIC
5530             0x1EE7D,  // 1EE7D       ; UNKNOWN
5531             0x1EE7E,  // 1EE7E       ; ARABIC
5532             0x1EE7F,  // 1EE7F       ; UNKNOWN
5533             0x1EE80,  // 1EE80..1EE89; ARABIC
5534             0x1EE8A,  // 1EE8A       ; UNKNOWN
5535             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
5536             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
5537             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
5538             0x1EEA4,  // 1EEA4       ; UNKNOWN
5539             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
5540             0x1EEAA,  // 1EEAA       ; UNKNOWN
5541             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
5542             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
5543             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
5544             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
5545             0x1F000,  // 1F000..1F02B; COMMON
5546             0x1F02C,  // 1F02C..1F02F; UNKNOWN
5547             0x1F030,  // 1F030..1F093; COMMON
5548             0x1F094,  // 1F094..1F09F; UNKNOWN
5549             0x1F0A0,  // 1F0A0..1F0AE; COMMON
5550             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
5551             0x1F0B1,  // 1F0B1..1F0BF; COMMON
5552             0x1F0C0,  // 1F0C0       ; UNKNOWN
5553             0x1F0C1,  // 1F0C1..1F0CF; COMMON
5554             0x1F0D0,  // 1F0D0       ; UNKNOWN
5555             0x1F0D1,  // 1F0D1..1F0F5; COMMON
5556             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
5557             0x1F100,  // 1F100..1F10C; COMMON
5558             0x1F10D,  // 1F10D..1F10F; UNKNOWN
5559             0x1F110,  // 1F110..1F12E; COMMON
5560             0x1F12F,  // 1F12F       ; UNKNOWN
5561             0x1F130,  // 1F130..1F16B; COMMON
5562             0x1F16C,  // 1F16C..1F16F; UNKNOWN
5563             0x1F170,  // 1F170..1F19A; COMMON
5564             0x1F19B,  // 1F19B..1F1E5; UNKNOWN
5565             0x1F1E6,  // 1F1E6..1F1FF; COMMON
5566             0x1F200,  // 1F200       ; HIRAGANA
5567             0x1F201,  // 1F201..1F202; COMMON
5568             0x1F203,  // 1F203..1F20F; UNKNOWN
5569             0x1F210,  // 1F210..1F23A; COMMON
5570             0x1F23B,  // 1F23B..1F23F; UNKNOWN
5571             0x1F240,  // 1F240..1F248; COMMON
5572             0x1F249,  // 1F249..1F24F; UNKNOWN
5573             0x1F250,  // 1F250..1F251; COMMON
5574             0x1F252,  // 1F252..1F2FF; UNKNOWN
5575             0x1F300,  // 1F300..1F32C; COMMON
5576             0x1F32D,  // 1F32D..1F32F; UNKNOWN
5577             0x1F330,  // 1F330..1F37D; COMMON
5578             0x1F37E,  // 1F37E..1F37F; UNKNOWN
5579             0x1F380,  // 1F380..1F3CE; COMMON
5580             0x1F3CF,  // 1F3CF..1F3D3; UNKNOWN
5581             0x1F3D4,  // 1F3D4..1F3F7; COMMON
5582             0x1F3F8,  // 1F3F8..1F3FF; UNKNOWN
5583             0x1F400,  // 1F400..1F4FE; COMMON
5584             0x1F4FF,  // 1F4FF       ; UNKNOWN
5585             0x1F500,  // 1F500..1F54A; COMMON
5586             0x1F54B,  // 1F54B..1F54F; UNKNOWN
5587             0x1F550,  // 1F550..1F579; COMMON
5588             0x1F57A,  // 1F57A       ; UNKNOWN
5589             0x1F57B,  // 1F57B..1F5A3; COMMON
5590             0x1F5A4,  // 1F5A4       ; UNKNOWN
5591             0x1F5A5,  // 1F5A5..1F642; COMMON
5592             0x1F643,  // 1F643..1F644; UNKNOWN
5593             0x1F645,  // 1F645..1F6CF; COMMON
5594             0x1F6D0,  // 1F6D0..1F6DF; UNKNOWN
5595             0x1F6E0,  // 1F6E0..1F6EC; COMMON
5596             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
5597             0x1F6F0,  // 1F6F0..1F6F3; COMMON
5598             0x1F6F4,  // 1F6F4..1F6FF; UNKNOWN
5599             0x1F700,  // 1F700..1F773; COMMON
5600             0x1F774,  // 1F774..1F77F; UNKNOWN
5601             0x1F780,  // 1F780..1F7D4; COMMON
5602             0x1F7D5,  // 1F7D5..1F7FF; UNKNOWN
5603             0x1F800,  // 1F800..1F80B; COMMON
5604             0x1F80C,  // 1F80C..1F80F; UNKNOWN
5605             0x1F810,  // 1F810..1F847; COMMON
5606             0x1F848,  // 1F848..1F84F; UNKNOWN
5607             0x1F850,  // 1F850..1F859; COMMON
5608             0x1F85A,  // 1F85A..1F85F; UNKNOWN
5609             0x1F860,  // 1F860..1F887; COMMON
5610             0x1F888,  // 1F888..1F88F; UNKNOWN
5611             0x1F890,  // 1F890..1F8AD; COMMON
5612             0x1F8AE,  // 1F8AE..1FFFF; UNKNOWN
5613             0x20000,  // 20000..2A6D6; HAN
5614             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
5615             0x2A700,  // 2A700..2B734; HAN
5616             0x2B735,  // 2B735..2B73F; UNKNOWN
5617             0x2B740,  // 2B740..2B81D; HAN
5618             0x2B81E,  // 2B81E..2F7FF; UNKNOWN
5619             0x2F800,  // 2F800..2FA1D; HAN
5620             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
5621             0xE0001,  // E0001       ; COMMON
5622             0xE0002,  // E0002..E001F; UNKNOWN
5623             0xE0020,  // E0020..E007F; COMMON
5624             0xE0080,  // E0080..E00FF; UNKNOWN
5625             0xE0100,  // E0100..E01EF; INHERITED
5626             0xE01F0   // E01F0..10FFFF; UNKNOWN
5627         };
5628 
5629         private static final UnicodeScript[] scripts = {
5630             COMMON,                   // 0000..0040
5631             LATIN,                    // 0041..005A
5632             COMMON,                   // 005B..0060
5633             LATIN,                    // 0061..007A
5634             COMMON,                   // 007B..00A9
5635             LATIN,                    // 00AA
5636             COMMON,                   // 00AB..00B9
5637             LATIN,                    // 00BA
5638             COMMON,                   // 00BB..00BF
5639             LATIN,                    // 00C0..00D6
5640             COMMON,                   // 00D7
5641             LATIN,                    // 00D8..00F6
5642             COMMON,                   // 00F7
5643             LATIN,                    // 00F8..02B8
5644             COMMON,                   // 02B9..02DF
5645             LATIN,                    // 02E0..02E4
5646             COMMON,                   // 02E5..02E9
5647             BOPOMOFO,                 // 02EA..02EB
5648             COMMON,                   // 02EC..02FF
5649             INHERITED,                // 0300..036F
5650             GREEK,                    // 0370..0373
5651             COMMON,                   // 0374
5652             GREEK,                    // 0375..0377
5653             UNKNOWN,                  // 0378..0379
5654             GREEK,                    // 037A..037D
5655             COMMON,                   // 037E
5656             GREEK,                    // 037F
5657             UNKNOWN,                  // 0380..0383
5658             GREEK,                    // 0384
5659             COMMON,                   // 0385
5660             GREEK,                    // 0386
5661             COMMON,                   // 0387
5662             GREEK,                    // 0388..038A
5663             UNKNOWN,                  // 038B
5664             GREEK,                    // 038C
5665             UNKNOWN,                  // 038D
5666             GREEK,                    // 038E..03A1
5667             UNKNOWN,                  // 03A2
5668             GREEK,                    // 03A3..03E1
5669             COPTIC,                   // 03E2..03EF
5670             GREEK,                    // 03F0..03FF
5671             CYRILLIC,                 // 0400..0484
5672             INHERITED,                // 0485..0486
5673             CYRILLIC,                 // 0487..052F
5674             UNKNOWN,                  // 0530
5675             ARMENIAN,                 // 0531..0556
5676             UNKNOWN,                  // 0557..0558
5677             ARMENIAN,                 // 0559..055F
5678             UNKNOWN,                  // 0560
5679             ARMENIAN,                 // 0561..0587
5680             UNKNOWN,                  // 0588
5681             COMMON,                   // 0589
5682             ARMENIAN,                 // 058A
5683             UNKNOWN,                  // 058B..058C
5684             ARMENIAN,                 // 058D..058F
5685             UNKNOWN,                  // 0590
5686             HEBREW,                   // 0591..05C7
5687             UNKNOWN,                  // 05C8..05CF
5688             HEBREW,                   // 05D0..05EA
5689             UNKNOWN,                  // 05EB..05EF
5690             HEBREW,                   // 05F0..05F4
5691             UNKNOWN,                  // 05F5..05FF
5692             ARABIC,                   // 0600..0604
5693             COMMON,                   // 0605
5694             ARABIC,                   // 0606..060B
5695             COMMON,                   // 060C
5696             ARABIC,                   // 060D..061A
5697             COMMON,                   // 061B..061C
5698             UNKNOWN,                  // 061D
5699             ARABIC,                   // 061E
5700             COMMON,                   // 061F
5701             ARABIC,                   // 0620..063F
5702             COMMON,                   // 0640
5703             ARABIC,                   // 0641..064A
5704             INHERITED,                // 064B..0655
5705             ARABIC,                   // 0656..065F
5706             COMMON,                   // 0660..0669
5707             ARABIC,                   // 066A..066F
5708             INHERITED,                // 0670
5709             ARABIC,                   // 0671..06DC
5710             COMMON,                   // 06DD
5711             ARABIC,                   // 06DE..06FF
5712             SYRIAC,                   // 0700..070D
5713             UNKNOWN,                  // 070E
5714             SYRIAC,                   // 070F..074A
5715             UNKNOWN,                  // 074B..074C
5716             SYRIAC,                   // 074D..074F
5717             ARABIC,                   // 0750..077F
5718             THAANA,                   // 0780..07B1
5719             UNKNOWN,                  // 07B2..07BF
5720             NKO,                      // 07C0..07FA
5721             UNKNOWN,                  // 07FB..07FF
5722             SAMARITAN,                // 0800..082D
5723             UNKNOWN,                  // 082E..082F
5724             SAMARITAN,                // 0830..083E
5725             UNKNOWN,                  // 083F
5726             MANDAIC,                  // 0840..085B
5727             UNKNOWN,                  // 085C..085D
5728             MANDAIC,                  // 085E
5729             UNKNOWN,                  // 085F..089F
5730             ARABIC,                   // 08A0..08B2
5731             UNKNOWN,                  // 08B3..08E3
5732             ARABIC,                   // 08E4..08FF
5733             DEVANAGARI,               // 0900..0950
5734             INHERITED,                // 0951..0952
5735             DEVANAGARI,               // 0953..0963
5736             COMMON,                   // 0964..0965
5737             DEVANAGARI,               // 0966..097F
5738             BENGALI,                  // 0980..0983
5739             UNKNOWN,                  // 0984
5740             BENGALI,                  // 0985..098C
5741             UNKNOWN,                  // 098D..098E
5742             BENGALI,                  // 098F..0990
5743             UNKNOWN,                  // 0991..0992
5744             BENGALI,                  // 0993..09A8
5745             UNKNOWN,                  // 09A9
5746             BENGALI,                  // 09AA..09B0
5747             UNKNOWN,                  // 09B1
5748             BENGALI,                  // 09B2
5749             UNKNOWN,                  // 09B3..09B5
5750             BENGALI,                  // 09B6..09B9
5751             UNKNOWN,                  // 09BA..09BB
5752             BENGALI,                  // 09BC..09C4
5753             UNKNOWN,                  // 09C5..09C6
5754             BENGALI,                  // 09C7..09C8
5755             UNKNOWN,                  // 09C9..09CA
5756             BENGALI,                  // 09CB..09CE
5757             UNKNOWN,                  // 09CF..09D6
5758             BENGALI,                  // 09D7
5759             UNKNOWN,                  // 09D8..09DB
5760             BENGALI,                  // 09DC..09DD
5761             UNKNOWN,                  // 09DE
5762             BENGALI,                  // 09DF..09E3
5763             UNKNOWN,                  // 09E4..09E5
5764             BENGALI,                  // 09E6..09FB
5765             UNKNOWN,                  // 09FC..0A00
5766             GURMUKHI,                 // 0A01..0A03
5767             UNKNOWN,                  // 0A04
5768             GURMUKHI,                 // 0A05..0A0A
5769             UNKNOWN,                  // 0A0B..0A0E
5770             GURMUKHI,                 // 0A0F..0A10
5771             UNKNOWN,                  // 0A11..0A12
5772             GURMUKHI,                 // 0A13..0A28
5773             UNKNOWN,                  // 0A29
5774             GURMUKHI,                 // 0A2A..0A30
5775             UNKNOWN,                  // 0A31
5776             GURMUKHI,                 // 0A32..0A33
5777             UNKNOWN,                  // 0A34
5778             GURMUKHI,                 // 0A35..0A36
5779             UNKNOWN,                  // 0A37
5780             GURMUKHI,                 // 0A38..0A39
5781             UNKNOWN,                  // 0A3A..0A3B
5782             GURMUKHI,                 // 0A3C
5783             UNKNOWN,                  // 0A3D
5784             GURMUKHI,                 // 0A3E..0A42
5785             UNKNOWN,                  // 0A43..0A46
5786             GURMUKHI,                 // 0A47..0A48
5787             UNKNOWN,                  // 0A49..0A4A
5788             GURMUKHI,                 // 0A4B..0A4D
5789             UNKNOWN,                  // 0A4E..0A50
5790             GURMUKHI,                 // 0A51
5791             UNKNOWN,                  // 0A52..0A58
5792             GURMUKHI,                 // 0A59..0A5C
5793             UNKNOWN,                  // 0A5D
5794             GURMUKHI,                 // 0A5E
5795             UNKNOWN,                  // 0A5F..0A65
5796             GURMUKHI,                 // 0A66..0A75
5797             UNKNOWN,                  // 0A76..0A80
5798             GUJARATI,                 // 0A81..0A83
5799             UNKNOWN,                  // 0A84
5800             GUJARATI,                 // 0A85..0A8D
5801             UNKNOWN,                  // 0A8E
5802             GUJARATI,                 // 0A8F..0A91
5803             UNKNOWN,                  // 0A92
5804             GUJARATI,                 // 0A93..0AA8
5805             UNKNOWN,                  // 0AA9
5806             GUJARATI,                 // 0AAA..0AB0
5807             UNKNOWN,                  // 0AB1
5808             GUJARATI,                 // 0AB2..0AB3
5809             UNKNOWN,                  // 0AB4
5810             GUJARATI,                 // 0AB5..0AB9
5811             UNKNOWN,                  // 0ABA..0ABB
5812             GUJARATI,                 // 0ABC..0AC5
5813             UNKNOWN,                  // 0AC6
5814             GUJARATI,                 // 0AC7..0AC9
5815             UNKNOWN,                  // 0ACA
5816             GUJARATI,                 // 0ACB..0ACD
5817             UNKNOWN,                  // 0ACE..0ACF
5818             GUJARATI,                 // 0AD0
5819             UNKNOWN,                  // 0AD1..0ADF
5820             GUJARATI,                 // 0AE0..0AE3
5821             UNKNOWN,                  // 0AE4..0AE5
5822             GUJARATI,                 // 0AE6..0AF1
5823             UNKNOWN,                  // 0AF2..0B00
5824             ORIYA,                    // 0B01..0B03
5825             UNKNOWN,                  // 0B04
5826             ORIYA,                    // 0B05..0B0C
5827             UNKNOWN,                  // 0B0D..0B0E
5828             ORIYA,                    // 0B0F..0B10
5829             UNKNOWN,                  // 0B11..0B12
5830             ORIYA,                    // 0B13..0B28
5831             UNKNOWN,                  // 0B29
5832             ORIYA,                    // 0B2A..0B30
5833             UNKNOWN,                  // 0B31
5834             ORIYA,                    // 0B32..0B33
5835             UNKNOWN,                  // 0B34
5836             ORIYA,                    // 0B35..0B39
5837             UNKNOWN,                  // 0B3A..0B3B
5838             ORIYA,                    // 0B3C..0B44
5839             UNKNOWN,                  // 0B45..0B46
5840             ORIYA,                    // 0B47..0B48
5841             UNKNOWN,                  // 0B49..0B4A
5842             ORIYA,                    // 0B4B..0B4D
5843             UNKNOWN,                  // 0B4E..0B55
5844             ORIYA,                    // 0B56..0B57
5845             UNKNOWN,                  // 0B58..0B5B
5846             ORIYA,                    // 0B5C..0B5D
5847             UNKNOWN,                  // 0B5E
5848             ORIYA,                    // 0B5F..0B63
5849             UNKNOWN,                  // 0B64..0B65
5850             ORIYA,                    // 0B66..0B77
5851             UNKNOWN,                  // 0B78..0B81
5852             TAMIL,                    // 0B82..0B83
5853             UNKNOWN,                  // 0B84
5854             TAMIL,                    // 0B85..0B8A
5855             UNKNOWN,                  // 0B8B..0B8D
5856             TAMIL,                    // 0B8E..0B90
5857             UNKNOWN,                  // 0B91
5858             TAMIL,                    // 0B92..0B95
5859             UNKNOWN,                  // 0B96..0B98
5860             TAMIL,                    // 0B99..0B9A
5861             UNKNOWN,                  // 0B9B
5862             TAMIL,                    // 0B9C
5863             UNKNOWN,                  // 0B9D
5864             TAMIL,                    // 0B9E..0B9F
5865             UNKNOWN,                  // 0BA0..0BA2
5866             TAMIL,                    // 0BA3..0BA4
5867             UNKNOWN,                  // 0BA5..0BA7
5868             TAMIL,                    // 0BA8..0BAA
5869             UNKNOWN,                  // 0BAB..0BAD
5870             TAMIL,                    // 0BAE..0BB9
5871             UNKNOWN,                  // 0BBA..0BBD
5872             TAMIL,                    // 0BBE..0BC2
5873             UNKNOWN,                  // 0BC3..0BC5
5874             TAMIL,                    // 0BC6..0BC8
5875             UNKNOWN,                  // 0BC9
5876             TAMIL,                    // 0BCA..0BCD
5877             UNKNOWN,                  // 0BCE..0BCF
5878             TAMIL,                    // 0BD0
5879             UNKNOWN,                  // 0BD1..0BD6
5880             TAMIL,                    // 0BD7
5881             UNKNOWN,                  // 0BD8..0BE5
5882             TAMIL,                    // 0BE6..0BFA
5883             UNKNOWN,                  // 0BFB..0BFF
5884             TELUGU,                   // 0C00..0C03
5885             UNKNOWN,                  // 0C04
5886             TELUGU,                   // 0C05..0C0C
5887             UNKNOWN,                  // 0C0D
5888             TELUGU,                   // 0C0E..0C10
5889             UNKNOWN,                  // 0C11
5890             TELUGU,                   // 0C12..0C28
5891             UNKNOWN,                  // 0C29
5892             TELUGU,                   // 0C2A..0C39
5893             UNKNOWN,                  // 0C3A..0C3C
5894             TELUGU,                   // 0C3D..0C44
5895             UNKNOWN,                  // 0C45
5896             TELUGU,                   // 0C46..0C48
5897             UNKNOWN,                  // 0C49
5898             TELUGU,                   // 0C4A..0C4D
5899             UNKNOWN,                  // 0C4E..0C54
5900             TELUGU,                   // 0C55..0C56
5901             UNKNOWN,                  // 0C57
5902             TELUGU,                   // 0C58..0C59
5903             UNKNOWN,                  // 0C5A..0C5F
5904             TELUGU,                   // 0C60..0C63
5905             UNKNOWN,                  // 0C64..0C65
5906             TELUGU,                   // 0C66..0C6F
5907             UNKNOWN,                  // 0C70..0C77
5908             TELUGU,                   // 0C78..0C7F
5909             UNKNOWN,                  // 0C80
5910             KANNADA,                  // 0C81..0C83
5911             UNKNOWN,                  // 0C84
5912             KANNADA,                  // 0C85..0C8C
5913             UNKNOWN,                  // 0C8D
5914             KANNADA,                  // 0C8E..0C90
5915             UNKNOWN,                  // 0C91
5916             KANNADA,                  // 0C92..0CA8
5917             UNKNOWN,                  // 0CA9
5918             KANNADA,                  // 0CAA..0CB3
5919             UNKNOWN,                  // 0CB4
5920             KANNADA,                  // 0CB5..0CB9
5921             UNKNOWN,                  // 0CBA..0CBB
5922             KANNADA,                  // 0CBC..0CC4
5923             UNKNOWN,                  // 0CC5
5924             KANNADA,                  // 0CC6..0CC8
5925             UNKNOWN,                  // 0CC9
5926             KANNADA,                  // 0CCA..0CCD
5927             UNKNOWN,                  // 0CCE..0CD4
5928             KANNADA,                  // 0CD5..0CD6
5929             UNKNOWN,                  // 0CD7..0CDD
5930             KANNADA,                  // 0CDE
5931             UNKNOWN,                  // 0CDF
5932             KANNADA,                  // 0CE0..0CE3
5933             UNKNOWN,                  // 0CE4..0CE5
5934             KANNADA,                  // 0CE6..0CEF
5935             UNKNOWN,                  // 0CF0
5936             KANNADA,                  // 0CF1..0CF2
5937             UNKNOWN,                  // 0CF3..0D00
5938             MALAYALAM,                // 0D01..0D03
5939             UNKNOWN,                  // 0D04
5940             MALAYALAM,                // 0D05..0D0C
5941             UNKNOWN,                  // 0D0D
5942             MALAYALAM,                // 0D0E..0D10
5943             UNKNOWN,                  // 0D11
5944             MALAYALAM,                // 0D12..0D3A
5945             UNKNOWN,                  // 0D3B..0D3C
5946             MALAYALAM,                // 0D3D..0D44
5947             UNKNOWN,                  // 0D45
5948             MALAYALAM,                // 0D46..0D48
5949             UNKNOWN,                  // 0D49
5950             MALAYALAM,                // 0D4A..0D4E
5951             UNKNOWN,                  // 0D4F..0D56
5952             MALAYALAM,                // 0D57
5953             UNKNOWN,                  // 0D58..0D5F
5954             MALAYALAM,                // 0D60..0D63
5955             UNKNOWN,                  // 0D64..0D65
5956             MALAYALAM,                // 0D66..0D75
5957             UNKNOWN,                  // 0D76..0D78
5958             MALAYALAM,                // 0D79..0D7F
5959             UNKNOWN,                  // 0D80..0D81
5960             SINHALA,                  // 0D82..0D83
5961             UNKNOWN,                  // 0D84
5962             SINHALA,                  // 0D85..0D96
5963             UNKNOWN,                  // 0D97..0D99
5964             SINHALA,                  // 0D9A..0DB1
5965             UNKNOWN,                  // 0DB2
5966             SINHALA,                  // 0DB3..0DBB
5967             UNKNOWN,                  // 0DBC
5968             SINHALA,                  // 0DBD
5969             UNKNOWN,                  // 0DBE..0DBF
5970             SINHALA,                  // 0DC0..0DC6
5971             UNKNOWN,                  // 0DC7..0DC9
5972             SINHALA,                  // 0DCA
5973             UNKNOWN,                  // 0DCB..0DCE
5974             SINHALA,                  // 0DCF..0DD4
5975             UNKNOWN,                  // 0DD5
5976             SINHALA,                  // 0DD6
5977             UNKNOWN,                  // 0DD7
5978             SINHALA,                  // 0DD8..0DDF
5979             UNKNOWN,                  // 0DE0..0DE5
5980             SINHALA,                  // 0DE6..0DEF
5981             UNKNOWN,                  // 0DF0..0DF1
5982             SINHALA,                  // 0DF2..0DF4
5983             UNKNOWN,                  // 0DF5..0E00
5984             THAI,                     // 0E01..0E3A
5985             UNKNOWN,                  // 0E3B..0E3E
5986             COMMON,                   // 0E3F
5987             THAI,                     // 0E40..0E5B
5988             UNKNOWN,                  // 0E5C..0E80
5989             LAO,                      // 0E81..0E82
5990             UNKNOWN,                  // 0E83
5991             LAO,                      // 0E84
5992             UNKNOWN,                  // 0E85..0E86
5993             LAO,                      // 0E87..0E88
5994             UNKNOWN,                  // 0E89
5995             LAO,                      // 0E8A
5996             UNKNOWN,                  // 0E8B..0E8C
5997             LAO,                      // 0E8D
5998             UNKNOWN,                  // 0E8E..0E93
5999             LAO,                      // 0E94..0E97
6000             UNKNOWN,                  // 0E98
6001             LAO,                      // 0E99..0E9F
6002             UNKNOWN,                  // 0EA0
6003             LAO,                      // 0EA1..0EA3
6004             UNKNOWN,                  // 0EA4
6005             LAO,                      // 0EA5
6006             UNKNOWN,                  // 0EA6
6007             LAO,                      // 0EA7
6008             UNKNOWN,                  // 0EA8..0EA9
6009             LAO,                      // 0EAA..0EAB
6010             UNKNOWN,                  // 0EAC
6011             LAO,                      // 0EAD..0EB9
6012             UNKNOWN,                  // 0EBA
6013             LAO,                      // 0EBB..0EBD
6014             UNKNOWN,                  // 0EBE..0EBF
6015             LAO,                      // 0EC0..0EC4
6016             UNKNOWN,                  // 0EC5
6017             LAO,                      // 0EC6
6018             UNKNOWN,                  // 0EC7
6019             LAO,                      // 0EC8..0ECD
6020             UNKNOWN,                  // 0ECE..0ECF
6021             LAO,                      // 0ED0..0ED9
6022             UNKNOWN,                  // 0EDA..0EDB
6023             LAO,                      // 0EDC..0EDF
6024             UNKNOWN,                  // 0EE0..0EFF
6025             TIBETAN,                  // 0F00..0F47
6026             UNKNOWN,                  // 0F48
6027             TIBETAN,                  // 0F49..0F6C
6028             UNKNOWN,                  // 0F6D..0F70
6029             TIBETAN,                  // 0F71..0F97
6030             UNKNOWN,                  // 0F98
6031             TIBETAN,                  // 0F99..0FBC
6032             UNKNOWN,                  // 0FBD
6033             TIBETAN,                  // 0FBE..0FCC
6034             UNKNOWN,                  // 0FCD
6035             TIBETAN,                  // 0FCE..0FD4
6036             COMMON,                   // 0FD5..0FD8
6037             TIBETAN,                  // 0FD9..0FDA
6038             UNKNOWN,                  // 0FDB..0FFF
6039             MYANMAR,                  // 1000..109F
6040             GEORGIAN,                 // 10A0..10C5
6041             UNKNOWN,                  // 10C6
6042             GEORGIAN,                 // 10C7
6043             UNKNOWN,                  // 10C8..10CC
6044             GEORGIAN,                 // 10CD
6045             UNKNOWN,                  // 10CE..10CF
6046             GEORGIAN,                 // 10D0..10FA
6047             COMMON,                   // 10FB
6048             GEORGIAN,                 // 10FC..10FF
6049             HANGUL,                   // 1100..11FF
6050             ETHIOPIC,                 // 1200..1248
6051             UNKNOWN,                  // 1249
6052             ETHIOPIC,                 // 124A..124D
6053             UNKNOWN,                  // 124E..124F
6054             ETHIOPIC,                 // 1250..1256
6055             UNKNOWN,                  // 1257
6056             ETHIOPIC,                 // 1258
6057             UNKNOWN,                  // 1259
6058             ETHIOPIC,                 // 125A..125D
6059             UNKNOWN,                  // 125E..125F
6060             ETHIOPIC,                 // 1260..1288
6061             UNKNOWN,                  // 1289
6062             ETHIOPIC,                 // 128A..128D
6063             UNKNOWN,                  // 128E..128F
6064             ETHIOPIC,                 // 1290..12B0
6065             UNKNOWN,                  // 12B1
6066             ETHIOPIC,                 // 12B2..12B5
6067             UNKNOWN,                  // 12B6..12B7
6068             ETHIOPIC,                 // 12B8..12BE
6069             UNKNOWN,                  // 12BF
6070             ETHIOPIC,                 // 12C0
6071             UNKNOWN,                  // 12C1
6072             ETHIOPIC,                 // 12C2..12C5
6073             UNKNOWN,                  // 12C6..12C7
6074             ETHIOPIC,                 // 12C8..12D6
6075             UNKNOWN,                  // 12D7
6076             ETHIOPIC,                 // 12D8..1310
6077             UNKNOWN,                  // 1311
6078             ETHIOPIC,                 // 1312..1315
6079             UNKNOWN,                  // 1316..1317
6080             ETHIOPIC,                 // 1318..135A
6081             UNKNOWN,                  // 135B..135C
6082             ETHIOPIC,                 // 135D..137C
6083             UNKNOWN,                  // 137D..137F
6084             ETHIOPIC,                 // 1380..1399
6085             UNKNOWN,                  // 139A..139F
6086             CHEROKEE,                 // 13A0..13F4
6087             UNKNOWN,                  // 13F5..13FF
6088             CANADIAN_ABORIGINAL,      // 1400..167F
6089             OGHAM,                    // 1680..169C
6090             UNKNOWN,                  // 169D..169F
6091             RUNIC,                    // 16A0..16EA
6092             COMMON,                   // 16EB..16ED
6093             RUNIC,                    // 16EE..16F8
6094             UNKNOWN,                  // 16F9..16FF
6095             TAGALOG,                  // 1700..170C
6096             UNKNOWN,                  // 170D
6097             TAGALOG,                  // 170E..1714
6098             UNKNOWN,                  // 1715..171F
6099             HANUNOO,                  // 1720..1734
6100             COMMON,                   // 1735..1736
6101             UNKNOWN,                  // 1737..173F
6102             BUHID,                    // 1740..1753
6103             UNKNOWN,                  // 1754..175F
6104             TAGBANWA,                 // 1760..176C
6105             UNKNOWN,                  // 176D
6106             TAGBANWA,                 // 176E..1770
6107             UNKNOWN,                  // 1771
6108             TAGBANWA,                 // 1772..1773
6109             UNKNOWN,                  // 1774..177F
6110             KHMER,                    // 1780..17DD
6111             UNKNOWN,                  // 17DE..17DF
6112             KHMER,                    // 17E0..17E9
6113             UNKNOWN,                  // 17EA..17EF
6114             KHMER,                    // 17F0..17F9
6115             UNKNOWN,                  // 17FA..17FF
6116             MONGOLIAN,                // 1800..1801
6117             COMMON,                   // 1802..1803
6118             MONGOLIAN,                // 1804
6119             COMMON,                   // 1805
6120             MONGOLIAN,                // 1806..180E
6121             UNKNOWN,                  // 180F
6122             MONGOLIAN,                // 1810..1819
6123             UNKNOWN,                  // 181A..181F
6124             MONGOLIAN,                // 1820..1877
6125             UNKNOWN,                  // 1878..187F
6126             MONGOLIAN,                // 1880..18AA
6127             UNKNOWN,                  // 18AB..18AF
6128             CANADIAN_ABORIGINAL,      // 18B0..18F5
6129             UNKNOWN,                  // 18F6..18FF
6130             LIMBU,                    // 1900..191E
6131             UNKNOWN,                  // 191F
6132             LIMBU,                    // 1920..192B
6133             UNKNOWN,                  // 192C..192F
6134             LIMBU,                    // 1930..193B
6135             UNKNOWN,                  // 193C..193F
6136             LIMBU,                    // 1940
6137             UNKNOWN,                  // 1941..1943
6138             LIMBU,                    // 1944..194F
6139             TAI_LE,                   // 1950..196D
6140             UNKNOWN,                  // 196E..196F
6141             TAI_LE,                   // 1970..1974
6142             UNKNOWN,                  // 1975..197F
6143             NEW_TAI_LUE,              // 1980..19AB
6144             UNKNOWN,                  // 19AC..19AF
6145             NEW_TAI_LUE,              // 19B0..19C9
6146             UNKNOWN,                  // 19CA..19CF
6147             NEW_TAI_LUE,              // 19D0..19DA
6148             UNKNOWN,                  // 19DB..19DD
6149             NEW_TAI_LUE,              // 19DE..19DF
6150             KHMER,                    // 19E0..19FF
6151             BUGINESE,                 // 1A00..1A1B
6152             UNKNOWN,                  // 1A1C..1A1D
6153             BUGINESE,                 // 1A1E..1A1F
6154             TAI_THAM,                 // 1A20..1A5E
6155             UNKNOWN,                  // 1A5F
6156             TAI_THAM,                 // 1A60..1A7C
6157             UNKNOWN,                  // 1A7D..1A7E
6158             TAI_THAM,                 // 1A7F..1A89
6159             UNKNOWN,                  // 1A8A..1A8F
6160             TAI_THAM,                 // 1A90..1A99
6161             UNKNOWN,                  // 1A9A..1A9F
6162             TAI_THAM,                 // 1AA0..1AAD
6163             UNKNOWN,                  // 1AAE..1AAF
6164             INHERITED,                // 1AB0..1ABE
6165             UNKNOWN,                  // 1ABF..1AFF
6166             BALINESE,                 // 1B00..1B4B
6167             UNKNOWN,                  // 1B4C..1B4F
6168             BALINESE,                 // 1B50..1B7C
6169             UNKNOWN,                  // 1B7D..1B7F
6170             SUNDANESE,                // 1B80..1BBF
6171             BATAK,                    // 1BC0..1BF3
6172             UNKNOWN,                  // 1BF4..1BFB
6173             BATAK,                    // 1BFC..1BFF
6174             LEPCHA,                   // 1C00..1C37
6175             UNKNOWN,                  // 1C38..1C3A
6176             LEPCHA,                   // 1C3B..1C49
6177             UNKNOWN,                  // 1C4A..1C4C
6178             LEPCHA,                   // 1C4D..1C4F
6179             OL_CHIKI,                 // 1C50..1C7F
6180             UNKNOWN,                  // 1C80..1CBF
6181             SUNDANESE,                // 1CC0..1CC7
6182             UNKNOWN,                  // 1CC8..1CCF
6183             INHERITED,                // 1CD0..1CD2
6184             COMMON,                   // 1CD3
6185             INHERITED,                // 1CD4..1CE0
6186             COMMON,                   // 1CE1
6187             INHERITED,                // 1CE2..1CE8
6188             COMMON,                   // 1CE9..1CEC
6189             INHERITED,                // 1CED
6190             COMMON,                   // 1CEE..1CF3
6191             INHERITED,                // 1CF4
6192             COMMON,                   // 1CF5..1CF6
6193             UNKNOWN,                  // 1CF7
6194             INHERITED,                // 1CF8..1CF9
6195             UNKNOWN,                  // 1CFA..1CFF
6196             LATIN,                    // 1D00..1D25
6197             GREEK,                    // 1D26..1D2A
6198             CYRILLIC,                 // 1D2B
6199             LATIN,                    // 1D2C..1D5C
6200             GREEK,                    // 1D5D..1D61
6201             LATIN,                    // 1D62..1D65
6202             GREEK,                    // 1D66..1D6A
6203             LATIN,                    // 1D6B..1D77
6204             CYRILLIC,                 // 1D78
6205             LATIN,                    // 1D79..1DBE
6206             GREEK,                    // 1DBF
6207             INHERITED,                // 1DC0..1DF5
6208             UNKNOWN,                  // 1DF6..1DFB
6209             INHERITED,                // 1DFC..1DFF
6210             LATIN,                    // 1E00..1EFF
6211             GREEK,                    // 1F00..1F15
6212             UNKNOWN,                  // 1F16..1F17
6213             GREEK,                    // 1F18..1F1D
6214             UNKNOWN,                  // 1F1E..1F1F
6215             GREEK,                    // 1F20..1F45
6216             UNKNOWN,                  // 1F46..1F47
6217             GREEK,                    // 1F48..1F4D
6218             UNKNOWN,                  // 1F4E..1F4F
6219             GREEK,                    // 1F50..1F57
6220             UNKNOWN,                  // 1F58
6221             GREEK,                    // 1F59
6222             UNKNOWN,                  // 1F5A
6223             GREEK,                    // 1F5B
6224             UNKNOWN,                  // 1F5C
6225             GREEK,                    // 1F5D
6226             UNKNOWN,                  // 1F5E
6227             GREEK,                    // 1F5F..1F7D
6228             UNKNOWN,                  // 1F7E..1F7F
6229             GREEK,                    // 1F80..1FB4
6230             UNKNOWN,                  // 1FB5
6231             GREEK,                    // 1FB6..1FC4
6232             UNKNOWN,                  // 1FC5
6233             GREEK,                    // 1FC6..1FD3
6234             UNKNOWN,                  // 1FD4..1FD5
6235             GREEK,                    // 1FD6..1FDB
6236             UNKNOWN,                  // 1FDC
6237             GREEK,                    // 1FDD..1FEF
6238             UNKNOWN,                  // 1FF0..1FF1
6239             GREEK,                    // 1FF2..1FF4
6240             UNKNOWN,                  // 1FF5
6241             GREEK,                    // 1FF6..1FFE
6242             UNKNOWN,                  // 1FFF
6243             COMMON,                   // 2000..200B
6244             INHERITED,                // 200C..200D
6245             COMMON,                   // 200E..2064
6246             UNKNOWN,                  // 2065
6247             COMMON,                   // 2066..2070
6248             LATIN,                    // 2071
6249             UNKNOWN,                  // 2072..2073
6250             COMMON,                   // 2074..207E
6251             LATIN,                    // 207F
6252             COMMON,                   // 2080..208E
6253             UNKNOWN,                  // 208F
6254             LATIN,                    // 2090..209C
6255             UNKNOWN,                  // 209D..209F
6256             COMMON,                   // 20A0..20BD
6257             UNKNOWN,                  // 20BE..20CF
6258             INHERITED,                // 20D0..20F0
6259             UNKNOWN,                  // 20F1..20FF
6260             COMMON,                   // 2100..2125
6261             GREEK,                    // 2126
6262             COMMON,                   // 2127..2129
6263             LATIN,                    // 212A..212B
6264             COMMON,                   // 212C..2131
6265             LATIN,                    // 2132
6266             COMMON,                   // 2133..214D
6267             LATIN,                    // 214E
6268             COMMON,                   // 214F..215F
6269             LATIN,                    // 2160..2188
6270             COMMON,                   // 2189
6271             UNKNOWN,                  // 218A..218F
6272             COMMON,                   // 2190..23FA
6273             UNKNOWN,                  // 23FB..23FF
6274             COMMON,                   // 2400..2426
6275             UNKNOWN,                  // 2427..243F
6276             COMMON,                   // 2440..244A
6277             UNKNOWN,                  // 244B..245F
6278             COMMON,                   // 2460..27FF
6279             BRAILLE,                  // 2800..28FF
6280             COMMON,                   // 2900..2B73
6281             UNKNOWN,                  // 2B74..2B75
6282             COMMON,                   // 2B76..2B95
6283             UNKNOWN,                  // 2B96..2B97
6284             COMMON,                   // 2B98..2BB9
6285             UNKNOWN,                  // 2BBA..2BBC
6286             COMMON,                   // 2BBD..2BC8
6287             UNKNOWN,                  // 2BC9
6288             COMMON,                   // 2BCA..2BD1
6289             UNKNOWN,                  // 2BD2..2BFF
6290             GLAGOLITIC,               // 2C00..2C2E
6291             UNKNOWN,                  // 2C2F
6292             GLAGOLITIC,               // 2C30..2C5E
6293             UNKNOWN,                  // 2C5F
6294             LATIN,                    // 2C60..2C7F
6295             COPTIC,                   // 2C80..2CF3
6296             UNKNOWN,                  // 2CF4..2CF8
6297             COPTIC,                   // 2CF9..2CFF
6298             GEORGIAN,                 // 2D00..2D25
6299             UNKNOWN,                  // 2D26
6300             GEORGIAN,                 // 2D27
6301             UNKNOWN,                  // 2D28..2D2C
6302             GEORGIAN,                 // 2D2D
6303             UNKNOWN,                  // 2D2E..2D2F
6304             TIFINAGH,                 // 2D30..2D67
6305             UNKNOWN,                  // 2D68..2D6E
6306             TIFINAGH,                 // 2D6F..2D70
6307             UNKNOWN,                  // 2D71..2D7E
6308             TIFINAGH,                 // 2D7F
6309             ETHIOPIC,                 // 2D80..2D96
6310             UNKNOWN,                  // 2D97..2D9F
6311             ETHIOPIC,                 // 2DA0..2DA6
6312             UNKNOWN,                  // 2DA7
6313             ETHIOPIC,                 // 2DA8..2DAE
6314             UNKNOWN,                  // 2DAF
6315             ETHIOPIC,                 // 2DB0..2DB6
6316             UNKNOWN,                  // 2DB7
6317             ETHIOPIC,                 // 2DB8..2DBE
6318             UNKNOWN,                  // 2DBF
6319             ETHIOPIC,                 // 2DC0..2DC6
6320             UNKNOWN,                  // 2DC7
6321             ETHIOPIC,                 // 2DC8..2DCE
6322             UNKNOWN,                  // 2DCF
6323             ETHIOPIC,                 // 2DD0..2DD6
6324             UNKNOWN,                  // 2DD7
6325             ETHIOPIC,                 // 2DD8..2DDE
6326             UNKNOWN,                  // 2DDF
6327             CYRILLIC,                 // 2DE0..2DFF
6328             COMMON,                   // 2E00..2E42
6329             UNKNOWN,                  // 2E43..2E7F
6330             HAN,                      // 2E80..2E99
6331             UNKNOWN,                  // 2E9A
6332             HAN,                      // 2E9B..2EF3
6333             UNKNOWN,                  // 2EF4..2EFF
6334             HAN,                      // 2F00..2FD5
6335             UNKNOWN,                  // 2FD6..2FEF
6336             COMMON,                   // 2FF0..2FFB
6337             UNKNOWN,                  // 2FFC..2FFF
6338             COMMON,                   // 3000..3004
6339             HAN,                      // 3005
6340             COMMON,                   // 3006
6341             HAN,                      // 3007
6342             COMMON,                   // 3008..3020
6343             HAN,                      // 3021..3029
6344             INHERITED,                // 302A..302D
6345             HANGUL,                   // 302E..302F
6346             COMMON,                   // 3030..3037
6347             HAN,                      // 3038..303B
6348             COMMON,                   // 303C..303F
6349             UNKNOWN,                  // 3040
6350             HIRAGANA,                 // 3041..3096
6351             UNKNOWN,                  // 3097..3098
6352             INHERITED,                // 3099..309A
6353             COMMON,                   // 309B..309C
6354             HIRAGANA,                 // 309D..309F
6355             COMMON,                   // 30A0
6356             KATAKANA,                 // 30A1..30FA
6357             COMMON,                   // 30FB..30FC
6358             KATAKANA,                 // 30FD..30FF
6359             UNKNOWN,                  // 3100..3104
6360             BOPOMOFO,                 // 3105..312D
6361             UNKNOWN,                  // 312E..3130
6362             HANGUL,                   // 3131..318E
6363             UNKNOWN,                  // 318F
6364             COMMON,                   // 3190..319F
6365             BOPOMOFO,                 // 31A0..31BA
6366             UNKNOWN,                  // 31BB..31BF
6367             COMMON,                   // 31C0..31E3
6368             UNKNOWN,                  // 31E4..31EF
6369             KATAKANA,                 // 31F0..31FF
6370             HANGUL,                   // 3200..321E
6371             UNKNOWN,                  // 321F
6372             COMMON,                   // 3220..325F
6373             HANGUL,                   // 3260..327E
6374             COMMON,                   // 327F..32CF
6375             KATAKANA,                 // 32D0..32FE
6376             UNKNOWN,                  // 32FF
6377             KATAKANA,                 // 3300..3357
6378             COMMON,                   // 3358..33FF
6379             HAN,                      // 3400..4DB5
6380             UNKNOWN,                  // 4DB6..4DBF
6381             COMMON,                   // 4DC0..4DFF
6382             HAN,                      // 4E00..9FCC
6383             UNKNOWN,                  // 9FCD..9FFF
6384             YI,                       // A000..A48C
6385             UNKNOWN,                  // A48D..A48F
6386             YI,                       // A490..A4C6
6387             UNKNOWN,                  // A4C7..A4CF
6388             LISU,                     // A4D0..A4FF
6389             VAI,                      // A500..A62B
6390             UNKNOWN,                  // A62C..A63F
6391             CYRILLIC,                 // A640..A69D
6392             UNKNOWN,                  // A69E
6393             CYRILLIC,                 // A69F
6394             BAMUM,                    // A6A0..A6F7
6395             UNKNOWN,                  // A6F8..A6FF
6396             COMMON,                   // A700..A721
6397             LATIN,                    // A722..A787
6398             COMMON,                   // A788..A78A
6399             LATIN,                    // A78B..A78E
6400             UNKNOWN,                  // A78F
6401             LATIN,                    // A790..A7AD
6402             UNKNOWN,                  // A7AE..A7AF
6403             LATIN,                    // A7B0..A7B1
6404             UNKNOWN,                  // A7B2..A7F6
6405             LATIN,                    // A7F7..A7FF
6406             SYLOTI_NAGRI,             // A800..A82B
6407             UNKNOWN,                  // A82C..A82F
6408             COMMON,                   // A830..A839
6409             UNKNOWN,                  // A83A..A83F
6410             PHAGS_PA,                 // A840..A877
6411             UNKNOWN,                  // A878..A87F
6412             SAURASHTRA,               // A880..A8C4
6413             UNKNOWN,                  // A8C5..A8CD
6414             SAURASHTRA,               // A8CE..A8D9
6415             UNKNOWN,                  // A8DA..A8DF
6416             DEVANAGARI,               // A8E0..A8FB
6417             UNKNOWN,                  // A8FC..A8FF
6418             KAYAH_LI,                 // A900..A92D
6419             COMMON,                   // A92E
6420             KAYAH_LI,                 // A92F
6421             REJANG,                   // A930..A953
6422             UNKNOWN,                  // A954..A95E
6423             REJANG,                   // A95F
6424             HANGUL,                   // A960..A97C
6425             UNKNOWN,                  // A97D..A97F
6426             JAVANESE,                 // A980..A9CD
6427             UNKNOWN,                  // A9CE
6428             COMMON,                   // A9CF
6429             JAVANESE,                 // A9D0..A9D9
6430             UNKNOWN,                  // A9DA..A9DD
6431             JAVANESE,                 // A9DE..A9DF
6432             MYANMAR,                  // A9E0..A9FE
6433             UNKNOWN,                  // A9FF
6434             CHAM,                     // AA00..AA36
6435             UNKNOWN,                  // AA37..AA3F
6436             CHAM,                     // AA40..AA4D
6437             UNKNOWN,                  // AA4E..AA4F
6438             CHAM,                     // AA50..AA59
6439             UNKNOWN,                  // AA5A..AA5B
6440             CHAM,                     // AA5C..AA5F
6441             MYANMAR,                  // AA60..AA7F
6442             TAI_VIET,                 // AA80..AAC2
6443             UNKNOWN,                  // AAC3..AADA
6444             TAI_VIET,                 // AADB..AADF
6445             MEETEI_MAYEK,             // AAE0..AAF6
6446             UNKNOWN,                  // AAF7..AB00
6447             ETHIOPIC,                 // AB01..AB06
6448             UNKNOWN,                  // AB07..AB08
6449             ETHIOPIC,                 // AB09..AB0E
6450             UNKNOWN,                  // AB0F..AB10
6451             ETHIOPIC,                 // AB11..AB16
6452             UNKNOWN,                  // AB17..AB1F
6453             ETHIOPIC,                 // AB20..AB26
6454             UNKNOWN,                  // AB27
6455             ETHIOPIC,                 // AB28..AB2E
6456             UNKNOWN,                  // AB2F
6457             LATIN,                    // AB30..AB5A
6458             COMMON,                   // AB5B
6459             LATIN,                    // AB5C..AB5F
6460             UNKNOWN,                  // AB60..AB63
6461             LATIN,                    // AB64
6462             GREEK,                    // AB65
6463             UNKNOWN,                  // AB66..ABBF
6464             MEETEI_MAYEK,             // ABC0..ABED
6465             UNKNOWN,                  // ABEE..ABEF
6466             MEETEI_MAYEK,             // ABF0..ABF9
6467             UNKNOWN,                  // ABFA..ABFF
6468             HANGUL,                   // AC00..D7A3
6469             UNKNOWN,                  // D7A4..D7AF
6470             HANGUL,                   // D7B0..D7C6
6471             UNKNOWN,                  // D7C7..D7CA
6472             HANGUL,                   // D7CB..D7FB
6473             UNKNOWN,                  // D7FC..F8FF
6474             HAN,                      // F900..FA6D
6475             UNKNOWN,                  // FA6E..FA6F
6476             HAN,                      // FA70..FAD9
6477             UNKNOWN,                  // FADA..FAFF
6478             LATIN,                    // FB00..FB06
6479             UNKNOWN,                  // FB07..FB12
6480             ARMENIAN,                 // FB13..FB17
6481             UNKNOWN,                  // FB18..FB1C
6482             HEBREW,                   // FB1D..FB36
6483             UNKNOWN,                  // FB37
6484             HEBREW,                   // FB38..FB3C
6485             UNKNOWN,                  // FB3D
6486             HEBREW,                   // FB3E
6487             UNKNOWN,                  // FB3F
6488             HEBREW,                   // FB40..FB41
6489             UNKNOWN,                  // FB42
6490             HEBREW,                   // FB43..FB44
6491             UNKNOWN,                  // FB45
6492             HEBREW,                   // FB46..FB4F
6493             ARABIC,                   // FB50..FBC1
6494             UNKNOWN,                  // FBC2..FBD2
6495             ARABIC,                   // FBD3..FD3D
6496             COMMON,                   // FD3E..FD3F
6497             UNKNOWN,                  // FD40..FD4F
6498             ARABIC,                   // FD50..FD8F
6499             UNKNOWN,                  // FD90..FD91
6500             ARABIC,                   // FD92..FDC7
6501             UNKNOWN,                  // FDC8..FDEF
6502             ARABIC,                   // FDF0..FDFD
6503             UNKNOWN,                  // FDFE..FDFF
6504             INHERITED,                // FE00..FE0F
6505             COMMON,                   // FE10..FE19
6506             UNKNOWN,                  // FE1A..FE1F
6507             INHERITED,                // FE20..FE2D
6508             UNKNOWN,                  // FE2E..FE2F
6509             COMMON,                   // FE30..FE52
6510             UNKNOWN,                  // FE53
6511             COMMON,                   // FE54..FE66
6512             UNKNOWN,                  // FE67
6513             COMMON,                   // FE68..FE6B
6514             UNKNOWN,                  // FE6C..FE6F
6515             ARABIC,                   // FE70..FE74
6516             UNKNOWN,                  // FE75
6517             ARABIC,                   // FE76..FEFC
6518             UNKNOWN,                  // FEFD..FEFE
6519             COMMON,                   // FEFF
6520             UNKNOWN,                  // FF00
6521             COMMON,                   // FF01..FF20
6522             LATIN,                    // FF21..FF3A
6523             COMMON,                   // FF3B..FF40
6524             LATIN,                    // FF41..FF5A
6525             COMMON,                   // FF5B..FF65
6526             KATAKANA,                 // FF66..FF6F
6527             COMMON,                   // FF70
6528             KATAKANA,                 // FF71..FF9D
6529             COMMON,                   // FF9E..FF9F
6530             HANGUL,                   // FFA0..FFBE
6531             UNKNOWN,                  // FFBF..FFC1
6532             HANGUL,                   // FFC2..FFC7
6533             UNKNOWN,                  // FFC8..FFC9
6534             HANGUL,                   // FFCA..FFCF
6535             UNKNOWN,                  // FFD0..FFD1
6536             HANGUL,                   // FFD2..FFD7
6537             UNKNOWN,                  // FFD8..FFD9
6538             HANGUL,                   // FFDA..FFDC
6539             UNKNOWN,                  // FFDD..FFDF
6540             COMMON,                   // FFE0..FFE6
6541             UNKNOWN,                  // FFE7
6542             COMMON,                   // FFE8..FFEE
6543             UNKNOWN,                  // FFEF..FFF8
6544             COMMON,                   // FFF9..FFFD
6545             UNKNOWN,                  // FFFE..FFFF
6546             LINEAR_B,                 // 10000..1000B
6547             UNKNOWN,                  // 1000C
6548             LINEAR_B,                 // 1000D..10026
6549             UNKNOWN,                  // 10027
6550             LINEAR_B,                 // 10028..1003A
6551             UNKNOWN,                  // 1003B
6552             LINEAR_B,                 // 1003C..1003D
6553             UNKNOWN,                  // 1003E
6554             LINEAR_B,                 // 1003F..1004D
6555             UNKNOWN,                  // 1004E..1004F
6556             LINEAR_B,                 // 10050..1005D
6557             UNKNOWN,                  // 1005E..1007F
6558             LINEAR_B,                 // 10080..100FA
6559             UNKNOWN,                  // 100FB..100FF
6560             COMMON,                   // 10100..10102
6561             UNKNOWN,                  // 10103..10106
6562             COMMON,                   // 10107..10133
6563             UNKNOWN,                  // 10134..10136
6564             COMMON,                   // 10137..1013F
6565             GREEK,                    // 10140..1018C
6566             UNKNOWN,                  // 1018D..1018F
6567             COMMON,                   // 10190..1019B
6568             UNKNOWN,                  // 1019C..1019F
6569             GREEK,                    // 101A0
6570             UNKNOWN,                  // 101A1..101CF
6571             COMMON,                   // 101D0..101FC
6572             INHERITED,                // 101FD
6573             UNKNOWN,                  // 101FE..1027F
6574             LYCIAN,                   // 10280..1029C
6575             UNKNOWN,                  // 1029D..1029F
6576             CARIAN,                   // 102A0..102D0
6577             UNKNOWN,                  // 102D1..102DF
6578             INHERITED,                // 102E0
6579             COMMON,                   // 102E1..102FB
6580             UNKNOWN,                  // 102FC..102FF
6581             OLD_ITALIC,               // 10300..10323
6582             UNKNOWN,                  // 10324..1032F
6583             GOTHIC,                   // 10330..1034A
6584             UNKNOWN,                  // 1034B..1034F
6585             OLD_PERMIC,               // 10350..1037A
6586             UNKNOWN,                  // 1037B..1037F
6587             UGARITIC,                 // 10380..1039D
6588             UNKNOWN,                  // 1039E
6589             UGARITIC,                 // 1039F
6590             OLD_PERSIAN,              // 103A0..103C3
6591             UNKNOWN,                  // 103C4..103C7
6592             OLD_PERSIAN,              // 103C8..103D5
6593             UNKNOWN,                  // 103D6..103FF
6594             DESERET,                  // 10400..1044F
6595             SHAVIAN,                  // 10450..1047F
6596             OSMANYA,                  // 10480..1049D
6597             UNKNOWN,                  // 1049E..1049F
6598             OSMANYA,                  // 104A0..104A9
6599             UNKNOWN,                  // 104AA..104FF
6600             ELBASAN,                  // 10500..10527
6601             UNKNOWN,                  // 10528..1052F
6602             CAUCASIAN_ALBANIAN,       // 10530..10563
6603             UNKNOWN,                  // 10564..1056E
6604             CAUCASIAN_ALBANIAN,       // 1056F
6605             UNKNOWN,                  // 10570..105FF
6606             LINEAR_A,                 // 10600..10736
6607             UNKNOWN,                  // 10737..1073F
6608             LINEAR_A,                 // 10740..10755
6609             UNKNOWN,                  // 10756..1075F
6610             LINEAR_A,                 // 10760..10767
6611             UNKNOWN,                  // 10768..107FF
6612             CYPRIOT,                  // 10800..10805
6613             UNKNOWN,                  // 10806..10807
6614             CYPRIOT,                  // 10808
6615             UNKNOWN,                  // 10809
6616             CYPRIOT,                  // 1080A..10835
6617             UNKNOWN,                  // 10836
6618             CYPRIOT,                  // 10837..10838
6619             UNKNOWN,                  // 10839..1083B
6620             CYPRIOT,                  // 1083C
6621             UNKNOWN,                  // 1083D..1083E
6622             CYPRIOT,                  // 1083F
6623             IMPERIAL_ARAMAIC,         // 10840..10855
6624             UNKNOWN,                  // 10856
6625             IMPERIAL_ARAMAIC,         // 10857..1085F
6626             PALMYRENE,                // 10860..1087F
6627             NABATAEAN,                // 10880..1089E
6628             UNKNOWN,                  // 1089F..108A6
6629             NABATAEAN,                // 108A7..108AF
6630             UNKNOWN,                  // 108B0..108FF
6631             PHOENICIAN,               // 10900..1091B
6632             UNKNOWN,                  // 1091C..1091E
6633             PHOENICIAN,               // 1091F
6634             LYDIAN,                   // 10920..10939
6635             UNKNOWN,                  // 1093A..1093E
6636             LYDIAN,                   // 1093F
6637             UNKNOWN,                  // 10940..1097F
6638             MEROITIC_HIEROGLYPHS,     // 10980..1099F
6639             MEROITIC_CURSIVE,         // 109A0..109B7
6640             UNKNOWN,                  // 109B8..109BD
6641             MEROITIC_CURSIVE,         // 109BE..109BF
6642             UNKNOWN,                  // 109C0..109FF
6643             KHAROSHTHI,               // 10A00..10A03
6644             UNKNOWN,                  // 10A04
6645             KHAROSHTHI,               // 10A05..10A06
6646             UNKNOWN,                  // 10A07..10A0B
6647             KHAROSHTHI,               // 10A0C..10A13
6648             UNKNOWN,                  // 10A14
6649             KHAROSHTHI,               // 10A15..10A17
6650             UNKNOWN,                  // 10A18
6651             KHAROSHTHI,               // 10A19..10A33
6652             UNKNOWN,                  // 10A34..10A37
6653             KHAROSHTHI,               // 10A38..10A3A
6654             UNKNOWN,                  // 10A3B..10A3E
6655             KHAROSHTHI,               // 10A3F..10A47
6656             UNKNOWN,                  // 10A48..10A4F
6657             KHAROSHTHI,               // 10A50..10A58
6658             UNKNOWN,                  // 10A59..10A5F
6659             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
6660             OLD_NORTH_ARABIAN,        // 10A80..10A9F
6661             UNKNOWN,                  // 10AA0..10ABF
6662             MANICHAEAN,               // 10AC0..10AE6
6663             UNKNOWN,                  // 10AE7..10AEA
6664             MANICHAEAN,               // 10AEB..10AF6
6665             UNKNOWN,                  // 10AF7..10AFF
6666             AVESTAN,                  // 10B00..10B35
6667             UNKNOWN,                  // 10B36..10B38
6668             AVESTAN,                  // 10B39..10B3F
6669             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
6670             UNKNOWN,                  // 10B56..10B57
6671             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
6672             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
6673             UNKNOWN,                  // 10B73..10B77
6674             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
6675             PSALTER_PAHLAVI,          // 10B80..10B91
6676             UNKNOWN,                  // 10B92..10B98
6677             PSALTER_PAHLAVI,          // 10B99..10B9C
6678             UNKNOWN,                  // 10B9D..10BA8
6679             PSALTER_PAHLAVI,          // 10BA9..10BAF
6680             UNKNOWN,                  // 10BB0..10BFF
6681             OLD_TURKIC,               // 10C00..10C48
6682             UNKNOWN,                  // 10C49..10E5F
6683             ARABIC,                   // 10E60..10E7E
6684             UNKNOWN,                  // 10E7F..10FFF
6685             BRAHMI,                   // 11000..1104D
6686             UNKNOWN,                  // 1104E..11051
6687             BRAHMI,                   // 11052..1106F
6688             UNKNOWN,                  // 11070..1107E
6689             BRAHMI,                   // 1107F
6690             KAITHI,                   // 11080..110C1
6691             UNKNOWN,                  // 110C2..110CF
6692             SORA_SOMPENG,             // 110D0..110E8
6693             UNKNOWN,                  // 110E9..110EF
6694             SORA_SOMPENG,             // 110F0..110F9
6695             UNKNOWN,                  // 110FA..110FF
6696             CHAKMA,                   // 11100..11134
6697             UNKNOWN,                  // 11135
6698             CHAKMA,                   // 11136..11143
6699             UNKNOWN,                  // 11144..1114F
6700             MAHAJANI,                 // 11150..11176
6701             UNKNOWN,                  // 11177..1117F
6702             SHARADA,                  // 11180..111C8
6703             UNKNOWN,                  // 111C9..111CC
6704             SHARADA,                  // 111CD
6705             UNKNOWN,                  // 111CE..111CF
6706             SHARADA,                  // 111D0..111DA
6707             UNKNOWN,                  // 111DB..111E0
6708             SINHALA,                  // 111E1..111F4
6709             UNKNOWN,                  // 111F5..111FF
6710             KHOJKI,                   // 11200..11211
6711             UNKNOWN,                  // 11212
6712             KHOJKI,                   // 11213..1123D
6713             UNKNOWN,                  // 1123E..112AF
6714             KHUDAWADI,                // 112B0..112EA
6715             UNKNOWN,                  // 112EB..112EF
6716             KHUDAWADI,                // 112F0..112F9
6717             UNKNOWN,                  // 112FA..11300
6718             GRANTHA,                  // 11301..11303
6719             UNKNOWN,                  // 11304
6720             GRANTHA,                  // 11305..1130C
6721             UNKNOWN,                  // 1130D..1130E
6722             GRANTHA,                  // 1130F..11310
6723             UNKNOWN,                  // 11311..11312
6724             GRANTHA,                  // 11313..11328
6725             UNKNOWN,                  // 11329
6726             GRANTHA,                  // 1132A..11330
6727             UNKNOWN,                  // 11331
6728             GRANTHA,                  // 11332..11333
6729             UNKNOWN,                  // 11334
6730             GRANTHA,                  // 11335..11339
6731             UNKNOWN,                  // 1133A..1133B
6732             GRANTHA,                  // 1133C..11344
6733             UNKNOWN,                  // 11345..11346
6734             GRANTHA,                  // 11347..11348
6735             UNKNOWN,                  // 11349..1134A
6736             GRANTHA,                  // 1134B..1134D
6737             UNKNOWN,                  // 1134E..11356
6738             GRANTHA,                  // 11357
6739             UNKNOWN,                  // 11358..1135C
6740             GRANTHA,                  // 1135D..11363
6741             UNKNOWN,                  // 11364..11365
6742             GRANTHA,                  // 11366..1136C
6743             UNKNOWN,                  // 1136D..1136F
6744             GRANTHA,                  // 11370..11374
6745             UNKNOWN,                  // 11375..1147F
6746             TIRHUTA,                  // 11480..114C7
6747             UNKNOWN,                  // 114C8..114CF
6748             TIRHUTA,                  // 114D0..114D9
6749             UNKNOWN,                  // 114DA..1157F
6750             SIDDHAM,                  // 11580..115B5
6751             UNKNOWN,                  // 115B6..115B7
6752             SIDDHAM,                  // 115B8..115C9
6753             UNKNOWN,                  // 115CA..115FF
6754             MODI,                     // 11600..11644
6755             UNKNOWN,                  // 11645..1164F
6756             MODI,                     // 11650..11659
6757             UNKNOWN,                  // 1165A..1167F
6758             TAKRI,                    // 11680..116B7
6759             UNKNOWN,                  // 116B8..116BF
6760             TAKRI,                    // 116C0..116C9
6761             UNKNOWN,                  // 116CA..1189F
6762             WARANG_CITI,              // 118A0..118F2
6763             UNKNOWN,                  // 118F3..118FE
6764             WARANG_CITI,              // 118FF
6765             UNKNOWN,                  // 11900..11ABF
6766             PAU_CIN_HAU,              // 11AC0..11AF8
6767             UNKNOWN,                  // 11AF9..11FFF
6768             CUNEIFORM,                // 12000..12398
6769             UNKNOWN,                  // 12399..123FF
6770             CUNEIFORM,                // 12400..1246E
6771             UNKNOWN,                  // 1246F
6772             CUNEIFORM,                // 12470..12474
6773             UNKNOWN,                  // 12475..12FFF
6774             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
6775             UNKNOWN,                  // 1342F..167FF
6776             BAMUM,                    // 16800..16A38
6777             UNKNOWN,                  // 16A39..16A3F
6778             MRO,                      // 16A40..16A5E
6779             UNKNOWN,                  // 16A5F
6780             MRO,                      // 16A60..16A69
6781             UNKNOWN,                  // 16A6A..16A6D
6782             MRO,                      // 16A6E..16A6F
6783             UNKNOWN,                  // 16A70..16ACF
6784             BASSA_VAH,                // 16AD0..16AED
6785             UNKNOWN,                  // 16AEE..16AEF
6786             BASSA_VAH,                // 16AF0..16AF5
6787             UNKNOWN,                  // 16AF6..16AFF
6788             PAHAWH_HMONG,             // 16B00..16B45
6789             UNKNOWN,                  // 16B46..16B4F
6790             PAHAWH_HMONG,             // 16B50..16B59
6791             UNKNOWN,                  // 16B5A
6792             PAHAWH_HMONG,             // 16B5B..16B61
6793             UNKNOWN,                  // 16B62
6794             PAHAWH_HMONG,             // 16B63..16B77
6795             UNKNOWN,                  // 16B78..16B7C
6796             PAHAWH_HMONG,             // 16B7D..16B8F
6797             UNKNOWN,                  // 16B90..16EFF
6798             MIAO,                     // 16F00..16F44
6799             UNKNOWN,                  // 16F45..16F4F
6800             MIAO,                     // 16F50..16F7E
6801             UNKNOWN,                  // 16F7F..16F8E
6802             MIAO,                     // 16F8F..16F9F
6803             UNKNOWN,                  // 16FA0..1AFFF
6804             KATAKANA,                 // 1B000
6805             HIRAGANA,                 // 1B001
6806             UNKNOWN,                  // 1B002..1BBFF
6807             DUPLOYAN,                 // 1BC00..1BC6A
6808             UNKNOWN,                  // 1BC6B..1BC6F
6809             DUPLOYAN,                 // 1BC70..1BC7C
6810             UNKNOWN,                  // 1BC7D..1BC7F
6811             DUPLOYAN,                 // 1BC80..1BC88
6812             UNKNOWN,                  // 1BC89..1BC8F
6813             DUPLOYAN,                 // 1BC90..1BC99
6814             UNKNOWN,                  // 1BC9A..1BC9B
6815             DUPLOYAN,                 // 1BC9C..1BC9F
6816             COMMON,                   // 1BCA0..1BCA3
6817             UNKNOWN,                  // 1BCA4..1CFFF
6818             COMMON,                   // 1D000..1D0F5
6819             UNKNOWN,                  // 1D0F6..1D0FF
6820             COMMON,                   // 1D100..1D126
6821             UNKNOWN,                  // 1D127..1D128
6822             COMMON,                   // 1D129..1D166
6823             INHERITED,                // 1D167..1D169
6824             COMMON,                   // 1D16A..1D17A
6825             INHERITED,                // 1D17B..1D182
6826             COMMON,                   // 1D183..1D184
6827             INHERITED,                // 1D185..1D18B
6828             COMMON,                   // 1D18C..1D1A9
6829             INHERITED,                // 1D1AA..1D1AD
6830             COMMON,                   // 1D1AE..1D1DD
6831             UNKNOWN,                  // 1D1DE..1D1FF
6832             GREEK,                    // 1D200..1D245
6833             UNKNOWN,                  // 1D246..1D2FF
6834             COMMON,                   // 1D300..1D356
6835             UNKNOWN,                  // 1D357..1D35F
6836             COMMON,                   // 1D360..1D371
6837             UNKNOWN,                  // 1D372..1D3FF
6838             COMMON,                   // 1D400..1D454
6839             UNKNOWN,                  // 1D455
6840             COMMON,                   // 1D456..1D49C
6841             UNKNOWN,                  // 1D49D
6842             COMMON,                   // 1D49E..1D49F
6843             UNKNOWN,                  // 1D4A0..1D4A1
6844             COMMON,                   // 1D4A2
6845             UNKNOWN,                  // 1D4A3..1D4A4
6846             COMMON,                   // 1D4A5..1D4A6
6847             UNKNOWN,                  // 1D4A7..1D4A8
6848             COMMON,                   // 1D4A9..1D4AC
6849             UNKNOWN,                  // 1D4AD
6850             COMMON,                   // 1D4AE..1D4B9
6851             UNKNOWN,                  // 1D4BA
6852             COMMON,                   // 1D4BB
6853             UNKNOWN,                  // 1D4BC
6854             COMMON,                   // 1D4BD..1D4C3
6855             UNKNOWN,                  // 1D4C4
6856             COMMON,                   // 1D4C5..1D505
6857             UNKNOWN,                  // 1D506
6858             COMMON,                   // 1D507..1D50A
6859             UNKNOWN,                  // 1D50B..1D50C
6860             COMMON,                   // 1D50D..1D514
6861             UNKNOWN,                  // 1D515
6862             COMMON,                   // 1D516..1D51C
6863             UNKNOWN,                  // 1D51D
6864             COMMON,                   // 1D51E..1D539
6865             UNKNOWN,                  // 1D53A
6866             COMMON,                   // 1D53B..1D53E
6867             UNKNOWN,                  // 1D53F
6868             COMMON,                   // 1D540..1D544
6869             UNKNOWN,                  // 1D545
6870             COMMON,                   // 1D546
6871             UNKNOWN,                  // 1D547..1D549
6872             COMMON,                   // 1D54A..1D550
6873             UNKNOWN,                  // 1D551
6874             COMMON,                   // 1D552..1D6A5
6875             UNKNOWN,                  // 1D6A6..1D6A7
6876             COMMON,                   // 1D6A8..1D7CB
6877             UNKNOWN,                  // 1D7CC..1D7CD
6878             COMMON,                   // 1D7CE..1D7FF
6879             UNKNOWN,                  // 1D800..1E7FF
6880             MENDE_KIKAKUI,            // 1E800..1E8C4
6881             UNKNOWN,                  // 1E8C5..1E8C6
6882             MENDE_KIKAKUI,            // 1E8C7..1E8D6
6883             UNKNOWN,                  // 1E8D7..1EDFF
6884             ARABIC,                   // 1EE00..1EE03
6885             UNKNOWN,                  // 1EE04
6886             ARABIC,                   // 1EE05..1EE1F
6887             UNKNOWN,                  // 1EE20
6888             ARABIC,                   // 1EE21..1EE22
6889             UNKNOWN,                  // 1EE23
6890             ARABIC,                   // 1EE24
6891             UNKNOWN,                  // 1EE25..1EE26
6892             ARABIC,                   // 1EE27
6893             UNKNOWN,                  // 1EE28
6894             ARABIC,                   // 1EE29..1EE32
6895             UNKNOWN,                  // 1EE33
6896             ARABIC,                   // 1EE34..1EE37
6897             UNKNOWN,                  // 1EE38
6898             ARABIC,                   // 1EE39
6899             UNKNOWN,                  // 1EE3A
6900             ARABIC,                   // 1EE3B
6901             UNKNOWN,                  // 1EE3C..1EE41
6902             ARABIC,                   // 1EE42
6903             UNKNOWN,                  // 1EE43..1EE46
6904             ARABIC,                   // 1EE47
6905             UNKNOWN,                  // 1EE48
6906             ARABIC,                   // 1EE49
6907             UNKNOWN,                  // 1EE4A
6908             ARABIC,                   // 1EE4B
6909             UNKNOWN,                  // 1EE4C
6910             ARABIC,                   // 1EE4D..1EE4F
6911             UNKNOWN,                  // 1EE50
6912             ARABIC,                   // 1EE51..1EE52
6913             UNKNOWN,                  // 1EE53
6914             ARABIC,                   // 1EE54
6915             UNKNOWN,                  // 1EE55..1EE56
6916             ARABIC,                   // 1EE57
6917             UNKNOWN,                  // 1EE58
6918             ARABIC,                   // 1EE59
6919             UNKNOWN,                  // 1EE5A
6920             ARABIC,                   // 1EE5B
6921             UNKNOWN,                  // 1EE5C
6922             ARABIC,                   // 1EE5D
6923             UNKNOWN,                  // 1EE5E
6924             ARABIC,                   // 1EE5F
6925             UNKNOWN,                  // 1EE60
6926             ARABIC,                   // 1EE61..1EE62
6927             UNKNOWN,                  // 1EE63
6928             ARABIC,                   // 1EE64
6929             UNKNOWN,                  // 1EE65..1EE66
6930             ARABIC,                   // 1EE67..1EE6A
6931             UNKNOWN,                  // 1EE6B
6932             ARABIC,                   // 1EE6C..1EE72
6933             UNKNOWN,                  // 1EE73
6934             ARABIC,                   // 1EE74..1EE77
6935             UNKNOWN,                  // 1EE78
6936             ARABIC,                   // 1EE79..1EE7C
6937             UNKNOWN,                  // 1EE7D
6938             ARABIC,                   // 1EE7E
6939             UNKNOWN,                  // 1EE7F
6940             ARABIC,                   // 1EE80..1EE89
6941             UNKNOWN,                  // 1EE8A
6942             ARABIC,                   // 1EE8B..1EE9B
6943             UNKNOWN,                  // 1EE9C..1EEA0
6944             ARABIC,                   // 1EEA1..1EEA3
6945             UNKNOWN,                  // 1EEA4
6946             ARABIC,                   // 1EEA5..1EEA9
6947             UNKNOWN,                  // 1EEAA
6948             ARABIC,                   // 1EEAB..1EEBB
6949             UNKNOWN,                  // 1EEBC..1EEEF
6950             ARABIC,                   // 1EEF0..1EEF1
6951             UNKNOWN,                  // 1EEF2..1EFFF
6952             COMMON,                   // 1F000..1F02B
6953             UNKNOWN,                  // 1F02C..1F02F
6954             COMMON,                   // 1F030..1F093
6955             UNKNOWN,                  // 1F094..1F09F
6956             COMMON,                   // 1F0A0..1F0AE
6957             UNKNOWN,                  // 1F0AF..1F0B0
6958             COMMON,                   // 1F0B1..1F0BF
6959             UNKNOWN,                  // 1F0C0
6960             COMMON,                   // 1F0C1..1F0CF
6961             UNKNOWN,                  // 1F0D0
6962             COMMON,                   // 1F0D1..1F0F5
6963             UNKNOWN,                  // 1F0F6..1F0FF
6964             COMMON,                   // 1F100..1F10C
6965             UNKNOWN,                  // 1F10D..1F10F
6966             COMMON,                   // 1F110..1F12E
6967             UNKNOWN,                  // 1F12F
6968             COMMON,                   // 1F130..1F16B
6969             UNKNOWN,                  // 1F16C..1F16F
6970             COMMON,                   // 1F170..1F19A
6971             UNKNOWN,                  // 1F19B..1F1E5
6972             COMMON,                   // 1F1E6..1F1FF
6973             HIRAGANA,                 // 1F200
6974             COMMON,                   // 1F201..1F202
6975             UNKNOWN,                  // 1F203..1F20F
6976             COMMON,                   // 1F210..1F23A
6977             UNKNOWN,                  // 1F23B..1F23F
6978             COMMON,                   // 1F240..1F248
6979             UNKNOWN,                  // 1F249..1F24F
6980             COMMON,                   // 1F250..1F251
6981             UNKNOWN,                  // 1F252..1F2FF
6982             COMMON,                   // 1F300..1F32C
6983             UNKNOWN,                  // 1F32D..1F32F
6984             COMMON,                   // 1F330..1F37D
6985             UNKNOWN,                  // 1F37E..1F37F
6986             COMMON,                   // 1F380..1F3CE
6987             UNKNOWN,                  // 1F3CF..1F3D3
6988             COMMON,                   // 1F3D4..1F3F7
6989             UNKNOWN,                  // 1F3F8..1F3FF
6990             COMMON,                   // 1F400..1F4FE
6991             UNKNOWN,                  // 1F4FF
6992             COMMON,                   // 1F500..1F54A
6993             UNKNOWN,                  // 1F54B..1F54F
6994             COMMON,                   // 1F550..1F579
6995             UNKNOWN,                  // 1F57A
6996             COMMON,                   // 1F57B..1F5A3
6997             UNKNOWN,                  // 1F5A4
6998             COMMON,                   // 1F5A5..1F642
6999             UNKNOWN,                  // 1F643..1F644
7000             COMMON,                   // 1F645..1F6CF
7001             UNKNOWN,                  // 1F6D0..1F6DF
7002             COMMON,                   // 1F6E0..1F6EC
7003             UNKNOWN,                  // 1F6ED..1F6EF
7004             COMMON,                   // 1F6F0..1F6F3
7005             UNKNOWN,                  // 1F6F4..1F6FF
7006             COMMON,                   // 1F700..1F773
7007             UNKNOWN,                  // 1F774..1F77F
7008             COMMON,                   // 1F780..1F7D4
7009             UNKNOWN,                  // 1F7D5..1F7FF
7010             COMMON,                   // 1F800..1F80B
7011             UNKNOWN,                  // 1F80C..1F80F
7012             COMMON,                   // 1F810..1F847
7013             UNKNOWN,                  // 1F848..1F84F
7014             COMMON,                   // 1F850..1F859
7015             UNKNOWN,                  // 1F85A..1F85F
7016             COMMON,                   // 1F860..1F887
7017             UNKNOWN,                  // 1F888..1F88F
7018             COMMON,                   // 1F890..1F8AD
7019             UNKNOWN,                  // 1F8AE..1FFFF
7020             HAN,                      // 20000..2A6D6
7021             UNKNOWN,                  // 2A6D7..2A6FF
7022             HAN,                      // 2A700..2B734
7023             UNKNOWN,                  // 2B735..2B73F
7024             HAN,                      // 2B740..2B81D
7025             UNKNOWN,                  // 2B81E..2F7FF
7026             HAN,                      // 2F800..2FA1D
7027             UNKNOWN,                  // 2FA1E..E0000
7028             COMMON,                   // E0001
7029             UNKNOWN,                  // E0002..E001F
7030             COMMON,                   // E0020..E007F
7031             UNKNOWN,                  // E0080..E00FF
7032             INHERITED,                // E0100..E01EF
7033             UNKNOWN                   // E01F0..10FFFF
7034         };
7035 
7036         private static HashMap<String, Character.UnicodeScript> aliases;
7037         static {
7038             aliases = new HashMap<>(128);
7039             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7040             aliases.put("ARAB", ARABIC);
7041             aliases.put("ARMI", IMPERIAL_ARAMAIC);
7042             aliases.put("ARMN", ARMENIAN);
7043             aliases.put("AVST", AVESTAN);
7044             aliases.put("BALI", BALINESE);
7045             aliases.put("BAMU", BAMUM);
7046             aliases.put("BASS", BASSA_VAH);
7047             aliases.put("BATK", BATAK);
7048             aliases.put("BENG", BENGALI);
7049             aliases.put("BOPO", BOPOMOFO);
7050             aliases.put("BRAH", BRAHMI);
7051             aliases.put("BRAI", BRAILLE);
7052             aliases.put("BUGI", BUGINESE);
7053             aliases.put("BUHD", BUHID);
7054             aliases.put("CAKM", CHAKMA);
7055             aliases.put("CANS", CANADIAN_ABORIGINAL);
7056             aliases.put("CARI", CARIAN);
7057             aliases.put("CHAM", CHAM);
7058             aliases.put("CHER", CHEROKEE);
7059             aliases.put("COPT", COPTIC);
7060             aliases.put("CPRT", CYPRIOT);
7061             aliases.put("CYRL", CYRILLIC);
7062             aliases.put("DEVA", DEVANAGARI);
7063             aliases.put("DSRT", DESERET);
7064             aliases.put("DUPL", DUPLOYAN);
7065             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7066             aliases.put("ELBA", ELBASAN);
7067             aliases.put("ETHI", ETHIOPIC);
7068             aliases.put("GEOR", GEORGIAN);
7069             aliases.put("GLAG", GLAGOLITIC);
7070             aliases.put("GOTH", GOTHIC);
7071             aliases.put("GRAN", GRANTHA);
7072             aliases.put("GREK", GREEK);
7073             aliases.put("GUJR", GUJARATI);
7074             aliases.put("GURU", GURMUKHI);
7075             aliases.put("HANG", HANGUL);
7076             aliases.put("HANI", HAN);
7077             aliases.put("HANO", HANUNOO);
7078             aliases.put("HEBR", HEBREW);
7079             aliases.put("HIRA", HIRAGANA);
7080             aliases.put("HMNG", PAHAWH_HMONG);
7081             // it appears we don't have the KATAKANA_OR_HIRAGANA
7082             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7083             aliases.put("ITAL", OLD_ITALIC);
7084             aliases.put("JAVA", JAVANESE);
7085             aliases.put("KALI", KAYAH_LI);
7086             aliases.put("KANA", KATAKANA);
7087             aliases.put("KHAR", KHAROSHTHI);
7088             aliases.put("KHMR", KHMER);
7089             aliases.put("KHOJ", KHOJKI);
7090             aliases.put("KNDA", KANNADA);
7091             aliases.put("KTHI", KAITHI);
7092             aliases.put("LANA", TAI_THAM);
7093             aliases.put("LAOO", LAO);
7094             aliases.put("LATN", LATIN);
7095             aliases.put("LEPC", LEPCHA);
7096             aliases.put("LIMB", LIMBU);
7097             aliases.put("LINA", LINEAR_A);
7098             aliases.put("LINB", LINEAR_B);
7099             aliases.put("LISU", LISU);
7100             aliases.put("LYCI", LYCIAN);
7101             aliases.put("LYDI", LYDIAN);
7102             aliases.put("MAHJ", MAHAJANI);
7103             aliases.put("MAND", MANDAIC);
7104             aliases.put("MANI", MANICHAEAN);
7105             aliases.put("MEND", MENDE_KIKAKUI);
7106             aliases.put("MERC", MEROITIC_CURSIVE);
7107             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7108             aliases.put("MLYM", MALAYALAM);
7109             aliases.put("MODI", MODI);
7110             aliases.put("MONG", MONGOLIAN);
7111             aliases.put("MROO", MRO);
7112             aliases.put("MTEI", MEETEI_MAYEK);
7113             aliases.put("MYMR", MYANMAR);
7114             aliases.put("NARB", OLD_NORTH_ARABIAN);
7115             aliases.put("NBAT", NABATAEAN);
7116             aliases.put("NKOO", NKO);
7117             aliases.put("OGAM", OGHAM);
7118             aliases.put("OLCK", OL_CHIKI);
7119             aliases.put("ORKH", OLD_TURKIC);
7120             aliases.put("ORYA", ORIYA);
7121             aliases.put("OSMA", OSMANYA);
7122             aliases.put("PALM", PALMYRENE);
7123             aliases.put("PAUC", PAU_CIN_HAU);
7124             aliases.put("PERM", OLD_PERMIC);
7125             aliases.put("PHAG", PHAGS_PA);
7126             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
7127             aliases.put("PHLP", PSALTER_PAHLAVI);
7128             aliases.put("PHNX", PHOENICIAN);
7129             aliases.put("PLRD", MIAO);
7130             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
7131             aliases.put("RJNG", REJANG);
7132             aliases.put("RUNR", RUNIC);
7133             aliases.put("SAMR", SAMARITAN);
7134             aliases.put("SARB", OLD_SOUTH_ARABIAN);
7135             aliases.put("SAUR", SAURASHTRA);
7136             aliases.put("SHAW", SHAVIAN);
7137             aliases.put("SHRD", SHARADA);
7138             aliases.put("SIDD", SIDDHAM);
7139             aliases.put("SIND", KHUDAWADI);
7140             aliases.put("SINH", SINHALA);
7141             aliases.put("SORA", SORA_SOMPENG);
7142             aliases.put("SUND", SUNDANESE);
7143             aliases.put("SYLO", SYLOTI_NAGRI);
7144             aliases.put("SYRC", SYRIAC);
7145             aliases.put("TAGB", TAGBANWA);
7146             aliases.put("TAKR", TAKRI);
7147             aliases.put("TALE", TAI_LE);
7148             aliases.put("TALU", NEW_TAI_LUE);
7149             aliases.put("TAML", TAMIL);
7150             aliases.put("TAVT", TAI_VIET);
7151             aliases.put("TELU", TELUGU);
7152             aliases.put("TFNG", TIFINAGH);
7153             aliases.put("TGLG", TAGALOG);
7154             aliases.put("THAA", THAANA);
7155             aliases.put("THAI", THAI);
7156             aliases.put("TIBT", TIBETAN);
7157             aliases.put("TIRH", TIRHUTA);
7158             aliases.put("UGAR", UGARITIC);
7159             aliases.put("VAII", VAI);
7160             aliases.put("WARA", WARANG_CITI);
7161             aliases.put("XPEO", OLD_PERSIAN);
7162             aliases.put("XSUX", CUNEIFORM);
7163             aliases.put("YIII", YI);
7164             aliases.put("ZINH", INHERITED);
7165             aliases.put("ZYYY", COMMON);
7166             aliases.put("ZZZZ", UNKNOWN);
7167         }
7168 
7169         /**
7170          * Returns the enum constant representing the Unicode script of which
7171          * the given character (Unicode code point) is assigned to.
7172          *
7173          * @param   codePoint the character (Unicode code point) in question.
7174          * @return  The {@code UnicodeScript} constant representing the
7175          *          Unicode script of which this character is assigned to.
7176          *
7177          * @exception IllegalArgumentException if the specified
7178          * {@code codePoint} is an invalid Unicode code point.
7179          * @see Character#isValidCodePoint(int)
7180          *
7181          */
7182         public static UnicodeScript of(int codePoint) {
7183             if (!isValidCodePoint(codePoint))
7184                 throw new IllegalArgumentException();
7185             int type = getType(codePoint);
7186             // leave SURROGATE and PRIVATE_USE for table lookup
7187             if (type == UNASSIGNED)
7188                 return UNKNOWN;
7189             int index = Arrays.binarySearch(scriptStarts, codePoint);
7190             if (index < 0)
7191                 index = -index - 2;
7192             return scripts[index];
7193         }
7194 
7195         /**
7196          * Returns the UnicodeScript constant with the given Unicode script
7197          * name or the script name alias. Script names and their aliases are
7198          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
7199          * and {@code PropertyValueAliases<version>.txt} define script names
7200          * and the script name aliases for a particular version of the
7201          * standard. The {@link Character} class specifies the version of
7202          * the standard that it supports.
7203          * <p>
7204          * Character case is ignored for all of the valid script names.
7205          * The en_US locale's case mapping rules are used to provide
7206          * case-insensitive string comparisons for script name validation.
7207          *
7208          * @param scriptName A {@code UnicodeScript} name.
7209          * @return The {@code UnicodeScript} constant identified
7210          *         by {@code scriptName}
7211          * @throws IllegalArgumentException if {@code scriptName} is an
7212          *         invalid name
7213          * @throws NullPointerException if {@code scriptName} is null
7214          */
7215         public static final UnicodeScript forName(String scriptName) {
7216             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
7217                                  //.replace(' ', '_'));
7218             UnicodeScript sc = aliases.get(scriptName);
7219             if (sc != null)
7220                 return sc;
7221             return valueOf(scriptName);
7222         }
7223     }
7224 
7225     /**
7226      * The value of the {@code Character}.
7227      *
7228      * @serial
7229      */
7230     private final char value;
7231 
7232     /** use serialVersionUID from JDK 1.0.2 for interoperability */
7233     private static final long serialVersionUID = 3786198910865385080L;
7234 
7235     /**
7236      * Constructs a newly allocated {@code Character} object that
7237      * represents the specified {@code char} value.
7238      *
7239      * @param  value   the value to be represented by the
7240      *                  {@code Character} object.
7241      */
7242     public Character(char value) {
7243         this.value = value;
7244     }
7245 
7246     private static class CharacterCache {
7247         private CharacterCache(){}
7248 
7249         static final Character cache[] = new Character[127 + 1];
7250 
7251         static {
7252             for (int i = 0; i < cache.length; i++)
7253                 cache[i] = new Character((char)i);
7254         }
7255     }
7256 
7257     /**
7258      * Returns a {@code Character} instance representing the specified
7259      * {@code char} value.
7260      * If a new {@code Character} instance is not required, this method
7261      * should generally be used in preference to the constructor
7262      * {@link #Character(char)}, as this method is likely to yield
7263      * significantly better space and time performance by caching
7264      * frequently requested values.
7265      *
7266      * This method will always cache values in the range {@code
7267      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
7268      * cache other values outside of this range.
7269      *
7270      * @param  c a char value.
7271      * @return a {@code Character} instance representing {@code c}.
7272      * @since  1.5
7273      */
7274     @HotSpotIntrinsicCandidate
7275     public static Character valueOf(char c) {
7276         if (c <= 127) { // must cache
7277             return CharacterCache.cache[(int)c];
7278         }
7279         return new Character(c);
7280     }
7281 
7282     /**
7283      * Returns the value of this {@code Character} object.
7284      * @return  the primitive {@code char} value represented by
7285      *          this object.
7286      */
7287     @HotSpotIntrinsicCandidate
7288     public char charValue() {
7289         return value;
7290     }
7291 
7292     /**
7293      * Returns a hash code for this {@code Character}; equal to the result
7294      * of invoking {@code charValue()}.
7295      *
7296      * @return a hash code value for this {@code Character}
7297      */
7298     @Override
7299     public int hashCode() {
7300         return Character.hashCode(value);
7301     }
7302 
7303     /**
7304      * Returns a hash code for a {@code char} value; compatible with
7305      * {@code Character.hashCode()}.
7306      *
7307      * @since 1.8
7308      *
7309      * @param value The {@code char} for which to return a hash code.
7310      * @return a hash code value for a {@code char} value.
7311      */
7312     public static int hashCode(char value) {
7313         return (int)value;
7314     }
7315 
7316     /**
7317      * Compares this object against the specified object.
7318      * The result is {@code true} if and only if the argument is not
7319      * {@code null} and is a {@code Character} object that
7320      * represents the same {@code char} value as this object.
7321      *
7322      * @param   obj   the object to compare with.
7323      * @return  {@code true} if the objects are the same;
7324      *          {@code false} otherwise.
7325      */
7326     public boolean equals(Object obj) {
7327         if (obj instanceof Character) {
7328             return value == ((Character)obj).charValue();
7329         }
7330         return false;
7331     }
7332 
7333     /**
7334      * Returns a {@code String} object representing this
7335      * {@code Character}'s value.  The result is a string of
7336      * length 1 whose sole component is the primitive
7337      * {@code char} value represented by this
7338      * {@code Character} object.
7339      *
7340      * @return  a string representation of this object.
7341      */
7342     public String toString() {
7343         char buf[] = {value};
7344         return String.valueOf(buf);
7345     }
7346 
7347     /**
7348      * Returns a {@code String} object representing the
7349      * specified {@code char}.  The result is a string of length
7350      * 1 consisting solely of the specified {@code char}.
7351      *
7352      * @param c the {@code char} to be converted
7353      * @return the string representation of the specified {@code char}
7354      * @since 1.4
7355      */
7356     public static String toString(char c) {
7357         return String.valueOf(c);
7358     }
7359 
7360     /**
7361      * Determines whether the specified code point is a valid
7362      * <a href="http://www.unicode.org/glossary/#code_point">
7363      * Unicode code point value</a>.
7364      *
7365      * @param  codePoint the Unicode code point to be tested
7366      * @return {@code true} if the specified code point value is between
7367      *         {@link #MIN_CODE_POINT} and
7368      *         {@link #MAX_CODE_POINT} inclusive;
7369      *         {@code false} otherwise.
7370      * @since  1.5
7371      */
7372     public static boolean isValidCodePoint(int codePoint) {
7373         // Optimized form of:
7374         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
7375         int plane = codePoint >>> 16;
7376         return plane < ((MAX_CODE_POINT + 1) >>> 16);
7377     }
7378 
7379     /**
7380      * Determines whether the specified character (Unicode code point)
7381      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
7382      * Such code points can be represented using a single {@code char}.
7383      *
7384      * @param  codePoint the character (Unicode code point) to be tested
7385      * @return {@code true} if the specified code point is between
7386      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
7387      *         {@code false} otherwise.
7388      * @since  1.7
7389      */
7390     public static boolean isBmpCodePoint(int codePoint) {
7391         return codePoint >>> 16 == 0;
7392         // Optimized form of:
7393         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
7394         // We consistently use logical shift (>>>) to facilitate
7395         // additional runtime optimizations.
7396     }
7397 
7398     /**
7399      * Determines whether the specified character (Unicode code point)
7400      * is in the <a href="#supplementary">supplementary character</a> range.
7401      *
7402      * @param  codePoint the character (Unicode code point) to be tested
7403      * @return {@code true} if the specified code point is between
7404      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
7405      *         {@link #MAX_CODE_POINT} inclusive;
7406      *         {@code false} otherwise.
7407      * @since  1.5
7408      */
7409     public static boolean isSupplementaryCodePoint(int codePoint) {
7410         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
7411             && codePoint <  MAX_CODE_POINT + 1;
7412     }
7413 
7414     /**
7415      * Determines if the given {@code char} value is a
7416      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
7417      * Unicode high-surrogate code unit</a>
7418      * (also known as <i>leading-surrogate code unit</i>).
7419      *
7420      * <p>Such values do not represent characters by themselves,
7421      * but are used in the representation of
7422      * <a href="#supplementary">supplementary characters</a>
7423      * in the UTF-16 encoding.
7424      *
7425      * @param  ch the {@code char} value to be tested.
7426      * @return {@code true} if the {@code char} value is between
7427      *         {@link #MIN_HIGH_SURROGATE} and
7428      *         {@link #MAX_HIGH_SURROGATE} inclusive;
7429      *         {@code false} otherwise.
7430      * @see    Character#isLowSurrogate(char)
7431      * @see    Character.UnicodeBlock#of(int)
7432      * @since  1.5
7433      */
7434     public static boolean isHighSurrogate(char ch) {
7435         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
7436         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
7437     }
7438 
7439     /**
7440      * Determines if the given {@code char} value is a
7441      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
7442      * Unicode low-surrogate code unit</a>
7443      * (also known as <i>trailing-surrogate code unit</i>).
7444      *
7445      * <p>Such values do not represent characters by themselves,
7446      * but are used in the representation of
7447      * <a href="#supplementary">supplementary characters</a>
7448      * in the UTF-16 encoding.
7449      *
7450      * @param  ch the {@code char} value to be tested.
7451      * @return {@code true} if the {@code char} value is between
7452      *         {@link #MIN_LOW_SURROGATE} and
7453      *         {@link #MAX_LOW_SURROGATE} inclusive;
7454      *         {@code false} otherwise.
7455      * @see    Character#isHighSurrogate(char)
7456      * @since  1.5
7457      */
7458     public static boolean isLowSurrogate(char ch) {
7459         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
7460     }
7461 
7462     /**
7463      * Determines if the given {@code char} value is a Unicode
7464      * <i>surrogate code unit</i>.
7465      *
7466      * <p>Such values do not represent characters by themselves,
7467      * but are used in the representation of
7468      * <a href="#supplementary">supplementary characters</a>
7469      * in the UTF-16 encoding.
7470      *
7471      * <p>A char value is a surrogate code unit if and only if it is either
7472      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
7473      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
7474      *
7475      * @param  ch the {@code char} value to be tested.
7476      * @return {@code true} if the {@code char} value is between
7477      *         {@link #MIN_SURROGATE} and
7478      *         {@link #MAX_SURROGATE} inclusive;
7479      *         {@code false} otherwise.
7480      * @since  1.7
7481      */
7482     public static boolean isSurrogate(char ch) {
7483         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
7484     }
7485 
7486     /**
7487      * Determines whether the specified pair of {@code char}
7488      * values is a valid
7489      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
7490      * Unicode surrogate pair</a>.
     *
7492      * <p>This method is equivalent to the expression:
7493      * <blockquote><pre>{@code
7494      * isHighSurrogate(high) && isLowSurrogate(low)
7495      * }</pre></blockquote>
7496      *
7497      * @param  high the high-surrogate code value to be tested
7498      * @param  low the low-surrogate code value to be tested
7499      * @return {@code true} if the specified high and
7500      * low-surrogate code values represent a valid surrogate pair;
7501      * {@code false} otherwise.
7502      * @since  1.5
7503      */
7504     public static boolean isSurrogatePair(char high, char low) {
7505         return isHighSurrogate(high) && isLowSurrogate(low);
7506     }
7507 
7508     /**
7509      * Determines the number of {@code char} values needed to
7510      * represent the specified character (Unicode code point). If the
7511      * specified character is equal to or greater than 0x10000, then
7512      * the method returns 2. Otherwise, the method returns 1.
7513      *
7514      * <p>This method doesn't validate the specified character to be a
7515      * valid Unicode code point. The caller must validate the
7516      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
7517      * if necessary.
7518      *
7519      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  2 if the specified code point is greater than or equal to 0x10000; 1 otherwise.
7521      * @see     Character#isSupplementaryCodePoint(int)
7522      * @since   1.5
7523      */
7524     public static int charCount(int codePoint) {
7525         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
7526     }
7527 
7528     /**
7529      * Converts the specified surrogate pair to its supplementary code
7530      * point value. This method does not validate the specified
7531      * surrogate pair. The caller must validate it using {@link
7532      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
7533      *
7534      * @param  high the high-surrogate code unit
7535      * @param  low the low-surrogate code unit
7536      * @return the supplementary code point composed from the
7537      *         specified surrogate pair.
7538      * @since  1.5
7539      */
7540     public static int toCodePoint(char high, char low) {
7541         // Optimized form of:
7542         // return ((high - MIN_HIGH_SURROGATE) << 10)
7543         //         + (low - MIN_LOW_SURROGATE)
7544         //         + MIN_SUPPLEMENTARY_CODE_POINT;
7545         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
7546                                        - (MIN_HIGH_SURROGATE << 10)
7547                                        - MIN_LOW_SURROGATE);
7548     }
7549 
7550     /**
7551      * Returns the code point at the given index of the
7552      * {@code CharSequence}. If the {@code char} value at
7553      * the given index in the {@code CharSequence} is in the
7554      * high-surrogate range, the following index is less than the
7555      * length of the {@code CharSequence}, and the
7556      * {@code char} value at the following index is in the
7557      * low-surrogate range, then the supplementary code point
7558      * corresponding to this surrogate pair is returned. Otherwise,
7559      * the {@code char} value at the given index is returned.
7560      *
7561      * @param seq a sequence of {@code char} values (Unicode code
7562      * units)
7563      * @param index the index to the {@code char} values (Unicode
7564      * code units) in {@code seq} to be converted
7565      * @return the Unicode code point at the given index
7566      * @exception NullPointerException if {@code seq} is null.
7567      * @exception IndexOutOfBoundsException if the value
7568      * {@code index} is negative or not less than
7569      * {@link CharSequence#length() seq.length()}.
7570      * @since  1.5
7571      */
7572     public static int codePointAt(CharSequence seq, int index) {
7573         char c1 = seq.charAt(index);
7574         if (isHighSurrogate(c1) && ++index < seq.length()) {
7575             char c2 = seq.charAt(index);
7576             if (isLowSurrogate(c2)) {
7577                 return toCodePoint(c1, c2);
7578             }
7579         }
7580         return c1;
7581     }
7582 
7583     /**
7584      * Returns the code point at the given index of the
7585      * {@code char} array. If the {@code char} value at
7586      * the given index in the {@code char} array is in the
7587      * high-surrogate range, the following index is less than the
7588      * length of the {@code char} array, and the
7589      * {@code char} value at the following index is in the
7590      * low-surrogate range, then the supplementary code point
7591      * corresponding to this surrogate pair is returned. Otherwise,
7592      * the {@code char} value at the given index is returned.
7593      *
7594      * @param a the {@code char} array
7595      * @param index the index to the {@code char} values (Unicode
7596      * code units) in the {@code char} array to be converted
7597      * @return the Unicode code point at the given index
7598      * @exception NullPointerException if {@code a} is null.
7599      * @exception IndexOutOfBoundsException if the value
7600      * {@code index} is negative or not less than
7601      * the length of the {@code char} array.
7602      * @since  1.5
7603      */
7604     public static int codePointAt(char[] a, int index) {
7605         return codePointAtImpl(a, index, a.length);
7606     }
7607 
7608     /**
7609      * Returns the code point at the given index of the
7610      * {@code char} array, where only array elements with
7611      * {@code index} less than {@code limit} can be used. If
7612      * the {@code char} value at the given index in the
7613      * {@code char} array is in the high-surrogate range, the
7614      * following index is less than the {@code limit}, and the
7615      * {@code char} value at the following index is in the
7616      * low-surrogate range, then the supplementary code point
7617      * corresponding to this surrogate pair is returned. Otherwise,
7618      * the {@code char} value at the given index is returned.
7619      *
7620      * @param a the {@code char} array
7621      * @param index the index to the {@code char} values (Unicode
7622      * code units) in the {@code char} array to be converted
7623      * @param limit the index after the last array element that
7624      * can be used in the {@code char} array
7625      * @return the Unicode code point at the given index
7626      * @exception NullPointerException if {@code a} is null.
7627      * @exception IndexOutOfBoundsException if the {@code index}
7628      * argument is negative or not less than the {@code limit}
7629      * argument, or if the {@code limit} argument is negative or
7630      * greater than the length of the {@code char} array.
7631      * @since  1.5
7632      */
7633     public static int codePointAt(char[] a, int index, int limit) {
7634         if (index >= limit || limit < 0 || limit > a.length) {
7635             throw new IndexOutOfBoundsException();
7636         }
7637         return codePointAtImpl(a, index, limit);
7638     }
7639 
7640     // throws ArrayIndexOutOfBoundsException if index out of bounds
7641     static int codePointAtImpl(char[] a, int index, int limit) {
7642         char c1 = a[index];
7643         if (isHighSurrogate(c1) && ++index < limit) {
7644             char c2 = a[index];
7645             if (isLowSurrogate(c2)) {
7646                 return toCodePoint(c1, c2);
7647             }
7648         }
7649         return c1;
7650     }
7651 
7652     /**
7653      * Returns the code point preceding the given index of the
7654      * {@code CharSequence}. If the {@code char} value at
7655      * {@code (index - 1)} in the {@code CharSequence} is in
7656      * the low-surrogate range, {@code (index - 2)} is not
7657      * negative, and the {@code char} value at {@code (index - 2)}
7658      * in the {@code CharSequence} is in the
7659      * high-surrogate range, then the supplementary code point
7660      * corresponding to this surrogate pair is returned. Otherwise,
7661      * the {@code char} value at {@code (index - 1)} is
7662      * returned.
7663      *
7664      * @param seq the {@code CharSequence} instance
7665      * @param index the index following the code point that should be returned
7666      * @return the Unicode code point value before the given index.
7667      * @exception NullPointerException if {@code seq} is null.
7668      * @exception IndexOutOfBoundsException if the {@code index}
7669      * argument is less than 1 or greater than {@link
7670      * CharSequence#length() seq.length()}.
7671      * @since  1.5
7672      */
7673     public static int codePointBefore(CharSequence seq, int index) {
7674         char c2 = seq.charAt(--index);
7675         if (isLowSurrogate(c2) && index > 0) {
7676             char c1 = seq.charAt(--index);
7677             if (isHighSurrogate(c1)) {
7678                 return toCodePoint(c1, c2);
7679             }
7680         }
7681         return c2;
7682     }
7683 
7684     /**
7685      * Returns the code point preceding the given index of the
7686      * {@code char} array. If the {@code char} value at
7687      * {@code (index - 1)} in the {@code char} array is in
7688      * the low-surrogate range, {@code (index - 2)} is not
7689      * negative, and the {@code char} value at {@code (index - 2)}
7690      * in the {@code char} array is in the
7691      * high-surrogate range, then the supplementary code point
7692      * corresponding to this surrogate pair is returned. Otherwise,
7693      * the {@code char} value at {@code (index - 1)} is
7694      * returned.
7695      *
7696      * @param a the {@code char} array
7697      * @param index the index following the code point that should be returned
7698      * @return the Unicode code point value before the given index.
7699      * @exception NullPointerException if {@code a} is null.
7700      * @exception IndexOutOfBoundsException if the {@code index}
7701      * argument is less than 1 or greater than the length of the
7702      * {@code char} array
7703      * @since  1.5
7704      */
7705     public static int codePointBefore(char[] a, int index) {
7706         return codePointBeforeImpl(a, index, 0);
7707     }
7708 
7709     /**
7710      * Returns the code point preceding the given index of the
7711      * {@code char} array, where only array elements with
7712      * {@code index} greater than or equal to {@code start}
7713      * can be used. If the {@code char} value at {@code (index - 1)}
7714      * in the {@code char} array is in the
7715      * low-surrogate range, {@code (index - 2)} is not less than
7716      * {@code start}, and the {@code char} value at
7717      * {@code (index - 2)} in the {@code char} array is in
7718      * the high-surrogate range, then the supplementary code point
7719      * corresponding to this surrogate pair is returned. Otherwise,
7720      * the {@code char} value at {@code (index - 1)} is
7721      * returned.
7722      *
7723      * @param a the {@code char} array
7724      * @param index the index following the code point that should be returned
7725      * @param start the index of the first array element in the
7726      * {@code char} array
7727      * @return the Unicode code point value before the given index.
7728      * @exception NullPointerException if {@code a} is null.
7729      * @exception IndexOutOfBoundsException if the {@code index}
7730      * argument is not greater than the {@code start} argument or
7731      * is greater than the length of the {@code char} array, or
7732      * if the {@code start} argument is negative or not less than
7733      * the length of the {@code char} array.
7734      * @since  1.5
7735      */
7736     public static int codePointBefore(char[] a, int index, int start) {
7737         if (index <= start || start < 0 || start >= a.length) {
7738             throw new IndexOutOfBoundsException();
7739         }
7740         return codePointBeforeImpl(a, index, start);
7741     }
7742 
7743     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
7744     static int codePointBeforeImpl(char[] a, int index, int start) {
7745         char c2 = a[--index];
7746         if (isLowSurrogate(c2) && index > start) {
7747             char c1 = a[--index];
7748             if (isHighSurrogate(c1)) {
7749                 return toCodePoint(c1, c2);
7750             }
7751         }
7752         return c2;
7753     }
7754 
7755     /**
7756      * Returns the leading surrogate (a
7757      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
7758      * high surrogate code unit</a>) of the
7759      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
7760      * surrogate pair</a>
7761      * representing the specified supplementary character (Unicode
7762      * code point) in the UTF-16 encoding.  If the specified character
7763      * is not a
7764      * <a href="Character.html#supplementary">supplementary character</a>,
7765      * an unspecified {@code char} is returned.
7766      *
7767      * <p>If
7768      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
7769      * is {@code true}, then
7770      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
7771      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
7772      * are also always {@code true}.
7773      *
7774      * @param   codePoint a supplementary character (Unicode code point)
7775      * @return  the leading surrogate code unit used to represent the
7776      *          character in the UTF-16 encoding
7777      * @since   1.7
7778      */
7779     public static char highSurrogate(int codePoint) {
7780         return (char) ((codePoint >>> 10)
7781             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
7782     }
7783 
7784     /**
7785      * Returns the trailing surrogate (a
7786      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
7787      * low surrogate code unit</a>) of the
7788      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
7789      * surrogate pair</a>
7790      * representing the specified supplementary character (Unicode
7791      * code point) in the UTF-16 encoding.  If the specified character
7792      * is not a
7793      * <a href="Character.html#supplementary">supplementary character</a>,
7794      * an unspecified {@code char} is returned.
7795      *
7796      * <p>If
7797      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
7798      * is {@code true}, then
7799      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
7800      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
7801      * are also always {@code true}.
7802      *
7803      * @param   codePoint a supplementary character (Unicode code point)
7804      * @return  the trailing surrogate code unit used to represent the
7805      *          character in the UTF-16 encoding
7806      * @since   1.7
7807      */
7808     public static char lowSurrogate(int codePoint) {
7809         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
7810     }
7811 
7812     /**
7813      * Converts the specified character (Unicode code point) to its
7814      * UTF-16 representation. If the specified code point is a BMP
7815      * (Basic Multilingual Plane or Plane 0) value, the same value is
7816      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
7817      * specified code point is a supplementary character, its
7818      * surrogate values are stored in {@code dst[dstIndex]}
7819      * (high-surrogate) and {@code dst[dstIndex+1]}
7820      * (low-surrogate), and 2 is returned.
7821      *
7822      * @param  codePoint the character (Unicode code point) to be converted.
7823      * @param  dst an array of {@code char} in which the
7824      * {@code codePoint}'s UTF-16 value is stored.
7825      * @param dstIndex the start index into the {@code dst}
7826      * array where the converted value is stored.
7827      * @return 1 if the code point is a BMP code point, 2 if the
7828      * code point is a supplementary code point.
7829      * @exception IllegalArgumentException if the specified
7830      * {@code codePoint} is not a valid Unicode code point.
7831      * @exception NullPointerException if the specified {@code dst} is null.
7832      * @exception IndexOutOfBoundsException if {@code dstIndex}
7833      * is negative or not less than {@code dst.length}, or if
7834      * {@code dst} at {@code dstIndex} doesn't have enough
7835      * array element(s) to store the resulting {@code char}
7836      * value(s). (If {@code dstIndex} is equal to
7837      * {@code dst.length-1} and the specified
7838      * {@code codePoint} is a supplementary character, the
7839      * high-surrogate value is not stored in
7840      * {@code dst[dstIndex]}.)
7841      * @since  1.5
7842      */
7843     public static int toChars(int codePoint, char[] dst, int dstIndex) {
7844         if (isBmpCodePoint(codePoint)) {
7845             dst[dstIndex] = (char) codePoint;
7846             return 1;
7847         } else if (isValidCodePoint(codePoint)) {
7848             toSurrogates(codePoint, dst, dstIndex);
7849             return 2;
7850         } else {
7851             throw new IllegalArgumentException();
7852         }
7853     }
7854 
7855     /**
7856      * Converts the specified character (Unicode code point) to its
7857      * UTF-16 representation stored in a {@code char} array. If
7858      * the specified code point is a BMP (Basic Multilingual Plane or
7859      * Plane 0) value, the resulting {@code char} array has
7860      * the same value as {@code codePoint}. If the specified code
7861      * point is a supplementary code point, the resulting
7862      * {@code char} array has the corresponding surrogate pair.
7863      *
7864      * @param  codePoint a Unicode code point
7865      * @return a {@code char} array having
7866      *         {@code codePoint}'s UTF-16 representation.
7867      * @exception IllegalArgumentException if the specified
7868      * {@code codePoint} is not a valid Unicode code point.
7869      * @since  1.5
7870      */
7871     public static char[] toChars(int codePoint) {
7872         if (isBmpCodePoint(codePoint)) {
7873             return new char[] { (char) codePoint };
7874         } else if (isValidCodePoint(codePoint)) {
7875             char[] result = new char[2];
7876             toSurrogates(codePoint, result, 0);
7877             return result;
7878         } else {
7879             throw new IllegalArgumentException();
7880         }
7881     }
7882 
7883     static void toSurrogates(int codePoint, char[] dst, int index) {
7884         // We write elements "backwards" to guarantee all-or-nothing
7885         dst[index+1] = lowSurrogate(codePoint);
7886         dst[index] = highSurrogate(codePoint);
7887     }
7888 
7889     /**
7890      * Returns the number of Unicode code points in the text range of
7891      * the specified char sequence. The text range begins at the
7892      * specified {@code beginIndex} and extends to the
7893      * {@code char} at index {@code endIndex - 1}. Thus the
7894      * length (in {@code char}s) of the text range is
7895      * {@code endIndex-beginIndex}. Unpaired surrogates within
7896      * the text range count as one code point each.
7897      *
7898      * @param seq the char sequence
7899      * @param beginIndex the index to the first {@code char} of
7900      * the text range.
7901      * @param endIndex the index after the last {@code char} of
7902      * the text range.
7903      * @return the number of Unicode code points in the specified text
7904      * range
7905      * @exception NullPointerException if {@code seq} is null.
7906      * @exception IndexOutOfBoundsException if the
7907      * {@code beginIndex} is negative, or {@code endIndex}
7908      * is larger than the length of the given sequence, or
7909      * {@code beginIndex} is larger than {@code endIndex}.
7910      * @since  1.5
7911      */
7912     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
7913         int length = seq.length();
7914         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
7915             throw new IndexOutOfBoundsException();
7916         }
7917         int n = endIndex - beginIndex;
7918         for (int i = beginIndex; i < endIndex; ) {
7919             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
7920                 isLowSurrogate(seq.charAt(i))) {
7921                 n--;
7922                 i++;
7923             }
7924         }
7925         return n;
7926     }
7927 
7928     /**
7929      * Returns the number of Unicode code points in a subarray of the
7930      * {@code char} array argument. The {@code offset}
7931      * argument is the index of the first {@code char} of the
7932      * subarray and the {@code count} argument specifies the
7933      * length of the subarray in {@code char}s. Unpaired
7934      * surrogates within the subarray count as one code point each.
7935      *
7936      * @param a the {@code char} array
7937      * @param offset the index of the first {@code char} in the
7938      * given {@code char} array
7939      * @param count the length of the subarray in {@code char}s
7940      * @return the number of Unicode code points in the specified subarray
7941      * @exception NullPointerException if {@code a} is null.
7942      * @exception IndexOutOfBoundsException if {@code offset} or
7943      * {@code count} is negative, or if {@code offset +
7944      * count} is larger than the length of the given array.
7945      * @since  1.5
7946      */
7947     public static int codePointCount(char[] a, int offset, int count) {
7948         if (count > a.length - offset || offset < 0 || count < 0) {
7949             throw new IndexOutOfBoundsException();
7950         }
7951         return codePointCountImpl(a, offset, count);
7952     }
7953 
7954     static int codePointCountImpl(char[] a, int offset, int count) {
7955         int endIndex = offset + count;
7956         int n = count;
7957         for (int i = offset; i < endIndex; ) {
7958             if (isHighSurrogate(a[i++]) && i < endIndex &&
7959                 isLowSurrogate(a[i])) {
7960                 n--;
7961                 i++;
7962             }
7963         }
7964         return n;
7965     }
7966 
7967     /**
7968      * Returns the index within the given char sequence that is offset
7969      * from the given {@code index} by {@code codePointOffset}
7970      * code points. Unpaired surrogates within the text range given by
7971      * {@code index} and {@code codePointOffset} count as
7972      * one code point each.
7973      *
7974      * @param seq the char sequence
7975      * @param index the index to be offset
7976      * @param codePointOffset the offset in code points
7977      * @return the index within the char sequence
7978      * @exception NullPointerException if {@code seq} is null.
7979      * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
7981      *   or if {@code codePointOffset} is positive and the
7982      *   subsequence starting with {@code index} has fewer than
7983      *   {@code codePointOffset} code points, or if
7984      *   {@code codePointOffset} is negative and the subsequence
7985      *   before {@code index} has fewer than the absolute value
7986      *   of {@code codePointOffset} code points.
7987      * @since 1.5
7988      */
7989     public static int offsetByCodePoints(CharSequence seq, int index,
7990                                          int codePointOffset) {
7991         int length = seq.length();
7992         if (index < 0 || index > length) {
7993             throw new IndexOutOfBoundsException();
7994         }
7995 
7996         int x = index;
7997         if (codePointOffset >= 0) {
7998             int i;
7999             for (i = 0; x < length && i < codePointOffset; i++) {
8000                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8001                     isLowSurrogate(seq.charAt(x))) {
8002                     x++;
8003                 }
8004             }
8005             if (i < codePointOffset) {
8006                 throw new IndexOutOfBoundsException();
8007             }
8008         } else {
8009             int i;
8010             for (i = codePointOffset; x > 0 && i < 0; i++) {
8011                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8012                     isHighSurrogate(seq.charAt(x-1))) {
8013                     x--;
8014                 }
8015             }
8016             if (i < 0) {
8017                 throw new IndexOutOfBoundsException();
8018             }
8019         }
8020         return x;
8021     }
8022 
8023     /**
8024      * Returns the index within the given {@code char} subarray
8025      * that is offset from the given {@code index} by
8026      * {@code codePointOffset} code points. The
8027      * {@code start} and {@code count} arguments specify a
8028      * subarray of the {@code char} array. Unpaired surrogates
8029      * within the text range given by {@code index} and
8030      * {@code codePointOffset} count as one code point each.
8031      *
8032      * @param a the {@code char} array
8033      * @param start the index of the first {@code char} of the
8034      * subarray
8035      * @param count the length of the subarray in {@code char}s
8036      * @param index the index to be offset
8037      * @param codePointOffset the offset in code points
8038      * @return the index within the subarray
8039      * @exception NullPointerException if {@code a} is null.
8040      * @exception IndexOutOfBoundsException
8041      *   if {@code start} or {@code count} is negative,
8042      *   or if {@code start + count} is larger than the length of
8043      *   the given array,
8044      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
8046      *   or if {@code codePointOffset} is positive and the text range
8047      *   starting with {@code index} and ending with {@code start + count - 1}
8048      *   has fewer than {@code codePointOffset} code
8049      *   points,
8050      *   or if {@code codePointOffset} is negative and the text range
8051      *   starting with {@code start} and ending with {@code index - 1}
8052      *   has fewer than the absolute value of
8053      *   {@code codePointOffset} code points.
8054      * @since 1.5
8055      */
8056     public static int offsetByCodePoints(char[] a, int start, int count,
8057                                          int index, int codePointOffset) {
8058         if (count > a.length-start || start < 0 || count < 0
8059             || index < start || index > start+count) {
8060             throw new IndexOutOfBoundsException();
8061         }
8062         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8063     }
8064 
8065     static int offsetByCodePointsImpl(char[]a, int start, int count,
8066                                       int index, int codePointOffset) {
8067         int x = index;
8068         if (codePointOffset >= 0) {
8069             int limit = start + count;
8070             int i;
8071             for (i = 0; x < limit && i < codePointOffset; i++) {
8072                 if (isHighSurrogate(a[x++]) && x < limit &&
8073                     isLowSurrogate(a[x])) {
8074                     x++;
8075                 }
8076             }
8077             if (i < codePointOffset) {
8078                 throw new IndexOutOfBoundsException();
8079             }
8080         } else {
8081             int i;
8082             for (i = codePointOffset; x > start && i < 0; i++) {
8083                 if (isLowSurrogate(a[--x]) && x > start &&
8084                     isHighSurrogate(a[x-1])) {
8085                     x--;
8086                 }
8087             }
8088             if (i < 0) {
8089                 throw new IndexOutOfBoundsException();
8090             }
8091         }
8092         return x;
8093     }
8094 
8095     /**
8096      * Determines if the specified character is a lowercase character.
8097      * <p>
8098      * A character is lowercase if its general category type, provided
8099      * by {@code Character.getType(ch)}, is
8100      * {@code LOWERCASE_LETTER}, or it has contributory property
8101      * Other_Lowercase as defined by the Unicode Standard.
8102      * <p>
8103      * The following are examples of lowercase characters:
8104      * <blockquote><pre>
8105      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8106      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8107      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8108      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8109      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8110      * </pre></blockquote>
8111      * <p> Many other Unicode characters are lowercase too.
8112      *
8113      * <p><b>Note:</b> This method cannot handle <a
8114      * href="#supplementary"> supplementary characters</a>. To support
8115      * all Unicode characters, including supplementary characters, use
8116      * the {@link #isLowerCase(int)} method.
8117      *
8118      * @param   ch   the character to be tested.
8119      * @return  {@code true} if the character is lowercase;
8120      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
8122      * @see     Character#isTitleCase(char)
8123      * @see     Character#toLowerCase(char)
8124      * @see     Character#getType(char)
8125      */
8126     public static boolean isLowerCase(char ch) {
8127         return isLowerCase((int)ch);
8128     }
8129 
8130     /**
8131      * Determines if the specified character (Unicode code point) is a
8132      * lowercase character.
8133      * <p>
8134      * A character is lowercase if its general category type, provided
8135      * by {@link Character#getType getType(codePoint)}, is
8136      * {@code LOWERCASE_LETTER}, or it has contributory property
8137      * Other_Lowercase as defined by the Unicode Standard.
8138      * <p>
8139      * The following are examples of lowercase characters:
8140      * <blockquote><pre>
8141      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8142      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8143      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8144      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8145      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8146      * </pre></blockquote>
8147      * <p> Many other Unicode characters are lowercase too.
8148      *
8149      * @param   codePoint the character (Unicode code point) to be tested.
8150      * @return  {@code true} if the character is lowercase;
8151      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
8153      * @see     Character#isTitleCase(int)
8154      * @see     Character#toLowerCase(int)
8155      * @see     Character#getType(int)
8156      * @since   1.5
8157      */
8158     public static boolean isLowerCase(int codePoint) {
8159         return getType(codePoint) == Character.LOWERCASE_LETTER ||
8160                CharacterData.of(codePoint).isOtherLowercase(codePoint);
8161     }
8162 
8163     /**
8164      * Determines if the specified character is an uppercase character.
8165      * <p>
8166      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
8168      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8169      * <p>
8170      * The following are examples of uppercase characters:
8171      * <blockquote><pre>
8172      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8173      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8174      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8175      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8176      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8177      * </pre></blockquote>
8178      * <p> Many other Unicode characters are uppercase too.
8179      *
8180      * <p><b>Note:</b> This method cannot handle <a
8181      * href="#supplementary"> supplementary characters</a>. To support
8182      * all Unicode characters, including supplementary characters, use
8183      * the {@link #isUpperCase(int)} method.
8184      *
8185      * @param   ch   the character to be tested.
8186      * @return  {@code true} if the character is uppercase;
8187      *          {@code false} otherwise.
8188      * @see     Character#isLowerCase(char)
8189      * @see     Character#isTitleCase(char)
8190      * @see     Character#toUpperCase(char)
8191      * @see     Character#getType(char)
8192      * @since   1.0
8193      */
8194     public static boolean isUpperCase(char ch) {
8195         return isUpperCase((int)ch);
8196     }
8197 
8198     /**
8199      * Determines if the specified character (Unicode code point) is an uppercase character.
8200      * <p>
8201      * A character is uppercase if its general category type, provided by
8202      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
8203      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8204      * <p>
8205      * The following are examples of uppercase characters:
8206      * <blockquote><pre>
8207      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8208      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8209      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8210      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8211      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8212      * </pre></blockquote>
8213      * <p> Many other Unicode characters are uppercase too.
8214      *
8215      * @param   codePoint the character (Unicode code point) to be tested.
8216      * @return  {@code true} if the character is uppercase;
8217      *          {@code false} otherwise.
8218      * @see     Character#isLowerCase(int)
8219      * @see     Character#isTitleCase(int)
8220      * @see     Character#toUpperCase(int)
8221      * @see     Character#getType(int)
8222      * @since   1.5
8223      */
8224     public static boolean isUpperCase(int codePoint) {
8225         return getType(codePoint) == Character.UPPERCASE_LETTER ||
8226                CharacterData.of(codePoint).isOtherUppercase(codePoint);
8227     }
8228 
8229     /**
8230      * Determines if the specified character is a titlecase character.
8231      * <p>
8232      * A character is a titlecase character if its general
8233      * category type, provided by {@code Character.getType(ch)},
8234      * is {@code TITLECASE_LETTER}.
8235      * <p>
8236      * Some characters look like pairs of Latin letters. For example, there
8237      * is an uppercase letter that looks like "LJ" and has a corresponding
8238      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8239      * is the appropriate form to use when rendering a word in lowercase
8240      * with initial capitals, as for a book title.
8241      * <p>
8242      * These are some of the Unicode characters for which this method returns
8243      * {@code true}:
8244      * <ul>
8245      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8246      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8247      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8248      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8249      * </ul>
8250      * <p> Many other Unicode characters are titlecase too.
8251      *
8252      * <p><b>Note:</b> This method cannot handle <a
8253      * href="#supplementary"> supplementary characters</a>. To support
8254      * all Unicode characters, including supplementary characters, use
8255      * the {@link #isTitleCase(int)} method.
8256      *
8257      * @param   ch   the character to be tested.
8258      * @return  {@code true} if the character is titlecase;
8259      *          {@code false} otherwise.
8260      * @see     Character#isLowerCase(char)
8261      * @see     Character#isUpperCase(char)
8262      * @see     Character#toTitleCase(char)
8263      * @see     Character#getType(char)
8264      * @since   1.0.2
8265      */
8266     public static boolean isTitleCase(char ch) {
8267         return isTitleCase((int)ch);
8268     }
8269 
8270     /**
8271      * Determines if the specified character (Unicode code point) is a titlecase character.
8272      * <p>
8273      * A character is a titlecase character if its general
8274      * category type, provided by {@link Character#getType(int) getType(codePoint)},
8275      * is {@code TITLECASE_LETTER}.
8276      * <p>
8277      * Some characters look like pairs of Latin letters. For example, there
8278      * is an uppercase letter that looks like "LJ" and has a corresponding
8279      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8280      * is the appropriate form to use when rendering a word in lowercase
8281      * with initial capitals, as for a book title.
8282      * <p>
8283      * These are some of the Unicode characters for which this method returns
8284      * {@code true}:
8285      * <ul>
8286      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8287      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8288      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8289      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8290      * </ul>
8291      * <p> Many other Unicode characters are titlecase too.
8292      *
8293      * @param   codePoint the character (Unicode code point) to be tested.
8294      * @return  {@code true} if the character is titlecase;
8295      *          {@code false} otherwise.
8296      * @see     Character#isLowerCase(int)
8297      * @see     Character#isUpperCase(int)
8298      * @see     Character#toTitleCase(int)
8299      * @see     Character#getType(int)
8300      * @since   1.5
8301      */
8302     public static boolean isTitleCase(int codePoint) {
8303         return getType(codePoint) == Character.TITLECASE_LETTER;
8304     }
8305 
8306     /**
8307      * Determines if the specified character is a digit.
8308      * <p>
8309      * A character is a digit if its general category type, provided
8310      * by {@code Character.getType(ch)}, is
8311      * {@code DECIMAL_DIGIT_NUMBER}.
8312      * <p>
8313      * Some Unicode character ranges that contain digits:
8314      * <ul>
8315      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
8316      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
8317      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
8318      *     Arabic-Indic digits
8319      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
8320      *     Extended Arabic-Indic digits
8321      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
8322      *     Devanagari digits
8323      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
8324      *     Fullwidth digits
8325      * </ul>
8326      *
8327      * Many other character ranges contain digits as well.
8328      *
8329      * <p><b>Note:</b> This method cannot handle <a
8330      * href="#supplementary"> supplementary characters</a>. To support
8331      * all Unicode characters, including supplementary characters, use
8332      * the {@link #isDigit(int)} method.
8333      *
8334      * @param   ch   the character to be tested.
8335      * @return  {@code true} if the character is a digit;
8336      *          {@code false} otherwise.
8337      * @see     Character#digit(char, int)
8338      * @see     Character#forDigit(int, int)
8339      * @see     Character#getType(char)
8340      */
8341     public static boolean isDigit(char ch) {
8342         return isDigit((int)ch);
8343     }
8344 
8345     /**
8346      * Determines if the specified character (Unicode code point) is a digit.
8347      * <p>
8348      * A character is a digit if its general category type, provided
8349      * by {@link Character#getType(int) getType(codePoint)}, is
8350      * {@code DECIMAL_DIGIT_NUMBER}.
8351      * <p>
8352      * Some Unicode character ranges that contain digits:
8353      * <ul>
8354      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
8355      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
8356      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
8357      *     Arabic-Indic digits
8358      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
8359      *     Extended Arabic-Indic digits
8360      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
8361      *     Devanagari digits
8362      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
8363      *     Fullwidth digits
8364      * </ul>
8365      *
8366      * Many other character ranges contain digits as well.
8367      *
8368      * @param   codePoint the character (Unicode code point) to be tested.
8369      * @return  {@code true} if the character is a digit;
8370      *          {@code false} otherwise.
8371      * @see     Character#forDigit(int, int)
8372      * @see     Character#getType(int)
8373      * @since   1.5
8374      */
8375     public static boolean isDigit(int codePoint) {
8376         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
8377     }
8378 
8379     /**
8380      * Determines if a character is defined in Unicode.
8381      * <p>
8382      * A character is defined if at least one of the following is true:
8383      * <ul>
8384      * <li>It has an entry in the UnicodeData file.
8385      * <li>It has a value in a range defined by the UnicodeData file.
8386      * </ul>
8387      *
8388      * <p><b>Note:</b> This method cannot handle <a
8389      * href="#supplementary"> supplementary characters</a>. To support
8390      * all Unicode characters, including supplementary characters, use
8391      * the {@link #isDefined(int)} method.
8392      *
8393      * @param   ch   the character to be tested
8394      * @return  {@code true} if the character has a defined meaning
8395      *          in Unicode; {@code false} otherwise.
8396      * @see     Character#isDigit(char)
8397      * @see     Character#isLetter(char)
8398      * @see     Character#isLetterOrDigit(char)
8399      * @see     Character#isLowerCase(char)
8400      * @see     Character#isTitleCase(char)
8401      * @see     Character#isUpperCase(char)
8402      * @since   1.0.2
8403      */
8404     public static boolean isDefined(char ch) {
8405         return isDefined((int)ch);
8406     }
8407 
8408     /**
8409      * Determines if a character (Unicode code point) is defined in Unicode.
8410      * <p>
8411      * A character is defined if at least one of the following is true:
8412      * <ul>
8413      * <li>It has an entry in the UnicodeData file.
8414      * <li>It has a value in a range defined by the UnicodeData file.
8415      * </ul>
8416      *
8417      * @param   codePoint the character (Unicode code point) to be tested.
8418      * @return  {@code true} if the character has a defined meaning
8419      *          in Unicode; {@code false} otherwise.
8420      * @see     Character#isDigit(int)
8421      * @see     Character#isLetter(int)
8422      * @see     Character#isLetterOrDigit(int)
8423      * @see     Character#isLowerCase(int)
8424      * @see     Character#isTitleCase(int)
8425      * @see     Character#isUpperCase(int)
8426      * @since   1.5
8427      */
8428     public static boolean isDefined(int codePoint) {
8429         return getType(codePoint) != Character.UNASSIGNED;
8430     }
8431 
8432     /**
8433      * Determines if the specified character is a letter.
8434      * <p>
8435      * A character is considered to be a letter if its general
8436      * category type, provided by {@code Character.getType(ch)},
8437      * is any of the following:
8438      * <ul>
8439      * <li> {@code UPPERCASE_LETTER}
8440      * <li> {@code LOWERCASE_LETTER}
8441      * <li> {@code TITLECASE_LETTER}
8442      * <li> {@code MODIFIER_LETTER}
8443      * <li> {@code OTHER_LETTER}
8444      * </ul>
8445      *
8446      * Not all letters have case. Many characters are
8447      * letters but are neither uppercase nor lowercase nor titlecase.
8448      *
8449      * <p><b>Note:</b> This method cannot handle <a
8450      * href="#supplementary"> supplementary characters</a>. To support
8451      * all Unicode characters, including supplementary characters, use
8452      * the {@link #isLetter(int)} method.
8453      *
8454      * @param   ch   the character to be tested.
8455      * @return  {@code true} if the character is a letter;
8456      *          {@code false} otherwise.
8457      * @see     Character#isDigit(char)
8458      * @see     Character#isJavaIdentifierStart(char)
8459      * @see     Character#isJavaLetter(char)
8460      * @see     Character#isJavaLetterOrDigit(char)
8461      * @see     Character#isLetterOrDigit(char)
8462      * @see     Character#isLowerCase(char)
8463      * @see     Character#isTitleCase(char)
8464      * @see     Character#isUnicodeIdentifierStart(char)
8465      * @see     Character#isUpperCase(char)
8466      */
8467     public static boolean isLetter(char ch) {
8468         return isLetter((int)ch);
8469     }
8470 
8471     /**
8472      * Determines if the specified character (Unicode code point) is a letter.
8473      * <p>
8474      * A character is considered to be a letter if its general
8475      * category type, provided by {@link Character#getType(int) getType(codePoint)},
8476      * is any of the following:
8477      * <ul>
8478      * <li> {@code UPPERCASE_LETTER}
8479      * <li> {@code LOWERCASE_LETTER}
8480      * <li> {@code TITLECASE_LETTER}
8481      * <li> {@code MODIFIER_LETTER}
8482      * <li> {@code OTHER_LETTER}
8483      * </ul>
8484      *
8485      * Not all letters have case. Many characters are
8486      * letters but are neither uppercase nor lowercase nor titlecase.
8487      *
8488      * @param   codePoint the character (Unicode code point) to be tested.
8489      * @return  {@code true} if the character is a letter;
8490      *          {@code false} otherwise.
8491      * @see     Character#isDigit(int)
8492      * @see     Character#isJavaIdentifierStart(int)
8493      * @see     Character#isLetterOrDigit(int)
8494      * @see     Character#isLowerCase(int)
8495      * @see     Character#isTitleCase(int)
8496      * @see     Character#isUnicodeIdentifierStart(int)
8497      * @see     Character#isUpperCase(int)
8498      * @since   1.5
8499      */
8500     public static boolean isLetter(int codePoint) {
8501         return ((((1 << Character.UPPERCASE_LETTER) |
8502             (1 << Character.LOWERCASE_LETTER) |
8503             (1 << Character.TITLECASE_LETTER) |
8504             (1 << Character.MODIFIER_LETTER) |
8505             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
8506             != 0;
8507     }
8508 
8509     /**
8510      * Determines if the specified character is a letter or digit.
8511      * <p>
8512      * A character is considered to be a letter or digit if either
8513      * {@code Character.isLetter(char ch)} or
8514      * {@code Character.isDigit(char ch)} returns
8515      * {@code true} for the character.
8516      *
8517      * <p><b>Note:</b> This method cannot handle <a
8518      * href="#supplementary"> supplementary characters</a>. To support
8519      * all Unicode characters, including supplementary characters, use
8520      * the {@link #isLetterOrDigit(int)} method.
8521      *
8522      * @param   ch   the character to be tested.
8523      * @return  {@code true} if the character is a letter or digit;
8524      *          {@code false} otherwise.
8525      * @see     Character#isDigit(char)
8526      * @see     Character#isJavaIdentifierPart(char)
8527      * @see     Character#isJavaLetter(char)
8528      * @see     Character#isJavaLetterOrDigit(char)
8529      * @see     Character#isLetter(char)
8530      * @see     Character#isUnicodeIdentifierPart(char)
8531      * @since   1.0.2
8532      */
8533     public static boolean isLetterOrDigit(char ch) {
8534         return isLetterOrDigit((int)ch);
8535     }
8536 
8537     /**
8538      * Determines if the specified character (Unicode code point) is a letter or digit.
8539      * <p>
8540      * A character is considered to be a letter or digit if either
8541      * {@link #isLetter(int) isLetter(codePoint)} or
8542      * {@link #isDigit(int) isDigit(codePoint)} returns
8543      * {@code true} for the character.
8544      *
8545      * @param   codePoint the character (Unicode code point) to be tested.
8546      * @return  {@code true} if the character is a letter or digit;
8547      *          {@code false} otherwise.
8548      * @see     Character#isDigit(int)
8549      * @see     Character#isJavaIdentifierPart(int)
8550      * @see     Character#isLetter(int)
8551      * @see     Character#isUnicodeIdentifierPart(int)
8552      * @since   1.5
8553      */
8554     public static boolean isLetterOrDigit(int codePoint) {
8555         return ((((1 << Character.UPPERCASE_LETTER) |
8556             (1 << Character.LOWERCASE_LETTER) |
8557             (1 << Character.TITLECASE_LETTER) |
8558             (1 << Character.MODIFIER_LETTER) |
8559             (1 << Character.OTHER_LETTER) |
8560             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
8561             != 0;
8562     }
8563 
8564     /**
8565      * Determines if the specified character is permissible as the first
8566      * character in a Java identifier.
8567      * <p>
8568      * A character may start a Java identifier if and only if
8569      * one of the following is true:
8570      * <ul>
8571      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
8572      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
8573      * <li> {@code ch} is a currency symbol (such as {@code '$'})
8574      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
8575      * </ul>
8576      *
8577      * @param   ch the character to be tested.
8578      * @return  {@code true} if the character may start a Java
8579      *          identifier; {@code false} otherwise.
8580      * @see     Character#isJavaLetterOrDigit(char)
8581      * @see     Character#isJavaIdentifierStart(char)
8582      * @see     Character#isJavaIdentifierPart(char)
8583      * @see     Character#isLetter(char)
8584      * @see     Character#isLetterOrDigit(char)
8585      * @see     Character#isUnicodeIdentifierStart(char)
8586      * @since   1.0.2
8587      * @deprecated Replaced by isJavaIdentifierStart(char).
8588      */
8589     @Deprecated
8590     public static boolean isJavaLetter(char ch) {
8591         return isJavaIdentifierStart(ch);
8592     }
8593 
8594     /**
8595      * Determines if the specified character may be part of a Java
8596      * identifier as other than the first character.
8597      * <p>
8598      * A character may be part of a Java identifier if and only if any
8599      * of the following are true:
8600      * <ul>
8601      * <li>  it is a letter
8602      * <li>  it is a currency symbol (such as {@code '$'})
8603      * <li>  it is a connecting punctuation character (such as {@code '_'})
8604      * <li>  it is a digit
8605      * <li>  it is a numeric letter (such as a Roman numeral character)
8606      * <li>  it is a combining mark
8607      * <li>  it is a non-spacing mark
8608      * <li> {@code isIdentifierIgnorable} returns
8609      * {@code true} for the character.
8610      * </ul>
8611      *
8612      * @param   ch the character to be tested.
8613      * @return  {@code true} if the character may be part of a
8614      *          Java identifier; {@code false} otherwise.
8615      * @see     Character#isJavaLetter(char)
8616      * @see     Character#isJavaIdentifierStart(char)
8617      * @see     Character#isJavaIdentifierPart(char)
8618      * @see     Character#isLetter(char)
8619      * @see     Character#isLetterOrDigit(char)
8620      * @see     Character#isUnicodeIdentifierPart(char)
8621      * @see     Character#isIdentifierIgnorable(char)
8622      * @since   1.0.2
8623      * @deprecated Replaced by isJavaIdentifierPart(char).
8624      */
8625     @Deprecated
8626     public static boolean isJavaLetterOrDigit(char ch) {
8627         return isJavaIdentifierPart(ch);
8628     }
8629 
8630     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
8632      * <p>
8633      * A character is considered to be alphabetic if its general category type,
8634      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
8635      * the following:
8636      * <ul>
8637      * <li> <code>UPPERCASE_LETTER</code>
8638      * <li> <code>LOWERCASE_LETTER</code>
8639      * <li> <code>TITLECASE_LETTER</code>
8640      * <li> <code>MODIFIER_LETTER</code>
8641      * <li> <code>OTHER_LETTER</code>
8642      * <li> <code>LETTER_NUMBER</code>
8643      * </ul>
8644      * or it has contributory property Other_Alphabetic as defined by the
8645      * Unicode Standard.
8646      *
8647      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
8650      * @since   1.7
8651      */
8652     public static boolean isAlphabetic(int codePoint) {
8653         return (((((1 << Character.UPPERCASE_LETTER) |
8654             (1 << Character.LOWERCASE_LETTER) |
8655             (1 << Character.TITLECASE_LETTER) |
8656             (1 << Character.MODIFIER_LETTER) |
8657             (1 << Character.OTHER_LETTER) |
8658             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
8659             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
8660     }
8661 
8662     /**
8663      * Determines if the specified character (Unicode code point) is a CJKV
8664      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
8665      * the Unicode Standard.
8666      *
8667      * @param   codePoint the character (Unicode code point) to be tested.
8668      * @return  <code>true</code> if the character is a Unicode ideograph
8669      *          character, <code>false</code> otherwise.
8670      * @since   1.7
8671      */
8672     public static boolean isIdeographic(int codePoint) {
8673         return CharacterData.of(codePoint).isIdeographic(codePoint);
8674     }
8675 
8676     /**
8677      * Determines if the specified character is
8678      * permissible as the first character in a Java identifier.
8679      * <p>
8680      * A character may start a Java identifier if and only if
8681      * one of the following conditions is true:
8682      * <ul>
8683      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
8684      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
8685      * <li> {@code ch} is a currency symbol (such as {@code '$'})
8686      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
8687      * </ul>
8688      *
8689      * <p><b>Note:</b> This method cannot handle <a
8690      * href="#supplementary"> supplementary characters</a>. To support
8691      * all Unicode characters, including supplementary characters, use
8692      * the {@link #isJavaIdentifierStart(int)} method.
8693      *
8694      * @param   ch the character to be tested.
8695      * @return  {@code true} if the character may start a Java identifier;
8696      *          {@code false} otherwise.
8697      * @see     Character#isJavaIdentifierPart(char)
8698      * @see     Character#isLetter(char)
8699      * @see     Character#isUnicodeIdentifierStart(char)
8700      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
8701      * @since   1.1
8702      */
8703     public static boolean isJavaIdentifierStart(char ch) {
8704         return isJavaIdentifierStart((int)ch);
8705     }
8706 
8707     /**
8708      * Determines if the character (Unicode code point) is
8709      * permissible as the first character in a Java identifier.
8710      * <p>
8711      * A character may start a Java identifier if and only if
8712      * one of the following conditions is true:
8713      * <ul>
8714      * <li> {@link #isLetter(int) isLetter(codePoint)}
8715      *      returns {@code true}
8716      * <li> {@link #getType(int) getType(codePoint)}
8717      *      returns {@code LETTER_NUMBER}
8718      * <li> the referenced character is a currency symbol (such as {@code '$'})
8719      * <li> the referenced character is a connecting punctuation character
8720      *      (such as {@code '_'}).
8721      * </ul>
8722      *
8723      * @param   codePoint the character (Unicode code point) to be tested.
8724      * @return  {@code true} if the character may start a Java identifier;
8725      *          {@code false} otherwise.
8726      * @see     Character#isJavaIdentifierPart(int)
8727      * @see     Character#isLetter(int)
8728      * @see     Character#isUnicodeIdentifierStart(int)
8729      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
8730      * @since   1.5
8731      */
8732     public static boolean isJavaIdentifierStart(int codePoint) {
8733         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
8734     }
8735 
8736     /**
8737      * Determines if the specified character may be part of a Java
8738      * identifier as other than the first character.
8739      * <p>
8740      * A character may be part of a Java identifier if any of the following
8741      * are true:
8742      * <ul>
8743      * <li>  it is a letter
8744      * <li>  it is a currency symbol (such as {@code '$'})
8745      * <li>  it is a connecting punctuation character (such as {@code '_'})
8746      * <li>  it is a digit
8747      * <li>  it is a numeric letter (such as a Roman numeral character)
8748      * <li>  it is a combining mark
8749      * <li>  it is a non-spacing mark
8750      * <li> {@code isIdentifierIgnorable} returns
8751      * {@code true} for the character
8752      * </ul>
8753      *
8754      * <p><b>Note:</b> This method cannot handle <a
8755      * href="#supplementary"> supplementary characters</a>. To support
8756      * all Unicode characters, including supplementary characters, use
8757      * the {@link #isJavaIdentifierPart(int)} method.
8758      *
8759      * @param   ch      the character to be tested.
8760      * @return {@code true} if the character may be part of a
8761      *          Java identifier; {@code false} otherwise.
8762      * @see     Character#isIdentifierIgnorable(char)
8763      * @see     Character#isJavaIdentifierStart(char)
8764      * @see     Character#isLetterOrDigit(char)
8765      * @see     Character#isUnicodeIdentifierPart(char)
8766      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
8767      * @since   1.1
8768      */
8769     public static boolean isJavaIdentifierPart(char ch) {
8770         return isJavaIdentifierPart((int)ch);
8771     }
8772 
8773     /**
8774      * Determines if the character (Unicode code point) may be part of a Java
8775      * identifier as other than the first character.
8776      * <p>
8777      * A character may be part of a Java identifier if any of the following
8778      * are true:
8779      * <ul>
8780      * <li>  it is a letter
8781      * <li>  it is a currency symbol (such as {@code '$'})
8782      * <li>  it is a connecting punctuation character (such as {@code '_'})
8783      * <li>  it is a digit
8784      * <li>  it is a numeric letter (such as a Roman numeral character)
8785      * <li>  it is a combining mark
8786      * <li>  it is a non-spacing mark
8787      * <li> {@link #isIdentifierIgnorable(int)
8788      * isIdentifierIgnorable(codePoint)} returns {@code true} for
8789      * the character
8790      * </ul>
8791      *
8792      * @param   codePoint the character (Unicode code point) to be tested.
8793      * @return {@code true} if the character may be part of a
8794      *          Java identifier; {@code false} otherwise.
8795      * @see     Character#isIdentifierIgnorable(int)
8796      * @see     Character#isJavaIdentifierStart(int)
8797      * @see     Character#isLetterOrDigit(int)
8798      * @see     Character#isUnicodeIdentifierPart(int)
8799      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
8800      * @since   1.5
8801      */
8802     public static boolean isJavaIdentifierPart(int codePoint) {
8803         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
8804     }
8805 
8806     /**
8807      * Determines if the specified character is permissible as the
8808      * first character in a Unicode identifier.
8809      * <p>
8810      * A character may start a Unicode identifier if and only if
8811      * one of the following conditions is true:
8812      * <ul>
8813      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
8814      * <li> {@link #getType(char) getType(ch)} returns
8815      *      {@code LETTER_NUMBER}.
8816      * </ul>
8817      *
8818      * <p><b>Note:</b> This method cannot handle <a
8819      * href="#supplementary"> supplementary characters</a>. To support
8820      * all Unicode characters, including supplementary characters, use
8821      * the {@link #isUnicodeIdentifierStart(int)} method.
8822      *
8823      * @param   ch      the character to be tested.
8824      * @return  {@code true} if the character may start a Unicode
8825      *          identifier; {@code false} otherwise.
8826      * @see     Character#isJavaIdentifierStart(char)
8827      * @see     Character#isLetter(char)
8828      * @see     Character#isUnicodeIdentifierPart(char)
8829      * @since   1.1
8830      */
8831     public static boolean isUnicodeIdentifierStart(char ch) {
8832         return isUnicodeIdentifierStart((int)ch);
8833     }
8834 
8835     /**
8836      * Determines if the specified character (Unicode code point) is permissible as the
8837      * first character in a Unicode identifier.
8838      * <p>
8839      * A character may start a Unicode identifier if and only if
8840      * one of the following conditions is true:
8841      * <ul>
8842      * <li> {@link #isLetter(int) isLetter(codePoint)}
8843      *      returns {@code true}
8844      * <li> {@link #getType(int) getType(codePoint)}
8845      *      returns {@code LETTER_NUMBER}.
8846      * </ul>
8847      * @param   codePoint the character (Unicode code point) to be tested.
8848      * @return  {@code true} if the character may start a Unicode
8849      *          identifier; {@code false} otherwise.
8850      * @see     Character#isJavaIdentifierStart(int)
8851      * @see     Character#isLetter(int)
8852      * @see     Character#isUnicodeIdentifierPart(int)
8853      * @since   1.5
8854      */
8855     public static boolean isUnicodeIdentifierStart(int codePoint) {
8856         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
8857     }
8858 
8859     /**
8860      * Determines if the specified character may be part of a Unicode
8861      * identifier as other than the first character.
8862      * <p>
8863      * A character may be part of a Unicode identifier if and only if
8864      * one of the following statements is true:
8865      * <ul>
8866      * <li>  it is a letter
8867      * <li>  it is a connecting punctuation character (such as {@code '_'})
8868      * <li>  it is a digit
8869      * <li>  it is a numeric letter (such as a Roman numeral character)
8870      * <li>  it is a combining mark
8871      * <li>  it is a non-spacing mark
8872      * <li> {@code isIdentifierIgnorable} returns
8873      * {@code true} for this character.
8874      * </ul>
8875      *
8876      * <p><b>Note:</b> This method cannot handle <a
8877      * href="#supplementary"> supplementary characters</a>. To support
8878      * all Unicode characters, including supplementary characters, use
8879      * the {@link #isUnicodeIdentifierPart(int)} method.
8880      *
8881      * @param   ch      the character to be tested.
8882      * @return  {@code true} if the character may be part of a
8883      *          Unicode identifier; {@code false} otherwise.
8884      * @see     Character#isIdentifierIgnorable(char)
8885      * @see     Character#isJavaIdentifierPart(char)
8886      * @see     Character#isLetterOrDigit(char)
8887      * @see     Character#isUnicodeIdentifierStart(char)
8888      * @since   1.1
8889      */
8890     public static boolean isUnicodeIdentifierPart(char ch) {
8891         return isUnicodeIdentifierPart((int)ch);
8892     }
8893 
8894     /**
8895      * Determines if the specified character (Unicode code point) may be part of a Unicode
8896      * identifier as other than the first character.
8897      * <p>
8898      * A character may be part of a Unicode identifier if and only if
8899      * one of the following statements is true:
8900      * <ul>
8901      * <li>  it is a letter
8902      * <li>  it is a connecting punctuation character (such as {@code '_'})
8903      * <li>  it is a digit
8904      * <li>  it is a numeric letter (such as a Roman numeral character)
8905      * <li>  it is a combining mark
8906      * <li>  it is a non-spacing mark
8907      * <li> {@code isIdentifierIgnorable} returns
8908      * {@code true} for this character.
8909      * </ul>
8910      * @param   codePoint the character (Unicode code point) to be tested.
8911      * @return  {@code true} if the character may be part of a
8912      *          Unicode identifier; {@code false} otherwise.
8913      * @see     Character#isIdentifierIgnorable(int)
8914      * @see     Character#isJavaIdentifierPart(int)
8915      * @see     Character#isLetterOrDigit(int)
8916      * @see     Character#isUnicodeIdentifierStart(int)
8917      * @since   1.5
8918      */
8919     public static boolean isUnicodeIdentifierPart(int codePoint) {
8920         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
8921     }
8922 
8923     /**
8924      * Determines if the specified character should be regarded as
8925      * an ignorable character in a Java identifier or a Unicode identifier.
8926      * <p>
8927      * The following Unicode characters are ignorable in a Java identifier
8928      * or a Unicode identifier:
8929      * <ul>
8930      * <li>ISO control characters that are not whitespace
8931      * <ul>
8932      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
8933      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
8934      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
8935      * </ul>
8936      *
8937      * <li>all characters that have the {@code FORMAT} general
8938      * category value
8939      * </ul>
8940      *
8941      * <p><b>Note:</b> This method cannot handle <a
8942      * href="#supplementary"> supplementary characters</a>. To support
8943      * all Unicode characters, including supplementary characters, use
8944      * the {@link #isIdentifierIgnorable(int)} method.
8945      *
8946      * @param   ch      the character to be tested.
8947      * @return  {@code true} if the character is an ignorable control
8948      *          character that may be part of a Java or Unicode identifier;
8949      *           {@code false} otherwise.
8950      * @see     Character#isJavaIdentifierPart(char)
8951      * @see     Character#isUnicodeIdentifierPart(char)
8952      * @since   1.1
8953      */
8954     public static boolean isIdentifierIgnorable(char ch) {
8955         return isIdentifierIgnorable((int)ch);
8956     }
8957 
8958     /**
8959      * Determines if the specified character (Unicode code point) should be regarded as
8960      * an ignorable character in a Java identifier or a Unicode identifier.
8961      * <p>
8962      * The following Unicode characters are ignorable in a Java identifier
8963      * or a Unicode identifier:
8964      * <ul>
8965      * <li>ISO control characters that are not whitespace
8966      * <ul>
8967      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
8968      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
8969      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
8970      * </ul>
8971      *
8972      * <li>all characters that have the {@code FORMAT} general
8973      * category value
8974      * </ul>
8975      *
8976      * @param   codePoint the character (Unicode code point) to be tested.
8977      * @return  {@code true} if the character is an ignorable control
8978      *          character that may be part of a Java or Unicode identifier;
8979      *          {@code false} otherwise.
8980      * @see     Character#isJavaIdentifierPart(int)
8981      * @see     Character#isUnicodeIdentifierPart(int)
8982      * @since   1.5
8983      */
8984     public static boolean isIdentifierIgnorable(int codePoint) {
8985         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
8986     }
8987 
8988     /**
8989      * Converts the character argument to lowercase using case
8990      * mapping information from the UnicodeData file.
8991      * <p>
8992      * Note that
8993      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
8994      * does not always return {@code true} for some ranges of
8995      * characters, particularly those that are symbols or ideographs.
8996      *
8997      * <p>In general, {@link String#toLowerCase()} should be used to map
8998      * characters to lowercase. {@code String} case mapping methods
8999      * have several benefits over {@code Character} case mapping methods.
9000      * {@code String} case mapping methods can perform locale-sensitive
9001      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9002      * the {@code Character} case mapping methods cannot.
9003      *
9004      * <p><b>Note:</b> This method cannot handle <a
9005      * href="#supplementary"> supplementary characters</a>. To support
9006      * all Unicode characters, including supplementary characters, use
9007      * the {@link #toLowerCase(int)} method.
9008      *
9009      * @param   ch   the character to be converted.
9010      * @return  the lowercase equivalent of the character, if any;
9011      *          otherwise, the character itself.
9012      * @see     Character#isLowerCase(char)
9013      * @see     String#toLowerCase()
9014      */
9015     public static char toLowerCase(char ch) {
9016         return (char)toLowerCase((int)ch);
9017     }
9018 
9019     /**
9020      * Converts the character (Unicode code point) argument to
9021      * lowercase using case mapping information from the UnicodeData
9022      * file.
9023      *
9024      * <p> Note that
9025      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9026      * does not always return {@code true} for some ranges of
9027      * characters, particularly those that are symbols or ideographs.
9028      *
9029      * <p>In general, {@link String#toLowerCase()} should be used to map
9030      * characters to lowercase. {@code String} case mapping methods
9031      * have several benefits over {@code Character} case mapping methods.
9032      * {@code String} case mapping methods can perform locale-sensitive
9033      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9034      * the {@code Character} case mapping methods cannot.
9035      *
9036      * @param   codePoint   the character (Unicode code point) to be converted.
9037      * @return  the lowercase equivalent of the character (Unicode code
9038      *          point), if any; otherwise, the character itself.
9039      * @see     Character#isLowerCase(int)
9040      * @see     String#toLowerCase()
9041      *
9042      * @since   1.5
9043      */
9044     public static int toLowerCase(int codePoint) {
9045         return CharacterData.of(codePoint).toLowerCase(codePoint);
9046     }
9047 
9048     /**
9049      * Converts the character argument to uppercase using case mapping
9050      * information from the UnicodeData file.
9051      * <p>
9052      * Note that
9053      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9054      * does not always return {@code true} for some ranges of
9055      * characters, particularly those that are symbols or ideographs.
9056      *
9057      * <p>In general, {@link String#toUpperCase()} should be used to map
9058      * characters to uppercase. {@code String} case mapping methods
9059      * have several benefits over {@code Character} case mapping methods.
9060      * {@code String} case mapping methods can perform locale-sensitive
9061      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9062      * the {@code Character} case mapping methods cannot.
9063      *
9064      * <p><b>Note:</b> This method cannot handle <a
9065      * href="#supplementary"> supplementary characters</a>. To support
9066      * all Unicode characters, including supplementary characters, use
9067      * the {@link #toUpperCase(int)} method.
9068      *
9069      * @param   ch   the character to be converted.
9070      * @return  the uppercase equivalent of the character, if any;
9071      *          otherwise, the character itself.
9072      * @see     Character#isUpperCase(char)
9073      * @see     String#toUpperCase()
9074      */
9075     public static char toUpperCase(char ch) {
9076         return (char)toUpperCase((int)ch);
9077     }
9078 
9079     /**
9080      * Converts the character (Unicode code point) argument to
9081      * uppercase using case mapping information from the UnicodeData
9082      * file.
9083      *
9084      * <p>Note that
9085      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
9086      * does not always return {@code true} for some ranges of
9087      * characters, particularly those that are symbols or ideographs.
9088      *
9089      * <p>In general, {@link String#toUpperCase()} should be used to map
9090      * characters to uppercase. {@code String} case mapping methods
9091      * have several benefits over {@code Character} case mapping methods.
9092      * {@code String} case mapping methods can perform locale-sensitive
9093      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9094      * the {@code Character} case mapping methods cannot.
9095      *
9096      * @param   codePoint   the character (Unicode code point) to be converted.
9097      * @return  the uppercase equivalent of the character, if any;
9098      *          otherwise, the character itself.
9099      * @see     Character#isUpperCase(int)
9100      * @see     String#toUpperCase()
9101      *
9102      * @since   1.5
9103      */
9104     public static int toUpperCase(int codePoint) {
9105         return CharacterData.of(codePoint).toUpperCase(codePoint);
9106     }
9107 
9108     /**
9109      * Converts the character argument to titlecase using case mapping
9110      * information from the UnicodeData file. If a character has no
9111      * explicit titlecase mapping and is not itself a titlecase char
9112      * according to UnicodeData, then the uppercase mapping is
9113      * returned as an equivalent titlecase mapping. If the
9114      * {@code char} argument is already a titlecase
9115      * {@code char}, the same {@code char} value will be
9116      * returned.
9117      * <p>
9118      * Note that
9119      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
9120      * does not always return {@code true} for some ranges of
9121      * characters.
9122      *
9123      * <p><b>Note:</b> This method cannot handle <a
9124      * href="#supplementary"> supplementary characters</a>. To support
9125      * all Unicode characters, including supplementary characters, use
9126      * the {@link #toTitleCase(int)} method.
9127      *
9128      * @param   ch   the character to be converted.
9129      * @return  the titlecase equivalent of the character, if any;
9130      *          otherwise, the character itself.
9131      * @see     Character#isTitleCase(char)
9132      * @see     Character#toLowerCase(char)
9133      * @see     Character#toUpperCase(char)
9134      * @since   1.0.2
9135      */
9136     public static char toTitleCase(char ch) {
9137         return (char)toTitleCase((int)ch);
9138     }
9139 
9140     /**
9141      * Converts the character (Unicode code point) argument to titlecase using case mapping
9142      * information from the UnicodeData file. If a character has no
9143      * explicit titlecase mapping and is not itself a titlecase char
9144      * according to UnicodeData, then the uppercase mapping is
9145      * returned as an equivalent titlecase mapping. If the
9146      * character argument is already a titlecase
9147      * character, the same character value will be
9148      * returned.
9149      *
9150      * <p>Note that
9151      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
9152      * does not always return {@code true} for some ranges of
9153      * characters.
9154      *
9155      * @param   codePoint   the character (Unicode code point) to be converted.
9156      * @return  the titlecase equivalent of the character, if any;
9157      *          otherwise, the character itself.
9158      * @see     Character#isTitleCase(int)
9159      * @see     Character#toLowerCase(int)
9160      * @see     Character#toUpperCase(int)
9161      * @since   1.5
9162      */
9163     public static int toTitleCase(int codePoint) {
9164         return CharacterData.of(codePoint).toTitleCase(codePoint);
9165     }
9166 
9167     /**
9168      * Returns the numeric value of the character {@code ch} in the
9169      * specified radix.
9170      * <p>
9171      * If the radix is not in the range {@code MIN_RADIX} &le;
9172      * {@code radix} &le; {@code MAX_RADIX} or if the
9173      * value of {@code ch} is not a valid digit in the specified
9174      * radix, {@code -1} is returned. A character is a valid digit
9175      * if at least one of the following is true:
9176      * <ul>
9177      * <li>The method {@code isDigit} is {@code true} of the character
9178      *     and the Unicode decimal digit value of the character (or its
9179      *     single-character decomposition) is less than the specified radix.
9180      *     In this case the decimal digit value is returned.
9181      * <li>The character is one of the uppercase Latin letters
9182      *     {@code 'A'} through {@code 'Z'} and its code is less than
9183      *     {@code radix + 'A' - 10}.
9184      *     In this case, {@code ch - 'A' + 10}
9185      *     is returned.
9186      * <li>The character is one of the lowercase Latin letters
9187      *     {@code 'a'} through {@code 'z'} and its code is less than
9188      *     {@code radix + 'a' - 10}.
9189      *     In this case, {@code ch - 'a' + 10}
9190      *     is returned.
9191      * <li>The character is one of the fullwidth uppercase Latin letters A
9192      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9193      *     and its code is less than
9194      *     {@code radix + '\u005CuFF21' - 10}.
9195      *     In this case, {@code ch - '\u005CuFF21' + 10}
9196      *     is returned.
9197      * <li>The character is one of the fullwidth lowercase Latin letters a
9198      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9199      *     and its code is less than
9200      *     {@code radix + '\u005CuFF41' - 10}.
9201      *     In this case, {@code ch - '\u005CuFF41' + 10}
9202      *     is returned.
9203      * </ul>
9204      *
9205      * <p><b>Note:</b> This method cannot handle <a
9206      * href="#supplementary"> supplementary characters</a>. To support
9207      * all Unicode characters, including supplementary characters, use
9208      * the {@link #digit(int, int)} method.
9209      *
9210      * @param   ch      the character to be converted.
9211      * @param   radix   the radix.
9212      * @return  the numeric value represented by the character in the
9213      *          specified radix.
9214      * @see     Character#forDigit(int, int)
9215      * @see     Character#isDigit(char)
9216      */
9217     public static int digit(char ch, int radix) {
9218         return digit((int)ch, radix);
9219     }
9220 
9221     /**
9222      * Returns the numeric value of the specified character (Unicode
9223      * code point) in the specified radix.
9224      *
9225      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
9226      * {@code radix} &le; {@code MAX_RADIX} or if the
9227      * character is not a valid digit in the specified
9228      * radix, {@code -1} is returned. A character is a valid digit
9229      * if at least one of the following is true:
9230      * <ul>
9231      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
9232      *     and the Unicode decimal digit value of the character (or its
9233      *     single-character decomposition) is less than the specified radix.
9234      *     In this case the decimal digit value is returned.
9235      * <li>The character is one of the uppercase Latin letters
9236      *     {@code 'A'} through {@code 'Z'} and its code is less than
9237      *     {@code radix + 'A' - 10}.
9238      *     In this case, {@code codePoint - 'A' + 10}
9239      *     is returned.
9240      * <li>The character is one of the lowercase Latin letters
9241      *     {@code 'a'} through {@code 'z'} and its code is less than
9242      *     {@code radix + 'a' - 10}.
9243      *     In this case, {@code codePoint - 'a' + 10}
9244      *     is returned.
9245      * <li>The character is one of the fullwidth uppercase Latin letters A
9246      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9247      *     and its code is less than
9248      *     {@code radix + '\u005CuFF21' - 10}.
9249      *     In this case,
9250      *     {@code codePoint - '\u005CuFF21' + 10}
9251      *     is returned.
9252      * <li>The character is one of the fullwidth lowercase Latin letters a
9253      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9254      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
9256      *     In this case,
9257      *     {@code codePoint - '\u005CuFF41' + 10}
9258      *     is returned.
9259      * </ul>
9260      *
9261      * @param   codePoint the character (Unicode code point) to be converted.
9262      * @param   radix   the radix.
9263      * @return  the numeric value represented by the character in the
9264      *          specified radix.
9265      * @see     Character#forDigit(int, int)
9266      * @see     Character#isDigit(int)
9267      * @since   1.5
9268      */
9269     public static int digit(int codePoint, int radix) {
9270         return CharacterData.of(codePoint).digit(codePoint, radix);
9271     }
9272 
9273     /**
9274      * Returns the {@code int} value that the specified Unicode
9275      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
9278      * <p>
9279      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9280      * {@code '\u005Cu005A'}), lowercase
9281      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9282      * full width variant ({@code '\u005CuFF21'} through
9283      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9284      * {@code '\u005CuFF5A'}) forms have numeric values from 10
9285      * through 35. This is independent of the Unicode specification,
9286      * which does not assign numeric values to these {@code char}
9287      * values.
9288      * <p>
9289      * If the character does not have a numeric value, then -1 is returned.
9290      * If the character has a numeric value that cannot be represented as a
9291      * nonnegative integer (for example, a fractional value), then -2
9292      * is returned.
9293      *
9294      * <p><b>Note:</b> This method cannot handle <a
9295      * href="#supplementary"> supplementary characters</a>. To support
9296      * all Unicode characters, including supplementary characters, use
9297      * the {@link #getNumericValue(int)} method.
9298      *
9299      * @param   ch      the character to be converted.
9300      * @return  the numeric value of the character, as a nonnegative {@code int}
9301      *          value; -2 if the character has a numeric value but the value
     *          cannot be represented as a nonnegative {@code int} value;
9303      *          -1 if the character has no numeric value.
9304      * @see     Character#forDigit(int, int)
9305      * @see     Character#isDigit(char)
9306      * @since   1.1
9307      */
9308     public static int getNumericValue(char ch) {
9309         return getNumericValue((int)ch);
9310     }
9311 
9312     /**
9313      * Returns the {@code int} value that the specified
9314      * character (Unicode code point) represents. For example, the character
9315      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9316      * an {@code int} with a value of 50.
9317      * <p>
9318      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9319      * {@code '\u005Cu005A'}), lowercase
9320      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9321      * full width variant ({@code '\u005CuFF21'} through
9322      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9323      * {@code '\u005CuFF5A'}) forms have numeric values from 10
9324      * through 35. This is independent of the Unicode specification,
9325      * which does not assign numeric values to these {@code char}
9326      * values.
9327      * <p>
9328      * If the character does not have a numeric value, then -1 is returned.
9329      * If the character has a numeric value that cannot be represented as a
9330      * nonnegative integer (for example, a fractional value), then -2
9331      * is returned.
9332      *
9333      * @param   codePoint the character (Unicode code point) to be converted.
9334      * @return  the numeric value of the character, as a nonnegative {@code int}
9335      *          value; -2 if the character has a numeric value but the value
9336      *          cannot be represented as a nonnegative {@code int} value;
9337      *          -1 if the character has no numeric value.
9338      * @see     Character#forDigit(int, int)
9339      * @see     Character#isDigit(int)
9340      * @since   1.5
9341      */
9342     public static int getNumericValue(int codePoint) {
9343         return CharacterData.of(codePoint).getNumericValue(codePoint);
9344     }
9345 
9346     /**
9347      * Determines if the specified character is ISO-LATIN-1 white space.
9348      * This method returns {@code true} for the following five
9349      * characters only:
9350      * <table summary="truechars">
9351      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
9352      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
9353      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
9354      *     <td>{@code NEW LINE}</td></tr>
9355      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
9356      *     <td>{@code FORM FEED}</td></tr>
9357      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
9358      *     <td>{@code CARRIAGE RETURN}</td></tr>
9359      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
9360      *     <td>{@code SPACE}</td></tr>
9361      * </table>
9362      *
9363      * @param      ch   the character to be tested.
9364      * @return     {@code true} if the character is ISO-LATIN-1 white
9365      *             space; {@code false} otherwise.
9366      * @see        Character#isSpaceChar(char)
9367      * @see        Character#isWhitespace(char)
9368      * @deprecated Replaced by isWhitespace(char).
9369      */
9370     @Deprecated
9371     public static boolean isSpace(char ch) {
9372         return (ch <= 0x0020) &&
9373             (((((1L << 0x0009) |
9374             (1L << 0x000A) |
9375             (1L << 0x000C) |
9376             (1L << 0x000D) |
9377             (1L << 0x0020)) >> ch) & 1L) != 0);
9378     }
9379 
9380 
9381     /**
9382      * Determines if the specified character is a Unicode space character.
9383      * A character is considered to be a space character if and only if
9384      * it is specified to be a space character by the Unicode Standard. This
9385      * method returns true if the character's general category type is any of
9386      * the following:
9387      * <ul>
9388      * <li> {@code SPACE_SEPARATOR}
9389      * <li> {@code LINE_SEPARATOR}
9390      * <li> {@code PARAGRAPH_SEPARATOR}
9391      * </ul>
9392      *
9393      * <p><b>Note:</b> This method cannot handle <a
9394      * href="#supplementary"> supplementary characters</a>. To support
9395      * all Unicode characters, including supplementary characters, use
9396      * the {@link #isSpaceChar(int)} method.
9397      *
9398      * @param   ch      the character to be tested.
9399      * @return  {@code true} if the character is a space character;
9400      *          {@code false} otherwise.
9401      * @see     Character#isWhitespace(char)
9402      * @since   1.1
9403      */
9404     public static boolean isSpaceChar(char ch) {
9405         return isSpaceChar((int)ch);
9406     }
9407 
9408     /**
9409      * Determines if the specified character (Unicode code point) is a
9410      * Unicode space character.  A character is considered to be a
9411      * space character if and only if it is specified to be a space
9412      * character by the Unicode Standard. This method returns true if
9413      * the character's general category type is any of the following:
9414      *
9415      * <ul>
9416      * <li> {@link #SPACE_SEPARATOR}
9417      * <li> {@link #LINE_SEPARATOR}
9418      * <li> {@link #PARAGRAPH_SEPARATOR}
9419      * </ul>
9420      *
9421      * @param   codePoint the character (Unicode code point) to be tested.
9422      * @return  {@code true} if the character is a space character;
9423      *          {@code false} otherwise.
9424      * @see     Character#isWhitespace(int)
9425      * @since   1.5
9426      */
9427     public static boolean isSpaceChar(int codePoint) {
9428         return ((((1 << Character.SPACE_SEPARATOR) |
9429                   (1 << Character.LINE_SEPARATOR) |
9430                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
9431             != 0;
9432     }
9433 
9434     /**
9435      * Determines if the specified character is white space according to Java.
9436      * A character is a Java whitespace character if and only if it satisfies
9437      * one of the following criteria:
9438      * <ul>
9439      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
9440      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
9441      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
9442      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
9443      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
9444      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
9445      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
9446      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
9447      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
9448      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
9449      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
9450      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
9451      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
9452      * </ul>
9453      *
9454      * <p><b>Note:</b> This method cannot handle <a
9455      * href="#supplementary"> supplementary characters</a>. To support
9456      * all Unicode characters, including supplementary characters, use
9457      * the {@link #isWhitespace(int)} method.
9458      *
9459      * @param   ch the character to be tested.
9460      * @return  {@code true} if the character is a Java whitespace
9461      *          character; {@code false} otherwise.
9462      * @see     Character#isSpaceChar(char)
9463      * @since   1.1
9464      */
9465     public static boolean isWhitespace(char ch) {
9466         return isWhitespace((int)ch);
9467     }
9468 
9469     /**
9470      * Determines if the specified character (Unicode code point) is
9471      * white space according to Java.  A character is a Java
9472      * whitespace character if and only if it satisfies one of the
9473      * following criteria:
9474      * <ul>
9475      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
9476      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
9477      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
9478      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
9479      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
9480      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
9481      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
9482      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
9483      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
9484      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
9485      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
9486      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
9487      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
9488      * </ul>
9489      *
9490      * @param   codePoint the character (Unicode code point) to be tested.
9491      * @return  {@code true} if the character is a Java whitespace
9492      *          character; {@code false} otherwise.
9493      * @see     Character#isSpaceChar(int)
9494      * @since   1.5
9495      */
9496     public static boolean isWhitespace(int codePoint) {
9497         return CharacterData.of(codePoint).isWhitespace(codePoint);
9498     }
9499 
9500     /**
9501      * Determines if the specified character is an ISO control
9502      * character.  A character is considered to be an ISO control
9503      * character if its code is in the range {@code '\u005Cu0000'}
9504      * through {@code '\u005Cu001F'} or in the range
9505      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
9506      *
9507      * <p><b>Note:</b> This method cannot handle <a
9508      * href="#supplementary"> supplementary characters</a>. To support
9509      * all Unicode characters, including supplementary characters, use
9510      * the {@link #isISOControl(int)} method.
9511      *
9512      * @param   ch      the character to be tested.
9513      * @return  {@code true} if the character is an ISO control character;
9514      *          {@code false} otherwise.
9515      *
9516      * @see     Character#isSpaceChar(char)
9517      * @see     Character#isWhitespace(char)
9518      * @since   1.1
9519      */
9520     public static boolean isISOControl(char ch) {
9521         return isISOControl((int)ch);
9522     }
9523 
9524     /**
9525      * Determines if the referenced character (Unicode code point) is an ISO control
9526      * character.  A character is considered to be an ISO control
9527      * character if its code is in the range {@code '\u005Cu0000'}
9528      * through {@code '\u005Cu001F'} or in the range
9529      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
9530      *
9531      * @param   codePoint the character (Unicode code point) to be tested.
9532      * @return  {@code true} if the character is an ISO control character;
9533      *          {@code false} otherwise.
9534      * @see     Character#isSpaceChar(int)
9535      * @see     Character#isWhitespace(int)
9536      * @since   1.5
9537      */
9538     public static boolean isISOControl(int codePoint) {
9539         // Optimized form of:
9540         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
9541         //     (codePoint >= 0x7F && codePoint <= 0x9F);
9542         return codePoint <= 0x9F &&
9543             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
9544     }
9545 
9546     /**
9547      * Returns a value indicating a character's general category.
9548      *
9549      * <p><b>Note:</b> This method cannot handle <a
9550      * href="#supplementary"> supplementary characters</a>. To support
9551      * all Unicode characters, including supplementary characters, use
9552      * the {@link #getType(int)} method.
9553      *
9554      * @param   ch      the character to be tested.
9555      * @return  a value of type {@code int} representing the
9556      *          character's general category.
9557      * @see     Character#COMBINING_SPACING_MARK
9558      * @see     Character#CONNECTOR_PUNCTUATION
9559      * @see     Character#CONTROL
9560      * @see     Character#CURRENCY_SYMBOL
9561      * @see     Character#DASH_PUNCTUATION
9562      * @see     Character#DECIMAL_DIGIT_NUMBER
9563      * @see     Character#ENCLOSING_MARK
9564      * @see     Character#END_PUNCTUATION
9565      * @see     Character#FINAL_QUOTE_PUNCTUATION
9566      * @see     Character#FORMAT
9567      * @see     Character#INITIAL_QUOTE_PUNCTUATION
9568      * @see     Character#LETTER_NUMBER
9569      * @see     Character#LINE_SEPARATOR
9570      * @see     Character#LOWERCASE_LETTER
9571      * @see     Character#MATH_SYMBOL
9572      * @see     Character#MODIFIER_LETTER
9573      * @see     Character#MODIFIER_SYMBOL
9574      * @see     Character#NON_SPACING_MARK
9575      * @see     Character#OTHER_LETTER
9576      * @see     Character#OTHER_NUMBER
9577      * @see     Character#OTHER_PUNCTUATION
9578      * @see     Character#OTHER_SYMBOL
9579      * @see     Character#PARAGRAPH_SEPARATOR
9580      * @see     Character#PRIVATE_USE
9581      * @see     Character#SPACE_SEPARATOR
9582      * @see     Character#START_PUNCTUATION
9583      * @see     Character#SURROGATE
9584      * @see     Character#TITLECASE_LETTER
9585      * @see     Character#UNASSIGNED
9586      * @see     Character#UPPERCASE_LETTER
9587      * @since   1.1
9588      */
9589     public static int getType(char ch) {
9590         return getType((int)ch);
9591     }
9592 
9593     /**
9594      * Returns a value indicating a character's general category.
9595      *
9596      * @param   codePoint the character (Unicode code point) to be tested.
9597      * @return  a value of type {@code int} representing the
9598      *          character's general category.
9599      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
9600      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
9601      * @see     Character#CONTROL CONTROL
9602      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
9603      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
9604      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
9605      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
9606      * @see     Character#END_PUNCTUATION END_PUNCTUATION
9607      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
9608      * @see     Character#FORMAT FORMAT
9609      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
9610      * @see     Character#LETTER_NUMBER LETTER_NUMBER
9611      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
9612      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
9613      * @see     Character#MATH_SYMBOL MATH_SYMBOL
9614      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
9615      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
9616      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
9617      * @see     Character#OTHER_LETTER OTHER_LETTER
9618      * @see     Character#OTHER_NUMBER OTHER_NUMBER
9619      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
9620      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
9621      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
9622      * @see     Character#PRIVATE_USE PRIVATE_USE
9623      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
9624      * @see     Character#START_PUNCTUATION START_PUNCTUATION
9625      * @see     Character#SURROGATE SURROGATE
9626      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
9627      * @see     Character#UNASSIGNED UNASSIGNED
9628      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
9629      * @since   1.5
9630      */
9631     public static int getType(int codePoint) {
9632         return CharacterData.of(codePoint).getType(codePoint);
9633     }
9634 
9635     /**
9636      * Determines the character representation for a specific digit in
9637      * the specified radix. If the value of {@code radix} is not a
9638      * valid radix, or the value of {@code digit} is not a valid
9639      * digit in the specified radix, the null character
9640      * ({@code '\u005Cu0000'}) is returned.
9641      * <p>
9642      * The {@code radix} argument is valid if it is greater than or
9643      * equal to {@code MIN_RADIX} and less than or equal to
9644      * {@code MAX_RADIX}. The {@code digit} argument is valid if
9645      * {@code 0 <= digit < radix}.
9646      * <p>
9647      * If the digit is less than 10, then
9648      * {@code '0' + digit} is returned. Otherwise, the value
9649      * {@code 'a' + digit - 10} is returned.
9650      *
9651      * @param   digit   the number to convert to a character.
9652      * @param   radix   the radix.
9653      * @return  the {@code char} representation of the specified digit
9654      *          in the specified radix.
9655      * @see     Character#MIN_RADIX
9656      * @see     Character#MAX_RADIX
9657      * @see     Character#digit(char, int)
9658      */
9659     public static char forDigit(int digit, int radix) {
9660         if ((digit >= radix) || (digit < 0)) {
9661             return '\0';
9662         }
9663         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
9664             return '\0';
9665         }
9666         if (digit < 10) {
9667             return (char)('0' + digit);
9668         }
9669         return (char)('a' - 10 + digit);
9670     }
9671 
9672     /**
9673      * Returns the Unicode directionality property for the given
9674      * character.  Character directionality is used to calculate the
9675      * visual ordering of text. The directionality value of undefined
9676      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
9677      *
9678      * <p><b>Note:</b> This method cannot handle <a
9679      * href="#supplementary"> supplementary characters</a>. To support
9680      * all Unicode characters, including supplementary characters, use
9681      * the {@link #getDirectionality(int)} method.
9682      *
9683      * @param  ch {@code char} for which the directionality property
9684      *            is requested.
9685      * @return the directionality property of the {@code char} value.
9686      *
9687      * @see Character#DIRECTIONALITY_UNDEFINED
9688      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
9689      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
9690      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
9691      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
9692      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
9693      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
9694      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
9695      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
9696      * @see Character#DIRECTIONALITY_NONSPACING_MARK
9697      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
9698      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
9699      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
9700      * @see Character#DIRECTIONALITY_WHITESPACE
9701      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
9702      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
9703      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
9704      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
9705      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
9706      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
9707      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
9708      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
9709      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
9710      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
9711      * @since 1.4
9712      */
9713     public static byte getDirectionality(char ch) {
9714         return getDirectionality((int)ch);
9715     }
9716 
9717     /**
9718      * Returns the Unicode directionality property for the given
9719      * character (Unicode code point).  Character directionality is
9720      * used to calculate the visual ordering of text. The
9721      * directionality value of an undefined character is {@link
9722      * #DIRECTIONALITY_UNDEFINED}.
9723      *
9724      * @param   codePoint the character (Unicode code point) for which
9725      *          the directionality property is requested.
9726      * @return the directionality property of the character.
9727      *
9728      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
9729      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
9730      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
9731      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
9732      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
9733      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
9734      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
9735      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
9736      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
9737      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
9738      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
9739      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
9740      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
9741      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
9742      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
9743      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
9744      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
9745      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
9746      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
9747      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
9748      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
9749      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
9750      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
9751      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
9752      * @since    1.5
9753      */
9754     public static byte getDirectionality(int codePoint) {
9755         return CharacterData.of(codePoint).getDirectionality(codePoint);
9756     }
9757 
9758     /**
9759      * Determines whether the character is mirrored according to the
9760      * Unicode specification.  Mirrored characters should have their
9761      * glyphs horizontally mirrored when displayed in text that is
9762      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
9763      * PARENTHESIS is semantically defined to be an <i>opening
9764      * parenthesis</i>.  This will appear as a "(" in text that is
9765      * left-to-right but as a ")" in text that is right-to-left.
9766      *
9767      * <p><b>Note:</b> This method cannot handle <a
9768      * href="#supplementary"> supplementary characters</a>. To support
9769      * all Unicode characters, including supplementary characters, use
9770      * the {@link #isMirrored(int)} method.
9771      *
9772      * @param  ch {@code char} for which the mirrored property is requested
9773      * @return {@code true} if the char is mirrored, {@code false}
9774      *         if the {@code char} is not mirrored or is not defined.
9775      * @since 1.4
9776      */
9777     public static boolean isMirrored(char ch) {
9778         return isMirrored((int)ch);
9779     }
9780 
9781     /**
9782      * Determines whether the specified character (Unicode code point)
9783      * is mirrored according to the Unicode specification.  Mirrored
9784      * characters should have their glyphs horizontally mirrored when
9785      * displayed in text that is right-to-left.  For example,
9786      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
9787      * defined to be an <i>opening parenthesis</i>.  This will appear
9788      * as a "(" in text that is left-to-right but as a ")" in text
9789      * that is right-to-left.
9790      *
9791      * @param   codePoint the character (Unicode code point) to be tested.
9792      * @return  {@code true} if the character is mirrored, {@code false}
9793      *          if the character is not mirrored or is not defined.
9794      * @since   1.5
9795      */
9796     public static boolean isMirrored(int codePoint) {
9797         return CharacterData.of(codePoint).isMirrored(codePoint);
9798     }
9799 
9800     /**
9801      * Compares two {@code Character} objects numerically.
9802      *
9803      * @param   anotherCharacter   the {@code Character} to be compared.
9804 
9805      * @return  the value {@code 0} if the argument {@code Character}
9806      *          is equal to this {@code Character}; a value less than
9807      *          {@code 0} if this {@code Character} is numerically less
9808      *          than the {@code Character} argument; and a value greater than
9809      *          {@code 0} if this {@code Character} is numerically greater
9810      *          than the {@code Character} argument (unsigned comparison).
9811      *          Note that this is strictly a numerical comparison; it is not
9812      *          locale-dependent.
9813      * @since   1.2
9814      */
9815     public int compareTo(Character anotherCharacter) {
9816         return compare(this.value, anotherCharacter.value);
9817     }
9818 
9819     /**
9820      * Compares two {@code char} values numerically.
9821      * The value returned is identical to what would be returned by:
9822      * <pre>
9823      *    Character.valueOf(x).compareTo(Character.valueOf(y))
9824      * </pre>
9825      *
9826      * @param  x the first {@code char} to compare
9827      * @param  y the second {@code char} to compare
9828      * @return the value {@code 0} if {@code x == y};
9829      *         a value less than {@code 0} if {@code x < y}; and
9830      *         a value greater than {@code 0} if {@code x > y}
9831      * @since 1.7
9832      */
9833     public static int compare(char x, char y) {
9834         return x - y;
9835     }
9836 
9837     /**
9838      * Converts the character (Unicode code point) argument to uppercase using
9839      * information from the UnicodeData file.
9840      *
9841      * @param   codePoint   the character (Unicode code point) to be converted.
9842      * @return  either the uppercase equivalent of the character, if
9843      *          any, or an error flag ({@code Character.ERROR})
9844      *          that indicates that a 1:M {@code char} mapping exists.
9845      * @see     Character#isLowerCase(char)
9846      * @see     Character#isUpperCase(char)
9847      * @see     Character#toLowerCase(char)
9848      * @see     Character#toTitleCase(char)
9849      * @since 1.4
9850      */
9851     static int toUpperCaseEx(int codePoint) {
9852         assert isValidCodePoint(codePoint);
9853         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
9854     }
9855 
9856     /**
9857      * Converts the character (Unicode code point) argument to uppercase using case
9858      * mapping information from the SpecialCasing file in the Unicode
9859      * specification. If a character has no explicit uppercase
9860      * mapping, then the {@code char} itself is returned in the
9861      * {@code char[]}.
9862      *
9863      * @param   codePoint   the character (Unicode code point) to be converted.
9864      * @return a {@code char[]} with the uppercased character.
9865      * @since 1.4
9866      */
9867     static char[] toUpperCaseCharArray(int codePoint) {
9868         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
9869         assert isBmpCodePoint(codePoint);
9870         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
9871     }
9872 
9873     /**
9874      * The number of bits used to represent a {@code char} value in unsigned
9875      * binary form, constant {@code 16}.
9876      *
9877      * @since 1.5
9878      */
    public static final int SIZE = 16; // width of a char value in bits
9880 
9881     /**
9882      * The number of bytes used to represent a {@code char} value in unsigned
9883      * binary form.
9884      *
9885      * @since 1.8
9886      */
    public static final int BYTES = SIZE / Byte.SIZE; // bits per char divided by bits per byte
9888 
9889     /**
9890      * Returns the value obtained by reversing the order of the bytes in the
9891      * specified {@code char} value.
9892      *
9893      * @param ch The {@code char} of which to reverse the byte order.
9894      * @return the value obtained by reversing (or, equivalently, swapping)
9895      *     the bytes in the specified {@code char} value.
9896      * @since 1.5
9897      */
9898     @HotSpotIntrinsicCandidate
9899     public static char reverseBytes(char ch) {
9900         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
9901     }
9902 
9903     /**
9904      * Returns the Unicode name of the specified character
9905      * {@code codePoint}, or null if the code point is
9906      * {@link #UNASSIGNED unassigned}.
9907      * <p>
9908      * Note: if the specified character is not assigned a name by
9909      * the <i>UnicodeData</i> file (part of the Unicode Character
9910      * Database maintained by the Unicode Consortium), the returned
9911      * name is the same as the result of the expression:
9912      *
9913      * <blockquote>{@code
9914      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
9915      *     + " "
9916      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
9917      *
9918      * }</blockquote>
9919      *
9920      * @param  codePoint the character (Unicode code point)
9921      *
9922      * @return the Unicode name of the specified character, or null if
9923      *         the code point is unassigned.
9924      *
9925      * @exception IllegalArgumentException if the specified
9926      *            {@code codePoint} is not a valid Unicode
9927      *            code point.
9928      *
9929      * @since 1.7
9930      */
9931     public static String getName(int codePoint) {
9932         if (!isValidCodePoint(codePoint)) {
9933             throw new IllegalArgumentException();
9934         }
9935         String name = CharacterName.get(codePoint);
9936         if (name != null)
9937             return name;
9938         if (getType(codePoint) == UNASSIGNED)
9939             return null;
9940         UnicodeBlock block = UnicodeBlock.of(codePoint);
9941         if (block != null)
9942             return block.toString().replace('_', ' ') + " "
9943                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
9944         // should never come here
9945         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
9946     }
9947 }