1 /*
   2  * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 10.0.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  *
  59  * <h3><a id="unicode">Unicode Character Representations</a></h3>
  60  *
  61  * <p>The {@code char} data type (and therefore the value that a
  62  * {@code Character} object encapsulates) are based on the
  63  * original Unicode specification, which defined characters as
  64  * fixed-width 16-bit entities. The Unicode Standard has since been
  65  * changed to allow for characters whose representation requires more
  66  * than 16 bits.  The range of legal <em>code point</em>s is now
  67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  68  * (Refer to the <a
  69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  71  * Standard.)
  72  *
  73  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
  74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  75  * <a id="supplementary">Characters</a> whose code points are greater
  76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  77  * platform uses the UTF-16 representation in {@code char} arrays and
  78  * in the {@code String} and {@code StringBuffer} classes. In
  79  * this representation, supplementary characters are represented as a pair
  80  * of {@code char} values, the first from the <em>high-surrogates</em>
  81  * range, (\uD800-\uDBFF), the second from the
  82  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  83  *
  84  * <p>A {@code char} value, therefore, represents Basic
  85  * Multilingual Plane (BMP) code points, including the surrogate
  86  * code points, or code units of the UTF-16 encoding. An
  87  * {@code int} value represents all Unicode code points,
  88  * including supplementary code points. The lower (least significant)
  89  * 21 bits of {@code int} are used to represent Unicode code
  90  * points and the upper (most significant) 11 bits must be zero.
  91  * Unless otherwise specified, the behavior with respect to
  92  * supplementary characters and surrogate {@code char} values is
  93  * as follows:
  94  *
  95  * <ul>
  96  * <li>The methods that only accept a {@code char} value cannot support
  97  * supplementary characters. They treat {@code char} values from the
  98  * surrogate ranges as undefined characters. For example,
  99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 100  * this specific value if followed by any low-surrogate value in a string
 101  * would represent a letter.
 102  *
 103  * <li>The methods that accept an {@code int} value support all
 104  * Unicode characters, including supplementary characters. For
 105  * example, {@code Character.isLetter(0x2F81A)} returns
 106  * {@code true} because the code point value represents a letter
 107  * (a CJK ideograph).
 108  * </ul>
 109  *
 110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 111  * used for character values in the range between U+0000 and U+10FFFF,
 112  * and <em>Unicode code unit</em> is used for 16-bit
 113  * {@code char} values that are code units of the <em>UTF-16</em>
 114  * encoding. For more information on Unicode terminology, refer to the
 115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 116  *
 117  * @author  Lee Boynton
 118  * @author  Guy Steele
 119  * @author  Akira Tanaka
 120  * @author  Martin Buchholz
 121  * @author  Ulf Zibis
 122  * @since   1.0
 123  */
 124 public final
 125 class Character implements java.io.Serializable, Comparable<Character> {
 126     /**
 127      * The minimum radix available for conversion to and from strings.
 128      * The constant value of this field is the smallest value permitted
 129      * for the radix argument in radix-conversion methods such as the
 130      * {@code digit} method, the {@code forDigit} method, and the
 131      * {@code toString} method of class {@code Integer}.
 132      *
 133      * @see     Character#digit(char, int)
 134      * @see     Character#forDigit(int, int)
 135      * @see     Integer#toString(int, int)
 136      * @see     Integer#valueOf(String)
 137      */
 138     public static final int MIN_RADIX = 2;
 139 
 140     /**
 141      * The maximum radix available for conversion to and from strings.
 142      * The constant value of this field is the largest value permitted
 143      * for the radix argument in radix-conversion methods such as the
 144      * {@code digit} method, the {@code forDigit} method, and the
 145      * {@code toString} method of class {@code Integer}.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * {@code char}, {@code '\u005Cu0000'}.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * {@code char}, {@code '\u005CuFFFF'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The {@code Class} instance representing the primitive type
 172      * {@code char}.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
 289     /**
 290      * General category "Co" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined {@code char}
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
 392     /**
  393      * Strong bidirectional character type "AL" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * Weak bidirectional character type "LRI" in the Unicode specification.
 496      * @since 9
 497      */
 498     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 499 
 500     /**
 501      * Weak bidirectional character type "RLI" in the Unicode specification.
 502      * @since 9
 503      */
 504     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 505 
 506     /**
 507      * Weak bidirectional character type "FSI" in the Unicode specification.
 508      * @since 9
 509      */
 510     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 511 
 512     /**
 513      * Weak bidirectional character type "PDI" in the Unicode specification.
 514      * @since 9
 515      */
 516     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 517 
 518     /**
 519      * The minimum value of a
 520      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 521      * Unicode high-surrogate code unit</a>
 522      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 523      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 524      *
 525      * @since 1.5
 526      */
 527     public static final char MIN_HIGH_SURROGATE = '\uD800';
 528 
 529     /**
 530      * The maximum value of a
 531      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 532      * Unicode high-surrogate code unit</a>
 533      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 534      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 535      *
 536      * @since 1.5
 537      */
 538     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 539 
 540     /**
 541      * The minimum value of a
 542      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 543      * Unicode low-surrogate code unit</a>
 544      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 545      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 546      *
 547      * @since 1.5
 548      */
 549     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 550 
 551     /**
 552      * The maximum value of a
 553      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 554      * Unicode low-surrogate code unit</a>
 555      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 556      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 557      *
 558      * @since 1.5
 559      */
 560     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 561 
 562     /**
 563      * The minimum value of a Unicode surrogate code unit in the
 564      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 569 
 570     /**
 571      * The maximum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 577 
 578     /**
 579      * The minimum value of a
 580      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 581      * Unicode supplementary code point</a>, constant {@code U+10000}.
 582      *
 583      * @since 1.5
 584      */
 585     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 586 
 587     /**
 588      * The minimum value of a
 589      * <a href="http://www.unicode.org/glossary/#code_point">
 590      * Unicode code point</a>, constant {@code U+0000}.
 591      *
 592      * @since 1.5
 593      */
 594     public static final int MIN_CODE_POINT = 0x000000;
 595 
 596     /**
 597      * The maximum value of a
 598      * <a href="http://www.unicode.org/glossary/#code_point">
 599      * Unicode code point</a>, constant {@code U+10FFFF}.
 600      *
 601      * @since 1.5
 602      */
 603     public static final int MAX_CODE_POINT = 0X10FFFF;
 604 
 605 
 606     /**
 607      * Instances of this class represent particular subsets of the Unicode
 608      * character set.  The only family of subsets defined in the
 609      * {@code Character} class is {@link Character.UnicodeBlock}.
 610      * Other portions of the Java API may define other subsets for their
 611      * own purposes.
 612      *
 613      * @since 1.2
 614      */
 615     public static class Subset  {
 616 
 617         private String name;
 618 
 619         /**
 620          * Constructs a new {@code Subset} instance.
 621          *
 622          * @param  name  The name of this subset
 623          * @exception NullPointerException if name is {@code null}
 624          */
 625         protected Subset(String name) {
 626             if (name == null) {
 627                 throw new NullPointerException("name");
 628             }
 629             this.name = name;
 630         }
 631 
 632         /**
 633          * Compares two {@code Subset} objects for equality.
 634          * This method returns {@code true} if and only if
 635          * {@code this} and the argument refer to the same
 636          * object; since this method is {@code final}, this
 637          * guarantee holds for all subclasses.
 638          */
 639         public final boolean equals(Object obj) {
 640             return (this == obj);
 641         }
 642 
 643         /**
 644          * Returns the standard hash code as defined by the
 645          * {@link Object#hashCode} method.  This method
 646          * is {@code final} in order to ensure that the
 647          * {@code equals} and {@code hashCode} methods will
 648          * be consistent in all subclasses.
 649          */
 650         public final int hashCode() {
 651             return super.hashCode();
 652         }
 653 
 654         /**
 655          * Returns the name of this subset.
 656          */
 657         public final String toString() {
 658             return name;
 659         }
 660     }
 661 
 662     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 663     // for the latest specification of Unicode Blocks.
 664 
 665     /**
 666      * A family of character subsets representing the character blocks in the
 667      * Unicode specification. Character blocks generally define characters
 668      * used for a specific script or purpose. A character is contained by
 669      * at most one Unicode block.
 670      *
 671      * @since 1.2
 672      */
 673     public static final class UnicodeBlock extends Subset {
 674         /**
  675          * 638  - the expected number of entities
 676          * 0.75 - the default load factor of HashMap
 677          */
 678         private static Map<String, UnicodeBlock> map =
 679                 new HashMap<>((int)(638 / 0.75f + 1.0f));
 680 
 681         /**
 682          * Creates a UnicodeBlock with the given identifier name.
 683          * This name must be the same as the block identifier.
 684          */
 685         private UnicodeBlock(String idName) {
 686             super(idName);
 687             map.put(idName, this);
 688         }
 689 
 690         /**
 691          * Creates a UnicodeBlock with the given identifier name and
 692          * alias name.
 693          */
 694         private UnicodeBlock(String idName, String alias) {
 695             this(idName);
 696             map.put(alias, this);
 697         }
 698 
 699         /**
 700          * Creates a UnicodeBlock with the given identifier name and
 701          * alias names.
 702          */
 703         private UnicodeBlock(String idName, String... aliases) {
 704             this(idName);
 705             for (String alias : aliases)
 706                 map.put(alias, this);
 707         }
 708 
 709         /**
 710          * Constant for the "Basic Latin" Unicode character block.
 711          * @since 1.2
 712          */
 713         public static final UnicodeBlock  BASIC_LATIN =
 714             new UnicodeBlock("BASIC_LATIN",
 715                              "BASIC LATIN",
 716                              "BASICLATIN");
 717 
 718         /**
 719          * Constant for the "Latin-1 Supplement" Unicode character block.
 720          * @since 1.2
 721          */
 722         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 723             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 724                              "LATIN-1 SUPPLEMENT",
 725                              "LATIN-1SUPPLEMENT");
 726 
 727         /**
 728          * Constant for the "Latin Extended-A" Unicode character block.
 729          * @since 1.2
 730          */
 731         public static final UnicodeBlock LATIN_EXTENDED_A =
 732             new UnicodeBlock("LATIN_EXTENDED_A",
 733                              "LATIN EXTENDED-A",
 734                              "LATINEXTENDED-A");
 735 
 736         /**
 737          * Constant for the "Latin Extended-B" Unicode character block.
 738          * @since 1.2
 739          */
 740         public static final UnicodeBlock LATIN_EXTENDED_B =
 741             new UnicodeBlock("LATIN_EXTENDED_B",
 742                              "LATIN EXTENDED-B",
 743                              "LATINEXTENDED-B");
 744 
 745         /**
 746          * Constant for the "IPA Extensions" Unicode character block.
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock IPA_EXTENSIONS =
 750             new UnicodeBlock("IPA_EXTENSIONS",
 751                              "IPA EXTENSIONS",
 752                              "IPAEXTENSIONS");
 753 
 754         /**
 755          * Constant for the "Spacing Modifier Letters" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 759             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 760                              "SPACING MODIFIER LETTERS",
 761                              "SPACINGMODIFIERLETTERS");
 762 
 763         /**
 764          * Constant for the "Combining Diacritical Marks" Unicode character block.
 765          * @since 1.2
 766          */
 767         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 768             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 769                              "COMBINING DIACRITICAL MARKS",
 770                              "COMBININGDIACRITICALMARKS");
 771 
 772         /**
 773          * Constant for the "Greek and Coptic" Unicode character block.
 774          * <p>
 775          * This block was previously known as the "Greek" block.
 776          *
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock GREEK =
 780             new UnicodeBlock("GREEK",
 781                              "GREEK AND COPTIC",
 782                              "GREEKANDCOPTIC");
 783 
 784         /**
 785          * Constant for the "Cyrillic" Unicode character block.
 786          * @since 1.2
 787          */
 788         public static final UnicodeBlock CYRILLIC =
 789             new UnicodeBlock("CYRILLIC");
 790 
 791         /**
 792          * Constant for the "Armenian" Unicode character block.
 793          * @since 1.2
 794          */
 795         public static final UnicodeBlock ARMENIAN =
 796             new UnicodeBlock("ARMENIAN");
 797 
 798         /**
 799          * Constant for the "Hebrew" Unicode character block.
 800          * @since 1.2
 801          */
 802         public static final UnicodeBlock HEBREW =
 803             new UnicodeBlock("HEBREW");
 804 
 805         /**
 806          * Constant for the "Arabic" Unicode character block.
 807          * @since 1.2
 808          */
 809         public static final UnicodeBlock ARABIC =
 810             new UnicodeBlock("ARABIC");
 811 
 812         /**
 813          * Constant for the "Devanagari" Unicode character block.
 814          * @since 1.2
 815          */
 816         public static final UnicodeBlock DEVANAGARI =
 817             new UnicodeBlock("DEVANAGARI");
 818 
 819         /**
 820          * Constant for the "Bengali" Unicode character block.
 821          * @since 1.2
 822          */
 823         public static final UnicodeBlock BENGALI =
 824             new UnicodeBlock("BENGALI");
 825 
 826         /**
 827          * Constant for the "Gurmukhi" Unicode character block.
 828          * @since 1.2
 829          */
 830         public static final UnicodeBlock GURMUKHI =
 831             new UnicodeBlock("GURMUKHI");
 832 
 833         /**
 834          * Constant for the "Gujarati" Unicode character block.
 835          * @since 1.2
 836          */
 837         public static final UnicodeBlock GUJARATI =
 838             new UnicodeBlock("GUJARATI");
 839 
 840         /**
 841          * Constant for the "Oriya" Unicode character block.
 842          * @since 1.2
 843          */
 844         public static final UnicodeBlock ORIYA =
 845             new UnicodeBlock("ORIYA");
 846 
 847         /**
 848          * Constant for the "Tamil" Unicode character block.
 849          * @since 1.2
 850          */
 851         public static final UnicodeBlock TAMIL =
 852             new UnicodeBlock("TAMIL");
 853 
 854         /**
 855          * Constant for the "Telugu" Unicode character block.
 856          * @since 1.2
 857          */
 858         public static final UnicodeBlock TELUGU =
 859             new UnicodeBlock("TELUGU");
 860 
 861         /**
 862          * Constant for the "Kannada" Unicode character block.
 863          * @since 1.2
 864          */
 865         public static final UnicodeBlock KANNADA =
 866             new UnicodeBlock("KANNADA");
 867 
 868         /**
 869          * Constant for the "Malayalam" Unicode character block.
 870          * @since 1.2
 871          */
 872         public static final UnicodeBlock MALAYALAM =
 873             new UnicodeBlock("MALAYALAM");
 874 
 875         /**
 876          * Constant for the "Thai" Unicode character block.
 877          * @since 1.2
 878          */
 879         public static final UnicodeBlock THAI =
 880             new UnicodeBlock("THAI");
 881 
 882         /**
 883          * Constant for the "Lao" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LAO =
 887             new UnicodeBlock("LAO");
 888 
 889         /**
 890          * Constant for the "Tibetan" Unicode character block.
 891          * @since 1.2
 892          */
 893         public static final UnicodeBlock TIBETAN =
 894             new UnicodeBlock("TIBETAN");
 895 
 896         /**
 897          * Constant for the "Georgian" Unicode character block.
 898          * @since 1.2
 899          */
 900         public static final UnicodeBlock GEORGIAN =
 901             new UnicodeBlock("GEORGIAN");
 902 
 903         /**
 904          * Constant for the "Hangul Jamo" Unicode character block.
 905          * @since 1.2
 906          */
 907         public static final UnicodeBlock HANGUL_JAMO =
 908             new UnicodeBlock("HANGUL_JAMO",
 909                              "HANGUL JAMO",
 910                              "HANGULJAMO");
 911 
 912         /**
 913          * Constant for the "Latin Extended Additional" Unicode character block.
 914          * @since 1.2
 915          */
 916         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 917             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 918                              "LATIN EXTENDED ADDITIONAL",
 919                              "LATINEXTENDEDADDITIONAL");
 920 
 921         /**
 922          * Constant for the "Greek Extended" Unicode character block.
 923          * @since 1.2
 924          */
 925         public static final UnicodeBlock GREEK_EXTENDED =
 926             new UnicodeBlock("GREEK_EXTENDED",
 927                              "GREEK EXTENDED",
 928                              "GREEKEXTENDED");
 929 
 930         /**
 931          * Constant for the "General Punctuation" Unicode character block.
 932          * @since 1.2
 933          */
 934         public static final UnicodeBlock GENERAL_PUNCTUATION =
 935             new UnicodeBlock("GENERAL_PUNCTUATION",
 936                              "GENERAL PUNCTUATION",
 937                              "GENERALPUNCTUATION");
 938 
 939         /**
 940          * Constant for the "Superscripts and Subscripts" Unicode character
 941          * block.
 942          * @since 1.2
 943          */
 944         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 945             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 946                              "SUPERSCRIPTS AND SUBSCRIPTS",
 947                              "SUPERSCRIPTSANDSUBSCRIPTS");
 948 
 949         /**
 950          * Constant for the "Currency Symbols" Unicode character block.
 951          * @since 1.2
 952          */
 953         public static final UnicodeBlock CURRENCY_SYMBOLS =
 954             new UnicodeBlock("CURRENCY_SYMBOLS",
 955                              "CURRENCY SYMBOLS",
 956                              "CURRENCYSYMBOLS");
 957 
 958         /**
 959          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 960          * character block.
 961          * <p>
 962          * This block was previously known as "Combining Marks for Symbols".
 963          * @since 1.2
 964          */
 965         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 966             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 967                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 968                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 969                              "COMBINING MARKS FOR SYMBOLS",
 970                              "COMBININGMARKSFORSYMBOLS");
 971 
 972         /**
 973          * Constant for the "Letterlike Symbols" Unicode character block.
 974          * @since 1.2
 975          */
 976         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 977             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 978                              "LETTERLIKE SYMBOLS",
 979                              "LETTERLIKESYMBOLS");
 980 
 981         /**
 982          * Constant for the "Number Forms" Unicode character block.
 983          * @since 1.2
 984          */
 985         public static final UnicodeBlock NUMBER_FORMS =
 986             new UnicodeBlock("NUMBER_FORMS",
 987                              "NUMBER FORMS",
 988                              "NUMBERFORMS");
 989 
 990         /**
 991          * Constant for the "Arrows" Unicode character block.
 992          * @since 1.2
 993          */
 994         public static final UnicodeBlock ARROWS =
 995             new UnicodeBlock("ARROWS");
 996 
 997         /**
 998          * Constant for the "Mathematical Operators" Unicode character block.
 999          * @since 1.2
1000          */
1001         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1002             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1003                              "MATHEMATICAL OPERATORS",
1004                              "MATHEMATICALOPERATORS");
1005 
1006         /**
1007          * Constant for the "Miscellaneous Technical" Unicode character block.
1008          * @since 1.2
1009          */
1010         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1011             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1012                              "MISCELLANEOUS TECHNICAL",
1013                              "MISCELLANEOUSTECHNICAL");
1014 
1015         /**
1016          * Constant for the "Control Pictures" Unicode character block.
1017          * @since 1.2
1018          */
1019         public static final UnicodeBlock CONTROL_PICTURES =
1020             new UnicodeBlock("CONTROL_PICTURES",
1021                              "CONTROL PICTURES",
1022                              "CONTROLPICTURES");
1023 
1024         /**
1025          * Constant for the "Optical Character Recognition" Unicode character block.
1026          * @since 1.2
1027          */
1028         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1029             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1030                              "OPTICAL CHARACTER RECOGNITION",
1031                              "OPTICALCHARACTERRECOGNITION");
1032 
1033         /**
1034          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1035          * @since 1.2
1036          */
1037         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1038             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1039                              "ENCLOSED ALPHANUMERICS",
1040                              "ENCLOSEDALPHANUMERICS");
1041 
1042         /**
1043          * Constant for the "Box Drawing" Unicode character block.
1044          * @since 1.2
1045          */
1046         public static final UnicodeBlock BOX_DRAWING =
1047             new UnicodeBlock("BOX_DRAWING",
1048                              "BOX DRAWING",
1049                              "BOXDRAWING");
1050 
1051         /**
1052          * Constant for the "Block Elements" Unicode character block.
1053          * @since 1.2
1054          */
1055         public static final UnicodeBlock BLOCK_ELEMENTS =
1056             new UnicodeBlock("BLOCK_ELEMENTS",
1057                              "BLOCK ELEMENTS",
1058                              "BLOCKELEMENTS");
1059 
1060         /**
1061          * Constant for the "Geometric Shapes" Unicode character block.
1062          * @since 1.2
1063          */
1064         public static final UnicodeBlock GEOMETRIC_SHAPES =
1065             new UnicodeBlock("GEOMETRIC_SHAPES",
1066                              "GEOMETRIC SHAPES",
1067                              "GEOMETRICSHAPES");
1068 
1069         /**
1070          * Constant for the "Miscellaneous Symbols" Unicode character block.
1071          * @since 1.2
1072          */
1073         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1074             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1075                              "MISCELLANEOUS SYMBOLS",
1076                              "MISCELLANEOUSSYMBOLS");
1077 
1078         /**
1079          * Constant for the "Dingbats" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock DINGBATS =
1083             new UnicodeBlock("DINGBATS");
1084 
1085         /**
1086          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1090             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1091                              "CJK SYMBOLS AND PUNCTUATION",
1092                              "CJKSYMBOLSANDPUNCTUATION");
1093 
1094         /**
1095          * Constant for the "Hiragana" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock HIRAGANA =
1099             new UnicodeBlock("HIRAGANA");
1100 
1101         /**
1102          * Constant for the "Katakana" Unicode character block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock KATAKANA =
1106             new UnicodeBlock("KATAKANA");
1107 
1108         /**
1109          * Constant for the "Bopomofo" Unicode character block.
1110          * @since 1.2
1111          */
1112         public static final UnicodeBlock BOPOMOFO =
1113             new UnicodeBlock("BOPOMOFO");
1114 
1115         /**
1116          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1117          * @since 1.2
1118          */
1119         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1120             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1121                              "HANGUL COMPATIBILITY JAMO",
1122                              "HANGULCOMPATIBILITYJAMO");
1123 
1124         /**
1125          * Constant for the "Kanbun" Unicode character block.
1126          * @since 1.2
1127          */
1128         public static final UnicodeBlock KANBUN =
1129             new UnicodeBlock("KANBUN");
1130 
1131         /**
1132          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1133          * @since 1.2
1134          */
1135         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1136             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1137                              "ENCLOSED CJK LETTERS AND MONTHS",
1138                              "ENCLOSEDCJKLETTERSANDMONTHS");
1139 
1140         /**
1141          * Constant for the "CJK Compatibility" Unicode character block.
1142          * @since 1.2
1143          */
1144         public static final UnicodeBlock CJK_COMPATIBILITY =
1145             new UnicodeBlock("CJK_COMPATIBILITY",
1146                              "CJK COMPATIBILITY",
1147                              "CJKCOMPATIBILITY");
1148 
1149         /**
1150          * Constant for the "CJK Unified Ideographs" Unicode character block.
1151          * @since 1.2
1152          */
1153         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1154             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1155                              "CJK UNIFIED IDEOGRAPHS",
1156                              "CJKUNIFIEDIDEOGRAPHS");
1157 
1158         /**
1159          * Constant for the "Hangul Syllables" Unicode character block.
1160          * @since 1.2
1161          */
1162         public static final UnicodeBlock HANGUL_SYLLABLES =
1163             new UnicodeBlock("HANGUL_SYLLABLES",
1164                              "HANGUL SYLLABLES",
1165                              "HANGULSYLLABLES");
1166 
1167         /**
1168          * Constant for the "Private Use Area" Unicode character block.
1169          * @since 1.2
1170          */
1171         public static final UnicodeBlock PRIVATE_USE_AREA =
1172             new UnicodeBlock("PRIVATE_USE_AREA",
1173                              "PRIVATE USE AREA",
1174                              "PRIVATEUSEAREA");
1175 
1176         /**
1177          * Constant for the "CJK Compatibility Ideographs" Unicode character
1178          * block.
1179          * @since 1.2
1180          */
1181         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1182             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1183                              "CJK COMPATIBILITY IDEOGRAPHS",
1184                              "CJKCOMPATIBILITYIDEOGRAPHS");
1185 
1186         /**
1187          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1188          * @since 1.2
1189          */
1190         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1191             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1192                              "ALPHABETIC PRESENTATION FORMS",
1193                              "ALPHABETICPRESENTATIONFORMS");
1194 
1195         /**
1196          * Constant for the "Arabic Presentation Forms-A" Unicode character
1197          * block.
1198          * @since 1.2
1199          */
1200         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1201             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1202                              "ARABIC PRESENTATION FORMS-A",
1203                              "ARABICPRESENTATIONFORMS-A");
1204 
1205         /**
1206          * Constant for the "Combining Half Marks" Unicode character block.
1207          * @since 1.2
1208          */
1209         public static final UnicodeBlock COMBINING_HALF_MARKS =
1210             new UnicodeBlock("COMBINING_HALF_MARKS",
1211                              "COMBINING HALF MARKS",
1212                              "COMBININGHALFMARKS");
1213 
1214         /**
1215          * Constant for the "CJK Compatibility Forms" Unicode character block.
1216          * @since 1.2
1217          */
1218         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1219             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1220                              "CJK COMPATIBILITY FORMS",
1221                              "CJKCOMPATIBILITYFORMS");
1222 
1223         /**
1224          * Constant for the "Small Form Variants" Unicode character block.
1225          * @since 1.2
1226          */
1227         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1228             new UnicodeBlock("SMALL_FORM_VARIANTS",
1229                              "SMALL FORM VARIANTS",
1230                              "SMALLFORMVARIANTS");
1231 
1232         /**
1233          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1234          * @since 1.2
1235          */
1236         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1237             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1238                              "ARABIC PRESENTATION FORMS-B",
1239                              "ARABICPRESENTATIONFORMS-B");
1240 
1241         /**
1242          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1243          * block.
1244          * @since 1.2
1245          */
1246         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1247             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1248                              "HALFWIDTH AND FULLWIDTH FORMS",
1249                              "HALFWIDTHANDFULLWIDTHFORMS");
1250 
1251         /**
1252          * Constant for the "Specials" Unicode character block.
1253          * @since 1.2
1254          */
1255         public static final UnicodeBlock SPECIALS =
1256             new UnicodeBlock("SPECIALS");
1257 
1258         /**
1259          * Constant for the "Surrogates Area" character block.
1260          * @deprecated Instead of {@code SURROGATES_AREA}, use
1261          * {@link #HIGH_SURROGATES}, {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1262          * {@link #LOW_SURROGATES}. These constants match the block definitions
1263          * of the Unicode Standard. The {@link #of(char)} and {@link #of(int)}
1264          * methods return the standard constants.
1265          */
1266         @Deprecated(since="1.5")
1267         public static final UnicodeBlock SURROGATES_AREA =
1268             new UnicodeBlock("SURROGATES_AREA");
1269 
1270         /**
1271          * Constant for the "Syriac" Unicode character block.
1272          * @since 1.4
1273          */
1274         public static final UnicodeBlock SYRIAC =
1275             new UnicodeBlock("SYRIAC");
1276 
1277         /**
1278          * Constant for the "Thaana" Unicode character block.
1279          * @since 1.4
1280          */
1281         public static final UnicodeBlock THAANA =
1282             new UnicodeBlock("THAANA");
1283 
1284         /**
1285          * Constant for the "Sinhala" Unicode character block.
1286          * @since 1.4
1287          */
1288         public static final UnicodeBlock SINHALA =
1289             new UnicodeBlock("SINHALA");
1290 
1291         /**
1292          * Constant for the "Myanmar" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock MYANMAR =
1296             new UnicodeBlock("MYANMAR");
1297 
1298         /**
1299          * Constant for the "Ethiopic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock ETHIOPIC =
1303             new UnicodeBlock("ETHIOPIC");
1304 
1305         /**
1306          * Constant for the "Cherokee" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock CHEROKEE =
1310             new UnicodeBlock("CHEROKEE");
1311 
1312         /**
1313          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1317             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1318                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1319                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1320 
1321         /**
1322          * Constant for the "Ogham" Unicode character block.
1323          * @since 1.4
1324          */
1325         public static final UnicodeBlock OGHAM =
1326             new UnicodeBlock("OGHAM");
1327 
1328         /**
1329          * Constant for the "Runic" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock RUNIC =
1333             new UnicodeBlock("RUNIC");
1334 
1335         /**
1336          * Constant for the "Khmer" Unicode character block.
1337          * @since 1.4
1338          */
1339         public static final UnicodeBlock KHMER =
1340             new UnicodeBlock("KHMER");
1341 
1342         /**
1343          * Constant for the "Mongolian" Unicode character block.
1344          * @since 1.4
1345          */
1346         public static final UnicodeBlock MONGOLIAN =
1347             new UnicodeBlock("MONGOLIAN");
1348 
1349         /**
1350          * Constant for the "Braille Patterns" Unicode character block.
1351          * @since 1.4
1352          */
1353         public static final UnicodeBlock BRAILLE_PATTERNS =
1354             new UnicodeBlock("BRAILLE_PATTERNS",
1355                              "BRAILLE PATTERNS",
1356                              "BRAILLEPATTERNS");
1357 
1358         /**
1359          * Constant for the "CJK Radicals Supplement" Unicode character block.
1360          * @since 1.4
1361          */
1362         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1363             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1364                              "CJK RADICALS SUPPLEMENT",
1365                              "CJKRADICALSSUPPLEMENT");
1366 
1367         /**
1368          * Constant for the "Kangxi Radicals" Unicode character block.
1369          * @since 1.4
1370          */
1371         public static final UnicodeBlock KANGXI_RADICALS =
1372             new UnicodeBlock("KANGXI_RADICALS",
1373                              "KANGXI RADICALS",
1374                              "KANGXIRADICALS");
1375 
1376         /**
1377          * Constant for the "Ideographic Description Characters" Unicode character block.
1378          * @since 1.4
1379          */
1380         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1381             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1382                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1383                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1384 
1385         /**
1386          * Constant for the "Bopomofo Extended" Unicode character block.
1387          * @since 1.4
1388          */
1389         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1390             new UnicodeBlock("BOPOMOFO_EXTENDED",
1391                              "BOPOMOFO EXTENDED",
1392                              "BOPOMOFOEXTENDED");
1393 
1394         /**
1395          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1396          * @since 1.4
1397          */
1398         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1399             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1400                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1401                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1402 
1403         /**
1404          * Constant for the "Yi Syllables" Unicode character block.
1405          * @since 1.4
1406          */
1407         public static final UnicodeBlock YI_SYLLABLES =
1408             new UnicodeBlock("YI_SYLLABLES",
1409                              "YI SYLLABLES",
1410                              "YISYLLABLES");
1411 
1412         /**
1413          * Constant for the "Yi Radicals" Unicode character block.
1414          * @since 1.4
1415          */
1416         public static final UnicodeBlock YI_RADICALS =
1417             new UnicodeBlock("YI_RADICALS",
1418                              "YI RADICALS",
1419                              "YIRADICALS");
1420 
1421         /**
1422          * Constant for the "Cyrillic Supplementary" (also "Cyrillic Supplement") Unicode character block.
1423          * @since 1.5
1424          */
1425         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1426             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1427                              "CYRILLIC SUPPLEMENTARY",
1428                              "CYRILLICSUPPLEMENTARY",
1429                              "CYRILLIC SUPPLEMENT",
1430                              "CYRILLICSUPPLEMENT");
1431 
1432         /**
1433          * Constant for the "Tagalog" Unicode character block.
1434          * @since 1.5
1435          */
1436         public static final UnicodeBlock TAGALOG =
1437             new UnicodeBlock("TAGALOG");
1438 
1439         /**
1440          * Constant for the "Hanunoo" Unicode character block.
1441          * @since 1.5
1442          */
1443         public static final UnicodeBlock HANUNOO =
1444             new UnicodeBlock("HANUNOO");
1445 
1446         /**
1447          * Constant for the "Buhid" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock BUHID =
1451             new UnicodeBlock("BUHID");
1452 
1453         /**
1454          * Constant for the "Tagbanwa" Unicode character block.
1455          * @since 1.5
1456          */
1457         public static final UnicodeBlock TAGBANWA =
1458             new UnicodeBlock("TAGBANWA");
1459 
1460         /**
1461          * Constant for the "Limbu" Unicode character block.
1462          * @since 1.5
1463          */
1464         public static final UnicodeBlock LIMBU =
1465             new UnicodeBlock("LIMBU");
1466 
1467         /**
1468          * Constant for the "Tai Le" Unicode character block.
1469          * @since 1.5
1470          */
1471         public static final UnicodeBlock TAI_LE =
1472             new UnicodeBlock("TAI_LE",
1473                              "TAI LE",
1474                              "TAILE");
1475 
1476         /**
1477          * Constant for the "Khmer Symbols" Unicode character block.
1478          * @since 1.5
1479          */
1480         public static final UnicodeBlock KHMER_SYMBOLS =
1481             new UnicodeBlock("KHMER_SYMBOLS",
1482                              "KHMER SYMBOLS",
1483                              "KHMERSYMBOLS");
1484 
1485         /**
1486          * Constant for the "Phonetic Extensions" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1490             new UnicodeBlock("PHONETIC_EXTENSIONS",
1491                              "PHONETIC EXTENSIONS",
1492                              "PHONETICEXTENSIONS");
1493 
1494         /**
1495          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1499             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1500                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1501                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1502 
1503         /**
1504          * Constant for the "Supplemental Arrows-A" Unicode character block.
1505          * @since 1.5
1506          */
1507         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1508             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1509                              "SUPPLEMENTAL ARROWS-A",
1510                              "SUPPLEMENTALARROWS-A");
1511 
1512         /**
1513          * Constant for the "Supplemental Arrows-B" Unicode character block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1517             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1518                              "SUPPLEMENTAL ARROWS-B",
1519                              "SUPPLEMENTALARROWS-B");
1520 
1521         /**
1522          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1523          * character block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1527             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1528                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1529                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1530 
1531         /**
1532          * Constant for the "Supplemental Mathematical Operators" Unicode
1533          * character block.
1534          * @since 1.5
1535          */
1536         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1537             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1538                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1539                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1540 
1541         /**
1542          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1543          * block.
1544          * @since 1.5
1545          */
1546         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1547             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1548                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1549                              "MISCELLANEOUSSYMBOLSANDARROWS");
1550 
1551         /**
1552          * Constant for the "Katakana Phonetic Extensions" Unicode character
1553          * block.
1554          * @since 1.5
1555          */
1556         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1557             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1558                              "KATAKANA PHONETIC EXTENSIONS",
1559                              "KATAKANAPHONETICEXTENSIONS");
1560 
1561         /**
1562          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1563          * @since 1.5
1564          */
1565         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1566             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1567                              "YIJING HEXAGRAM SYMBOLS",
1568                              "YIJINGHEXAGRAMSYMBOLS");
1569 
1570         /**
1571          * Constant for the "Variation Selectors" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock VARIATION_SELECTORS =
1575             new UnicodeBlock("VARIATION_SELECTORS",
1576                              "VARIATION SELECTORS",
1577                              "VARIATIONSELECTORS");
1578 
1579         /**
1580          * Constant for the "Linear B Syllabary" Unicode character block.
1581          * @since 1.5
1582          */
1583         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1584             new UnicodeBlock("LINEAR_B_SYLLABARY",
1585                              "LINEAR B SYLLABARY",
1586                              "LINEARBSYLLABARY");
1587 
1588         /**
1589          * Constant for the "Linear B Ideograms" Unicode character block.
1590          * @since 1.5
1591          */
1592         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1593             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1594                              "LINEAR B IDEOGRAMS",
1595                              "LINEARBIDEOGRAMS");
1596 
1597         /**
1598          * Constant for the "Aegean Numbers" Unicode character block.
1599          * @since 1.5
1600          */
1601         public static final UnicodeBlock AEGEAN_NUMBERS =
1602             new UnicodeBlock("AEGEAN_NUMBERS",  // identifier, spelled like the field name
1603                              "AEGEAN NUMBERS",  // block name in upper case
1604                              "AEGEANNUMBERS");  // same name with the spaces removed
1605 
1606         /**
1607          * Constant for the "Old Italic" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock OLD_ITALIC =
1611             new UnicodeBlock("OLD_ITALIC",
1612                              "OLD ITALIC",
1613                              "OLDITALIC");
1614 
1615         /**
1616          * Constant for the "Gothic" Unicode character block.
1617          * @since 1.5
1618          */
1619         public static final UnicodeBlock GOTHIC =
1620             new UnicodeBlock("GOTHIC");  // one-word name: the identifier is the only spelling passed
1621 
1622         /**
1623          * Constant for the "Ugaritic" Unicode character block.
1624          * @since 1.5
1625          */
1626         public static final UnicodeBlock UGARITIC =
1627             new UnicodeBlock("UGARITIC");
1628 
1629         /**
1630          * Constant for the "Deseret" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock DESERET =
1634             new UnicodeBlock("DESERET");
1635 
1636         /**
1637          * Constant for the "Shavian" Unicode character block.
1638          * @since 1.5
1639          */
1640         public static final UnicodeBlock SHAVIAN =
1641             new UnicodeBlock("SHAVIAN");
1642 
1643         /**
1644          * Constant for the "Osmanya" Unicode character block.
1645          * @since 1.5
1646          */
1647         public static final UnicodeBlock OSMANYA =
1648             new UnicodeBlock("OSMANYA");
1649 
1650         /**
1651          * Constant for the "Cypriot Syllabary" Unicode character block.
1652          * @since 1.5
1653          */
1654         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1655             new UnicodeBlock("CYPRIOT_SYLLABARY",
1656                              "CYPRIOT SYLLABARY",
1657                              "CYPRIOTSYLLABARY");
1658 
1659         /**
1660          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1664             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",  // identifier, spelled like the field name
1665                              "BYZANTINE MUSICAL SYMBOLS",  // block name in upper case
1666                              "BYZANTINEMUSICALSYMBOLS");   // same name with the spaces removed
1667 
1668         /**
1669          * Constant for the "Musical Symbols" Unicode character block.
1670          * @since 1.5
1671          */
1672         public static final UnicodeBlock MUSICAL_SYMBOLS =
1673             new UnicodeBlock("MUSICAL_SYMBOLS",
1674                              "MUSICAL SYMBOLS",
1675                              "MUSICALSYMBOLS");
1676 
1677         /**
1678          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1682             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1683                              "TAI XUAN JING SYMBOLS",
1684                              "TAIXUANJINGSYMBOLS");
1685 
1686         /**
1687          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1688          * character block.
1689          * @since 1.5
1690          */
1691         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1692             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1693                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1694                              "MATHEMATICALALPHANUMERICSYMBOLS");
1695 
1696         /**
1697          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1698          * character block.
1699          * @since 1.5
1700          */
1701         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1702             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1703                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1704                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1705 
1706         /**
1707          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1711             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1712                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1713                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1714 
1715         /**
1716          * Constant for the "Tags" Unicode character block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock TAGS =
1720             new UnicodeBlock("TAGS");  // one-word name: the identifier is the only spelling passed
1721 
1722         /**
1723          * Constant for the "Variation Selectors Supplement" Unicode character
1724          * block.
1725          * @since 1.5
1726          */
1727         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1728             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1729                              "VARIATION SELECTORS SUPPLEMENT",
1730                              "VARIATIONSELECTORSSUPPLEMENT");
1731 
1732         /**
1733          * Constant for the "Supplementary Private Use Area-A" Unicode character
1734          * block.
1735          * @since 1.5
1736          */
1737         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1738             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1739                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1740                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // only the spaces are dropped; the hyphen is kept
1741 
1742         /**
1743          * Constant for the "Supplementary Private Use Area-B" Unicode character
1744          * block.
1745          * @since 1.5
1746          */
1747         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1748             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1749                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1750                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // only the spaces are dropped; the hyphen is kept
1751 
1752         /**
1753          * Constant for the "High Surrogates" Unicode character block.
1754          * This block represents code point values in the high surrogate
1755          * range: U+D800 through U+DB7F.
1756          *
1757          * @since 1.5
1758          */
1759         public static final UnicodeBlock HIGH_SURROGATES =
1760             new UnicodeBlock("HIGH_SURROGATES",
1761                              "HIGH SURROGATES",
1762                              "HIGHSURROGATES");
1763 
1764         /**
1765          * Constant for the "High Private Use Surrogates" Unicode character
1766          * block.
1767          * This block represents code point values in the private use high
1768          * surrogate range: U+DB80 through U+DBFF.
1769          *
1770          * @since 1.5
1771          */
1772         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1773             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1774                              "HIGH PRIVATE USE SURROGATES",
1775                              "HIGHPRIVATEUSESURROGATES");
1776 
1777         /**
1778          * Constant for the "Low Surrogates" Unicode character block.
1779          * This block represents code point values in the low surrogate
1780          * range: U+DC00 through U+DFFF.
1781          *
1782          * @since 1.5
1783          */
1784         public static final UnicodeBlock LOW_SURROGATES =
1785             new UnicodeBlock("LOW_SURROGATES",
1786                              "LOW SURROGATES",
1787                              "LOWSURROGATES");
1788 
1789         /**
1790          * Constant for the "Arabic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1794             new UnicodeBlock("ARABIC_SUPPLEMENT",  // identifier, spelled like the field name
1795                              "ARABIC SUPPLEMENT",  // block name in upper case
1796                              "ARABICSUPPLEMENT");  // same name with the spaces removed
1797 
1798         /**
1799          * Constant for the "NKo" Unicode character block.
1800          * @since 1.7
1801          */
1802         public static final UnicodeBlock NKO =
1803             new UnicodeBlock("NKO");  // one-word name: the identifier is the only spelling passed
1804 
1805         /**
1806          * Constant for the "Samaritan" Unicode character block.
1807          * @since 1.7
1808          */
1809         public static final UnicodeBlock SAMARITAN =
1810             new UnicodeBlock("SAMARITAN");
1811 
1812         /**
1813          * Constant for the "Mandaic" Unicode character block.
1814          * @since 1.7
1815          */
1816         public static final UnicodeBlock MANDAIC =
1817             new UnicodeBlock("MANDAIC");
1818 
1819         /**
1820          * Constant for the "Ethiopic Supplement" Unicode character block.
1821          * @since 1.7
1822          */
1823         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1824             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1825                              "ETHIOPIC SUPPLEMENT",
1826                              "ETHIOPICSUPPLEMENT");
1827 
1828         /**
1829          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1830          * Unicode character block.
1831          * @since 1.7
1832          */
1833         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1834             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1835                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1836                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1837 
1838         /**
1839          * Constant for the "New Tai Lue" Unicode character block.
1840          * @since 1.7
1841          */
1842         public static final UnicodeBlock NEW_TAI_LUE =
1843             new UnicodeBlock("NEW_TAI_LUE",
1844                              "NEW TAI LUE",
1845                              "NEWTAILUE");
1846 
1847         /**
1848          * Constant for the "Buginese" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BUGINESE =
1852             new UnicodeBlock("BUGINESE");
1853 
1854         /**
1855          * Constant for the "Tai Tham" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock TAI_THAM =
1859             new UnicodeBlock("TAI_THAM",
1860                              "TAI THAM",
1861                              "TAITHAM");
1862 
1863         /**
1864          * Constant for the "Balinese" Unicode character block.
1865          * @since 1.7
1866          */
1867         public static final UnicodeBlock BALINESE =
1868             new UnicodeBlock("BALINESE");
1869 
1870         /**
1871          * Constant for the "Sundanese" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock SUNDANESE =
1875             new UnicodeBlock("SUNDANESE");
1876 
1877         /**
1878          * Constant for the "Batak" Unicode character block.
1879          * @since 1.7
1880          */
1881         public static final UnicodeBlock BATAK =
1882             new UnicodeBlock("BATAK");
1883 
1884         /**
1885          * Constant for the "Lepcha" Unicode character block.
1886          * @since 1.7
1887          */
1888         public static final UnicodeBlock LEPCHA =
1889             new UnicodeBlock("LEPCHA");
1890 
1891         /**
1892          * Constant for the "Ol Chiki" Unicode character block.
1893          * @since 1.7
1894          */
1895         public static final UnicodeBlock OL_CHIKI =
1896             new UnicodeBlock("OL_CHIKI",
1897                              "OL CHIKI",
1898                              "OLCHIKI");
1899 
1900         /**
1901          * Constant for the "Vedic Extensions" Unicode character block.
1902          * @since 1.7
1903          */
1904         public static final UnicodeBlock VEDIC_EXTENSIONS =
1905             new UnicodeBlock("VEDIC_EXTENSIONS",  // identifier, spelled like the field name
1906                              "VEDIC EXTENSIONS",  // block name in upper case
1907                              "VEDICEXTENSIONS");  // same name with the spaces removed
1908 
1909         /**
1910          * Constant for the "Phonetic Extensions Supplement" Unicode character
1911          * block.
1912          * @since 1.7
1913          */
1914         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1915             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1916                              "PHONETIC EXTENSIONS SUPPLEMENT",
1917                              "PHONETICEXTENSIONSSUPPLEMENT");
1918 
1919         /**
1920          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1921          * character block.
1922          * @since 1.7
1923          */
1924         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1925             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1926                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1927                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1928 
1929         /**
1930          * Constant for the "Glagolitic" Unicode character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock GLAGOLITIC =
1934             new UnicodeBlock("GLAGOLITIC");  // one-word name: the identifier is the only spelling passed
1935 
1936         /**
1937          * Constant for the "Latin Extended-C" Unicode character block.
1938          * @since 1.7
1939          */
1940         public static final UnicodeBlock LATIN_EXTENDED_C =
1941             new UnicodeBlock("LATIN_EXTENDED_C",
1942                              "LATIN EXTENDED-C",
1943                              "LATINEXTENDED-C");  // only the spaces are dropped; the hyphen is kept
1944 
1945         /**
1946          * Constant for the "Coptic" Unicode character block.
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock COPTIC =
1950             new UnicodeBlock("COPTIC");
1951 
1952         /**
1953          * Constant for the "Georgian Supplement" Unicode character block.
1954          * @since 1.7
1955          */
1956         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1957             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1958                              "GEORGIAN SUPPLEMENT",
1959                              "GEORGIANSUPPLEMENT");
1960 
1961         /**
1962          * Constant for the "Tifinagh" Unicode character block.
1963          * @since 1.7
1964          */
1965         public static final UnicodeBlock TIFINAGH =
1966             new UnicodeBlock("TIFINAGH");
1967 
1968         /**
1969          * Constant for the "Ethiopic Extended" Unicode character block.
1970          * @since 1.7
1971          */
1972         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1973             new UnicodeBlock("ETHIOPIC_EXTENDED",
1974                              "ETHIOPIC EXTENDED",
1975                              "ETHIOPICEXTENDED");
1976 
1977         /**
1978          * Constant for the "Cyrillic Extended-A" Unicode character block.
1979          * @since 1.7
1980          */
1981         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1982             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1983                              "CYRILLIC EXTENDED-A",
1984                              "CYRILLICEXTENDED-A");  // only the spaces are dropped; the hyphen is kept
1985 
1986         /**
1987          * Constant for the "Supplemental Punctuation" Unicode character block.
1988          * @since 1.7
1989          */
1990         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1991             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1992                              "SUPPLEMENTAL PUNCTUATION",
1993                              "SUPPLEMENTALPUNCTUATION");
1994 
1995         /**
1996          * Constant for the "CJK Strokes" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock CJK_STROKES =
2000             new UnicodeBlock("CJK_STROKES",
2001                              "CJK STROKES",
2002                              "CJKSTROKES");
2003 
2004         /**
2005          * Constant for the "Lisu" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock LISU =
2009             new UnicodeBlock("LISU");
2010 
2011         /**
2012          * Constant for the "Vai" Unicode character block.
2013          * @since 1.7
2014          */
2015         public static final UnicodeBlock VAI =
2016             new UnicodeBlock("VAI");
2017 
2018         /**
2019          * Constant for the "Cyrillic Extended-B" Unicode character block.
2020          * @since 1.7
2021          */
2022         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2023             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2024                              "CYRILLIC EXTENDED-B",
2025                              "CYRILLICEXTENDED-B");  // only the spaces are dropped; the hyphen is kept
2026 
2027         /**
2028          * Constant for the "Bamum" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock BAMUM =
2032             new UnicodeBlock("BAMUM");  // one-word name: the identifier is the only spelling passed
2033 
2034         /**
2035          * Constant for the "Modifier Tone Letters" Unicode character block.
2036          * @since 1.7
2037          */
2038         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2039             new UnicodeBlock("MODIFIER_TONE_LETTERS",  // identifier, spelled like the field name
2040                              "MODIFIER TONE LETTERS",  // block name in upper case
2041                              "MODIFIERTONELETTERS");   // same name with the spaces removed
2042 
2043         /**
2044          * Constant for the "Latin Extended-D" Unicode character block.
2045          * @since 1.7
2046          */
2047         public static final UnicodeBlock LATIN_EXTENDED_D =
2048             new UnicodeBlock("LATIN_EXTENDED_D",
2049                              "LATIN EXTENDED-D",
2050                              "LATINEXTENDED-D");  // only the spaces are dropped; the hyphen is kept
2051 
2052         /**
2053          * Constant for the "Syloti Nagri" Unicode character block.
2054          * @since 1.7
2055          */
2056         public static final UnicodeBlock SYLOTI_NAGRI =
2057             new UnicodeBlock("SYLOTI_NAGRI",
2058                              "SYLOTI NAGRI",
2059                              "SYLOTINAGRI");
2060 
2061         /**
2062          * Constant for the "Common Indic Number Forms" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2066             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2067                              "COMMON INDIC NUMBER FORMS",
2068                              "COMMONINDICNUMBERFORMS");
2069 
2070         /**
2071          * Constant for the "Phags-pa" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock PHAGS_PA =
2075             new UnicodeBlock("PHAGS_PA",   // identifier, spelled like the field name
2076                              "PHAGS-PA");  // hyphenated name has no spaces, so one extra spelling is passed
2077 
2078         /**
2079          * Constant for the "Saurashtra" Unicode character block.
2080          * @since 1.7
2081          */
2082         public static final UnicodeBlock SAURASHTRA =
2083             new UnicodeBlock("SAURASHTRA");
2084 
2085         /**
2086          * Constant for the "Devanagari Extended" Unicode character block.
2087          * @since 1.7
2088          */
2089         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2090             new UnicodeBlock("DEVANAGARI_EXTENDED",
2091                              "DEVANAGARI EXTENDED",
2092                              "DEVANAGARIEXTENDED");
2093 
2094         /**
2095          * Constant for the "Kayah Li" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock KAYAH_LI =
2099             new UnicodeBlock("KAYAH_LI",
2100                              "KAYAH LI",
2101                              "KAYAHLI");
2102 
2103         /**
2104          * Constant for the "Rejang" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock REJANG =
2108             new UnicodeBlock("REJANG");
2109 
2110         /**
2111          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2112          * @since 1.7
2113          */
2114         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2115             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2116                              "HANGUL JAMO EXTENDED-A",
2117                              "HANGULJAMOEXTENDED-A");  // only the spaces are dropped; the hyphen is kept
2118 
2119         /**
2120          * Constant for the "Javanese" Unicode character block.
2121          * @since 1.7
2122          */
2123         public static final UnicodeBlock JAVANESE =
2124             new UnicodeBlock("JAVANESE");
2125 
2126         /**
2127          * Constant for the "Cham" Unicode character block.
2128          * @since 1.7
2129          */
2130         public static final UnicodeBlock CHAM =
2131             new UnicodeBlock("CHAM");
2132 
2133         /**
2134          * Constant for the "Myanmar Extended-A" Unicode character block.
2135          * @since 1.7
2136          */
2137         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2138             new UnicodeBlock("MYANMAR_EXTENDED_A",
2139                              "MYANMAR EXTENDED-A",
2140                              "MYANMAREXTENDED-A");  // only the spaces are dropped; the hyphen is kept
2141 
2142         /**
2143          * Constant for the "Tai Viet" Unicode character block.
2144          * @since 1.7
2145          */
2146         public static final UnicodeBlock TAI_VIET =
2147             new UnicodeBlock("TAI_VIET",
2148                              "TAI VIET",
2149                              "TAIVIET");
2150 
2151         /**
2152          * Constant for the "Ethiopic Extended-A" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2156             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2157                              "ETHIOPIC EXTENDED-A",
2158                              "ETHIOPICEXTENDED-A");  // only the spaces are dropped; the hyphen is kept
2159 
2160         /**
2161          * Constant for the "Meetei Mayek" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock MEETEI_MAYEK =
2165             new UnicodeBlock("MEETEI_MAYEK",
2166                              "MEETEI MAYEK",
2167                              "MEETEIMAYEK");
2168 
2169         /**
2170          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2174             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2175                              "HANGUL JAMO EXTENDED-B",
2176                              "HANGULJAMOEXTENDED-B");  // only the spaces are dropped; the hyphen is kept
2177 
2178         /**
2179          * Constant for the "Vertical Forms" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock VERTICAL_FORMS =
2183             new UnicodeBlock("VERTICAL_FORMS",  // identifier, spelled like the field name
2184                              "VERTICAL FORMS",  // block name in upper case
2185                              "VERTICALFORMS");  // same name with the spaces removed
2186 
2187         /**
2188          * Constant for the "Ancient Greek Numbers" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2192             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2193                              "ANCIENT GREEK NUMBERS",
2194                              "ANCIENTGREEKNUMBERS");
2195 
2196         /**
2197          * Constant for the "Ancient Symbols" Unicode character block.
2198          * @since 1.7
2199          */
2200         public static final UnicodeBlock ANCIENT_SYMBOLS =
2201             new UnicodeBlock("ANCIENT_SYMBOLS",
2202                              "ANCIENT SYMBOLS",
2203                              "ANCIENTSYMBOLS");
2204 
2205         /**
2206          * Constant for the "Phaistos Disc" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock PHAISTOS_DISC =
2210             new UnicodeBlock("PHAISTOS_DISC",
2211                              "PHAISTOS DISC",
2212                              "PHAISTOSDISC");
2213 
2214         /**
2215          * Constant for the "Lycian" Unicode character block.
2216          * @since 1.7
2217          */
2218         public static final UnicodeBlock LYCIAN =
2219             new UnicodeBlock("LYCIAN");  // one-word name: the identifier is the only spelling passed
2220 
2221         /**
2222          * Constant for the "Carian" Unicode character block.
2223          * @since 1.7
2224          */
2225         public static final UnicodeBlock CARIAN =
2226             new UnicodeBlock("CARIAN");
2227 
2228         /**
2229          * Constant for the "Old Persian" Unicode character block.
2230          * @since 1.7
2231          */
2232         public static final UnicodeBlock OLD_PERSIAN =
2233             new UnicodeBlock("OLD_PERSIAN",
2234                              "OLD PERSIAN",
2235                              "OLDPERSIAN");
2236 
2237         /**
2238          * Constant for the "Imperial Aramaic" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2242             new UnicodeBlock("IMPERIAL_ARAMAIC",
2243                              "IMPERIAL ARAMAIC",
2244                              "IMPERIALARAMAIC");
2245 
2246         /**
2247          * Constant for the "Phoenician" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock PHOENICIAN =
2251             new UnicodeBlock("PHOENICIAN");
2252 
2253         /**
2254          * Constant for the "Lydian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock LYDIAN =
2258             new UnicodeBlock("LYDIAN");
2259 
2260         /**
2261          * Constant for the "Kharoshthi" Unicode character block.
2262          * @since 1.7
2263          */
2264         public static final UnicodeBlock KHAROSHTHI =
2265             new UnicodeBlock("KHAROSHTHI");
2266 
2267         /**
2268          * Constant for the "Old South Arabian" Unicode character block.
2269          * @since 1.7
2270          */
2271         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2272             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2273                              "OLD SOUTH ARABIAN",
2274                              "OLDSOUTHARABIAN");
2275 
2276         /**
2277          * Constant for the "Avestan" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock AVESTAN =
2281             new UnicodeBlock("AVESTAN");
2282 
2283         /**
2284          * Constant for the "Inscriptional Parthian" Unicode character block.
2285          * @since 1.7
2286          */
2287         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2288             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2289                              "INSCRIPTIONAL PARTHIAN",
2290                              "INSCRIPTIONALPARTHIAN");
2291 
2292         /**
2293          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2294          * @since 1.7
2295          */
2296         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2297             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2298                              "INSCRIPTIONAL PAHLAVI",
2299                              "INSCRIPTIONALPAHLAVI");
2300 
2301         /**
2302          * Constant for the "Old Turkic" Unicode character block.
2303          * @since 1.7
2304          */
2305         public static final UnicodeBlock OLD_TURKIC =
2306             new UnicodeBlock("OLD_TURKIC",
2307                              "OLD TURKIC",
2308                              "OLDTURKIC");
2309 
2310         /**
2311          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2312          * @since 1.7
2313          */
2314         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2315             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2316                              "RUMI NUMERAL SYMBOLS",
2317                              "RUMINUMERALSYMBOLS");
2318 
2319         /**
2320          * Constant for the "Brahmi" Unicode character block.
2321          * @since 1.7
2322          */
2323         public static final UnicodeBlock BRAHMI =
2324             new UnicodeBlock("BRAHMI");  // one-word name: the identifier is the only spelling passed
2325 
2326         /**
2327          * Constant for the "Kaithi" Unicode character block.
2328          * @since 1.7
2329          */
2330         public static final UnicodeBlock KAITHI =
2331             new UnicodeBlock("KAITHI");
2332 
2333         /**
2334          * Constant for the "Cuneiform" Unicode character block.
2335          * @since 1.7
2336          */
2337         public static final UnicodeBlock CUNEIFORM =
2338             new UnicodeBlock("CUNEIFORM");
2339 
2340         /**
2341          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2342          * character block.
2343          * @since 1.7
2344          */
2345         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2346             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",  // identifier, spelled like the field name
2347                              "CUNEIFORM NUMBERS AND PUNCTUATION",  // block name in upper case
2348                              "CUNEIFORMNUMBERSANDPUNCTUATION");    // same name with the spaces removed
2349 
2350         /**
2351          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2352          * @since 1.7
2353          */
2354         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2355             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2356                              "EGYPTIAN HIEROGLYPHS",
2357                              "EGYPTIANHIEROGLYPHS");
2358 
2359         /**
2360          * Constant for the "Bamum Supplement" Unicode character block.
2361          * @since 1.7
2362          */
2363         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2364             new UnicodeBlock("BAMUM_SUPPLEMENT",
2365                              "BAMUM SUPPLEMENT",
2366                              "BAMUMSUPPLEMENT");
2367 
2368         /**
2369          * Constant for the "Kana Supplement" Unicode character block.
2370          * @since 1.7
2371          */
2372         public static final UnicodeBlock KANA_SUPPLEMENT =
2373             new UnicodeBlock("KANA_SUPPLEMENT",
2374                              "KANA SUPPLEMENT",
2375                              "KANASUPPLEMENT");
2376 
2377         /**
2378          * Constant for the "Ancient Greek Musical Notation" Unicode character
2379          * block.
2380          * @since 1.7
2381          */
2382         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2383             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2384                              "ANCIENT GREEK MUSICAL NOTATION",
2385                              "ANCIENTGREEKMUSICALNOTATION");
2386 
2387         /**
2388          * Constant for the "Counting Rod Numerals" Unicode character block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2392             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2393                              "COUNTING ROD NUMERALS",
2394                              "COUNTINGRODNUMERALS");
2395 
2396         /**
2397          * Constant for the "Mahjong Tiles" Unicode character block.
2398          * @since 1.7
2399          */
2400         public static final UnicodeBlock MAHJONG_TILES =
2401             new UnicodeBlock("MAHJONG_TILES",
2402                              "MAHJONG TILES",
2403                              "MAHJONGTILES");
2404 
2405         /**
2406          * Constant for the "Domino Tiles" Unicode character block.
2407          * @since 1.7
2408          */
2409         public static final UnicodeBlock DOMINO_TILES =
2410             new UnicodeBlock("DOMINO_TILES",
2411                              "DOMINO TILES",
2412                              "DOMINOTILES");
2413 
2414         /**
2415          * Constant for the "Playing Cards" Unicode character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock PLAYING_CARDS =
2419             new UnicodeBlock("PLAYING_CARDS",
2420                              "PLAYING CARDS",
2421                              "PLAYINGCARDS");
2422 
2423         /**
2424          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2425          * block.
2426          * @since 1.7
2427          */
2428         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2429             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2430                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2431                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2432 
2433         /**
2434          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2435          * block.
2436          * @since 1.7
2437          */
2438         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2439             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2440                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2441                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2442 
2443         /**
2444          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2445          * character block.
2446          * @since 1.7
2447          */
2448         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2449             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2450                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2451                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2452 
2453         /**
2454          * Constant for the "Emoticons" Unicode character block.
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock EMOTICONS =
2458             new UnicodeBlock("EMOTICONS");
2459 
2460         /**
2461          * Constant for the "Transport And Map Symbols" Unicode character block.
2462          * @since 1.7
2463          */
2464         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2465             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2466                              "TRANSPORT AND MAP SYMBOLS",
2467                              "TRANSPORTANDMAPSYMBOLS");
2468 
2469         /**
2470          * Constant for the "Alchemical Symbols" Unicode character block.
2471          * @since 1.7
2472          */
2473         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2474             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2475                              "ALCHEMICAL SYMBOLS",
2476                              "ALCHEMICALSYMBOLS");
2477 
2478         /**
2479          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2480          * character block.
2481          * @since 1.7
2482          */
2483         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2484             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2485                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2486                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2487 
2488         /**
2489          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2490          * character block.
2491          * @since 1.7
2492          */
2493         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2494             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2495                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2496                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2497 
2498         /**
2499          * Constant for the "Arabic Extended-A" Unicode character block.
2500          * @since 1.8
2501          */
2502         public static final UnicodeBlock ARABIC_EXTENDED_A =   // blockStarts lists this block as 08A0..08FF
2503             new UnicodeBlock("ARABIC_EXTENDED_A",   // identifier form; equals the field name
2504                              "ARABIC EXTENDED-A",   // spaced spelling; the hyphen replaces the last underscore
2505                              "ARABICEXTENDED-A");   // spaces dropped, hyphen kept (presumably aliases - TODO confirm)
2506 
2507         /**
2508          * Constant for the "Sundanese Supplement" Unicode character block.
2509          * @since 1.8
2510          */
2511         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =   // blockStarts lists this block as 1CC0..1CCF
2512             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2513                              "SUNDANESE SUPPLEMENT",
2514                              "SUNDANESESUPPLEMENT");
2515 
2516         /**
2517          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2518          * @since 1.8
2519          */
2520         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2521             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2522                              "MEETEI MAYEK EXTENSIONS",
2523                              "MEETEIMAYEKEXTENSIONS");
2524 
2525         /**
2526          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2527          * @since 1.8
2528          */
2529         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2530             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2531                              "MEROITIC HIEROGLYPHS",
2532                              "MEROITICHIEROGLYPHS");
2533 
2534         /**
2535          * Constant for the "Meroitic Cursive" Unicode character block.
2536          * @since 1.8
2537          */
2538         public static final UnicodeBlock MEROITIC_CURSIVE =
2539             new UnicodeBlock("MEROITIC_CURSIVE",
2540                              "MEROITIC CURSIVE",
2541                              "MEROITICCURSIVE");
2542 
2543         /**
2544          * Constant for the "Sora Sompeng" Unicode character block.
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock SORA_SOMPENG =
2548             new UnicodeBlock("SORA_SOMPENG",
2549                              "SORA SOMPENG",
2550                              "SORASOMPENG");
2551 
2552         /**
2553          * Constant for the "Chakma" Unicode character block.
2554          * @since 1.8
2555          */
2556         public static final UnicodeBlock CHAKMA =
2557             new UnicodeBlock("CHAKMA");   // single-word name: only the identifier is passed
2558 
2559         /**
2560          * Constant for the "Sharada" Unicode character block.
2561          * @since 1.8
2562          */
2563         public static final UnicodeBlock SHARADA =
2564             new UnicodeBlock("SHARADA");
2565 
2566         /**
2567          * Constant for the "Takri" Unicode character block.
2568          * @since 1.8
2569          */
2570         public static final UnicodeBlock TAKRI =
2571             new UnicodeBlock("TAKRI");
2572 
2573         /**
2574          * Constant for the "Miao" Unicode character block.
2575          * @since 1.8
2576          */
2577         public static final UnicodeBlock MIAO =
2578             new UnicodeBlock("MIAO");
2579 
2580         /**
2581          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2582          * character block.
2583          * @since 1.8
2584          */
2585         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2586             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2587                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2588                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2589 
2590         /**
2591          * Constant for the "Combining Diacritical Marks Extended" Unicode
2592          * character block.
2593          * @since 9
2594          */
2595         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =   // blockStarts lists this block as 1AB0..1AFF
2596             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",   // identifier form; equals the field name
2597                              "COMBINING DIACRITICAL MARKS EXTENDED",   // spaced spelling (presumably an alias - TODO confirm)
2598                              "COMBININGDIACRITICALMARKSEXTENDED");     // spelling without spaces (presumably an alias - TODO confirm)
2599 
2600         /**
2601          * Constant for the "Myanmar Extended-B" Unicode character block.
2602          * @since 9
2603          */
2604         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2605             new UnicodeBlock("MYANMAR_EXTENDED_B",
2606                              "MYANMAR EXTENDED-B",   // the hyphen of the block name replaces the last underscore
2607                              "MYANMAREXTENDED-B");   // spaces dropped, hyphen kept
2608 
2609         /**
2610          * Constant for the "Latin Extended-E" Unicode character block.
2611          * @since 9
2612          */
2613         public static final UnicodeBlock LATIN_EXTENDED_E =
2614             new UnicodeBlock("LATIN_EXTENDED_E",
2615                              "LATIN EXTENDED-E",
2616                              "LATINEXTENDED-E");
2617 
2618         /**
2619          * Constant for the "Coptic Epact Numbers" Unicode character block.
2620          * @since 9
2621          */
2622         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2623             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2624                              "COPTIC EPACT NUMBERS",
2625                              "COPTICEPACTNUMBERS");
2626 
2627         /**
2628          * Constant for the "Old Permic" Unicode character block.
2629          * @since 9
2630          */
2631         public static final UnicodeBlock OLD_PERMIC =
2632             new UnicodeBlock("OLD_PERMIC",
2633                              "OLD PERMIC",
2634                              "OLDPERMIC");
2635 
2636         /**
2637          * Constant for the "Elbasan" Unicode character block.
2638          * @since 9
2639          */
2640         public static final UnicodeBlock ELBASAN =
2641             new UnicodeBlock("ELBASAN");   // single-word name: only the identifier is passed
2642 
2643         /**
2644          * Constant for the "Caucasian Albanian" Unicode character block.
2645          * @since 9
2646          */
2647         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2648             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2649                              "CAUCASIAN ALBANIAN",
2650                              "CAUCASIANALBANIAN");
2651 
2652         /**
2653          * Constant for the "Linear A" Unicode character block.
2654          * @since 9
2655          */
2656         public static final UnicodeBlock LINEAR_A =
2657             new UnicodeBlock("LINEAR_A",
2658                              "LINEAR A",
2659                              "LINEARA");   // without the space the trailing letter fuses with the word
2660 
2661         /**
2662          * Constant for the "Palmyrene" Unicode character block.
2663          * @since 9
2664          */
2665         public static final UnicodeBlock PALMYRENE =
2666             new UnicodeBlock("PALMYRENE");
2667 
2668         /**
2669          * Constant for the "Nabataean" Unicode character block.
2670          * @since 9
2671          */
2672         public static final UnicodeBlock NABATAEAN =
2673             new UnicodeBlock("NABATAEAN");
2674 
2675         /**
2676          * Constant for the "Old North Arabian" Unicode character block.
2677          * @since 9
2678          */
2679         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2680             new UnicodeBlock("OLD_NORTH_ARABIAN",
2681                              "OLD NORTH ARABIAN",
2682                              "OLDNORTHARABIAN");
2683 
2684         /**
2685          * Constant for the "Manichaean" Unicode character block.
2686          * @since 9
2687          */
2688         public static final UnicodeBlock MANICHAEAN =
2689             new UnicodeBlock("MANICHAEAN");
2690 
2691         /**
2692          * Constant for the "Psalter Pahlavi" Unicode character block.
2693          * @since 9
2694          */
2695         public static final UnicodeBlock PSALTER_PAHLAVI =
2696             new UnicodeBlock("PSALTER_PAHLAVI",
2697                              "PSALTER PAHLAVI",
2698                              "PSALTERPAHLAVI");
2699 
2700         /**
2701          * Constant for the "Mahajani" Unicode character block.
2702          * @since 9
2703          */
2704         public static final UnicodeBlock MAHAJANI =
2705             new UnicodeBlock("MAHAJANI");
2706 
2707         /**
2708          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2709          * @since 9
2710          */
2711         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2712             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2713                              "SINHALA ARCHAIC NUMBERS",
2714                              "SINHALAARCHAICNUMBERS");
2715 
2716         /**
2717          * Constant for the "Khojki" Unicode character block.
2718          * @since 9
2719          */
2720         public static final UnicodeBlock KHOJKI =
2721             new UnicodeBlock("KHOJKI");   // single-word name: only the identifier is passed
2722 
2723         /**
2724          * Constant for the "Khudawadi" Unicode character block.
2725          * @since 9
2726          */
2727         public static final UnicodeBlock KHUDAWADI =
2728             new UnicodeBlock("KHUDAWADI");
2729 
2730         /**
2731          * Constant for the "Grantha" Unicode character block.
2732          * @since 9
2733          */
2734         public static final UnicodeBlock GRANTHA =
2735             new UnicodeBlock("GRANTHA");
2736 
2737         /**
2738          * Constant for the "Tirhuta" Unicode character block.
2739          * @since 9
2740          */
2741         public static final UnicodeBlock TIRHUTA =
2742             new UnicodeBlock("TIRHUTA");
2743 
2744         /**
2745          * Constant for the "Siddham" Unicode character block.
2746          * @since 9
2747          */
2748         public static final UnicodeBlock SIDDHAM =
2749             new UnicodeBlock("SIDDHAM");
2750 
2751         /**
2752          * Constant for the "Modi" Unicode character block.
2753          * @since 9
2754          */
2755         public static final UnicodeBlock MODI =
2756             new UnicodeBlock("MODI");
2757 
2758         /**
2759          * Constant for the "Warang Citi" Unicode character block.
2760          * @since 9
2761          */
2762         public static final UnicodeBlock WARANG_CITI =
2763             new UnicodeBlock("WARANG_CITI",   // identifier form; equals the field name
2764                              "WARANG CITI",   // spaced spelling (presumably an alias - TODO confirm)
2765                              "WARANGCITI");   // spelling without spaces (presumably an alias - TODO confirm)
2766 
2767         /**
2768          * Constant for the "Pau Cin Hau" Unicode character block.
2769          * @since 9
2770          */
2771         public static final UnicodeBlock PAU_CIN_HAU =
2772             new UnicodeBlock("PAU_CIN_HAU",
2773                              "PAU CIN HAU",
2774                              "PAUCINHAU");
2775 
2776         /**
2777          * Constant for the "Mro" Unicode character block.
2778          * @since 9
2779          */
2780         public static final UnicodeBlock MRO =
2781             new UnicodeBlock("MRO");
2782 
2783         /**
2784          * Constant for the "Bassa Vah" Unicode character block.
2785          * @since 9
2786          */
2787         public static final UnicodeBlock BASSA_VAH =
2788             new UnicodeBlock("BASSA_VAH",
2789                              "BASSA VAH",
2790                              "BASSAVAH");
2791 
2792         /**
2793          * Constant for the "Pahawh Hmong" Unicode character block.
2794          * @since 9
2795          */
2796         public static final UnicodeBlock PAHAWH_HMONG =
2797             new UnicodeBlock("PAHAWH_HMONG",
2798                              "PAHAWH HMONG",
2799                              "PAHAWHHMONG");
2800 
2801         /**
2802          * Constant for the "Duployan" Unicode character block.
2803          * @since 9
2804          */
2805         public static final UnicodeBlock DUPLOYAN =
2806             new UnicodeBlock("DUPLOYAN");
2807 
2808         /**
2809          * Constant for the "Shorthand Format Controls" Unicode character block.
2810          * @since 9
2811          */
2812         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2813             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2814                              "SHORTHAND FORMAT CONTROLS",
2815                              "SHORTHANDFORMATCONTROLS");
2816 
2817         /**
2818          * Constant for the "Mende Kikakui" Unicode character block.
2819          * @since 9
2820          */
2821         public static final UnicodeBlock MENDE_KIKAKUI =
2822             new UnicodeBlock("MENDE_KIKAKUI",
2823                              "MENDE KIKAKUI",
2824                              "MENDEKIKAKUI");
2825 
2826         /**
2827          * Constant for the "Ornamental Dingbats" Unicode character block.
2828          * @since 9
2829          */
2830         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2831             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2832                              "ORNAMENTAL DINGBATS",
2833                              "ORNAMENTALDINGBATS");
2834 
2835         /**
2836          * Constant for the "Geometric Shapes Extended" Unicode character block.
2837          * @since 9
2838          */
2839         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2840             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2841                              "GEOMETRIC SHAPES EXTENDED",
2842                              "GEOMETRICSHAPESEXTENDED");
2843 
2844         /**
2845          * Constant for the "Supplemental Arrows-C" Unicode character block.
2846          * @since 9
2847          */
2848         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2849             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2850                              "SUPPLEMENTAL ARROWS-C",   // the hyphen of the block name replaces the last underscore
2851                              "SUPPLEMENTALARROWS-C");   // spaces dropped, hyphen kept
2852 
2853         /**
2854          * Constant for the "Cherokee Supplement" Unicode character block.
2855          * @since 9
2856          */
2857         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2858             new UnicodeBlock("CHEROKEE_SUPPLEMENT",   // identifier form; equals the field name
2859                              "CHEROKEE SUPPLEMENT",   // spaced spelling (presumably an alias - TODO confirm)
2860                              "CHEROKEESUPPLEMENT");   // spelling without spaces (presumably an alias - TODO confirm)
2861 
2862         /**
2863          * Constant for the "Hatran" Unicode character block.
2864          * @since 9
2865          */
2866         public static final UnicodeBlock HATRAN =
2867             new UnicodeBlock("HATRAN");   // single-word name: only the identifier is passed
2868 
2869         /**
2870          * Constant for the "Old Hungarian" Unicode character block.
2871          * @since 9
2872          */
2873         public static final UnicodeBlock OLD_HUNGARIAN =
2874             new UnicodeBlock("OLD_HUNGARIAN",
2875                              "OLD HUNGARIAN",
2876                              "OLDHUNGARIAN");
2877 
2878         /**
2879          * Constant for the "Multani" Unicode character block.
2880          * @since 9
2881          */
2882         public static final UnicodeBlock MULTANI =
2883             new UnicodeBlock("MULTANI");
2884 
2885         /**
2886          * Constant for the "Ahom" Unicode character block.
2887          * @since 9
2888          */
2889         public static final UnicodeBlock AHOM =
2890             new UnicodeBlock("AHOM");
2891 
2892         /**
2893          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2894          * @since 9
2895          */
2896         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2897             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2898                              "EARLY DYNASTIC CUNEIFORM",
2899                              "EARLYDYNASTICCUNEIFORM");
2900 
2901         /**
2902          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2903          * @since 9
2904          */
2905         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2906             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2907                              "ANATOLIAN HIEROGLYPHS",
2908                              "ANATOLIANHIEROGLYPHS");
2909 
2910         /**
2911          * Constant for the "Sutton SignWriting" Unicode character block.
2912          * @since 9
2913          */
2914         public static final UnicodeBlock SUTTON_SIGNWRITING =
2915             new UnicodeBlock("SUTTON_SIGNWRITING",
2916                              "SUTTON SIGNWRITING",
2917                              "SUTTONSIGNWRITING");
2918 
2919         /**
2920          * Constant for the "Supplemental Symbols and Pictographs" Unicode
2921          * character block.
2922          * @since 9
2923          */
2924         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2925             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2926                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2927                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2928 
2929         /**
2930          * Constant for the "CJK Unified Ideographs Extension E" Unicode
2931          * character block.
2932          * @since 9
2933          */
2934         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2935             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2936                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2937                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2938 
2939         /**
2940          * Constant for the "Syriac Supplement" Unicode
2941          * character block.
2942          * @since 11
2943          */
2944         public static final UnicodeBlock SYRIAC_SUPPLEMENT =   // blockStarts lists this block as 0860..086F
2945             new UnicodeBlock("SYRIAC_SUPPLEMENT",   // identifier form; equals the field name
2946                              "SYRIAC SUPPLEMENT",   // spaced spelling (presumably an alias - TODO confirm)
2947                              "SYRIACSUPPLEMENT");   // spelling without spaces (presumably an alias - TODO confirm)
2948 
2949         /**
2950          * Constant for the "Cyrillic Extended-C" Unicode
2951          * character block.
2952          * @since 11
2953          */
2954         public static final UnicodeBlock CYRILLIC_EXTENDED_C =   // blockStarts lists this block as 1C80..1C8F
2955             new UnicodeBlock("CYRILLIC_EXTENDED_C",
2956                              "CYRILLIC EXTENDED-C",   // the hyphen of the block name replaces the last underscore
2957                              "CYRILLICEXTENDED-C");   // spaces dropped, hyphen kept
2958 
2959         /**
2960          * Constant for the "Osage" Unicode
2961          * character block.
2962          * @since 11
2963          */
2964         public static final UnicodeBlock OSAGE =
2965             new UnicodeBlock("OSAGE");   // single-word name: only the identifier is passed
2966 
2967         /**
2968          * Constant for the "Newa" Unicode
2969          * character block.
2970          * @since 11
2971          */
2972         public static final UnicodeBlock NEWA =
2973             new UnicodeBlock("NEWA");
2974 
2975         /**
2976          * Constant for the "Mongolian Supplement" Unicode
2977          * character block.
2978          * @since 11
2979          */
2980         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2981             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2982                              "MONGOLIAN SUPPLEMENT",
2983                              "MONGOLIANSUPPLEMENT");
2984 
2985         /**
2986          * Constant for the "Marchen" Unicode
2987          * character block.
2988          * @since 11
2989          */
2990         public static final UnicodeBlock MARCHEN =
2991             new UnicodeBlock("MARCHEN");
2992 
2993         /**
2994          * Constant for the "Ideographic Symbols and Punctuation" Unicode
2995          * character block.
2996          * @since 11
2997          */
2998         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2999             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3000                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3001                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3002 
3003         /**
3004          * Constant for the "Tangut" Unicode
3005          * character block.
3006          * @since 11
3007          */
3008         public static final UnicodeBlock TANGUT =
3009             new UnicodeBlock("TANGUT");
3010 
3011         /**
3012          * Constant for the "Tangut Components" Unicode
3013          * character block.
3014          * @since 11
3015          */
3016         public static final UnicodeBlock TANGUT_COMPONENTS =
3017             new UnicodeBlock("TANGUT_COMPONENTS",
3018                              "TANGUT COMPONENTS",
3019                              "TANGUTCOMPONENTS");
3020 
3021         /**
3022          * Constant for the "Kana Extended-A" Unicode
3023          * character block.
3024          * @since 11
3025          */
3026         public static final UnicodeBlock KANA_EXTENDED_A =
3027             new UnicodeBlock("KANA_EXTENDED_A",
3028                              "KANA EXTENDED-A",
3029                              "KANAEXTENDED-A");
3030         /**
3031          * Constant for the "Glagolitic Supplement" Unicode
3032          * character block.
3033          * @since 11
3034          */
3035         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3036             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",   // identifier form; equals the field name
3037                              "GLAGOLITIC SUPPLEMENT",   // spaced spelling (presumably an alias - TODO confirm)
3038                              "GLAGOLITICSUPPLEMENT");   // spelling without spaces (presumably an alias - TODO confirm)
3039         /**
3040          * Constant for the "Adlam" Unicode
3041          * character block.
3042          * @since 11
3043          */
3044         public static final UnicodeBlock ADLAM =
3045             new UnicodeBlock("ADLAM");   // single-word name: only the identifier is passed
3046 
3047         /**
3048          * Constant for the "Masaram Gondi" Unicode
3049          * character block.
3050          * @since 11
3051          */
3052         public static final UnicodeBlock MASARAM_GONDI =
3053             new UnicodeBlock("MASARAM_GONDI",
3054                              "MASARAM GONDI",
3055                              "MASARAMGONDI");
3056 
3057         /**
3058          * Constant for the "Zanabazar Square" Unicode
3059          * character block.
3060          * @since 11
3061          */
3062         public static final UnicodeBlock ZANABAZAR_SQUARE =
3063             new UnicodeBlock("ZANABAZAR_SQUARE",
3064                              "ZANABAZAR SQUARE",
3065                              "ZANABAZARSQUARE");
3066 
3067         /**
3068          * Constant for the "Nushu" Unicode
3069          * character block.
3070          * @since 11
3071          */
3072         public static final UnicodeBlock NUSHU =
3073             new UnicodeBlock("NUSHU");
3074 
3075         /**
3076          * Constant for the "Soyombo" Unicode
3077          * character block.
3078          * @since 11
3079          */
3080         public static final UnicodeBlock SOYOMBO =
3081             new UnicodeBlock("SOYOMBO");
3082 
3083         /**
3084          * Constant for the "Bhaiksuki" Unicode
3085          * character block.
3086          * @since 11
3087          */
3088         public static final UnicodeBlock BHAIKSUKI =
3089             new UnicodeBlock("BHAIKSUKI");
3090 
3091         /**
3092          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3093          * character block.
3094          * @since 11
3095          */
3096         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3097             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3098                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3099                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3100 
3101         private static final int blockStarts[] = {
3102             0x0000,   // 0000..007F; Basic Latin
3103             0x0080,   // 0080..00FF; Latin-1 Supplement
3104             0x0100,   // 0100..017F; Latin Extended-A
3105             0x0180,   // 0180..024F; Latin Extended-B
3106             0x0250,   // 0250..02AF; IPA Extensions
3107             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3108             0x0300,   // 0300..036F; Combining Diacritical Marks
3109             0x0370,   // 0370..03FF; Greek and Coptic
3110             0x0400,   // 0400..04FF; Cyrillic
3111             0x0500,   // 0500..052F; Cyrillic Supplement
3112             0x0530,   // 0530..058F; Armenian
3113             0x0590,   // 0590..05FF; Hebrew
3114             0x0600,   // 0600..06FF; Arabic
3115             0x0700,   // 0700..074F; Syriac
3116             0x0750,   // 0750..077F; Arabic Supplement
3117             0x0780,   // 0780..07BF; Thaana
3118             0x07C0,   // 07C0..07FF; NKo
3119             0x0800,   // 0800..083F; Samaritan
3120             0x0840,   // 0840..085F; Mandaic
3121             0x0860,   // 0860..086F; Syriac Supplement
3122             0x0870,   //             unassigned
3123             0x08A0,   // 08A0..08FF; Arabic Extended-A
3124             0x0900,   // 0900..097F; Devanagari
3125             0x0980,   // 0980..09FF; Bengali
3126             0x0A00,   // 0A00..0A7F; Gurmukhi
3127             0x0A80,   // 0A80..0AFF; Gujarati
3128             0x0B00,   // 0B00..0B7F; Oriya
3129             0x0B80,   // 0B80..0BFF; Tamil
3130             0x0C00,   // 0C00..0C7F; Telugu
3131             0x0C80,   // 0C80..0CFF; Kannada
3132             0x0D00,   // 0D00..0D7F; Malayalam
3133             0x0D80,   // 0D80..0DFF; Sinhala
3134             0x0E00,   // 0E00..0E7F; Thai
3135             0x0E80,   // 0E80..0EFF; Lao
3136             0x0F00,   // 0F00..0FFF; Tibetan
3137             0x1000,   // 1000..109F; Myanmar
3138             0x10A0,   // 10A0..10FF; Georgian
3139             0x1100,   // 1100..11FF; Hangul Jamo
3140             0x1200,   // 1200..137F; Ethiopic
3141             0x1380,   // 1380..139F; Ethiopic Supplement
3142             0x13A0,   // 13A0..13FF; Cherokee
3143             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3144             0x1680,   // 1680..169F; Ogham
3145             0x16A0,   // 16A0..16FF; Runic
3146             0x1700,   // 1700..171F; Tagalog
3147             0x1720,   // 1720..173F; Hanunoo
3148             0x1740,   // 1740..175F; Buhid
3149             0x1760,   // 1760..177F; Tagbanwa
3150             0x1780,   // 1780..17FF; Khmer
3151             0x1800,   // 1800..18AF; Mongolian
3152             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3153             0x1900,   // 1900..194F; Limbu
3154             0x1950,   // 1950..197F; Tai Le
3155             0x1980,   // 1980..19DF; New Tai Lue
3156             0x19E0,   // 19E0..19FF; Khmer Symbols
3157             0x1A00,   // 1A00..1A1F; Buginese
3158             0x1A20,   // 1A20..1AAF; Tai Tham
3159             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3160             0x1B00,   // 1B00..1B7F; Balinese
3161             0x1B80,   // 1B80..1BBF; Sundanese
3162             0x1BC0,   // 1BC0..1BFF; Batak
3163             0x1C00,   // 1C00..1C4F; Lepcha
3164             0x1C50,   // 1C50..1C7F; Ol Chiki
3165             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3166             0x1C90,   //             unassigned
3167             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3168             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3169             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3170             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3171             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3172             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3173             0x1F00,   // 1F00..1FFF; Greek Extended
3174             0x2000,   // 2000..206F; General Punctuation
3175             0x2070,   // 2070..209F; Superscripts and Subscripts
3176             0x20A0,   // 20A0..20CF; Currency Symbols
3177             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3178             0x2100,   // 2100..214F; Letterlike Symbols
3179             0x2150,   // 2150..218F; Number Forms
3180             0x2190,   // 2190..21FF; Arrows
3181             0x2200,   // 2200..22FF; Mathematical Operators
3182             0x2300,   // 2300..23FF; Miscellaneous Technical
3183             0x2400,   // 2400..243F; Control Pictures
3184             0x2440,   // 2440..245F; Optical Character Recognition
3185             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3186             0x2500,   // 2500..257F; Box Drawing
3187             0x2580,   // 2580..259F; Block Elements
3188             0x25A0,   // 25A0..25FF; Geometric Shapes
3189             0x2600,   // 2600..26FF; Miscellaneous Symbols
3190             0x2700,   // 2700..27BF; Dingbats
3191             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3192             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3193             0x2800,   // 2800..28FF; Braille Patterns
3194             0x2900,   // 2900..297F; Supplemental Arrows-B
3195             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3196             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3197             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3198             0x2C00,   // 2C00..2C5F; Glagolitic
3199             0x2C60,   // 2C60..2C7F; Latin Extended-C
3200             0x2C80,   // 2C80..2CFF; Coptic
3201             0x2D00,   // 2D00..2D2F; Georgian Supplement
3202             0x2D30,   // 2D30..2D7F; Tifinagh
3203             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3204             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3205             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3206             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3207             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3208             0x2FE0,   //             unassigned
3209             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3210             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3211             0x3040,   // 3040..309F; Hiragana
3212             0x30A0,   // 30A0..30FF; Katakana
3213             0x3100,   // 3100..312F; Bopomofo
3214             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3215             0x3190,   // 3190..319F; Kanbun
3216             0x31A0,   // 31A0..31BF; Bopomofo Extended
3217             0x31C0,   // 31C0..31EF; CJK Strokes
3218             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3219             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3220             0x3300,   // 3300..33FF; CJK Compatibility
3221             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3222             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3223             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3224             0xA000,   // A000..A48F; Yi Syllables
3225             0xA490,   // A490..A4CF; Yi Radicals
3226             0xA4D0,   // A4D0..A4FF; Lisu
3227             0xA500,   // A500..A63F; Vai
3228             0xA640,   // A640..A69F; Cyrillic Extended-B
3229             0xA6A0,   // A6A0..A6FF; Bamum
3230             0xA700,   // A700..A71F; Modifier Tone Letters
3231             0xA720,   // A720..A7FF; Latin Extended-D
3232             0xA800,   // A800..A82F; Syloti Nagri
3233             0xA830,   // A830..A83F; Common Indic Number Forms
3234             0xA840,   // A840..A87F; Phags-pa
3235             0xA880,   // A880..A8DF; Saurashtra
3236             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3237             0xA900,   // A900..A92F; Kayah Li
3238             0xA930,   // A930..A95F; Rejang
3239             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3240             0xA980,   // A980..A9DF; Javanese
3241             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3242             0xAA00,   // AA00..AA5F; Cham
3243             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3244             0xAA80,   // AA80..AADF; Tai Viet
3245             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3246             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3247             0xAB30,   // AB30..AB6F; Latin Extended-E
3248             0xAB70,   // AB70..ABBF; Cherokee Supplement
3249             0xABC0,   // ABC0..ABFF; Meetei Mayek
3250             0xAC00,   // AC00..D7AF; Hangul Syllables
3251             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3252             0xD800,   // D800..DB7F; High Surrogates
3253             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3254             0xDC00,   // DC00..DFFF; Low Surrogates
3255             0xE000,   // E000..F8FF; Private Use Area
3256             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3257             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3258             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3259             0xFE00,   // FE00..FE0F; Variation Selectors
3260             0xFE10,   // FE10..FE1F; Vertical Forms
3261             0xFE20,   // FE20..FE2F; Combining Half Marks
3262             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3263             0xFE50,   // FE50..FE6F; Small Form Variants
3264             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3265             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3266             0xFFF0,   // FFF0..FFFF; Specials
3267             0x10000,  // 10000..1007F; Linear B Syllabary
3268             0x10080,  // 10080..100FF; Linear B Ideograms
3269             0x10100,  // 10100..1013F; Aegean Numbers
3270             0x10140,  // 10140..1018F; Ancient Greek Numbers
3271             0x10190,  // 10190..101CF; Ancient Symbols
3272             0x101D0,  // 101D0..101FF; Phaistos Disc
3273             0x10200,  //               unassigned
3274             0x10280,  // 10280..1029F; Lycian
3275             0x102A0,  // 102A0..102DF; Carian
3276             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3277             0x10300,  // 10300..1032F; Old Italic
3278             0x10330,  // 10330..1034F; Gothic
3279             0x10350,  // 10350..1037F; Old Permic
3280             0x10380,  // 10380..1039F; Ugaritic
3281             0x103A0,  // 103A0..103DF; Old Persian
3282             0x103E0,  //               unassigned
3283             0x10400,  // 10400..1044F; Deseret
3284             0x10450,  // 10450..1047F; Shavian
3285             0x10480,  // 10480..104AF; Osmanya
3286             0x104B0,  // 104B0..104FF; Osage
3287             0x10500,  // 10500..1052F; Elbasan
3288             0x10530,  // 10530..1056F; Caucasian Albanian
3289             0x10570,  //               unassigned
3290             0x10600,  // 10600..1077F; Linear A
3291             0x10780,  //               unassigned
3292             0x10800,  // 10800..1083F; Cypriot Syllabary
3293             0x10840,  // 10840..1085F; Imperial Aramaic
3294             0x10860,  // 10860..1087F; Palmyrene
3295             0x10880,  // 10880..108AF; Nabataean
3296             0x108B0,  //               unassigned
3297             0x108E0,  // 108E0..108FF; Hatran
3298             0x10900,  // 10900..1091F; Phoenician
3299             0x10920,  // 10920..1093F; Lydian
3300             0x10940,  //               unassigned
3301             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3302             0x109A0,  // 109A0..109FF; Meroitic Cursive
3303             0x10A00,  // 10A00..10A5F; Kharoshthi
3304             0x10A60,  // 10A60..10A7F; Old South Arabian
3305             0x10A80,  // 10A80..10A9F; Old North Arabian
3306             0x10AA0,  //               unassigned
3307             0x10AC0,  // 10AC0..10AFF; Manichaean
3308             0x10B00,  // 10B00..10B3F; Avestan
3309             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3310             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3311             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3312             0x10BB0,  //               unassigned
3313             0x10C00,  // 10C00..10C4F; Old Turkic
3314             0x10C50,  //               unassigned
3315             0x10C80,  // 10C80..10CFF; Old Hungarian
3316             0x10D00,  //               unassigned
3317             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3318             0x10E80,  //               unassigned
3319             0x11000,  // 11000..1107F; Brahmi
3320             0x11080,  // 11080..110CF; Kaithi
3321             0x110D0,  // 110D0..110FF; Sora Sompeng
3322             0x11100,  // 11100..1114F; Chakma
3323             0x11150,  // 11150..1117F; Mahajani
3324             0x11180,  // 11180..111DF; Sharada
3325             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3326             0x11200,  // 11200..1124F; Khojki
3327             0x11250,  //               unassigned
3328             0x11280,  // 11280..112AF; Multani
3329             0x112B0,  // 112B0..112FF; Khudawadi
3330             0x11300,  // 11300..1137F; Grantha
3331             0x11380,  //               unassigned
3332             0x11400,  // 11400..1147F; Newa
3333             0x11480,  // 11480..114DF; Tirhuta
3334             0x114E0,  //               unassigned
3335             0x11580,  // 11580..115FF; Siddham
3336             0x11600,  // 11600..1165F; Modi
3337             0x11660, //  11660..1167F; Mongolian Supplement
3338             0x11680,  // 11680..116CF; Takri
3339             0x116D0,  //               unassigned
3340             0x11700,  // 11700..1173F; Ahom
3341             0x11740,  //               unassigned
3342             0x118A0,  // 118A0..118FF; Warang Citi
3343             0x11900,  //               unassigned
3344             0x11A00,  // 11A00..11A4F; Zanabazar Square
3345             0x11A50,  // 11A50..11AAF; Soyombo
3346             0x11AB0,  //               unassigned
3347             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3348             0x11B00,  //               unassigned
3349             0x11C00,  // 11C00..11C6F; Bhaiksuki
3350             0x11C70,  // 11C70..11CBF; Marchen
3351             0x11CC0,  //               unassigned
3352             0x11D00,  // 11D00..11D5F; Masaram Gondi
3353             0x11D60,  //               unassigned
3354             0x12000,  // 12000..123FF; Cuneiform
3355             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3356             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3357             0x12550,  //               unassigned
3358             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3359             0x13430,  //               unassigned
3360             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3361             0x14680,  //               unassigned
3362             0x16800,  // 16800..16A3F; Bamum Supplement
3363             0x16A40,  // 16A40..16A6F; Mro
3364             0x16A70,  //               unassigned
3365             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3366             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3367             0x16B90,  //               unassigned
3368             0x16F00,  // 16F00..16F9F; Miao
3369             0x16FA0,  //               unassigned
3370             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3371             0x17000,  // 17000..187FF; Tangut
3372             0x18800,  // 18800..18AFF; Tangut Components
3373             0x18B00,  //               unassigned
3374             0x1B000,  // 1B000..1B0FF; Kana Supplement
3375             0x1B100,  // 1B100..1B12F; Kana Extended-A
3376             0x1B130,  //               unassigned
3377             0x1B170,  // 1B170..1B2FF; Nushu
3378             0x1B300,  //               unassigned
3379             0x1BC00,  // 1BC00..1BC9F; Duployan
3380             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3381             0x1BCB0,  //               unassigned
3382             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3383             0x1D100,  // 1D100..1D1FF; Musical Symbols
3384             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3385             0x1D250,  //               unassigned
3386             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3387             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3388             0x1D380,  //               unassigned
3389             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3390             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3391             0x1DAB0,  //               unassigned
3392             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3393             0x1E030,  //               unassigned
3394             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3395             0x1E8E0,  //               unassigned
3396             0x1E900,  // 1E900..1E95F; Adlam
3397             0x1E960,  //               unassigned
3398             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3399             0x1EF00,  //               unassigned
3400             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3401             0x1F030,  // 1F030..1F09F; Domino Tiles
3402             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3403             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3404             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3405             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3406             0x1F600,  // 1F600..1F64F; Emoticons
3407             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3408             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3409             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3410             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3411             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3412             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3413             0x1FA00,  //               unassigned
3414             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3415             0x2A6E0,  //               unassigned
3416             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3417             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3418             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3419             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3420             0x2EBF0,  //               unassigned
3421             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3422             0x2FA20,  //               unassigned
3423             0xE0000,  // E0000..E007F; Tags
3424             0xE0080,  //               unassigned
3425             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3426             0xE01F0,  //               unassigned
3427             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3428             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
3429         };
3430 
        // Lookup table that parallels blockStarts: of(int) locates an index in
        // blockStarts and returns the element of this array at that same index,
        // so the two arrays must stay in step entry for entry.
        // A null entry stands for a range with no block assigned (the ranges
        // labelled "unassigned" in blockStarts); of(int) returns null for it.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            null,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            null,
            LINEAR_A,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            null,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            null,
            AHOM,
            null,
            WARANG_CITI,
            null,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            null,
            PAU_CIN_HAU,
            null,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            ANATOLIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            null,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            null,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            null,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            GLAGOLITIC_SUPPLEMENT,
            null,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
3760 
3761 
3762         /**
3763          * Returns the object representing the Unicode block containing the
3764          * given character, or {@code null} if the character is not a
3765          * member of a defined block.
3766          *
3767          * <p><b>Note:</b> This method cannot handle
3768          * <a href="Character.html#supplementary"> supplementary
3769          * characters</a>.  To support all Unicode characters, including
3770          * supplementary characters, use the {@link #of(int)} method.
3771          *
3772          * @param   c  The character in question
3773          * @return  The {@code UnicodeBlock} instance representing the
3774          *          Unicode block of which this character is a member, or
3775          *          {@code null} if the character is not a member of any
3776          *          Unicode block
3777          */
3778         public static UnicodeBlock of(char c) {
3779             return of((int)c);
3780         }
3781 
3782         /**
3783          * Returns the object representing the Unicode block
3784          * containing the given character (Unicode code point), or
3785          * {@code null} if the character is not a member of a
3786          * defined block.
3787          *
3788          * @param   codePoint the character (Unicode code point) in question.
3789          * @return  The {@code UnicodeBlock} instance representing the
3790          *          Unicode block of which this character is a member, or
3791          *          {@code null} if the character is not a member of any
3792          *          Unicode block
3793          * @exception IllegalArgumentException if the specified
3794          * {@code codePoint} is an invalid Unicode code point.
3795          * @see Character#isValidCodePoint(int)
3796          * @since   1.5
3797          */
3798         public static UnicodeBlock of(int codePoint) {
3799             if (!isValidCodePoint(codePoint)) {
3800                 throw new IllegalArgumentException();
3801             }
3802 
3803             int top, bottom, current;
3804             bottom = 0;
3805             top = blockStarts.length;
3806             current = top/2;
3807 
3808             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3809             while (top - bottom > 1) {
3810                 if (codePoint >= blockStarts[current]) {
3811                     bottom = current;
3812                 } else {
3813                     top = current;
3814                 }
3815                 current = (top + bottom) / 2;
3816             }
3817             return blocks[current];
3818         }
3819 
3820         /**
3821          * Returns the UnicodeBlock with the given name. Block
3822          * names are determined by The Unicode Standard. The file
3823          * {@code Blocks-<version>.txt} defines blocks for a particular
3824          * version of the standard. The {@link Character} class specifies
3825          * the version of the standard that it supports.
3826          * <p>
3827          * This method accepts block names in the following forms:
3828          * <ol>
3829          * <li> Canonical block names as defined by the Unicode Standard.
3830          * For example, the standard defines a "Basic Latin" block. Therefore, this
3831          * method accepts "Basic Latin" as a valid block name. The documentation of
3832          * each UnicodeBlock provides the canonical name.
3833          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3834          * is a valid block name for the "Basic Latin" block.
3835          * <li>The text representation of each constant UnicodeBlock identifier.
3836          * For example, this method will return the {@link #BASIC_LATIN} block if
3837          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3838          * hyphens in the canonical name with underscores.
3839          * </ol>
3840          * Finally, character case is ignored for all of the valid block name forms.
3841          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3842          * The en_US locale's case mapping rules are used to provide case-insensitive
3843          * string comparisons for block name validation.
3844          * <p>
3845          * If the Unicode Standard changes block names, both the previous and
3846          * current names will be accepted.
3847          *
3848          * @param blockName A {@code UnicodeBlock} name.
3849          * @return The {@code UnicodeBlock} instance identified
3850          *         by {@code blockName}
3851          * @throws IllegalArgumentException if {@code blockName} is an
3852          *         invalid name
3853          * @throws NullPointerException if {@code blockName} is null
3854          * @since 1.5
3855          */
3856         public static final UnicodeBlock forName(String blockName) {
3857             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3858             if (block == null) {
3859                 throw new IllegalArgumentException();
3860             }
3861             return block;
3862         }
3863     }
3864 
3865 
3866     /**
3867      * A family of character subsets representing the character scripts
3868      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3869      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3870      * character is assigned to a single Unicode script, either a specific
3871      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3872      * one of the following three special values,
3873      * {@link Character.UnicodeScript#INHERITED Inherited},
3874      * {@link Character.UnicodeScript#COMMON Common} or
3875      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3876      *
3877      * @since 1.7
3878      */
3879     public static enum UnicodeScript {
3880         /**
3881          * Unicode script "Common".
3882          */
3883         COMMON,
3884 
3885         /**
3886          * Unicode script "Latin".
3887          */
3888         LATIN,
3889 
3890         /**
3891          * Unicode script "Greek".
3892          */
3893         GREEK,
3894 
3895         /**
3896          * Unicode script "Cyrillic".
3897          */
3898         CYRILLIC,
3899 
3900         /**
3901          * Unicode script "Armenian".
3902          */
3903         ARMENIAN,
3904 
3905         /**
3906          * Unicode script "Hebrew".
3907          */
3908         HEBREW,
3909 
3910         /**
3911          * Unicode script "Arabic".
3912          */
3913         ARABIC,
3914 
3915         /**
3916          * Unicode script "Syriac".
3917          */
3918         SYRIAC,
3919 
3920         /**
3921          * Unicode script "Thaana".
3922          */
3923         THAANA,
3924 
3925         /**
3926          * Unicode script "Devanagari".
3927          */
3928         DEVANAGARI,
3929 
3930         /**
3931          * Unicode script "Bengali".
3932          */
3933         BENGALI,
3934 
3935         /**
3936          * Unicode script "Gurmukhi".
3937          */
3938         GURMUKHI,
3939 
3940         /**
3941          * Unicode script "Gujarati".
3942          */
3943         GUJARATI,
3944 
3945         /**
3946          * Unicode script "Oriya".
3947          */
3948         ORIYA,
3949 
3950         /**
3951          * Unicode script "Tamil".
3952          */
3953         TAMIL,
3954 
3955         /**
3956          * Unicode script "Telugu".
3957          */
3958         TELUGU,
3959 
3960         /**
3961          * Unicode script "Kannada".
3962          */
3963         KANNADA,
3964 
3965         /**
3966          * Unicode script "Malayalam".
3967          */
3968         MALAYALAM,
3969 
3970         /**
3971          * Unicode script "Sinhala".
3972          */
3973         SINHALA,
3974 
3975         /**
3976          * Unicode script "Thai".
3977          */
3978         THAI,
3979 
3980         /**
3981          * Unicode script "Lao".
3982          */
3983         LAO,
3984 
3985         /**
3986          * Unicode script "Tibetan".
3987          */
3988         TIBETAN,
3989 
3990         /**
3991          * Unicode script "Myanmar".
3992          */
3993         MYANMAR,
3994 
3995         /**
3996          * Unicode script "Georgian".
3997          */
3998         GEORGIAN,
3999 
4000         /**
4001          * Unicode script "Hangul".
4002          */
4003         HANGUL,
4004 
4005         /**
4006          * Unicode script "Ethiopic".
4007          */
4008         ETHIOPIC,
4009 
4010         /**
4011          * Unicode script "Cherokee".
4012          */
4013         CHEROKEE,
4014 
4015         /**
4016          * Unicode script "Canadian_Aboriginal".
4017          */
4018         CANADIAN_ABORIGINAL,
4019 
4020         /**
4021          * Unicode script "Ogham".
4022          */
4023         OGHAM,
4024 
4025         /**
4026          * Unicode script "Runic".
4027          */
4028         RUNIC,
4029 
4030         /**
4031          * Unicode script "Khmer".
4032          */
4033         KHMER,
4034 
4035         /**
4036          * Unicode script "Mongolian".
4037          */
4038         MONGOLIAN,
4039 
4040         /**
4041          * Unicode script "Hiragana".
4042          */
4043         HIRAGANA,
4044 
4045         /**
4046          * Unicode script "Katakana".
4047          */
4048         KATAKANA,
4049 
4050         /**
4051          * Unicode script "Bopomofo".
4052          */
4053         BOPOMOFO,
4054 
4055         /**
4056          * Unicode script "Han".
4057          */
4058         HAN,
4059 
4060         /**
4061          * Unicode script "Yi".
4062          */
4063         YI,
4064 
4065         /**
4066          * Unicode script "Old_Italic".
4067          */
4068         OLD_ITALIC,
4069 
4070         /**
4071          * Unicode script "Gothic".
4072          */
4073         GOTHIC,
4074 
4075         /**
4076          * Unicode script "Deseret".
4077          */
4078         DESERET,
4079 
4080         /**
4081          * Unicode script "Inherited".
4082          */
4083         INHERITED,
4084 
4085         /**
4086          * Unicode script "Tagalog".
4087          */
4088         TAGALOG,
4089 
4090         /**
4091          * Unicode script "Hanunoo".
4092          */
4093         HANUNOO,
4094 
4095         /**
4096          * Unicode script "Buhid".
4097          */
4098         BUHID,
4099 
4100         /**
4101          * Unicode script "Tagbanwa".
4102          */
4103         TAGBANWA,
4104 
4105         /**
4106          * Unicode script "Limbu".
4107          */
4108         LIMBU,
4109 
4110         /**
4111          * Unicode script "Tai_Le".
4112          */
4113         TAI_LE,
4114 
4115         /**
4116          * Unicode script "Linear_B".
4117          */
4118         LINEAR_B,
4119 
4120         /**
4121          * Unicode script "Ugaritic".
4122          */
4123         UGARITIC,
4124 
4125         /**
4126          * Unicode script "Shavian".
4127          */
4128         SHAVIAN,
4129 
4130         /**
4131          * Unicode script "Osmanya".
4132          */
4133         OSMANYA,
4134 
4135         /**
4136          * Unicode script "Cypriot".
4137          */
4138         CYPRIOT,
4139 
4140         /**
4141          * Unicode script "Braille".
4142          */
4143         BRAILLE,
4144 
4145         /**
4146          * Unicode script "Buginese".
4147          */
4148         BUGINESE,
4149 
4150         /**
4151          * Unicode script "Coptic".
4152          */
4153         COPTIC,
4154 
4155         /**
4156          * Unicode script "New_Tai_Lue".
4157          */
4158         NEW_TAI_LUE,
4159 
4160         /**
4161          * Unicode script "Glagolitic".
4162          */
4163         GLAGOLITIC,
4164 
4165         /**
4166          * Unicode script "Tifinagh".
4167          */
4168         TIFINAGH,
4169 
4170         /**
4171          * Unicode script "Syloti_Nagri".
4172          */
4173         SYLOTI_NAGRI,
4174 
4175         /**
4176          * Unicode script "Old_Persian".
4177          */
4178         OLD_PERSIAN,
4179 
4180         /**
4181          * Unicode script "Kharoshthi".
4182          */
4183         KHAROSHTHI,
4184 
4185         /**
4186          * Unicode script "Balinese".
4187          */
4188         BALINESE,
4189 
4190         /**
4191          * Unicode script "Cuneiform".
4192          */
4193         CUNEIFORM,
4194 
4195         /**
4196          * Unicode script "Phoenician".
4197          */
4198         PHOENICIAN,
4199 
4200         /**
4201          * Unicode script "Phags_Pa".
4202          */
4203         PHAGS_PA,
4204 
4205         /**
4206          * Unicode script "Nko".
4207          */
4208         NKO,
4209 
4210         /**
4211          * Unicode script "Sundanese".
4212          */
4213         SUNDANESE,
4214 
4215         /**
4216          * Unicode script "Batak".
4217          */
4218         BATAK,
4219 
4220         /**
4221          * Unicode script "Lepcha".
4222          */
4223         LEPCHA,
4224 
4225         /**
4226          * Unicode script "Ol_Chiki".
4227          */
4228         OL_CHIKI,
4229 
4230         /**
4231          * Unicode script "Vai".
4232          */
4233         VAI,
4234 
4235         /**
4236          * Unicode script "Saurashtra".
4237          */
4238         SAURASHTRA,
4239 
4240         /**
4241          * Unicode script "Kayah_Li".
4242          */
4243         KAYAH_LI,
4244 
4245         /**
4246          * Unicode script "Rejang".
4247          */
4248         REJANG,
4249 
4250         /**
4251          * Unicode script "Lycian".
4252          */
4253         LYCIAN,
4254 
4255         /**
4256          * Unicode script "Carian".
4257          */
4258         CARIAN,
4259 
4260         /**
4261          * Unicode script "Lydian".
4262          */
4263         LYDIAN,
4264 
4265         /**
4266          * Unicode script "Cham".
4267          */
4268         CHAM,
4269 
4270         /**
4271          * Unicode script "Tai_Tham".
4272          */
4273         TAI_THAM,
4274 
4275         /**
4276          * Unicode script "Tai_Viet".
4277          */
4278         TAI_VIET,
4279 
4280         /**
4281          * Unicode script "Avestan".
4282          */
4283         AVESTAN,
4284 
4285         /**
4286          * Unicode script "Egyptian_Hieroglyphs".
4287          */
4288         EGYPTIAN_HIEROGLYPHS,
4289 
4290         /**
4291          * Unicode script "Samaritan".
4292          */
4293         SAMARITAN,
4294 
4295         /**
4296          * Unicode script "Mandaic".
4297          */
4298         MANDAIC,
4299 
4300         /**
4301          * Unicode script "Lisu".
4302          */
4303         LISU,
4304 
4305         /**
4306          * Unicode script "Bamum".
4307          */
4308         BAMUM,
4309 
4310         /**
4311          * Unicode script "Javanese".
4312          */
4313         JAVANESE,
4314 
4315         /**
4316          * Unicode script "Meetei_Mayek".
4317          */
4318         MEETEI_MAYEK,
4319 
4320         /**
4321          * Unicode script "Imperial_Aramaic".
4322          */
4323         IMPERIAL_ARAMAIC,
4324 
4325         /**
4326          * Unicode script "Old_South_Arabian".
4327          */
4328         OLD_SOUTH_ARABIAN,
4329 
4330         /**
4331          * Unicode script "Inscriptional_Parthian".
4332          */
4333         INSCRIPTIONAL_PARTHIAN,
4334 
4335         /**
4336          * Unicode script "Inscriptional_Pahlavi".
4337          */
4338         INSCRIPTIONAL_PAHLAVI,
4339 
4340         /**
4341          * Unicode script "Old_Turkic".
4342          */
4343         OLD_TURKIC,
4344 
4345         /**
4346          * Unicode script "Brahmi".
4347          */
4348         BRAHMI,
4349 
4350         /**
4351          * Unicode script "Kaithi".
4352          */
4353         KAITHI,
4354 
4355         /**
4356          * Unicode script "Meroitic_Hieroglyphs".
4357          * @since 1.8
4358          */
4359         MEROITIC_HIEROGLYPHS,
4360 
4361         /**
4362          * Unicode script "Meroitic_Cursive".
4363          * @since 1.8
4364          */
4365         MEROITIC_CURSIVE,
4366 
4367         /**
4368          * Unicode script "Sora_Sompeng".
4369          * @since 1.8
4370          */
4371         SORA_SOMPENG,
4372 
4373         /**
4374          * Unicode script "Chakma".
4375          * @since 1.8
4376          */
4377         CHAKMA,
4378 
4379         /**
4380          * Unicode script "Sharada".
4381          * @since 1.8
4382          */
4383         SHARADA,
4384 
4385         /**
4386          * Unicode script "Takri".
4387          * @since 1.8
4388          */
4389         TAKRI,
4390 
4391         /**
4392          * Unicode script "Miao".
4393          * @since 1.8
4394          */
4395         MIAO,
4396 
4397         /**
4398          * Unicode script "Caucasian_Albanian".
4399          * @since 9
4400          */
4401         CAUCASIAN_ALBANIAN,
4402 
4403         /**
4404          * Unicode script "Bassa_Vah".
4405          * @since 9
4406          */
4407         BASSA_VAH,
4408 
4409         /**
4410          * Unicode script "Duployan".
4411          * @since 9
4412          */
4413         DUPLOYAN,
4414 
4415         /**
4416          * Unicode script "Elbasan".
4417          * @since 9
4418          */
4419         ELBASAN,
4420 
4421         /**
4422          * Unicode script "Grantha".
4423          * @since 9
4424          */
4425         GRANTHA,
4426 
4427         /**
4428          * Unicode script "Pahawh_Hmong".
4429          * @since 9
4430          */
4431         PAHAWH_HMONG,
4432 
4433         /**
4434          * Unicode script "Khojki".
4435          * @since 9
4436          */
4437         KHOJKI,
4438 
4439         /**
4440          * Unicode script "Linear_A".
4441          * @since 9
4442          */
4443         LINEAR_A,
4444 
4445         /**
4446          * Unicode script "Mahajani".
4447          * @since 9
4448          */
4449         MAHAJANI,
4450 
4451         /**
4452          * Unicode script "Manichaean".
4453          * @since 9
4454          */
4455         MANICHAEAN,
4456 
4457         /**
4458          * Unicode script "Mende_Kikakui".
4459          * @since 9
4460          */
4461         MENDE_KIKAKUI,
4462 
4463         /**
4464          * Unicode script "Modi".
4465          * @since 9
4466          */
4467         MODI,
4468 
4469         /**
4470          * Unicode script "Mro".
4471          * @since 9
4472          */
4473         MRO,
4474 
4475         /**
4476          * Unicode script "Old_North_Arabian".
4477          * @since 9
4478          */
4479         OLD_NORTH_ARABIAN,
4480 
4481         /**
4482          * Unicode script "Nabataean".
4483          * @since 9
4484          */
4485         NABATAEAN,
4486 
4487         /**
4488          * Unicode script "Palmyrene".
4489          * @since 9
4490          */
4491         PALMYRENE,
4492 
4493         /**
4494          * Unicode script "Pau_Cin_Hau".
4495          * @since 9
4496          */
4497         PAU_CIN_HAU,
4498 
4499         /**
4500          * Unicode script "Old_Permic".
4501          * @since 9
4502          */
4503         OLD_PERMIC,
4504 
4505         /**
4506          * Unicode script "Psalter_Pahlavi".
4507          * @since 9
4508          */
4509         PSALTER_PAHLAVI,
4510 
4511         /**
4512          * Unicode script "Siddham".
4513          * @since 9
4514          */
4515         SIDDHAM,
4516 
4517         /**
4518          * Unicode script "Khudawadi".
4519          * @since 9
4520          */
4521         KHUDAWADI,
4522 
4523         /**
4524          * Unicode script "Tirhuta".
4525          * @since 9
4526          */
4527         TIRHUTA,
4528 
4529         /**
4530          * Unicode script "Warang_Citi".
4531          * @since 9
4532          */
4533         WARANG_CITI,
4534 
4535         /**
4536          * Unicode script "Ahom".
4537          * @since 9
4538          */
4539         AHOM,
4540 
4541         /**
4542          * Unicode script "Anatolian_Hieroglyphs".
4543          * @since 9
4544          */
4545         ANATOLIAN_HIEROGLYPHS,
4546 
4547         /**
4548          * Unicode script "Hatran".
4549          * @since 9
4550          */
4551         HATRAN,
4552 
4553         /**
4554          * Unicode script "Multani".
4555          * @since 9
4556          */
4557         MULTANI,
4558 
4559         /**
4560          * Unicode script "Old_Hungarian".
4561          * @since 9
4562          */
4563         OLD_HUNGARIAN,
4564 
4565         /**
4566          * Unicode script "SignWriting".
4567          * @since 9
4568          */
4569         SIGNWRITING,
4570 
4571         /**
4572          * Unicode script "Adlam".
4573          * @since 11
4574          */
4575         ADLAM,
4576 
4577         /**
4578          * Unicode script "Bhaiksuki".
4579          * @since 11
4580          */
4581         BHAIKSUKI,
4582 
4583         /**
4584          * Unicode script "Marchen".
4585          * @since 11
4586          */
4587         MARCHEN,
4588 
4589         /**
4590          * Unicode script "Newa".
4591          * @since 11
4592          */
4593         NEWA,
4594 
4595         /**
4596          * Unicode script "Osage".
4597          * @since 11
4598          */
4599         OSAGE,
4600 
4601         /**
4602          * Unicode script "Tangut".
4603          * @since 11
4604          */
4605         TANGUT,
4606 
4607         /**
4608          * Unicode script "Masaram_Gondi".
4609          * @since 11
4610          */
4611         MASARAM_GONDI,
4612 
4613         /**
4614          * Unicode script "Nushu".
4615          * @since 11
4616          */
4617         NUSHU,
4618 
4619         /**
4620          * Unicode script "Soyombo".
4621          * @since 11
4622          */
4623         SOYOMBO,
4624 
4625         /**
4626          * Unicode script "Zanabazar_Square".
4627          * @since 11
4628          */
4629         ZANABAZAR_SQUARE,
4630 
4631         /**
4632          * Unicode script "Unknown".
4633          */
4634         UNKNOWN;
4635 
4636         private static final int[] scriptStarts = {
4637             0x0000,   // 0000..0040; COMMON
4638             0x0041,   // 0041..005A; LATIN
4639             0x005B,   // 005B..0060; COMMON
4640             0x0061,   // 0061..007A; LATIN
4641             0x007B,   // 007B..00A9; COMMON
4642             0x00AA,   // 00AA      ; LATIN
4643             0x00AB,   // 00AB..00B9; COMMON
4644             0x00BA,   // 00BA      ; LATIN
4645             0x00BB,   // 00BB..00BF; COMMON
4646             0x00C0,   // 00C0..00D6; LATIN
4647             0x00D7,   // 00D7      ; COMMON
4648             0x00D8,   // 00D8..00F6; LATIN
4649             0x00F7,   // 00F7      ; COMMON
4650             0x00F8,   // 00F8..02B8; LATIN
4651             0x02B9,   // 02B9..02DF; COMMON
4652             0x02E0,   // 02E0..02E4; LATIN
4653             0x02E5,   // 02E5..02E9; COMMON
4654             0x02EA,   // 02EA..02EB; BOPOMOFO
4655             0x02EC,   // 02EC..02FF; COMMON
4656             0x0300,   // 0300..036F; INHERITED
4657             0x0370,   // 0370..0373; GREEK
4658             0x0374,   // 0374      ; COMMON
4659             0x0375,   // 0375..0377; GREEK
4660             0x0378,   // 0378..0379; UNKNOWN
4661             0x037A,   // 037A..037D; GREEK
4662             0x037E,   // 037E      ; COMMON
4663             0x037F,   // 037F      ; GREEK
4664             0x0380,   // 0380..0383; UNKNOWN
4665             0x0384,   // 0384      ; GREEK
4666             0x0385,   // 0385      ; COMMON
4667             0x0386,   // 0386      ; GREEK
4668             0x0387,   // 0387      ; COMMON
4669             0x0388,   // 0388..038A; GREEK
4670             0x038B,   // 038B      ; UNKNOWN
4671             0x038C,   // 038C      ; GREEK
4672             0x038D,   // 038D      ; UNKNOWN
4673             0x038E,   // 038E..03A1; GREEK
4674             0x03A2,   // 03A2      ; UNKNOWN
4675             0x03A3,   // 03A3..03E1; GREEK
4676             0x03E2,   // 03E2..03EF; COPTIC
4677             0x03F0,   // 03F0..03FF; GREEK
4678             0x0400,   // 0400..0484; CYRILLIC
4679             0x0485,   // 0485..0486; INHERITED
4680             0x0487,   // 0487..052F; CYRILLIC
4681             0x0530,   // 0530      ; UNKNOWN
4682             0x0531,   // 0531..0556; ARMENIAN
4683             0x0557,   // 0557..0558; UNKNOWN
4684             0x0559,   // 0559..055F; ARMENIAN
4685             0x0560,   // 0560      ; UNKNOWN
4686             0x0561,   // 0561..0587; ARMENIAN
4687             0x0588,   // 0588      ; UNKNOWN
4688             0x0589,   // 0589      ; COMMON
4689             0x058A,   // 058A      ; ARMENIAN
4690             0x058B,   // 058B..058C; UNKNOWN
4691             0x058D,   // 058D..058F; ARMENIAN
4692             0x0590,   // 0590      ; UNKNOWN
4693             0x0591,   // 0591..05C7; HEBREW
4694             0x05C8,   // 05C8..05CF; UNKNOWN
4695             0x05D0,   // 05D0..05EA; HEBREW
4696             0x05EB,   // 05EB..05EF; UNKNOWN
4697             0x05F0,   // 05F0..05F4; HEBREW
4698             0x05F5,   // 05F5..05FF; UNKNOWN
4699             0x0600,   // 0600..0604; ARABIC
4700             0x0605,   // 0605      ; COMMON
4701             0x0606,   // 0606..060B; ARABIC
4702             0x060C,   // 060C      ; COMMON
4703             0x060D,   // 060D..061A; ARABIC
4704             0x061B,   // 061B      ; COMMON
4705             0x061C,   // 061C      ; ARABIC
4706             0x061D,   // 061D      ; UNKNOWN
4707             0x061E,   // 061E      ; ARABIC
4708             0x061F,   // 061F      ; COMMON
4709             0x0620,   // 0620..063F; ARABIC
4710             0x0640,   // 0640      ; COMMON
4711             0x0641,   // 0641..064A; ARABIC
4712             0x064B,   // 064B..0655; INHERITED
4713             0x0656,   // 0656..066F; ARABIC
4714             0x0670,   // 0670      ; INHERITED
4715             0x0671,   // 0671..06DC; ARABIC
4716             0x06DD,   // 06DD      ; COMMON
4717             0x06DE,   // 06DE..06FF; ARABIC
4718             0x0700,   // 0700..070D; SYRIAC
4719             0x070E,   // 070E      ; UNKNOWN
4720             0x070F,   // 070F..074A; SYRIAC
4721             0x074B,   // 074B..074C; UNKNOWN
4722             0x074D,   // 074D..074F; SYRIAC
4723             0x0750,   // 0750..077F; ARABIC
4724             0x0780,   // 0780..07B1; THAANA
4725             0x07B2,   // 07B2..07BF; UNKNOWN
4726             0x07C0,   // 07C0..07FA; NKO
4727             0x07FB,   // 07FB..07FF; UNKNOWN
4728             0x0800,   // 0800..082D; SAMARITAN
4729             0x082E,   // 082E..082F; UNKNOWN
4730             0x0830,   // 0830..083E; SAMARITAN
4731             0x083F,   // 083F      ; UNKNOWN
4732             0x0840,   // 0840..085B; MANDAIC
4733             0x085C,   // 085C..085D; UNKNOWN
4734             0x085E,   // 085E      ; MANDAIC
4735             0x085F,   // 085F      ; UNKNOWN
4736             0x0860,   // 0860..086A; SYRIAC
4737             0x086B,   // 086B..089F; UNKNOWN
4738             0x08A0,   // 08A0..08B4; ARABIC
4739             0x08B5,   // 08B5      ; UNKNOWN
4740             0x08B6,   // 08B6..08BD; ARABIC
4741             0x08BE,   // 08BE..08D3; UNKNOWN
4742             0x08D4,   // 08D4..08E1; ARABIC
4743             0x08E2,   // 08E2      ; COMMON
4744             0x08E3,   // 08E3..08FF; ARABIC
4745             0x0900,   // 0900..0950; DEVANAGARI
4746             0x0951,   // 0951..0952; INHERITED
4747             0x0953,   // 0953..0963; DEVANAGARI
4748             0x0964,   // 0964..0965; COMMON
4749             0x0966,   // 0966..097F; DEVANAGARI
4750             0x0980,   // 0980..0983; BENGALI
4751             0x0984,   // 0984      ; UNKNOWN
4752             0x0985,   // 0985..098C; BENGALI
4753             0x098D,   // 098D..098E; UNKNOWN
4754             0x098F,   // 098F..0990; BENGALI
4755             0x0991,   // 0991..0992; UNKNOWN
4756             0x0993,   // 0993..09A8; BENGALI
4757             0x09A9,   // 09A9      ; UNKNOWN
4758             0x09AA,   // 09AA..09B0; BENGALI
4759             0x09B1,   // 09B1      ; UNKNOWN
4760             0x09B2,   // 09B2      ; BENGALI
4761             0x09B3,   // 09B3..09B5; UNKNOWN
4762             0x09B6,   // 09B6..09B9; BENGALI
4763             0x09BA,   // 09BA..09BB; UNKNOWN
4764             0x09BC,   // 09BC..09C4; BENGALI
4765             0x09C5,   // 09C5..09C6; UNKNOWN
4766             0x09C7,   // 09C7..09C8; BENGALI
4767             0x09C9,   // 09C9..09CA; UNKNOWN
4768             0x09CB,   // 09CB..09CE; BENGALI
4769             0x09CF,   // 09CF..09D6; UNKNOWN
4770             0x09D7,   // 09D7      ; BENGALI
4771             0x09D8,   // 09D8..09DB; UNKNOWN
4772             0x09DC,   // 09DC..09DD; BENGALI
4773             0x09DE,   // 09DE      ; UNKNOWN
4774             0x09DF,   // 09DF..09E3; BENGALI
4775             0x09E4,   // 09E4..09E5; UNKNOWN
4776             0x09E6,   // 09E6..09FD; BENGALI
4777             0x09FE,   // 09FE..0A00; UNKNOWN
4778             0x0A01,   // 0A01..0A03; GURMUKHI
4779             0x0A04,   // 0A04      ; UNKNOWN
4780             0x0A05,   // 0A05..0A0A; GURMUKHI
4781             0x0A0B,   // 0A0B..0A0E; UNKNOWN
4782             0x0A0F,   // 0A0F..0A10; GURMUKHI
4783             0x0A11,   // 0A11..0A12; UNKNOWN
4784             0x0A13,   // 0A13..0A28; GURMUKHI
4785             0x0A29,   // 0A29      ; UNKNOWN
4786             0x0A2A,   // 0A2A..0A30; GURMUKHI
4787             0x0A31,   // 0A31      ; UNKNOWN
4788             0x0A32,   // 0A32..0A33; GURMUKHI
4789             0x0A34,   // 0A34      ; UNKNOWN
4790             0x0A35,   // 0A35..0A36; GURMUKHI
4791             0x0A37,   // 0A37      ; UNKNOWN
4792             0x0A38,   // 0A38..0A39; GURMUKHI
4793             0x0A3A,   // 0A3A..0A3B; UNKNOWN
4794             0x0A3C,   // 0A3C      ; GURMUKHI
4795             0x0A3D,   // 0A3D      ; UNKNOWN
4796             0x0A3E,   // 0A3E..0A42; GURMUKHI
4797             0x0A43,   // 0A43..0A46; UNKNOWN
4798             0x0A47,   // 0A47..0A48; GURMUKHI
4799             0x0A49,   // 0A49..0A4A; UNKNOWN
4800             0x0A4B,   // 0A4B..0A4D; GURMUKHI
4801             0x0A4E,   // 0A4E..0A50; UNKNOWN
4802             0x0A51,   // 0A51      ; GURMUKHI
4803             0x0A52,   // 0A52..0A58; UNKNOWN
4804             0x0A59,   // 0A59..0A5C; GURMUKHI
4805             0x0A5D,   // 0A5D      ; UNKNOWN
4806             0x0A5E,   // 0A5E      ; GURMUKHI
4807             0x0A5F,   // 0A5F..0A65; UNKNOWN
4808             0x0A66,   // 0A66..0A75; GURMUKHI
4809             0x0A76,   // 0A76..0A80; UNKNOWN
4810             0x0A81,   // 0A81..0A83; GUJARATI
4811             0x0A84,   // 0A84      ; UNKNOWN
4812             0x0A85,   // 0A85..0A8D; GUJARATI
4813             0x0A8E,   // 0A8E      ; UNKNOWN
4814             0x0A8F,   // 0A8F..0A91; GUJARATI
4815             0x0A92,   // 0A92      ; UNKNOWN
4816             0x0A93,   // 0A93..0AA8; GUJARATI
4817             0x0AA9,   // 0AA9      ; UNKNOWN
4818             0x0AAA,   // 0AAA..0AB0; GUJARATI
4819             0x0AB1,   // 0AB1      ; UNKNOWN
4820             0x0AB2,   // 0AB2..0AB3; GUJARATI
4821             0x0AB4,   // 0AB4      ; UNKNOWN
4822             0x0AB5,   // 0AB5..0AB9; GUJARATI
4823             0x0ABA,   // 0ABA..0ABB; UNKNOWN
4824             0x0ABC,   // 0ABC..0AC5; GUJARATI
4825             0x0AC6,   // 0AC6      ; UNKNOWN
4826             0x0AC7,   // 0AC7..0AC9; GUJARATI
4827             0x0ACA,   // 0ACA      ; UNKNOWN
4828             0x0ACB,   // 0ACB..0ACD; GUJARATI
4829             0x0ACE,   // 0ACE..0ACF; UNKNOWN
4830             0x0AD0,   // 0AD0      ; GUJARATI
4831             0x0AD1,   // 0AD1..0ADF; UNKNOWN
4832             0x0AE0,   // 0AE0..0AE3; GUJARATI
4833             0x0AE4,   // 0AE4..0AE5; UNKNOWN
4834             0x0AE6,   // 0AE6..0AF1; GUJARATI
4835             0x0AF2,   // 0AF2..0AF8; UNKNOWN
4836             0x0AF9,   // 0AF9..0AFF; GUJARATI
4837             0x0B00,   // 0B00      ; UNKNOWN
4838             0x0B01,   // 0B01..0B03; ORIYA
4839             0x0B04,   // 0B04      ; UNKNOWN
4840             0x0B05,   // 0B05..0B0C; ORIYA
4841             0x0B0D,   // 0B0D..0B0E; UNKNOWN
4842             0x0B0F,   // 0B0F..0B10; ORIYA
4843             0x0B11,   // 0B11..0B12; UNKNOWN
4844             0x0B13,   // 0B13..0B28; ORIYA
4845             0x0B29,   // 0B29      ; UNKNOWN
4846             0x0B2A,   // 0B2A..0B30; ORIYA
4847             0x0B31,   // 0B31      ; UNKNOWN
4848             0x0B32,   // 0B32..0B33; ORIYA
4849             0x0B34,   // 0B34      ; UNKNOWN
4850             0x0B35,   // 0B35..0B39; ORIYA
4851             0x0B3A,   // 0B3A..0B3B; UNKNOWN
4852             0x0B3C,   // 0B3C..0B44; ORIYA
4853             0x0B45,   // 0B45..0B46; UNKNOWN
4854             0x0B47,   // 0B47..0B48; ORIYA
4855             0x0B49,   // 0B49..0B4A; UNKNOWN
4856             0x0B4B,   // 0B4B..0B4D; ORIYA
4857             0x0B4E,   // 0B4E..0B55; UNKNOWN
4858             0x0B56,   // 0B56..0B57; ORIYA
4859             0x0B58,   // 0B58..0B5B; UNKNOWN
4860             0x0B5C,   // 0B5C..0B5D; ORIYA
4861             0x0B5E,   // 0B5E      ; UNKNOWN
4862             0x0B5F,   // 0B5F..0B63; ORIYA
4863             0x0B64,   // 0B64..0B65; UNKNOWN
4864             0x0B66,   // 0B66..0B77; ORIYA
4865             0x0B78,   // 0B78..0B81; UNKNOWN
4866             0x0B82,   // 0B82..0B83; TAMIL
4867             0x0B84,   // 0B84      ; UNKNOWN
4868             0x0B85,   // 0B85..0B8A; TAMIL
4869             0x0B8B,   // 0B8B..0B8D; UNKNOWN
4870             0x0B8E,   // 0B8E..0B90; TAMIL
4871             0x0B91,   // 0B91      ; UNKNOWN
4872             0x0B92,   // 0B92..0B95; TAMIL
4873             0x0B96,   // 0B96..0B98; UNKNOWN
4874             0x0B99,   // 0B99..0B9A; TAMIL
4875             0x0B9B,   // 0B9B      ; UNKNOWN
4876             0x0B9C,   // 0B9C      ; TAMIL
4877             0x0B9D,   // 0B9D      ; UNKNOWN
4878             0x0B9E,   // 0B9E..0B9F; TAMIL
4879             0x0BA0,   // 0BA0..0BA2; UNKNOWN
4880             0x0BA3,   // 0BA3..0BA4; TAMIL
4881             0x0BA5,   // 0BA5..0BA7; UNKNOWN
4882             0x0BA8,   // 0BA8..0BAA; TAMIL
4883             0x0BAB,   // 0BAB..0BAD; UNKNOWN
4884             0x0BAE,   // 0BAE..0BB9; TAMIL
4885             0x0BBA,   // 0BBA..0BBD; UNKNOWN
4886             0x0BBE,   // 0BBE..0BC2; TAMIL
4887             0x0BC3,   // 0BC3..0BC5; UNKNOWN
4888             0x0BC6,   // 0BC6..0BC8; TAMIL
4889             0x0BC9,   // 0BC9      ; UNKNOWN
4890             0x0BCA,   // 0BCA..0BCD; TAMIL
4891             0x0BCE,   // 0BCE..0BCF; UNKNOWN
4892             0x0BD0,   // 0BD0      ; TAMIL
4893             0x0BD1,   // 0BD1..0BD6; UNKNOWN
4894             0x0BD7,   // 0BD7      ; TAMIL
4895             0x0BD8,   // 0BD8..0BE5; UNKNOWN
4896             0x0BE6,   // 0BE6..0BFA; TAMIL
4897             0x0BFB,   // 0BFB..0BFF; UNKNOWN
4898             0x0C00,   // 0C00..0C03; TELUGU
4899             0x0C04,   // 0C04      ; UNKNOWN
4900             0x0C05,   // 0C05..0C0C; TELUGU
4901             0x0C0D,   // 0C0D      ; UNKNOWN
4902             0x0C0E,   // 0C0E..0C10; TELUGU
4903             0x0C11,   // 0C11      ; UNKNOWN
4904             0x0C12,   // 0C12..0C28; TELUGU
4905             0x0C29,   // 0C29      ; UNKNOWN
4906             0x0C2A,   // 0C2A..0C39; TELUGU
4907             0x0C3A,   // 0C3A..0C3C; UNKNOWN
4908             0x0C3D,   // 0C3D..0C44; TELUGU
4909             0x0C45,   // 0C45      ; UNKNOWN
4910             0x0C46,   // 0C46..0C48; TELUGU
4911             0x0C49,   // 0C49      ; UNKNOWN
4912             0x0C4A,   // 0C4A..0C4D; TELUGU
4913             0x0C4E,   // 0C4E..0C54; UNKNOWN
4914             0x0C55,   // 0C55..0C56; TELUGU
4915             0x0C57,   // 0C57      ; UNKNOWN
4916             0x0C58,   // 0C58..0C5A; TELUGU
4917             0x0C5B,   // 0C5B..0C5F; UNKNOWN
4918             0x0C60,   // 0C60..0C63; TELUGU
4919             0x0C64,   // 0C64..0C65; UNKNOWN
4920             0x0C66,   // 0C66..0C6F; TELUGU
4921             0x0C70,   // 0C70..0C77; UNKNOWN
4922             0x0C78,   // 0C78..0C7F; TELUGU
4923             0x0C80,   // 0C80..0C83; KANNADA
4924             0x0C84,   // 0C84      ; UNKNOWN
4925             0x0C85,   // 0C85..0C8C; KANNADA
4926             0x0C8D,   // 0C8D      ; UNKNOWN
4927             0x0C8E,   // 0C8E..0C90; KANNADA
4928             0x0C91,   // 0C91      ; UNKNOWN
4929             0x0C92,   // 0C92..0CA8; KANNADA
4930             0x0CA9,   // 0CA9      ; UNKNOWN
4931             0x0CAA,   // 0CAA..0CB3; KANNADA
4932             0x0CB4,   // 0CB4      ; UNKNOWN
4933             0x0CB5,   // 0CB5..0CB9; KANNADA
4934             0x0CBA,   // 0CBA..0CBB; UNKNOWN
4935             0x0CBC,   // 0CBC..0CC4; KANNADA
4936             0x0CC5,   // 0CC5      ; UNKNOWN
4937             0x0CC6,   // 0CC6..0CC8; KANNADA
4938             0x0CC9,   // 0CC9      ; UNKNOWN
4939             0x0CCA,   // 0CCA..0CCD; KANNADA
4940             0x0CCE,   // 0CCE..0CD4; UNKNOWN
4941             0x0CD5,   // 0CD5..0CD6; KANNADA
4942             0x0CD7,   // 0CD7..0CDD; UNKNOWN
4943             0x0CDE,   // 0CDE      ; KANNADA
4944             0x0CDF,   // 0CDF      ; UNKNOWN
4945             0x0CE0,   // 0CE0..0CE3; KANNADA
4946             0x0CE4,   // 0CE4..0CE5; UNKNOWN
4947             0x0CE6,   // 0CE6..0CEF; KANNADA
4948             0x0CF0,   // 0CF0      ; UNKNOWN
4949             0x0CF1,   // 0CF1..0CF2; KANNADA
4950             0x0CF3,   // 0CF3..0CFF; UNKNOWN
4951             0x0D00,   // 0D00..0D03; MALAYALAM
4952             0x0D04,   // 0D04      ; UNKNOWN
4953             0x0D05,   // 0D05..0D0C; MALAYALAM
4954             0x0D0D,   // 0D0D      ; UNKNOWN
4955             0x0D0E,   // 0D0E..0D10; MALAYALAM
4956             0x0D11,   // 0D11      ; UNKNOWN
4957             0x0D12,   // 0D12..0D44; MALAYALAM
4958             0x0D45,   // 0D45      ; UNKNOWN
4959             0x0D46,   // 0D46..0D48; MALAYALAM
4960             0x0D49,   // 0D49      ; UNKNOWN
4961             0x0D4A,   // 0D4A..0D4F; MALAYALAM
4962             0x0D50,   // 0D50..0D53; UNKNOWN
4963             0x0D54,   // 0D54..0D63; MALAYALAM
4964             0x0D64,   // 0D64..0D65; UNKNOWN
4965             0x0D66,   // 0D66..0D7F; MALAYALAM
4966             0x0D80,   // 0D80..0D81; UNKNOWN
4967             0x0D82,   // 0D82..0D83; SINHALA
4968             0x0D84,   // 0D84      ; UNKNOWN
4969             0x0D85,   // 0D85..0D96; SINHALA
4970             0x0D97,   // 0D97..0D99; UNKNOWN
4971             0x0D9A,   // 0D9A..0DB1; SINHALA
4972             0x0DB2,   // 0DB2      ; UNKNOWN
4973             0x0DB3,   // 0DB3..0DBB; SINHALA
4974             0x0DBC,   // 0DBC      ; UNKNOWN
4975             0x0DBD,   // 0DBD      ; SINHALA
4976             0x0DBE,   // 0DBE..0DBF; UNKNOWN
4977             0x0DC0,   // 0DC0..0DC6; SINHALA
4978             0x0DC7,   // 0DC7..0DC9; UNKNOWN
4979             0x0DCA,   // 0DCA      ; SINHALA
4980             0x0DCB,   // 0DCB..0DCE; UNKNOWN
4981             0x0DCF,   // 0DCF..0DD4; SINHALA
4982             0x0DD5,   // 0DD5      ; UNKNOWN
4983             0x0DD6,   // 0DD6      ; SINHALA
4984             0x0DD7,   // 0DD7      ; UNKNOWN
4985             0x0DD8,   // 0DD8..0DDF; SINHALA
4986             0x0DE0,   // 0DE0..0DE5; UNKNOWN
4987             0x0DE6,   // 0DE6..0DEF; SINHALA
4988             0x0DF0,   // 0DF0..0DF1; UNKNOWN
4989             0x0DF2,   // 0DF2..0DF4; SINHALA
4990             0x0DF5,   // 0DF5..0E00; UNKNOWN
4991             0x0E01,   // 0E01..0E3A; THAI
4992             0x0E3B,   // 0E3B..0E3E; UNKNOWN
4993             0x0E3F,   // 0E3F      ; COMMON
4994             0x0E40,   // 0E40..0E5B; THAI
4995             0x0E5C,   // 0E5C..0E80; UNKNOWN
4996             0x0E81,   // 0E81..0E82; LAO
4997             0x0E83,   // 0E83      ; UNKNOWN
4998             0x0E84,   // 0E84      ; LAO
4999             0x0E85,   // 0E85..0E86; UNKNOWN
5000             0x0E87,   // 0E87..0E88; LAO
5001             0x0E89,   // 0E89      ; UNKNOWN
5002             0x0E8A,   // 0E8A      ; LAO
5003             0x0E8B,   // 0E8B..0E8C; UNKNOWN
5004             0x0E8D,   // 0E8D      ; LAO
5005             0x0E8E,   // 0E8E..0E93; UNKNOWN
5006             0x0E94,   // 0E94..0E97; LAO
5007             0x0E98,   // 0E98      ; UNKNOWN
5008             0x0E99,   // 0E99..0E9F; LAO
5009             0x0EA0,   // 0EA0      ; UNKNOWN
5010             0x0EA1,   // 0EA1..0EA3; LAO
5011             0x0EA4,   // 0EA4      ; UNKNOWN
5012             0x0EA5,   // 0EA5      ; LAO
5013             0x0EA6,   // 0EA6      ; UNKNOWN
5014             0x0EA7,   // 0EA7      ; LAO
5015             0x0EA8,   // 0EA8..0EA9; UNKNOWN
5016             0x0EAA,   // 0EAA..0EAB; LAO
5017             0x0EAC,   // 0EAC      ; UNKNOWN
5018             0x0EAD,   // 0EAD..0EB9; LAO
5019             0x0EBA,   // 0EBA      ; UNKNOWN
5020             0x0EBB,   // 0EBB..0EBD; LAO
5021             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5022             0x0EC0,   // 0EC0..0EC4; LAO
5023             0x0EC5,   // 0EC5      ; UNKNOWN
5024             0x0EC6,   // 0EC6      ; LAO
5025             0x0EC7,   // 0EC7      ; UNKNOWN
5026             0x0EC8,   // 0EC8..0ECD; LAO
5027             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5028             0x0ED0,   // 0ED0..0ED9; LAO
5029             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5030             0x0EDC,   // 0EDC..0EDF; LAO
5031             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5032             0x0F00,   // 0F00..0F47; TIBETAN
5033             0x0F48,   // 0F48      ; UNKNOWN
5034             0x0F49,   // 0F49..0F6C; TIBETAN
5035             0x0F6D,   // 0F6D..0F70; UNKNOWN
5036             0x0F71,   // 0F71..0F97; TIBETAN
5037             0x0F98,   // 0F98      ; UNKNOWN
5038             0x0F99,   // 0F99..0FBC; TIBETAN
5039             0x0FBD,   // 0FBD      ; UNKNOWN
5040             0x0FBE,   // 0FBE..0FCC; TIBETAN
5041             0x0FCD,   // 0FCD      ; UNKNOWN
5042             0x0FCE,   // 0FCE..0FD4; TIBETAN
5043             0x0FD5,   // 0FD5..0FD8; COMMON
5044             0x0FD9,   // 0FD9..0FDA; TIBETAN
5045             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5046             0x1000,   // 1000..109F; MYANMAR
5047             0x10A0,   // 10A0..10C5; GEORGIAN
5048             0x10C6,   // 10C6      ; UNKNOWN
5049             0x10C7,   // 10C7      ; GEORGIAN
5050             0x10C8,   // 10C8..10CC; UNKNOWN
5051             0x10CD,   // 10CD      ; GEORGIAN
5052             0x10CE,   // 10CE..10CF; UNKNOWN
5053             0x10D0,   // 10D0..10FA; GEORGIAN
5054             0x10FB,   // 10FB      ; COMMON
5055             0x10FC,   // 10FC..10FF; GEORGIAN
5056             0x1100,   // 1100..11FF; HANGUL
5057             0x1200,   // 1200..1248; ETHIOPIC
5058             0x1249,   // 1249      ; UNKNOWN
5059             0x124A,   // 124A..124D; ETHIOPIC
5060             0x124E,   // 124E..124F; UNKNOWN
5061             0x1250,   // 1250..1256; ETHIOPIC
5062             0x1257,   // 1257      ; UNKNOWN
5063             0x1258,   // 1258      ; ETHIOPIC
5064             0x1259,   // 1259      ; UNKNOWN
5065             0x125A,   // 125A..125D; ETHIOPIC
5066             0x125E,   // 125E..125F; UNKNOWN
5067             0x1260,   // 1260..1288; ETHIOPIC
5068             0x1289,   // 1289      ; UNKNOWN
5069             0x128A,   // 128A..128D; ETHIOPIC
5070             0x128E,   // 128E..128F; UNKNOWN
5071             0x1290,   // 1290..12B0; ETHIOPIC
5072             0x12B1,   // 12B1      ; UNKNOWN
5073             0x12B2,   // 12B2..12B5; ETHIOPIC
5074             0x12B6,   // 12B6..12B7; UNKNOWN
5075             0x12B8,   // 12B8..12BE; ETHIOPIC
5076             0x12BF,   // 12BF      ; UNKNOWN
5077             0x12C0,   // 12C0      ; ETHIOPIC
5078             0x12C1,   // 12C1      ; UNKNOWN
5079             0x12C2,   // 12C2..12C5; ETHIOPIC
5080             0x12C6,   // 12C6..12C7; UNKNOWN
5081             0x12C8,   // 12C8..12D6; ETHIOPIC
5082             0x12D7,   // 12D7      ; UNKNOWN
5083             0x12D8,   // 12D8..1310; ETHIOPIC
5084             0x1311,   // 1311      ; UNKNOWN
5085             0x1312,   // 1312..1315; ETHIOPIC
5086             0x1316,   // 1316..1317; UNKNOWN
5087             0x1318,   // 1318..135A; ETHIOPIC
5088             0x135B,   // 135B..135C; UNKNOWN
5089             0x135D,   // 135D..137C; ETHIOPIC
5090             0x137D,   // 137D..137F; UNKNOWN
5091             0x1380,   // 1380..1399; ETHIOPIC
5092             0x139A,   // 139A..139F; UNKNOWN
5093             0x13A0,   // 13A0..13F5; CHEROKEE
5094             0x13F6,   // 13F6..13F7; UNKNOWN
5095             0x13F8,   // 13F8..13FD; CHEROKEE
5096             0x13FE,   // 13FE..13FF; UNKNOWN
5097             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5098             0x1680,   // 1680..169C; OGHAM
5099             0x169D,   // 169D..169F; UNKNOWN
5100             0x16A0,   // 16A0..16EA; RUNIC
5101             0x16EB,   // 16EB..16ED; COMMON
5102             0x16EE,   // 16EE..16F8; RUNIC
5103             0x16F9,   // 16F9..16FF; UNKNOWN
5104             0x1700,   // 1700..170C; TAGALOG
5105             0x170D,   // 170D      ; UNKNOWN
5106             0x170E,   // 170E..1714; TAGALOG
5107             0x1715,   // 1715..171F; UNKNOWN
5108             0x1720,   // 1720..1734; HANUNOO
5109             0x1735,   // 1735..1736; COMMON
5110             0x1737,   // 1737..173F; UNKNOWN
5111             0x1740,   // 1740..1753; BUHID
5112             0x1754,   // 1754..175F; UNKNOWN
5113             0x1760,   // 1760..176C; TAGBANWA
5114             0x176D,   // 176D      ; UNKNOWN
5115             0x176E,   // 176E..1770; TAGBANWA
5116             0x1771,   // 1771      ; UNKNOWN
5117             0x1772,   // 1772..1773; TAGBANWA
5118             0x1774,   // 1774..177F; UNKNOWN
5119             0x1780,   // 1780..17DD; KHMER
5120             0x17DE,   // 17DE..17DF; UNKNOWN
5121             0x17E0,   // 17E0..17E9; KHMER
5122             0x17EA,   // 17EA..17EF; UNKNOWN
5123             0x17F0,   // 17F0..17F9; KHMER
5124             0x17FA,   // 17FA..17FF; UNKNOWN
5125             0x1800,   // 1800..1801; MONGOLIAN
5126             0x1802,   // 1802..1803; COMMON
5127             0x1804,   // 1804      ; MONGOLIAN
5128             0x1805,   // 1805      ; COMMON
5129             0x1806,   // 1806..180E; MONGOLIAN
5130             0x180F,   // 180F      ; UNKNOWN
5131             0x1810,   // 1810..1819; MONGOLIAN
5132             0x181A,   // 181A..181F; UNKNOWN
5133             0x1820,   // 1820..1877; MONGOLIAN
5134             0x1878,   // 1878..187F; UNKNOWN
5135             0x1880,   // 1880..18AA; MONGOLIAN
5136             0x18AB,   // 18AB..18AF; UNKNOWN
5137             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5138             0x18F6,   // 18F6..18FF; UNKNOWN
5139             0x1900,   // 1900..191E; LIMBU
5140             0x191F,   // 191F      ; UNKNOWN
5141             0x1920,   // 1920..192B; LIMBU
5142             0x192C,   // 192C..192F; UNKNOWN
5143             0x1930,   // 1930..193B; LIMBU
5144             0x193C,   // 193C..193F; UNKNOWN
5145             0x1940,   // 1940      ; LIMBU
5146             0x1941,   // 1941..1943; UNKNOWN
5147             0x1944,   // 1944..194F; LIMBU
5148             0x1950,   // 1950..196D; TAI_LE
5149             0x196E,   // 196E..196F; UNKNOWN
5150             0x1970,   // 1970..1974; TAI_LE
5151             0x1975,   // 1975..197F; UNKNOWN
5152             0x1980,   // 1980..19AB; NEW_TAI_LUE
5153             0x19AC,   // 19AC..19AF; UNKNOWN
5154             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5155             0x19CA,   // 19CA..19CF; UNKNOWN
5156             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5157             0x19DB,   // 19DB..19DD; UNKNOWN
5158             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5159             0x19E0,   // 19E0..19FF; KHMER
5160             0x1A00,   // 1A00..1A1B; BUGINESE
5161             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5162             0x1A1E,   // 1A1E..1A1F; BUGINESE
5163             0x1A20,   // 1A20..1A5E; TAI_THAM
5164             0x1A5F,   // 1A5F      ; UNKNOWN
5165             0x1A60,   // 1A60..1A7C; TAI_THAM
5166             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5167             0x1A7F,   // 1A7F..1A89; TAI_THAM
5168             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5169             0x1A90,   // 1A90..1A99; TAI_THAM
5170             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5171             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5172             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5173             0x1AB0,   // 1AB0..1ABE; INHERITED
5174             0x1ABF,   // 1ABF..1AFF; UNKNOWN
5175             0x1B00,   // 1B00..1B4B; BALINESE
5176             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5177             0x1B50,   // 1B50..1B7C; BALINESE
5178             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5179             0x1B80,   // 1B80..1BBF; SUNDANESE
5180             0x1BC0,   // 1BC0..1BF3; BATAK
5181             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5182             0x1BFC,   // 1BFC..1BFF; BATAK
5183             0x1C00,   // 1C00..1C37; LEPCHA
5184             0x1C38,   // 1C38..1C3A; UNKNOWN
5185             0x1C3B,   // 1C3B..1C49; LEPCHA
5186             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5187             0x1C4D,   // 1C4D..1C4F; LEPCHA
5188             0x1C50,   // 1C50..1C7F; OL_CHIKI
5189             0x1C80,   // 1C80..1C88; CYRILLIC
5190             0x1C89,   // 1C89..1CBF; UNKNOWN
5191             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5192             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5193             0x1CD0,   // 1CD0..1CD2; INHERITED
5194             0x1CD3,   // 1CD3      ; COMMON
5195             0x1CD4,   // 1CD4..1CE0; INHERITED
5196             0x1CE1,   // 1CE1      ; COMMON
5197             0x1CE2,   // 1CE2..1CE8; INHERITED
5198             0x1CE9,   // 1CE9..1CEC; COMMON
5199             0x1CED,   // 1CED      ; INHERITED
5200             0x1CEE,   // 1CEE..1CF3; COMMON
5201             0x1CF4,   // 1CF4      ; INHERITED
5202             0x1CF5,   // 1CF5..1CF7; COMMON
5203             0x1CF8,   // 1CF8..1CF9; INHERITED
5204             0x1CFA,   // 1CFA..1CFF; UNKNOWN
5205             0x1D00,   // 1D00..1D25; LATIN
5206             0x1D26,   // 1D26..1D2A; GREEK
5207             0x1D2B,   // 1D2B      ; CYRILLIC
5208             0x1D2C,   // 1D2C..1D5C; LATIN
5209             0x1D5D,   // 1D5D..1D61; GREEK
5210             0x1D62,   // 1D62..1D65; LATIN
5211             0x1D66,   // 1D66..1D6A; GREEK
5212             0x1D6B,   // 1D6B..1D77; LATIN
5213             0x1D78,   // 1D78      ; CYRILLIC
5214             0x1D79,   // 1D79..1DBE; LATIN
5215             0x1DBF,   // 1DBF      ; GREEK
5216             0x1DC0,   // 1DC0..1DF9; INHERITED
5217             0x1DFA,   // 1DFA      ; UNKNOWN
5218             0x1DFB,   // 1DFB..1DFF; INHERITED
5219             0x1E00,   // 1E00..1EFF; LATIN
5220             0x1F00,   // 1F00..1F15; GREEK
5221             0x1F16,   // 1F16..1F17; UNKNOWN
5222             0x1F18,   // 1F18..1F1D; GREEK
5223             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5224             0x1F20,   // 1F20..1F45; GREEK
5225             0x1F46,   // 1F46..1F47; UNKNOWN
5226             0x1F48,   // 1F48..1F4D; GREEK
5227             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5228             0x1F50,   // 1F50..1F57; GREEK
5229             0x1F58,   // 1F58      ; UNKNOWN
5230             0x1F59,   // 1F59      ; GREEK
5231             0x1F5A,   // 1F5A      ; UNKNOWN
5232             0x1F5B,   // 1F5B      ; GREEK
5233             0x1F5C,   // 1F5C      ; UNKNOWN
5234             0x1F5D,   // 1F5D      ; GREEK
5235             0x1F5E,   // 1F5E      ; UNKNOWN
5236             0x1F5F,   // 1F5F..1F7D; GREEK
5237             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5238             0x1F80,   // 1F80..1FB4; GREEK
5239             0x1FB5,   // 1FB5      ; UNKNOWN
5240             0x1FB6,   // 1FB6..1FC4; GREEK
5241             0x1FC5,   // 1FC5      ; UNKNOWN
5242             0x1FC6,   // 1FC6..1FD3; GREEK
5243             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5244             0x1FD6,   // 1FD6..1FDB; GREEK
5245             0x1FDC,   // 1FDC      ; UNKNOWN
5246             0x1FDD,   // 1FDD..1FEF; GREEK
5247             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5248             0x1FF2,   // 1FF2..1FF4; GREEK
5249             0x1FF5,   // 1FF5      ; UNKNOWN
5250             0x1FF6,   // 1FF6..1FFE; GREEK
5251             0x1FFF,   // 1FFF      ; UNKNOWN
5252             0x2000,   // 2000..200B; COMMON
5253             0x200C,   // 200C..200D; INHERITED
5254             0x200E,   // 200E..2064; COMMON
5255             0x2065,   // 2065      ; UNKNOWN
5256             0x2066,   // 2066..2070; COMMON
5257             0x2071,   // 2071      ; LATIN
5258             0x2072,   // 2072..2073; UNKNOWN
5259             0x2074,   // 2074..207E; COMMON
5260             0x207F,   // 207F      ; LATIN
5261             0x2080,   // 2080..208E; COMMON
5262             0x208F,   // 208F      ; UNKNOWN
5263             0x2090,   // 2090..209C; LATIN
5264             0x209D,   // 209D..209F; UNKNOWN
5265             0x20A0,   // 20A0..20BF; COMMON
5266             0x20C0,   // 20C0..20CF; UNKNOWN
5267             0x20D0,   // 20D0..20F0; INHERITED
5268             0x20F1,   // 20F1..20FF; UNKNOWN
5269             0x2100,   // 2100..2125; COMMON
5270             0x2126,   // 2126      ; GREEK
5271             0x2127,   // 2127..2129; COMMON
5272             0x212A,   // 212A..212B; LATIN
5273             0x212C,   // 212C..2131; COMMON
5274             0x2132,   // 2132      ; LATIN
5275             0x2133,   // 2133..214D; COMMON
5276             0x214E,   // 214E      ; LATIN
5277             0x214F,   // 214F..215F; COMMON
5278             0x2160,   // 2160..2188; LATIN
5279             0x2189,   // 2189..218B; COMMON
5280             0x218C,   // 218C..218F; UNKNOWN
5281             0x2190,   // 2190..2426; COMMON
5282             0x2427,   // 2427..243F; UNKNOWN
5283             0x2440,   // 2440..244A; COMMON
5284             0x244B,   // 244B..245F; UNKNOWN
5285             0x2460,   // 2460..27FF; COMMON
5286             0x2800,   // 2800..28FF; BRAILLE
5287             0x2900,   // 2900..2B73; COMMON
5288             0x2B74,   // 2B74..2B75; UNKNOWN
5289             0x2B76,   // 2B76..2B95; COMMON
5290             0x2B96,   // 2B96..2B97; UNKNOWN
5291             0x2B98,   // 2B98..2BB9; COMMON
5292             0x2BBA,   // 2BBA..2BBC; UNKNOWN
5293             0x2BBD,   // 2BBD..2BC8; COMMON
5294             0x2BC9,   // 2BC9      ; UNKNOWN
5295             0x2BCA,   // 2BCA..2BD2; COMMON
5296             0x2BD3,   // 2BD3..2BEB; UNKNOWN
5297             0x2BEC,   // 2BEC..2BEF; COMMON
5298             0x2BF0,   // 2BF0..2BFF; UNKNOWN
5299             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5300             0x2C2F,   // 2C2F      ; UNKNOWN
5301             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5302             0x2C5F,   // 2C5F      ; UNKNOWN
5303             0x2C60,   // 2C60..2C7F; LATIN
5304             0x2C80,   // 2C80..2CF3; COPTIC
5305             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5306             0x2CF9,   // 2CF9..2CFF; COPTIC
5307             0x2D00,   // 2D00..2D25; GEORGIAN
5308             0x2D26,   // 2D26      ; UNKNOWN
5309             0x2D27,   // 2D27      ; GEORGIAN
5310             0x2D28,   // 2D28..2D2C; UNKNOWN
5311             0x2D2D,   // 2D2D      ; GEORGIAN
5312             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5313             0x2D30,   // 2D30..2D67; TIFINAGH
5314             0x2D68,   // 2D68..2D6E; UNKNOWN
5315             0x2D6F,   // 2D6F..2D70; TIFINAGH
5316             0x2D71,   // 2D71..2D7E; UNKNOWN
5317             0x2D7F,   // 2D7F      ; TIFINAGH
5318             0x2D80,   // 2D80..2D96; ETHIOPIC
5319             0x2D97,   // 2D97..2D9F; UNKNOWN
5320             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5321             0x2DA7,   // 2DA7      ; UNKNOWN
5322             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5323             0x2DAF,   // 2DAF      ; UNKNOWN
5324             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5325             0x2DB7,   // 2DB7      ; UNKNOWN
5326             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5327             0x2DBF,   // 2DBF      ; UNKNOWN
5328             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5329             0x2DC7,   // 2DC7      ; UNKNOWN
5330             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5331             0x2DCF,   // 2DCF      ; UNKNOWN
5332             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5333             0x2DD7,   // 2DD7      ; UNKNOWN
5334             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5335             0x2DDF,   // 2DDF      ; UNKNOWN
5336             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5337             0x2E00,   // 2E00..2E49; COMMON
5338             0x2E4A,   // 2E4A..2E7F; UNKNOWN
5339             0x2E80,   // 2E80..2E99; HAN
5340             0x2E9A,   // 2E9A      ; UNKNOWN
5341             0x2E9B,   // 2E9B..2EF3; HAN
5342             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5343             0x2F00,   // 2F00..2FD5; HAN
5344             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5345             0x2FF0,   // 2FF0..2FFB; COMMON
5346             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5347             0x3000,   // 3000..3004; COMMON
5348             0x3005,   // 3005      ; HAN
5349             0x3006,   // 3006      ; COMMON
5350             0x3007,   // 3007      ; HAN
5351             0x3008,   // 3008..3020; COMMON
5352             0x3021,   // 3021..3029; HAN
5353             0x302A,   // 302A..302D; INHERITED
5354             0x302E,   // 302E..302F; HANGUL
5355             0x3030,   // 3030..3037; COMMON
5356             0x3038,   // 3038..303B; HAN
5357             0x303C,   // 303C..303F; COMMON
5358             0x3040,   // 3040      ; UNKNOWN
5359             0x3041,   // 3041..3096; HIRAGANA
5360             0x3097,   // 3097..3098; UNKNOWN
5361             0x3099,   // 3099..309A; INHERITED
5362             0x309B,   // 309B..309C; COMMON
5363             0x309D,   // 309D..309F; HIRAGANA
5364             0x30A0,   // 30A0      ; COMMON
5365             0x30A1,   // 30A1..30FA; KATAKANA
5366             0x30FB,   // 30FB..30FC; COMMON
5367             0x30FD,   // 30FD..30FF; KATAKANA
5368             0x3100,   // 3100..3104; UNKNOWN
5369             0x3105,   // 3105..312E; BOPOMOFO
5370             0x312F,   // 312F..3130; UNKNOWN
5371             0x3131,   // 3131..318E; HANGUL
5372             0x318F,   // 318F      ; UNKNOWN
5373             0x3190,   // 3190..319F; COMMON
5374             0x31A0,   // 31A0..31BA; BOPOMOFO
5375             0x31BB,   // 31BB..31BF; UNKNOWN
5376             0x31C0,   // 31C0..31E3; COMMON
5377             0x31E4,   // 31E4..31EF; UNKNOWN
5378             0x31F0,   // 31F0..31FF; KATAKANA
5379             0x3200,   // 3200..321E; HANGUL
5380             0x321F,   // 321F      ; UNKNOWN
5381             0x3220,   // 3220..325F; COMMON
5382             0x3260,   // 3260..327E; HANGUL
5383             0x327F,   // 327F..32CF; COMMON
5384             0x32D0,   // 32D0..32FE; KATAKANA
5385             0x32FF,   // 32FF      ; UNKNOWN
5386             0x3300,   // 3300..3357; KATAKANA
5387             0x3358,   // 3358..33FF; COMMON
5388             0x3400,   // 3400..4DB5; HAN
5389             0x4DB6,   // 4DB6..4DBF; UNKNOWN
5390             0x4DC0,   // 4DC0..4DFF; COMMON
5391             0x4E00,   // 4E00..9FEA; HAN
5392             0x9FEB,   // 9FEB..9FFF; UNKNOWN
5393             0xA000,   // A000..A48C; YI
5394             0xA48D,   // A48D..A48F; UNKNOWN
5395             0xA490,   // A490..A4C6; YI
5396             0xA4C7,   // A4C7..A4CF; UNKNOWN
5397             0xA4D0,   // A4D0..A4FF; LISU
5398             0xA500,   // A500..A62B; VAI
5399             0xA62C,   // A62C..A63F; UNKNOWN
5400             0xA640,   // A640..A69F; CYRILLIC
5401             0xA6A0,   // A6A0..A6F7; BAMUM
5402             0xA6F8,   // A6F8..A6FF; UNKNOWN
5403             0xA700,   // A700..A721; COMMON
5404             0xA722,   // A722..A787; LATIN
5405             0xA788,   // A788..A78A; COMMON
5406             0xA78B,   // A78B..A7AE; LATIN
5407             0xA7AF,   // A7AF      ; UNKNOWN
5408             0xA7B0,   // A7B0..A7B7; LATIN
5409             0xA7B8,   // A7B8..A7F6; UNKNOWN
5410             0xA7F7,   // A7F7..A7FF; LATIN
5411             0xA800,   // A800..A82B; SYLOTI_NAGRI
5412             0xA82C,   // A82C..A82F; UNKNOWN
5413             0xA830,   // A830..A839; COMMON
5414             0xA83A,   // A83A..A83F; UNKNOWN
5415             0xA840,   // A840..A877; PHAGS_PA
5416             0xA878,   // A878..A87F; UNKNOWN
5417             0xA880,   // A880..A8C5; SAURASHTRA
5418             0xA8C6,   // A8C6..A8CD; UNKNOWN
5419             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5420             0xA8DA,   // A8DA..A8DF; UNKNOWN
5421             0xA8E0,   // A8E0..A8FD; DEVANAGARI
5422             0xA8FE,   // A8FE..A8FF; UNKNOWN
5423             0xA900,   // A900..A92D; KAYAH_LI
5424             0xA92E,   // A92E      ; COMMON
5425             0xA92F,   // A92F      ; KAYAH_LI
5426             0xA930,   // A930..A953; REJANG
5427             0xA954,   // A954..A95E; UNKNOWN
5428             0xA95F,   // A95F      ; REJANG
5429             0xA960,   // A960..A97C; HANGUL
5430             0xA97D,   // A97D..A97F; UNKNOWN
5431             0xA980,   // A980..A9CD; JAVANESE
5432             0xA9CE,   // A9CE      ; UNKNOWN
5433             0xA9CF,   // A9CF      ; COMMON
5434             0xA9D0,   // A9D0..A9D9; JAVANESE
5435             0xA9DA,   // A9DA..A9DD; UNKNOWN
5436             0xA9DE,   // A9DE..A9DF; JAVANESE
5437             0xA9E0,   // A9E0..A9FE; MYANMAR
5438             0xA9FF,   // A9FF      ; UNKNOWN
5439             0xAA00,   // AA00..AA36; CHAM
5440             0xAA37,   // AA37..AA3F; UNKNOWN
5441             0xAA40,   // AA40..AA4D; CHAM
5442             0xAA4E,   // AA4E..AA4F; UNKNOWN
5443             0xAA50,   // AA50..AA59; CHAM
5444             0xAA5A,   // AA5A..AA5B; UNKNOWN
5445             0xAA5C,   // AA5C..AA5F; CHAM
5446             0xAA60,   // AA60..AA7F; MYANMAR
5447             0xAA80,   // AA80..AAC2; TAI_VIET
5448             0xAAC3,   // AAC3..AADA; UNKNOWN
5449             0xAADB,   // AADB..AADF; TAI_VIET
5450             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5451             0xAAF7,   // AAF7..AB00; UNKNOWN
5452             0xAB01,   // AB01..AB06; ETHIOPIC
5453             0xAB07,   // AB07..AB08; UNKNOWN
5454             0xAB09,   // AB09..AB0E; ETHIOPIC
5455             0xAB0F,   // AB0F..AB10; UNKNOWN
5456             0xAB11,   // AB11..AB16; ETHIOPIC
5457             0xAB17,   // AB17..AB1F; UNKNOWN
5458             0xAB20,   // AB20..AB26; ETHIOPIC
5459             0xAB27,   // AB27      ; UNKNOWN
5460             0xAB28,   // AB28..AB2E; ETHIOPIC
5461             0xAB2F,   // AB2F      ; UNKNOWN
5462             0xAB30,   // AB30..AB5A; LATIN
5463             0xAB5B,   // AB5B      ; COMMON
5464             0xAB5C,   // AB5C..AB64; LATIN
5465             0xAB65,   // AB65      ; GREEK
5466             0xAB66,   // AB66..AB6F; UNKNOWN
5467             0xAB70,   // AB70..ABBF; CHEROKEE
5468             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5469             0xABEE,   // ABEE..ABEF; UNKNOWN
5470             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5471             0xABFA,   // ABFA..ABFF; UNKNOWN
5472             0xAC00,   // AC00..D7A3; HANGUL
5473             0xD7A4,   // D7A4..D7AF; UNKNOWN
5474             0xD7B0,   // D7B0..D7C6; HANGUL
5475             0xD7C7,   // D7C7..D7CA; UNKNOWN
5476             0xD7CB,   // D7CB..D7FB; HANGUL
5477             0xD7FC,   // D7FC..F8FF; UNKNOWN
5478             0xF900,   // F900..FA6D; HAN
5479             0xFA6E,   // FA6E..FA6F; UNKNOWN
5480             0xFA70,   // FA70..FAD9; HAN
5481             0xFADA,   // FADA..FAFF; UNKNOWN
5482             0xFB00,   // FB00..FB06; LATIN
5483             0xFB07,   // FB07..FB12; UNKNOWN
5484             0xFB13,   // FB13..FB17; ARMENIAN
5485             0xFB18,   // FB18..FB1C; UNKNOWN
5486             0xFB1D,   // FB1D..FB36; HEBREW
5487             0xFB37,   // FB37      ; UNKNOWN
5488             0xFB38,   // FB38..FB3C; HEBREW
5489             0xFB3D,   // FB3D      ; UNKNOWN
5490             0xFB3E,   // FB3E      ; HEBREW
5491             0xFB3F,   // FB3F      ; UNKNOWN
5492             0xFB40,   // FB40..FB41; HEBREW
5493             0xFB42,   // FB42      ; UNKNOWN
5494             0xFB43,   // FB43..FB44; HEBREW
5495             0xFB45,   // FB45      ; UNKNOWN
5496             0xFB46,   // FB46..FB4F; HEBREW
5497             0xFB50,   // FB50..FBC1; ARABIC
5498             0xFBC2,   // FBC2..FBD2; UNKNOWN
5499             0xFBD3,   // FBD3..FD3D; ARABIC
5500             0xFD3E,   // FD3E..FD3F; COMMON
5501             0xFD40,   // FD40..FD4F; UNKNOWN
5502             0xFD50,   // FD50..FD8F; ARABIC
5503             0xFD90,   // FD90..FD91; UNKNOWN
5504             0xFD92,   // FD92..FDC7; ARABIC
5505             0xFDC8,   // FDC8..FDEF; UNKNOWN
5506             0xFDF0,   // FDF0..FDFD; ARABIC
5507             0xFDFE,   // FDFE..FDFF; UNKNOWN
5508             0xFE00,   // FE00..FE0F; INHERITED
5509             0xFE10,   // FE10..FE19; COMMON
5510             0xFE1A,   // FE1A..FE1F; UNKNOWN
5511             0xFE20,   // FE20..FE2D; INHERITED
5512             0xFE2E,   // FE2E..FE2F; CYRILLIC
5513             0xFE30,   // FE30..FE52; COMMON
5514             0xFE53,   // FE53      ; UNKNOWN
5515             0xFE54,   // FE54..FE66; COMMON
5516             0xFE67,   // FE67      ; UNKNOWN
5517             0xFE68,   // FE68..FE6B; COMMON
5518             0xFE6C,   // FE6C..FE6F; UNKNOWN
5519             0xFE70,   // FE70..FE74; ARABIC
5520             0xFE75,   // FE75      ; UNKNOWN
5521             0xFE76,   // FE76..FEFC; ARABIC
5522             0xFEFD,   // FEFD..FEFE; UNKNOWN
5523             0xFEFF,   // FEFF      ; COMMON
5524             0xFF00,   // FF00      ; UNKNOWN
5525             0xFF01,   // FF01..FF20; COMMON
5526             0xFF21,   // FF21..FF3A; LATIN
5527             0xFF3B,   // FF3B..FF40; COMMON
5528             0xFF41,   // FF41..FF5A; LATIN
5529             0xFF5B,   // FF5B..FF65; COMMON
5530             0xFF66,   // FF66..FF6F; KATAKANA
5531             0xFF70,   // FF70      ; COMMON
5532             0xFF71,   // FF71..FF9D; KATAKANA
5533             0xFF9E,   // FF9E..FF9F; COMMON
5534             0xFFA0,   // FFA0..FFBE; HANGUL
5535             0xFFBF,   // FFBF..FFC1; UNKNOWN
5536             0xFFC2,   // FFC2..FFC7; HANGUL
5537             0xFFC8,   // FFC8..FFC9; UNKNOWN
5538             0xFFCA,   // FFCA..FFCF; HANGUL
5539             0xFFD0,   // FFD0..FFD1; UNKNOWN
5540             0xFFD2,   // FFD2..FFD7; HANGUL
5541             0xFFD8,   // FFD8..FFD9; UNKNOWN
5542             0xFFDA,   // FFDA..FFDC; HANGUL
5543             0xFFDD,   // FFDD..FFDF; UNKNOWN
5544             0xFFE0,   // FFE0..FFE6; COMMON
5545             0xFFE7,   // FFE7      ; UNKNOWN
5546             0xFFE8,   // FFE8..FFEE; COMMON
5547             0xFFEF,   // FFEF..FFF8; UNKNOWN
5548             0xFFF9,   // FFF9..FFFD; COMMON
5549             0xFFFE,   // FFFE..FFFF; UNKNOWN
5550             0x10000,  // 10000..1000B; LINEAR_B
5551             0x1000C,  // 1000C       ; UNKNOWN
5552             0x1000D,  // 1000D..10026; LINEAR_B
5553             0x10027,  // 10027       ; UNKNOWN
5554             0x10028,  // 10028..1003A; LINEAR_B
5555             0x1003B,  // 1003B       ; UNKNOWN
5556             0x1003C,  // 1003C..1003D; LINEAR_B
5557             0x1003E,  // 1003E       ; UNKNOWN
5558             0x1003F,  // 1003F..1004D; LINEAR_B
5559             0x1004E,  // 1004E..1004F; UNKNOWN
5560             0x10050,  // 10050..1005D; LINEAR_B
5561             0x1005E,  // 1005E..1007F; UNKNOWN
5562             0x10080,  // 10080..100FA; LINEAR_B
5563             0x100FB,  // 100FB..100FF; UNKNOWN
5564             0x10100,  // 10100..10102; COMMON
5565             0x10103,  // 10103..10106; UNKNOWN
5566             0x10107,  // 10107..10133; COMMON
5567             0x10134,  // 10134..10136; UNKNOWN
5568             0x10137,  // 10137..1013F; COMMON
5569             0x10140,  // 10140..1018E; GREEK
5570             0x1018F,  // 1018F       ; UNKNOWN
5571             0x10190,  // 10190..1019B; COMMON
5572             0x1019C,  // 1019C..1019F; UNKNOWN
5573             0x101A0,  // 101A0       ; GREEK
5574             0x101A1,  // 101A1..101CF; UNKNOWN
5575             0x101D0,  // 101D0..101FC; COMMON
5576             0x101FD,  // 101FD       ; INHERITED
5577             0x101FE,  // 101FE..1027F; UNKNOWN
5578             0x10280,  // 10280..1029C; LYCIAN
5579             0x1029D,  // 1029D..1029F; UNKNOWN
5580             0x102A0,  // 102A0..102D0; CARIAN
5581             0x102D1,  // 102D1..102DF; UNKNOWN
5582             0x102E0,  // 102E0       ; INHERITED
5583             0x102E1,  // 102E1..102FB; COMMON
5584             0x102FC,  // 102FC..102FF; UNKNOWN
5585             0x10300,  // 10300..10323; OLD_ITALIC
5586             0x10324,  // 10324..1032C; UNKNOWN
5587             0x1032D,  // 1032D..1032F; OLD_ITALIC
5588             0x10330,  // 10330..1034A; GOTHIC
5589             0x1034B,  // 1034B..1034F; UNKNOWN
5590             0x10350,  // 10350..1037A; OLD_PERMIC
5591             0x1037B,  // 1037B..1037F; UNKNOWN
5592             0x10380,  // 10380..1039D; UGARITIC
5593             0x1039E,  // 1039E       ; UNKNOWN
5594             0x1039F,  // 1039F       ; UGARITIC
5595             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5596             0x103C4,  // 103C4..103C7; UNKNOWN
5597             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5598             0x103D6,  // 103D6..103FF; UNKNOWN
5599             0x10400,  // 10400..1044F; DESERET
5600             0x10450,  // 10450..1047F; SHAVIAN
5601             0x10480,  // 10480..1049D; OSMANYA
5602             0x1049E,  // 1049E..1049F; UNKNOWN
5603             0x104A0,  // 104A0..104A9; OSMANYA
5604             0x104AA,  // 104AA..104AF; UNKNOWN
5605             0x104B0,  // 104B0..104D3; OSAGE
5606             0x104D4,  // 104D4..104D7; UNKNOWN
5607             0x104D8,  // 104D8..104FB; OSAGE
5608             0x104FC,  // 104FC..104FF; UNKNOWN
5609             0x10500,  // 10500..10527; ELBASAN
5610             0x10528,  // 10528..1052F; UNKNOWN
5611             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5612             0x10564,  // 10564..1056E; UNKNOWN
5613             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5614             0x10570,  // 10570..105FF; UNKNOWN
5615             0x10600,  // 10600..10736; LINEAR_A
5616             0x10737,  // 10737..1073F; UNKNOWN
5617             0x10740,  // 10740..10755; LINEAR_A
5618             0x10756,  // 10756..1075F; UNKNOWN
5619             0x10760,  // 10760..10767; LINEAR_A
5620             0x10768,  // 10768..107FF; UNKNOWN
5621             0x10800,  // 10800..10805; CYPRIOT
5622             0x10806,  // 10806..10807; UNKNOWN
5623             0x10808,  // 10808       ; CYPRIOT
5624             0x10809,  // 10809       ; UNKNOWN
5625             0x1080A,  // 1080A..10835; CYPRIOT
5626             0x10836,  // 10836       ; UNKNOWN
5627             0x10837,  // 10837..10838; CYPRIOT
5628             0x10839,  // 10839..1083B; UNKNOWN
5629             0x1083C,  // 1083C       ; CYPRIOT
5630             0x1083D,  // 1083D..1083E; UNKNOWN
5631             0x1083F,  // 1083F       ; CYPRIOT
5632             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5633             0x10856,  // 10856       ; UNKNOWN
5634             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5635             0x10860,  // 10860..1087F; PALMYRENE
5636             0x10880,  // 10880..1089E; NABATAEAN
5637             0x1089F,  // 1089F..108A6; UNKNOWN
5638             0x108A7,  // 108A7..108AF; NABATAEAN
5639             0x108B0,  // 108B0..108DF; UNKNOWN
5640             0x108E0,  // 108E0..108F2; HATRAN
5641             0x108F3,  // 108F3       ; UNKNOWN
5642             0x108F4,  // 108F4..108F5; HATRAN
5643             0x108F6,  // 108F6..108FA; UNKNOWN
5644             0x108FB,  // 108FB..108FF; HATRAN
5645             0x10900,  // 10900..1091B; PHOENICIAN
5646             0x1091C,  // 1091C..1091E; UNKNOWN
5647             0x1091F,  // 1091F       ; PHOENICIAN
5648             0x10920,  // 10920..10939; LYDIAN
5649             0x1093A,  // 1093A..1093E; UNKNOWN
5650             0x1093F,  // 1093F       ; LYDIAN
5651             0x10940,  // 10940..1097F; UNKNOWN
5652             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5653             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5654             0x109B8,  // 109B8..109BB; UNKNOWN
5655             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
5656             0x109D0,  // 109D0..109D1; UNKNOWN
5657             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
5658             0x10A00,  // 10A00..10A03; KHAROSHTHI
5659             0x10A04,  // 10A04       ; UNKNOWN
5660             0x10A05,  // 10A05..10A06; KHAROSHTHI
5661             0x10A07,  // 10A07..10A0B; UNKNOWN
5662             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5663             0x10A14,  // 10A14       ; UNKNOWN
5664             0x10A15,  // 10A15..10A17; KHAROSHTHI
5665             0x10A18,  // 10A18       ; UNKNOWN
5666             0x10A19,  // 10A19..10A33; KHAROSHTHI
5667             0x10A34,  // 10A34..10A37; UNKNOWN
5668             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5669             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5670             0x10A3F,  // 10A3F..10A47; KHAROSHTHI
5671             0x10A48,  // 10A48..10A4F; UNKNOWN
5672             0x10A50,  // 10A50..10A58; KHAROSHTHI
5673             0x10A59,  // 10A59..10A5F; UNKNOWN
5674             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5675             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5676             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5677             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5678             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5679             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5680             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5681             0x10B00,  // 10B00..10B35; AVESTAN
5682             0x10B36,  // 10B36..10B38; UNKNOWN
5683             0x10B39,  // 10B39..10B3F; AVESTAN
5684             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5685             0x10B56,  // 10B56..10B57; UNKNOWN
5686             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5687             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5688             0x10B73,  // 10B73..10B77; UNKNOWN
5689             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5690             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5691             0x10B92,  // 10B92..10B98; UNKNOWN
5692             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5693             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5694             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5695             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5696             0x10C00,  // 10C00..10C48; OLD_TURKIC
5697             0x10C49,  // 10C49..10C7F; UNKNOWN
5698             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
5699             0x10CB3,  // 10CB3..10CBF; UNKNOWN
5700             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
5701             0x10CF3,  // 10CF3..10CF9; UNKNOWN
5702             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
5703             0x10D00,  // 10D00..10E5F; UNKNOWN
5704             0x10E60,  // 10E60..10E7E; ARABIC
5705             0x10E7F,  // 10E7F..10FFF; UNKNOWN
5706             0x11000,  // 11000..1104D; BRAHMI
5707             0x1104E,  // 1104E..11051; UNKNOWN
5708             0x11052,  // 11052..1106F; BRAHMI
5709             0x11070,  // 11070..1107E; UNKNOWN
5710             0x1107F,  // 1107F       ; BRAHMI
5711             0x11080,  // 11080..110C1; KAITHI
5712             0x110C2,  // 110C2..110CF; UNKNOWN
5713             0x110D0,  // 110D0..110E8; SORA_SOMPENG
5714             0x110E9,  // 110E9..110EF; UNKNOWN
5715             0x110F0,  // 110F0..110F9; SORA_SOMPENG
5716             0x110FA,  // 110FA..110FF; UNKNOWN
5717             0x11100,  // 11100..11134; CHAKMA
5718             0x11135,  // 11135       ; UNKNOWN
5719             0x11136,  // 11136..11143; CHAKMA
5720             0x11144,  // 11144..1114F; UNKNOWN
5721             0x11150,  // 11150..11176; MAHAJANI
5722             0x11177,  // 11177..1117F; UNKNOWN
5723             0x11180,  // 11180..111CD; SHARADA
5724             0x111CE,  // 111CE..111CF; UNKNOWN
5725             0x111D0,  // 111D0..111DF; SHARADA
5726             0x111E0,  // 111E0       ; UNKNOWN
5727             0x111E1,  // 111E1..111F4; SINHALA
5728             0x111F5,  // 111F5..111FF; UNKNOWN
5729             0x11200,  // 11200..11211; KHOJKI
5730             0x11212,  // 11212       ; UNKNOWN
5731             0x11213,  // 11213..1123E; KHOJKI
5732             0x1123F,  // 1123F..1127F; UNKNOWN
5733             0x11280,  // 11280..11286; MULTANI
5734             0x11287,  // 11287       ; UNKNOWN
5735             0x11288,  // 11288       ; MULTANI
5736             0x11289,  // 11289       ; UNKNOWN
5737             0x1128A,  // 1128A..1128D; MULTANI
5738             0x1128E,  // 1128E       ; UNKNOWN
5739             0x1128F,  // 1128F..1129D; MULTANI
5740             0x1129E,  // 1129E       ; UNKNOWN
5741             0x1129F,  // 1129F..112A9; MULTANI
5742             0x112AA,  // 112AA..112AF; UNKNOWN
5743             0x112B0,  // 112B0..112EA; KHUDAWADI
5744             0x112EB,  // 112EB..112EF; UNKNOWN
5745             0x112F0,  // 112F0..112F9; KHUDAWADI
5746             0x112FA,  // 112FA..112FF; UNKNOWN
5747             0x11300,  // 11300..11303; GRANTHA
5748             0x11304,  // 11304       ; UNKNOWN
5749             0x11305,  // 11305..1130C; GRANTHA
5750             0x1130D,  // 1130D..1130E; UNKNOWN
5751             0x1130F,  // 1130F..11310; GRANTHA
5752             0x11311,  // 11311..11312; UNKNOWN
5753             0x11313,  // 11313..11328; GRANTHA
5754             0x11329,  // 11329       ; UNKNOWN
5755             0x1132A,  // 1132A..11330; GRANTHA
5756             0x11331,  // 11331       ; UNKNOWN
5757             0x11332,  // 11332..11333; GRANTHA
5758             0x11334,  // 11334       ; UNKNOWN
5759             0x11335,  // 11335..11339; GRANTHA
5760             0x1133A,  // 1133A..1133B; UNKNOWN
5761             0x1133C,  // 1133C..11344; GRANTHA
5762             0x11345,  // 11345..11346; UNKNOWN
5763             0x11347,  // 11347..11348; GRANTHA
5764             0x11349,  // 11349..1134A; UNKNOWN
5765             0x1134B,  // 1134B..1134D; GRANTHA
5766             0x1134E,  // 1134E..1134F; UNKNOWN
5767             0x11350,  // 11350       ; GRANTHA
5768             0x11351,  // 11351..11356; UNKNOWN
5769             0x11357,  // 11357       ; GRANTHA
5770             0x11358,  // 11358..1135C; UNKNOWN
5771             0x1135D,  // 1135D..11363; GRANTHA
5772             0x11364,  // 11364..11365; UNKNOWN
5773             0x11366,  // 11366..1136C; GRANTHA
5774             0x1136D,  // 1136D..1136F; UNKNOWN
5775             0x11370,  // 11370..11374; GRANTHA
5776             0x11375,  // 11375..113FF; UNKNOWN
5777             0x11400,  // 11400..11459; NEWA
5778             0x1145A,  // 1145A       ; UNKNOWN
5779             0x1145B,  // 1145B       ; NEWA
5780             0x1145C,  // 1145C       ; UNKNOWN
5781             0x1145D,  // 1145D       ; NEWA
5782             0x1145E,  // 1145E..1147F; UNKNOWN
5783             0x11480,  // 11480..114C7; TIRHUTA
5784             0x114C8,  // 114C8..114CF; UNKNOWN
5785             0x114D0,  // 114D0..114D9; TIRHUTA
5786             0x114DA,  // 114DA..1157F; UNKNOWN
5787             0x11580,  // 11580..115B5; SIDDHAM
5788             0x115B6,  // 115B6..115B7; UNKNOWN
5789             0x115B8,  // 115B8..115DD; SIDDHAM
5790             0x115DE,  // 115DE..115FF; UNKNOWN
5791             0x11600,  // 11600..11644; MODI
5792             0x11645,  // 11645..1164F; UNKNOWN
5793             0x11650,  // 11650..11659; MODI
5794             0x1165A,  // 1165A..1165F; UNKNOWN
5795             0x11660,  // 11660..1166C; MONGOLIAN
5796             0X1166D,  // 1166D..1167F; UNKNOWN
5797             0x11680,  // 11680..116B7; TAKRI
5798             0x116B8,  // 116B8..116BF; UNKNOWN
5799             0x116C0,  // 116C0..116C9; TAKRI
5800             0x116CA,  // 116CA..116FF; UNKNOWN
5801             0x11700,  // 11700..11719; AHOM
5802             0x1171A,  // 1171A..1171C; UNKNOWN
5803             0x1171D,  // 1171D..1172B; AHOM
5804             0x1172C,  // 1172C..1172F; UNKNOWN
5805             0x11730,  // 11730..1173F; AHOM
5806             0x11740,  // 11740..1189F; UNKNOWN
5807             0x118A0,  // 118A0..118F2; WARANG_CITI
5808             0x118F3,  // 118F3..118FE; UNKNOWN
5809             0x118FF,  // 118FF       ; WARANG_CITI
5810             0x11900,  // 11900..119FF; UNKNOWN
5811             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
5812             0X11A48,  // 11A48..11A4F; UNKNOWN
5813             0x11A50,  // 11A50..11A83; SOYOMBO
5814             0x11A84,  // 11A84..11A85; UNKNOWN
5815             0x11A86,  // 11A86..11A9C; SOYOMBO
5816             0x11A9D,  // 11A9D       ; UNKNOWN
5817             0x11A9E,  // 11A9E..11AA2; SOYOMBO
5818             0x11AA3,  // 11AA3..11ABF; UNKNOWN
5819             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
5820             0x11AF9,  // 11AF9..11BFF; UNKNOWN
5821             0x11C00,  // 11C00..11C08; BHAIKSUKI
5822             0x11C09,  // 11C09       ; UNKNOWN
5823             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
5824             0x11C37,  // 11C37       ; UNKNOWN
5825             0x11C38,  // 11C38..11C45; BHAIKSUKI
5826             0x11C46,  // 11C46..11C4F; UNKNOWN
5827             0x11C50,  // 11C50..11C6C; BHAIKSUKI
5828             0x11C6D,  // 11C6D..11C6F; UNKNOWN
5829             0x11C70,  // 11C70..11C8F; MARCHEN
5830             0x11C90,  // 11C90..11C91; UNKNOWN
5831             0x11C92,  // 11C92..11CA7; MARCHEN
5832             0x11CA8,  // 11CA8       ; UNKNOWN
5833             0x11CA9,  // 11CA9..11CB6; MARCHEN
5834             0x11CB7,  // 11CB7..11CFF; UNKNOWN
5835             0x11D00,  // 11D00..11D06; MASARAM_GONDI
5836             0x11D07,  // 11D07       ; UNKNOWN
5837             0x11D08,  // 11D08..11D09; MASARAM_GONDI
5838             0x11D0A,  // 11D0A       ; UNKNOWN
5839             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
5840             0x11D37,  // 11D37..11D39; UNKNOWN
5841             0x11D3A,  // 11D3A       ; MASARAM_GONDI
5842             0x11D3B,  // 11D3B       ; UNKNOWN
5843             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
5844             0x11D3E,  // 11D3E       ; UNKNOWN
5845             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
5846             0x11D48,  // 11D48..11D4F; UNKNOWN
5847             0x11D50,  // 11D50..11D59; MASARAM_GONDI
5848             0x11D5A,  // 11D5A..11FFF; UNKNOWN
5849             0x12000,  // 12000..12399; CUNEIFORM
5850             0x1239A,  // 1239A..123FF; UNKNOWN
5851             0x12400,  // 12400..1246E; CUNEIFORM
5852             0x1246F,  // 1246F       ; UNKNOWN
5853             0x12470,  // 12470..12474; CUNEIFORM
5854             0x12475,  // 12475..1247F; UNKNOWN
5855             0x12480,  // 12480..12543; CUNEIFORM
5856             0x12544,  // 12544..12FFF; UNKNOWN
5857             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
5858             0x1342F,  // 1342F..143FF; UNKNOWN
5859             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
5860             0x14647,  // 14647..167FF; UNKNOWN
5861             0x16800,  // 16800..16A38; BAMUM
5862             0x16A39,  // 16A39..16A3F; UNKNOWN
5863             0x16A40,  // 16A40..16A5E; MRO
5864             0x16A5F,  // 16A5F       ; UNKNOWN
5865             0x16A60,  // 16A60..16A69; MRO
5866             0x16A6A,  // 16A6A..16A6D; UNKNOWN
5867             0x16A6E,  // 16A6E..16A6F; MRO
5868             0x16A70,  // 16A70..16ACF; UNKNOWN
5869             0x16AD0,  // 16AD0..16AED; BASSA_VAH
5870             0x16AEE,  // 16AEE..16AEF; UNKNOWN
5871             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
5872             0x16AF6,  // 16AF6..16AFF; UNKNOWN
5873             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
5874             0x16B46,  // 16B46..16B4F; UNKNOWN
5875             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
5876             0x16B5A,  // 16B5A       ; UNKNOWN
5877             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
5878             0x16B62,  // 16B62       ; UNKNOWN
5879             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
5880             0x16B78,  // 16B78..16B7C; UNKNOWN
5881             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
5882             0x16B90,  // 16B90..16EFF; UNKNOWN
5883             0x16F00,  // 16F00..16F44; MIAO
5884             0x16F45,  // 16F45..16F4F; UNKNOWN
5885             0x16F50,  // 16F50..16F7E; MIAO
5886             0x16F7F,  // 16F7F..16F8E; UNKNOWN
5887             0x16F8F,  // 16F8F..16F9F; MIAO
5888             0x16FA0,  // 16FA0..16FDF; UNKNOWN
5889             0x16FE0,  // 16FE0       ; TANGUT
5890             0x16FE1,  // 16FE1       ; NUSHU
5891             0x16FE2,  // 16FE2..16FFF; UNKNOWN
5892             0x17000,  // 17000..187EC; TANGUT
5893             0x187ED,  // 187ED..187FF; UNKNOWN
5894             0x18800,  // 18800..18AF2; TANGUT
5895             0x18AF3,  // 18AF3..1AFFF; UNKNOWN
5896             0x1B000,  // 1B000       ; KATAKANA
5897             0x1B001,  // 1B001..1B11E; HIRAGANA
5898             0x1B11F,  // 1B11F..1B16F; UNKNOWN
5899             0x1B170,  // 1B170..1B2FB; NUSHU
5900             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
5901             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
5902             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
5903             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
5904             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
5905             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
5906             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
5907             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
5908             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
5909             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
5910             0x1BCA0,  // 1BCA0..1BCA3; COMMON
5911             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
5912             0x1D000,  // 1D000..1D0F5; COMMON
5913             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
5914             0x1D100,  // 1D100..1D126; COMMON
5915             0x1D127,  // 1D127..1D128; UNKNOWN
5916             0x1D129,  // 1D129..1D166; COMMON
5917             0x1D167,  // 1D167..1D169; INHERITED
5918             0x1D16A,  // 1D16A..1D17A; COMMON
5919             0x1D17B,  // 1D17B..1D182; INHERITED
5920             0x1D183,  // 1D183..1D184; COMMON
5921             0x1D185,  // 1D185..1D18B; INHERITED
5922             0x1D18C,  // 1D18C..1D1A9; COMMON
5923             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
5924             0x1D1AE,  // 1D1AE..1D1E8; COMMON
5925             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
5926             0x1D200,  // 1D200..1D245; GREEK
5927             0x1D246,  // 1D246..1D2FF; UNKNOWN
5928             0x1D300,  // 1D300..1D356; COMMON
5929             0x1D357,  // 1D357..1D35F; UNKNOWN
5930             0x1D360,  // 1D360..1D371; COMMON
5931             0x1D372,  // 1D372..1D3FF; UNKNOWN
5932             0x1D400,  // 1D400..1D454; COMMON
5933             0x1D455,  // 1D455       ; UNKNOWN
5934             0x1D456,  // 1D456..1D49C; COMMON
5935             0x1D49D,  // 1D49D       ; UNKNOWN
5936             0x1D49E,  // 1D49E..1D49F; COMMON
5937             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
5938             0x1D4A2,  // 1D4A2       ; COMMON
5939             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
5940             0x1D4A5,  // 1D4A5..1D4A6; COMMON
5941             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
5942             0x1D4A9,  // 1D4A9..1D4AC; COMMON
5943             0x1D4AD,  // 1D4AD       ; UNKNOWN
5944             0x1D4AE,  // 1D4AE..1D4B9; COMMON
5945             0x1D4BA,  // 1D4BA       ; UNKNOWN
5946             0x1D4BB,  // 1D4BB       ; COMMON
5947             0x1D4BC,  // 1D4BC       ; UNKNOWN
5948             0x1D4BD,  // 1D4BD..1D4C3; COMMON
5949             0x1D4C4,  // 1D4C4       ; UNKNOWN
5950             0x1D4C5,  // 1D4C5..1D505; COMMON
5951             0x1D506,  // 1D506       ; UNKNOWN
5952             0x1D507,  // 1D507..1D50A; COMMON
5953             0x1D50B,  // 1D50B..1D50C; UNKNOWN
5954             0x1D50D,  // 1D50D..1D514; COMMON
5955             0x1D515,  // 1D515       ; UNKNOWN
5956             0x1D516,  // 1D516..1D51C; COMMON
5957             0x1D51D,  // 1D51D       ; UNKNOWN
5958             0x1D51E,  // 1D51E..1D539; COMMON
5959             0x1D53A,  // 1D53A       ; UNKNOWN
5960             0x1D53B,  // 1D53B..1D53E; COMMON
5961             0x1D53F,  // 1D53F       ; UNKNOWN
5962             0x1D540,  // 1D540..1D544; COMMON
5963             0x1D545,  // 1D545       ; UNKNOWN
5964             0x1D546,  // 1D546       ; COMMON
5965             0x1D547,  // 1D547..1D549; UNKNOWN
5966             0x1D54A,  // 1D54A..1D550; COMMON
5967             0x1D551,  // 1D551       ; UNKNOWN
5968             0x1D552,  // 1D552..1D6A5; COMMON
5969             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
5970             0x1D6A8,  // 1D6A8..1D7CB; COMMON
5971             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
5972             0x1D7CE,  // 1D7CE..1D7FF; COMMON
5973             0x1D800,  // 1D800..1DA8B; SIGNWRITING
5974             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
5975             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
5976             0x1DAA0,  // 1DAA0       ; UNKNOWN
5977             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
5978             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
5979             0x1E000,  // 1E000..1E006; GLAGOLITIC
5980             0x1E007,  // 1E007       ; UNKNOWN
5981             0x1E008,  // 1E008..1E018; GLAGOLITIC
5982             0x1E019,  // 1E019..1E01A; UNKNOWN
5983             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
5984             0x1E022,  // 1E022       ; UNKNOWN
5985             0x1E023,  // 1E023..1E024; GLAGOLITIC
5986             0x1E025,  // 1E025       ; UNKNOWN
5987             0x1E026,  // 1E026..1E02A; GLAGOLITIC
5988             0x1E02B,  // 1E02B..1E7FF; UNKNOWN
5989             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
5990             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
5991             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
5992             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
5993             0x1E900,  // 1E900..1E94A; ADLAM
5994             0x1E94B,  // 1E94B..1E94F; UNKNOWN
5995             0x1E950,  // 1E950..1E959; ADLAM
5996             0x1E95A,  // 1E95A..1E95D; UNKNOWN
5997             0x1E95E,  // 1E95E..1E95F; ADLAM
5998             0x1E960,  // 1E960..1EDFF; UNKNOWN
5999             0x1EE00,  // 1EE00..1EE03; ARABIC
6000             0x1EE04,  // 1EE04       ; UNKNOWN
6001             0x1EE05,  // 1EE05..1EE1F; ARABIC
6002             0x1EE20,  // 1EE20       ; UNKNOWN
6003             0x1EE21,  // 1EE21..1EE22; ARABIC
6004             0x1EE23,  // 1EE23       ; UNKNOWN
6005             0x1EE24,  // 1EE24       ; ARABIC
6006             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6007             0x1EE27,  // 1EE27       ; ARABIC
6008             0x1EE28,  // 1EE28       ; UNKNOWN
6009             0x1EE29,  // 1EE29..1EE32; ARABIC
6010             0x1EE33,  // 1EE33       ; UNKNOWN
6011             0x1EE34,  // 1EE34..1EE37; ARABIC
6012             0x1EE38,  // 1EE38       ; UNKNOWN
6013             0x1EE39,  // 1EE39       ; ARABIC
6014             0x1EE3A,  // 1EE3A       ; UNKNOWN
6015             0x1EE3B,  // 1EE3B       ; ARABIC
6016             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6017             0x1EE42,  // 1EE42       ; ARABIC
6018             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6019             0x1EE47,  // 1EE47       ; ARABIC
6020             0x1EE48,  // 1EE48       ; UNKNOWN
6021             0x1EE49,  // 1EE49       ; ARABIC
6022             0x1EE4A,  // 1EE4A       ; UNKNOWN
6023             0x1EE4B,  // 1EE4B       ; ARABIC
6024             0x1EE4C,  // 1EE4C       ; UNKNOWN
6025             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6026             0x1EE50,  // 1EE50       ; UNKNOWN
6027             0x1EE51,  // 1EE51..1EE52; ARABIC
6028             0x1EE53,  // 1EE53       ; UNKNOWN
6029             0x1EE54,  // 1EE54       ; ARABIC
6030             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6031             0x1EE57,  // 1EE57       ; ARABIC
6032             0x1EE58,  // 1EE58       ; UNKNOWN
6033             0x1EE59,  // 1EE59       ; ARABIC
6034             0x1EE5A,  // 1EE5A       ; UNKNOWN
6035             0x1EE5B,  // 1EE5B       ; ARABIC
6036             0x1EE5C,  // 1EE5C       ; UNKNOWN
6037             0x1EE5D,  // 1EE5D       ; ARABIC
6038             0x1EE5E,  // 1EE5E       ; UNKNOWN
6039             0x1EE5F,  // 1EE5F       ; ARABIC
6040             0x1EE60,  // 1EE60       ; UNKNOWN
6041             0x1EE61,  // 1EE61..1EE62; ARABIC
6042             0x1EE63,  // 1EE63       ; UNKNOWN
6043             0x1EE64,  // 1EE64       ; ARABIC
6044             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6045             0x1EE67,  // 1EE67..1EE6A; ARABIC
6046             0x1EE6B,  // 1EE6B       ; UNKNOWN
6047             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6048             0x1EE73,  // 1EE73       ; UNKNOWN
6049             0x1EE74,  // 1EE74..1EE77; ARABIC
6050             0x1EE78,  // 1EE78       ; UNKNOWN
6051             0x1EE79,  // 1EE79..1EE7C; ARABIC
6052             0x1EE7D,  // 1EE7D       ; UNKNOWN
6053             0x1EE7E,  // 1EE7E       ; ARABIC
6054             0x1EE7F,  // 1EE7F       ; UNKNOWN
6055             0x1EE80,  // 1EE80..1EE89; ARABIC
6056             0x1EE8A,  // 1EE8A       ; UNKNOWN
6057             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6058             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6059             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6060             0x1EEA4,  // 1EEA4       ; UNKNOWN
6061             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6062             0x1EEAA,  // 1EEAA       ; UNKNOWN
6063             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6064             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6065             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6066             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6067             0x1F000,  // 1F000..1F02B; COMMON
6068             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6069             0x1F030,  // 1F030..1F093; COMMON
6070             0x1F094,  // 1F094..1F09F; UNKNOWN
6071             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6072             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6073             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6074             0x1F0C0,  // 1F0C0       ; UNKNOWN
6075             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6076             0x1F0D0,  // 1F0D0       ; UNKNOWN
6077             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6078             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6079             0x1F100,  // 1F100..1F10C; COMMON
6080             0x1F10D,  // 1F10D..1F10F; UNKNOWN
6081             0x1F110,  // 1F110..1F12E; COMMON
6082             0x1F12F,  // 1F12F       ; UNKNOWN
6083             0x1F130,  // 1F130..1F16B; COMMON
6084             0x1F16C,  // 1F16C..1F16F; UNKNOWN
6085             0x1F170,  // 1F170..1F1AC; COMMON
6086             0x1F1AD,  // 1F1AD..1F1E5; UNKNOWN
6087             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6088             0x1F200,  // 1F200       ; HIRAGANA
6089             0x1F201,  // 1F201..1F202; COMMON
6090             0x1F203,  // 1F203..1F20F; UNKNOWN
6091             0x1F210,  // 1F210..1F23B; COMMON
6092             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6093             0x1F240,  // 1F240..1F248; COMMON
6094             0x1F249,  // 1F249..1F24F; UNKNOWN
6095             0x1F250,  // 1F250..1F251; COMMON
6096             0x1F252,  // 1F252..1F25F; UNKNOWN
6097             0x1F260,  // 1F260..1F265; COMMON
6098             0x1F266,  // 1F266..1F2FF; UNKNOWN
6099             0x1F300,  // 1F300..1F6D4; COMMON
6100             0x1F6D5,  // 1F6D5..1F6DF; UNKNOWN
6101             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6102             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6103             0x1F6F0,  // 1F6F0..1F6F8; COMMON
6104             0x1F6F9,  // 1F6F9..1F6FF; UNKNOWN
6105             0x1F700,  // 1F700..1F773; COMMON
6106             0x1F774,  // 1F774..1F77F; UNKNOWN
6107             0x1F780,  // 1F780..1F7D4; COMMON
6108             0x1F7D5,  // 1F7D5..1F7FF; UNKNOWN
6109             0x1F800,  // 1F800..1F80B; COMMON
6110             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6111             0x1F810,  // 1F810..1F847; COMMON
6112             0x1F848,  // 1F848..1F84F; UNKNOWN
6113             0x1F850,  // 1F850..1F859; COMMON
6114             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6115             0x1F860,  // 1F860..1F887; COMMON
6116             0x1F888,  // 1F888..1F88F; UNKNOWN
6117             0x1F890,  // 1F890..1F8AD; COMMON
6118             0x1F8AE,  // 1F8AE..1F8FF; UNKNOWN
6119             0x1F900,  // 1F900..1F90B; COMMON
6120             0x1F90C,  // 1F90C..1F90F; UNKNOWN
6121             0x1F910,  // 1F910..1F93E; COMMON
6122             0x1F93F,  // 1F93F       ; UNKNOWN
6123             0x1F940,  // 1F940..1F94C; COMMON
6124             0x1F94D,  // 1F94D..1F94F; UNKNOWN
6125             0x1F950,  // 1F950..1F96B; COMMON
6126             0x1F96C,  // 1F96C..1F97F; UNKNOWN
6127             0x1F980,  // 1F980..1F997; COMMON
6128             0x1F998,  // 1F998..1F9BF; UNKNOWN
6129             0x1F9C0,  // 1F9C0       ; COMMON
6130             0x1F9C1,  // 1F9C1..1F9CF; UNKNOWN
6131             0x1F9D0,  // 1F9D0..1F9E6; COMMON
6132             0x1F9E7,  // 1F9E7..1FFFF; UNKNOWN
6133             0x20000,  // 20000..2A6D6; HAN
6134             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
6135             0x2A700,  // 2A700..2B734; HAN
6136             0x2B735,  // 2B735..2B73F; UNKNOWN
6137             0x2B740,  // 2B740..2B81D; HAN
6138             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6139             0x2B820,  // 2B820..2CEA1; HAN
6140             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6141             0x2CEB0,  // 2CEB0..2EBE0; HAN
6142             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6143             0x2F800,  // 2F800..2FA1D; HAN
6144             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
6145             0xE0001,  // E0001       ; COMMON
6146             0xE0002,  // E0002..E001F; UNKNOWN
6147             0xE0020,  // E0020..E007F; COMMON
6148             0xE0080,  // E0080..E00FF; UNKNOWN
6149             0xE0100,  // E0100..E01EF; INHERITED
6150             0xE01F0   // E01F0..10FFFF; UNKNOWN
6151         };
6152 
6153         private static final UnicodeScript[] scripts = {
6154             COMMON,                   // 0000..0040
6155             LATIN,                    // 0041..005A
6156             COMMON,                   // 005B..0060
6157             LATIN,                    // 0061..007A
6158             COMMON,                   // 007B..00A9
6159             LATIN,                    // 00AA
6160             COMMON,                   // 00AB..00B9
6161             LATIN,                    // 00BA
6162             COMMON,                   // 00BB..00BF
6163             LATIN,                    // 00C0..00D6
6164             COMMON,                   // 00D7
6165             LATIN,                    // 00D8..00F6
6166             COMMON,                   // 00F7
6167             LATIN,                    // 00F8..02B8
6168             COMMON,                   // 02B9..02DF
6169             LATIN,                    // 02E0..02E4
6170             COMMON,                   // 02E5..02E9
6171             BOPOMOFO,                 // 02EA..02EB
6172             COMMON,                   // 02EC..02FF
6173             INHERITED,                // 0300..036F
6174             GREEK,                    // 0370..0373
6175             COMMON,                   // 0374
6176             GREEK,                    // 0375..0377
6177             UNKNOWN,                  // 0378..0379
6178             GREEK,                    // 037A..037D
6179             COMMON,                   // 037E
6180             GREEK,                    // 037F
6181             UNKNOWN,                  // 0380..0383
6182             GREEK,                    // 0384
6183             COMMON,                   // 0385
6184             GREEK,                    // 0386
6185             COMMON,                   // 0387
6186             GREEK,                    // 0388..038A
6187             UNKNOWN,                  // 038B
6188             GREEK,                    // 038C
6189             UNKNOWN,                  // 038D
6190             GREEK,                    // 038E..03A1
6191             UNKNOWN,                  // 03A2
6192             GREEK,                    // 03A3..03E1
6193             COPTIC,                   // 03E2..03EF
6194             GREEK,                    // 03F0..03FF
6195             CYRILLIC,                 // 0400..0484
6196             INHERITED,                // 0485..0486
6197             CYRILLIC,                 // 0487..052F
6198             UNKNOWN,                  // 0530
6199             ARMENIAN,                 // 0531..0556
6200             UNKNOWN,                  // 0557..0558
6201             ARMENIAN,                 // 0559..055F
6202             UNKNOWN,                  // 0560
6203             ARMENIAN,                 // 0561..0587
6204             UNKNOWN,                  // 0588
6205             COMMON,                   // 0589
6206             ARMENIAN,                 // 058A
6207             UNKNOWN,                  // 058B..058C
6208             ARMENIAN,                 // 058D..058F
6209             UNKNOWN,                  // 0590
6210             HEBREW,                   // 0591..05C7
6211             UNKNOWN,                  // 05C8..05CF
6212             HEBREW,                   // 05D0..05EA
6213             UNKNOWN,                  // 05EB..05EF
6214             HEBREW,                   // 05F0..05F4
6215             UNKNOWN,                  // 05F5..05FF
6216             ARABIC,                   // 0600..0604
6217             COMMON,                   // 0605
6218             ARABIC,                   // 0606..060B
6219             COMMON,                   // 060C
6220             ARABIC,                   // 060D..061A
6221             COMMON,                   // 061B
6222             ARABIC,                   // 061C
6223             UNKNOWN,                  // 061D
6224             ARABIC,                   // 061E
6225             COMMON,                   // 061F
6226             ARABIC,                   // 0620..063F
6227             COMMON,                   // 0640
6228             ARABIC,                   // 0641..064A
6229             INHERITED,                // 064B..0655
6230             ARABIC,                   // 0656..066F
6231             INHERITED,                // 0670
6232             ARABIC,                   // 0671..06DC
6233             COMMON,                   // 06DD
6234             ARABIC,                   // 06DE..06FF
6235             SYRIAC,                   // 0700..070D
6236             UNKNOWN,                  // 070E
6237             SYRIAC,                   // 070F..074A
6238             UNKNOWN,                  // 074B..074C
6239             SYRIAC,                   // 074D..074F
6240             ARABIC,                   // 0750..077F
6241             THAANA,                   // 0780..07B1
6242             UNKNOWN,                  // 07B2..07BF
6243             NKO,                      // 07C0..07FA
6244             UNKNOWN,                  // 07FB..07FF
6245             SAMARITAN,                // 0800..082D
6246             UNKNOWN,                  // 082E..082F
6247             SAMARITAN,                // 0830..083E
6248             UNKNOWN,                  // 083F
6249             MANDAIC,                  // 0840..085B
6250             UNKNOWN,                  // 085C..085D
6251             MANDAIC,                  // 085E
6252             UNKNOWN,                  // 085F
6253             SYRIAC,                   // 0860..086A
6254             UNKNOWN,                  // 086B..089F
6255             ARABIC,                   // 08A0..08B4
6256             UNKNOWN,                  // 08B5
6257             ARABIC,                   // 08B6..08BD
6258             UNKNOWN,                  // 08BE..08D3
6259             ARABIC,                   // 08D4..08E1
6260             COMMON,                   // 08E2
6261             ARABIC,                   // 08E3..08FF
6262             DEVANAGARI,               // 0900..0950
6263             INHERITED,                // 0951..0952
6264             DEVANAGARI,               // 0953..0963
6265             COMMON,                   // 0964..0965
6266             DEVANAGARI,               // 0966..097F
6267             BENGALI,                  // 0980..0983
6268             UNKNOWN,                  // 0984
6269             BENGALI,                  // 0985..098C
6270             UNKNOWN,                  // 098D..098E
6271             BENGALI,                  // 098F..0990
6272             UNKNOWN,                  // 0991..0992
6273             BENGALI,                  // 0993..09A8
6274             UNKNOWN,                  // 09A9
6275             BENGALI,                  // 09AA..09B0
6276             UNKNOWN,                  // 09B1
6277             BENGALI,                  // 09B2
6278             UNKNOWN,                  // 09B3..09B5
6279             BENGALI,                  // 09B6..09B9
6280             UNKNOWN,                  // 09BA..09BB
6281             BENGALI,                  // 09BC..09C4
6282             UNKNOWN,                  // 09C5..09C6
6283             BENGALI,                  // 09C7..09C8
6284             UNKNOWN,                  // 09C9..09CA
6285             BENGALI,                  // 09CB..09CE
6286             UNKNOWN,                  // 09CF..09D6
6287             BENGALI,                  // 09D7
6288             UNKNOWN,                  // 09D8..09DB
6289             BENGALI,                  // 09DC..09DD
6290             UNKNOWN,                  // 09DE
6291             BENGALI,                  // 09DF..09E3
6292             UNKNOWN,                  // 09E4..09E5
6293             BENGALI,                  // 09E6..09FD
6294             UNKNOWN,                  // 09FE..0A00
6295             GURMUKHI,                 // 0A01..0A03
6296             UNKNOWN,                  // 0A04
6297             GURMUKHI,                 // 0A05..0A0A
6298             UNKNOWN,                  // 0A0B..0A0E
6299             GURMUKHI,                 // 0A0F..0A10
6300             UNKNOWN,                  // 0A11..0A12
6301             GURMUKHI,                 // 0A13..0A28
6302             UNKNOWN,                  // 0A29
6303             GURMUKHI,                 // 0A2A..0A30
6304             UNKNOWN,                  // 0A31
6305             GURMUKHI,                 // 0A32..0A33
6306             UNKNOWN,                  // 0A34
6307             GURMUKHI,                 // 0A35..0A36
6308             UNKNOWN,                  // 0A37
6309             GURMUKHI,                 // 0A38..0A39
6310             UNKNOWN,                  // 0A3A..0A3B
6311             GURMUKHI,                 // 0A3C
6312             UNKNOWN,                  // 0A3D
6313             GURMUKHI,                 // 0A3E..0A42
6314             UNKNOWN,                  // 0A43..0A46
6315             GURMUKHI,                 // 0A47..0A48
6316             UNKNOWN,                  // 0A49..0A4A
6317             GURMUKHI,                 // 0A4B..0A4D
6318             UNKNOWN,                  // 0A4E..0A50
6319             GURMUKHI,                 // 0A51
6320             UNKNOWN,                  // 0A52..0A58
6321             GURMUKHI,                 // 0A59..0A5C
6322             UNKNOWN,                  // 0A5D
6323             GURMUKHI,                 // 0A5E
6324             UNKNOWN,                  // 0A5F..0A65
6325             GURMUKHI,                 // 0A66..0A75
6326             UNKNOWN,                  // 0A76..0A80
6327             GUJARATI,                 // 0A81..0A83
6328             UNKNOWN,                  // 0A84
6329             GUJARATI,                 // 0A85..0A8D
6330             UNKNOWN,                  // 0A8E
6331             GUJARATI,                 // 0A8F..0A91
6332             UNKNOWN,                  // 0A92
6333             GUJARATI,                 // 0A93..0AA8
6334             UNKNOWN,                  // 0AA9
6335             GUJARATI,                 // 0AAA..0AB0
6336             UNKNOWN,                  // 0AB1
6337             GUJARATI,                 // 0AB2..0AB3
6338             UNKNOWN,                  // 0AB4
6339             GUJARATI,                 // 0AB5..0AB9
6340             UNKNOWN,                  // 0ABA..0ABB
6341             GUJARATI,                 // 0ABC..0AC5
6342             UNKNOWN,                  // 0AC6
6343             GUJARATI,                 // 0AC7..0AC9
6344             UNKNOWN,                  // 0ACA
6345             GUJARATI,                 // 0ACB..0ACD
6346             UNKNOWN,                  // 0ACE..0ACF
6347             GUJARATI,                 // 0AD0
6348             UNKNOWN,                  // 0AD1..0ADF
6349             GUJARATI,                 // 0AE0..0AE3
6350             UNKNOWN,                  // 0AE4..0AE5
6351             GUJARATI,                 // 0AE6..0AF1
6352             UNKNOWN,                  // 0AF2..0AF8
6353             GUJARATI,                 // 0AF9..0AFF
6354             UNKNOWN,                  // 0B00
6355             ORIYA,                    // 0B01..0B03
6356             UNKNOWN,                  // 0B04
6357             ORIYA,                    // 0B05..0B0C
6358             UNKNOWN,                  // 0B0D..0B0E
6359             ORIYA,                    // 0B0F..0B10
6360             UNKNOWN,                  // 0B11..0B12
6361             ORIYA,                    // 0B13..0B28
6362             UNKNOWN,                  // 0B29
6363             ORIYA,                    // 0B2A..0B30
6364             UNKNOWN,                  // 0B31
6365             ORIYA,                    // 0B32..0B33
6366             UNKNOWN,                  // 0B34
6367             ORIYA,                    // 0B35..0B39
6368             UNKNOWN,                  // 0B3A..0B3B
6369             ORIYA,                    // 0B3C..0B44
6370             UNKNOWN,                  // 0B45..0B46
6371             ORIYA,                    // 0B47..0B48
6372             UNKNOWN,                  // 0B49..0B4A
6373             ORIYA,                    // 0B4B..0B4D
6374             UNKNOWN,                  // 0B4E..0B55
6375             ORIYA,                    // 0B56..0B57
6376             UNKNOWN,                  // 0B58..0B5B
6377             ORIYA,                    // 0B5C..0B5D
6378             UNKNOWN,                  // 0B5E
6379             ORIYA,                    // 0B5F..0B63
6380             UNKNOWN,                  // 0B64..0B65
6381             ORIYA,                    // 0B66..0B77
6382             UNKNOWN,                  // 0B78..0B81
6383             TAMIL,                    // 0B82..0B83
6384             UNKNOWN,                  // 0B84
6385             TAMIL,                    // 0B85..0B8A
6386             UNKNOWN,                  // 0B8B..0B8D
6387             TAMIL,                    // 0B8E..0B90
6388             UNKNOWN,                  // 0B91
6389             TAMIL,                    // 0B92..0B95
6390             UNKNOWN,                  // 0B96..0B98
6391             TAMIL,                    // 0B99..0B9A
6392             UNKNOWN,                  // 0B9B
6393             TAMIL,                    // 0B9C
6394             UNKNOWN,                  // 0B9D
6395             TAMIL,                    // 0B9E..0B9F
6396             UNKNOWN,                  // 0BA0..0BA2
6397             TAMIL,                    // 0BA3..0BA4
6398             UNKNOWN,                  // 0BA5..0BA7
6399             TAMIL,                    // 0BA8..0BAA
6400             UNKNOWN,                  // 0BAB..0BAD
6401             TAMIL,                    // 0BAE..0BB9
6402             UNKNOWN,                  // 0BBA..0BBD
6403             TAMIL,                    // 0BBE..0BC2
6404             UNKNOWN,                  // 0BC3..0BC5
6405             TAMIL,                    // 0BC6..0BC8
6406             UNKNOWN,                  // 0BC9
6407             TAMIL,                    // 0BCA..0BCD
6408             UNKNOWN,                  // 0BCE..0BCF
6409             TAMIL,                    // 0BD0
6410             UNKNOWN,                  // 0BD1..0BD6
6411             TAMIL,                    // 0BD7
6412             UNKNOWN,                  // 0BD8..0BE5
6413             TAMIL,                    // 0BE6..0BFA
6414             UNKNOWN,                  // 0BFB..0BFF
6415             TELUGU,                   // 0C00..0C03
6416             UNKNOWN,                  // 0C04
6417             TELUGU,                   // 0C05..0C0C
6418             UNKNOWN,                  // 0C0D
6419             TELUGU,                   // 0C0E..0C10
6420             UNKNOWN,                  // 0C11
6421             TELUGU,                   // 0C12..0C28
6422             UNKNOWN,                  // 0C29
6423             TELUGU,                   // 0C2A..0C39
6424             UNKNOWN,                  // 0C3A..0C3C
6425             TELUGU,                   // 0C3D..0C44
6426             UNKNOWN,                  // 0C45
6427             TELUGU,                   // 0C46..0C48
6428             UNKNOWN,                  // 0C49
6429             TELUGU,                   // 0C4A..0C4D
6430             UNKNOWN,                  // 0C4E..0C54
6431             TELUGU,                   // 0C55..0C56
6432             UNKNOWN,                  // 0C57
6433             TELUGU,                   // 0C58..0C5A
6434             UNKNOWN,                  // 0C5B..0C5F
6435             TELUGU,                   // 0C60..0C63
6436             UNKNOWN,                  // 0C64..0C65
6437             TELUGU,                   // 0C66..0C6F
6438             UNKNOWN,                  // 0C70..0C77
6439             TELUGU,                   // 0C78..0C7F
6440             KANNADA,                  // 0C80..0C83
6441             UNKNOWN,                  // 0C84
6442             KANNADA,                  // 0C85..0C8C
6443             UNKNOWN,                  // 0C8D
6444             KANNADA,                  // 0C8E..0C90
6445             UNKNOWN,                  // 0C91
6446             KANNADA,                  // 0C92..0CA8
6447             UNKNOWN,                  // 0CA9
6448             KANNADA,                  // 0CAA..0CB3
6449             UNKNOWN,                  // 0CB4
6450             KANNADA,                  // 0CB5..0CB9
6451             UNKNOWN,                  // 0CBA..0CBB
6452             KANNADA,                  // 0CBC..0CC4
6453             UNKNOWN,                  // 0CC5
6454             KANNADA,                  // 0CC6..0CC8
6455             UNKNOWN,                  // 0CC9
6456             KANNADA,                  // 0CCA..0CCD
6457             UNKNOWN,                  // 0CCE..0CD4
6458             KANNADA,                  // 0CD5..0CD6
6459             UNKNOWN,                  // 0CD7..0CDD
6460             KANNADA,                  // 0CDE
6461             UNKNOWN,                  // 0CDF
6462             KANNADA,                  // 0CE0..0CE3
6463             UNKNOWN,                  // 0CE4..0CE5
6464             KANNADA,                  // 0CE6..0CEF
6465             UNKNOWN,                  // 0CF0
6466             KANNADA,                  // 0CF1..0CF2
6467             UNKNOWN,                  // 0CF3..0CFF
6468             MALAYALAM,                // 0D00..0D03
6469             UNKNOWN,                  // 0D04
6470             MALAYALAM,                // 0D05..0D0C
6471             UNKNOWN,                  // 0D0D
6472             MALAYALAM,                // 0D0E..0D10
6473             UNKNOWN,                  // 0D11
6474             MALAYALAM,                // 0D12..0D44
6475             UNKNOWN,                  // 0D45
6476             MALAYALAM,                // 0D46..0D48
6477             UNKNOWN,                  // 0D49
6478             MALAYALAM,                // 0D4A..0D4F
6479             UNKNOWN,                  // 0D50..0D53
6480             MALAYALAM,                // 0D54..0D63
6481             UNKNOWN,                  // 0D64..0D65
6482             MALAYALAM,                // 0D66..0D7F
6483             UNKNOWN,                  // 0D80..0D81
6484             SINHALA,                  // 0D82..0D83
6485             UNKNOWN,                  // 0D84
6486             SINHALA,                  // 0D85..0D96
6487             UNKNOWN,                  // 0D97..0D99
6488             SINHALA,                  // 0D9A..0DB1
6489             UNKNOWN,                  // 0DB2
6490             SINHALA,                  // 0DB3..0DBB
6491             UNKNOWN,                  // 0DBC
6492             SINHALA,                  // 0DBD
6493             UNKNOWN,                  // 0DBE..0DBF
6494             SINHALA,                  // 0DC0..0DC6
6495             UNKNOWN,                  // 0DC7..0DC9
6496             SINHALA,                  // 0DCA
6497             UNKNOWN,                  // 0DCB..0DCE
6498             SINHALA,                  // 0DCF..0DD4
6499             UNKNOWN,                  // 0DD5
6500             SINHALA,                  // 0DD6
6501             UNKNOWN,                  // 0DD7
6502             SINHALA,                  // 0DD8..0DDF
6503             UNKNOWN,                  // 0DE0..0DE5
6504             SINHALA,                  // 0DE6..0DEF
6505             UNKNOWN,                  // 0DF0..0DF1
6506             SINHALA,                  // 0DF2..0DF4
6507             UNKNOWN,                  // 0DF5..0E00
6508             THAI,                     // 0E01..0E3A
6509             UNKNOWN,                  // 0E3B..0E3E
6510             COMMON,                   // 0E3F
6511             THAI,                     // 0E40..0E5B
6512             UNKNOWN,                  // 0E5C..0E80
6513             LAO,                      // 0E81..0E82
6514             UNKNOWN,                  // 0E83
6515             LAO,                      // 0E84
6516             UNKNOWN,                  // 0E85..0E86
6517             LAO,                      // 0E87..0E88
6518             UNKNOWN,                  // 0E89
6519             LAO,                      // 0E8A
6520             UNKNOWN,                  // 0E8B..0E8C
6521             LAO,                      // 0E8D
6522             UNKNOWN,                  // 0E8E..0E93
6523             LAO,                      // 0E94..0E97
6524             UNKNOWN,                  // 0E98
6525             LAO,                      // 0E99..0E9F
6526             UNKNOWN,                  // 0EA0
6527             LAO,                      // 0EA1..0EA3
6528             UNKNOWN,                  // 0EA4
6529             LAO,                      // 0EA5
6530             UNKNOWN,                  // 0EA6
6531             LAO,                      // 0EA7
6532             UNKNOWN,                  // 0EA8..0EA9
6533             LAO,                      // 0EAA..0EAB
6534             UNKNOWN,                  // 0EAC
6535             LAO,                      // 0EAD..0EB9
6536             UNKNOWN,                  // 0EBA
6537             LAO,                      // 0EBB..0EBD
6538             UNKNOWN,                  // 0EBE..0EBF
6539             LAO,                      // 0EC0..0EC4
6540             UNKNOWN,                  // 0EC5
6541             LAO,                      // 0EC6
6542             UNKNOWN,                  // 0EC7
6543             LAO,                      // 0EC8..0ECD
6544             UNKNOWN,                  // 0ECE..0ECF
6545             LAO,                      // 0ED0..0ED9
6546             UNKNOWN,                  // 0EDA..0EDB
6547             LAO,                      // 0EDC..0EDF
6548             UNKNOWN,                  // 0EE0..0EFF
6549             TIBETAN,                  // 0F00..0F47
6550             UNKNOWN,                  // 0F48
6551             TIBETAN,                  // 0F49..0F6C
6552             UNKNOWN,                  // 0F6D..0F70
6553             TIBETAN,                  // 0F71..0F97
6554             UNKNOWN,                  // 0F98
6555             TIBETAN,                  // 0F99..0FBC
6556             UNKNOWN,                  // 0FBD
6557             TIBETAN,                  // 0FBE..0FCC
6558             UNKNOWN,                  // 0FCD
6559             TIBETAN,                  // 0FCE..0FD4
6560             COMMON,                   // 0FD5..0FD8
6561             TIBETAN,                  // 0FD9..0FDA
6562             UNKNOWN,                  // 0FDB..0FFF
6563             MYANMAR,                  // 1000..109F
6564             GEORGIAN,                 // 10A0..10C5
6565             UNKNOWN,                  // 10C6
6566             GEORGIAN,                 // 10C7
6567             UNKNOWN,                  // 10C8..10CC
6568             GEORGIAN,                 // 10CD
6569             UNKNOWN,                  // 10CE..10CF
6570             GEORGIAN,                 // 10D0..10FA
6571             COMMON,                   // 10FB
6572             GEORGIAN,                 // 10FC..10FF
6573             HANGUL,                   // 1100..11FF
6574             ETHIOPIC,                 // 1200..1248
6575             UNKNOWN,                  // 1249
6576             ETHIOPIC,                 // 124A..124D
6577             UNKNOWN,                  // 124E..124F
6578             ETHIOPIC,                 // 1250..1256
6579             UNKNOWN,                  // 1257
6580             ETHIOPIC,                 // 1258
6581             UNKNOWN,                  // 1259
6582             ETHIOPIC,                 // 125A..125D
6583             UNKNOWN,                  // 125E..125F
6584             ETHIOPIC,                 // 1260..1288
6585             UNKNOWN,                  // 1289
6586             ETHIOPIC,                 // 128A..128D
6587             UNKNOWN,                  // 128E..128F
6588             ETHIOPIC,                 // 1290..12B0
6589             UNKNOWN,                  // 12B1
6590             ETHIOPIC,                 // 12B2..12B5
6591             UNKNOWN,                  // 12B6..12B7
6592             ETHIOPIC,                 // 12B8..12BE
6593             UNKNOWN,                  // 12BF
6594             ETHIOPIC,                 // 12C0
6595             UNKNOWN,                  // 12C1
6596             ETHIOPIC,                 // 12C2..12C5
6597             UNKNOWN,                  // 12C6..12C7
6598             ETHIOPIC,                 // 12C8..12D6
6599             UNKNOWN,                  // 12D7
6600             ETHIOPIC,                 // 12D8..1310
6601             UNKNOWN,                  // 1311
6602             ETHIOPIC,                 // 1312..1315
6603             UNKNOWN,                  // 1316..1317
6604             ETHIOPIC,                 // 1318..135A
6605             UNKNOWN,                  // 135B..135C
6606             ETHIOPIC,                 // 135D..137C
6607             UNKNOWN,                  // 137D..137F
6608             ETHIOPIC,                 // 1380..1399
6609             UNKNOWN,                  // 139A..139F
6610             CHEROKEE,                 // 13A0..13F5
6611             UNKNOWN,                  // 13F6..13F7
6612             CHEROKEE,                 // 13F8..13FD
6613             UNKNOWN,                  // 13FE..13FF
6614             CANADIAN_ABORIGINAL,      // 1400..167F
6615             OGHAM,                    // 1680..169C
6616             UNKNOWN,                  // 169D..169F
6617             RUNIC,                    // 16A0..16EA
6618             COMMON,                   // 16EB..16ED
6619             RUNIC,                    // 16EE..16F8
6620             UNKNOWN,                  // 16F9..16FF
6621             TAGALOG,                  // 1700..170C
6622             UNKNOWN,                  // 170D
6623             TAGALOG,                  // 170E..1714
6624             UNKNOWN,                  // 1715..171F
6625             HANUNOO,                  // 1720..1734
6626             COMMON,                   // 1735..1736
6627             UNKNOWN,                  // 1737..173F
6628             BUHID,                    // 1740..1753
6629             UNKNOWN,                  // 1754..175F
6630             TAGBANWA,                 // 1760..176C
6631             UNKNOWN,                  // 176D
6632             TAGBANWA,                 // 176E..1770
6633             UNKNOWN,                  // 1771
6634             TAGBANWA,                 // 1772..1773
6635             UNKNOWN,                  // 1774..177F
6636             KHMER,                    // 1780..17DD
6637             UNKNOWN,                  // 17DE..17DF
6638             KHMER,                    // 17E0..17E9
6639             UNKNOWN,                  // 17EA..17EF
6640             KHMER,                    // 17F0..17F9
6641             UNKNOWN,                  // 17FA..17FF
6642             MONGOLIAN,                // 1800..1801
6643             COMMON,                   // 1802..1803
6644             MONGOLIAN,                // 1804
6645             COMMON,                   // 1805
6646             MONGOLIAN,                // 1806..180E
6647             UNKNOWN,                  // 180F
6648             MONGOLIAN,                // 1810..1819
6649             UNKNOWN,                  // 181A..181F
6650             MONGOLIAN,                // 1820..1877
6651             UNKNOWN,                  // 1878..187F
6652             MONGOLIAN,                // 1880..18AA
6653             UNKNOWN,                  // 18AB..18AF
6654             CANADIAN_ABORIGINAL,      // 18B0..18F5
6655             UNKNOWN,                  // 18F6..18FF
6656             LIMBU,                    // 1900..191E
6657             UNKNOWN,                  // 191F
6658             LIMBU,                    // 1920..192B
6659             UNKNOWN,                  // 192C..192F
6660             LIMBU,                    // 1930..193B
6661             UNKNOWN,                  // 193C..193F
6662             LIMBU,                    // 1940
6663             UNKNOWN,                  // 1941..1943
6664             LIMBU,                    // 1944..194F
6665             TAI_LE,                   // 1950..196D
6666             UNKNOWN,                  // 196E..196F
6667             TAI_LE,                   // 1970..1974
6668             UNKNOWN,                  // 1975..197F
6669             NEW_TAI_LUE,              // 1980..19AB
6670             UNKNOWN,                  // 19AC..19AF
6671             NEW_TAI_LUE,              // 19B0..19C9
6672             UNKNOWN,                  // 19CA..19CF
6673             NEW_TAI_LUE,              // 19D0..19DA
6674             UNKNOWN,                  // 19DB..19DD
6675             NEW_TAI_LUE,              // 19DE..19DF
6676             KHMER,                    // 19E0..19FF
6677             BUGINESE,                 // 1A00..1A1B
6678             UNKNOWN,                  // 1A1C..1A1D
6679             BUGINESE,                 // 1A1E..1A1F
6680             TAI_THAM,                 // 1A20..1A5E
6681             UNKNOWN,                  // 1A5F
6682             TAI_THAM,                 // 1A60..1A7C
6683             UNKNOWN,                  // 1A7D..1A7E
6684             TAI_THAM,                 // 1A7F..1A89
6685             UNKNOWN,                  // 1A8A..1A8F
6686             TAI_THAM,                 // 1A90..1A99
6687             UNKNOWN,                  // 1A9A..1A9F
6688             TAI_THAM,                 // 1AA0..1AAD
6689             UNKNOWN,                  // 1AAE..1AAF
6690             INHERITED,                // 1AB0..1ABE
6691             UNKNOWN,                  // 1ABF..1AFF
6692             BALINESE,                 // 1B00..1B4B
6693             UNKNOWN,                  // 1B4C..1B4F
6694             BALINESE,                 // 1B50..1B7C
6695             UNKNOWN,                  // 1B7D..1B7F
6696             SUNDANESE,                // 1B80..1BBF
6697             BATAK,                    // 1BC0..1BF3
6698             UNKNOWN,                  // 1BF4..1BFB
6699             BATAK,                    // 1BFC..1BFF
6700             LEPCHA,                   // 1C00..1C37
6701             UNKNOWN,                  // 1C38..1C3A
6702             LEPCHA,                   // 1C3B..1C49
6703             UNKNOWN,                  // 1C4A..1C4C
6704             LEPCHA,                   // 1C4D..1C4F
6705             OL_CHIKI,                 // 1C50..1C7F
6706             CYRILLIC,                 // 1C80..1C88
6707             UNKNOWN,                  // 1C89..1CBF
6708             SUNDANESE,                // 1CC0..1CC7
6709             UNKNOWN,                  // 1CC8..1CCF
6710             INHERITED,                // 1CD0..1CD2
6711             COMMON,                   // 1CD3
6712             INHERITED,                // 1CD4..1CE0
6713             COMMON,                   // 1CE1
6714             INHERITED,                // 1CE2..1CE8
6715             COMMON,                   // 1CE9..1CEC
6716             INHERITED,                // 1CED
6717             COMMON,                   // 1CEE..1CF3
6718             INHERITED,                // 1CF4
6719             COMMON,                   // 1CF5..1CF7
6720             INHERITED,                // 1CF8..1CF9
6721             UNKNOWN,                  // 1CFA..1CFF
6722             LATIN,                    // 1D00..1D25
6723             GREEK,                    // 1D26..1D2A
6724             CYRILLIC,                 // 1D2B
6725             LATIN,                    // 1D2C..1D5C
6726             GREEK,                    // 1D5D..1D61
6727             LATIN,                    // 1D62..1D65
6728             GREEK,                    // 1D66..1D6A
6729             LATIN,                    // 1D6B..1D77
6730             CYRILLIC,                 // 1D78
6731             LATIN,                    // 1D79..1DBE
6732             GREEK,                    // 1DBF
6733             INHERITED,                // 1DC0..1DF9
6734             UNKNOWN,                  // 1DFA
6735             INHERITED,                // 1DFB..1DFF
6736             LATIN,                    // 1E00..1EFF
6737             GREEK,                    // 1F00..1F15
6738             UNKNOWN,                  // 1F16..1F17
6739             GREEK,                    // 1F18..1F1D
6740             UNKNOWN,                  // 1F1E..1F1F
6741             GREEK,                    // 1F20..1F45
6742             UNKNOWN,                  // 1F46..1F47
6743             GREEK,                    // 1F48..1F4D
6744             UNKNOWN,                  // 1F4E..1F4F
6745             GREEK,                    // 1F50..1F57
6746             UNKNOWN,                  // 1F58
6747             GREEK,                    // 1F59
6748             UNKNOWN,                  // 1F5A
6749             GREEK,                    // 1F5B
6750             UNKNOWN,                  // 1F5C
6751             GREEK,                    // 1F5D
6752             UNKNOWN,                  // 1F5E
6753             GREEK,                    // 1F5F..1F7D
6754             UNKNOWN,                  // 1F7E..1F7F
6755             GREEK,                    // 1F80..1FB4
6756             UNKNOWN,                  // 1FB5
6757             GREEK,                    // 1FB6..1FC4
6758             UNKNOWN,                  // 1FC5
6759             GREEK,                    // 1FC6..1FD3
6760             UNKNOWN,                  // 1FD4..1FD5
6761             GREEK,                    // 1FD6..1FDB
6762             UNKNOWN,                  // 1FDC
6763             GREEK,                    // 1FDD..1FEF
6764             UNKNOWN,                  // 1FF0..1FF1
6765             GREEK,                    // 1FF2..1FF4
6766             UNKNOWN,                  // 1FF5
6767             GREEK,                    // 1FF6..1FFE
6768             UNKNOWN,                  // 1FFF
6769             COMMON,                   // 2000..200B
6770             INHERITED,                // 200C..200D
6771             COMMON,                   // 200E..2064
6772             UNKNOWN,                  // 2065
6773             COMMON,                   // 2066..2070
6774             LATIN,                    // 2071
6775             UNKNOWN,                  // 2072..2073
6776             COMMON,                   // 2074..207E
6777             LATIN,                    // 207F
6778             COMMON,                   // 2080..208E
6779             UNKNOWN,                  // 208F
6780             LATIN,                    // 2090..209C
6781             UNKNOWN,                  // 209D..209F
6782             COMMON,                   // 20A0..20BF
6783             UNKNOWN,                  // 20C0..20CF
6784             INHERITED,                // 20D0..20F0
6785             UNKNOWN,                  // 20F1..20FF
6786             COMMON,                   // 2100..2125
6787             GREEK,                    // 2126
6788             COMMON,                   // 2127..2129
6789             LATIN,                    // 212A..212B
6790             COMMON,                   // 212C..2131
6791             LATIN,                    // 2132
6792             COMMON,                   // 2133..214D
6793             LATIN,                    // 214E
6794             COMMON,                   // 214F..215F
6795             LATIN,                    // 2160..2188
6796             COMMON,                   // 2189..218B
6797             UNKNOWN,                  // 218C..218F
6798             COMMON,                   // 2190..2426
6799             UNKNOWN,                  // 2427..243F
6800             COMMON,                   // 2440..244A
6801             UNKNOWN,                  // 244B..245F
6802             COMMON,                   // 2460..27FF
6803             BRAILLE,                  // 2800..28FF
6804             COMMON,                   // 2900..2B73
6805             UNKNOWN,                  // 2B74..2B75
6806             COMMON,                   // 2B76..2B95
6807             UNKNOWN,                  // 2B96..2B97
6808             COMMON,                   // 2B98..2BB9
6809             UNKNOWN,                  // 2BBA..2BBC
6810             COMMON,                   // 2BBD..2BC8
6811             UNKNOWN,                  // 2BC9
6812             COMMON,                   // 2BCA..2BD2
6813             UNKNOWN,                  // 2BD3..2BEB
6814             COMMON,                   // 2BEC..2BEF
6815             UNKNOWN,                  // 2BF0..2BFF
6816             GLAGOLITIC,               // 2C00..2C2E
6817             UNKNOWN,                  // 2C2F
6818             GLAGOLITIC,               // 2C30..2C5E
6819             UNKNOWN,                  // 2C5F
6820             LATIN,                    // 2C60..2C7F
6821             COPTIC,                   // 2C80..2CF3
6822             UNKNOWN,                  // 2CF4..2CF8
6823             COPTIC,                   // 2CF9..2CFF
6824             GEORGIAN,                 // 2D00..2D25
6825             UNKNOWN,                  // 2D26
6826             GEORGIAN,                 // 2D27
6827             UNKNOWN,                  // 2D28..2D2C
6828             GEORGIAN,                 // 2D2D
6829             UNKNOWN,                  // 2D2E..2D2F
6830             TIFINAGH,                 // 2D30..2D67
6831             UNKNOWN,                  // 2D68..2D6E
6832             TIFINAGH,                 // 2D6F..2D70
6833             UNKNOWN,                  // 2D71..2D7E
6834             TIFINAGH,                 // 2D7F
6835             ETHIOPIC,                 // 2D80..2D96
6836             UNKNOWN,                  // 2D97..2D9F
6837             ETHIOPIC,                 // 2DA0..2DA6
6838             UNKNOWN,                  // 2DA7
6839             ETHIOPIC,                 // 2DA8..2DAE
6840             UNKNOWN,                  // 2DAF
6841             ETHIOPIC,                 // 2DB0..2DB6
6842             UNKNOWN,                  // 2DB7
6843             ETHIOPIC,                 // 2DB8..2DBE
6844             UNKNOWN,                  // 2DBF
6845             ETHIOPIC,                 // 2DC0..2DC6
6846             UNKNOWN,                  // 2DC7
6847             ETHIOPIC,                 // 2DC8..2DCE
6848             UNKNOWN,                  // 2DCF
6849             ETHIOPIC,                 // 2DD0..2DD6
6850             UNKNOWN,                  // 2DD7
6851             ETHIOPIC,                 // 2DD8..2DDE
6852             UNKNOWN,                  // 2DDF
6853             CYRILLIC,                 // 2DE0..2DFF
6854             COMMON,                   // 2E00..2E49
6855             UNKNOWN,                  // 2E4A..2E7F
6856             HAN,                      // 2E80..2E99
6857             UNKNOWN,                  // 2E9A
6858             HAN,                      // 2E9B..2EF3
6859             UNKNOWN,                  // 2EF4..2EFF
6860             HAN,                      // 2F00..2FD5
6861             UNKNOWN,                  // 2FD6..2FEF
6862             COMMON,                   // 2FF0..2FFB
6863             UNKNOWN,                  // 2FFC..2FFF
6864             COMMON,                   // 3000..3004
6865             HAN,                      // 3005
6866             COMMON,                   // 3006
6867             HAN,                      // 3007
6868             COMMON,                   // 3008..3020
6869             HAN,                      // 3021..3029
6870             INHERITED,                // 302A..302D
6871             HANGUL,                   // 302E..302F
6872             COMMON,                   // 3030..3037
6873             HAN,                      // 3038..303B
6874             COMMON,                   // 303C..303F
6875             UNKNOWN,                  // 3040
6876             HIRAGANA,                 // 3041..3096
6877             UNKNOWN,                  // 3097..3098
6878             INHERITED,                // 3099..309A
6879             COMMON,                   // 309B..309C
6880             HIRAGANA,                 // 309D..309F
6881             COMMON,                   // 30A0
6882             KATAKANA,                 // 30A1..30FA
6883             COMMON,                   // 30FB..30FC
6884             KATAKANA,                 // 30FD..30FF
6885             UNKNOWN,                  // 3100..3104
6886             BOPOMOFO,                 // 3105..312E
6887             UNKNOWN,                  // 312F..3130
6888             HANGUL,                   // 3131..318E
6889             UNKNOWN,                  // 318F
6890             COMMON,                   // 3190..319F
6891             BOPOMOFO,                 // 31A0..31BA
6892             UNKNOWN,                  // 31BB..31BF
6893             COMMON,                   // 31C0..31E3
6894             UNKNOWN,                  // 31E4..31EF
6895             KATAKANA,                 // 31F0..31FF
6896             HANGUL,                   // 3200..321E
6897             UNKNOWN,                  // 321F
6898             COMMON,                   // 3220..325F
6899             HANGUL,                   // 3260..327E
6900             COMMON,                   // 327F..32CF
6901             KATAKANA,                 // 32D0..32FE
6902             UNKNOWN,                  // 32FF
6903             KATAKANA,                 // 3300..3357
6904             COMMON,                   // 3358..33FF
6905             HAN,                      // 3400..4DB5
6906             UNKNOWN,                  // 4DB6..4DBF
6907             COMMON,                   // 4DC0..4DFF
6908             HAN,                      // 4E00..9FEA
6909             UNKNOWN,                  // 9FEB..9FFF
6910             YI,                       // A000..A48C
6911             UNKNOWN,                  // A48D..A48F
6912             YI,                       // A490..A4C6
6913             UNKNOWN,                  // A4C7..A4CF
6914             LISU,                     // A4D0..A4FF
6915             VAI,                      // A500..A62B
6916             UNKNOWN,                  // A62C..A63F
6917             CYRILLIC,                 // A640..A69F
6918             BAMUM,                    // A6A0..A6F7
6919             UNKNOWN,                  // A6F8..A6FF
6920             COMMON,                   // A700..A721
6921             LATIN,                    // A722..A787
6922             COMMON,                   // A788..A78A
6923             LATIN,                    // A78B..A7AE
6924             UNKNOWN,                  // A7AF
6925             LATIN,                    // A7B0..A7B7
6926             UNKNOWN,                  // A7B8..A7F6
6927             LATIN,                    // A7F7..A7FF
6928             SYLOTI_NAGRI,             // A800..A82B
6929             UNKNOWN,                  // A82C..A82F
6930             COMMON,                   // A830..A839
6931             UNKNOWN,                  // A83A..A83F
6932             PHAGS_PA,                 // A840..A877
6933             UNKNOWN,                  // A878..A87F
6934             SAURASHTRA,               // A880..A8C5
6935             UNKNOWN,                  // A8C6..A8CD
6936             SAURASHTRA,               // A8CE..A8D9
6937             UNKNOWN,                  // A8DA..A8DF
6938             DEVANAGARI,               // A8E0..A8FD
6939             UNKNOWN,                  // A8FE..A8FF
6940             KAYAH_LI,                 // A900..A92D
6941             COMMON,                   // A92E
6942             KAYAH_LI,                 // A92F
6943             REJANG,                   // A930..A953
6944             UNKNOWN,                  // A954..A95E
6945             REJANG,                   // A95F
6946             HANGUL,                   // A960..A97C
6947             UNKNOWN,                  // A97D..A97F
6948             JAVANESE,                 // A980..A9CD
6949             UNKNOWN,                  // A9CE
6950             COMMON,                   // A9CF
6951             JAVANESE,                 // A9D0..A9D9
6952             UNKNOWN,                  // A9DA..A9DD
6953             JAVANESE,                 // A9DE..A9DF
6954             MYANMAR,                  // A9E0..A9FE
6955             UNKNOWN,                  // A9FF
6956             CHAM,                     // AA00..AA36
6957             UNKNOWN,                  // AA37..AA3F
6958             CHAM,                     // AA40..AA4D
6959             UNKNOWN,                  // AA4E..AA4F
6960             CHAM,                     // AA50..AA59
6961             UNKNOWN,                  // AA5A..AA5B
6962             CHAM,                     // AA5C..AA5F
6963             MYANMAR,                  // AA60..AA7F
6964             TAI_VIET,                 // AA80..AAC2
6965             UNKNOWN,                  // AAC3..AADA
6966             TAI_VIET,                 // AADB..AADF
6967             MEETEI_MAYEK,             // AAE0..AAF6
6968             UNKNOWN,                  // AAF7..AB00
6969             ETHIOPIC,                 // AB01..AB06
6970             UNKNOWN,                  // AB07..AB08
6971             ETHIOPIC,                 // AB09..AB0E
6972             UNKNOWN,                  // AB0F..AB10
6973             ETHIOPIC,                 // AB11..AB16
6974             UNKNOWN,                  // AB17..AB1F
6975             ETHIOPIC,                 // AB20..AB26
6976             UNKNOWN,                  // AB27
6977             ETHIOPIC,                 // AB28..AB2E
6978             UNKNOWN,                  // AB2F
6979             LATIN,                    // AB30..AB5A
6980             COMMON,                   // AB5B
6981             LATIN,                    // AB5C..AB64
6982             GREEK,                    // AB65
6983             UNKNOWN,                  // AB66..AB6F
6984             CHEROKEE,                 // AB70..ABBF
6985             MEETEI_MAYEK,             // ABC0..ABED
6986             UNKNOWN,                  // ABEE..ABEF
6987             MEETEI_MAYEK,             // ABF0..ABF9
6988             UNKNOWN,                  // ABFA..ABFF
6989             HANGUL,                   // AC00..D7A3
6990             UNKNOWN,                  // D7A4..D7AF
6991             HANGUL,                   // D7B0..D7C6
6992             UNKNOWN,                  // D7C7..D7CA
6993             HANGUL,                   // D7CB..D7FB
6994             UNKNOWN,                  // D7FC..F8FF
6995             HAN,                      // F900..FA6D
6996             UNKNOWN,                  // FA6E..FA6F
6997             HAN,                      // FA70..FAD9
6998             UNKNOWN,                  // FADA..FAFF
6999             LATIN,                    // FB00..FB06
7000             UNKNOWN,                  // FB07..FB12
7001             ARMENIAN,                 // FB13..FB17
7002             UNKNOWN,                  // FB18..FB1C
7003             HEBREW,                   // FB1D..FB36
7004             UNKNOWN,                  // FB37
7005             HEBREW,                   // FB38..FB3C
7006             UNKNOWN,                  // FB3D
7007             HEBREW,                   // FB3E
7008             UNKNOWN,                  // FB3F
7009             HEBREW,                   // FB40..FB41
7010             UNKNOWN,                  // FB42
7011             HEBREW,                   // FB43..FB44
7012             UNKNOWN,                  // FB45
7013             HEBREW,                   // FB46..FB4F
7014             ARABIC,                   // FB50..FBC1
7015             UNKNOWN,                  // FBC2..FBD2
7016             ARABIC,                   // FBD3..FD3D
7017             COMMON,                   // FD3E..FD3F
7018             UNKNOWN,                  // FD40..FD4F
7019             ARABIC,                   // FD50..FD8F
7020             UNKNOWN,                  // FD90..FD91
7021             ARABIC,                   // FD92..FDC7
7022             UNKNOWN,                  // FDC8..FDEF
7023             ARABIC,                   // FDF0..FDFD
7024             UNKNOWN,                  // FDFE..FDFF
7025             INHERITED,                // FE00..FE0F
7026             COMMON,                   // FE10..FE19
7027             UNKNOWN,                  // FE1A..FE1F
7028             INHERITED,                // FE20..FE2D
7029             CYRILLIC,                 // FE2E..FE2F
7030             COMMON,                   // FE30..FE52
7031             UNKNOWN,                  // FE53
7032             COMMON,                   // FE54..FE66
7033             UNKNOWN,                  // FE67
7034             COMMON,                   // FE68..FE6B
7035             UNKNOWN,                  // FE6C..FE6F
7036             ARABIC,                   // FE70..FE74
7037             UNKNOWN,                  // FE75
7038             ARABIC,                   // FE76..FEFC
7039             UNKNOWN,                  // FEFD..FEFE
7040             COMMON,                   // FEFF
7041             UNKNOWN,                  // FF00
7042             COMMON,                   // FF01..FF20
7043             LATIN,                    // FF21..FF3A
7044             COMMON,                   // FF3B..FF40
7045             LATIN,                    // FF41..FF5A
7046             COMMON,                   // FF5B..FF65
7047             KATAKANA,                 // FF66..FF6F
7048             COMMON,                   // FF70
7049             KATAKANA,                 // FF71..FF9D
7050             COMMON,                   // FF9E..FF9F
7051             HANGUL,                   // FFA0..FFBE
7052             UNKNOWN,                  // FFBF..FFC1
7053             HANGUL,                   // FFC2..FFC7
7054             UNKNOWN,                  // FFC8..FFC9
7055             HANGUL,                   // FFCA..FFCF
7056             UNKNOWN,                  // FFD0..FFD1
7057             HANGUL,                   // FFD2..FFD7
7058             UNKNOWN,                  // FFD8..FFD9
7059             HANGUL,                   // FFDA..FFDC
7060             UNKNOWN,                  // FFDD..FFDF
7061             COMMON,                   // FFE0..FFE6
7062             UNKNOWN,                  // FFE7
7063             COMMON,                   // FFE8..FFEE
7064             UNKNOWN,                  // FFEF..FFF8
7065             COMMON,                   // FFF9..FFFD
7066             UNKNOWN,                  // FFFE..FFFF
7067             LINEAR_B,                 // 10000..1000B
7068             UNKNOWN,                  // 1000C
7069             LINEAR_B,                 // 1000D..10026
7070             UNKNOWN,                  // 10027
7071             LINEAR_B,                 // 10028..1003A
7072             UNKNOWN,                  // 1003B
7073             LINEAR_B,                 // 1003C..1003D
7074             UNKNOWN,                  // 1003E
7075             LINEAR_B,                 // 1003F..1004D
7076             UNKNOWN,                  // 1004E..1004F
7077             LINEAR_B,                 // 10050..1005D
7078             UNKNOWN,                  // 1005E..1007F
7079             LINEAR_B,                 // 10080..100FA
7080             UNKNOWN,                  // 100FB..100FF
7081             COMMON,                   // 10100..10102
7082             UNKNOWN,                  // 10103..10106
7083             COMMON,                   // 10107..10133
7084             UNKNOWN,                  // 10134..10136
7085             COMMON,                   // 10137..1013F
7086             GREEK,                    // 10140..1018E
7087             UNKNOWN,                  // 1018F
7088             COMMON,                   // 10190..1019B
7089             UNKNOWN,                  // 1019C..1019F
7090             GREEK,                    // 101A0
7091             UNKNOWN,                  // 101A1..101CF
7092             COMMON,                   // 101D0..101FC
7093             INHERITED,                // 101FD
7094             UNKNOWN,                  // 101FE..1027F
7095             LYCIAN,                   // 10280..1029C
7096             UNKNOWN,                  // 1029D..1029F
7097             CARIAN,                   // 102A0..102D0
7098             UNKNOWN,                  // 102D1..102DF
7099             INHERITED,                // 102E0
7100             COMMON,                   // 102E1..102FB
7101             UNKNOWN,                  // 102FC..102FF
7102             OLD_ITALIC,               // 10300..10323
7103             UNKNOWN,                  // 10324..1032C
7104             OLD_ITALIC,               // 1032D..1032F
7105             GOTHIC,                   // 10330..1034A
7106             UNKNOWN,                  // 1034B..1034F
7107             OLD_PERMIC,               // 10350..1037A
7108             UNKNOWN,                  // 1037B..1037F
7109             UGARITIC,                 // 10380..1039D
7110             UNKNOWN,                  // 1039E
7111             UGARITIC,                 // 1039F
7112             OLD_PERSIAN,              // 103A0..103C3
7113             UNKNOWN,                  // 103C4..103C7
7114             OLD_PERSIAN,              // 103C8..103D5
7115             UNKNOWN,                  // 103D6..103FF
7116             DESERET,                  // 10400..1044F
7117             SHAVIAN,                  // 10450..1047F
7118             OSMANYA,                  // 10480..1049D
7119             UNKNOWN,                  // 1049E..1049F
7120             OSMANYA,                  // 104A0..104A9
7121             UNKNOWN,                  // 104AA..104AF
7122             OSAGE,                    // 104B0..104D3
7123             UNKNOWN,                  // 104D4..104D7
7124             OSAGE,                    // 104D8..104FB
7125             UNKNOWN,                  // 104FC..104FF
7126             ELBASAN,                  // 10500..10527
7127             UNKNOWN,                  // 10528..1052F
7128             CAUCASIAN_ALBANIAN,       // 10530..10563
7129             UNKNOWN,                  // 10564..1056E
7130             CAUCASIAN_ALBANIAN,       // 1056F
7131             UNKNOWN,                  // 10570..105FF
7132             LINEAR_A,                 // 10600..10736
7133             UNKNOWN,                  // 10737..1073F
7134             LINEAR_A,                 // 10740..10755
7135             UNKNOWN,                  // 10756..1075F
7136             LINEAR_A,                 // 10760..10767
7137             UNKNOWN,                  // 10768..107FF
7138             CYPRIOT,                  // 10800..10805
7139             UNKNOWN,                  // 10806..10807
7140             CYPRIOT,                  // 10808
7141             UNKNOWN,                  // 10809
7142             CYPRIOT,                  // 1080A..10835
7143             UNKNOWN,                  // 10836
7144             CYPRIOT,                  // 10837..10838
7145             UNKNOWN,                  // 10839..1083B
7146             CYPRIOT,                  // 1083C
7147             UNKNOWN,                  // 1083D..1083E
7148             CYPRIOT,                  // 1083F
7149             IMPERIAL_ARAMAIC,         // 10840..10855
7150             UNKNOWN,                  // 10856
7151             IMPERIAL_ARAMAIC,         // 10857..1085F
7152             PALMYRENE,                // 10860..1087F
7153             NABATAEAN,                // 10880..1089E
7154             UNKNOWN,                  // 1089F..108A6
7155             NABATAEAN,                // 108A7..108AF
7156             UNKNOWN,                  // 108B0..108DF
7157             HATRAN,                   // 108E0..108F2
7158             UNKNOWN,                  // 108F3
7159             HATRAN,                   // 108F4..108F5
7160             UNKNOWN,                  // 108F6..108FA
7161             HATRAN,                   // 108FB..108FF
7162             PHOENICIAN,               // 10900..1091B
7163             UNKNOWN,                  // 1091C..1091E
7164             PHOENICIAN,               // 1091F
7165             LYDIAN,                   // 10920..10939
7166             UNKNOWN,                  // 1093A..1093E
7167             LYDIAN,                   // 1093F
7168             UNKNOWN,                  // 10940..1097F
7169             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7170             MEROITIC_CURSIVE,         // 109A0..109B7
7171             UNKNOWN,                  // 109B8..109BB
7172             MEROITIC_CURSIVE,         // 109BC..109CF
7173             UNKNOWN,                  // 109D0..109D1
7174             MEROITIC_CURSIVE,         // 109D2..109FF
7175             KHAROSHTHI,               // 10A00..10A03
7176             UNKNOWN,                  // 10A04
7177             KHAROSHTHI,               // 10A05..10A06
7178             UNKNOWN,                  // 10A07..10A0B
7179             KHAROSHTHI,               // 10A0C..10A13
7180             UNKNOWN,                  // 10A14
7181             KHAROSHTHI,               // 10A15..10A17
7182             UNKNOWN,                  // 10A18
7183             KHAROSHTHI,               // 10A19..10A33
7184             UNKNOWN,                  // 10A34..10A37
7185             KHAROSHTHI,               // 10A38..10A3A
7186             UNKNOWN,                  // 10A3B..10A3E
7187             KHAROSHTHI,               // 10A3F..10A47
7188             UNKNOWN,                  // 10A48..10A4F
7189             KHAROSHTHI,               // 10A50..10A58
7190             UNKNOWN,                  // 10A59..10A5F
7191             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7192             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7193             UNKNOWN,                  // 10AA0..10ABF
7194             MANICHAEAN,               // 10AC0..10AE6
7195             UNKNOWN,                  // 10AE7..10AEA
7196             MANICHAEAN,               // 10AEB..10AF6
7197             UNKNOWN,                  // 10AF7..10AFF
7198             AVESTAN,                  // 10B00..10B35
7199             UNKNOWN,                  // 10B36..10B38
7200             AVESTAN,                  // 10B39..10B3F
7201             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7202             UNKNOWN,                  // 10B56..10B57
7203             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7204             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7205             UNKNOWN,                  // 10B73..10B77
7206             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7207             PSALTER_PAHLAVI,          // 10B80..10B91
7208             UNKNOWN,                  // 10B92..10B98
7209             PSALTER_PAHLAVI,          // 10B99..10B9C
7210             UNKNOWN,                  // 10B9D..10BA8
7211             PSALTER_PAHLAVI,          // 10BA9..10BAF
7212             UNKNOWN,                  // 10BB0..10BFF
7213             OLD_TURKIC,               // 10C00..10C48
7214             UNKNOWN,                  // 10C49..10C7F
7215             OLD_HUNGARIAN,            // 10C80..10CB2
7216             UNKNOWN,                  // 10CB3..10CBF
7217             OLD_HUNGARIAN,            // 10CC0..10CF2
7218             UNKNOWN,                  // 10CF3..10CF9
7219             OLD_HUNGARIAN,            // 10CFA..10CFF
7220             UNKNOWN,                  // 10D00..10E5F
7221             ARABIC,                   // 10E60..10E7E
7222             UNKNOWN,                  // 10E7F..10FFF
7223             BRAHMI,                   // 11000..1104D
7224             UNKNOWN,                  // 1104E..11051
7225             BRAHMI,                   // 11052..1106F
7226             UNKNOWN,                  // 11070..1107E
7227             BRAHMI,                   // 1107F
7228             KAITHI,                   // 11080..110C1
7229             UNKNOWN,                  // 110C2..110CF
7230             SORA_SOMPENG,             // 110D0..110E8
7231             UNKNOWN,                  // 110E9..110EF
7232             SORA_SOMPENG,             // 110F0..110F9
7233             UNKNOWN,                  // 110FA..110FF
7234             CHAKMA,                   // 11100..11134
7235             UNKNOWN,                  // 11135
7236             CHAKMA,                   // 11136..11143
7237             UNKNOWN,                  // 11144..1114F
7238             MAHAJANI,                 // 11150..11176
7239             UNKNOWN,                  // 11177..1117F
7240             SHARADA,                  // 11180..111CD
7241             UNKNOWN,                  // 111CE..111CF
7242             SHARADA,                  // 111D0..111DF
7243             UNKNOWN,                  // 111E0
7244             SINHALA,                  // 111E1..111F4
7245             UNKNOWN,                  // 111F5..111FF
7246             KHOJKI,                   // 11200..11211
7247             UNKNOWN,                  // 11212
7248             KHOJKI,                   // 11213..1123E
7249             UNKNOWN,                  // 1123F..1127F
7250             MULTANI,                  // 11280..11286
7251             UNKNOWN,                  // 11287
7252             MULTANI,                  // 11288
7253             UNKNOWN,                  // 11289
7254             MULTANI,                  // 1128A..1128D
7255             UNKNOWN,                  // 1128E
7256             MULTANI,                  // 1128F..1129D
7257             UNKNOWN,                  // 1129E
7258             MULTANI,                  // 1129F..112A9
7259             UNKNOWN,                  // 112AA..112AF
7260             KHUDAWADI,                // 112B0..112EA
7261             UNKNOWN,                  // 112EB..112EF
7262             KHUDAWADI,                // 112F0..112F9
7263             UNKNOWN,                  // 112FA..112FF
7264             GRANTHA,                  // 11300..11303
7265             UNKNOWN,                  // 11304
7266             GRANTHA,                  // 11305..1130C
7267             UNKNOWN,                  // 1130D..1130E
7268             GRANTHA,                  // 1130F..11310
7269             UNKNOWN,                  // 11311..11312
7270             GRANTHA,                  // 11313..11328
7271             UNKNOWN,                  // 11329
7272             GRANTHA,                  // 1132A..11330
7273             UNKNOWN,                  // 11331
7274             GRANTHA,                  // 11332..11333
7275             UNKNOWN,                  // 11334
7276             GRANTHA,                  // 11335..11339
7277             UNKNOWN,                  // 1133A..1133B
7278             GRANTHA,                  // 1133C..11344
7279             UNKNOWN,                  // 11345..11346
7280             GRANTHA,                  // 11347..11348
7281             UNKNOWN,                  // 11349..1134A
7282             GRANTHA,                  // 1134B..1134D
7283             UNKNOWN,                  // 1134E..1134F
7284             GRANTHA,                  // 11350
7285             UNKNOWN,                  // 11351..11356
7286             GRANTHA,                  // 11357
7287             UNKNOWN,                  // 11358..1135C
7288             GRANTHA,                  // 1135D..11363
7289             UNKNOWN,                  // 11364..11365
7290             GRANTHA,                  // 11366..1136C
7291             UNKNOWN,                  // 1136D..1136F
7292             GRANTHA,                  // 11370..11374
7293             UNKNOWN,                  // 11375..113FF
7294             NEWA,                     // 11400..11459
7295             UNKNOWN,                  // 1145A
7296             NEWA,                     // 1145B
7297             UNKNOWN,                  // 1145C
7298             NEWA,                     // 1145D
7299             UNKNOWN,                  // 1145E..1147F
7300             TIRHUTA,                  // 11480..114C7
7301             UNKNOWN,                  // 114C8..114CF
7302             TIRHUTA,                  // 114D0..114D9
7303             UNKNOWN,                  // 114DA..1157F
7304             SIDDHAM,                  // 11580..115B5
7305             UNKNOWN,                  // 115B6..115B7
7306             SIDDHAM,                  // 115B8..115DD
7307             UNKNOWN,                  // 115DE..115FF
7308             MODI,                     // 11600..11644
7309             UNKNOWN,                  // 11645..1164F
7310             MODI,                     // 11650..11659
7311             UNKNOWN,                  // 1165A..1165F
7312             MONGOLIAN,                // 11660..1166C
7313             UNKNOWN,                  // 1166D..1167F
7314             TAKRI,                    // 11680..116B7
7315             UNKNOWN,                  // 116B8..116BF
7316             TAKRI,                    // 116C0..116C9
7317             UNKNOWN,                  // 116CA..116FF
7318             AHOM,                     // 11700..11719
7319             UNKNOWN,                  // 1171A..1171C
7320             AHOM,                     // 1171D..1172B
7321             UNKNOWN,                  // 1172C..1172F
7322             AHOM,                     // 11730..1173F
7323             UNKNOWN,                  // 11740..1189F
7324             WARANG_CITI,              // 118A0..118F2
7325             UNKNOWN,                  // 118F3..118FE
7326             WARANG_CITI,              // 118FF
7327             UNKNOWN,                  // 11900..119FF
7328             ZANABAZAR_SQUARE,         // 11A00..11A47
7329             UNKNOWN,                  // 11A48..11A4F
7330             SOYOMBO,                  // 11A50..11A83
7331             UNKNOWN,                  // 11A84..11A85
7332             SOYOMBO,                  // 11A86..11A9C
7333             UNKNOWN,                  // 11A9D
7334             SOYOMBO,                  // 11A9E..11AA2
7335             UNKNOWN,                  // 11AA3..11ABF
7336             PAU_CIN_HAU,              // 11AC0..11AF8
7337             UNKNOWN,                  // 11AF9..11BFF
7338             BHAIKSUKI,                // 11C00..11C08
7339             UNKNOWN,                  // 11C09
7340             BHAIKSUKI,                // 11C0A..11C36
7341             UNKNOWN,                  // 11C37
7342             BHAIKSUKI,                // 11C38..11C45
7343             UNKNOWN,                  // 11C46..11C4F
7344             BHAIKSUKI,                // 11C50..11C6C
7345             UNKNOWN,                  // 11C6D..11C6F
7346             MARCHEN,                  // 11C70..11C8F
7347             UNKNOWN,                  // 11C90..11C91
7348             MARCHEN,                  // 11C92..11CA7
7349             UNKNOWN,                  // 11CA8
7350             MARCHEN,                  // 11CA9..11CB6
7351             UNKNOWN,                  // 11CB7..11CFF
7352             MASARAM_GONDI,            // 11D00..11D06
7353             UNKNOWN,                  // 11D07
7354             MASARAM_GONDI,            // 11D08..11D09
7355             UNKNOWN,                  // 11D0A
7356             MASARAM_GONDI,            // 11D0B..11D36
7357             UNKNOWN,                  // 11D37..11D39
7358             MASARAM_GONDI,            // 11D3A
7359             UNKNOWN,                  // 11D3B
7360             MASARAM_GONDI,            // 11D3C..11D3D
7361             UNKNOWN,                  // 11D3E
7362             MASARAM_GONDI,            // 11D3F..11D47
7363             UNKNOWN,                  // 11D48..11D4F
7364             MASARAM_GONDI,            // 11D50..11D59
7365             UNKNOWN,                  // 11D5A..11FFF
7366             CUNEIFORM,                // 12000..12399
7367             UNKNOWN,                  // 1239A..123FF
7368             CUNEIFORM,                // 12400..1246E
7369             UNKNOWN,                  // 1246F
7370             CUNEIFORM,                // 12470..12474
7371             UNKNOWN,                  // 12475..1247F
7372             CUNEIFORM,                // 12480..12543
7373             UNKNOWN,                  // 12544..12FFF
7374             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
7375             UNKNOWN,                  // 1342F..143FF
7376             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
7377             UNKNOWN,                  // 14647..167FF
7378             BAMUM,                    // 16800..16A38
7379             UNKNOWN,                  // 16A39..16A3F
7380             MRO,                      // 16A40..16A5E
7381             UNKNOWN,                  // 16A5F
7382             MRO,                      // 16A60..16A69
7383             UNKNOWN,                  // 16A6A..16A6D
7384             MRO,                      // 16A6E..16A6F
7385             UNKNOWN,                  // 16A70..16ACF
7386             BASSA_VAH,                // 16AD0..16AED
7387             UNKNOWN,                  // 16AEE..16AEF
7388             BASSA_VAH,                // 16AF0..16AF5
7389             UNKNOWN,                  // 16AF6..16AFF
7390             PAHAWH_HMONG,             // 16B00..16B45
7391             UNKNOWN,                  // 16B46..16B4F
7392             PAHAWH_HMONG,             // 16B50..16B59
7393             UNKNOWN,                  // 16B5A
7394             PAHAWH_HMONG,             // 16B5B..16B61
7395             UNKNOWN,                  // 16B62
7396             PAHAWH_HMONG,             // 16B63..16B77
7397             UNKNOWN,                  // 16B78..16B7C
7398             PAHAWH_HMONG,             // 16B7D..16B8F
7399             UNKNOWN,                  // 16B90..16EFF
7400             MIAO,                     // 16F00..16F44
7401             UNKNOWN,                  // 16F45..16F4F
7402             MIAO,                     // 16F50..16F7E
7403             UNKNOWN,                  // 16F7F..16F8E
7404             MIAO,                     // 16F8F..16F9F
7405             UNKNOWN,                  // 16FA0..16FDF
7406             TANGUT,                   // 16FE0
7407             NUSHU,                    // 16FE1
7408             UNKNOWN,                  // 16FE2..16FFF
7409             TANGUT,                   // 17000..187EC
7410             UNKNOWN,                  // 187ED..187FF
7411             TANGUT,                   // 18800..18AF2
7412             UNKNOWN,                  // 18AF3..1AFFF
7413             KATAKANA,                 // 1B000
7414             HIRAGANA,                 // 1B001..1B11E
7415             UNKNOWN,                  // 1B11F..1B16F
7416             NUSHU,                    // 1B170..1B2FB
7417             UNKNOWN,                  // 1B2FC..1BBFF
7418             DUPLOYAN,                 // 1BC00..1BC6A
7419             UNKNOWN,                  // 1BC6B..1BC6F
7420             DUPLOYAN,                 // 1BC70..1BC7C
7421             UNKNOWN,                  // 1BC7D..1BC7F
7422             DUPLOYAN,                 // 1BC80..1BC88
7423             UNKNOWN,                  // 1BC89..1BC8F
7424             DUPLOYAN,                 // 1BC90..1BC99
7425             UNKNOWN,                  // 1BC9A..1BC9B
7426             DUPLOYAN,                 // 1BC9C..1BC9F
7427             COMMON,                   // 1BCA0..1BCA3
7428             UNKNOWN,                  // 1BCA4..1CFFF
7429             COMMON,                   // 1D000..1D0F5
7430             UNKNOWN,                  // 1D0F6..1D0FF
7431             COMMON,                   // 1D100..1D126
7432             UNKNOWN,                  // 1D127..1D128
7433             COMMON,                   // 1D129..1D166
7434             INHERITED,                // 1D167..1D169
7435             COMMON,                   // 1D16A..1D17A
7436             INHERITED,                // 1D17B..1D182
7437             COMMON,                   // 1D183..1D184
7438             INHERITED,                // 1D185..1D18B
7439             COMMON,                   // 1D18C..1D1A9
7440             INHERITED,                // 1D1AA..1D1AD
7441             COMMON,                   // 1D1AE..1D1E8
7442             UNKNOWN,                  // 1D1E9..1D1FF
7443             GREEK,                    // 1D200..1D245
7444             UNKNOWN,                  // 1D246..1D2FF
7445             COMMON,                   // 1D300..1D356
7446             UNKNOWN,                  // 1D357..1D35F
7447             COMMON,                   // 1D360..1D371
7448             UNKNOWN,                  // 1D372..1D3FF
7449             COMMON,                   // 1D400..1D454
7450             UNKNOWN,                  // 1D455
7451             COMMON,                   // 1D456..1D49C
7452             UNKNOWN,                  // 1D49D
7453             COMMON,                   // 1D49E..1D49F
7454             UNKNOWN,                  // 1D4A0..1D4A1
7455             COMMON,                   // 1D4A2
7456             UNKNOWN,                  // 1D4A3..1D4A4
7457             COMMON,                   // 1D4A5..1D4A6
7458             UNKNOWN,                  // 1D4A7..1D4A8
7459             COMMON,                   // 1D4A9..1D4AC
7460             UNKNOWN,                  // 1D4AD
7461             COMMON,                   // 1D4AE..1D4B9
7462             UNKNOWN,                  // 1D4BA
7463             COMMON,                   // 1D4BB
7464             UNKNOWN,                  // 1D4BC
7465             COMMON,                   // 1D4BD..1D4C3
7466             UNKNOWN,                  // 1D4C4
7467             COMMON,                   // 1D4C5..1D505
7468             UNKNOWN,                  // 1D506
7469             COMMON,                   // 1D507..1D50A
7470             UNKNOWN,                  // 1D50B..1D50C
7471             COMMON,                   // 1D50D..1D514
7472             UNKNOWN,                  // 1D515
7473             COMMON,                   // 1D516..1D51C
7474             UNKNOWN,                  // 1D51D
7475             COMMON,                   // 1D51E..1D539
7476             UNKNOWN,                  // 1D53A
7477             COMMON,                   // 1D53B..1D53E
7478             UNKNOWN,                  // 1D53F
7479             COMMON,                   // 1D540..1D544
7480             UNKNOWN,                  // 1D545
7481             COMMON,                   // 1D546
7482             UNKNOWN,                  // 1D547..1D549
7483             COMMON,                   // 1D54A..1D550
7484             UNKNOWN,                  // 1D551
7485             COMMON,                   // 1D552..1D6A5
7486             UNKNOWN,                  // 1D6A6..1D6A7
7487             COMMON,                   // 1D6A8..1D7CB
7488             UNKNOWN,                  // 1D7CC..1D7CD
7489             COMMON,                   // 1D7CE..1D7FF
7490             SIGNWRITING,              // 1D800..1DA8B
7491             UNKNOWN,                  // 1DA8C..1DA9A
7492             SIGNWRITING,              // 1DA9B..1DA9F
7493             UNKNOWN,                  // 1DAA0
7494             SIGNWRITING,              // 1DAA1..1DAAF
7495             UNKNOWN,                  // 1DAB0..1DFFF
7496             GLAGOLITIC,               // 1E000..1E006
7497             UNKNOWN,                  // 1E007
7498             GLAGOLITIC,               // 1E008..1E018
7499             UNKNOWN,                  // 1E019..1E01A
7500             GLAGOLITIC,               // 1E01B..1E021
7501             UNKNOWN,                  // 1E022
7502             GLAGOLITIC,               // 1E023..1E024
7503             UNKNOWN,                  // 1E025
7504             GLAGOLITIC,               // 1E026..1E02A
7505             UNKNOWN,                  // 1E02B..1E7FF
7506             MENDE_KIKAKUI,            // 1E800..1E8C4
7507             UNKNOWN,                  // 1E8C5..1E8C6
7508             MENDE_KIKAKUI,            // 1E8C7..1E8D6
7509             UNKNOWN,                  // 1E8D7..1E8FF
7510             ADLAM,                    // 1E900..1E94A
7511             UNKNOWN,                  // 1E94B..1E94F
7512             ADLAM,                    // 1E950..1E959
7513             UNKNOWN,                  // 1E95A..1E95D
7514             ADLAM,                    // 1E95E..1E95F
7515             UNKNOWN,                  // 1E960..1EDFF
7516             ARABIC,                   // 1EE00..1EE03
7517             UNKNOWN,                  // 1EE04
7518             ARABIC,                   // 1EE05..1EE1F
7519             UNKNOWN,                  // 1EE20
7520             ARABIC,                   // 1EE21..1EE22
7521             UNKNOWN,                  // 1EE23
7522             ARABIC,                   // 1EE24
7523             UNKNOWN,                  // 1EE25..1EE26
7524             ARABIC,                   // 1EE27
7525             UNKNOWN,                  // 1EE28
7526             ARABIC,                   // 1EE29..1EE32
7527             UNKNOWN,                  // 1EE33
7528             ARABIC,                   // 1EE34..1EE37
7529             UNKNOWN,                  // 1EE38
7530             ARABIC,                   // 1EE39
7531             UNKNOWN,                  // 1EE3A
7532             ARABIC,                   // 1EE3B
7533             UNKNOWN,                  // 1EE3C..1EE41
7534             ARABIC,                   // 1EE42
7535             UNKNOWN,                  // 1EE43..1EE46
7536             ARABIC,                   // 1EE47
7537             UNKNOWN,                  // 1EE48
7538             ARABIC,                   // 1EE49
7539             UNKNOWN,                  // 1EE4A
7540             ARABIC,                   // 1EE4B
7541             UNKNOWN,                  // 1EE4C
7542             ARABIC,                   // 1EE4D..1EE4F
7543             UNKNOWN,                  // 1EE50
7544             ARABIC,                   // 1EE51..1EE52
7545             UNKNOWN,                  // 1EE53
7546             ARABIC,                   // 1EE54
7547             UNKNOWN,                  // 1EE55..1EE56
7548             ARABIC,                   // 1EE57
7549             UNKNOWN,                  // 1EE58
7550             ARABIC,                   // 1EE59
7551             UNKNOWN,                  // 1EE5A
7552             ARABIC,                   // 1EE5B
7553             UNKNOWN,                  // 1EE5C
7554             ARABIC,                   // 1EE5D
7555             UNKNOWN,                  // 1EE5E
7556             ARABIC,                   // 1EE5F
7557             UNKNOWN,                  // 1EE60
7558             ARABIC,                   // 1EE61..1EE62
7559             UNKNOWN,                  // 1EE63
7560             ARABIC,                   // 1EE64
7561             UNKNOWN,                  // 1EE65..1EE66
7562             ARABIC,                   // 1EE67..1EE6A
7563             UNKNOWN,                  // 1EE6B
7564             ARABIC,                   // 1EE6C..1EE72
7565             UNKNOWN,                  // 1EE73
7566             ARABIC,                   // 1EE74..1EE77
7567             UNKNOWN,                  // 1EE78
7568             ARABIC,                   // 1EE79..1EE7C
7569             UNKNOWN,                  // 1EE7D
7570             ARABIC,                   // 1EE7E
7571             UNKNOWN,                  // 1EE7F
7572             ARABIC,                   // 1EE80..1EE89
7573             UNKNOWN,                  // 1EE8A
7574             ARABIC,                   // 1EE8B..1EE9B
7575             UNKNOWN,                  // 1EE9C..1EEA0
7576             ARABIC,                   // 1EEA1..1EEA3
7577             UNKNOWN,                  // 1EEA4
7578             ARABIC,                   // 1EEA5..1EEA9
7579             UNKNOWN,                  // 1EEAA
7580             ARABIC,                   // 1EEAB..1EEBB
7581             UNKNOWN,                  // 1EEBC..1EEEF
7582             ARABIC,                   // 1EEF0..1EEF1
7583             UNKNOWN,                  // 1EEF2..1EFFF
7584             COMMON,                   // 1F000..1F02B
7585             UNKNOWN,                  // 1F02C..1F02F
7586             COMMON,                   // 1F030..1F093
7587             UNKNOWN,                  // 1F094..1F09F
7588             COMMON,                   // 1F0A0..1F0AE
7589             UNKNOWN,                  // 1F0AF..1F0B0
7590             COMMON,                   // 1F0B1..1F0BF
7591             UNKNOWN,                  // 1F0C0
7592             COMMON,                   // 1F0C1..1F0CF
7593             UNKNOWN,                  // 1F0D0
7594             COMMON,                   // 1F0D1..1F0F5
7595             UNKNOWN,                  // 1F0F6..1F0FF
7596             COMMON,                   // 1F100..1F10C
7597             UNKNOWN,                  // 1F10D..1F10F
7598             COMMON,                   // 1F110..1F12E
7599             UNKNOWN,                  // 1F12F
7600             COMMON,                   // 1F130..1F16B
7601             UNKNOWN,                  // 1F16C..1F16F
7602             COMMON,                   // 1F170..1F1AC
7603             UNKNOWN,                  // 1F1AD..1F1E5
7604             COMMON,                   // 1F1E6..1F1FF
7605             HIRAGANA,                 // 1F200
7606             COMMON,                   // 1F201..1F202
7607             UNKNOWN,                  // 1F203..1F20F
7608             COMMON,                   // 1F210..1F23B
7609             UNKNOWN,                  // 1F23C..1F23F
7610             COMMON,                   // 1F240..1F248
7611             UNKNOWN,                  // 1F249..1F24F
7612             COMMON,                   // 1F250..1F251
7613             UNKNOWN,                  // 1F252..1F25F
7614             COMMON,                   // 1F260..1F265
7615             UNKNOWN,                  // 1F266..1F2FF
7616             COMMON,                   // 1F300..1F6D4
7617             UNKNOWN,                  // 1F6D5..1F6DF
7618             COMMON,                   // 1F6E0..1F6EC
7619             UNKNOWN,                  // 1F6ED..1F6EF
7620             COMMON,                   // 1F6F0..1F6F8
7621             UNKNOWN,                  // 1F6F9..1F6FF
7622             COMMON,                   // 1F700..1F773
7623             UNKNOWN,                  // 1F774..1F77F
7624             COMMON,                   // 1F780..1F7D4
7625             UNKNOWN,                  // 1F7D5..1F7FF
7626             COMMON,                   // 1F800..1F80B
7627             UNKNOWN,                  // 1F80C..1F80F
7628             COMMON,                   // 1F810..1F847
7629             UNKNOWN,                  // 1F848..1F84F
7630             COMMON,                   // 1F850..1F859
7631             UNKNOWN,                  // 1F85A..1F85F
7632             COMMON,                   // 1F860..1F887
7633             UNKNOWN,                  // 1F888..1F88F
7634             COMMON,                   // 1F890..1F8AD
7635             UNKNOWN,                  // 1F8AE..1F8FF
7636             COMMON,                   // 1F900..1F90B
7637             UNKNOWN,                  // 1F90C..1F90F
7638             COMMON,                   // 1F910..1F93E
7639             UNKNOWN,                  // 1F93F
7640             COMMON,                   // 1F940..1F94C
7641             UNKNOWN,                  // 1F94D..1F94F
7642             COMMON,                   // 1F950..1F96B
7643             UNKNOWN,                  // 1F96C..1F97F
7644             COMMON,                   // 1F980..1F997
7645             UNKNOWN,                  // 1F998..1F9BF
7646             COMMON,                   // 1F9C0
7647             UNKNOWN,                  // 1F9C1..1F9CF
7648             COMMON,                   // 1F9D0..1F9E6
7649             UNKNOWN,                  // 1F9E7..1FFFF
7650             HAN,                      // 20000..2A6D6
7651             UNKNOWN,                  // 2A6D7..2A6FF
7652             HAN,                      // 2A700..2B734
7653             UNKNOWN,                  // 2B735..2B73F
7654             HAN,                      // 2B740..2B81D
7655             UNKNOWN,                  // 2B81E..2B81F
7656             HAN,                      // 2B820..2CEA1
7657             UNKNOWN,                  // 2CEA2..2CEAF
7658             HAN,                      // 2CEB0..2EBE0
7659             UNKNOWN,                  // 2EBE1..2F7FF
7660             HAN,                      // 2F800..2FA1D
7661             UNKNOWN,                  // 2FA1E..E0000
7662             COMMON,                   // E0001
7663             UNKNOWN,                  // E0002..E001F
7664             COMMON,                   // E0020..E007F
7665             UNKNOWN,                  // E0080..E00FF
7666             INHERITED,                // E0100..E01EF
7667             UNKNOWN                   // E01F0..10FFFF
7668         };
7669 
7670         private static HashMap<String, Character.UnicodeScript> aliases;
7671         static {
7672             aliases = new HashMap<>((int)(142 / 0.75f + 1.0f));
7673             aliases.put("ADLM", ADLAM);
7674             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7675             aliases.put("AHOM", AHOM);
7676             aliases.put("ARAB", ARABIC);
7677             aliases.put("ARMI", IMPERIAL_ARAMAIC);
7678             aliases.put("ARMN", ARMENIAN);
7679             aliases.put("AVST", AVESTAN);
7680             aliases.put("BALI", BALINESE);
7681             aliases.put("BAMU", BAMUM);
7682             aliases.put("BASS", BASSA_VAH);
7683             aliases.put("BATK", BATAK);
7684             aliases.put("BENG", BENGALI);
7685             aliases.put("BHKS", BHAIKSUKI);
7686             aliases.put("BOPO", BOPOMOFO);
7687             aliases.put("BRAH", BRAHMI);
7688             aliases.put("BRAI", BRAILLE);
7689             aliases.put("BUGI", BUGINESE);
7690             aliases.put("BUHD", BUHID);
7691             aliases.put("CAKM", CHAKMA);
7692             aliases.put("CANS", CANADIAN_ABORIGINAL);
7693             aliases.put("CARI", CARIAN);
7694             aliases.put("CHAM", CHAM);
7695             aliases.put("CHER", CHEROKEE);
7696             aliases.put("COPT", COPTIC);
7697             aliases.put("CPRT", CYPRIOT);
7698             aliases.put("CYRL", CYRILLIC);
7699             aliases.put("DEVA", DEVANAGARI);
7700             aliases.put("DSRT", DESERET);
7701             aliases.put("DUPL", DUPLOYAN);
7702             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7703             aliases.put("ELBA", ELBASAN);
7704             aliases.put("ETHI", ETHIOPIC);
7705             aliases.put("GEOR", GEORGIAN);
7706             aliases.put("GLAG", GLAGOLITIC);
7707             aliases.put("GONM", MASARAM_GONDI);
7708             aliases.put("GOTH", GOTHIC);
7709             aliases.put("GRAN", GRANTHA);
7710             aliases.put("GREK", GREEK);
7711             aliases.put("GUJR", GUJARATI);
7712             aliases.put("GURU", GURMUKHI);
7713             aliases.put("HANG", HANGUL);
7714             aliases.put("HANI", HAN);
7715             aliases.put("HANO", HANUNOO);
7716             aliases.put("HATR", HATRAN);
7717             aliases.put("HEBR", HEBREW);
7718             aliases.put("HIRA", HIRAGANA);
7719             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
7720             aliases.put("HMNG", PAHAWH_HMONG);
7721             // it appears we don't have the KATAKANA_OR_HIRAGANA
7722             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7723             aliases.put("HUNG", OLD_HUNGARIAN);
7724             aliases.put("ITAL", OLD_ITALIC);
7725             aliases.put("JAVA", JAVANESE);
7726             aliases.put("KALI", KAYAH_LI);
7727             aliases.put("KANA", KATAKANA);
7728             aliases.put("KHAR", KHAROSHTHI);
7729             aliases.put("KHMR", KHMER);
7730             aliases.put("KHOJ", KHOJKI);
7731             aliases.put("KNDA", KANNADA);
7732             aliases.put("KTHI", KAITHI);
7733             aliases.put("LANA", TAI_THAM);
7734             aliases.put("LAOO", LAO);
7735             aliases.put("LATN", LATIN);
7736             aliases.put("LEPC", LEPCHA);
7737             aliases.put("LIMB", LIMBU);
7738             aliases.put("LINA", LINEAR_A);
7739             aliases.put("LINB", LINEAR_B);
7740             aliases.put("LISU", LISU);
7741             aliases.put("LYCI", LYCIAN);
7742             aliases.put("LYDI", LYDIAN);
7743             aliases.put("MAHJ", MAHAJANI);
7744             aliases.put("MARC", MARCHEN);
7745             aliases.put("MAND", MANDAIC);
7746             aliases.put("MANI", MANICHAEAN);
7747             aliases.put("MEND", MENDE_KIKAKUI);
7748             aliases.put("MERC", MEROITIC_CURSIVE);
7749             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7750             aliases.put("MLYM", MALAYALAM);
7751             aliases.put("MODI", MODI);
7752             aliases.put("MONG", MONGOLIAN);
7753             aliases.put("MROO", MRO);
7754             aliases.put("MTEI", MEETEI_MAYEK);
7755             aliases.put("MULT", MULTANI);
7756             aliases.put("MYMR", MYANMAR);
7757             aliases.put("NARB", OLD_NORTH_ARABIAN);
7758             aliases.put("NBAT", NABATAEAN);
7759             aliases.put("NEWA", NEWA);
7760             aliases.put("NKOO", NKO);
7761             aliases.put("NSHU", NUSHU);
7762             aliases.put("OGAM", OGHAM);
7763             aliases.put("OLCK", OL_CHIKI);
7764             aliases.put("ORKH", OLD_TURKIC);
7765             aliases.put("ORYA", ORIYA);
7766             aliases.put("OSGE", OSAGE);
7767             aliases.put("OSMA", OSMANYA);
7768             aliases.put("PALM", PALMYRENE);
7769             aliases.put("PAUC", PAU_CIN_HAU);
7770             aliases.put("PERM", OLD_PERMIC);
7771             aliases.put("PHAG", PHAGS_PA);
7772             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
7773             aliases.put("PHLP", PSALTER_PAHLAVI);
7774             aliases.put("PHNX", PHOENICIAN);
7775             aliases.put("PLRD", MIAO);
7776             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
7777             aliases.put("RJNG", REJANG);
7778             aliases.put("RUNR", RUNIC);
7779             aliases.put("SAMR", SAMARITAN);
7780             aliases.put("SARB", OLD_SOUTH_ARABIAN);
7781             aliases.put("SAUR", SAURASHTRA);
7782             aliases.put("SGNW", SIGNWRITING);
7783             aliases.put("SHAW", SHAVIAN);
7784             aliases.put("SHRD", SHARADA);
7785             aliases.put("SIDD", SIDDHAM);
7786             aliases.put("SIND", KHUDAWADI);
7787             aliases.put("SINH", SINHALA);
7788             aliases.put("SORA", SORA_SOMPENG);
7789             aliases.put("SOYO", SOYOMBO);
7790             aliases.put("SUND", SUNDANESE);
7791             aliases.put("SYLO", SYLOTI_NAGRI);
7792             aliases.put("SYRC", SYRIAC);
7793             aliases.put("TAGB", TAGBANWA);
7794             aliases.put("TAKR", TAKRI);
7795             aliases.put("TALE", TAI_LE);
7796             aliases.put("TALU", NEW_TAI_LUE);
7797             aliases.put("TAML", TAMIL);
7798             aliases.put("TANG", TANGUT);
7799             aliases.put("TAVT", TAI_VIET);
7800             aliases.put("TELU", TELUGU);
7801             aliases.put("TFNG", TIFINAGH);
7802             aliases.put("TGLG", TAGALOG);
7803             aliases.put("THAA", THAANA);
7804             aliases.put("THAI", THAI);
7805             aliases.put("TIBT", TIBETAN);
7806             aliases.put("TIRH", TIRHUTA);
7807             aliases.put("UGAR", UGARITIC);
7808             aliases.put("VAII", VAI);
7809             aliases.put("WARA", WARANG_CITI);
7810             aliases.put("XPEO", OLD_PERSIAN);
7811             aliases.put("XSUX", CUNEIFORM);
7812             aliases.put("YIII", YI);
7813             aliases.put("ZANB", ZANABAZAR_SQUARE);
7814             aliases.put("ZINH", INHERITED);
7815             aliases.put("ZYYY", COMMON);
7816             aliases.put("ZZZZ", UNKNOWN);
7817         }
7818 
7819         /**
7820          * Returns the enum constant representing the Unicode script of which
7821          * the given character (Unicode code point) is assigned to.
7822          *
7823          * @param   codePoint the character (Unicode code point) in question.
7824          * @return  The {@code UnicodeScript} constant representing the
7825          *          Unicode script of which this character is assigned to.
7826          *
7827          * @exception IllegalArgumentException if the specified
7828          * {@code codePoint} is an invalid Unicode code point.
7829          * @see Character#isValidCodePoint(int)
7830          *
7831          */
7832         public static UnicodeScript of(int codePoint) {
7833             if (!isValidCodePoint(codePoint))
7834                 throw new IllegalArgumentException();
7835             int type = getType(codePoint);
7836             // leave SURROGATE and PRIVATE_USE for table lookup
7837             if (type == UNASSIGNED)
7838                 return UNKNOWN;
7839             int index = Arrays.binarySearch(scriptStarts, codePoint);
7840             if (index < 0)
7841                 index = -index - 2;
7842             return scripts[index];
7843         }
7844 
7845         /**
7846          * Returns the UnicodeScript constant with the given Unicode script
7847          * name or the script name alias. Script names and their aliases are
7848          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
7849          * and {@code PropertyValueAliases<version>.txt} define script names
7850          * and the script name aliases for a particular version of the
7851          * standard. The {@link Character} class specifies the version of
7852          * the standard that it supports.
7853          * <p>
7854          * Character case is ignored for all of the valid script names.
7855          * The en_US locale's case mapping rules are used to provide
7856          * case-insensitive string comparisons for script name validation.
7857          *
7858          * @param scriptName A {@code UnicodeScript} name.
7859          * @return The {@code UnicodeScript} constant identified
7860          *         by {@code scriptName}
7861          * @throws IllegalArgumentException if {@code scriptName} is an
7862          *         invalid name
7863          * @throws NullPointerException if {@code scriptName} is null
7864          */
7865         public static final UnicodeScript forName(String scriptName) {
7866             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
7867                                  //.replace(' ', '_'));
7868             UnicodeScript sc = aliases.get(scriptName);
7869             if (sc != null)
7870                 return sc;
7871             return valueOf(scriptName);
7872         }
7873     }
7874 
7875     /**
7876      * The value of the {@code Character}.
7877      *
7878      * @serial
7879      */
7880     private final char value;
7881 
7882     /** use serialVersionUID from JDK 1.0.2 for interoperability */
7883     private static final long serialVersionUID = 3786198910865385080L;
7884 
7885     /**
7886      * Constructs a newly allocated {@code Character} object that
7887      * represents the specified {@code char} value.
7888      *
7889      * @param  value   the value to be represented by the
7890      *                  {@code Character} object.
7891      *
7892      * @deprecated
7893      * It is rarely appropriate to use this constructor. The static factory
7894      * {@link #valueOf(char)} is generally a better choice, as it is
7895      * likely to yield significantly better space and time performance.
7896      */
7897     @Deprecated(since="9")
7898     public Character(char value) {
7899         this.value = value;
7900     }
7901 
7902     private static class CharacterCache {
7903         private CharacterCache(){}
7904 
7905         static final Character cache[] = new Character[127 + 1];
7906 
7907         static {
7908             for (int i = 0; i < cache.length; i++)
7909                 cache[i] = new Character((char)i);
7910         }
7911     }
7912 
7913     /**
7914      * Returns a {@code Character} instance representing the specified
7915      * {@code char} value.
7916      * If a new {@code Character} instance is not required, this method
7917      * should generally be used in preference to the constructor
7918      * {@link #Character(char)}, as this method is likely to yield
7919      * significantly better space and time performance by caching
7920      * frequently requested values.
7921      *
7922      * This method will always cache values in the range {@code
7923      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
7924      * cache other values outside of this range.
7925      *
7926      * @param  c a char value.
7927      * @return a {@code Character} instance representing {@code c}.
7928      * @since  1.5
7929      */
7930     @HotSpotIntrinsicCandidate
7931     public static Character valueOf(char c) {
7932         if (c <= 127) { // must cache
7933             return CharacterCache.cache[(int)c];
7934         }
7935         return new Character(c);
7936     }
7937 
7938     /**
7939      * Returns the value of this {@code Character} object.
7940      * @return  the primitive {@code char} value represented by
7941      *          this object.
7942      */
7943     @HotSpotIntrinsicCandidate
7944     public char charValue() {
7945         return value;
7946     }
7947 
7948     /**
7949      * Returns a hash code for this {@code Character}; equal to the result
7950      * of invoking {@code charValue()}.
7951      *
7952      * @return a hash code value for this {@code Character}
7953      */
7954     @Override
7955     public int hashCode() {
7956         return Character.hashCode(value);
7957     }
7958 
7959     /**
7960      * Returns a hash code for a {@code char} value; compatible with
7961      * {@code Character.hashCode()}.
7962      *
7963      * @since 1.8
7964      *
7965      * @param value The {@code char} for which to return a hash code.
7966      * @return a hash code value for a {@code char} value.
7967      */
7968     public static int hashCode(char value) {
7969         return (int)value;
7970     }
7971 
7972     /**
7973      * Compares this object against the specified object.
7974      * The result is {@code true} if and only if the argument is not
7975      * {@code null} and is a {@code Character} object that
7976      * represents the same {@code char} value as this object.
7977      *
7978      * @param   obj   the object to compare with.
7979      * @return  {@code true} if the objects are the same;
7980      *          {@code false} otherwise.
7981      */
7982     public boolean equals(Object obj) {
7983         if (obj instanceof Character) {
7984             return value == ((Character)obj).charValue();
7985         }
7986         return false;
7987     }
7988 
7989     /**
7990      * Returns a {@code String} object representing this
7991      * {@code Character}'s value.  The result is a string of
7992      * length 1 whose sole component is the primitive
7993      * {@code char} value represented by this
7994      * {@code Character} object.
7995      *
7996      * @return  a string representation of this object.
7997      */
7998     public String toString() {
7999         char buf[] = {value};
8000         return String.valueOf(buf);
8001     }
8002 
8003     /**
8004      * Returns a {@code String} object representing the
8005      * specified {@code char}.  The result is a string of length
8006      * 1 consisting solely of the specified {@code char}.
8007      *
8008      * @apiNote This method cannot handle <a
8009      * href="#supplementary"> supplementary characters</a>. To support
8010      * all Unicode characters, including supplementary characters, use
8011      * the {@link #toString(int)} method.
8012      *
8013      * @param c the {@code char} to be converted
8014      * @return the string representation of the specified {@code char}
8015      * @since 1.4
8016      */
8017     public static String toString(char c) {
8018         return String.valueOf(c);
8019     }
8020 
8021     /**
8022      * Returns a {@code String} object representing the
8023      * specified character (Unicode code point).  The result is a string of
8024      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8025      *
8026      * @param codePoint the {@code codePoint} to be converted
8027      * @return the string representation of the specified {@code codePoint}
8028      * @exception IllegalArgumentException if the specified
8029      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8030      *      valid Unicode code point}.
8031      * @since 11
8032      */
8033     public static String toString(int codePoint) {
8034         return String.valueOfCodePoint(codePoint);
8035     }
8036 
8037     /**
8038      * Determines whether the specified code point is a valid
8039      * <a href="http://www.unicode.org/glossary/#code_point">
8040      * Unicode code point value</a>.
8041      *
8042      * @param  codePoint the Unicode code point to be tested
8043      * @return {@code true} if the specified code point value is between
8044      *         {@link #MIN_CODE_POINT} and
8045      *         {@link #MAX_CODE_POINT} inclusive;
8046      *         {@code false} otherwise.
8047      * @since  1.5
8048      */
8049     public static boolean isValidCodePoint(int codePoint) {
8050         // Optimized form of:
8051         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8052         int plane = codePoint >>> 16;
8053         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8054     }
8055 
8056     /**
8057      * Determines whether the specified character (Unicode code point)
8058      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8059      * Such code points can be represented using a single {@code char}.
8060      *
8061      * @param  codePoint the character (Unicode code point) to be tested
8062      * @return {@code true} if the specified code point is between
8063      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8064      *         {@code false} otherwise.
8065      * @since  1.7
8066      */
8067     public static boolean isBmpCodePoint(int codePoint) {
8068         return codePoint >>> 16 == 0;
8069         // Optimized form of:
8070         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8071         // We consistently use logical shift (>>>) to facilitate
8072         // additional runtime optimizations.
8073     }
8074 
8075     /**
8076      * Determines whether the specified character (Unicode code point)
8077      * is in the <a href="#supplementary">supplementary character</a> range.
8078      *
8079      * @param  codePoint the character (Unicode code point) to be tested
8080      * @return {@code true} if the specified code point is between
8081      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8082      *         {@link #MAX_CODE_POINT} inclusive;
8083      *         {@code false} otherwise.
8084      * @since  1.5
8085      */
8086     public static boolean isSupplementaryCodePoint(int codePoint) {
8087         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8088             && codePoint <  MAX_CODE_POINT + 1;
8089     }
8090 
8091     /**
8092      * Determines if the given {@code char} value is a
8093      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8094      * Unicode high-surrogate code unit</a>
8095      * (also known as <i>leading-surrogate code unit</i>).
8096      *
8097      * <p>Such values do not represent characters by themselves,
8098      * but are used in the representation of
8099      * <a href="#supplementary">supplementary characters</a>
8100      * in the UTF-16 encoding.
8101      *
8102      * @param  ch the {@code char} value to be tested.
8103      * @return {@code true} if the {@code char} value is between
8104      *         {@link #MIN_HIGH_SURROGATE} and
8105      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8106      *         {@code false} otherwise.
8107      * @see    Character#isLowSurrogate(char)
8108      * @see    Character.UnicodeBlock#of(int)
8109      * @since  1.5
8110      */
8111     public static boolean isHighSurrogate(char ch) {
8112         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8113         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8114     }
8115 
8116     /**
8117      * Determines if the given {@code char} value is a
8118      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8119      * Unicode low-surrogate code unit</a>
8120      * (also known as <i>trailing-surrogate code unit</i>).
8121      *
8122      * <p>Such values do not represent characters by themselves,
8123      * but are used in the representation of
8124      * <a href="#supplementary">supplementary characters</a>
8125      * in the UTF-16 encoding.
8126      *
8127      * @param  ch the {@code char} value to be tested.
8128      * @return {@code true} if the {@code char} value is between
8129      *         {@link #MIN_LOW_SURROGATE} and
8130      *         {@link #MAX_LOW_SURROGATE} inclusive;
8131      *         {@code false} otherwise.
8132      * @see    Character#isHighSurrogate(char)
8133      * @since  1.5
8134      */
8135     public static boolean isLowSurrogate(char ch) {
8136         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8137     }
8138 
8139     /**
8140      * Determines if the given {@code char} value is a Unicode
8141      * <i>surrogate code unit</i>.
8142      *
8143      * <p>Such values do not represent characters by themselves,
8144      * but are used in the representation of
8145      * <a href="#supplementary">supplementary characters</a>
8146      * in the UTF-16 encoding.
8147      *
8148      * <p>A char value is a surrogate code unit if and only if it is either
8149      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8150      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8151      *
8152      * @param  ch the {@code char} value to be tested.
8153      * @return {@code true} if the {@code char} value is between
8154      *         {@link #MIN_SURROGATE} and
8155      *         {@link #MAX_SURROGATE} inclusive;
8156      *         {@code false} otherwise.
8157      * @since  1.7
8158      */
8159     public static boolean isSurrogate(char ch) {
8160         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8161     }
8162 
8163     /**
8164      * Determines whether the specified pair of {@code char}
8165      * values is a valid
8166      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8167      * Unicode surrogate pair</a>.
8168 
8169      * <p>This method is equivalent to the expression:
8170      * <blockquote><pre>{@code
8171      * isHighSurrogate(high) && isLowSurrogate(low)
8172      * }</pre></blockquote>
8173      *
8174      * @param  high the high-surrogate code value to be tested
8175      * @param  low the low-surrogate code value to be tested
8176      * @return {@code true} if the specified high and
8177      * low-surrogate code values represent a valid surrogate pair;
8178      * {@code false} otherwise.
8179      * @since  1.5
8180      */
8181     public static boolean isSurrogatePair(char high, char low) {
8182         return isHighSurrogate(high) && isLowSurrogate(low);
8183     }
8184 
8185     /**
8186      * Determines the number of {@code char} values needed to
8187      * represent the specified character (Unicode code point). If the
8188      * specified character is equal to or greater than 0x10000, then
8189      * the method returns 2. Otherwise, the method returns 1.
8190      *
8191      * <p>This method doesn't validate the specified character to be a
8192      * valid Unicode code point. The caller must validate the
8193      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8194      * if necessary.
8195      *
8196      * @param   codePoint the character (Unicode code point) to be tested.
8197      * @return  2 if the character is a valid supplementary character; 1 otherwise.
8198      * @see     Character#isSupplementaryCodePoint(int)
8199      * @since   1.5
8200      */
8201     public static int charCount(int codePoint) {
8202         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8203     }
8204 
8205     /**
8206      * Converts the specified surrogate pair to its supplementary code
8207      * point value. This method does not validate the specified
8208      * surrogate pair. The caller must validate it using {@link
8209      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8210      *
8211      * @param  high the high-surrogate code unit
8212      * @param  low the low-surrogate code unit
8213      * @return the supplementary code point composed from the
8214      *         specified surrogate pair.
8215      * @since  1.5
8216      */
8217     public static int toCodePoint(char high, char low) {
8218         // Optimized form of:
8219         // return ((high - MIN_HIGH_SURROGATE) << 10)
8220         //         + (low - MIN_LOW_SURROGATE)
8221         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8222         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8223                                        - (MIN_HIGH_SURROGATE << 10)
8224                                        - MIN_LOW_SURROGATE);
8225     }
8226 
8227     /**
8228      * Returns the code point at the given index of the
8229      * {@code CharSequence}. If the {@code char} value at
8230      * the given index in the {@code CharSequence} is in the
8231      * high-surrogate range, the following index is less than the
8232      * length of the {@code CharSequence}, and the
8233      * {@code char} value at the following index is in the
8234      * low-surrogate range, then the supplementary code point
8235      * corresponding to this surrogate pair is returned. Otherwise,
8236      * the {@code char} value at the given index is returned.
8237      *
8238      * @param seq a sequence of {@code char} values (Unicode code
8239      * units)
8240      * @param index the index to the {@code char} values (Unicode
8241      * code units) in {@code seq} to be converted
8242      * @return the Unicode code point at the given index
8243      * @exception NullPointerException if {@code seq} is null.
8244      * @exception IndexOutOfBoundsException if the value
8245      * {@code index} is negative or not less than
8246      * {@link CharSequence#length() seq.length()}.
8247      * @since  1.5
8248      */
8249     public static int codePointAt(CharSequence seq, int index) {
8250         char c1 = seq.charAt(index);
8251         if (isHighSurrogate(c1) && ++index < seq.length()) {
8252             char c2 = seq.charAt(index);
8253             if (isLowSurrogate(c2)) {
8254                 return toCodePoint(c1, c2);
8255             }
8256         }
8257         return c1;
8258     }
8259 
8260     /**
8261      * Returns the code point at the given index of the
8262      * {@code char} array. If the {@code char} value at
8263      * the given index in the {@code char} array is in the
8264      * high-surrogate range, the following index is less than the
8265      * length of the {@code char} array, and the
8266      * {@code char} value at the following index is in the
8267      * low-surrogate range, then the supplementary code point
8268      * corresponding to this surrogate pair is returned. Otherwise,
8269      * the {@code char} value at the given index is returned.
8270      *
8271      * @param a the {@code char} array
8272      * @param index the index to the {@code char} values (Unicode
8273      * code units) in the {@code char} array to be converted
8274      * @return the Unicode code point at the given index
8275      * @exception NullPointerException if {@code a} is null.
8276      * @exception IndexOutOfBoundsException if the value
8277      * {@code index} is negative or not less than
8278      * the length of the {@code char} array.
8279      * @since  1.5
8280      */
8281     public static int codePointAt(char[] a, int index) {
8282         return codePointAtImpl(a, index, a.length);
8283     }
8284 
8285     /**
8286      * Returns the code point at the given index of the
8287      * {@code char} array, where only array elements with
8288      * {@code index} less than {@code limit} can be used. If
8289      * the {@code char} value at the given index in the
8290      * {@code char} array is in the high-surrogate range, the
8291      * following index is less than the {@code limit}, and the
8292      * {@code char} value at the following index is in the
8293      * low-surrogate range, then the supplementary code point
8294      * corresponding to this surrogate pair is returned. Otherwise,
8295      * the {@code char} value at the given index is returned.
8296      *
8297      * @param a the {@code char} array
8298      * @param index the index to the {@code char} values (Unicode
8299      * code units) in the {@code char} array to be converted
8300      * @param limit the index after the last array element that
8301      * can be used in the {@code char} array
8302      * @return the Unicode code point at the given index
8303      * @exception NullPointerException if {@code a} is null.
8304      * @exception IndexOutOfBoundsException if the {@code index}
8305      * argument is negative or not less than the {@code limit}
8306      * argument, or if the {@code limit} argument is negative or
8307      * greater than the length of the {@code char} array.
8308      * @since  1.5
8309      */
8310     public static int codePointAt(char[] a, int index, int limit) {
8311         if (index >= limit || limit < 0 || limit > a.length) {
8312             throw new IndexOutOfBoundsException();
8313         }
8314         return codePointAtImpl(a, index, limit);
8315     }
8316 
8317     // throws ArrayIndexOutOfBoundsException if index out of bounds
8318     static int codePointAtImpl(char[] a, int index, int limit) {
8319         char c1 = a[index];
8320         if (isHighSurrogate(c1) && ++index < limit) {
8321             char c2 = a[index];
8322             if (isLowSurrogate(c2)) {
8323                 return toCodePoint(c1, c2);
8324             }
8325         }
8326         return c1;
8327     }
8328 
8329     /**
8330      * Returns the code point preceding the given index of the
8331      * {@code CharSequence}. If the {@code char} value at
8332      * {@code (index - 1)} in the {@code CharSequence} is in
8333      * the low-surrogate range, {@code (index - 2)} is not
8334      * negative, and the {@code char} value at {@code (index - 2)}
8335      * in the {@code CharSequence} is in the
8336      * high-surrogate range, then the supplementary code point
8337      * corresponding to this surrogate pair is returned. Otherwise,
8338      * the {@code char} value at {@code (index - 1)} is
8339      * returned.
8340      *
8341      * @param seq the {@code CharSequence} instance
8342      * @param index the index following the code point that should be returned
8343      * @return the Unicode code point value before the given index.
8344      * @exception NullPointerException if {@code seq} is null.
8345      * @exception IndexOutOfBoundsException if the {@code index}
8346      * argument is less than 1 or greater than {@link
8347      * CharSequence#length() seq.length()}.
8348      * @since  1.5
8349      */
8350     public static int codePointBefore(CharSequence seq, int index) {
8351         char c2 = seq.charAt(--index);
8352         if (isLowSurrogate(c2) && index > 0) {
8353             char c1 = seq.charAt(--index);
8354             if (isHighSurrogate(c1)) {
8355                 return toCodePoint(c1, c2);
8356             }
8357         }
8358         return c2;
8359     }
8360 
8361     /**
8362      * Returns the code point preceding the given index of the
8363      * {@code char} array. If the {@code char} value at
8364      * {@code (index - 1)} in the {@code char} array is in
8365      * the low-surrogate range, {@code (index - 2)} is not
8366      * negative, and the {@code char} value at {@code (index - 2)}
8367      * in the {@code char} array is in the
8368      * high-surrogate range, then the supplementary code point
8369      * corresponding to this surrogate pair is returned. Otherwise,
8370      * the {@code char} value at {@code (index - 1)} is
8371      * returned.
8372      *
8373      * @param a the {@code char} array
8374      * @param index the index following the code point that should be returned
8375      * @return the Unicode code point value before the given index.
8376      * @exception NullPointerException if {@code a} is null.
8377      * @exception IndexOutOfBoundsException if the {@code index}
8378      * argument is less than 1 or greater than the length of the
8379      * {@code char} array
8380      * @since  1.5
8381      */
8382     public static int codePointBefore(char[] a, int index) {
8383         return codePointBeforeImpl(a, index, 0);
8384     }
8385 
8386     /**
8387      * Returns the code point preceding the given index of the
8388      * {@code char} array, where only array elements with
8389      * {@code index} greater than or equal to {@code start}
8390      * can be used. If the {@code char} value at {@code (index - 1)}
8391      * in the {@code char} array is in the
8392      * low-surrogate range, {@code (index - 2)} is not less than
8393      * {@code start}, and the {@code char} value at
8394      * {@code (index - 2)} in the {@code char} array is in
8395      * the high-surrogate range, then the supplementary code point
8396      * corresponding to this surrogate pair is returned. Otherwise,
8397      * the {@code char} value at {@code (index - 1)} is
8398      * returned.
8399      *
8400      * @param a the {@code char} array
8401      * @param index the index following the code point that should be returned
8402      * @param start the index of the first array element in the
8403      * {@code char} array
8404      * @return the Unicode code point value before the given index.
8405      * @exception NullPointerException if {@code a} is null.
8406      * @exception IndexOutOfBoundsException if the {@code index}
8407      * argument is not greater than the {@code start} argument or
8408      * is greater than the length of the {@code char} array, or
8409      * if the {@code start} argument is negative or not less than
8410      * the length of the {@code char} array.
8411      * @since  1.5
8412      */
8413     public static int codePointBefore(char[] a, int index, int start) {
8414         if (index <= start || start < 0 || start >= a.length) {
8415             throw new IndexOutOfBoundsException();
8416         }
8417         return codePointBeforeImpl(a, index, start);
8418     }
8419 
8420     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8421     static int codePointBeforeImpl(char[] a, int index, int start) {
8422         char c2 = a[--index];
8423         if (isLowSurrogate(c2) && index > start) {
8424             char c1 = a[--index];
8425             if (isHighSurrogate(c1)) {
8426                 return toCodePoint(c1, c2);
8427             }
8428         }
8429         return c2;
8430     }
8431 
8432     /**
8433      * Returns the leading surrogate (a
8434      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8435      * high surrogate code unit</a>) of the
8436      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8437      * surrogate pair</a>
8438      * representing the specified supplementary character (Unicode
8439      * code point) in the UTF-16 encoding.  If the specified character
8440      * is not a
8441      * <a href="Character.html#supplementary">supplementary character</a>,
8442      * an unspecified {@code char} is returned.
8443      *
8444      * <p>If
8445      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8446      * is {@code true}, then
8447      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8448      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8449      * are also always {@code true}.
8450      *
8451      * @param   codePoint a supplementary character (Unicode code point)
8452      * @return  the leading surrogate code unit used to represent the
8453      *          character in the UTF-16 encoding
8454      * @since   1.7
8455      */
8456     public static char highSurrogate(int codePoint) {
8457         return (char) ((codePoint >>> 10)
8458             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8459     }
8460 
8461     /**
8462      * Returns the trailing surrogate (a
8463      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8464      * low surrogate code unit</a>) of the
8465      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8466      * surrogate pair</a>
8467      * representing the specified supplementary character (Unicode
8468      * code point) in the UTF-16 encoding.  If the specified character
8469      * is not a
8470      * <a href="Character.html#supplementary">supplementary character</a>,
8471      * an unspecified {@code char} is returned.
8472      *
8473      * <p>If
8474      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8475      * is {@code true}, then
8476      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8477      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8478      * are also always {@code true}.
8479      *
8480      * @param   codePoint a supplementary character (Unicode code point)
8481      * @return  the trailing surrogate code unit used to represent the
8482      *          character in the UTF-16 encoding
8483      * @since   1.7
8484      */
8485     public static char lowSurrogate(int codePoint) {
8486         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8487     }
8488 
8489     /**
8490      * Converts the specified character (Unicode code point) to its
8491      * UTF-16 representation. If the specified code point is a BMP
8492      * (Basic Multilingual Plane or Plane 0) value, the same value is
8493      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8494      * specified code point is a supplementary character, its
8495      * surrogate values are stored in {@code dst[dstIndex]}
8496      * (high-surrogate) and {@code dst[dstIndex+1]}
8497      * (low-surrogate), and 2 is returned.
8498      *
8499      * @param  codePoint the character (Unicode code point) to be converted.
8500      * @param  dst an array of {@code char} in which the
8501      * {@code codePoint}'s UTF-16 value is stored.
8502      * @param dstIndex the start index into the {@code dst}
8503      * array where the converted value is stored.
8504      * @return 1 if the code point is a BMP code point, 2 if the
8505      * code point is a supplementary code point.
8506      * @exception IllegalArgumentException if the specified
8507      * {@code codePoint} is not a valid Unicode code point.
8508      * @exception NullPointerException if the specified {@code dst} is null.
8509      * @exception IndexOutOfBoundsException if {@code dstIndex}
8510      * is negative or not less than {@code dst.length}, or if
8511      * {@code dst} at {@code dstIndex} doesn't have enough
8512      * array element(s) to store the resulting {@code char}
8513      * value(s). (If {@code dstIndex} is equal to
8514      * {@code dst.length-1} and the specified
8515      * {@code codePoint} is a supplementary character, the
8516      * high-surrogate value is not stored in
8517      * {@code dst[dstIndex]}.)
8518      * @since  1.5
8519      */
8520     public static int toChars(int codePoint, char[] dst, int dstIndex) {
8521         if (isBmpCodePoint(codePoint)) {
8522             dst[dstIndex] = (char) codePoint;
8523             return 1;
8524         } else if (isValidCodePoint(codePoint)) {
8525             toSurrogates(codePoint, dst, dstIndex);
8526             return 2;
8527         } else {
8528             throw new IllegalArgumentException();
8529         }
8530     }
8531 
8532     /**
8533      * Converts the specified character (Unicode code point) to its
8534      * UTF-16 representation stored in a {@code char} array. If
8535      * the specified code point is a BMP (Basic Multilingual Plane or
8536      * Plane 0) value, the resulting {@code char} array has
8537      * the same value as {@code codePoint}. If the specified code
8538      * point is a supplementary code point, the resulting
8539      * {@code char} array has the corresponding surrogate pair.
8540      *
8541      * @param  codePoint a Unicode code point
8542      * @return a {@code char} array having
8543      *         {@code codePoint}'s UTF-16 representation.
8544      * @exception IllegalArgumentException if the specified
8545      * {@code codePoint} is not a valid Unicode code point.
8546      * @since  1.5
8547      */
8548     public static char[] toChars(int codePoint) {
8549         if (isBmpCodePoint(codePoint)) {
8550             return new char[] { (char) codePoint };
8551         } else if (isValidCodePoint(codePoint)) {
8552             char[] result = new char[2];
8553             toSurrogates(codePoint, result, 0);
8554             return result;
8555         } else {
8556             throw new IllegalArgumentException();
8557         }
8558     }
8559 
8560     static void toSurrogates(int codePoint, char[] dst, int index) {
8561         // We write elements "backwards" to guarantee all-or-nothing
8562         dst[index+1] = lowSurrogate(codePoint);
8563         dst[index] = highSurrogate(codePoint);
8564     }
8565 
8566     /**
8567      * Returns the number of Unicode code points in the text range of
8568      * the specified char sequence. The text range begins at the
8569      * specified {@code beginIndex} and extends to the
8570      * {@code char} at index {@code endIndex - 1}. Thus the
8571      * length (in {@code char}s) of the text range is
8572      * {@code endIndex-beginIndex}. Unpaired surrogates within
8573      * the text range count as one code point each.
8574      *
8575      * @param seq the char sequence
8576      * @param beginIndex the index to the first {@code char} of
8577      * the text range.
8578      * @param endIndex the index after the last {@code char} of
8579      * the text range.
8580      * @return the number of Unicode code points in the specified text
8581      * range
8582      * @exception NullPointerException if {@code seq} is null.
8583      * @exception IndexOutOfBoundsException if the
8584      * {@code beginIndex} is negative, or {@code endIndex}
8585      * is larger than the length of the given sequence, or
8586      * {@code beginIndex} is larger than {@code endIndex}.
8587      * @since  1.5
8588      */
8589     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
8590         int length = seq.length();
8591         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
8592             throw new IndexOutOfBoundsException();
8593         }
8594         int n = endIndex - beginIndex;
8595         for (int i = beginIndex; i < endIndex; ) {
8596             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
8597                 isLowSurrogate(seq.charAt(i))) {
8598                 n--;
8599                 i++;
8600             }
8601         }
8602         return n;
8603     }
8604 
8605     /**
8606      * Returns the number of Unicode code points in a subarray of the
8607      * {@code char} array argument. The {@code offset}
8608      * argument is the index of the first {@code char} of the
8609      * subarray and the {@code count} argument specifies the
8610      * length of the subarray in {@code char}s. Unpaired
8611      * surrogates within the subarray count as one code point each.
8612      *
8613      * @param a the {@code char} array
8614      * @param offset the index of the first {@code char} in the
8615      * given {@code char} array
8616      * @param count the length of the subarray in {@code char}s
8617      * @return the number of Unicode code points in the specified subarray
8618      * @exception NullPointerException if {@code a} is null.
8619      * @exception IndexOutOfBoundsException if {@code offset} or
8620      * {@code count} is negative, or if {@code offset +
8621      * count} is larger than the length of the given array.
8622      * @since  1.5
8623      */
8624     public static int codePointCount(char[] a, int offset, int count) {
8625         if (count > a.length - offset || offset < 0 || count < 0) {
8626             throw new IndexOutOfBoundsException();
8627         }
8628         return codePointCountImpl(a, offset, count);
8629     }
8630 
8631     static int codePointCountImpl(char[] a, int offset, int count) {
8632         int endIndex = offset + count;
8633         int n = count;
8634         for (int i = offset; i < endIndex; ) {
8635             if (isHighSurrogate(a[i++]) && i < endIndex &&
8636                 isLowSurrogate(a[i])) {
8637                 n--;
8638                 i++;
8639             }
8640         }
8641         return n;
8642     }
8643 
8644     /**
8645      * Returns the index within the given char sequence that is offset
8646      * from the given {@code index} by {@code codePointOffset}
8647      * code points. Unpaired surrogates within the text range given by
8648      * {@code index} and {@code codePointOffset} count as
8649      * one code point each.
8650      *
8651      * @param seq the char sequence
8652      * @param index the index to be offset
8653      * @param codePointOffset the offset in code points
8654      * @return the index within the char sequence
8655      * @exception NullPointerException if {@code seq} is null.
8656      * @exception IndexOutOfBoundsException if {@code index}
8657      *   is negative or larger then the length of the char sequence,
8658      *   or if {@code codePointOffset} is positive and the
8659      *   subsequence starting with {@code index} has fewer than
8660      *   {@code codePointOffset} code points, or if
8661      *   {@code codePointOffset} is negative and the subsequence
8662      *   before {@code index} has fewer than the absolute value
8663      *   of {@code codePointOffset} code points.
8664      * @since 1.5
8665      */
8666     public static int offsetByCodePoints(CharSequence seq, int index,
8667                                          int codePointOffset) {
8668         int length = seq.length();
8669         if (index < 0 || index > length) {
8670             throw new IndexOutOfBoundsException();
8671         }
8672 
8673         int x = index;
8674         if (codePointOffset >= 0) {
8675             int i;
8676             for (i = 0; x < length && i < codePointOffset; i++) {
8677                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8678                     isLowSurrogate(seq.charAt(x))) {
8679                     x++;
8680                 }
8681             }
8682             if (i < codePointOffset) {
8683                 throw new IndexOutOfBoundsException();
8684             }
8685         } else {
8686             int i;
8687             for (i = codePointOffset; x > 0 && i < 0; i++) {
8688                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8689                     isHighSurrogate(seq.charAt(x-1))) {
8690                     x--;
8691                 }
8692             }
8693             if (i < 0) {
8694                 throw new IndexOutOfBoundsException();
8695             }
8696         }
8697         return x;
8698     }
8699 
8700     /**
8701      * Returns the index within the given {@code char} subarray
8702      * that is offset from the given {@code index} by
8703      * {@code codePointOffset} code points. The
8704      * {@code start} and {@code count} arguments specify a
8705      * subarray of the {@code char} array. Unpaired surrogates
8706      * within the text range given by {@code index} and
8707      * {@code codePointOffset} count as one code point each.
8708      *
8709      * @param a the {@code char} array
8710      * @param start the index of the first {@code char} of the
8711      * subarray
8712      * @param count the length of the subarray in {@code char}s
8713      * @param index the index to be offset
8714      * @param codePointOffset the offset in code points
8715      * @return the index within the subarray
8716      * @exception NullPointerException if {@code a} is null.
8717      * @exception IndexOutOfBoundsException
8718      *   if {@code start} or {@code count} is negative,
8719      *   or if {@code start + count} is larger than the length of
8720      *   the given array,
8721      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
8723      *   or if {@code codePointOffset} is positive and the text range
8724      *   starting with {@code index} and ending with {@code start + count - 1}
8725      *   has fewer than {@code codePointOffset} code
8726      *   points,
8727      *   or if {@code codePointOffset} is negative and the text range
8728      *   starting with {@code start} and ending with {@code index - 1}
8729      *   has fewer than the absolute value of
8730      *   {@code codePointOffset} code points.
8731      * @since 1.5
8732      */
8733     public static int offsetByCodePoints(char[] a, int start, int count,
8734                                          int index, int codePointOffset) {
8735         if (count > a.length-start || start < 0 || count < 0
8736             || index < start || index > start+count) {
8737             throw new IndexOutOfBoundsException();
8738         }
8739         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8740     }
8741 
8742     static int offsetByCodePointsImpl(char[]a, int start, int count,
8743                                       int index, int codePointOffset) {
8744         int x = index;
8745         if (codePointOffset >= 0) {
8746             int limit = start + count;
8747             int i;
8748             for (i = 0; x < limit && i < codePointOffset; i++) {
8749                 if (isHighSurrogate(a[x++]) && x < limit &&
8750                     isLowSurrogate(a[x])) {
8751                     x++;
8752                 }
8753             }
8754             if (i < codePointOffset) {
8755                 throw new IndexOutOfBoundsException();
8756             }
8757         } else {
8758             int i;
8759             for (i = codePointOffset; x > start && i < 0; i++) {
8760                 if (isLowSurrogate(a[--x]) && x > start &&
8761                     isHighSurrogate(a[x-1])) {
8762                     x--;
8763                 }
8764             }
8765             if (i < 0) {
8766                 throw new IndexOutOfBoundsException();
8767             }
8768         }
8769         return x;
8770     }
8771 
8772     /**
8773      * Determines if the specified character is a lowercase character.
8774      * <p>
8775      * A character is lowercase if its general category type, provided
8776      * by {@code Character.getType(ch)}, is
8777      * {@code LOWERCASE_LETTER}, or it has contributory property
8778      * Other_Lowercase as defined by the Unicode Standard.
8779      * <p>
8780      * The following are examples of lowercase characters:
8781      * <blockquote><pre>
8782      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8783      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8784      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8785      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8786      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8787      * </pre></blockquote>
8788      * <p> Many other Unicode characters are lowercase too.
8789      *
8790      * <p><b>Note:</b> This method cannot handle <a
8791      * href="#supplementary"> supplementary characters</a>. To support
8792      * all Unicode characters, including supplementary characters, use
8793      * the {@link #isLowerCase(int)} method.
8794      *
8795      * @param   ch   the character to be tested.
8796      * @return  {@code true} if the character is lowercase;
8797      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
8799      * @see     Character#isTitleCase(char)
8800      * @see     Character#toLowerCase(char)
8801      * @see     Character#getType(char)
8802      */
8803     public static boolean isLowerCase(char ch) {
8804         return isLowerCase((int)ch);
8805     }
8806 
8807     /**
8808      * Determines if the specified character (Unicode code point) is a
8809      * lowercase character.
8810      * <p>
8811      * A character is lowercase if its general category type, provided
8812      * by {@link Character#getType getType(codePoint)}, is
8813      * {@code LOWERCASE_LETTER}, or it has contributory property
8814      * Other_Lowercase as defined by the Unicode Standard.
8815      * <p>
8816      * The following are examples of lowercase characters:
8817      * <blockquote><pre>
8818      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8819      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8820      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8821      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8822      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8823      * </pre></blockquote>
8824      * <p> Many other Unicode characters are lowercase too.
8825      *
8826      * @param   codePoint the character (Unicode code point) to be tested.
8827      * @return  {@code true} if the character is lowercase;
8828      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
8830      * @see     Character#isTitleCase(int)
8831      * @see     Character#toLowerCase(int)
8832      * @see     Character#getType(int)
8833      * @since   1.5
8834      */
8835     public static boolean isLowerCase(int codePoint) {
8836         return getType(codePoint) == Character.LOWERCASE_LETTER ||
8837                CharacterData.of(codePoint).isOtherLowercase(codePoint);
8838     }
8839 
8840     /**
8841      * Determines if the specified character is an uppercase character.
8842      * <p>
8843      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
8845      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8846      * <p>
8847      * The following are examples of uppercase characters:
8848      * <blockquote><pre>
8849      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8850      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8851      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8852      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8853      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8854      * </pre></blockquote>
8855      * <p> Many other Unicode characters are uppercase too.
8856      *
8857      * <p><b>Note:</b> This method cannot handle <a
8858      * href="#supplementary"> supplementary characters</a>. To support
8859      * all Unicode characters, including supplementary characters, use
8860      * the {@link #isUpperCase(int)} method.
8861      *
8862      * @param   ch   the character to be tested.
8863      * @return  {@code true} if the character is uppercase;
8864      *          {@code false} otherwise.
8865      * @see     Character#isLowerCase(char)
8866      * @see     Character#isTitleCase(char)
8867      * @see     Character#toUpperCase(char)
8868      * @see     Character#getType(char)
8869      * @since   1.0
8870      */
8871     public static boolean isUpperCase(char ch) {
8872         return isUpperCase((int)ch);
8873     }
8874 
8875     /**
8876      * Determines if the specified character (Unicode code point) is an uppercase character.
8877      * <p>
8878      * A character is uppercase if its general category type, provided by
8879      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
8880      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8881      * <p>
8882      * The following are examples of uppercase characters:
8883      * <blockquote><pre>
8884      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8885      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8886      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8887      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8888      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8889      * </pre></blockquote>
8890      * <p> Many other Unicode characters are uppercase too.
8891      *
8892      * @param   codePoint the character (Unicode code point) to be tested.
8893      * @return  {@code true} if the character is uppercase;
8894      *          {@code false} otherwise.
8895      * @see     Character#isLowerCase(int)
8896      * @see     Character#isTitleCase(int)
8897      * @see     Character#toUpperCase(int)
8898      * @see     Character#getType(int)
8899      * @since   1.5
8900      */
8901     public static boolean isUpperCase(int codePoint) {
8902         return getType(codePoint) == Character.UPPERCASE_LETTER ||
8903                CharacterData.of(codePoint).isOtherUppercase(codePoint);
8904     }
8905 
8906     /**
8907      * Determines if the specified character is a titlecase character.
8908      * <p>
8909      * A character is a titlecase character if its general
8910      * category type, provided by {@code Character.getType(ch)},
8911      * is {@code TITLECASE_LETTER}.
8912      * <p>
8913      * Some characters look like pairs of Latin letters. For example, there
8914      * is an uppercase letter that looks like "LJ" and has a corresponding
8915      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8916      * is the appropriate form to use when rendering a word in lowercase
8917      * with initial capitals, as for a book title.
8918      * <p>
8919      * These are some of the Unicode characters for which this method returns
8920      * {@code true}:
8921      * <ul>
8922      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8923      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8924      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8925      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8926      * </ul>
8927      * <p> Many other Unicode characters are titlecase too.
8928      *
8929      * <p><b>Note:</b> This method cannot handle <a
8930      * href="#supplementary"> supplementary characters</a>. To support
8931      * all Unicode characters, including supplementary characters, use
8932      * the {@link #isTitleCase(int)} method.
8933      *
8934      * @param   ch   the character to be tested.
8935      * @return  {@code true} if the character is titlecase;
8936      *          {@code false} otherwise.
8937      * @see     Character#isLowerCase(char)
8938      * @see     Character#isUpperCase(char)
8939      * @see     Character#toTitleCase(char)
8940      * @see     Character#getType(char)
8941      * @since   1.0.2
8942      */
8943     public static boolean isTitleCase(char ch) {
8944         return isTitleCase((int)ch);
8945     }
8946 
8947     /**
8948      * Determines if the specified character (Unicode code point) is a titlecase character.
8949      * <p>
8950      * A character is a titlecase character if its general
8951      * category type, provided by {@link Character#getType(int) getType(codePoint)},
8952      * is {@code TITLECASE_LETTER}.
8953      * <p>
8954      * Some characters look like pairs of Latin letters. For example, there
8955      * is an uppercase letter that looks like "LJ" and has a corresponding
8956      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8957      * is the appropriate form to use when rendering a word in lowercase
8958      * with initial capitals, as for a book title.
8959      * <p>
8960      * These are some of the Unicode characters for which this method returns
8961      * {@code true}:
8962      * <ul>
8963      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8964      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8965      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8966      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8967      * </ul>
8968      * <p> Many other Unicode characters are titlecase too.
8969      *
8970      * @param   codePoint the character (Unicode code point) to be tested.
8971      * @return  {@code true} if the character is titlecase;
8972      *          {@code false} otherwise.
8973      * @see     Character#isLowerCase(int)
8974      * @see     Character#isUpperCase(int)
8975      * @see     Character#toTitleCase(int)
8976      * @see     Character#getType(int)
8977      * @since   1.5
8978      */
8979     public static boolean isTitleCase(int codePoint) {
8980         return getType(codePoint) == Character.TITLECASE_LETTER;
8981     }
8982 
8983     /**
8984      * Determines if the specified character is a digit.
8985      * <p>
8986      * A character is a digit if its general category type, provided
8987      * by {@code Character.getType(ch)}, is
8988      * {@code DECIMAL_DIGIT_NUMBER}.
8989      * <p>
8990      * Some Unicode character ranges that contain digits:
8991      * <ul>
8992      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
8993      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
8994      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
8995      *     Arabic-Indic digits
8996      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
8997      *     Extended Arabic-Indic digits
8998      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
8999      *     Devanagari digits
9000      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9001      *     Fullwidth digits
9002      * </ul>
9003      *
9004      * Many other character ranges contain digits as well.
9005      *
9006      * <p><b>Note:</b> This method cannot handle <a
9007      * href="#supplementary"> supplementary characters</a>. To support
9008      * all Unicode characters, including supplementary characters, use
9009      * the {@link #isDigit(int)} method.
9010      *
9011      * @param   ch   the character to be tested.
9012      * @return  {@code true} if the character is a digit;
9013      *          {@code false} otherwise.
9014      * @see     Character#digit(char, int)
9015      * @see     Character#forDigit(int, int)
9016      * @see     Character#getType(char)
9017      */
9018     public static boolean isDigit(char ch) {
9019         return isDigit((int)ch);
9020     }
9021 
9022     /**
9023      * Determines if the specified character (Unicode code point) is a digit.
9024      * <p>
9025      * A character is a digit if its general category type, provided
9026      * by {@link Character#getType(int) getType(codePoint)}, is
9027      * {@code DECIMAL_DIGIT_NUMBER}.
9028      * <p>
9029      * Some Unicode character ranges that contain digits:
9030      * <ul>
9031      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9032      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9033      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9034      *     Arabic-Indic digits
9035      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9036      *     Extended Arabic-Indic digits
9037      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9038      *     Devanagari digits
9039      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9040      *     Fullwidth digits
9041      * </ul>
9042      *
9043      * Many other character ranges contain digits as well.
9044      *
9045      * @param   codePoint the character (Unicode code point) to be tested.
9046      * @return  {@code true} if the character is a digit;
9047      *          {@code false} otherwise.
9048      * @see     Character#forDigit(int, int)
9049      * @see     Character#getType(int)
9050      * @since   1.5
9051      */
9052     public static boolean isDigit(int codePoint) {
9053         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
9054     }
9055 
9056     /**
9057      * Determines if a character is defined in Unicode.
9058      * <p>
9059      * A character is defined if at least one of the following is true:
9060      * <ul>
9061      * <li>It has an entry in the UnicodeData file.
9062      * <li>It has a value in a range defined by the UnicodeData file.
9063      * </ul>
9064      *
9065      * <p><b>Note:</b> This method cannot handle <a
9066      * href="#supplementary"> supplementary characters</a>. To support
9067      * all Unicode characters, including supplementary characters, use
9068      * the {@link #isDefined(int)} method.
9069      *
9070      * @param   ch   the character to be tested
9071      * @return  {@code true} if the character has a defined meaning
9072      *          in Unicode; {@code false} otherwise.
9073      * @see     Character#isDigit(char)
9074      * @see     Character#isLetter(char)
9075      * @see     Character#isLetterOrDigit(char)
9076      * @see     Character#isLowerCase(char)
9077      * @see     Character#isTitleCase(char)
9078      * @see     Character#isUpperCase(char)
9079      * @since   1.0.2
9080      */
9081     public static boolean isDefined(char ch) {
9082         return isDefined((int)ch);
9083     }
9084 
9085     /**
9086      * Determines if a character (Unicode code point) is defined in Unicode.
9087      * <p>
9088      * A character is defined if at least one of the following is true:
9089      * <ul>
9090      * <li>It has an entry in the UnicodeData file.
9091      * <li>It has a value in a range defined by the UnicodeData file.
9092      * </ul>
9093      *
9094      * @param   codePoint the character (Unicode code point) to be tested.
9095      * @return  {@code true} if the character has a defined meaning
9096      *          in Unicode; {@code false} otherwise.
9097      * @see     Character#isDigit(int)
9098      * @see     Character#isLetter(int)
9099      * @see     Character#isLetterOrDigit(int)
9100      * @see     Character#isLowerCase(int)
9101      * @see     Character#isTitleCase(int)
9102      * @see     Character#isUpperCase(int)
9103      * @since   1.5
9104      */
9105     public static boolean isDefined(int codePoint) {
9106         return getType(codePoint) != Character.UNASSIGNED;
9107     }
9108 
9109     /**
9110      * Determines if the specified character is a letter.
9111      * <p>
9112      * A character is considered to be a letter if its general
9113      * category type, provided by {@code Character.getType(ch)},
9114      * is any of the following:
9115      * <ul>
9116      * <li> {@code UPPERCASE_LETTER}
9117      * <li> {@code LOWERCASE_LETTER}
9118      * <li> {@code TITLECASE_LETTER}
9119      * <li> {@code MODIFIER_LETTER}
9120      * <li> {@code OTHER_LETTER}
9121      * </ul>
9122      *
9123      * Not all letters have case. Many characters are
9124      * letters but are neither uppercase nor lowercase nor titlecase.
9125      *
9126      * <p><b>Note:</b> This method cannot handle <a
9127      * href="#supplementary"> supplementary characters</a>. To support
9128      * all Unicode characters, including supplementary characters, use
9129      * the {@link #isLetter(int)} method.
9130      *
9131      * @param   ch   the character to be tested.
9132      * @return  {@code true} if the character is a letter;
9133      *          {@code false} otherwise.
9134      * @see     Character#isDigit(char)
9135      * @see     Character#isJavaIdentifierStart(char)
9136      * @see     Character#isJavaLetter(char)
9137      * @see     Character#isJavaLetterOrDigit(char)
9138      * @see     Character#isLetterOrDigit(char)
9139      * @see     Character#isLowerCase(char)
9140      * @see     Character#isTitleCase(char)
9141      * @see     Character#isUnicodeIdentifierStart(char)
9142      * @see     Character#isUpperCase(char)
9143      */
9144     public static boolean isLetter(char ch) {
9145         return isLetter((int)ch);
9146     }
9147 
9148     /**
9149      * Determines if the specified character (Unicode code point) is a letter.
9150      * <p>
9151      * A character is considered to be a letter if its general
9152      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9153      * is any of the following:
9154      * <ul>
9155      * <li> {@code UPPERCASE_LETTER}
9156      * <li> {@code LOWERCASE_LETTER}
9157      * <li> {@code TITLECASE_LETTER}
9158      * <li> {@code MODIFIER_LETTER}
9159      * <li> {@code OTHER_LETTER}
9160      * </ul>
9161      *
9162      * Not all letters have case. Many characters are
9163      * letters but are neither uppercase nor lowercase nor titlecase.
9164      *
9165      * @param   codePoint the character (Unicode code point) to be tested.
9166      * @return  {@code true} if the character is a letter;
9167      *          {@code false} otherwise.
9168      * @see     Character#isDigit(int)
9169      * @see     Character#isJavaIdentifierStart(int)
9170      * @see     Character#isLetterOrDigit(int)
9171      * @see     Character#isLowerCase(int)
9172      * @see     Character#isTitleCase(int)
9173      * @see     Character#isUnicodeIdentifierStart(int)
9174      * @see     Character#isUpperCase(int)
9175      * @since   1.5
9176      */
9177     public static boolean isLetter(int codePoint) {
9178         return ((((1 << Character.UPPERCASE_LETTER) |
9179             (1 << Character.LOWERCASE_LETTER) |
9180             (1 << Character.TITLECASE_LETTER) |
9181             (1 << Character.MODIFIER_LETTER) |
9182             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9183             != 0;
9184     }
9185 
9186     /**
9187      * Determines if the specified character is a letter or digit.
9188      * <p>
9189      * A character is considered to be a letter or digit if either
9190      * {@code Character.isLetter(char ch)} or
9191      * {@code Character.isDigit(char ch)} returns
9192      * {@code true} for the character.
9193      *
9194      * <p><b>Note:</b> This method cannot handle <a
9195      * href="#supplementary"> supplementary characters</a>. To support
9196      * all Unicode characters, including supplementary characters, use
9197      * the {@link #isLetterOrDigit(int)} method.
9198      *
9199      * @param   ch   the character to be tested.
9200      * @return  {@code true} if the character is a letter or digit;
9201      *          {@code false} otherwise.
9202      * @see     Character#isDigit(char)
9203      * @see     Character#isJavaIdentifierPart(char)
9204      * @see     Character#isJavaLetter(char)
9205      * @see     Character#isJavaLetterOrDigit(char)
9206      * @see     Character#isLetter(char)
9207      * @see     Character#isUnicodeIdentifierPart(char)
9208      * @since   1.0.2
9209      */
9210     public static boolean isLetterOrDigit(char ch) {
9211         return isLetterOrDigit((int)ch);
9212     }
9213 
9214     /**
9215      * Determines if the specified character (Unicode code point) is a letter or digit.
9216      * <p>
9217      * A character is considered to be a letter or digit if either
9218      * {@link #isLetter(int) isLetter(codePoint)} or
9219      * {@link #isDigit(int) isDigit(codePoint)} returns
9220      * {@code true} for the character.
9221      *
9222      * @param   codePoint the character (Unicode code point) to be tested.
9223      * @return  {@code true} if the character is a letter or digit;
9224      *          {@code false} otherwise.
9225      * @see     Character#isDigit(int)
9226      * @see     Character#isJavaIdentifierPart(int)
9227      * @see     Character#isLetter(int)
9228      * @see     Character#isUnicodeIdentifierPart(int)
9229      * @since   1.5
9230      */
9231     public static boolean isLetterOrDigit(int codePoint) {
9232         return ((((1 << Character.UPPERCASE_LETTER) |
9233             (1 << Character.LOWERCASE_LETTER) |
9234             (1 << Character.TITLECASE_LETTER) |
9235             (1 << Character.MODIFIER_LETTER) |
9236             (1 << Character.OTHER_LETTER) |
9237             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9238             != 0;
9239     }
9240 
9241     /**
9242      * Determines if the specified character is permissible as the first
9243      * character in a Java identifier.
9244      * <p>
9245      * A character may start a Java identifier if and only if
9246      * one of the following is true:
9247      * <ul>
9248      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9249      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9250      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9251      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9252      * </ul>
9253      *
9254      * @param   ch the character to be tested.
9255      * @return  {@code true} if the character may start a Java
9256      *          identifier; {@code false} otherwise.
9257      * @see     Character#isJavaLetterOrDigit(char)
9258      * @see     Character#isJavaIdentifierStart(char)
9259      * @see     Character#isJavaIdentifierPart(char)
9260      * @see     Character#isLetter(char)
9261      * @see     Character#isLetterOrDigit(char)
9262      * @see     Character#isUnicodeIdentifierStart(char)
9263      * @since   1.0.2
9264      * @deprecated Replaced by isJavaIdentifierStart(char).
9265      */
9266     @Deprecated(since="1.1")
9267     public static boolean isJavaLetter(char ch) {
9268         return isJavaIdentifierStart(ch);
9269     }
9270 
9271     /**
9272      * Determines if the specified character may be part of a Java
9273      * identifier as other than the first character.
9274      * <p>
9275      * A character may be part of a Java identifier if and only if any
9276      * of the following are true:
9277      * <ul>
9278      * <li>  it is a letter
9279      * <li>  it is a currency symbol (such as {@code '$'})
9280      * <li>  it is a connecting punctuation character (such as {@code '_'})
9281      * <li>  it is a digit
9282      * <li>  it is a numeric letter (such as a Roman numeral character)
9283      * <li>  it is a combining mark
9284      * <li>  it is a non-spacing mark
9285      * <li> {@code isIdentifierIgnorable} returns
9286      * {@code true} for the character.
9287      * </ul>
9288      *
9289      * @param   ch the character to be tested.
9290      * @return  {@code true} if the character may be part of a
9291      *          Java identifier; {@code false} otherwise.
9292      * @see     Character#isJavaLetter(char)
9293      * @see     Character#isJavaIdentifierStart(char)
9294      * @see     Character#isJavaIdentifierPart(char)
9295      * @see     Character#isLetter(char)
9296      * @see     Character#isLetterOrDigit(char)
9297      * @see     Character#isUnicodeIdentifierPart(char)
9298      * @see     Character#isIdentifierIgnorable(char)
9299      * @since   1.0.2
9300      * @deprecated Replaced by isJavaIdentifierPart(char).
9301      */
9302     @Deprecated(since="1.1")
9303     public static boolean isJavaLetterOrDigit(char ch) {
9304         return isJavaIdentifierPart(ch);
9305     }
9306 
9307     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
9309      * <p>
9310      * A character is considered to be alphabetic if its general category type,
9311      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9312      * the following:
9313      * <ul>
9314      * <li> <code>UPPERCASE_LETTER</code>
9315      * <li> <code>LOWERCASE_LETTER</code>
9316      * <li> <code>TITLECASE_LETTER</code>
9317      * <li> <code>MODIFIER_LETTER</code>
9318      * <li> <code>OTHER_LETTER</code>
9319      * <li> <code>LETTER_NUMBER</code>
9320      * </ul>
9321      * or it has contributory property Other_Alphabetic as defined by the
9322      * Unicode Standard.
9323      *
9324      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
9326      *          character, <code>false</code> otherwise.
9327      * @since   1.7
9328      */
9329     public static boolean isAlphabetic(int codePoint) {
9330         return (((((1 << Character.UPPERCASE_LETTER) |
9331             (1 << Character.LOWERCASE_LETTER) |
9332             (1 << Character.TITLECASE_LETTER) |
9333             (1 << Character.MODIFIER_LETTER) |
9334             (1 << Character.OTHER_LETTER) |
9335             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9336             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9337     }
9338 
9339     /**
9340      * Determines if the specified character (Unicode code point) is a CJKV
9341      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9342      * the Unicode Standard.
9343      *
9344      * @param   codePoint the character (Unicode code point) to be tested.
9345      * @return  <code>true</code> if the character is a Unicode ideograph
9346      *          character, <code>false</code> otherwise.
9347      * @since   1.7
9348      */
9349     public static boolean isIdeographic(int codePoint) {
9350         return CharacterData.of(codePoint).isIdeographic(codePoint);
9351     }
9352 
9353     /**
9354      * Determines if the specified character is
9355      * permissible as the first character in a Java identifier.
9356      * <p>
9357      * A character may start a Java identifier if and only if
9358      * one of the following conditions is true:
9359      * <ul>
9360      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9361      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9362      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9363      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9364      * </ul>
9365      *
9366      * <p><b>Note:</b> This method cannot handle <a
9367      * href="#supplementary"> supplementary characters</a>. To support
9368      * all Unicode characters, including supplementary characters, use
9369      * the {@link #isJavaIdentifierStart(int)} method.
9370      *
9371      * @param   ch the character to be tested.
9372      * @return  {@code true} if the character may start a Java identifier;
9373      *          {@code false} otherwise.
9374      * @see     Character#isJavaIdentifierPart(char)
9375      * @see     Character#isLetter(char)
9376      * @see     Character#isUnicodeIdentifierStart(char)
9377      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9378      * @since   1.1
9379      */
9380     public static boolean isJavaIdentifierStart(char ch) {
9381         return isJavaIdentifierStart((int)ch);
9382     }
9383 
9384     /**
9385      * Determines if the character (Unicode code point) is
9386      * permissible as the first character in a Java identifier.
9387      * <p>
9388      * A character may start a Java identifier if and only if
9389      * one of the following conditions is true:
9390      * <ul>
9391      * <li> {@link #isLetter(int) isLetter(codePoint)}
9392      *      returns {@code true}
9393      * <li> {@link #getType(int) getType(codePoint)}
9394      *      returns {@code LETTER_NUMBER}
9395      * <li> the referenced character is a currency symbol (such as {@code '$'})
9396      * <li> the referenced character is a connecting punctuation character
9397      *      (such as {@code '_'}).
9398      * </ul>
9399      *
9400      * @param   codePoint the character (Unicode code point) to be tested.
9401      * @return  {@code true} if the character may start a Java identifier;
9402      *          {@code false} otherwise.
9403      * @see     Character#isJavaIdentifierPart(int)
9404      * @see     Character#isLetter(int)
9405      * @see     Character#isUnicodeIdentifierStart(int)
9406      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9407      * @since   1.5
9408      */
9409     public static boolean isJavaIdentifierStart(int codePoint) {
9410         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
9411     }
9412 
9413     /**
9414      * Determines if the specified character may be part of a Java
9415      * identifier as other than the first character.
9416      * <p>
9417      * A character may be part of a Java identifier if any of the following
9418      * are true:
9419      * <ul>
9420      * <li>  it is a letter
9421      * <li>  it is a currency symbol (such as {@code '$'})
9422      * <li>  it is a connecting punctuation character (such as {@code '_'})
9423      * <li>  it is a digit
9424      * <li>  it is a numeric letter (such as a Roman numeral character)
9425      * <li>  it is a combining mark
9426      * <li>  it is a non-spacing mark
9427      * <li> {@code isIdentifierIgnorable} returns
9428      * {@code true} for the character
9429      * </ul>
9430      *
9431      * <p><b>Note:</b> This method cannot handle <a
9432      * href="#supplementary"> supplementary characters</a>. To support
9433      * all Unicode characters, including supplementary characters, use
9434      * the {@link #isJavaIdentifierPart(int)} method.
9435      *
9436      * @param   ch      the character to be tested.
9437      * @return {@code true} if the character may be part of a
9438      *          Java identifier; {@code false} otherwise.
9439      * @see     Character#isIdentifierIgnorable(char)
9440      * @see     Character#isJavaIdentifierStart(char)
9441      * @see     Character#isLetterOrDigit(char)
9442      * @see     Character#isUnicodeIdentifierPart(char)
9443      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9444      * @since   1.1
9445      */
9446     public static boolean isJavaIdentifierPart(char ch) {
9447         return isJavaIdentifierPart((int)ch);
9448     }
9449 
9450     /**
9451      * Determines if the character (Unicode code point) may be part of a Java
9452      * identifier as other than the first character.
9453      * <p>
9454      * A character may be part of a Java identifier if any of the following
9455      * are true:
9456      * <ul>
9457      * <li>  it is a letter
9458      * <li>  it is a currency symbol (such as {@code '$'})
9459      * <li>  it is a connecting punctuation character (such as {@code '_'})
9460      * <li>  it is a digit
9461      * <li>  it is a numeric letter (such as a Roman numeral character)
9462      * <li>  it is a combining mark
9463      * <li>  it is a non-spacing mark
9464      * <li> {@link #isIdentifierIgnorable(int)
9465      * isIdentifierIgnorable(codePoint)} returns {@code true} for
9466      * the character
9467      * </ul>
9468      *
9469      * @param   codePoint the character (Unicode code point) to be tested.
9470      * @return {@code true} if the character may be part of a
9471      *          Java identifier; {@code false} otherwise.
9472      * @see     Character#isIdentifierIgnorable(int)
9473      * @see     Character#isJavaIdentifierStart(int)
9474      * @see     Character#isLetterOrDigit(int)
9475      * @see     Character#isUnicodeIdentifierPart(int)
9476      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9477      * @since   1.5
9478      */
9479     public static boolean isJavaIdentifierPart(int codePoint) {
9480         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
9481     }
9482 
9483     /**
9484      * Determines if the specified character is permissible as the
9485      * first character in a Unicode identifier.
9486      * <p>
9487      * A character may start a Unicode identifier if and only if
9488      * one of the following conditions is true:
9489      * <ul>
9490      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9491      * <li> {@link #getType(char) getType(ch)} returns
9492      *      {@code LETTER_NUMBER}.
9493      * </ul>
9494      *
9495      * <p><b>Note:</b> This method cannot handle <a
9496      * href="#supplementary"> supplementary characters</a>. To support
9497      * all Unicode characters, including supplementary characters, use
9498      * the {@link #isUnicodeIdentifierStart(int)} method.
9499      *
9500      * @param   ch      the character to be tested.
9501      * @return  {@code true} if the character may start a Unicode
9502      *          identifier; {@code false} otherwise.
9503      * @see     Character#isJavaIdentifierStart(char)
9504      * @see     Character#isLetter(char)
9505      * @see     Character#isUnicodeIdentifierPart(char)
9506      * @since   1.1
9507      */
9508     public static boolean isUnicodeIdentifierStart(char ch) {
9509         return isUnicodeIdentifierStart((int)ch);
9510     }
9511 
9512     /**
9513      * Determines if the specified character (Unicode code point) is permissible as the
9514      * first character in a Unicode identifier.
9515      * <p>
9516      * A character may start a Unicode identifier if and only if
9517      * one of the following conditions is true:
9518      * <ul>
9519      * <li> {@link #isLetter(int) isLetter(codePoint)}
9520      *      returns {@code true}
9521      * <li> {@link #getType(int) getType(codePoint)}
9522      *      returns {@code LETTER_NUMBER}.
9523      * </ul>
9524      * @param   codePoint the character (Unicode code point) to be tested.
9525      * @return  {@code true} if the character may start a Unicode
9526      *          identifier; {@code false} otherwise.
9527      * @see     Character#isJavaIdentifierStart(int)
9528      * @see     Character#isLetter(int)
9529      * @see     Character#isUnicodeIdentifierPart(int)
9530      * @since   1.5
9531      */
9532     public static boolean isUnicodeIdentifierStart(int codePoint) {
9533         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
9534     }
9535 
9536     /**
9537      * Determines if the specified character may be part of a Unicode
9538      * identifier as other than the first character.
9539      * <p>
9540      * A character may be part of a Unicode identifier if and only if
9541      * one of the following statements is true:
9542      * <ul>
9543      * <li>  it is a letter
9544      * <li>  it is a connecting punctuation character (such as {@code '_'})
9545      * <li>  it is a digit
9546      * <li>  it is a numeric letter (such as a Roman numeral character)
9547      * <li>  it is a combining mark
9548      * <li>  it is a non-spacing mark
9549      * <li> {@code isIdentifierIgnorable} returns
9550      * {@code true} for this character.
9551      * </ul>
9552      *
9553      * <p><b>Note:</b> This method cannot handle <a
9554      * href="#supplementary"> supplementary characters</a>. To support
9555      * all Unicode characters, including supplementary characters, use
9556      * the {@link #isUnicodeIdentifierPart(int)} method.
9557      *
9558      * @param   ch      the character to be tested.
9559      * @return  {@code true} if the character may be part of a
9560      *          Unicode identifier; {@code false} otherwise.
9561      * @see     Character#isIdentifierIgnorable(char)
9562      * @see     Character#isJavaIdentifierPart(char)
9563      * @see     Character#isLetterOrDigit(char)
9564      * @see     Character#isUnicodeIdentifierStart(char)
9565      * @since   1.1
9566      */
9567     public static boolean isUnicodeIdentifierPart(char ch) {
9568         return isUnicodeIdentifierPart((int)ch);
9569     }
9570 
9571     /**
9572      * Determines if the specified character (Unicode code point) may be part of a Unicode
9573      * identifier as other than the first character.
9574      * <p>
9575      * A character may be part of a Unicode identifier if and only if
9576      * one of the following statements is true:
9577      * <ul>
9578      * <li>  it is a letter
9579      * <li>  it is a connecting punctuation character (such as {@code '_'})
9580      * <li>  it is a digit
9581      * <li>  it is a numeric letter (such as a Roman numeral character)
9582      * <li>  it is a combining mark
9583      * <li>  it is a non-spacing mark
9584      * <li> {@code isIdentifierIgnorable} returns
9585      * {@code true} for this character.
9586      * </ul>
9587      * @param   codePoint the character (Unicode code point) to be tested.
9588      * @return  {@code true} if the character may be part of a
9589      *          Unicode identifier; {@code false} otherwise.
9590      * @see     Character#isIdentifierIgnorable(int)
9591      * @see     Character#isJavaIdentifierPart(int)
9592      * @see     Character#isLetterOrDigit(int)
9593      * @see     Character#isUnicodeIdentifierStart(int)
9594      * @since   1.5
9595      */
9596     public static boolean isUnicodeIdentifierPart(int codePoint) {
9597         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
9598     }
9599 
9600     /**
9601      * Determines if the specified character should be regarded as
9602      * an ignorable character in a Java identifier or a Unicode identifier.
9603      * <p>
9604      * The following Unicode characters are ignorable in a Java identifier
9605      * or a Unicode identifier:
9606      * <ul>
9607      * <li>ISO control characters that are not whitespace
9608      * <ul>
9609      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9610      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9611      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9612      * </ul>
9613      *
9614      * <li>all characters that have the {@code FORMAT} general
9615      * category value
9616      * </ul>
9617      *
9618      * <p><b>Note:</b> This method cannot handle <a
9619      * href="#supplementary"> supplementary characters</a>. To support
9620      * all Unicode characters, including supplementary characters, use
9621      * the {@link #isIdentifierIgnorable(int)} method.
9622      *
9623      * @param   ch      the character to be tested.
9624      * @return  {@code true} if the character is an ignorable control
9625      *          character that may be part of a Java or Unicode identifier;
9626      *           {@code false} otherwise.
9627      * @see     Character#isJavaIdentifierPart(char)
9628      * @see     Character#isUnicodeIdentifierPart(char)
9629      * @since   1.1
9630      */
9631     public static boolean isIdentifierIgnorable(char ch) {
9632         return isIdentifierIgnorable((int)ch);
9633     }
9634 
9635     /**
9636      * Determines if the specified character (Unicode code point) should be regarded as
9637      * an ignorable character in a Java identifier or a Unicode identifier.
9638      * <p>
9639      * The following Unicode characters are ignorable in a Java identifier
9640      * or a Unicode identifier:
9641      * <ul>
9642      * <li>ISO control characters that are not whitespace
9643      * <ul>
9644      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9645      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9646      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9647      * </ul>
9648      *
9649      * <li>all characters that have the {@code FORMAT} general
9650      * category value
9651      * </ul>
9652      *
9653      * @param   codePoint the character (Unicode code point) to be tested.
9654      * @return  {@code true} if the character is an ignorable control
9655      *          character that may be part of a Java or Unicode identifier;
9656      *          {@code false} otherwise.
9657      * @see     Character#isJavaIdentifierPart(int)
9658      * @see     Character#isUnicodeIdentifierPart(int)
9659      * @since   1.5
9660      */
9661     public static boolean isIdentifierIgnorable(int codePoint) {
9662         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
9663     }
9664 
9665     /**
9666      * Converts the character argument to lowercase using case
9667      * mapping information from the UnicodeData file.
9668      * <p>
9669      * Note that
9670      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
9671      * does not always return {@code true} for some ranges of
9672      * characters, particularly those that are symbols or ideographs.
9673      *
9674      * <p>In general, {@link String#toLowerCase()} should be used to map
9675      * characters to lowercase. {@code String} case mapping methods
9676      * have several benefits over {@code Character} case mapping methods.
9677      * {@code String} case mapping methods can perform locale-sensitive
9678      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9679      * the {@code Character} case mapping methods cannot.
9680      *
9681      * <p><b>Note:</b> This method cannot handle <a
9682      * href="#supplementary"> supplementary characters</a>. To support
9683      * all Unicode characters, including supplementary characters, use
9684      * the {@link #toLowerCase(int)} method.
9685      *
9686      * @param   ch   the character to be converted.
9687      * @return  the lowercase equivalent of the character, if any;
9688      *          otherwise, the character itself.
9689      * @see     Character#isLowerCase(char)
9690      * @see     String#toLowerCase()
9691      */
9692     public static char toLowerCase(char ch) {
9693         return (char)toLowerCase((int)ch);
9694     }
9695 
9696     /**
9697      * Converts the character (Unicode code point) argument to
9698      * lowercase using case mapping information from the UnicodeData
9699      * file.
9700      *
9701      * <p> Note that
9702      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9703      * does not always return {@code true} for some ranges of
9704      * characters, particularly those that are symbols or ideographs.
9705      *
9706      * <p>In general, {@link String#toLowerCase()} should be used to map
9707      * characters to lowercase. {@code String} case mapping methods
9708      * have several benefits over {@code Character} case mapping methods.
9709      * {@code String} case mapping methods can perform locale-sensitive
9710      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9711      * the {@code Character} case mapping methods cannot.
9712      *
9713      * @param   codePoint   the character (Unicode code point) to be converted.
9714      * @return  the lowercase equivalent of the character (Unicode code
9715      *          point), if any; otherwise, the character itself.
9716      * @see     Character#isLowerCase(int)
9717      * @see     String#toLowerCase()
9718      *
9719      * @since   1.5
9720      */
9721     public static int toLowerCase(int codePoint) {
9722         return CharacterData.of(codePoint).toLowerCase(codePoint);
9723     }
9724 
9725     /**
9726      * Converts the character argument to uppercase using case mapping
9727      * information from the UnicodeData file.
9728      * <p>
9729      * Note that
9730      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9731      * does not always return {@code true} for some ranges of
9732      * characters, particularly those that are symbols or ideographs.
9733      *
9734      * <p>In general, {@link String#toUpperCase()} should be used to map
9735      * characters to uppercase. {@code String} case mapping methods
9736      * have several benefits over {@code Character} case mapping methods.
9737      * {@code String} case mapping methods can perform locale-sensitive
9738      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9739      * the {@code Character} case mapping methods cannot.
9740      *
9741      * <p><b>Note:</b> This method cannot handle <a
9742      * href="#supplementary"> supplementary characters</a>. To support
9743      * all Unicode characters, including supplementary characters, use
9744      * the {@link #toUpperCase(int)} method.
9745      *
9746      * @param   ch   the character to be converted.
9747      * @return  the uppercase equivalent of the character, if any;
9748      *          otherwise, the character itself.
9749      * @see     Character#isUpperCase(char)
9750      * @see     String#toUpperCase()
9751      */
9752     public static char toUpperCase(char ch) {
9753         return (char)toUpperCase((int)ch);
9754     }
9755 
9756     /**
9757      * Converts the character (Unicode code point) argument to
9758      * uppercase using case mapping information from the UnicodeData
9759      * file.
9760      *
9761      * <p>Note that
9762      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
9763      * does not always return {@code true} for some ranges of
9764      * characters, particularly those that are symbols or ideographs.
9765      *
9766      * <p>In general, {@link String#toUpperCase()} should be used to map
9767      * characters to uppercase. {@code String} case mapping methods
9768      * have several benefits over {@code Character} case mapping methods.
9769      * {@code String} case mapping methods can perform locale-sensitive
9770      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9771      * the {@code Character} case mapping methods cannot.
9772      *
9773      * @param   codePoint   the character (Unicode code point) to be converted.
9774      * @return  the uppercase equivalent of the character, if any;
9775      *          otherwise, the character itself.
9776      * @see     Character#isUpperCase(int)
9777      * @see     String#toUpperCase()
9778      *
9779      * @since   1.5
9780      */
9781     public static int toUpperCase(int codePoint) {
9782         return CharacterData.of(codePoint).toUpperCase(codePoint);
9783     }
9784 
9785     /**
9786      * Converts the character argument to titlecase using case mapping
9787      * information from the UnicodeData file. If a character has no
9788      * explicit titlecase mapping and is not itself a titlecase char
9789      * according to UnicodeData, then the uppercase mapping is
9790      * returned as an equivalent titlecase mapping. If the
9791      * {@code char} argument is already a titlecase
9792      * {@code char}, the same {@code char} value will be
9793      * returned.
9794      * <p>
9795      * Note that
9796      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
9797      * does not always return {@code true} for some ranges of
9798      * characters.
9799      *
9800      * <p><b>Note:</b> This method cannot handle <a
9801      * href="#supplementary"> supplementary characters</a>. To support
9802      * all Unicode characters, including supplementary characters, use
9803      * the {@link #toTitleCase(int)} method.
9804      *
9805      * @param   ch   the character to be converted.
9806      * @return  the titlecase equivalent of the character, if any;
9807      *          otherwise, the character itself.
9808      * @see     Character#isTitleCase(char)
9809      * @see     Character#toLowerCase(char)
9810      * @see     Character#toUpperCase(char)
9811      * @since   1.0.2
9812      */
9813     public static char toTitleCase(char ch) {
9814         return (char)toTitleCase((int)ch);
9815     }
9816 
9817     /**
9818      * Converts the character (Unicode code point) argument to titlecase using case mapping
9819      * information from the UnicodeData file. If a character has no
9820      * explicit titlecase mapping and is not itself a titlecase char
9821      * according to UnicodeData, then the uppercase mapping is
9822      * returned as an equivalent titlecase mapping. If the
9823      * character argument is already a titlecase
9824      * character, the same character value will be
9825      * returned.
9826      *
9827      * <p>Note that
9828      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
9829      * does not always return {@code true} for some ranges of
9830      * characters.
9831      *
9832      * @param   codePoint   the character (Unicode code point) to be converted.
9833      * @return  the titlecase equivalent of the character, if any;
9834      *          otherwise, the character itself.
9835      * @see     Character#isTitleCase(int)
9836      * @see     Character#toLowerCase(int)
9837      * @see     Character#toUpperCase(int)
9838      * @since   1.5
9839      */
9840     public static int toTitleCase(int codePoint) {
9841         return CharacterData.of(codePoint).toTitleCase(codePoint);
9842     }
9843 
9844     /**
9845      * Returns the numeric value of the character {@code ch} in the
9846      * specified radix.
9847      * <p>
9848      * If the radix is not in the range {@code MIN_RADIX} &le;
9849      * {@code radix} &le; {@code MAX_RADIX} or if the
9850      * value of {@code ch} is not a valid digit in the specified
9851      * radix, {@code -1} is returned. A character is a valid digit
9852      * if at least one of the following is true:
9853      * <ul>
9854      * <li>The method {@code isDigit} is {@code true} of the character
9855      *     and the Unicode decimal digit value of the character (or its
9856      *     single-character decomposition) is less than the specified radix.
9857      *     In this case the decimal digit value is returned.
9858      * <li>The character is one of the uppercase Latin letters
9859      *     {@code 'A'} through {@code 'Z'} and its code is less than
9860      *     {@code radix + 'A' - 10}.
9861      *     In this case, {@code ch - 'A' + 10}
9862      *     is returned.
9863      * <li>The character is one of the lowercase Latin letters
9864      *     {@code 'a'} through {@code 'z'} and its code is less than
9865      *     {@code radix + 'a' - 10}.
9866      *     In this case, {@code ch - 'a' + 10}
9867      *     is returned.
9868      * <li>The character is one of the fullwidth uppercase Latin letters A
9869      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9870      *     and its code is less than
9871      *     {@code radix + '\u005CuFF21' - 10}.
9872      *     In this case, {@code ch - '\u005CuFF21' + 10}
9873      *     is returned.
9874      * <li>The character is one of the fullwidth lowercase Latin letters a
9875      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9876      *     and its code is less than
9877      *     {@code radix + '\u005CuFF41' - 10}.
9878      *     In this case, {@code ch - '\u005CuFF41' + 10}
9879      *     is returned.
9880      * </ul>
9881      *
9882      * <p><b>Note:</b> This method cannot handle <a
9883      * href="#supplementary"> supplementary characters</a>. To support
9884      * all Unicode characters, including supplementary characters, use
9885      * the {@link #digit(int, int)} method.
9886      *
9887      * @param   ch      the character to be converted.
9888      * @param   radix   the radix.
9889      * @return  the numeric value represented by the character in the
9890      *          specified radix.
9891      * @see     Character#forDigit(int, int)
9892      * @see     Character#isDigit(char)
9893      */
9894     public static int digit(char ch, int radix) {
9895         return digit((int)ch, radix);
9896     }
9897 
9898     /**
9899      * Returns the numeric value of the specified character (Unicode
9900      * code point) in the specified radix.
9901      *
9902      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
9903      * {@code radix} &le; {@code MAX_RADIX} or if the
9904      * character is not a valid digit in the specified
9905      * radix, {@code -1} is returned. A character is a valid digit
9906      * if at least one of the following is true:
9907      * <ul>
9908      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
9909      *     and the Unicode decimal digit value of the character (or its
9910      *     single-character decomposition) is less than the specified radix.
9911      *     In this case the decimal digit value is returned.
9912      * <li>The character is one of the uppercase Latin letters
9913      *     {@code 'A'} through {@code 'Z'} and its code is less than
9914      *     {@code radix + 'A' - 10}.
9915      *     In this case, {@code codePoint - 'A' + 10}
9916      *     is returned.
9917      * <li>The character is one of the lowercase Latin letters
9918      *     {@code 'a'} through {@code 'z'} and its code is less than
9919      *     {@code radix + 'a' - 10}.
9920      *     In this case, {@code codePoint - 'a' + 10}
9921      *     is returned.
9922      * <li>The character is one of the fullwidth uppercase Latin letters A
9923      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9924      *     and its code is less than
9925      *     {@code radix + '\u005CuFF21' - 10}.
9926      *     In this case,
9927      *     {@code codePoint - '\u005CuFF21' + 10}
9928      *     is returned.
9929      * <li>The character is one of the fullwidth lowercase Latin letters a
9930      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9931      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
9933      *     In this case,
9934      *     {@code codePoint - '\u005CuFF41' + 10}
9935      *     is returned.
9936      * </ul>
9937      *
9938      * @param   codePoint the character (Unicode code point) to be converted.
9939      * @param   radix   the radix.
9940      * @return  the numeric value represented by the character in the
9941      *          specified radix.
9942      * @see     Character#forDigit(int, int)
9943      * @see     Character#isDigit(int)
9944      * @since   1.5
9945      */
9946     public static int digit(int codePoint, int radix) {
9947         return CharacterData.of(codePoint).digit(codePoint, radix);
9948     }
9949 
9950     /**
9951      * Returns the {@code int} value that the specified Unicode
9952      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9954      * an int with a value of 50.
9955      * <p>
9956      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9957      * {@code '\u005Cu005A'}), lowercase
9958      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9959      * full width variant ({@code '\u005CuFF21'} through
9960      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9961      * {@code '\u005CuFF5A'}) forms have numeric values from 10
9962      * through 35. This is independent of the Unicode specification,
9963      * which does not assign numeric values to these {@code char}
9964      * values.
9965      * <p>
9966      * If the character does not have a numeric value, then -1 is returned.
9967      * If the character has a numeric value that cannot be represented as a
9968      * nonnegative integer (for example, a fractional value), then -2
9969      * is returned.
9970      *
9971      * <p><b>Note:</b> This method cannot handle <a
9972      * href="#supplementary"> supplementary characters</a>. To support
9973      * all Unicode characters, including supplementary characters, use
9974      * the {@link #getNumericValue(int)} method.
9975      *
9976      * @param   ch      the character to be converted.
9977      * @return  the numeric value of the character, as a nonnegative {@code int}
9978      *          value; -2 if the character has a numeric value but the value
9979      *          can not be represented as a nonnegative {@code int} value;
9980      *          -1 if the character has no numeric value.
9981      * @see     Character#forDigit(int, int)
9982      * @see     Character#isDigit(char)
9983      * @since   1.1
9984      */
9985     public static int getNumericValue(char ch) {
9986         return getNumericValue((int)ch);
9987     }
9988 
9989     /**
9990      * Returns the {@code int} value that the specified
9991      * character (Unicode code point) represents. For example, the character
9992      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9993      * an {@code int} with a value of 50.
9994      * <p>
9995      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9996      * {@code '\u005Cu005A'}), lowercase
9997      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9998      * full width variant ({@code '\u005CuFF21'} through
9999      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10000      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10001      * through 35. This is independent of the Unicode specification,
10002      * which does not assign numeric values to these {@code char}
10003      * values.
10004      * <p>
10005      * If the character does not have a numeric value, then -1 is returned.
10006      * If the character has a numeric value that cannot be represented as a
10007      * nonnegative integer (for example, a fractional value), then -2
10008      * is returned.
10009      *
10010      * @param   codePoint the character (Unicode code point) to be converted.
10011      * @return  the numeric value of the character, as a nonnegative {@code int}
10012      *          value; -2 if the character has a numeric value but the value
10013      *          can not be represented as a nonnegative {@code int} value;
10014      *          -1 if the character has no numeric value.
10015      * @see     Character#forDigit(int, int)
10016      * @see     Character#isDigit(int)
10017      * @since   1.5
10018      */
10019     public static int getNumericValue(int codePoint) {
10020         return CharacterData.of(codePoint).getNumericValue(codePoint);
10021     }
10022 
10023     /**
10024      * Determines if the specified character is ISO-LATIN-1 white space.
10025      * This method returns {@code true} for the following five
10026      * characters only:
10027      * <table class="striped">
10028      * <caption style="display:none">truechars</caption>
10029      * <thead>
10030      * <tr><th scope="col">Character
10031      *     <th scope="col">Code
10032      *     <th scope="col">Name
10033      * </thead>
10034      * <tbody>
10035      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
10036      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
10037      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
10038      *     <td>{@code NEW LINE}</td></tr>
10039      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
10040      *     <td>{@code FORM FEED}</td></tr>
10041      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
10042      *     <td>{@code CARRIAGE RETURN}</td></tr>
10043      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
10044      *     <td>{@code SPACE}</td></tr>
10045      * </tbody>
10046      * </table>
10047      *
10048      * @param      ch   the character to be tested.
10049      * @return     {@code true} if the character is ISO-LATIN-1 white
10050      *             space; {@code false} otherwise.
10051      * @see        Character#isSpaceChar(char)
10052      * @see        Character#isWhitespace(char)
10053      * @deprecated Replaced by isWhitespace(char).
10054      */
10055     @Deprecated(since="1.1")
10056     public static boolean isSpace(char ch) {
10057         return (ch <= 0x0020) &&
10058             (((((1L << 0x0009) |
10059             (1L << 0x000A) |
10060             (1L << 0x000C) |
10061             (1L << 0x000D) |
10062             (1L << 0x0020)) >> ch) & 1L) != 0);
10063     }
10064 
10065 
10066     /**
10067      * Determines if the specified character is a Unicode space character.
10068      * A character is considered to be a space character if and only if
10069      * it is specified to be a space character by the Unicode Standard. This
10070      * method returns true if the character's general category type is any of
10071      * the following:
10072      * <ul>
10073      * <li> {@code SPACE_SEPARATOR}
10074      * <li> {@code LINE_SEPARATOR}
10075      * <li> {@code PARAGRAPH_SEPARATOR}
10076      * </ul>
10077      *
10078      * <p><b>Note:</b> This method cannot handle <a
10079      * href="#supplementary"> supplementary characters</a>. To support
10080      * all Unicode characters, including supplementary characters, use
10081      * the {@link #isSpaceChar(int)} method.
10082      *
10083      * @param   ch      the character to be tested.
10084      * @return  {@code true} if the character is a space character;
10085      *          {@code false} otherwise.
10086      * @see     Character#isWhitespace(char)
10087      * @since   1.1
10088      */
10089     public static boolean isSpaceChar(char ch) {
10090         return isSpaceChar((int)ch);
10091     }
10092 
10093     /**
10094      * Determines if the specified character (Unicode code point) is a
10095      * Unicode space character.  A character is considered to be a
10096      * space character if and only if it is specified to be a space
10097      * character by the Unicode Standard. This method returns true if
10098      * the character's general category type is any of the following:
10099      *
10100      * <ul>
10101      * <li> {@link #SPACE_SEPARATOR}
10102      * <li> {@link #LINE_SEPARATOR}
10103      * <li> {@link #PARAGRAPH_SEPARATOR}
10104      * </ul>
10105      *
10106      * @param   codePoint the character (Unicode code point) to be tested.
10107      * @return  {@code true} if the character is a space character;
10108      *          {@code false} otherwise.
10109      * @see     Character#isWhitespace(int)
10110      * @since   1.5
10111      */
10112     public static boolean isSpaceChar(int codePoint) {
10113         return ((((1 << Character.SPACE_SEPARATOR) |
10114                   (1 << Character.LINE_SEPARATOR) |
10115                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10116             != 0;
10117     }
10118 
10119     /**
10120      * Determines if the specified character is white space according to Java.
10121      * A character is a Java whitespace character if and only if it satisfies
10122      * one of the following criteria:
10123      * <ul>
10124      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10125      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10126      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10127      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10128      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10129      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10130      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10131      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10132      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10133      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10134      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10135      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10136      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10137      * </ul>
10138      *
10139      * <p><b>Note:</b> This method cannot handle <a
10140      * href="#supplementary"> supplementary characters</a>. To support
10141      * all Unicode characters, including supplementary characters, use
10142      * the {@link #isWhitespace(int)} method.
10143      *
10144      * @param   ch the character to be tested.
10145      * @return  {@code true} if the character is a Java whitespace
10146      *          character; {@code false} otherwise.
10147      * @see     Character#isSpaceChar(char)
10148      * @since   1.1
10149      */
10150     public static boolean isWhitespace(char ch) {
10151         return isWhitespace((int)ch);
10152     }
10153 
10154     /**
10155      * Determines if the specified character (Unicode code point) is
10156      * white space according to Java.  A character is a Java
10157      * whitespace character if and only if it satisfies one of the
10158      * following criteria:
10159      * <ul>
10160      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10161      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10162      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10163      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10164      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10165      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10166      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10167      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10168      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10169      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10170      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10171      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10172      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10173      * </ul>
10174      *
10175      * @param   codePoint the character (Unicode code point) to be tested.
10176      * @return  {@code true} if the character is a Java whitespace
10177      *          character; {@code false} otherwise.
10178      * @see     Character#isSpaceChar(int)
10179      * @since   1.5
10180      */
10181     public static boolean isWhitespace(int codePoint) {
10182         return CharacterData.of(codePoint).isWhitespace(codePoint);
10183     }
10184 
10185     /**
10186      * Determines if the specified character is an ISO control
10187      * character.  A character is considered to be an ISO control
10188      * character if its code is in the range {@code '\u005Cu0000'}
10189      * through {@code '\u005Cu001F'} or in the range
10190      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10191      *
10192      * <p><b>Note:</b> This method cannot handle <a
10193      * href="#supplementary"> supplementary characters</a>. To support
10194      * all Unicode characters, including supplementary characters, use
10195      * the {@link #isISOControl(int)} method.
10196      *
10197      * @param   ch      the character to be tested.
10198      * @return  {@code true} if the character is an ISO control character;
10199      *          {@code false} otherwise.
10200      *
10201      * @see     Character#isSpaceChar(char)
10202      * @see     Character#isWhitespace(char)
10203      * @since   1.1
10204      */
10205     public static boolean isISOControl(char ch) {
10206         return isISOControl((int)ch);
10207     }
10208 
10209     /**
10210      * Determines if the referenced character (Unicode code point) is an ISO control
10211      * character.  A character is considered to be an ISO control
10212      * character if its code is in the range {@code '\u005Cu0000'}
10213      * through {@code '\u005Cu001F'} or in the range
10214      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10215      *
10216      * @param   codePoint the character (Unicode code point) to be tested.
10217      * @return  {@code true} if the character is an ISO control character;
10218      *          {@code false} otherwise.
10219      * @see     Character#isSpaceChar(int)
10220      * @see     Character#isWhitespace(int)
10221      * @since   1.5
10222      */
10223     public static boolean isISOControl(int codePoint) {
10224         // Optimized form of:
10225         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
10226         //     (codePoint >= 0x7F && codePoint <= 0x9F);
10227         return codePoint <= 0x9F &&
10228             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10229     }
10230 
10231     /**
10232      * Returns a value indicating a character's general category.
10233      *
10234      * <p><b>Note:</b> This method cannot handle <a
10235      * href="#supplementary"> supplementary characters</a>. To support
10236      * all Unicode characters, including supplementary characters, use
10237      * the {@link #getType(int)} method.
10238      *
10239      * @param   ch      the character to be tested.
10240      * @return  a value of type {@code int} representing the
10241      *          character's general category.
10242      * @see     Character#COMBINING_SPACING_MARK
10243      * @see     Character#CONNECTOR_PUNCTUATION
10244      * @see     Character#CONTROL
10245      * @see     Character#CURRENCY_SYMBOL
10246      * @see     Character#DASH_PUNCTUATION
10247      * @see     Character#DECIMAL_DIGIT_NUMBER
10248      * @see     Character#ENCLOSING_MARK
10249      * @see     Character#END_PUNCTUATION
10250      * @see     Character#FINAL_QUOTE_PUNCTUATION
10251      * @see     Character#FORMAT
10252      * @see     Character#INITIAL_QUOTE_PUNCTUATION
10253      * @see     Character#LETTER_NUMBER
10254      * @see     Character#LINE_SEPARATOR
10255      * @see     Character#LOWERCASE_LETTER
10256      * @see     Character#MATH_SYMBOL
10257      * @see     Character#MODIFIER_LETTER
10258      * @see     Character#MODIFIER_SYMBOL
10259      * @see     Character#NON_SPACING_MARK
10260      * @see     Character#OTHER_LETTER
10261      * @see     Character#OTHER_NUMBER
10262      * @see     Character#OTHER_PUNCTUATION
10263      * @see     Character#OTHER_SYMBOL
10264      * @see     Character#PARAGRAPH_SEPARATOR
10265      * @see     Character#PRIVATE_USE
10266      * @see     Character#SPACE_SEPARATOR
10267      * @see     Character#START_PUNCTUATION
10268      * @see     Character#SURROGATE
10269      * @see     Character#TITLECASE_LETTER
10270      * @see     Character#UNASSIGNED
10271      * @see     Character#UPPERCASE_LETTER
10272      * @since   1.1
10273      */
10274     public static int getType(char ch) {
10275         return getType((int)ch);
10276     }
10277 
10278     /**
10279      * Returns a value indicating a character's general category.
10280      *
10281      * @param   codePoint the character (Unicode code point) to be tested.
10282      * @return  a value of type {@code int} representing the
10283      *          character's general category.
10284      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10285      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10286      * @see     Character#CONTROL CONTROL
10287      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10288      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
10289      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10290      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
10291      * @see     Character#END_PUNCTUATION END_PUNCTUATION
10292      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10293      * @see     Character#FORMAT FORMAT
10294      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10295      * @see     Character#LETTER_NUMBER LETTER_NUMBER
10296      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
10297      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
10298      * @see     Character#MATH_SYMBOL MATH_SYMBOL
10299      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
10300      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10301      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
10302      * @see     Character#OTHER_LETTER OTHER_LETTER
10303      * @see     Character#OTHER_NUMBER OTHER_NUMBER
10304      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10305      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
10306      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10307      * @see     Character#PRIVATE_USE PRIVATE_USE
10308      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
10309      * @see     Character#START_PUNCTUATION START_PUNCTUATION
10310      * @see     Character#SURROGATE SURROGATE
10311      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
10312      * @see     Character#UNASSIGNED UNASSIGNED
10313      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
10314      * @since   1.5
10315      */
10316     public static int getType(int codePoint) {
10317         return CharacterData.of(codePoint).getType(codePoint);
10318     }
10319 
10320     /**
10321      * Determines the character representation for a specific digit in
10322      * the specified radix. If the value of {@code radix} is not a
10323      * valid radix, or the value of {@code digit} is not a valid
10324      * digit in the specified radix, the null character
10325      * ({@code '\u005Cu0000'}) is returned.
10326      * <p>
10327      * The {@code radix} argument is valid if it is greater than or
10328      * equal to {@code MIN_RADIX} and less than or equal to
10329      * {@code MAX_RADIX}. The {@code digit} argument is valid if
10330      * {@code 0 <= digit < radix}.
10331      * <p>
10332      * If the digit is less than 10, then
10333      * {@code '0' + digit} is returned. Otherwise, the value
10334      * {@code 'a' + digit - 10} is returned.
10335      *
10336      * @param   digit   the number to convert to a character.
10337      * @param   radix   the radix.
10338      * @return  the {@code char} representation of the specified digit
10339      *          in the specified radix.
10340      * @see     Character#MIN_RADIX
10341      * @see     Character#MAX_RADIX
10342      * @see     Character#digit(char, int)
10343      */
10344     public static char forDigit(int digit, int radix) {
10345         if ((digit >= radix) || (digit < 0)) {
10346             return '\0';
10347         }
10348         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10349             return '\0';
10350         }
10351         if (digit < 10) {
10352             return (char)('0' + digit);
10353         }
10354         return (char)('a' - 10 + digit);
10355     }
10356 
10357     /**
10358      * Returns the Unicode directionality property for the given
10359      * character.  Character directionality is used to calculate the
10360      * visual ordering of text. The directionality value of undefined
10361      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10362      *
10363      * <p><b>Note:</b> This method cannot handle <a
10364      * href="#supplementary"> supplementary characters</a>. To support
10365      * all Unicode characters, including supplementary characters, use
10366      * the {@link #getDirectionality(int)} method.
10367      *
10368      * @param  ch {@code char} for which the directionality property
10369      *            is requested.
10370      * @return the directionality property of the {@code char} value.
10371      *
10372      * @see Character#DIRECTIONALITY_UNDEFINED
10373      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10374      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10375      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10376      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10377      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10378      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10379      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10380      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10381      * @see Character#DIRECTIONALITY_NONSPACING_MARK
10382      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10383      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10384      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10385      * @see Character#DIRECTIONALITY_WHITESPACE
10386      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10387      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10388      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10389      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10390      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10391      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10392      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10393      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10394      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10395      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10396      * @since 1.4
10397      */
10398     public static byte getDirectionality(char ch) {
10399         return getDirectionality((int)ch);
10400     }
10401 
10402     /**
10403      * Returns the Unicode directionality property for the given
10404      * character (Unicode code point).  Character directionality is
10405      * used to calculate the visual ordering of text. The
10406      * directionality value of an undefined character is {@link
10407      * #DIRECTIONALITY_UNDEFINED}.
10408      *
10409      * @param   codePoint the character (Unicode code point) for which
10410      *          the directionality property is requested.
10411      * @return the directionality property of the character.
10412      *
10413      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10414      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10415      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10416      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10417      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10418      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10419      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10420      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10421      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10422      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10423      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10424      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10425      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10426      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10427      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10428      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10429      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10430      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10431      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10432      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10433      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10434      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10435      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10436      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10437      * @since    1.5
10438      */
10439     public static byte getDirectionality(int codePoint) {
10440         return CharacterData.of(codePoint).getDirectionality(codePoint);
10441     }
10442 
10443     /**
10444      * Determines whether the character is mirrored according to the
10445      * Unicode specification.  Mirrored characters should have their
10446      * glyphs horizontally mirrored when displayed in text that is
10447      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
10448      * PARENTHESIS is semantically defined to be an <i>opening
10449      * parenthesis</i>.  This will appear as a "(" in text that is
10450      * left-to-right but as a ")" in text that is right-to-left.
10451      *
10452      * <p><b>Note:</b> This method cannot handle <a
10453      * href="#supplementary"> supplementary characters</a>. To support
10454      * all Unicode characters, including supplementary characters, use
10455      * the {@link #isMirrored(int)} method.
10456      *
10457      * @param  ch {@code char} for which the mirrored property is requested
10458      * @return {@code true} if the char is mirrored, {@code false}
10459      *         if the {@code char} is not mirrored or is not defined.
10460      * @since 1.4
10461      */
10462     public static boolean isMirrored(char ch) {
10463         return isMirrored((int)ch);
10464     }
10465 
10466     /**
10467      * Determines whether the specified character (Unicode code point)
10468      * is mirrored according to the Unicode specification.  Mirrored
10469      * characters should have their glyphs horizontally mirrored when
10470      * displayed in text that is right-to-left.  For example,
10471      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10472      * defined to be an <i>opening parenthesis</i>.  This will appear
10473      * as a "(" in text that is left-to-right but as a ")" in text
10474      * that is right-to-left.
10475      *
10476      * @param   codePoint the character (Unicode code point) to be tested.
10477      * @return  {@code true} if the character is mirrored, {@code false}
10478      *          if the character is not mirrored or is not defined.
10479      * @since   1.5
10480      */
10481     public static boolean isMirrored(int codePoint) {
10482         return CharacterData.of(codePoint).isMirrored(codePoint);
10483     }
10484 
10485     /**
10486      * Compares two {@code Character} objects numerically.
10487      *
10488      * @param   anotherCharacter   the {@code Character} to be compared.
10489      *
10490      * @return  the value {@code 0} if the argument {@code Character}
10491      *          is equal to this {@code Character}; a value less than
10492      *          {@code 0} if this {@code Character} is numerically less
10493      *          than the {@code Character} argument; and a value greater than
10494      *          {@code 0} if this {@code Character} is numerically greater
10495      *          than the {@code Character} argument (unsigned comparison).
10496      *          Note that this is strictly a numerical comparison; it is not
10497      *          locale-dependent.
10498      * @since   1.2
10499      */
10500     public int compareTo(Character anotherCharacter) {
10501         return compare(this.value, anotherCharacter.value);
10502     }
10503 
10504     /**
10505      * Compares two {@code char} values numerically.
10506      * The value returned is identical to what would be returned by:
10507      * <pre>
10508      *    Character.valueOf(x).compareTo(Character.valueOf(y))
10509      * </pre>
10510      *
10511      * @param  x the first {@code char} to compare
10512      * @param  y the second {@code char} to compare
10513      * @return the value {@code 0} if {@code x == y};
10514      *         a value less than {@code 0} if {@code x < y}; and
10515      *         a value greater than {@code 0} if {@code x > y}
10516      * @since 1.7
10517      */
10518     public static int compare(char x, char y) {
10519         return x - y;
10520     }
10521 
10522     /**
10523      * Converts the character (Unicode code point) argument to uppercase using
10524      * information from the UnicodeData file.
10525      *
10526      * @param   codePoint   the character (Unicode code point) to be converted.
10527      * @return  either the uppercase equivalent of the character, if
10528      *          any, or an error flag ({@code Character.ERROR})
10529      *          that indicates that a 1:M {@code char} mapping exists.
10530      * @see     Character#isLowerCase(char)
10531      * @see     Character#isUpperCase(char)
10532      * @see     Character#toLowerCase(char)
10533      * @see     Character#toTitleCase(char)
10534      * @since 1.4
10535      */
10536     static int toUpperCaseEx(int codePoint) {
10537         assert isValidCodePoint(codePoint);
10538         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
10539     }
10540 
10541     /**
10542      * Converts the character (Unicode code point) argument to uppercase using case
10543      * mapping information from the SpecialCasing file in the Unicode
10544      * specification. If a character has no explicit uppercase
10545      * mapping, then the {@code char} itself is returned in the
10546      * {@code char[]}.
10547      *
10548      * @param   codePoint   the character (Unicode code point) to be converted.
10549      * @return a {@code char[]} with the uppercased character.
10550      * @since 1.4
10551      */
10552     static char[] toUpperCaseCharArray(int codePoint) {
10553         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
10554         assert isBmpCodePoint(codePoint);
10555         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
10556     }
10557 
10558     /**
10559      * The number of bits used to represent a {@code char} value in unsigned
10560      * binary form, constant {@code 16}.
10561      *
10562      * @since 1.5
10563      */
10564     public static final int SIZE = 16; // width of a char in bits; BYTES is derived from this value
10565 
10566     /**
10567      * The number of bytes used to represent a {@code char} value in unsigned
10568      * binary form.
10569      *
10570      * @since 1.8
10571      */
10572     public static final int BYTES = SIZE / Byte.SIZE; // 16 / 8, i.e. 2 bytes per char
10573 
10574     /**
10575      * Returns the value obtained by reversing the order of the bytes in the
10576      * specified {@code char} value.
10577      *
10578      * @param ch The {@code char} of which to reverse the byte order.
10579      * @return the value obtained by reversing (or, equivalently, swapping)
10580      *     the bytes in the specified {@code char} value.
10581      * @since 1.5
10582      */
10583     @HotSpotIntrinsicCandidate
10584     public static char reverseBytes(char ch) {
10585         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
10586     }
10587 
10588     /**
10589      * Returns the Unicode name of the specified character
10590      * {@code codePoint}, or null if the code point is
10591      * {@link #UNASSIGNED unassigned}.
10592      * <p>
10593      * Note: if the specified character is not assigned a name by
10594      * the <i>UnicodeData</i> file (part of the Unicode Character
10595      * Database maintained by the Unicode Consortium), the returned
10596      * name is the same as the result of the expression:
10597      *
10598      * <blockquote>{@code
10599      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10600      *     + " "
10601      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10602      *
10603      * }</blockquote>
10604      *
10605      * @param  codePoint the character (Unicode code point)
10606      *
10607      * @return the Unicode name of the specified character, or null if
10608      *         the code point is unassigned.
10609      *
10610      * @exception IllegalArgumentException if the specified
10611      *            {@code codePoint} is not a valid Unicode
10612      *            code point.
10613      *
10614      * @since 1.7
10615      */
10616     public static String getName(int codePoint) {
10617         if (!isValidCodePoint(codePoint)) {
10618             throw new IllegalArgumentException();
10619         }
10620         String name = CharacterName.getInstance().getName(codePoint);
10621         if (name != null)
10622             return name;
10623         if (getType(codePoint) == UNASSIGNED)
10624             return null;
10625         UnicodeBlock block = UnicodeBlock.of(codePoint);
10626         if (block != null)
10627             return block.toString().replace('_', ' ') + " "
10628                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10629         // should never come here
10630         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10631     }
10632 
10633     /**
10634      * Returns the code point value of the Unicode character specified by
10635      * the given Unicode character name.
10636      * <p>
10637      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
10638      * file (part of the Unicode Character Database maintained by the Unicode
10639      * Consortium), its name is defined as the result of the expression:
10640      *
10641      * <blockquote>{@code
10642      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10643      *     + " "
10644      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10645      *
10646      * }</blockquote>
10647      * <p>
10648      * The {@code name} matching is case insensitive, with any leading and
10649      * trailing whitespace character removed.
10650      *
10651      * @param  name the Unicode character name
10652      *
10653      * @return the code point value of the character specified by its name.
10654      *
10655      * @throws IllegalArgumentException if the specified {@code name}
10656      *         is not a valid Unicode character name.
10657      * @throws NullPointerException if {@code name} is {@code null}
10658      *
10659      * @since 9
10660      */
10661     public static int codePointOf(String name) {
10662         name = name.trim().toUpperCase(Locale.ROOT);
10663         int cp = CharacterName.getInstance().getCodePoint(name);
10664         if (cp != -1)
10665             return cp;
10666         try {
10667             int off = name.lastIndexOf(' ');
10668             if (off != -1) {
10669                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
10670                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
10671                     return cp;
10672             }
10673         } catch (Exception x) {}
10674         throw new IllegalArgumentException("Unrecognized character name :" + name);
10675     }
10676 }