1 /*
   2  * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 10.0.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  * <p>
  59  * The code point, U+32FF, is reserved by the Unicode Consortium
  60  * to represent the Japanese square character for the new era that begins
  61  * May 2019. Relevant methods in the Character class return the same
  62  * properties as for the existing Japanese era characters (e.g., U+337E for
  63  * "Meizi"). For the details of the code point, refer to
  64  * <a href="http://blog.unicode.org/2018/09/new-japanese-era.html">
  65  * http://blog.unicode.org/2018/09/new-japanese-era.html</a>.
  66  *
  67  * <h3><a id="unicode">Unicode Character Representations</a></h3>
  68  *
  69  * <p>The {@code char} data type (and therefore the value that a
  70  * {@code Character} object encapsulates) is based on the
  71  * original Unicode specification, which defined characters as
  72  * fixed-width 16-bit entities. The Unicode Standard has since been
  73  * changed to allow for characters whose representation requires more
  74  * than 16 bits.  The range of legal <em>code point</em>s is now
  75  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  76  * (Refer to the <a
  77  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  78  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  79  * Standard.)
  80  *
  81  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
  82  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  83  * <a id="supplementary">Characters</a> whose code points are greater
  84  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  85  * platform uses the UTF-16 representation in {@code char} arrays and
  86  * in the {@code String} and {@code StringBuffer} classes. In
  87  * this representation, supplementary characters are represented as a pair
  88  * of {@code char} values, the first from the <em>high-surrogates</em>
  89  * range, (\uD800-\uDBFF), the second from the
  90  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  91  *
  92  * <p>A {@code char} value, therefore, represents Basic
  93  * Multilingual Plane (BMP) code points, including the surrogate
  94  * code points, or code units of the UTF-16 encoding. An
  95  * {@code int} value represents all Unicode code points,
  96  * including supplementary code points. The lower (least significant)
  97  * 21 bits of {@code int} are used to represent Unicode code
  98  * points and the upper (most significant) 11 bits must be zero.
  99  * Unless otherwise specified, the behavior with respect to
 100  * supplementary characters and surrogate {@code char} values is
 101  * as follows:
 102  *
 103  * <ul>
 104  * <li>The methods that only accept a {@code char} value cannot support
 105  * supplementary characters. They treat {@code char} values from the
 106  * surrogate ranges as undefined characters. For example,
 107  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 108  * this specific value if followed by any low-surrogate value in a string
 109  * would represent a letter.
 110  *
 111  * <li>The methods that accept an {@code int} value support all
 112  * Unicode characters, including supplementary characters. For
 113  * example, {@code Character.isLetter(0x2F81A)} returns
 114  * {@code true} because the code point value represents a letter
 115  * (a CJK ideograph).
 116  * </ul>
 117  *
 118  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 119  * used for character values in the range between U+0000 and U+10FFFF,
 120  * and <em>Unicode code unit</em> is used for 16-bit
 121  * {@code char} values that are code units of the <em>UTF-16</em>
 122  * encoding. For more information on Unicode terminology, refer to the
 123  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 124  *
 125  * @author  Lee Boynton
 126  * @author  Guy Steele
 127  * @author  Akira Tanaka
 128  * @author  Martin Buchholz
 129  * @author  Ulf Zibis
 130  * @since   1.0
 131  */
 132 public final
 133 class Character implements java.io.Serializable, Comparable<Character> {
 134     /**
 135      * The minimum radix available for conversion to and from strings.
 136      * The constant value of this field is the smallest value permitted
 137      * for the radix argument in radix-conversion methods such as the
 138      * {@code digit} method, the {@code forDigit} method, and the
 139      * {@code toString} method of class {@code Integer}.
 140      *
 141      * @see     Character#digit(char, int)
 142      * @see     Character#forDigit(int, int)
 143      * @see     Integer#toString(int, int)
 144      * @see     Integer#valueOf(String)
 145      */
 146     public static final int MIN_RADIX = 2;
 147 
 148     /**
 149      * The maximum radix available for conversion to and from strings.
 150      * The constant value of this field is the largest value permitted
 151      * for the radix argument in radix-conversion methods such as the
 152      * {@code digit} method, the {@code forDigit} method, and the
 153      * {@code toString} method of class {@code Integer}.
 154      *
 155      * @see     Character#digit(char, int)
 156      * @see     Character#forDigit(int, int)
 157      * @see     Integer#toString(int, int)
 158      * @see     Integer#valueOf(String)
 159      */
 160     public static final int MAX_RADIX = 36;
 161 
 162     /**
 163      * The constant value of this field is the smallest value of type
 164      * {@code char}, {@code '\u005Cu0000'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MIN_VALUE = '\u0000';
 169 
 170     /**
 171      * The constant value of this field is the largest value of type
 172      * {@code char}, {@code '\u005CuFFFF'}.
 173      *
 174      * @since   1.0.2
 175      */
 176     public static final char MAX_VALUE = '\uFFFF';
 177 
 178     /**
 179      * The {@code Class} instance representing the primitive type
 180      * {@code char}.
 181      *
 182      * @since   1.1
 183      */
 184     @SuppressWarnings("unchecked")
 185     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 186 
 187     /*
 188      * Normative general types
 189      */
 190 
 191     /*
 192      * General character types
 193      */
 194 
 195     /**
 196      * General category "Cn" in the Unicode specification.
 197      * @since   1.1
 198      */
 199     public static final byte UNASSIGNED = 0;
 200 
 201     /**
 202      * General category "Lu" in the Unicode specification.
 203      * @since   1.1
 204      */
 205     public static final byte UPPERCASE_LETTER = 1;
 206 
 207     /**
 208      * General category "Ll" in the Unicode specification.
 209      * @since   1.1
 210      */
 211     public static final byte LOWERCASE_LETTER = 2;
 212 
 213     /**
 214      * General category "Lt" in the Unicode specification.
 215      * @since   1.1
 216      */
 217     public static final byte TITLECASE_LETTER = 3;
 218 
 219     /**
 220      * General category "Lm" in the Unicode specification.
 221      * @since   1.1
 222      */
 223     public static final byte MODIFIER_LETTER = 4;
 224 
 225     /**
 226      * General category "Lo" in the Unicode specification.
 227      * @since   1.1
 228      */
 229     public static final byte OTHER_LETTER = 5;
 230 
 231     /**
 232      * General category "Mn" in the Unicode specification.
 233      * @since   1.1
 234      */
 235     public static final byte NON_SPACING_MARK = 6;
 236 
 237     /**
 238      * General category "Me" in the Unicode specification.
 239      * @since   1.1
 240      */
 241     public static final byte ENCLOSING_MARK = 7;
 242 
 243     /**
 244      * General category "Mc" in the Unicode specification.
 245      * @since   1.1
 246      */
 247     public static final byte COMBINING_SPACING_MARK = 8;
 248 
 249     /**
 250      * General category "Nd" in the Unicode specification.
 251      * @since   1.1
 252      */
 253     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 254 
 255     /**
 256      * General category "Nl" in the Unicode specification.
 257      * @since   1.1
 258      */
 259     public static final byte LETTER_NUMBER = 10;
 260 
 261     /**
 262      * General category "No" in the Unicode specification.
 263      * @since   1.1
 264      */
 265     public static final byte OTHER_NUMBER = 11;
 266 
 267     /**
 268      * General category "Zs" in the Unicode specification.
 269      * @since   1.1
 270      */
 271     public static final byte SPACE_SEPARATOR = 12;
 272 
 273     /**
 274      * General category "Zl" in the Unicode specification.
 275      * @since   1.1
 276      */
 277     public static final byte LINE_SEPARATOR = 13;
 278 
 279     /**
 280      * General category "Zp" in the Unicode specification.
 281      * @since   1.1
 282      */
 283     public static final byte PARAGRAPH_SEPARATOR = 14;
 284 
 285     /**
 286      * General category "Cc" in the Unicode specification.
 287      * @since   1.1
 288      */
 289     public static final byte CONTROL = 15;
 290 
 291     /**
 292      * General category "Cf" in the Unicode specification.
 293      * @since   1.1
 294      */
 295     public static final byte FORMAT = 16;
 296 
 297     /**
 298      * General category "Co" in the Unicode specification.
 299      * @since   1.1
 300      */
 301     public static final byte PRIVATE_USE = 18;
 302 
 303     /**
 304      * General category "Cs" in the Unicode specification.
 305      * @since   1.1
 306      */
 307     public static final byte SURROGATE = 19;
 308 
 309     /**
 310      * General category "Pd" in the Unicode specification.
 311      * @since   1.1
 312      */
 313     public static final byte DASH_PUNCTUATION = 20;
 314 
 315     /**
 316      * General category "Ps" in the Unicode specification.
 317      * @since   1.1
 318      */
 319     public static final byte START_PUNCTUATION = 21;
 320 
 321     /**
 322      * General category "Pe" in the Unicode specification.
 323      * @since   1.1
 324      */
 325     public static final byte END_PUNCTUATION = 22;
 326 
 327     /**
 328      * General category "Pc" in the Unicode specification.
 329      * @since   1.1
 330      */
 331     public static final byte CONNECTOR_PUNCTUATION = 23;
 332 
 333     /**
 334      * General category "Po" in the Unicode specification.
 335      * @since   1.1
 336      */
 337     public static final byte OTHER_PUNCTUATION = 24;
 338 
 339     /**
 340      * General category "Sm" in the Unicode specification.
 341      * @since   1.1
 342      */
 343     public static final byte MATH_SYMBOL = 25;
 344 
 345     /**
 346      * General category "Sc" in the Unicode specification.
 347      * @since   1.1
 348      */
 349     public static final byte CURRENCY_SYMBOL = 26;
 350 
 351     /**
 352      * General category "Sk" in the Unicode specification.
 353      * @since   1.1
 354      */
 355     public static final byte MODIFIER_SYMBOL = 27;
 356 
 357     /**
 358      * General category "So" in the Unicode specification.
 359      * @since   1.1
 360      */
 361     public static final byte OTHER_SYMBOL = 28;
 362 
 363     /**
 364      * General category "Pi" in the Unicode specification.
 365      * @since   1.4
 366      */
 367     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 368 
 369     /**
 370      * General category "Pf" in the Unicode specification.
 371      * @since   1.4
 372      */
 373     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 374 
 375     /**
 376      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 377      */
 378     static final int ERROR = 0xFFFFFFFF;
 379 
 380 
 381     /**
 382      * Undefined bidirectional character type. Undefined {@code char}
 383      * values have undefined directionality in the Unicode specification.
 384      * @since 1.4
 385      */
 386     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 387 
 388     /**
 389      * Strong bidirectional character type "L" in the Unicode specification.
 390      * @since 1.4
 391      */
 392     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 393 
 394     /**
 395      * Strong bidirectional character type "R" in the Unicode specification.
 396      * @since 1.4
 397      */
 398     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 399 
 400     /**
 401      * Strong bidirectional character type "AL" in the Unicode specification.
 402      * @since 1.4
 403      */
 404     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 405 
 406     /**
 407      * Weak bidirectional character type "EN" in the Unicode specification.
 408      * @since 1.4
 409      */
 410     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 411 
 412     /**
 413      * Weak bidirectional character type "ES" in the Unicode specification.
 414      * @since 1.4
 415      */
 416     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 417 
 418     /**
 419      * Weak bidirectional character type "ET" in the Unicode specification.
 420      * @since 1.4
 421      */
 422     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 423 
 424     /**
 425      * Weak bidirectional character type "AN" in the Unicode specification.
 426      * @since 1.4
 427      */
 428     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 429 
 430     /**
 431      * Weak bidirectional character type "CS" in the Unicode specification.
 432      * @since 1.4
 433      */
 434     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 435 
 436     /**
 437      * Weak bidirectional character type "NSM" in the Unicode specification.
 438      * @since 1.4
 439      */
 440     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 441 
 442     /**
 443      * Weak bidirectional character type "BN" in the Unicode specification.
 444      * @since 1.4
 445      */
 446     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 447 
 448     /**
 449      * Neutral bidirectional character type "B" in the Unicode specification.
 450      * @since 1.4
 451      */
 452     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 453 
 454     /**
 455      * Neutral bidirectional character type "S" in the Unicode specification.
 456      * @since 1.4
 457      */
 458     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 459 
 460     /**
 461      * Neutral bidirectional character type "WS" in the Unicode specification.
 462      * @since 1.4
 463      */
 464     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 465 
 466     /**
 467      * Neutral bidirectional character type "ON" in the Unicode specification.
 468      * @since 1.4
 469      */
 470     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 471 
 472     /**
 473      * Strong bidirectional character type "LRE" in the Unicode specification.
 474      * @since 1.4
 475      */
 476     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 477 
 478     /**
 479      * Strong bidirectional character type "LRO" in the Unicode specification.
 480      * @since 1.4
 481      */
 482     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 483 
 484     /**
 485      * Strong bidirectional character type "RLE" in the Unicode specification.
 486      * @since 1.4
 487      */
 488     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 489 
 490     /**
 491      * Strong bidirectional character type "RLO" in the Unicode specification.
 492      * @since 1.4
 493      */
 494     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 495 
 496     /**
 497      * Weak bidirectional character type "PDF" in the Unicode specification.
 498      * @since 1.4
 499      */
 500     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 501 
 502     /**
 503      * Weak bidirectional character type "LRI" in the Unicode specification.
 504      * @since 9
 505      */
 506     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
 507 
 508     /**
 509      * Weak bidirectional character type "RLI" in the Unicode specification.
 510      * @since 9
 511      */
 512     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
 513 
 514     /**
 515      * Weak bidirectional character type "FSI" in the Unicode specification.
 516      * @since 9
 517      */
 518     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
 519 
 520     /**
 521      * Weak bidirectional character type "PDI" in the Unicode specification.
 522      * @since 9
 523      */
 524     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
 525 
 526     /**
 527      * The minimum value of a
 528      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 529      * Unicode high-surrogate code unit</a>
 530      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 531      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 532      *
 533      * @since 1.5
 534      */
 535     public static final char MIN_HIGH_SURROGATE = '\uD800';
 536 
 537     /**
 538      * The maximum value of a
 539      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 540      * Unicode high-surrogate code unit</a>
 541      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 542      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 543      *
 544      * @since 1.5
 545      */
 546     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 547 
 548     /**
 549      * The minimum value of a
 550      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 551      * Unicode low-surrogate code unit</a>
 552      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 553      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 554      *
 555      * @since 1.5
 556      */
 557     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 558 
 559     /**
 560      * The maximum value of a
 561      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 562      * Unicode low-surrogate code unit</a>
 563      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 564      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 565      *
 566      * @since 1.5
 567      */
 568     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 569 
 570     /**
 571      * The minimum value of a Unicode surrogate code unit in the
 572      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 573      *
 574      * @since 1.5
 575      */
 576     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 577 
 578     /**
 579      * The maximum value of a Unicode surrogate code unit in the
 580      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 581      *
 582      * @since 1.5
 583      */
 584     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 585 
 586     /**
 587      * The minimum value of a
 588      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 589      * Unicode supplementary code point</a>, constant {@code U+10000}.
 590      *
 591      * @since 1.5
 592      */
 593     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 594 
 595     /**
 596      * The minimum value of a
 597      * <a href="http://www.unicode.org/glossary/#code_point">
 598      * Unicode code point</a>, constant {@code U+0000}.
 599      *
 600      * @since 1.5
 601      */
 602     public static final int MIN_CODE_POINT = 0x000000;
 603 
 604     /**
 605      * The maximum value of a
 606      * <a href="http://www.unicode.org/glossary/#code_point">
 607      * Unicode code point</a>, constant {@code U+10FFFF}.
 608      *
 609      * @since 1.5
 610      */
 611     public static final int MAX_CODE_POINT = 0X10FFFF;
 612 
 613 
 614     /**
 615      * Instances of this class represent particular subsets of the Unicode
 616      * character set.  The only family of subsets defined in the
 617      * {@code Character} class is {@link Character.UnicodeBlock}.
 618      * Other portions of the Java API may define other subsets for their
 619      * own purposes.
 620      *
 621      * @since 1.2
 622      */
 623     public static class Subset  {
 624 
 625         private String name;
 626 
 627         /**
 628          * Constructs a new {@code Subset} instance.
 629          *
 630          * @param  name  The name of this subset
 631          * @throws NullPointerException if name is {@code null}
 632          */
 633         protected Subset(String name) {
 634             if (name == null) {
 635                 throw new NullPointerException("name");
 636             }
 637             this.name = name;
 638         }
 639 
 640         /**
 641          * Compares two {@code Subset} objects for equality.
 642          * This method returns {@code true} if and only if
 643          * {@code this} and the argument refer to the same
 644          * object; since this method is {@code final}, this
 645          * guarantee holds for all subclasses.
 646          */
 647         public final boolean equals(Object obj) {
 648             return (this == obj);
 649         }
 650 
 651         /**
 652          * Returns the standard hash code as defined by the
 653          * {@link Object#hashCode} method.  This method
 654          * is {@code final} in order to ensure that the
 655          * {@code equals} and {@code hashCode} methods will
 656          * be consistent in all subclasses.
 657          */
 658         public final int hashCode() {
 659             return super.hashCode();
 660         }
 661 
 662         /**
 663          * Returns the name of this subset.
 664          */
 665         public final String toString() {
 666             return name;
 667         }
 668     }
 669 
 670     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 671     // for the latest specification of Unicode Blocks.
 672 
 673     /**
 674      * A family of character subsets representing the character blocks in the
 675      * Unicode specification. Character blocks generally define characters
 676      * used for a specific script or purpose. A character is contained by
 677      * at most one Unicode block.
 678      *
 679      * @since 1.2
 680      */
 681     public static final class UnicodeBlock extends Subset {
 682         /**
              * Lookup table from a registered name to its {@code UnicodeBlock}.
              * The constructors of this class insert the identifier name and any
              * alias names as keys.
              * <p>
              * The initial capacity is computed from:
 683          * 638  - the expected number of entities
 684          * 0.75 - the default load factor of HashMap
              * as {@code 638 / 0.75 + 1}, presumably so that the expected entries
              * can be added without the table being resized (TODO confirm that
              * 638 still matches the number of names registered in this file).
 685          */
 686         private static Map<String, UnicodeBlock> map =
 687                 new HashMap<>((int)(638 / 0.75f + 1.0f));
 688 
 689         /**
 690          * Creates a UnicodeBlock with the given identifier name.
 691          * This name must be the same as the block identifier.
              * The new instance is also stored in {@code map} under
              * {@code idName}.
              *
              * @param idName the identifier name; passed to the {@code Subset}
              *               constructor as the subset name and used as the
              *               key in {@code map}
              * @throws NullPointerException if {@code idName} is {@code null}
              *         (raised by the {@code Subset} constructor)
 692          */
 693         private UnicodeBlock(String idName) {
 694             super(idName);
 695             map.put(idName, this);
 696         }
 697 
 698         /**
 699          * Creates a UnicodeBlock with the given identifier name and
 700          * alias name.
              * The instance is stored in {@code map} under both names:
              * {@code idName} by the single-argument constructor, then
              * {@code alias} here.
              *
              * @param idName the identifier name, forwarded to the
              *               single-argument constructor
              * @param alias  an additional key under which this block is
              *               stored in {@code map}
 701          */
 702         private UnicodeBlock(String idName, String alias) {
 703             this(idName);
 704             map.put(alias, this);
 705         }
 706 
 707         /**
 708          * Creates a UnicodeBlock with the given identifier name and
 709          * alias names.
 710          */
 711         private UnicodeBlock(String idName, String... aliases) {
 712             this(idName);
 713             for (String alias : aliases)
 714                 map.put(alias, this);
 715         }
 716 
 717         /**
 718          * Constant for the "Basic Latin" Unicode character block.
 719          * @since 1.2
 720          */
 721         public static final UnicodeBlock  BASIC_LATIN =
 722             new UnicodeBlock("BASIC_LATIN",
 723                              "BASIC LATIN",
 724                              "BASICLATIN");
 725 
 726         /**
 727          * Constant for the "Latin-1 Supplement" Unicode character block.
 728          * @since 1.2
 729          */
 730         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 731             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 732                              "LATIN-1 SUPPLEMENT",
 733                              "LATIN-1SUPPLEMENT");
 734 
 735         /**
 736          * Constant for the "Latin Extended-A" Unicode character block.
 737          * @since 1.2
 738          */
 739         public static final UnicodeBlock LATIN_EXTENDED_A =
 740             new UnicodeBlock("LATIN_EXTENDED_A",
 741                              "LATIN EXTENDED-A",
 742                              "LATINEXTENDED-A");
 743 
 744         /**
 745          * Constant for the "Latin Extended-B" Unicode character block.
 746          * @since 1.2
 747          */
 748         public static final UnicodeBlock LATIN_EXTENDED_B =
 749             new UnicodeBlock("LATIN_EXTENDED_B",
 750                              "LATIN EXTENDED-B",
 751                              "LATINEXTENDED-B");
 752 
 753         /**
 754          * Constant for the "IPA Extensions" Unicode character block.
 755          * @since 1.2
 756          */
 757         public static final UnicodeBlock IPA_EXTENSIONS =
 758             new UnicodeBlock("IPA_EXTENSIONS",
 759                              "IPA EXTENSIONS",
 760                              "IPAEXTENSIONS");
 761 
 762         /**
 763          * Constant for the "Spacing Modifier Letters" Unicode character block.
 764          * @since 1.2
 765          */
 766         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 767             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 768                              "SPACING MODIFIER LETTERS",
 769                              "SPACINGMODIFIERLETTERS");
 770 
 771         /**
 772          * Constant for the "Combining Diacritical Marks" Unicode character block.
 773          * @since 1.2
 774          */
 775         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 776             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 777                              "COMBINING DIACRITICAL MARKS",
 778                              "COMBININGDIACRITICALMARKS");
 779 
 780         /**
 781          * Constant for the "Greek and Coptic" Unicode character block.
 782          * <p>
 783          * This block was previously known as the "Greek" block.
 784          *
 785          * @since 1.2
 786          */
 787         public static final UnicodeBlock GREEK =
 788             new UnicodeBlock("GREEK",
 789                              "GREEK AND COPTIC",
 790                              "GREEKANDCOPTIC");
 791 
 792         /**
 793          * Constant for the "Cyrillic" Unicode character block.
 794          * @since 1.2
 795          */
 796         public static final UnicodeBlock CYRILLIC =
 797             new UnicodeBlock("CYRILLIC");
 798 
 799         /**
 800          * Constant for the "Armenian" Unicode character block.
 801          * @since 1.2
 802          */
 803         public static final UnicodeBlock ARMENIAN =
 804             new UnicodeBlock("ARMENIAN");
 805 
 806         /**
 807          * Constant for the "Hebrew" Unicode character block.
 808          * @since 1.2
 809          */
 810         public static final UnicodeBlock HEBREW =
 811             new UnicodeBlock("HEBREW");
 812 
 813         /**
 814          * Constant for the "Arabic" Unicode character block.
 815          * @since 1.2
 816          */
 817         public static final UnicodeBlock ARABIC =
 818             new UnicodeBlock("ARABIC");
 819 
 820         /**
 821          * Constant for the "Devanagari" Unicode character block.
 822          * @since 1.2
 823          */
 824         public static final UnicodeBlock DEVANAGARI =
 825             new UnicodeBlock("DEVANAGARI");
 826 
 827         /**
 828          * Constant for the "Bengali" Unicode character block.
 829          * @since 1.2
 830          */
 831         public static final UnicodeBlock BENGALI =
 832             new UnicodeBlock("BENGALI");
 833 
 834         /**
 835          * Constant for the "Gurmukhi" Unicode character block.
 836          * @since 1.2
 837          */
 838         public static final UnicodeBlock GURMUKHI =
 839             new UnicodeBlock("GURMUKHI");
 840 
 841         /**
 842          * Constant for the "Gujarati" Unicode character block.
 843          * @since 1.2
 844          */
 845         public static final UnicodeBlock GUJARATI =
 846             new UnicodeBlock("GUJARATI");
 847 
 848         /**
 849          * Constant for the "Oriya" Unicode character block.
 850          * @since 1.2
 851          */
 852         public static final UnicodeBlock ORIYA =
 853             new UnicodeBlock("ORIYA");
 854 
 855         /**
 856          * Constant for the "Tamil" Unicode character block.
 857          * @since 1.2
 858          */
 859         public static final UnicodeBlock TAMIL =
 860             new UnicodeBlock("TAMIL");
 861 
 862         /**
 863          * Constant for the "Telugu" Unicode character block.
 864          * @since 1.2
 865          */
 866         public static final UnicodeBlock TELUGU =
 867             new UnicodeBlock("TELUGU");
 868 
 869         /**
 870          * Constant for the "Kannada" Unicode character block.
 871          * @since 1.2
 872          */
 873         public static final UnicodeBlock KANNADA =
 874             new UnicodeBlock("KANNADA");
 875 
 876         /**
 877          * Constant for the "Malayalam" Unicode character block.
 878          * @since 1.2
 879          */
 880         public static final UnicodeBlock MALAYALAM =
 881             new UnicodeBlock("MALAYALAM");
 882 
 883         /**
 884          * Constant for the "Thai" Unicode character block.
 885          * @since 1.2
 886          */
 887         public static final UnicodeBlock THAI =
 888             new UnicodeBlock("THAI");
 889 
 890         /**
 891          * Constant for the "Lao" Unicode character block.
 892          * @since 1.2
 893          */
 894         public static final UnicodeBlock LAO =
 895             new UnicodeBlock("LAO");
 896 
 897         /**
 898          * Constant for the "Tibetan" Unicode character block.
 899          * @since 1.2
 900          */
 901         public static final UnicodeBlock TIBETAN =
 902             new UnicodeBlock("TIBETAN");
 903 
 904         /**
 905          * Constant for the "Georgian" Unicode character block.
 906          * @since 1.2
 907          */
 908         public static final UnicodeBlock GEORGIAN =
 909             new UnicodeBlock("GEORGIAN");
 910 
 911         /**
 912          * Constant for the "Hangul Jamo" Unicode character block.
 913          * @since 1.2
 914          */
 915         public static final UnicodeBlock HANGUL_JAMO =
 916             new UnicodeBlock("HANGUL_JAMO",   // same text as the field name
 917                              "HANGUL JAMO",   // block name, words separated by spaces
 918                              "HANGULJAMO");   // block name with the spaces removed
 919 
 920         /**
 921          * Constant for the "Latin Extended Additional" Unicode character block.
 922          * @since 1.2
 923          */
 924         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 925             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 926                              "LATIN EXTENDED ADDITIONAL",
 927                              "LATINEXTENDEDADDITIONAL");
 928 
 929         /**
 930          * Constant for the "Greek Extended" Unicode character block.
 931          * @since 1.2
 932          */
 933         public static final UnicodeBlock GREEK_EXTENDED =
 934             new UnicodeBlock("GREEK_EXTENDED",
 935                              "GREEK EXTENDED",
 936                              "GREEKEXTENDED");
 937 
 938         /**
 939          * Constant for the "General Punctuation" Unicode character block.
 940          * @since 1.2
 941          */
 942         public static final UnicodeBlock GENERAL_PUNCTUATION =
 943             new UnicodeBlock("GENERAL_PUNCTUATION",
 944                              "GENERAL PUNCTUATION",
 945                              "GENERALPUNCTUATION");
 946 
 947         /**
 948          * Constant for the "Superscripts and Subscripts" Unicode character
 949          * block.
 950          * @since 1.2
 951          */
 952         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 953             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 954                              "SUPERSCRIPTS AND SUBSCRIPTS",
 955                              "SUPERSCRIPTSANDSUBSCRIPTS");
 956 
 957         /**
 958          * Constant for the "Currency Symbols" Unicode character block.
 959          * @since 1.2
 960          */
 961         public static final UnicodeBlock CURRENCY_SYMBOLS =
 962             new UnicodeBlock("CURRENCY_SYMBOLS",
 963                              "CURRENCY SYMBOLS",
 964                              "CURRENCYSYMBOLS");
 965 
 966         /**
 967          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 968          * character block.
 969          * <p>
 970          * This block was previously known as "Combining Marks for Symbols".
 971          * @since 1.2
 972          */
 973         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 974             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",   // field name (uses the previous block name)
 975                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",   // current name
 976                              "COMBININGDIACRITICALMARKSFORSYMBOLS",       // current name, spaces removed
 977                              "COMBINING MARKS FOR SYMBOLS",               // previous name
 978                              "COMBININGMARKSFORSYMBOLS");                 // previous name, spaces removed
 979 
 980         /**
 981          * Constant for the "Letterlike Symbols" Unicode character block.
 982          * @since 1.2
 983          */
 984         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 985             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 986                              "LETTERLIKE SYMBOLS",
 987                              "LETTERLIKESYMBOLS");
 988 
 989         /**
 990          * Constant for the "Number Forms" Unicode character block.
 991          * @since 1.2
 992          */
 993         public static final UnicodeBlock NUMBER_FORMS =
 994             new UnicodeBlock("NUMBER_FORMS",
 995                              "NUMBER FORMS",
 996                              "NUMBERFORMS");
 997 
 998         /**
 999          * Constant for the "Arrows" Unicode character block.
1000          * @since 1.2
1001          */
1002         public static final UnicodeBlock ARROWS =
1003             new UnicodeBlock("ARROWS");
1004 
1005         /**
1006          * Constant for the "Mathematical Operators" Unicode character block.
1007          * @since 1.2
1008          */
1009         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1010             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1011                              "MATHEMATICAL OPERATORS",
1012                              "MATHEMATICALOPERATORS");
1013 
1014         /**
1015          * Constant for the "Miscellaneous Technical" Unicode character block.
1016          * @since 1.2
1017          */
1018         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1019             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1020                              "MISCELLANEOUS TECHNICAL",
1021                              "MISCELLANEOUSTECHNICAL");
1022 
1023         /**
1024          * Constant for the "Control Pictures" Unicode character block.
1025          * @since 1.2
1026          */
1027         public static final UnicodeBlock CONTROL_PICTURES =
1028             new UnicodeBlock("CONTROL_PICTURES",
1029                              "CONTROL PICTURES",
1030                              "CONTROLPICTURES");
1031 
1032         /**
1033          * Constant for the "Optical Character Recognition" Unicode character block.
1034          * @since 1.2
1035          */
1036         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1037             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1038                              "OPTICAL CHARACTER RECOGNITION",
1039                              "OPTICALCHARACTERRECOGNITION");
1040 
1041         /**
1042          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1043          * @since 1.2
1044          */
1045         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1046             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1047                              "ENCLOSED ALPHANUMERICS",
1048                              "ENCLOSEDALPHANUMERICS");
1049 
1050         /**
1051          * Constant for the "Box Drawing" Unicode character block.
1052          * @since 1.2
1053          */
1054         public static final UnicodeBlock BOX_DRAWING =
1055             new UnicodeBlock("BOX_DRAWING",
1056                              "BOX DRAWING",
1057                              "BOXDRAWING");
1058 
1059         /**
1060          * Constant for the "Block Elements" Unicode character block.
1061          * @since 1.2
1062          */
1063         public static final UnicodeBlock BLOCK_ELEMENTS =
1064             new UnicodeBlock("BLOCK_ELEMENTS",
1065                              "BLOCK ELEMENTS",
1066                              "BLOCKELEMENTS");
1067 
1068         /**
1069          * Constant for the "Geometric Shapes" Unicode character block.
1070          * @since 1.2
1071          */
1072         public static final UnicodeBlock GEOMETRIC_SHAPES =
1073             new UnicodeBlock("GEOMETRIC_SHAPES",
1074                              "GEOMETRIC SHAPES",
1075                              "GEOMETRICSHAPES");
1076 
1077         /**
1078          * Constant for the "Miscellaneous Symbols" Unicode character block.
1079          * @since 1.2
1080          */
1081         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1082             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1083                              "MISCELLANEOUS SYMBOLS",
1084                              "MISCELLANEOUSSYMBOLS");
1085 
1086         /**
1087          * Constant for the "Dingbats" Unicode character block.
1088          * @since 1.2
1089          */
1090         public static final UnicodeBlock DINGBATS =
1091             new UnicodeBlock("DINGBATS");
1092 
1093         /**
1094          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1095          * @since 1.2
1096          */
1097         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1098             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1099                              "CJK SYMBOLS AND PUNCTUATION",
1100                              "CJKSYMBOLSANDPUNCTUATION");
1101 
1102         /**
1103          * Constant for the "Hiragana" Unicode character block.
1104          * @since 1.2
1105          */
1106         public static final UnicodeBlock HIRAGANA =
1107             new UnicodeBlock("HIRAGANA");
1108 
1109         /**
1110          * Constant for the "Katakana" Unicode character block.
1111          * @since 1.2
1112          */
1113         public static final UnicodeBlock KATAKANA =
1114             new UnicodeBlock("KATAKANA");
1115 
1116         /**
1117          * Constant for the "Bopomofo" Unicode character block.
1118          * @since 1.2
1119          */
1120         public static final UnicodeBlock BOPOMOFO =
1121             new UnicodeBlock("BOPOMOFO");
1122 
1123         /**
1124          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1125          * @since 1.2
1126          */
1127         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1128             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1129                              "HANGUL COMPATIBILITY JAMO",
1130                              "HANGULCOMPATIBILITYJAMO");
1131 
1132         /**
1133          * Constant for the "Kanbun" Unicode character block.
1134          * @since 1.2
1135          */
1136         public static final UnicodeBlock KANBUN =
1137             new UnicodeBlock("KANBUN");
1138 
1139         /**
1140          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1141          * @since 1.2
1142          */
1143         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1144             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1145                              "ENCLOSED CJK LETTERS AND MONTHS",
1146                              "ENCLOSEDCJKLETTERSANDMONTHS");
1147 
1148         /**
1149          * Constant for the "CJK Compatibility" Unicode character block.
1150          * @since 1.2
1151          */
1152         public static final UnicodeBlock CJK_COMPATIBILITY =
1153             new UnicodeBlock("CJK_COMPATIBILITY",
1154                              "CJK COMPATIBILITY",
1155                              "CJKCOMPATIBILITY");
1156 
1157         /**
1158          * Constant for the "CJK Unified Ideographs" Unicode character block.
1159          * @since 1.2
1160          */
1161         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1162             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1163                              "CJK UNIFIED IDEOGRAPHS",
1164                              "CJKUNIFIEDIDEOGRAPHS");
1165 
1166         /**
1167          * Constant for the "Hangul Syllables" Unicode character block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock HANGUL_SYLLABLES =
1171             new UnicodeBlock("HANGUL_SYLLABLES",
1172                              "HANGUL SYLLABLES",
1173                              "HANGULSYLLABLES");
1174 
1175         /**
1176          * Constant for the "Private Use Area" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock PRIVATE_USE_AREA =
1180             new UnicodeBlock("PRIVATE_USE_AREA",
1181                              "PRIVATE USE AREA",
1182                              "PRIVATEUSEAREA");
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Ideographs" Unicode character
1186          * block.
1187          * @since 1.2
1188          */
1189         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1190             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1191                              "CJK COMPATIBILITY IDEOGRAPHS",
1192                              "CJKCOMPATIBILITYIDEOGRAPHS");
1193 
1194         /**
1195          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1196          * @since 1.2
1197          */
1198         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1199             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1200                              "ALPHABETIC PRESENTATION FORMS",
1201                              "ALPHABETICPRESENTATIONFORMS");
1202 
1203         /**
1204          * Constant for the "Arabic Presentation Forms-A" Unicode character
1205          * block.
1206          * @since 1.2
1207          */
1208         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1209             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",   // identifier form: '_' stands in for the hyphen
1210                              "ARABIC PRESENTATION FORMS-A",   // block name with spaces and hyphen
1211                              "ARABICPRESENTATIONFORMS-A");    // spaces removed; the hyphen is kept
1212 
1213         /**
1214          * Constant for the "Combining Half Marks" Unicode character block.
1215          * @since 1.2
1216          */
1217         public static final UnicodeBlock COMBINING_HALF_MARKS =
1218             new UnicodeBlock("COMBINING_HALF_MARKS",
1219                              "COMBINING HALF MARKS",
1220                              "COMBININGHALFMARKS");
1221 
1222         /**
1223          * Constant for the "CJK Compatibility Forms" Unicode character block.
1224          * @since 1.2
1225          */
1226         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1227             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1228                              "CJK COMPATIBILITY FORMS",
1229                              "CJKCOMPATIBILITYFORMS");
1230 
1231         /**
1232          * Constant for the "Small Form Variants" Unicode character block.
1233          * @since 1.2
1234          */
1235         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1236             new UnicodeBlock("SMALL_FORM_VARIANTS",
1237                              "SMALL FORM VARIANTS",
1238                              "SMALLFORMVARIANTS");
1239 
1240         /**
1241          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1242          * @since 1.2
1243          */
1244         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1245             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",   // identifier form: '_' stands in for the hyphen
1246                              "ARABIC PRESENTATION FORMS-B",   // block name with spaces and hyphen
1247                              "ARABICPRESENTATIONFORMS-B");    // spaces removed; the hyphen is kept
1248 
1249         /**
1250          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1251          * block.
1252          * @since 1.2
1253          */
1254         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1255             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1256                              "HALFWIDTH AND FULLWIDTH FORMS",
1257                              "HALFWIDTHANDFULLWIDTHFORMS");
1258 
1259         /**
1260          * Constant for the "Specials" Unicode character block.
1261          * @since 1.2
1262          */
1263         public static final UnicodeBlock SPECIALS =
1264             new UnicodeBlock("SPECIALS");
1265 
1266         /**
1267          * Constant for the surrogates area, superseded by three separate blocks.
1268          * @deprecated Use {@link #HIGH_SURROGATES},
1269          * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}
1270          * instead of {@code SURROGATES_AREA}. These constants match the block
1271          * definitions of the Unicode Standard. The {@link #of(char)} and
1272          * {@link #of(int)} methods return the standard constants.
1273          */
1274         @Deprecated(since="1.5")
1275         public static final UnicodeBlock SURROGATES_AREA =
1276             new UnicodeBlock("SURROGATES_AREA");
1277 
1278         /**
1279          * Constant for the "Syriac" Unicode character block.
1280          * @since 1.4
1281          */
1282         public static final UnicodeBlock SYRIAC =
1283             new UnicodeBlock("SYRIAC");
1284 
1285         /**
1286          * Constant for the "Thaana" Unicode character block.
1287          * @since 1.4
1288          */
1289         public static final UnicodeBlock THAANA =
1290             new UnicodeBlock("THAANA");
1291 
1292         /**
1293          * Constant for the "Sinhala" Unicode character block.
1294          * @since 1.4
1295          */
1296         public static final UnicodeBlock SINHALA =
1297             new UnicodeBlock("SINHALA");
1298 
1299         /**
1300          * Constant for the "Myanmar" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock MYANMAR =
1304             new UnicodeBlock("MYANMAR");
1305 
1306         /**
1307          * Constant for the "Ethiopic" Unicode character block.
1308          * @since 1.4
1309          */
1310         public static final UnicodeBlock ETHIOPIC =
1311             new UnicodeBlock("ETHIOPIC");
1312 
1313         /**
1314          * Constant for the "Cherokee" Unicode character block.
1315          * @since 1.4
1316          */
1317         public static final UnicodeBlock CHEROKEE =
1318             new UnicodeBlock("CHEROKEE");
1319 
1320         /**
1321          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1322          * @since 1.4
1323          */
1324         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1325             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1326                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1327                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1328 
1329         /**
1330          * Constant for the "Ogham" Unicode character block.
1331          * @since 1.4
1332          */
1333         public static final UnicodeBlock OGHAM =
1334             new UnicodeBlock("OGHAM");
1335 
1336         /**
1337          * Constant for the "Runic" Unicode character block.
1338          * @since 1.4
1339          */
1340         public static final UnicodeBlock RUNIC =
1341             new UnicodeBlock("RUNIC");
1342 
1343         /**
1344          * Constant for the "Khmer" Unicode character block.
1345          * @since 1.4
1346          */
1347         public static final UnicodeBlock KHMER =
1348             new UnicodeBlock("KHMER");
1349 
1350         /**
1351          * Constant for the "Mongolian" Unicode character block.
1352          * @since 1.4
1353          */
1354         public static final UnicodeBlock MONGOLIAN =
1355             new UnicodeBlock("MONGOLIAN");
1356 
1357         /**
1358          * Constant for the "Braille Patterns" Unicode character block.
1359          * @since 1.4
1360          */
1361         public static final UnicodeBlock BRAILLE_PATTERNS =
1362             new UnicodeBlock("BRAILLE_PATTERNS",
1363                              "BRAILLE PATTERNS",
1364                              "BRAILLEPATTERNS");
1365 
1366         /**
1367          * Constant for the "CJK Radicals Supplement" Unicode character block.
1368          * @since 1.4
1369          */
1370         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1371             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1372                              "CJK RADICALS SUPPLEMENT",
1373                              "CJKRADICALSSUPPLEMENT");
1374 
1375         /**
1376          * Constant for the "Kangxi Radicals" Unicode character block.
1377          * @since 1.4
1378          */
1379         public static final UnicodeBlock KANGXI_RADICALS =
1380             new UnicodeBlock("KANGXI_RADICALS",
1381                              "KANGXI RADICALS",
1382                              "KANGXIRADICALS");
1383 
1384         /**
1385          * Constant for the "Ideographic Description Characters" Unicode character block.
1386          * @since 1.4
1387          */
1388         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1389             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1390                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1391                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1392 
1393         /**
1394          * Constant for the "Bopomofo Extended" Unicode character block.
1395          * @since 1.4
1396          */
1397         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1398             new UnicodeBlock("BOPOMOFO_EXTENDED",
1399                              "BOPOMOFO EXTENDED",
1400                              "BOPOMOFOEXTENDED");
1401 
1402         /**
1403          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1404          * @since 1.4
1405          */
1406         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1407             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",   // same text as the field name
1408                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",   // block name, words separated by spaces
1409                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");      // spaces removed; the final "A" is joined directly
1411         /**
1412          * Constant for the "Yi Syllables" Unicode character block.
1413          * @since 1.4
1414          */
1415         public static final UnicodeBlock YI_SYLLABLES =
1416             new UnicodeBlock("YI_SYLLABLES",
1417                              "YI SYLLABLES",
1418                              "YISYLLABLES");
1419 
1420         /**
1421          * Constant for the "Yi Radicals" Unicode character block.
1422          * @since 1.4
1423          */
1424         public static final UnicodeBlock YI_RADICALS =
1425             new UnicodeBlock("YI_RADICALS",
1426                              "YI RADICALS",
1427                              "YIRADICALS");
1428 
1429         /**
1430          * Constant for the "Cyrillic Supplement" Unicode character block.
1431          * This block was previously known as the "Cyrillic Supplementary" block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1435             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",   // field name (uses the previous block name)
1436                              "CYRILLIC SUPPLEMENTARY",   // previous name
1437                              "CYRILLICSUPPLEMENTARY",    // previous name, space removed
1438                              "CYRILLIC SUPPLEMENT",      // current name
1439                              "CYRILLICSUPPLEMENT");      // current name, space removed
1440 
1441         /**
1442          * Constant for the "Tagalog" Unicode character block.
1443          * @since 1.5
1444          */
1445         public static final UnicodeBlock TAGALOG =
1446             new UnicodeBlock("TAGALOG");
1447 
1448         /**
1449          * Constant for the "Hanunoo" Unicode character block.
1450          * @since 1.5
1451          */
1452         public static final UnicodeBlock HANUNOO =
1453             new UnicodeBlock("HANUNOO");
1454 
1455         /**
1456          * Constant for the "Buhid" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock BUHID =
1460             new UnicodeBlock("BUHID");
1461 
1462         /**
1463          * Constant for the "Tagbanwa" Unicode character block.
1464          * @since 1.5
1465          */
1466         public static final UnicodeBlock TAGBANWA =
1467             new UnicodeBlock("TAGBANWA");
1468 
1469         /**
1470          * Constant for the "Limbu" Unicode character block.
1471          * @since 1.5
1472          */
1473         public static final UnicodeBlock LIMBU =
1474             new UnicodeBlock("LIMBU");
1475 
1476         /**
1477          * Constant for the "Tai Le" Unicode character block.
1478          * @since 1.5
1479          */
1480         public static final UnicodeBlock TAI_LE =
1481             new UnicodeBlock("TAI_LE",
1482                              "TAI LE",
1483                              "TAILE");
1484 
1485         /**
1486          * Constant for the "Khmer Symbols" Unicode character block.
1487          * @since 1.5
1488          */
1489         public static final UnicodeBlock KHMER_SYMBOLS =
1490             new UnicodeBlock("KHMER_SYMBOLS",
1491                              "KHMER SYMBOLS",
1492                              "KHMERSYMBOLS");
1493 
1494         /**
1495          * Constant for the "Phonetic Extensions" Unicode character block.
1496          * @since 1.5
1497          */
1498         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1499             new UnicodeBlock("PHONETIC_EXTENSIONS",
1500                              "PHONETIC EXTENSIONS",
1501                              "PHONETICEXTENSIONS");
1502 
1503         /**
1504          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1505          * @since 1.5
1506          */
1507         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1508             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",   // identifier form: '_' stands in for the hyphen
1509                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",   // block name with spaces and hyphen
1510                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");    // spaces removed; the hyphen is kept
1511 
1512         /**
1513          * Constant for the "Supplemental Arrows-A" Unicode character block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1517             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",   // identifier form: '_' stands in for the hyphen
1518                              "SUPPLEMENTAL ARROWS-A",   // block name with space and hyphen
1519                              "SUPPLEMENTALARROWS-A");    // space removed; the hyphen is kept
1520 
1521         /**
1522          * Constant for the "Supplemental Arrows-B" Unicode character block.
1523          * @since 1.5
1524          */
1525         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1526             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",   // identifier form: '_' stands in for the hyphen
1527                              "SUPPLEMENTAL ARROWS-B",   // block name with space and hyphen
1528                              "SUPPLEMENTALARROWS-B");    // space removed; the hyphen is kept
1529 
1530         /**
1531          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1532          * character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1536             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",   // identifier form: '_' stands in for the hyphen
1537                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",   // block name with spaces and hyphen
1538                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");    // spaces removed; the hyphen is kept
1539 
1540         /**
1541          * Constant for the "Supplemental Mathematical Operators" Unicode
1542          * character block.
1543          * @since 1.5
1544          */
1545         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1546             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1547                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1548                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1549 
1550         /**
1551          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1552          * block.
1553          * @since 1.5
1554          */
1555         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1556             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1557                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1558                              "MISCELLANEOUSSYMBOLSANDARROWS");
1559 
1560         /**
1561          * Constant for the "Katakana Phonetic Extensions" Unicode character
1562          * block.
1563          * @since 1.5
1564          */
1565         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1566             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1567                              "KATAKANA PHONETIC EXTENSIONS",
1568                              "KATAKANAPHONETICEXTENSIONS");
1569 
1570         /**
1571          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1572          * @since 1.5
1573          */
1574         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1575             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1576                              "YIJING HEXAGRAM SYMBOLS",
1577                              "YIJINGHEXAGRAMSYMBOLS");
1578 
1579         /**
1580          * Constant for the "Variation Selectors" Unicode character block.
1581          * @since 1.5
1582          */
1583         public static final UnicodeBlock VARIATION_SELECTORS =
1584             new UnicodeBlock("VARIATION_SELECTORS",
1585                              "VARIATION SELECTORS",
1586                              "VARIATIONSELECTORS");
1587 
1588         /**
1589          * Constant for the "Linear B Syllabary" Unicode character block.
1590          * @since 1.5
1591          */
1592         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1593             new UnicodeBlock("LINEAR_B_SYLLABARY",
1594                              "LINEAR B SYLLABARY",
1595                              "LINEARBSYLLABARY");
1596 
1597         /**
1598          * Constant for the "Linear B Ideograms" Unicode character block.
1599          * @since 1.5
1600          */
1601         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1602             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1603                              "LINEAR B IDEOGRAMS",
1604                              "LINEARBIDEOGRAMS");
1605 
1606         /**
1607          * Constant for the "Aegean Numbers" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock AEGEAN_NUMBERS =
1611             new UnicodeBlock("AEGEAN_NUMBERS",
1612                              "AEGEAN NUMBERS",
1613                              "AEGEANNUMBERS");
1614 
1615         /**
1616          * Constant for the "Old Italic" Unicode character block.
1617          * @since 1.5
1618          */
1619         public static final UnicodeBlock OLD_ITALIC =
1620             new UnicodeBlock("OLD_ITALIC",
1621                              "OLD ITALIC",
1622                              "OLDITALIC");
1623 
1624         /**
1625          * Constant for the "Gothic" Unicode character block.
1626          * @since 1.5
1627          */
1628         public static final UnicodeBlock GOTHIC =
1629             new UnicodeBlock("GOTHIC");
1630 
1631         /**
1632          * Constant for the "Ugaritic" Unicode character block.
1633          * @since 1.5
1634          */
1635         public static final UnicodeBlock UGARITIC =
1636             new UnicodeBlock("UGARITIC");
1637 
1638         /**
1639          * Constant for the "Deseret" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock DESERET =
1643             new UnicodeBlock("DESERET");
1644 
1645         /**
1646          * Constant for the "Shavian" Unicode character block.
1647          * @since 1.5
1648          */
1649         public static final UnicodeBlock SHAVIAN =
1650             new UnicodeBlock("SHAVIAN");
1651 
1652         /**
1653          * Constant for the "Osmanya" Unicode character block.
1654          * @since 1.5
1655          */
1656         public static final UnicodeBlock OSMANYA =
1657             new UnicodeBlock("OSMANYA");
1658 
1659         /**
1660          * Constant for the "Cypriot Syllabary" Unicode character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1664             new UnicodeBlock("CYPRIOT_SYLLABARY",
1665                              "CYPRIOT SYLLABARY",
1666                              "CYPRIOTSYLLABARY");
1667 
1668         /**
1669          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1670          * @since 1.5
1671          */
1672         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1673             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1674                              "BYZANTINE MUSICAL SYMBOLS",
1675                              "BYZANTINEMUSICALSYMBOLS");
1676 
1677         /**
1678          * Constant for the "Musical Symbols" Unicode character block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock MUSICAL_SYMBOLS =
1682             new UnicodeBlock("MUSICAL_SYMBOLS",
1683                              "MUSICAL SYMBOLS",
1684                              "MUSICALSYMBOLS");
1685 
1686         /**
1687          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1688          * @since 1.5
1689          */
1690         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1691             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1692                              "TAI XUAN JING SYMBOLS",
1693                              "TAIXUANJINGSYMBOLS");
1694 
1695         /**
1696          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1697          * character block.
1698          * @since 1.5
1699          */
1700         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1701             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1702                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1703                              "MATHEMATICALALPHANUMERICSYMBOLS");
1704 
1705         /**
1706          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1707          * character block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1711             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1712                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1713                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1714 
1715         /**
1716          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1720             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1721                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1722                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1723 
1724         /**
1725          * Constant for the "Tags" Unicode character block.
1726          * @since 1.5
1727          */
1728         public static final UnicodeBlock TAGS =
1729             new UnicodeBlock("TAGS");
1730 
1731         /**
1732          * Constant for the "Variation Selectors Supplement" Unicode character
1733          * block.
1734          * @since 1.5
1735          */
1736         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1737             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1738                              "VARIATION SELECTORS SUPPLEMENT",
1739                              "VARIATIONSELECTORSSUPPLEMENT");
1740 
1741         /**
1742          * Constant for the "Supplementary Private Use Area-A" Unicode character
1743          * block.
1744          * @since 1.5
1745          */
1746         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1747             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1748                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1749                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1750 
1751         /**
1752          * Constant for the "Supplementary Private Use Area-B" Unicode character
1753          * block.
1754          * @since 1.5
1755          */
1756         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1757             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1758                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1759                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1760 
1761         /**
1762          * Constant for the "High Surrogates" Unicode character block.
1763          * This block represents code point values in the high surrogate
1764          * range: U+D800 through U+DB7F.
1765          *
1766          * @since 1.5
1767          */
1768         public static final UnicodeBlock HIGH_SURROGATES =
1769             new UnicodeBlock("HIGH_SURROGATES",
1770                              "HIGH SURROGATES",
1771                              "HIGHSURROGATES");
1772 
1773         /**
1774          * Constant for the "High Private Use Surrogates" Unicode character
1775          * block.
1776          * This block represents code point values in the private use high
1777          * surrogate range: U+DB80 through U+DBFF.
1778          *
1779          * @since 1.5
1780          */
1781         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1782             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1783                              "HIGH PRIVATE USE SURROGATES",
1784                              "HIGHPRIVATEUSESURROGATES");
1785 
1786         /**
1787          * Constant for the "Low Surrogates" Unicode character block.
1788          * This block represents code point values in the low surrogate
1789          * range: U+DC00 through U+DFFF.
1790          *
1791          * @since 1.5
1792          */
1793         public static final UnicodeBlock LOW_SURROGATES =
1794             new UnicodeBlock("LOW_SURROGATES",
1795                              "LOW SURROGATES",
1796                              "LOWSURROGATES");
1797 
1798         /**
1799          * Constant for the "Arabic Supplement" Unicode character block.
1800          * @since 1.7
1801          */
1802         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1803             new UnicodeBlock("ARABIC_SUPPLEMENT",
1804                              "ARABIC SUPPLEMENT",
1805                              "ARABICSUPPLEMENT");
1806 
1807         /**
1808          * Constant for the "NKo" Unicode character block.
1809          * @since 1.7
1810          */
1811         public static final UnicodeBlock NKO =
1812             new UnicodeBlock("NKO");
1813 
1814         /**
1815          * Constant for the "Samaritan" Unicode character block.
1816          * @since 1.7
1817          */
1818         public static final UnicodeBlock SAMARITAN =
1819             new UnicodeBlock("SAMARITAN");
1820 
1821         /**
1822          * Constant for the "Mandaic" Unicode character block.
1823          * @since 1.7
1824          */
1825         public static final UnicodeBlock MANDAIC =
1826             new UnicodeBlock("MANDAIC");
1827 
1828         /**
1829          * Constant for the "Ethiopic Supplement" Unicode character block.
1830          * @since 1.7
1831          */
1832         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1833             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1834                              "ETHIOPIC SUPPLEMENT",
1835                              "ETHIOPICSUPPLEMENT");
1836 
1837         /**
1838          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1839          * Unicode character block.
1840          * @since 1.7
1841          */
1842         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1843             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1844                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1845                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1846 
1847         /**
1848          * Constant for the "New Tai Lue" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock NEW_TAI_LUE =
1852             new UnicodeBlock("NEW_TAI_LUE",
1853                              "NEW TAI LUE",
1854                              "NEWTAILUE");
1855 
1856         /**
1857          * Constant for the "Buginese" Unicode character block.
1858          * @since 1.7
1859          */
1860         public static final UnicodeBlock BUGINESE =
1861             new UnicodeBlock("BUGINESE");
1862 
1863         /**
1864          * Constant for the "Tai Tham" Unicode character block.
1865          * @since 1.7
1866          */
1867         public static final UnicodeBlock TAI_THAM =
1868             new UnicodeBlock("TAI_THAM",
1869                              "TAI THAM",
1870                              "TAITHAM");
1871 
1872         /**
1873          * Constant for the "Balinese" Unicode character block.
1874          * @since 1.7
1875          */
1876         public static final UnicodeBlock BALINESE =
1877             new UnicodeBlock("BALINESE");
1878 
1879         /**
1880          * Constant for the "Sundanese" Unicode character block.
1881          * @since 1.7
1882          */
1883         public static final UnicodeBlock SUNDANESE =
1884             new UnicodeBlock("SUNDANESE");
1885 
1886         /**
1887          * Constant for the "Batak" Unicode character block.
1888          * @since 1.7
1889          */
1890         public static final UnicodeBlock BATAK =
1891             new UnicodeBlock("BATAK");
1892 
1893         /**
1894          * Constant for the "Lepcha" Unicode character block.
1895          * @since 1.7
1896          */
1897         public static final UnicodeBlock LEPCHA =
1898             new UnicodeBlock("LEPCHA");
1899 
1900         /**
1901          * Constant for the "Ol Chiki" Unicode character block.
1902          * @since 1.7
1903          */
1904         public static final UnicodeBlock OL_CHIKI =
1905             new UnicodeBlock("OL_CHIKI",
1906                              "OL CHIKI",
1907                              "OLCHIKI");
1908 
1909         /**
1910          * Constant for the "Vedic Extensions" Unicode character block.
1911          * @since 1.7
1912          */
1913         public static final UnicodeBlock VEDIC_EXTENSIONS =
1914             new UnicodeBlock("VEDIC_EXTENSIONS",
1915                              "VEDIC EXTENSIONS",
1916                              "VEDICEXTENSIONS");
1917 
1918         /**
1919          * Constant for the "Phonetic Extensions Supplement" Unicode character
1920          * block.
1921          * @since 1.7
1922          */
1923         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1924             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1925                              "PHONETIC EXTENSIONS SUPPLEMENT",
1926                              "PHONETICEXTENSIONSSUPPLEMENT");
1927 
1928         /**
1929          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1930          * character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1934             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1935                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1936                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1937 
1938         /**
1939          * Constant for the "Glagolitic" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock GLAGOLITIC =
1943             new UnicodeBlock("GLAGOLITIC");
1944 
1945         /**
1946          * Constant for the "Latin Extended-C" Unicode character block.
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock LATIN_EXTENDED_C =
1950             new UnicodeBlock("LATIN_EXTENDED_C",
1951                              "LATIN EXTENDED-C",
1952                              "LATINEXTENDED-C");
1953 
1954         /**
1955          * Constant for the "Coptic" Unicode character block.
1956          * @since 1.7
1957          */
1958         public static final UnicodeBlock COPTIC =
1959             new UnicodeBlock("COPTIC");
1960 
1961         /**
1962          * Constant for the "Georgian Supplement" Unicode character block.
1963          * @since 1.7
1964          */
1965         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1966             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1967                              "GEORGIAN SUPPLEMENT",
1968                              "GEORGIANSUPPLEMENT");
1969 
1970         /**
1971          * Constant for the "Tifinagh" Unicode character block.
1972          * @since 1.7
1973          */
1974         public static final UnicodeBlock TIFINAGH =
1975             new UnicodeBlock("TIFINAGH");
1976 
1977         /**
1978          * Constant for the "Ethiopic Extended" Unicode character block.
1979          * @since 1.7
1980          */
1981         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1982             new UnicodeBlock("ETHIOPIC_EXTENDED",
1983                              "ETHIOPIC EXTENDED",
1984                              "ETHIOPICEXTENDED");
1985 
1986         /**
1987          * Constant for the "Cyrillic Extended-A" Unicode character block.
1988          * @since 1.7
1989          */
1990         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1991             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1992                              "CYRILLIC EXTENDED-A",
1993                              "CYRILLICEXTENDED-A");
1994 
1995         /**
1996          * Constant for the "Supplemental Punctuation" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2000             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2001                              "SUPPLEMENTAL PUNCTUATION",
2002                              "SUPPLEMENTALPUNCTUATION");
2003 
2004         /**
2005          * Constant for the "CJK Strokes" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock CJK_STROKES =
2009             new UnicodeBlock("CJK_STROKES",
2010                              "CJK STROKES",
2011                              "CJKSTROKES");
2012 
2013         /**
2014          * Constant for the "Lisu" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LISU =
2018             new UnicodeBlock("LISU");
2019 
2020         /**
2021          * Constant for the "Vai" Unicode character block.
2022          * @since 1.7
2023          */
2024         public static final UnicodeBlock VAI =
2025             new UnicodeBlock("VAI");
2026 
2027         /**
2028          * Constant for the "Cyrillic Extended-B" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2032             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2033                              "CYRILLIC EXTENDED-B",
2034                              "CYRILLICEXTENDED-B");
2035 
2036         /**
2037          * Constant for the "Bamum" Unicode character block.
2038          * @since 1.7
2039          */
2040         public static final UnicodeBlock BAMUM =
2041             new UnicodeBlock("BAMUM");
2042 
2043         /**
2044          * Constant for the "Modifier Tone Letters" Unicode character block.
2045          * @since 1.7
2046          */
2047         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2048             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2049                              "MODIFIER TONE LETTERS",
2050                              "MODIFIERTONELETTERS");
2051 
2052         /**
2053          * Constant for the "Latin Extended-D" Unicode character block.
2054          * @since 1.7
2055          */
2056         public static final UnicodeBlock LATIN_EXTENDED_D =
2057             new UnicodeBlock("LATIN_EXTENDED_D",
2058                              "LATIN EXTENDED-D",
2059                              "LATINEXTENDED-D");
2060 
2061         /**
2062          * Constant for the "Syloti Nagri" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock SYLOTI_NAGRI =
2066             new UnicodeBlock("SYLOTI_NAGRI",
2067                              "SYLOTI NAGRI",
2068                              "SYLOTINAGRI");
2069 
2070         /**
2071          * Constant for the "Common Indic Number Forms" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2075             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2076                              "COMMON INDIC NUMBER FORMS",
2077                              "COMMONINDICNUMBERFORMS");
2078 
2079         /**
2080          * Constant for the "Phags-pa" Unicode character block.
2081          * @since 1.7
2082          */
2083         public static final UnicodeBlock PHAGS_PA =
2084             new UnicodeBlock("PHAGS_PA",
2085                              "PHAGS-PA");
2086 
2087         /**
2088          * Constant for the "Saurashtra" Unicode character block.
2089          * @since 1.7
2090          */
2091         public static final UnicodeBlock SAURASHTRA =
2092             new UnicodeBlock("SAURASHTRA");
2093 
2094         /**
2095          * Constant for the "Devanagari Extended" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2099             new UnicodeBlock("DEVANAGARI_EXTENDED",
2100                              "DEVANAGARI EXTENDED",
2101                              "DEVANAGARIEXTENDED");
2102 
2103         /**
2104          * Constant for the "Kayah Li" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock KAYAH_LI =
2108             new UnicodeBlock("KAYAH_LI",
2109                              "KAYAH LI",
2110                              "KAYAHLI");
2111 
2112         /**
2113          * Constant for the "Rejang" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock REJANG =
2117             new UnicodeBlock("REJANG");
2118 
2119         /**
2120          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2121          * @since 1.7
2122          */
2123         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2124             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2125                              "HANGUL JAMO EXTENDED-A",
2126                              "HANGULJAMOEXTENDED-A");
2127 
2128         /**
2129          * Constant for the "Javanese" Unicode character block.
2130          * @since 1.7
2131          */
2132         public static final UnicodeBlock JAVANESE =
2133             new UnicodeBlock("JAVANESE");
2134 
2135         /**
2136          * Constant for the "Cham" Unicode character block.
2137          * @since 1.7
2138          */
2139         public static final UnicodeBlock CHAM =
2140             new UnicodeBlock("CHAM");
2141 
2142         /**
2143          * Constant for the "Myanmar Extended-A" Unicode character block.
2144          * @since 1.7
2145          */
2146         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2147             new UnicodeBlock("MYANMAR_EXTENDED_A",
2148                              "MYANMAR EXTENDED-A",
2149                              "MYANMAREXTENDED-A");
2150 
2151         /**
2152          * Constant for the "Tai Viet" Unicode character block.
2153          * @since 1.7
2154          */
2155         public static final UnicodeBlock TAI_VIET =
2156             new UnicodeBlock("TAI_VIET",
2157                              "TAI VIET",
2158                              "TAIVIET");
2159 
2160         /**
2161          * Constant for the "Ethiopic Extended-A" Unicode character block.
2162          * @since 1.7
2163          */
2164         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2165             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2166                              "ETHIOPIC EXTENDED-A",
2167                              "ETHIOPICEXTENDED-A");
2168 
2169         /**
2170          * Constant for the "Meetei Mayek" Unicode character block.
2171          * @since 1.7
2172          */
2173         public static final UnicodeBlock MEETEI_MAYEK =
2174             new UnicodeBlock("MEETEI_MAYEK",
2175                              "MEETEI MAYEK",
2176                              "MEETEIMAYEK");
2177 
2178         /**
2179          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2180          * @since 1.7
2181          */
2182         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2183             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2184                              "HANGUL JAMO EXTENDED-B",
2185                              "HANGULJAMOEXTENDED-B");
2186 
2187         /**
2188          * Constant for the "Vertical Forms" Unicode character block.
2189          * @since 1.7
2190          */
2191         public static final UnicodeBlock VERTICAL_FORMS =
2192             new UnicodeBlock("VERTICAL_FORMS",
2193                              "VERTICAL FORMS",
2194                              "VERTICALFORMS");
2195 
2196         /**
2197          * Constant for the "Ancient Greek Numbers" Unicode character block.
2198          * @since 1.7
2199          */
2200         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2201             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2202                              "ANCIENT GREEK NUMBERS",
2203                              "ANCIENTGREEKNUMBERS");
2204 
2205         /**
2206          * Constant for the "Ancient Symbols" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock ANCIENT_SYMBOLS =
2210             new UnicodeBlock("ANCIENT_SYMBOLS",
2211                              "ANCIENT SYMBOLS",
2212                              "ANCIENTSYMBOLS");
2213 
2214         /**
2215          * Constant for the "Phaistos Disc" Unicode character block.
2216          * @since 1.7
2217          */
2218         public static final UnicodeBlock PHAISTOS_DISC =
2219             new UnicodeBlock("PHAISTOS_DISC",
2220                              "PHAISTOS DISC",
2221                              "PHAISTOSDISC");
2222 
2223         /**
2224          * Constant for the "Lycian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYCIAN =
2228             new UnicodeBlock("LYCIAN");
2229 
2230         /**
2231          * Constant for the "Carian" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock CARIAN =
2235             new UnicodeBlock("CARIAN");
2236 
2237         /**
2238          * Constant for the "Old Persian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_PERSIAN =
2242             new UnicodeBlock("OLD_PERSIAN",
2243                              "OLD PERSIAN",
2244                              "OLDPERSIAN");
2245 
2246         /**
2247          * Constant for the "Imperial Aramaic" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2251             new UnicodeBlock("IMPERIAL_ARAMAIC",
2252                              "IMPERIAL ARAMAIC",
2253                              "IMPERIALARAMAIC");
2254 
2255         /**
2256          * Constant for the "Phoenician" Unicode character block.
2257          * @since 1.7
2258          */
2259         public static final UnicodeBlock PHOENICIAN =
2260             new UnicodeBlock("PHOENICIAN");
2261 
2262         /**
2263          * Constant for the "Lydian" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock LYDIAN =
2267             new UnicodeBlock("LYDIAN");
2268 
2269         /**
2270          * Constant for the "Kharoshthi" Unicode character block.
2271          * @since 1.7
2272          */
2273         public static final UnicodeBlock KHAROSHTHI =
2274             new UnicodeBlock("KHAROSHTHI");
2275 
2276         /**
2277          * Constant for the "Old South Arabian" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2281             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2282                              "OLD SOUTH ARABIAN",
2283                              "OLDSOUTHARABIAN");
2284 
2285         /**
2286          * Constant for the "Avestan" Unicode character block.
2287          * @since 1.7
2288          */
2289         public static final UnicodeBlock AVESTAN =
2290             new UnicodeBlock("AVESTAN");
2291 
2292         /**
2293          * Constant for the "Inscriptional Parthian" Unicode character block.
2294          * @since 1.7
2295          */
2296         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2297             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2298                              "INSCRIPTIONAL PARTHIAN",
2299                              "INSCRIPTIONALPARTHIAN");
2300 
2301         /**
2302          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2303          * @since 1.7
2304          */
2305         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2306             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2307                              "INSCRIPTIONAL PAHLAVI",
2308                              "INSCRIPTIONALPAHLAVI");
2309 
2310         /**
2311          * Constant for the "Old Turkic" Unicode character block.
2312          * @since 1.7
2313          */
2314         public static final UnicodeBlock OLD_TURKIC =
2315             new UnicodeBlock("OLD_TURKIC",
2316                              "OLD TURKIC",
2317                              "OLDTURKIC");
2318 
2319         /**
2320          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2321          * @since 1.7
2322          */
2323         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2324             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2325                              "RUMI NUMERAL SYMBOLS",
2326                              "RUMINUMERALSYMBOLS");
2327 
2328         /**
2329          * Constant for the "Brahmi" Unicode character block.
2330          * @since 1.7
2331          */
2332         public static final UnicodeBlock BRAHMI =
2333             new UnicodeBlock("BRAHMI");
2334 
2335         /**
2336          * Constant for the "Kaithi" Unicode character block.
2337          * @since 1.7
2338          */
2339         public static final UnicodeBlock KAITHI =
2340             new UnicodeBlock("KAITHI");
2341 
2342         /**
2343          * Constant for the "Cuneiform" Unicode character block.
2344          * @since 1.7
2345          */
2346         public static final UnicodeBlock CUNEIFORM =
2347             new UnicodeBlock("CUNEIFORM");
2348 
2349         /**
2350          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2351          * character block.
2352          * @since 1.7
2353          */
2354         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2355             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2356                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2357                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2358 
2359         /**
2360          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2361          * @since 1.7
2362          */
2363         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2364             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2365                              "EGYPTIAN HIEROGLYPHS",
2366                              "EGYPTIANHIEROGLYPHS");
2367 
2368         /**
2369          * Constant for the "Bamum Supplement" Unicode character block.
2370          * @since 1.7
2371          */
2372         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2373             new UnicodeBlock("BAMUM_SUPPLEMENT",
2374                              "BAMUM SUPPLEMENT",
2375                              "BAMUMSUPPLEMENT");
2376 
2377         /**
2378          * Constant for the "Kana Supplement" Unicode character block.
2379          * @since 1.7
2380          */
2381         public static final UnicodeBlock KANA_SUPPLEMENT =
2382             new UnicodeBlock("KANA_SUPPLEMENT",
2383                              "KANA SUPPLEMENT",
2384                              "KANASUPPLEMENT");
2385 
2386         /**
2387          * Constant for the "Ancient Greek Musical Notation" Unicode character
2388          * block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2392             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2393                              "ANCIENT GREEK MUSICAL NOTATION",
2394                              "ANCIENTGREEKMUSICALNOTATION");
2395 
2396         /**
2397          * Constant for the "Counting Rod Numerals" Unicode character block.
2398          * @since 1.7
2399          */
2400         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2401             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2402                              "COUNTING ROD NUMERALS",
2403                              "COUNTINGRODNUMERALS");
2404 
2405         /**
2406          * Constant for the "Mahjong Tiles" Unicode character block.
2407          * @since 1.7
2408          */
2409         public static final UnicodeBlock MAHJONG_TILES =
2410             new UnicodeBlock("MAHJONG_TILES",
2411                              "MAHJONG TILES",
2412                              "MAHJONGTILES");
2413 
2414         /**
2415          * Constant for the "Domino Tiles" Unicode character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock DOMINO_TILES =
2419             new UnicodeBlock("DOMINO_TILES",
2420                              "DOMINO TILES",
2421                              "DOMINOTILES");
2422 
2423         /**
2424          * Constant for the "Playing Cards" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock PLAYING_CARDS =
2428             new UnicodeBlock("PLAYING_CARDS",
2429                              "PLAYING CARDS",
2430                              "PLAYINGCARDS");
2431 
2432         /**
2433          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2434          * block.
2435          * @since 1.7
2436          */
2437         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2438             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2439                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2440                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2441 
2442         /**
2443          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2444          * block.
2445          * @since 1.7
2446          */
2447         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2448             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2449                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2450                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2451 
2452         /**
2453          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2454          * character block.
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2458             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2459                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2460                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2461 
2462         /**
2463          * Constant for the "Emoticons" Unicode character block.
2464          * @since 1.7
2465          */
2466         public static final UnicodeBlock EMOTICONS =
2467             new UnicodeBlock("EMOTICONS");
2468 
2469         /**
2470          * Constant for the "Transport And Map Symbols" Unicode character block.
2471          * @since 1.7
2472          */
2473         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2474             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2475                              "TRANSPORT AND MAP SYMBOLS",
2476                              "TRANSPORTANDMAPSYMBOLS");
2477 
2478         /**
2479          * Constant for the "Alchemical Symbols" Unicode character block.
2480          * @since 1.7
2481          */
2482         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2483             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2484                              "ALCHEMICAL SYMBOLS",
2485                              "ALCHEMICALSYMBOLS");
2486 
2487         /**
2488          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2489          * character block.
2490          * @since 1.7
2491          */
2492         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2493             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2494                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2495                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2496 
2497         /**
2498          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2499          * character block.
2500          * @since 1.7
2501          */
2502         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2503             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2504                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2505                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2506 
2507         /**
2508          * Constant for the "Arabic Extended-A" Unicode character block.
2509          * @since 1.8
2510          */
2511         public static final UnicodeBlock ARABIC_EXTENDED_A =
2512             new UnicodeBlock("ARABIC_EXTENDED_A",
2513                              "ARABIC EXTENDED-A",
2514                              "ARABICEXTENDED-A");
2515 
2516         /**
2517          * Constant for the "Sundanese Supplement" Unicode character block.
2518          * @since 1.8
2519          */
2520         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2521             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2522                              "SUNDANESE SUPPLEMENT",
2523                              "SUNDANESESUPPLEMENT");
2524 
2525         /**
2526          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2527          * @since 1.8
2528          */
2529         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2530             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2531                              "MEETEI MAYEK EXTENSIONS",
2532                              "MEETEIMAYEKEXTENSIONS");
2533 
2534         /**
2535          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2536          * @since 1.8
2537          */
2538         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2539             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2540                              "MEROITIC HIEROGLYPHS",
2541                              "MEROITICHIEROGLYPHS");
2542 
2543         /**
2544          * Constant for the "Meroitic Cursive" Unicode character block.
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock MEROITIC_CURSIVE =
2548             new UnicodeBlock("MEROITIC_CURSIVE",
2549                              "MEROITIC CURSIVE",
2550                              "MEROITICCURSIVE");
2551 
2552         /**
2553          * Constant for the "Sora Sompeng" Unicode character block.
2554          * @since 1.8
2555          */
2556         public static final UnicodeBlock SORA_SOMPENG =
2557             new UnicodeBlock("SORA_SOMPENG",
2558                              "SORA SOMPENG",
2559                              "SORASOMPENG");
2560 
2561         /**
2562          * Constant for the "Chakma" Unicode character block.
2563          * @since 1.8
2564          */
2565         public static final UnicodeBlock CHAKMA =
2566             new UnicodeBlock("CHAKMA");
2567 
2568         /**
2569          * Constant for the "Sharada" Unicode character block.
2570          * @since 1.8
2571          */
2572         public static final UnicodeBlock SHARADA =
2573             new UnicodeBlock("SHARADA");
2574 
2575         /**
2576          * Constant for the "Takri" Unicode character block.
2577          * @since 1.8
2578          */
2579         public static final UnicodeBlock TAKRI =
2580             new UnicodeBlock("TAKRI");
2581 
2582         /**
2583          * Constant for the "Miao" Unicode character block.
2584          * @since 1.8
2585          */
2586         public static final UnicodeBlock MIAO =
2587             new UnicodeBlock("MIAO");
2588 
2589         /**
2590          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2591          * character block.
2592          * @since 1.8
2593          */
2594         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2595             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2596                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2597                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2598 
2599         /**
2600          * Constant for the "Combining Diacritical Marks Extended" Unicode
2601          * character block.
2602          * @since 9
2603          */
2604         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2605             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2606                              "COMBINING DIACRITICAL MARKS EXTENDED",
2607                              "COMBININGDIACRITICALMARKSEXTENDED");
2608 
2609         /**
2610          * Constant for the "Myanmar Extended-B" Unicode character block.
2611          * @since 9
2612          */
2613         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2614             new UnicodeBlock("MYANMAR_EXTENDED_B",
2615                              "MYANMAR EXTENDED-B",
2616                              "MYANMAREXTENDED-B");
2617 
2618         /**
2619          * Constant for the "Latin Extended-E" Unicode character block.
2620          * @since 9
2621          */
2622         public static final UnicodeBlock LATIN_EXTENDED_E =
2623             new UnicodeBlock("LATIN_EXTENDED_E",
2624                              "LATIN EXTENDED-E",
2625                              "LATINEXTENDED-E");
2626 
2627         /**
2628          * Constant for the "Coptic Epact Numbers" Unicode character block.
2629          * @since 9
2630          */
2631         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2632             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2633                              "COPTIC EPACT NUMBERS",
2634                              "COPTICEPACTNUMBERS");
2635 
2636         /**
2637          * Constant for the "Old Permic" Unicode character block.
2638          * @since 9
2639          */
2640         public static final UnicodeBlock OLD_PERMIC =
2641             new UnicodeBlock("OLD_PERMIC",
2642                              "OLD PERMIC",
2643                              "OLDPERMIC");
2644 
2645         /**
2646          * Constant for the "Elbasan" Unicode character block.
2647          * @since 9
2648          */
2649         public static final UnicodeBlock ELBASAN =
2650             new UnicodeBlock("ELBASAN");
2651 
2652         /**
2653          * Constant for the "Caucasian Albanian" Unicode character block.
2654          * @since 9
2655          */
2656         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2657             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2658                              "CAUCASIAN ALBANIAN",
2659                              "CAUCASIANALBANIAN");
2660 
2661         /**
2662          * Constant for the "Linear A" Unicode character block.
2663          * @since 9
2664          */
2665         public static final UnicodeBlock LINEAR_A =
2666             new UnicodeBlock("LINEAR_A",
2667                              "LINEAR A",
2668                              "LINEARA");
2669 
2670         /**
2671          * Constant for the "Palmyrene" Unicode character block.
2672          * @since 9
2673          */
2674         public static final UnicodeBlock PALMYRENE =
2675             new UnicodeBlock("PALMYRENE");
2676 
2677         /**
2678          * Constant for the "Nabataean" Unicode character block.
2679          * @since 9
2680          */
2681         public static final UnicodeBlock NABATAEAN =
2682             new UnicodeBlock("NABATAEAN");
2683 
2684         /**
2685          * Constant for the "Old North Arabian" Unicode character block.
2686          * @since 9
2687          */
2688         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2689             new UnicodeBlock("OLD_NORTH_ARABIAN",
2690                              "OLD NORTH ARABIAN",
2691                              "OLDNORTHARABIAN");
2692 
2693         /**
2694          * Constant for the "Manichaean" Unicode character block.
2695          * @since 9
2696          */
2697         public static final UnicodeBlock MANICHAEAN =
2698             new UnicodeBlock("MANICHAEAN");
2699 
2700         /**
2701          * Constant for the "Psalter Pahlavi" Unicode character block.
2702          * @since 9
2703          */
2704         public static final UnicodeBlock PSALTER_PAHLAVI =
2705             new UnicodeBlock("PSALTER_PAHLAVI",
2706                              "PSALTER PAHLAVI",
2707                              "PSALTERPAHLAVI");
2708 
2709         /**
2710          * Constant for the "Mahajani" Unicode character block.
2711          * @since 9
2712          */
2713         public static final UnicodeBlock MAHAJANI =
2714             new UnicodeBlock("MAHAJANI");
2715 
2716         /**
2717          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2718          * @since 9
2719          */
2720         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2721             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2722                              "SINHALA ARCHAIC NUMBERS",
2723                              "SINHALAARCHAICNUMBERS");
2724 
2725         /**
2726          * Constant for the "Khojki" Unicode character block.
2727          * @since 9
2728          */
2729         public static final UnicodeBlock KHOJKI =
2730             new UnicodeBlock("KHOJKI");
2731 
2732         /**
2733          * Constant for the "Khudawadi" Unicode character block.
2734          * @since 9
2735          */
2736         public static final UnicodeBlock KHUDAWADI =
2737             new UnicodeBlock("KHUDAWADI");
2738 
2739         /**
2740          * Constant for the "Grantha" Unicode character block.
2741          * @since 9
2742          */
2743         public static final UnicodeBlock GRANTHA =
2744             new UnicodeBlock("GRANTHA");
2745 
2746         /**
2747          * Constant for the "Tirhuta" Unicode character block.
2748          * @since 9
2749          */
2750         public static final UnicodeBlock TIRHUTA =
2751             new UnicodeBlock("TIRHUTA");
2752 
2753         /**
2754          * Constant for the "Siddham" Unicode character block.
2755          * @since 9
2756          */
2757         public static final UnicodeBlock SIDDHAM =
2758             new UnicodeBlock("SIDDHAM");
2759 
2760         /**
2761          * Constant for the "Modi" Unicode character block.
2762          * @since 9
2763          */
2764         public static final UnicodeBlock MODI =
2765             new UnicodeBlock("MODI");
2766 
2767         /**
2768          * Constant for the "Warang Citi" Unicode character block.
2769          * @since 9
2770          */
2771         public static final UnicodeBlock WARANG_CITI =
2772             new UnicodeBlock("WARANG_CITI",
2773                              "WARANG CITI",
2774                              "WARANGCITI");
2775 
2776         /**
2777          * Constant for the "Pau Cin Hau" Unicode character block.
2778          * @since 9
2779          */
2780         public static final UnicodeBlock PAU_CIN_HAU =
2781             new UnicodeBlock("PAU_CIN_HAU",
2782                              "PAU CIN HAU",
2783                              "PAUCINHAU");
2784 
2785         /**
2786          * Constant for the "Mro" Unicode character block.
2787          * @since 9
2788          */
2789         public static final UnicodeBlock MRO =
2790             new UnicodeBlock("MRO");
2791 
2792         /**
2793          * Constant for the "Bassa Vah" Unicode character block.
2794          * @since 9
2795          */
2796         public static final UnicodeBlock BASSA_VAH =
2797             new UnicodeBlock("BASSA_VAH",
2798                              "BASSA VAH",
2799                              "BASSAVAH");
2800 
2801         /**
2802          * Constant for the "Pahawh Hmong" Unicode character block.
2803          * @since 9
2804          */
2805         public static final UnicodeBlock PAHAWH_HMONG =
2806             new UnicodeBlock("PAHAWH_HMONG",
2807                              "PAHAWH HMONG",
2808                              "PAHAWHHMONG");
2809 
2810         /**
2811          * Constant for the "Duployan" Unicode character block.
2812          * @since 9
2813          */
2814         public static final UnicodeBlock DUPLOYAN =
2815             new UnicodeBlock("DUPLOYAN");
2816 
2817         /**
2818          * Constant for the "Shorthand Format Controls" Unicode character block.
2819          * @since 9
2820          */
2821         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2822             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2823                              "SHORTHAND FORMAT CONTROLS",
2824                              "SHORTHANDFORMATCONTROLS");
2825 
2826         /**
2827          * Constant for the "Mende Kikakui" Unicode character block.
2828          * @since 9
2829          */
2830         public static final UnicodeBlock MENDE_KIKAKUI =
2831             new UnicodeBlock("MENDE_KIKAKUI",
2832                              "MENDE KIKAKUI",
2833                              "MENDEKIKAKUI");
2834 
2835         /**
2836          * Constant for the "Ornamental Dingbats" Unicode character block.
2837          * @since 9
2838          */
2839         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2840             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2841                              "ORNAMENTAL DINGBATS",
2842                              "ORNAMENTALDINGBATS");
2843 
2844         /**
2845          * Constant for the "Geometric Shapes Extended" Unicode character block.
2846          * @since 9
2847          */
2848         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2849             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2850                              "GEOMETRIC SHAPES EXTENDED",
2851                              "GEOMETRICSHAPESEXTENDED");
2852 
2853         /**
2854          * Constant for the "Supplemental Arrows-C" Unicode character block.
2855          * @since 9
2856          */
2857         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2858             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2859                              "SUPPLEMENTAL ARROWS-C",
2860                              "SUPPLEMENTALARROWS-C");
2861 
2862         /**
2863          * Constant for the "Cherokee Supplement" Unicode character block.
2864          * @since 9
2865          */
2866         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2867             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2868                              "CHEROKEE SUPPLEMENT",
2869                              "CHEROKEESUPPLEMENT");
2870 
2871         /**
2872          * Constant for the "Hatran" Unicode character block.
2873          * @since 9
2874          */
2875         public static final UnicodeBlock HATRAN =
2876             new UnicodeBlock("HATRAN");
2877 
2878         /**
2879          * Constant for the "Old Hungarian" Unicode character block.
2880          * @since 9
2881          */
2882         public static final UnicodeBlock OLD_HUNGARIAN =
2883             new UnicodeBlock("OLD_HUNGARIAN",
2884                              "OLD HUNGARIAN",
2885                              "OLDHUNGARIAN");
2886 
2887         /**
2888          * Constant for the "Multani" Unicode character block.
2889          * @since 9
2890          */
2891         public static final UnicodeBlock MULTANI =
2892             new UnicodeBlock("MULTANI");
2893 
2894         /**
2895          * Constant for the "Ahom" Unicode character block.
2896          * @since 9
2897          */
2898         public static final UnicodeBlock AHOM =
2899             new UnicodeBlock("AHOM");
2900 
2901         /**
2902          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2903          * @since 9
2904          */
2905         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2906             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2907                              "EARLY DYNASTIC CUNEIFORM",
2908                              "EARLYDYNASTICCUNEIFORM");
2909 
2910         /**
2911          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2912          * @since 9
2913          */
2914         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2915             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2916                              "ANATOLIAN HIEROGLYPHS",
2917                              "ANATOLIANHIEROGLYPHS");
2918 
2919         /**
2920          * Constant for the "Sutton SignWriting" Unicode character block.
2921          * @since 9
2922          */
2923         public static final UnicodeBlock SUTTON_SIGNWRITING =
2924             new UnicodeBlock("SUTTON_SIGNWRITING",
2925                              "SUTTON SIGNWRITING",
2926                              "SUTTONSIGNWRITING");
2927 
2928         /**
2929          * Constant for the "Supplemental Symbols and Pictographs" Unicode
2930          * character block.
2931          * @since 9
2932          */
2933         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2934             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2935                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2936                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2937 
2938         /**
2939          * Constant for the "CJK Unified Ideographs Extension E" Unicode
2940          * character block.
2941          * @since 9
2942          */
2943         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2944             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2945                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2946                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2947 
2948         /**
2949          * Constant for the "Syriac Supplement" Unicode
2950          * character block.
2951          * @since 11
2952          */
2953         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2954             new UnicodeBlock("SYRIAC_SUPPLEMENT",
2955                              "SYRIAC SUPPLEMENT",
2956                              "SYRIACSUPPLEMENT");
2957 
2958         /**
2959          * Constant for the "Cyrillic Extended-C" Unicode
2960          * character block.
2961          * @since 11
2962          */
2963         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2964             new UnicodeBlock("CYRILLIC_EXTENDED_C",
2965                              "CYRILLIC EXTENDED-C",
2966                              "CYRILLICEXTENDED-C");
2967 
2968         /**
2969          * Constant for the "Osage" Unicode
2970          * character block.
2971          * @since 11
2972          */
2973         public static final UnicodeBlock OSAGE =
2974             new UnicodeBlock("OSAGE");
2975 
2976         /**
2977          * Constant for the "Newa" Unicode
2978          * character block.
2979          * @since 11
2980          */
2981         public static final UnicodeBlock NEWA =
2982             new UnicodeBlock("NEWA");
2983 
2984         /**
2985          * Constant for the "Mongolian Supplement" Unicode
2986          * character block.
2987          * @since 11
2988          */
2989         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2990             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2991                              "MONGOLIAN SUPPLEMENT",
2992                              "MONGOLIANSUPPLEMENT");
2993 
2994         /**
2995          * Constant for the "Marchen" Unicode
2996          * character block.
2997          * @since 11
2998          */
2999         public static final UnicodeBlock MARCHEN =
3000             new UnicodeBlock("MARCHEN");
3001 
3002         /**
3003          * Constant for the "Ideographic Symbols and Punctuation" Unicode
3004          * character block.
3005          * @since 11
3006          */
3007         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3008             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3009                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3010                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3011 
3012         /**
3013          * Constant for the "Tangut" Unicode
3014          * character block.
3015          * @since 11
3016          */
3017         public static final UnicodeBlock TANGUT =
3018             new UnicodeBlock("TANGUT");
3019 
3020         /**
3021          * Constant for the "Tangut Components" Unicode
3022          * character block.
3023          * @since 11
3024          */
3025         public static final UnicodeBlock TANGUT_COMPONENTS =
3026             new UnicodeBlock("TANGUT_COMPONENTS",
3027                              "TANGUT COMPONENTS",
3028                              "TANGUTCOMPONENTS");
3029 
3030         /**
3031          * Constant for the "Kana Extended-A" Unicode
3032          * character block.
3033          * @since 11
3034          */
3035         public static final UnicodeBlock KANA_EXTENDED_A =
3036             new UnicodeBlock("KANA_EXTENDED_A",
3037                              "KANA EXTENDED-A",
3038                              "KANAEXTENDED-A");
3039         /**
3040          * Constant for the "Glagolitic Supplement" Unicode
3041          * character block.
3042          * @since 11
3043          */
3044         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3045             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3046                              "GLAGOLITIC SUPPLEMENT",
3047                              "GLAGOLITICSUPPLEMENT");
3048         /**
3049          * Constant for the "Adlam" Unicode
3050          * character block.
3051          * @since 11
3052          */
3053         public static final UnicodeBlock ADLAM =
3054             new UnicodeBlock("ADLAM");
3055 
3056         /**
3057          * Constant for the "Masaram Gondi" Unicode
3058          * character block.
3059          * @since 11
3060          */
3061         public static final UnicodeBlock MASARAM_GONDI =
3062             new UnicodeBlock("MASARAM_GONDI",
3063                              "MASARAM GONDI",
3064                              "MASARAMGONDI");
3065 
3066         /**
3067          * Constant for the "Zanabazar Square" Unicode
3068          * character block.
3069          * @since 11
3070          */
3071         public static final UnicodeBlock ZANABAZAR_SQUARE =
3072             new UnicodeBlock("ZANABAZAR_SQUARE",
3073                              "ZANABAZAR SQUARE",
3074                              "ZANABAZARSQUARE");
3075 
3076         /**
3077          * Constant for the "Nushu" Unicode
3078          * character block.
3079          * @since 11
3080          */
3081         public static final UnicodeBlock NUSHU =
3082             new UnicodeBlock("NUSHU");
3083 
3084         /**
3085          * Constant for the "Soyombo" Unicode
3086          * character block.
3087          * @since 11
3088          */
3089         public static final UnicodeBlock SOYOMBO =
3090             new UnicodeBlock("SOYOMBO");
3091 
3092         /**
3093          * Constant for the "Bhaiksuki" Unicode
3094          * character block.
3095          * @since 11
3096          */
3097         public static final UnicodeBlock BHAIKSUKI =
3098             new UnicodeBlock("BHAIKSUKI");
3099 
3100         /**
3101          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3102          * character block.
3103          * @since 11
3104          */
3105         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3106             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3107                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3108                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3109 
3110         private static final int blockStarts[] = {
3111             0x0000,   // 0000..007F; Basic Latin
3112             0x0080,   // 0080..00FF; Latin-1 Supplement
3113             0x0100,   // 0100..017F; Latin Extended-A
3114             0x0180,   // 0180..024F; Latin Extended-B
3115             0x0250,   // 0250..02AF; IPA Extensions
3116             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3117             0x0300,   // 0300..036F; Combining Diacritical Marks
3118             0x0370,   // 0370..03FF; Greek and Coptic
3119             0x0400,   // 0400..04FF; Cyrillic
3120             0x0500,   // 0500..052F; Cyrillic Supplement
3121             0x0530,   // 0530..058F; Armenian
3122             0x0590,   // 0590..05FF; Hebrew
3123             0x0600,   // 0600..06FF; Arabic
3124             0x0700,   // 0700..074F; Syriac
3125             0x0750,   // 0750..077F; Arabic Supplement
3126             0x0780,   // 0780..07BF; Thaana
3127             0x07C0,   // 07C0..07FF; NKo
3128             0x0800,   // 0800..083F; Samaritan
3129             0x0840,   // 0840..085F; Mandaic
3130             0x0860,   // 0860..086F; Syriac Supplement
3131             0x0870,   //             unassigned
3132             0x08A0,   // 08A0..08FF; Arabic Extended-A
3133             0x0900,   // 0900..097F; Devanagari
3134             0x0980,   // 0980..09FF; Bengali
3135             0x0A00,   // 0A00..0A7F; Gurmukhi
3136             0x0A80,   // 0A80..0AFF; Gujarati
3137             0x0B00,   // 0B00..0B7F; Oriya
3138             0x0B80,   // 0B80..0BFF; Tamil
3139             0x0C00,   // 0C00..0C7F; Telugu
3140             0x0C80,   // 0C80..0CFF; Kannada
3141             0x0D00,   // 0D00..0D7F; Malayalam
3142             0x0D80,   // 0D80..0DFF; Sinhala
3143             0x0E00,   // 0E00..0E7F; Thai
3144             0x0E80,   // 0E80..0EFF; Lao
3145             0x0F00,   // 0F00..0FFF; Tibetan
3146             0x1000,   // 1000..109F; Myanmar
3147             0x10A0,   // 10A0..10FF; Georgian
3148             0x1100,   // 1100..11FF; Hangul Jamo
3149             0x1200,   // 1200..137F; Ethiopic
3150             0x1380,   // 1380..139F; Ethiopic Supplement
3151             0x13A0,   // 13A0..13FF; Cherokee
3152             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3153             0x1680,   // 1680..169F; Ogham
3154             0x16A0,   // 16A0..16FF; Runic
3155             0x1700,   // 1700..171F; Tagalog
3156             0x1720,   // 1720..173F; Hanunoo
3157             0x1740,   // 1740..175F; Buhid
3158             0x1760,   // 1760..177F; Tagbanwa
3159             0x1780,   // 1780..17FF; Khmer
3160             0x1800,   // 1800..18AF; Mongolian
3161             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3162             0x1900,   // 1900..194F; Limbu
3163             0x1950,   // 1950..197F; Tai Le
3164             0x1980,   // 1980..19DF; New Tai Lue
3165             0x19E0,   // 19E0..19FF; Khmer Symbols
3166             0x1A00,   // 1A00..1A1F; Buginese
3167             0x1A20,   // 1A20..1AAF; Tai Tham
3168             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3169             0x1B00,   // 1B00..1B7F; Balinese
3170             0x1B80,   // 1B80..1BBF; Sundanese
3171             0x1BC0,   // 1BC0..1BFF; Batak
3172             0x1C00,   // 1C00..1C4F; Lepcha
3173             0x1C50,   // 1C50..1C7F; Ol Chiki
3174             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3175             0x1C90,   //             unassigned
3176             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3177             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3178             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3179             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3180             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3181             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3182             0x1F00,   // 1F00..1FFF; Greek Extended
3183             0x2000,   // 2000..206F; General Punctuation
3184             0x2070,   // 2070..209F; Superscripts and Subscripts
3185             0x20A0,   // 20A0..20CF; Currency Symbols
3186             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3187             0x2100,   // 2100..214F; Letterlike Symbols
3188             0x2150,   // 2150..218F; Number Forms
3189             0x2190,   // 2190..21FF; Arrows
3190             0x2200,   // 2200..22FF; Mathematical Operators
3191             0x2300,   // 2300..23FF; Miscellaneous Technical
3192             0x2400,   // 2400..243F; Control Pictures
3193             0x2440,   // 2440..245F; Optical Character Recognition
3194             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3195             0x2500,   // 2500..257F; Box Drawing
3196             0x2580,   // 2580..259F; Block Elements
3197             0x25A0,   // 25A0..25FF; Geometric Shapes
3198             0x2600,   // 2600..26FF; Miscellaneous Symbols
3199             0x2700,   // 2700..27BF; Dingbats
3200             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3201             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3202             0x2800,   // 2800..28FF; Braille Patterns
3203             0x2900,   // 2900..297F; Supplemental Arrows-B
3204             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3205             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3206             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3207             0x2C00,   // 2C00..2C5F; Glagolitic
3208             0x2C60,   // 2C60..2C7F; Latin Extended-C
3209             0x2C80,   // 2C80..2CFF; Coptic
3210             0x2D00,   // 2D00..2D2F; Georgian Supplement
3211             0x2D30,   // 2D30..2D7F; Tifinagh
3212             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3213             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3214             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3215             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3216             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3217             0x2FE0,   //             unassigned
3218             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3219             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3220             0x3040,   // 3040..309F; Hiragana
3221             0x30A0,   // 30A0..30FF; Katakana
3222             0x3100,   // 3100..312F; Bopomofo
3223             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3224             0x3190,   // 3190..319F; Kanbun
3225             0x31A0,   // 31A0..31BF; Bopomofo Extended
3226             0x31C0,   // 31C0..31EF; CJK Strokes
3227             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3228             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3229             0x3300,   // 3300..33FF; CJK Compatibility
3230             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3231             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3232             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3233             0xA000,   // A000..A48F; Yi Syllables
3234             0xA490,   // A490..A4CF; Yi Radicals
3235             0xA4D0,   // A4D0..A4FF; Lisu
3236             0xA500,   // A500..A63F; Vai
3237             0xA640,   // A640..A69F; Cyrillic Extended-B
3238             0xA6A0,   // A6A0..A6FF; Bamum
3239             0xA700,   // A700..A71F; Modifier Tone Letters
3240             0xA720,   // A720..A7FF; Latin Extended-D
3241             0xA800,   // A800..A82F; Syloti Nagri
3242             0xA830,   // A830..A83F; Common Indic Number Forms
3243             0xA840,   // A840..A87F; Phags-pa
3244             0xA880,   // A880..A8DF; Saurashtra
3245             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3246             0xA900,   // A900..A92F; Kayah Li
3247             0xA930,   // A930..A95F; Rejang
3248             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3249             0xA980,   // A980..A9DF; Javanese
3250             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3251             0xAA00,   // AA00..AA5F; Cham
3252             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3253             0xAA80,   // AA80..AADF; Tai Viet
3254             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3255             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3256             0xAB30,   // AB30..AB6F; Latin Extended-E
3257             0xAB70,   // AB70..ABBF; Cherokee Supplement
3258             0xABC0,   // ABC0..ABFF; Meetei Mayek
3259             0xAC00,   // AC00..D7AF; Hangul Syllables
3260             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3261             0xD800,   // D800..DB7F; High Surrogates
3262             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3263             0xDC00,   // DC00..DFFF; Low Surrogates
3264             0xE000,   // E000..F8FF; Private Use Area
3265             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3266             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3267             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3268             0xFE00,   // FE00..FE0F; Variation Selectors
3269             0xFE10,   // FE10..FE1F; Vertical Forms
3270             0xFE20,   // FE20..FE2F; Combining Half Marks
3271             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3272             0xFE50,   // FE50..FE6F; Small Form Variants
3273             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3274             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3275             0xFFF0,   // FFF0..FFFF; Specials
3276             0x10000,  // 10000..1007F; Linear B Syllabary
3277             0x10080,  // 10080..100FF; Linear B Ideograms
3278             0x10100,  // 10100..1013F; Aegean Numbers
3279             0x10140,  // 10140..1018F; Ancient Greek Numbers
3280             0x10190,  // 10190..101CF; Ancient Symbols
3281             0x101D0,  // 101D0..101FF; Phaistos Disc
3282             0x10200,  //               unassigned
3283             0x10280,  // 10280..1029F; Lycian
3284             0x102A0,  // 102A0..102DF; Carian
3285             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3286             0x10300,  // 10300..1032F; Old Italic
3287             0x10330,  // 10330..1034F; Gothic
3288             0x10350,  // 10350..1037F; Old Permic
3289             0x10380,  // 10380..1039F; Ugaritic
3290             0x103A0,  // 103A0..103DF; Old Persian
3291             0x103E0,  //               unassigned
3292             0x10400,  // 10400..1044F; Deseret
3293             0x10450,  // 10450..1047F; Shavian
3294             0x10480,  // 10480..104AF; Osmanya
3295             0x104B0,  // 104B0..104FF; Osage
3296             0x10500,  // 10500..1052F; Elbasan
3297             0x10530,  // 10530..1056F; Caucasian Albanian
3298             0x10570,  //               unassigned
3299             0x10600,  // 10600..1077F; Linear A
3300             0x10780,  //               unassigned
3301             0x10800,  // 10800..1083F; Cypriot Syllabary
3302             0x10840,  // 10840..1085F; Imperial Aramaic
3303             0x10860,  // 10860..1087F; Palmyrene
3304             0x10880,  // 10880..108AF; Nabataean
3305             0x108B0,  //               unassigned
3306             0x108E0,  // 108E0..108FF; Hatran
3307             0x10900,  // 10900..1091F; Phoenician
3308             0x10920,  // 10920..1093F; Lydian
3309             0x10940,  //               unassigned
3310             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3311             0x109A0,  // 109A0..109FF; Meroitic Cursive
3312             0x10A00,  // 10A00..10A5F; Kharoshthi
3313             0x10A60,  // 10A60..10A7F; Old South Arabian
3314             0x10A80,  // 10A80..10A9F; Old North Arabian
3315             0x10AA0,  //               unassigned
3316             0x10AC0,  // 10AC0..10AFF; Manichaean
3317             0x10B00,  // 10B00..10B3F; Avestan
3318             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3319             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3320             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3321             0x10BB0,  //               unassigned
3322             0x10C00,  // 10C00..10C4F; Old Turkic
3323             0x10C50,  //               unassigned
3324             0x10C80,  // 10C80..10CFF; Old Hungarian
3325             0x10D00,  //               unassigned
3326             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3327             0x10E80,  //               unassigned
3328             0x11000,  // 11000..1107F; Brahmi
3329             0x11080,  // 11080..110CF; Kaithi
3330             0x110D0,  // 110D0..110FF; Sora Sompeng
3331             0x11100,  // 11100..1114F; Chakma
3332             0x11150,  // 11150..1117F; Mahajani
3333             0x11180,  // 11180..111DF; Sharada
3334             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3335             0x11200,  // 11200..1124F; Khojki
3336             0x11250,  //               unassigned
3337             0x11280,  // 11280..112AF; Multani
3338             0x112B0,  // 112B0..112FF; Khudawadi
3339             0x11300,  // 11300..1137F; Grantha
3340             0x11380,  //               unassigned
3341             0x11400,  // 11400..1147F; Newa
3342             0x11480,  // 11480..114DF; Tirhuta
3343             0x114E0,  //               unassigned
3344             0x11580,  // 11580..115FF; Siddham
3345             0x11600,  // 11600..1165F; Modi
3346             0x11660, //  11660..1167F; Mongolian Supplement
3347             0x11680,  // 11680..116CF; Takri
3348             0x116D0,  //               unassigned
3349             0x11700,  // 11700..1173F; Ahom
3350             0x11740,  //               unassigned
3351             0x118A0,  // 118A0..118FF; Warang Citi
3352             0x11900,  //               unassigned
3353             0x11A00,  // 11A00..11A4F; Zanabazar Square
3354             0x11A50,  // 11A50..11AAF; Soyombo
3355             0x11AB0,  //               unassigned
3356             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3357             0x11B00,  //               unassigned
3358             0x11C00,  // 11C00..11C6F; Bhaiksuki
3359             0x11C70,  // 11C70..11CBF; Marchen
3360             0x11CC0,  //               unassigned
3361             0x11D00,  // 11D00..11D5F; Masaram Gondi
3362             0x11D60,  //               unassigned
3363             0x12000,  // 12000..123FF; Cuneiform
3364             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3365             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3366             0x12550,  //               unassigned
3367             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3368             0x13430,  //               unassigned
3369             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3370             0x14680,  //               unassigned
3371             0x16800,  // 16800..16A3F; Bamum Supplement
3372             0x16A40,  // 16A40..16A6F; Mro
3373             0x16A70,  //               unassigned
3374             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3375             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3376             0x16B90,  //               unassigned
3377             0x16F00,  // 16F00..16F9F; Miao
3378             0x16FA0,  //               unassigned
3379             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3380             0x17000,  // 17000..187FF; Tangut
3381             0x18800,  // 18800..18AFF; Tangut Components
3382             0x18B00,  //               unassigned
3383             0x1B000,  // 1B000..1B0FF; Kana Supplement
3384             0x1B100,  // 1B100..1B12F; Kana Extended-A
3385             0x1B130,  //               unassigned
3386             0x1B170,  // 1B170..1B2FF; Nushu
3387             0x1B300,  //               unassigned
3388             0x1BC00,  // 1BC00..1BC9F; Duployan
3389             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3390             0x1BCB0,  //               unassigned
3391             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3392             0x1D100,  // 1D100..1D1FF; Musical Symbols
3393             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3394             0x1D250,  //               unassigned
3395             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3396             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3397             0x1D380,  //               unassigned
3398             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3399             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3400             0x1DAB0,  //               unassigned
3401             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3402             0x1E030,  //               unassigned
3403             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3404             0x1E8E0,  //               unassigned
3405             0x1E900,  // 1E900..1E95F; Adlam
3406             0x1E960,  //               unassigned
3407             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3408             0x1EF00,  //               unassigned
3409             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3410             0x1F030,  // 1F030..1F09F; Domino Tiles
3411             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3412             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3413             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3414             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3415             0x1F600,  // 1F600..1F64F; Emoticons
3416             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3417             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3418             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3419             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3420             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3421             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3422             0x1FA00,  //               unassigned
3423             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3424             0x2A6E0,  //               unassigned
3425             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3426             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3427             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3428             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3429             0x2EBF0,  //               unassigned
3430             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3431             0x2FA20,  //               unassigned
3432             0xE0000,  // E0000..E007F; Tags
3433             0xE0080,  //               unassigned
3434             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3435             0xE01F0,  //               unassigned
3436             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3437             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
3438         };
3439 
3440         private static final UnicodeBlock[] blocks = {
                 // Lookup table for of(int), index-aligned with blockStarts:
                 // entry i is the block whose range starts at blockStarts[i].
                 // A null entry stands for a range that belongs to no defined
                 // block, which is why of(int) can return null. Any change here
                 // must keep the two arrays in the same order.
3441             BASIC_LATIN,
3442             LATIN_1_SUPPLEMENT,
3443             LATIN_EXTENDED_A,
3444             LATIN_EXTENDED_B,
3445             IPA_EXTENSIONS,
3446             SPACING_MODIFIER_LETTERS,
3447             COMBINING_DIACRITICAL_MARKS,
3448             GREEK,
3449             CYRILLIC,
3450             CYRILLIC_SUPPLEMENTARY,
3451             ARMENIAN,
3452             HEBREW,
3453             ARABIC,
3454             SYRIAC,
3455             ARABIC_SUPPLEMENT,
3456             THAANA,
3457             NKO,
3458             SAMARITAN,
3459             MANDAIC,
3460             SYRIAC_SUPPLEMENT,
3461             null,
3462             ARABIC_EXTENDED_A,
3463             DEVANAGARI,
3464             BENGALI,
3465             GURMUKHI,
3466             GUJARATI,
3467             ORIYA,
3468             TAMIL,
3469             TELUGU,
3470             KANNADA,
3471             MALAYALAM,
3472             SINHALA,
3473             THAI,
3474             LAO,
3475             TIBETAN,
3476             MYANMAR,
3477             GEORGIAN,
3478             HANGUL_JAMO,
3479             ETHIOPIC,
3480             ETHIOPIC_SUPPLEMENT,
3481             CHEROKEE,
3482             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3483             OGHAM,
3484             RUNIC,
3485             TAGALOG,
3486             HANUNOO,
3487             BUHID,
3488             TAGBANWA,
3489             KHMER,
3490             MONGOLIAN,
3491             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3492             LIMBU,
3493             TAI_LE,
3494             NEW_TAI_LUE,
3495             KHMER_SYMBOLS,
3496             BUGINESE,
3497             TAI_THAM,
3498             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3499             BALINESE,
3500             SUNDANESE,
3501             BATAK,
3502             LEPCHA,
3503             OL_CHIKI,
3504             CYRILLIC_EXTENDED_C,
3505             null,
3506             SUNDANESE_SUPPLEMENT,
3507             VEDIC_EXTENSIONS,
3508             PHONETIC_EXTENSIONS,
3509             PHONETIC_EXTENSIONS_SUPPLEMENT,
3510             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3511             LATIN_EXTENDED_ADDITIONAL,
3512             GREEK_EXTENDED,
3513             GENERAL_PUNCTUATION,
3514             SUPERSCRIPTS_AND_SUBSCRIPTS,
3515             CURRENCY_SYMBOLS,
3516             COMBINING_MARKS_FOR_SYMBOLS,
3517             LETTERLIKE_SYMBOLS,
3518             NUMBER_FORMS,
3519             ARROWS,
3520             MATHEMATICAL_OPERATORS,
3521             MISCELLANEOUS_TECHNICAL,
3522             CONTROL_PICTURES,
3523             OPTICAL_CHARACTER_RECOGNITION,
3524             ENCLOSED_ALPHANUMERICS,
3525             BOX_DRAWING,
3526             BLOCK_ELEMENTS,
3527             GEOMETRIC_SHAPES,
3528             MISCELLANEOUS_SYMBOLS,
3529             DINGBATS,
3530             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3531             SUPPLEMENTAL_ARROWS_A,
3532             BRAILLE_PATTERNS,
3533             SUPPLEMENTAL_ARROWS_B,
3534             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3535             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3536             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3537             GLAGOLITIC,
3538             LATIN_EXTENDED_C,
3539             COPTIC,
3540             GEORGIAN_SUPPLEMENT,
3541             TIFINAGH,
3542             ETHIOPIC_EXTENDED,
3543             CYRILLIC_EXTENDED_A,
3544             SUPPLEMENTAL_PUNCTUATION,
3545             CJK_RADICALS_SUPPLEMENT,
3546             KANGXI_RADICALS,
3547             null,
3548             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3549             CJK_SYMBOLS_AND_PUNCTUATION,
3550             HIRAGANA,
3551             KATAKANA,
3552             BOPOMOFO,
3553             HANGUL_COMPATIBILITY_JAMO,
3554             KANBUN,
3555             BOPOMOFO_EXTENDED,
3556             CJK_STROKES,
3557             KATAKANA_PHONETIC_EXTENSIONS,
3558             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3559             CJK_COMPATIBILITY,
3560             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3561             YIJING_HEXAGRAM_SYMBOLS,
3562             CJK_UNIFIED_IDEOGRAPHS,
3563             YI_SYLLABLES,
3564             YI_RADICALS,
3565             LISU,
3566             VAI,
3567             CYRILLIC_EXTENDED_B,
3568             BAMUM,
3569             MODIFIER_TONE_LETTERS,
3570             LATIN_EXTENDED_D,
3571             SYLOTI_NAGRI,
3572             COMMON_INDIC_NUMBER_FORMS,
3573             PHAGS_PA,
3574             SAURASHTRA,
3575             DEVANAGARI_EXTENDED,
3576             KAYAH_LI,
3577             REJANG,
3578             HANGUL_JAMO_EXTENDED_A,
3579             JAVANESE,
3580             MYANMAR_EXTENDED_B,
3581             CHAM,
3582             MYANMAR_EXTENDED_A,
3583             TAI_VIET,
3584             MEETEI_MAYEK_EXTENSIONS,
3585             ETHIOPIC_EXTENDED_A,
3586             LATIN_EXTENDED_E,
3587             CHEROKEE_SUPPLEMENT,
3588             MEETEI_MAYEK,
3589             HANGUL_SYLLABLES,
3590             HANGUL_JAMO_EXTENDED_B,
3591             HIGH_SURROGATES,
3592             HIGH_PRIVATE_USE_SURROGATES,
3593             LOW_SURROGATES,
3594             PRIVATE_USE_AREA,
3595             CJK_COMPATIBILITY_IDEOGRAPHS,
3596             ALPHABETIC_PRESENTATION_FORMS,
3597             ARABIC_PRESENTATION_FORMS_A,
3598             VARIATION_SELECTORS,
3599             VERTICAL_FORMS,
3600             COMBINING_HALF_MARKS,
3601             CJK_COMPATIBILITY_FORMS,
3602             SMALL_FORM_VARIANTS,
3603             ARABIC_PRESENTATION_FORMS_B,
3604             HALFWIDTH_AND_FULLWIDTH_FORMS,
3605             SPECIALS,
3606             LINEAR_B_SYLLABARY,
3607             LINEAR_B_IDEOGRAMS,
3608             AEGEAN_NUMBERS,
3609             ANCIENT_GREEK_NUMBERS,
3610             ANCIENT_SYMBOLS,
3611             PHAISTOS_DISC,
3612             null,
3613             LYCIAN,
3614             CARIAN,
3615             COPTIC_EPACT_NUMBERS,
3616             OLD_ITALIC,
3617             GOTHIC,
3618             OLD_PERMIC,
3619             UGARITIC,
3620             OLD_PERSIAN,
3621             null,
3622             DESERET,
3623             SHAVIAN,
3624             OSMANYA,
3625             OSAGE,
3626             ELBASAN,
3627             CAUCASIAN_ALBANIAN,
3628             null,
3629             LINEAR_A,
3630             null,
3631             CYPRIOT_SYLLABARY,
3632             IMPERIAL_ARAMAIC,
3633             PALMYRENE,
3634             NABATAEAN,
3635             null,
3636             HATRAN,
3637             PHOENICIAN,
3638             LYDIAN,
3639             null,
3640             MEROITIC_HIEROGLYPHS,
3641             MEROITIC_CURSIVE,
3642             KHAROSHTHI,
3643             OLD_SOUTH_ARABIAN,
3644             OLD_NORTH_ARABIAN,
3645             null,
3646             MANICHAEAN,
3647             AVESTAN,
3648             INSCRIPTIONAL_PARTHIAN,
3649             INSCRIPTIONAL_PAHLAVI,
3650             PSALTER_PAHLAVI,
3651             null,
3652             OLD_TURKIC,
3653             null,
3654             OLD_HUNGARIAN,
3655             null,
3656             RUMI_NUMERAL_SYMBOLS,
3657             null,
3658             BRAHMI,
3659             KAITHI,
3660             SORA_SOMPENG,
3661             CHAKMA,
3662             MAHAJANI,
3663             SHARADA,
3664             SINHALA_ARCHAIC_NUMBERS,
3665             KHOJKI,
3666             null,
3667             MULTANI,
3668             KHUDAWADI,
3669             GRANTHA,
3670             null,
3671             NEWA,
3672             TIRHUTA,
3673             null,
3674             SIDDHAM,
3675             MODI,
3676             MONGOLIAN_SUPPLEMENT,
3677             TAKRI,
3678             null,
3679             AHOM,
3680             null,
3681             WARANG_CITI,
3682             null,
3683             ZANABAZAR_SQUARE,
3684             SOYOMBO,
3685             null,
3686             PAU_CIN_HAU,
3687             null,
3688             BHAIKSUKI,
3689             MARCHEN,
3690             null,
3691             MASARAM_GONDI,
3692             null,
3693             CUNEIFORM,
3694             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3695             EARLY_DYNASTIC_CUNEIFORM,
3696             null,
3697             EGYPTIAN_HIEROGLYPHS,
3698             null,
3699             ANATOLIAN_HIEROGLYPHS,
3700             null,
3701             BAMUM_SUPPLEMENT,
3702             MRO,
3703             null,
3704             BASSA_VAH,
3705             PAHAWH_HMONG,
3706             null,
3707             MIAO,
3708             null,
3709             IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
3710             TANGUT,
3711             TANGUT_COMPONENTS,
3712             null,
3713             KANA_SUPPLEMENT,
3714             KANA_EXTENDED_A,
3715             null,
3716             NUSHU,
3717             null,
3718             DUPLOYAN,
3719             SHORTHAND_FORMAT_CONTROLS,
3720             null,
3721             BYZANTINE_MUSICAL_SYMBOLS,
3722             MUSICAL_SYMBOLS,
3723             ANCIENT_GREEK_MUSICAL_NOTATION,
3724             null,
3725             TAI_XUAN_JING_SYMBOLS,
3726             COUNTING_ROD_NUMERALS,
3727             null,
3728             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3729             SUTTON_SIGNWRITING,
3730             null,
3731             GLAGOLITIC_SUPPLEMENT,
3732             null,
3733             MENDE_KIKAKUI,
3734             null,
3735             ADLAM,
3736             null,
3737             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3738             null,
3739             MAHJONG_TILES,
3740             DOMINO_TILES,
3741             PLAYING_CARDS,
3742             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3743             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3744             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3745             EMOTICONS,
3746             ORNAMENTAL_DINGBATS,
3747             TRANSPORT_AND_MAP_SYMBOLS,
3748             ALCHEMICAL_SYMBOLS,
3749             GEOMETRIC_SHAPES_EXTENDED,
3750             SUPPLEMENTAL_ARROWS_C,
3751             SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
3752             null,
3753             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3754             null,
3755             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3756             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3757             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
3758             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
3759             null,
3760             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3761             null,
3762             TAGS,
3763             null,
3764             VARIATION_SELECTORS_SUPPLEMENT,
3765             null,
3766             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3767             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3768         };
3769 
3770 
3771         /**
3772          * Returns the object representing the Unicode block containing the
3773          * given character, or {@code null} if the character is not a
3774          * member of a defined block.
3775          *
3776          * <p><b>Note:</b> This method cannot handle
3777          * <a href="Character.html#supplementary"> supplementary
3778          * characters</a>.  To support all Unicode characters, including
3779          * supplementary characters, use the {@link #of(int)} method.
3780          *
3781          * @param   c  The character in question
3782          * @return  The {@code UnicodeBlock} instance representing the
3783          *          Unicode block of which this character is a member, or
3784          *          {@code null} if the character is not a member of any
3785          *          Unicode block
3786          */
3787         public static UnicodeBlock of(char c) {
3788             return of((int)c);
3789         }
3790 
3791         /**
3792          * Returns the object representing the Unicode block
3793          * containing the given character (Unicode code point), or
3794          * {@code null} if the character is not a member of a
3795          * defined block.
3796          *
3797          * @param   codePoint the character (Unicode code point) in question.
3798          * @return  The {@code UnicodeBlock} instance representing the
3799          *          Unicode block of which this character is a member, or
3800          *          {@code null} if the character is not a member of any
3801          *          Unicode block
3802          * @throws  IllegalArgumentException if the specified
3803          * {@code codePoint} is an invalid Unicode code point.
3804          * @see Character#isValidCodePoint(int)
3805          * @since   1.5
3806          */
3807         public static UnicodeBlock of(int codePoint) {
3808             if (!isValidCodePoint(codePoint)) {
3809                 throw new IllegalArgumentException(
3810                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
3811             }
3812 
3813             int top, bottom, current;
3814             bottom = 0;
3815             top = blockStarts.length;
3816             current = top/2;
3817 
3818             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3819             while (top - bottom > 1) {
3820                 if (codePoint >= blockStarts[current]) {
3821                     bottom = current;
3822                 } else {
3823                     top = current;
3824                 }
3825                 current = (top + bottom) / 2;
3826             }
3827             return blocks[current];
3828         }
3829 
3830         /**
3831          * Returns the UnicodeBlock with the given name. Block
3832          * names are determined by The Unicode Standard. The file
3833          * {@code Blocks-<version>.txt} defines blocks for a particular
3834          * version of the standard. The {@link Character} class specifies
3835          * the version of the standard that it supports.
3836          * <p>
3837          * This method accepts block names in the following forms:
3838          * <ol>
3839          * <li> Canonical block names as defined by the Unicode Standard.
3840          * For example, the standard defines a "Basic Latin" block. Therefore, this
3841          * method accepts "Basic Latin" as a valid block name. The documentation of
3842          * each UnicodeBlock provides the canonical name.
3843          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3844          * is a valid block name for the "Basic Latin" block.
3845          * <li>The text representation of each constant UnicodeBlock identifier.
3846          * For example, this method will return the {@link #BASIC_LATIN} block if
3847          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3848          * hyphens in the canonical name with underscores.
3849          * </ol>
3850          * Finally, character case is ignored for all of the valid block name forms.
3851          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3852          * The en_US locale's case mapping rules are used to provide case-insensitive
3853          * string comparisons for block name validation.
3854          * <p>
3855          * If the Unicode Standard changes block names, both the previous and
3856          * current names will be accepted.
3857          *
3858          * @param blockName A {@code UnicodeBlock} name.
3859          * @return The {@code UnicodeBlock} instance identified
3860          *         by {@code blockName}
3861          * @throws IllegalArgumentException if {@code blockName} is an
3862          *         invalid name
3863          * @throws NullPointerException if {@code blockName} is null
3864          * @since 1.5
3865          */
3866         public static final UnicodeBlock forName(String blockName) {
3867             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3868             if (block == null) {
3869                 throw new IllegalArgumentException("Not a valid block name: "
3870                             + blockName);
3871             }
3872             return block;
3873         }
3874     }
3875 
3876 
3877     /**
3878      * A family of character subsets representing the character scripts
3879      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3880      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3881      * character is assigned to a single Unicode script, either a specific
3882      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3883      * one of the following three special values,
3884      * {@link Character.UnicodeScript#INHERITED Inherited},
3885      * {@link Character.UnicodeScript#COMMON Common} or
3886      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3887      *
3888      * @since 1.7
3889      */
3890     public static enum UnicodeScript {
3891         /**
3892          * Unicode script "Common".
3893          */
3894         COMMON,
3895 
3896         /**
3897          * Unicode script "Latin".
3898          */
3899         LATIN,
3900 
3901         /**
3902          * Unicode script "Greek".
3903          */
3904         GREEK,
3905 
3906         /**
3907          * Unicode script "Cyrillic".
3908          */
3909         CYRILLIC,
3910 
3911         /**
3912          * Unicode script "Armenian".
3913          */
3914         ARMENIAN,
3915 
3916         /**
3917          * Unicode script "Hebrew".
3918          */
3919         HEBREW,
3920 
3921         /**
3922          * Unicode script "Arabic".
3923          */
3924         ARABIC,
3925 
3926         /**
3927          * Unicode script "Syriac".
3928          */
3929         SYRIAC,
3930 
3931         /**
3932          * Unicode script "Thaana".
3933          */
3934         THAANA,
3935 
3936         /**
3937          * Unicode script "Devanagari".
3938          */
3939         DEVANAGARI,
3940 
3941         /**
3942          * Unicode script "Bengali".
3943          */
3944         BENGALI,
3945 
3946         /**
3947          * Unicode script "Gurmukhi".
3948          */
3949         GURMUKHI,
3950 
3951         /**
3952          * Unicode script "Gujarati".
3953          */
3954         GUJARATI,
3955 
3956         /**
3957          * Unicode script "Oriya".
3958          */
3959         ORIYA,
3960 
3961         /**
3962          * Unicode script "Tamil".
3963          */
3964         TAMIL,
3965 
3966         /**
3967          * Unicode script "Telugu".
3968          */
3969         TELUGU,
3970 
3971         /**
3972          * Unicode script "Kannada".
3973          */
3974         KANNADA,
3975 
3976         /**
3977          * Unicode script "Malayalam".
3978          */
3979         MALAYALAM,
3980 
3981         /**
3982          * Unicode script "Sinhala".
3983          */
3984         SINHALA,
3985 
3986         /**
3987          * Unicode script "Thai".
3988          */
3989         THAI,
3990 
3991         /**
3992          * Unicode script "Lao".
3993          */
3994         LAO,
3995 
3996         /**
3997          * Unicode script "Tibetan".
3998          */
3999         TIBETAN,
4000 
4001         /**
4002          * Unicode script "Myanmar".
4003          */
4004         MYANMAR,
4005 
4006         /**
4007          * Unicode script "Georgian".
4008          */
4009         GEORGIAN,
4010 
4011         /**
4012          * Unicode script "Hangul".
4013          */
4014         HANGUL,
4015 
4016         /**
4017          * Unicode script "Ethiopic".
4018          */
4019         ETHIOPIC,
4020 
4021         /**
4022          * Unicode script "Cherokee".
4023          */
4024         CHEROKEE,
4025 
4026         /**
4027          * Unicode script "Canadian_Aboriginal".
4028          */
4029         CANADIAN_ABORIGINAL,
4030 
4031         /**
4032          * Unicode script "Ogham".
4033          */
4034         OGHAM,
4035 
4036         /**
4037          * Unicode script "Runic".
4038          */
4039         RUNIC,
4040 
4041         /**
4042          * Unicode script "Khmer".
4043          */
4044         KHMER,
4045 
4046         /**
4047          * Unicode script "Mongolian".
4048          */
4049         MONGOLIAN,
4050 
4051         /**
4052          * Unicode script "Hiragana".
4053          */
4054         HIRAGANA,
4055 
4056         /**
4057          * Unicode script "Katakana".
4058          */
4059         KATAKANA,
4060 
4061         /**
4062          * Unicode script "Bopomofo".
4063          */
4064         BOPOMOFO,
4065 
4066         /**
4067          * Unicode script "Han".
4068          */
4069         HAN,
4070 
4071         /**
4072          * Unicode script "Yi".
4073          */
4074         YI,
4075 
4076         /**
4077          * Unicode script "Old_Italic".
4078          */
4079         OLD_ITALIC,
4080 
4081         /**
4082          * Unicode script "Gothic".
4083          */
4084         GOTHIC,
4085 
4086         /**
4087          * Unicode script "Deseret".
4088          */
4089         DESERET,
4090 
4091         /**
4092          * Unicode script "Inherited".
4093          */
4094         INHERITED,
4095 
4096         /**
4097          * Unicode script "Tagalog".
4098          */
4099         TAGALOG,
4100 
4101         /**
4102          * Unicode script "Hanunoo".
4103          */
4104         HANUNOO,
4105 
4106         /**
4107          * Unicode script "Buhid".
4108          */
4109         BUHID,
4110 
4111         /**
4112          * Unicode script "Tagbanwa".
4113          */
4114         TAGBANWA,
4115 
4116         /**
4117          * Unicode script "Limbu".
4118          */
4119         LIMBU,
4120 
4121         /**
4122          * Unicode script "Tai_Le".
4123          */
4124         TAI_LE,
4125 
4126         /**
4127          * Unicode script "Linear_B".
4128          */
4129         LINEAR_B,
4130 
4131         /**
4132          * Unicode script "Ugaritic".
4133          */
4134         UGARITIC,
4135 
4136         /**
4137          * Unicode script "Shavian".
4138          */
4139         SHAVIAN,
4140 
4141         /**
4142          * Unicode script "Osmanya".
4143          */
4144         OSMANYA,
4145 
4146         /**
4147          * Unicode script "Cypriot".
4148          */
4149         CYPRIOT,
4150 
4151         /**
4152          * Unicode script "Braille".
4153          */
4154         BRAILLE,
4155 
4156         /**
4157          * Unicode script "Buginese".
4158          */
4159         BUGINESE,
4160 
4161         /**
4162          * Unicode script "Coptic".
4163          */
4164         COPTIC,
4165 
4166         /**
4167          * Unicode script "New_Tai_Lue".
4168          */
4169         NEW_TAI_LUE,
4170 
4171         /**
4172          * Unicode script "Glagolitic".
4173          */
4174         GLAGOLITIC,
4175 
4176         /**
4177          * Unicode script "Tifinagh".
4178          */
4179         TIFINAGH,
4180 
4181         /**
4182          * Unicode script "Syloti_Nagri".
4183          */
4184         SYLOTI_NAGRI,
4185 
4186         /**
4187          * Unicode script "Old_Persian".
4188          */
4189         OLD_PERSIAN,
4190 
4191         /**
4192          * Unicode script "Kharoshthi".
4193          */
4194         KHAROSHTHI,
4195 
4196         /**
4197          * Unicode script "Balinese".
4198          */
4199         BALINESE,
4200 
4201         /**
4202          * Unicode script "Cuneiform".
4203          */
4204         CUNEIFORM,
4205 
4206         /**
4207          * Unicode script "Phoenician".
4208          */
4209         PHOENICIAN,
4210 
4211         /**
4212          * Unicode script "Phags_Pa".
4213          */
4214         PHAGS_PA,
4215 
4216         /**
4217          * Unicode script "Nko".
4218          */
4219         NKO,
4220 
4221         /**
4222          * Unicode script "Sundanese".
4223          */
4224         SUNDANESE,
4225 
4226         /**
4227          * Unicode script "Batak".
4228          */
4229         BATAK,
4230 
4231         /**
4232          * Unicode script "Lepcha".
4233          */
4234         LEPCHA,
4235 
4236         /**
4237          * Unicode script "Ol_Chiki".
4238          */
4239         OL_CHIKI,
4240 
4241         /**
4242          * Unicode script "Vai".
4243          */
4244         VAI,
4245 
4246         /**
4247          * Unicode script "Saurashtra".
4248          */
4249         SAURASHTRA,
4250 
4251         /**
4252          * Unicode script "Kayah_Li".
4253          */
4254         KAYAH_LI,
4255 
4256         /**
4257          * Unicode script "Rejang".
4258          */
4259         REJANG,
4260 
4261         /**
4262          * Unicode script "Lycian".
4263          */
4264         LYCIAN,
4265 
4266         /**
4267          * Unicode script "Carian".
4268          */
4269         CARIAN,
4270 
4271         /**
4272          * Unicode script "Lydian".
4273          */
4274         LYDIAN,
4275 
4276         /**
4277          * Unicode script "Cham".
4278          */
4279         CHAM,
4280 
4281         /**
4282          * Unicode script "Tai_Tham".
4283          */
4284         TAI_THAM,
4285 
4286         /**
4287          * Unicode script "Tai_Viet".
4288          */
4289         TAI_VIET,
4290 
4291         /**
4292          * Unicode script "Avestan".
4293          */
4294         AVESTAN,
4295 
4296         /**
4297          * Unicode script "Egyptian_Hieroglyphs".
4298          */
4299         EGYPTIAN_HIEROGLYPHS,
4300 
4301         /**
4302          * Unicode script "Samaritan".
4303          */
4304         SAMARITAN,
4305 
4306         /**
4307          * Unicode script "Mandaic".
4308          */
4309         MANDAIC,
4310 
4311         /**
4312          * Unicode script "Lisu".
4313          */
4314         LISU,
4315 
4316         /**
4317          * Unicode script "Bamum".
4318          */
4319         BAMUM,
4320 
4321         /**
4322          * Unicode script "Javanese".
4323          */
4324         JAVANESE,
4325 
4326         /**
4327          * Unicode script "Meetei_Mayek".
4328          */
4329         MEETEI_MAYEK,
4330 
4331         /**
4332          * Unicode script "Imperial_Aramaic".
4333          */
4334         IMPERIAL_ARAMAIC,
4335 
4336         /**
4337          * Unicode script "Old_South_Arabian".
4338          */
4339         OLD_SOUTH_ARABIAN,
4340 
4341         /**
4342          * Unicode script "Inscriptional_Parthian".
4343          */
4344         INSCRIPTIONAL_PARTHIAN,
4345 
4346         /**
4347          * Unicode script "Inscriptional_Pahlavi".
4348          */
4349         INSCRIPTIONAL_PAHLAVI,
4350 
4351         /**
4352          * Unicode script "Old_Turkic".
4353          */
4354         OLD_TURKIC,
4355 
4356         /**
4357          * Unicode script "Brahmi".
4358          */
4359         BRAHMI,
4360 
4361         /**
4362          * Unicode script "Kaithi".
4363          */
4364         KAITHI,
4365 
4366         /**
4367          * Unicode script "Meroitic Hieroglyphs".
4368          * @since 1.8
4369          */
4370         MEROITIC_HIEROGLYPHS,
4371 
4372         /**
4373          * Unicode script "Meroitic Cursive".
4374          * @since 1.8
4375          */
4376         MEROITIC_CURSIVE,
4377 
4378         /**
4379          * Unicode script "Sora Sompeng".
4380          * @since 1.8
4381          */
4382         SORA_SOMPENG,
4383 
4384         /**
4385          * Unicode script "Chakma".
4386          * @since 1.8
4387          */
4388         CHAKMA,
4389 
4390         /**
4391          * Unicode script "Sharada".
4392          * @since 1.8
4393          */
4394         SHARADA,
4395 
4396         /**
4397          * Unicode script "Takri".
4398          * @since 1.8
4399          */
4400         TAKRI,
4401 
4402         /**
4403          * Unicode script "Miao".
4404          * @since 1.8
4405          */
4406         MIAO,
4407 
4408         /**
4409          * Unicode script "Caucasian Albanian".
4410          * @since 9
4411          */
4412         CAUCASIAN_ALBANIAN,
4413 
4414         /**
4415          * Unicode script "Bassa Vah".
4416          * @since 9
4417          */
4418         BASSA_VAH,
4419 
4420         /**
4421          * Unicode script "Duployan".
4422          * @since 9
4423          */
4424         DUPLOYAN,
4425 
4426         /**
4427          * Unicode script "Elbasan".
4428          * @since 9
4429          */
4430         ELBASAN,
4431 
4432         /**
4433          * Unicode script "Grantha".
4434          * @since 9
4435          */
4436         GRANTHA,
4437 
4438         /**
4439          * Unicode script "Pahawh Hmong".
4440          * @since 9
4441          */
4442         PAHAWH_HMONG,
4443 
4444         /**
4445          * Unicode script "Khojki".
4446          * @since 9
4447          */
4448         KHOJKI,
4449 
4450         /**
4451          * Unicode script "Linear A".
4452          * @since 9
4453          */
4454         LINEAR_A,
4455 
4456         /**
4457          * Unicode script "Mahajani".
4458          * @since 9
4459          */
4460         MAHAJANI,
4461 
4462         /**
4463          * Unicode script "Manichaean".
4464          * @since 9
4465          */
4466         MANICHAEAN,
4467 
4468         /**
4469          * Unicode script "Mende Kikakui".
4470          * @since 9
4471          */
4472         MENDE_KIKAKUI,
4473 
4474         /**
4475          * Unicode script "Modi".
4476          * @since 9
4477          */
4478         MODI,
4479 
4480         /**
4481          * Unicode script "Mro".
4482          * @since 9
4483          */
4484         MRO,
4485 
4486         /**
4487          * Unicode script "Old North Arabian".
4488          * @since 9
4489          */
4490         OLD_NORTH_ARABIAN,
4491 
4492         /**
4493          * Unicode script "Nabataean".
4494          * @since 9
4495          */
4496         NABATAEAN,
4497 
4498         /**
4499          * Unicode script "Palmyrene".
4500          * @since 9
4501          */
4502         PALMYRENE,
4503 
4504         /**
4505          * Unicode script "Pau Cin Hau".
4506          * @since 9
4507          */
4508         PAU_CIN_HAU,
4509 
4510         /**
4511          * Unicode script "Old Permic".
4512          * @since 9
4513          */
4514         OLD_PERMIC,
4515 
4516         /**
4517          * Unicode script "Psalter Pahlavi".
4518          * @since 9
4519          */
4520         PSALTER_PAHLAVI,
4521 
4522         /**
4523          * Unicode script "Siddham".
4524          * @since 9
4525          */
4526         SIDDHAM,
4527 
4528         /**
4529          * Unicode script "Khudawadi".
4530          * @since 9
4531          */
4532         KHUDAWADI,
4533 
4534         /**
4535          * Unicode script "Tirhuta".
4536          * @since 9
4537          */
4538         TIRHUTA,
4539 
4540         /**
4541          * Unicode script "Warang Citi".
4542          * @since 9
4543          */
4544         WARANG_CITI,
4545 
4546          /**
4547          * Unicode script "Ahom".
4548          * @since 9
4549          */
4550         AHOM,
4551 
4552         /**
4553          * Unicode script "Anatolian Hieroglyphs".
4554          * @since 9
4555          */
4556         ANATOLIAN_HIEROGLYPHS,
4557 
4558         /**
4559          * Unicode script "Hatran".
4560          * @since 9
4561          */
4562         HATRAN,
4563 
4564         /**
4565          * Unicode script "Multani".
4566          * @since 9
4567          */
4568         MULTANI,
4569 
4570         /**
4571          * Unicode script "Old Hungarian".
4572          * @since 9
4573          */
4574         OLD_HUNGARIAN,
4575 
4576         /**
4577          * Unicode script "SignWriting".
4578          * @since 9
4579          */
4580         SIGNWRITING,
4581 
4582         /**
4583           * Unicode script "Adlam".
4584           * @since 11
4585           */
4586         ADLAM,
4587 
4588         /**
4589           * Unicode script "Bhaiksuki".
4590           * @since 11
4591           */
4592         BHAIKSUKI,
4593 
4594         /**
4595           * Unicode script "Marchen".
4596           * @since 11
4597           */
4598         MARCHEN,
4599 
4600         /**
4601           * Unicode script "Newa".
4602           * @since 11
4603           */
4604         NEWA,
4605 
4606         /**
4607           * Unicode script "Osage".
4608           * @since 11
4609           */
4610         OSAGE,
4611 
4612         /**
4613           * Unicode script "Tangut".
4614           * @since 11
4615           */
4616         TANGUT,
4617 
4618         /**
4619           * Unicode script "Masaram Gondi".
4620           * @since 11
4621           */
4622         MASARAM_GONDI,
4623 
4624         /**
4625           * Unicode script "Nushu".
4626           * @since 11
4627           */
4628         NUSHU,
4629 
4630         /**
4631           * Unicode script "Soyombo".
4632           * @since 11
4633           */
4634         SOYOMBO,
4635 
4636         /**
4637           * Unicode script "Zanabazar Square".
4638           * @since 11
4639           */
4640         ZANABAZAR_SQUARE,
4641 
4642         /**
4643          * Unicode script "Unknown".
4644          */
4645         UNKNOWN;
4646 
4647         private static final int[] scriptStarts = {
4648             0x0000,   // 0000..0040; COMMON
4649             0x0041,   // 0041..005A; LATIN
4650             0x005B,   // 005B..0060; COMMON
4651             0x0061,   // 0061..007A; LATIN
4652             0x007B,   // 007B..00A9; COMMON
4653             0x00AA,   // 00AA      ; LATIN
4654             0x00AB,   // 00AB..00B9; COMMON
4655             0x00BA,   // 00BA      ; LATIN
4656             0x00BB,   // 00BB..00BF; COMMON
4657             0x00C0,   // 00C0..00D6; LATIN
4658             0x00D7,   // 00D7      ; COMMON
4659             0x00D8,   // 00D8..00F6; LATIN
4660             0x00F7,   // 00F7      ; COMMON
4661             0x00F8,   // 00F8..02B8; LATIN
4662             0x02B9,   // 02B9..02DF; COMMON
4663             0x02E0,   // 02E0..02E4; LATIN
4664             0x02E5,   // 02E5..02E9; COMMON
4665             0x02EA,   // 02EA..02EB; BOPOMOFO
4666             0x02EC,   // 02EC..02FF; COMMON
4667             0x0300,   // 0300..036F; INHERITED
4668             0x0370,   // 0370..0373; GREEK
4669             0x0374,   // 0374      ; COMMON
4670             0x0375,   // 0375..0377; GREEK
4671             0x0378,   // 0378..0379; UNKNOWN
4672             0x037A,   // 037A..037D; GREEK
4673             0x037E,   // 037E      ; COMMON
4674             0x037F,   // 037F      ; GREEK
4675             0x0380,   // 0380..0383; UNKNOWN
4676             0x0384,   // 0384      ; GREEK
4677             0x0385,   // 0385      ; COMMON
4678             0x0386,   // 0386      ; GREEK
4679             0x0387,   // 0387      ; COMMON
4680             0x0388,   // 0388..038A; GREEK
4681             0x038B,   // 038B      ; UNKNOWN
4682             0x038C,   // 038C      ; GREEK
4683             0x038D,   // 038D      ; UNKNOWN
4684             0x038E,   // 038E..03A1; GREEK
4685             0x03A2,   // 03A2      ; UNKNOWN
4686             0x03A3,   // 03A3..03E1; GREEK
4687             0x03E2,   // 03E2..03EF; COPTIC
4688             0x03F0,   // 03F0..03FF; GREEK
4689             0x0400,   // 0400..0484; CYRILLIC
4690             0x0485,   // 0485..0486; INHERITED
4691             0x0487,   // 0487..052F; CYRILLIC
4692             0x0530,   // 0530      ; UNKNOWN
4693             0x0531,   // 0531..0556; ARMENIAN
4694             0x0557,   // 0557..0558; UNKNOWN
4695             0x0559,   // 0559..055F; ARMENIAN
4696             0x0560,   // 0560      ; UNKNOWN
4697             0x0561,   // 0561..0587; ARMENIAN
4698             0x0588,   // 0588      ; UNKNOWN
4699             0x0589,   // 0589      ; COMMON
4700             0x058A,   // 058A      ; ARMENIAN
4701             0x058B,   // 058B..058C; UNKNOWN
4702             0x058D,   // 058D..058F; ARMENIAN
4703             0x0590,   // 0590      ; UNKNOWN
4704             0x0591,   // 0591..05C7; HEBREW
4705             0x05C8,   // 05C8..05CF; UNKNOWN
4706             0x05D0,   // 05D0..05EA; HEBREW
4707             0x05EB,   // 05EB..05EF; UNKNOWN
4708             0x05F0,   // 05F0..05F4; HEBREW
4709             0x05F5,   // 05F5..05FF; UNKNOWN
4710             0x0600,   // 0600..0604; ARABIC
4711             0x0605,   // 0605      ; COMMON
4712             0x0606,   // 0606..060B; ARABIC
4713             0x060C,   // 060C      ; COMMON
4714             0x060D,   // 060D..061A; ARABIC
4715             0x061B,   // 061B      ; COMMON
4716             0x061C,   // 061C      ; ARABIC
4717             0x061D,   // 061D      ; UNKNOWN
4718             0x061E,   // 061E      ; ARABIC
4719             0x061F,   // 061F      ; COMMON
4720             0x0620,   // 0620..063F; ARABIC
4721             0x0640,   // 0640      ; COMMON
4722             0x0641,   // 0641..064A; ARABIC
4723             0x064B,   // 064B..0655; INHERITED
4724             0x0656,   // 0656..066F; ARABIC
4725             0x0670,   // 0670      ; INHERITED
4726             0x0671,   // 0671..06DC; ARABIC
4727             0x06DD,   // 06DD      ; COMMON
4728             0x06DE,   // 06DE..06FF; ARABIC
4729             0x0700,   // 0700..070D; SYRIAC
4730             0x070E,   // 070E      ; UNKNOWN
4731             0x070F,   // 070F..074A; SYRIAC
4732             0x074B,   // 074B..074C; UNKNOWN
4733             0x074D,   // 074D..074F; SYRIAC
4734             0x0750,   // 0750..077F; ARABIC
4735             0x0780,   // 0780..07B1; THAANA
4736             0x07B2,   // 07B2..07BF; UNKNOWN
4737             0x07C0,   // 07C0..07FA; NKO
4738             0x07FB,   // 07FB..07FF; UNKNOWN
4739             0x0800,   // 0800..082D; SAMARITAN
4740             0x082E,   // 082E..082F; UNKNOWN
4741             0x0830,   // 0830..083E; SAMARITAN
4742             0x083F,   // 083F      ; UNKNOWN
4743             0x0840,   // 0840..085B; MANDAIC
4744             0x085C,   // 085C..085D; UNKNOWN
4745             0x085E,   // 085E      ; MANDAIC
4746             0x085F,   // 085F      ; UNKNOWN
4747             0x0860,   // 0860..086A; SYRIAC
4748             0x086B,   // 086B..089F; UNKNOWN
4749             0x08A0,   // 08A0..08B4; ARABIC
4750             0x08B5,   // 08B5      ; UNKNOWN
4751             0x08B6,   // 08B6..08BD; ARABIC
4752             0x08BE,   // 08BE..08D3; UNKNOWN
4753             0x08D4,   // 08D4..08E1; ARABIC
4754             0x08E2,   // 08E2      ; COMMON
4755             0x08E3,   // 08E3..08FF; ARABIC
4756             0x0900,   // 0900..0950; DEVANAGARI
4757             0x0951,   // 0951..0952; INHERITED
4758             0x0953,   // 0953..0963; DEVANAGARI
4759             0x0964,   // 0964..0965; COMMON
4760             0x0966,   // 0966..097F; DEVANAGARI
4761             0x0980,   // 0980..0983; BENGALI
4762             0x0984,   // 0984      ; UNKNOWN
4763             0x0985,   // 0985..098C; BENGALI
4764             0x098D,   // 098D..098E; UNKNOWN
4765             0x098F,   // 098F..0990; BENGALI
4766             0x0991,   // 0991..0992; UNKNOWN
4767             0x0993,   // 0993..09A8; BENGALI
4768             0x09A9,   // 09A9      ; UNKNOWN
4769             0x09AA,   // 09AA..09B0; BENGALI
4770             0x09B1,   // 09B1      ; UNKNOWN
4771             0x09B2,   // 09B2      ; BENGALI
4772             0x09B3,   // 09B3..09B5; UNKNOWN
4773             0x09B6,   // 09B6..09B9; BENGALI
4774             0x09BA,   // 09BA..09BB; UNKNOWN
4775             0x09BC,   // 09BC..09C4; BENGALI
4776             0x09C5,   // 09C5..09C6; UNKNOWN
4777             0x09C7,   // 09C7..09C8; BENGALI
4778             0x09C9,   // 09C9..09CA; UNKNOWN
4779             0x09CB,   // 09CB..09CE; BENGALI
4780             0x09CF,   // 09CF..09D6; UNKNOWN
4781             0x09D7,   // 09D7      ; BENGALI
4782             0x09D8,   // 09D8..09DB; UNKNOWN
4783             0x09DC,   // 09DC..09DD; BENGALI
4784             0x09DE,   // 09DE      ; UNKNOWN
4785             0x09DF,   // 09DF..09E3; BENGALI
4786             0x09E4,   // 09E4..09E5; UNKNOWN
4787             0x09E6,   // 09E6..09FD; BENGALI
4788             0x09FE,   // 09FE..0A00; UNKNOWN
4789             0x0A01,   // 0A01..0A03; GURMUKHI
4790             0x0A04,   // 0A04      ; UNKNOWN
4791             0x0A05,   // 0A05..0A0A; GURMUKHI
4792             0x0A0B,   // 0A0B..0A0E; UNKNOWN
4793             0x0A0F,   // 0A0F..0A10; GURMUKHI
4794             0x0A11,   // 0A11..0A12; UNKNOWN
4795             0x0A13,   // 0A13..0A28; GURMUKHI
4796             0x0A29,   // 0A29      ; UNKNOWN
4797             0x0A2A,   // 0A2A..0A30; GURMUKHI
4798             0x0A31,   // 0A31      ; UNKNOWN
4799             0x0A32,   // 0A32..0A33; GURMUKHI
4800             0x0A34,   // 0A34      ; UNKNOWN
4801             0x0A35,   // 0A35..0A36; GURMUKHI
4802             0x0A37,   // 0A37      ; UNKNOWN
4803             0x0A38,   // 0A38..0A39; GURMUKHI
4804             0x0A3A,   // 0A3A..0A3B; UNKNOWN
4805             0x0A3C,   // 0A3C      ; GURMUKHI
4806             0x0A3D,   // 0A3D      ; UNKNOWN
4807             0x0A3E,   // 0A3E..0A42; GURMUKHI
4808             0x0A43,   // 0A43..0A46; UNKNOWN
4809             0x0A47,   // 0A47..0A48; GURMUKHI
4810             0x0A49,   // 0A49..0A4A; UNKNOWN
4811             0x0A4B,   // 0A4B..0A4D; GURMUKHI
4812             0x0A4E,   // 0A4E..0A50; UNKNOWN
4813             0x0A51,   // 0A51      ; GURMUKHI
4814             0x0A52,   // 0A52..0A58; UNKNOWN
4815             0x0A59,   // 0A59..0A5C; GURMUKHI
4816             0x0A5D,   // 0A5D      ; UNKNOWN
4817             0x0A5E,   // 0A5E      ; GURMUKHI
4818             0x0A5F,   // 0A5F..0A65; UNKNOWN
4819             0x0A66,   // 0A66..0A75; GURMUKHI
4820             0x0A76,   // 0A76..0A80; UNKNOWN
4821             0x0A81,   // 0A81..0A83; GUJARATI
4822             0x0A84,   // 0A84      ; UNKNOWN
4823             0x0A85,   // 0A85..0A8D; GUJARATI
4824             0x0A8E,   // 0A8E      ; UNKNOWN
4825             0x0A8F,   // 0A8F..0A91; GUJARATI
4826             0x0A92,   // 0A92      ; UNKNOWN
4827             0x0A93,   // 0A93..0AA8; GUJARATI
4828             0x0AA9,   // 0AA9      ; UNKNOWN
4829             0x0AAA,   // 0AAA..0AB0; GUJARATI
4830             0x0AB1,   // 0AB1      ; UNKNOWN
4831             0x0AB2,   // 0AB2..0AB3; GUJARATI
4832             0x0AB4,   // 0AB4      ; UNKNOWN
4833             0x0AB5,   // 0AB5..0AB9; GUJARATI
4834             0x0ABA,   // 0ABA..0ABB; UNKNOWN
4835             0x0ABC,   // 0ABC..0AC5; GUJARATI
4836             0x0AC6,   // 0AC6      ; UNKNOWN
4837             0x0AC7,   // 0AC7..0AC9; GUJARATI
4838             0x0ACA,   // 0ACA      ; UNKNOWN
4839             0x0ACB,   // 0ACB..0ACD; GUJARATI
4840             0x0ACE,   // 0ACE..0ACF; UNKNOWN
4841             0x0AD0,   // 0AD0      ; GUJARATI
4842             0x0AD1,   // 0AD1..0ADF; UNKNOWN
4843             0x0AE0,   // 0AE0..0AE3; GUJARATI
4844             0x0AE4,   // 0AE4..0AE5; UNKNOWN
4845             0x0AE6,   // 0AE6..0AF1; GUJARATI
4846             0x0AF2,   // 0AF2..0AF8; UNKNOWN
4847             0x0AF9,   // 0AF9..0AFF; GUJARATI
4848             0x0B00,   // 0B00      ; UNKNOWN
4849             0x0B01,   // 0B01..0B03; ORIYA
4850             0x0B04,   // 0B04      ; UNKNOWN
4851             0x0B05,   // 0B05..0B0C; ORIYA
4852             0x0B0D,   // 0B0D..0B0E; UNKNOWN
4853             0x0B0F,   // 0B0F..0B10; ORIYA
4854             0x0B11,   // 0B11..0B12; UNKNOWN
4855             0x0B13,   // 0B13..0B28; ORIYA
4856             0x0B29,   // 0B29      ; UNKNOWN
4857             0x0B2A,   // 0B2A..0B30; ORIYA
4858             0x0B31,   // 0B31      ; UNKNOWN
4859             0x0B32,   // 0B32..0B33; ORIYA
4860             0x0B34,   // 0B34      ; UNKNOWN
4861             0x0B35,   // 0B35..0B39; ORIYA
4862             0x0B3A,   // 0B3A..0B3B; UNKNOWN
4863             0x0B3C,   // 0B3C..0B44; ORIYA
4864             0x0B45,   // 0B45..0B46; UNKNOWN
4865             0x0B47,   // 0B47..0B48; ORIYA
4866             0x0B49,   // 0B49..0B4A; UNKNOWN
4867             0x0B4B,   // 0B4B..0B4D; ORIYA
4868             0x0B4E,   // 0B4E..0B55; UNKNOWN
4869             0x0B56,   // 0B56..0B57; ORIYA
4870             0x0B58,   // 0B58..0B5B; UNKNOWN
4871             0x0B5C,   // 0B5C..0B5D; ORIYA
4872             0x0B5E,   // 0B5E      ; UNKNOWN
4873             0x0B5F,   // 0B5F..0B63; ORIYA
4874             0x0B64,   // 0B64..0B65; UNKNOWN
4875             0x0B66,   // 0B66..0B77; ORIYA
4876             0x0B78,   // 0B78..0B81; UNKNOWN
4877             0x0B82,   // 0B82..0B83; TAMIL
4878             0x0B84,   // 0B84      ; UNKNOWN
4879             0x0B85,   // 0B85..0B8A; TAMIL
4880             0x0B8B,   // 0B8B..0B8D; UNKNOWN
4881             0x0B8E,   // 0B8E..0B90; TAMIL
4882             0x0B91,   // 0B91      ; UNKNOWN
4883             0x0B92,   // 0B92..0B95; TAMIL
4884             0x0B96,   // 0B96..0B98; UNKNOWN
4885             0x0B99,   // 0B99..0B9A; TAMIL
4886             0x0B9B,   // 0B9B      ; UNKNOWN
4887             0x0B9C,   // 0B9C      ; TAMIL
4888             0x0B9D,   // 0B9D      ; UNKNOWN
4889             0x0B9E,   // 0B9E..0B9F; TAMIL
4890             0x0BA0,   // 0BA0..0BA2; UNKNOWN
4891             0x0BA3,   // 0BA3..0BA4; TAMIL
4892             0x0BA5,   // 0BA5..0BA7; UNKNOWN
4893             0x0BA8,   // 0BA8..0BAA; TAMIL
4894             0x0BAB,   // 0BAB..0BAD; UNKNOWN
4895             0x0BAE,   // 0BAE..0BB9; TAMIL
4896             0x0BBA,   // 0BBA..0BBD; UNKNOWN
4897             0x0BBE,   // 0BBE..0BC2; TAMIL
4898             0x0BC3,   // 0BC3..0BC5; UNKNOWN
4899             0x0BC6,   // 0BC6..0BC8; TAMIL
4900             0x0BC9,   // 0BC9      ; UNKNOWN
4901             0x0BCA,   // 0BCA..0BCD; TAMIL
4902             0x0BCE,   // 0BCE..0BCF; UNKNOWN
4903             0x0BD0,   // 0BD0      ; TAMIL
4904             0x0BD1,   // 0BD1..0BD6; UNKNOWN
4905             0x0BD7,   // 0BD7      ; TAMIL
4906             0x0BD8,   // 0BD8..0BE5; UNKNOWN
4907             0x0BE6,   // 0BE6..0BFA; TAMIL
4908             0x0BFB,   // 0BFB..0BFF; UNKNOWN
4909             0x0C00,   // 0C00..0C03; TELUGU
4910             0x0C04,   // 0C04      ; UNKNOWN
4911             0x0C05,   // 0C05..0C0C; TELUGU
4912             0x0C0D,   // 0C0D      ; UNKNOWN
4913             0x0C0E,   // 0C0E..0C10; TELUGU
4914             0x0C11,   // 0C11      ; UNKNOWN
4915             0x0C12,   // 0C12..0C28; TELUGU
4916             0x0C29,   // 0C29      ; UNKNOWN
4917             0x0C2A,   // 0C2A..0C39; TELUGU
4918             0x0C3A,   // 0C3A..0C3C; UNKNOWN
4919             0x0C3D,   // 0C3D..0C44; TELUGU
4920             0x0C45,   // 0C45      ; UNKNOWN
4921             0x0C46,   // 0C46..0C48; TELUGU
4922             0x0C49,   // 0C49      ; UNKNOWN
4923             0x0C4A,   // 0C4A..0C4D; TELUGU
4924             0x0C4E,   // 0C4E..0C54; UNKNOWN
4925             0x0C55,   // 0C55..0C56; TELUGU
4926             0x0C57,   // 0C57      ; UNKNOWN
4927             0x0C58,   // 0C58..0C5A; TELUGU
4928             0x0C5B,   // 0C5B..0C5F; UNKNOWN
4929             0x0C60,   // 0C60..0C63; TELUGU
4930             0x0C64,   // 0C64..0C65; UNKNOWN
4931             0x0C66,   // 0C66..0C6F; TELUGU
4932             0x0C70,   // 0C70..0C77; UNKNOWN
4933             0x0C78,   // 0C78..0C7F; TELUGU
4934             0x0C80,   // 0C80..0C83; KANNADA
4935             0x0C84,   // 0C84      ; UNKNOWN
4936             0x0C85,   // 0C85..0C8C; KANNADA
4937             0x0C8D,   // 0C8D      ; UNKNOWN
4938             0x0C8E,   // 0C8E..0C90; KANNADA
4939             0x0C91,   // 0C91      ; UNKNOWN
4940             0x0C92,   // 0C92..0CA8; KANNADA
4941             0x0CA9,   // 0CA9      ; UNKNOWN
4942             0x0CAA,   // 0CAA..0CB3; KANNADA
4943             0x0CB4,   // 0CB4      ; UNKNOWN
4944             0x0CB5,   // 0CB5..0CB9; KANNADA
4945             0x0CBA,   // 0CBA..0CBB; UNKNOWN
4946             0x0CBC,   // 0CBC..0CC4; KANNADA
4947             0x0CC5,   // 0CC5      ; UNKNOWN
4948             0x0CC6,   // 0CC6..0CC8; KANNADA
4949             0x0CC9,   // 0CC9      ; UNKNOWN
4950             0x0CCA,   // 0CCA..0CCD; KANNADA
4951             0x0CCE,   // 0CCE..0CD4; UNKNOWN
4952             0x0CD5,   // 0CD5..0CD6; KANNADA
4953             0x0CD7,   // 0CD7..0CDD; UNKNOWN
4954             0x0CDE,   // 0CDE      ; KANNADA
4955             0x0CDF,   // 0CDF      ; UNKNOWN
4956             0x0CE0,   // 0CE0..0CE3; KANNADA
4957             0x0CE4,   // 0CE4..0CE5; UNKNOWN
4958             0x0CE6,   // 0CE6..0CEF; KANNADA
4959             0x0CF0,   // 0CF0      ; UNKNOWN
4960             0x0CF1,   // 0CF1..0CF2; KANNADA
4961             0x0CF3,   // 0CF3..0CFF; UNKNOWN
4962             0x0D00,   // 0D00..0D03; MALAYALAM
4963             0x0D04,   // 0D04      ; UNKNOWN
4964             0x0D05,   // 0D05..0D0C; MALAYALAM
4965             0x0D0D,   // 0D0D      ; UNKNOWN
4966             0x0D0E,   // 0D0E..0D10; MALAYALAM
4967             0x0D11,   // 0D11      ; UNKNOWN
4968             0x0D12,   // 0D12..0D44; MALAYALAM
4969             0x0D45,   // 0D45      ; UNKNOWN
4970             0x0D46,   // 0D46..0D48; MALAYALAM
4971             0x0D49,   // 0D49      ; UNKNOWN
4972             0x0D4A,   // 0D4A..0D4F; MALAYALAM
4973             0x0D50,   // 0D50..0D53; UNKNOWN
4974             0x0D54,   // 0D54..0D63; MALAYALAM
4975             0x0D64,   // 0D64..0D65; UNKNOWN
4976             0x0D66,   // 0D66..0D7F; MALAYALAM
4977             0x0D80,   // 0D80..0D81; UNKNOWN
4978             0x0D82,   // 0D82..0D83; SINHALA
4979             0x0D84,   // 0D84      ; UNKNOWN
4980             0x0D85,   // 0D85..0D96; SINHALA
4981             0x0D97,   // 0D97..0D99; UNKNOWN
4982             0x0D9A,   // 0D9A..0DB1; SINHALA
4983             0x0DB2,   // 0DB2      ; UNKNOWN
4984             0x0DB3,   // 0DB3..0DBB; SINHALA
4985             0x0DBC,   // 0DBC      ; UNKNOWN
4986             0x0DBD,   // 0DBD      ; SINHALA
4987             0x0DBE,   // 0DBE..0DBF; UNKNOWN
4988             0x0DC0,   // 0DC0..0DC6; SINHALA
4989             0x0DC7,   // 0DC7..0DC9; UNKNOWN
4990             0x0DCA,   // 0DCA      ; SINHALA
4991             0x0DCB,   // 0DCB..0DCE; UNKNOWN
4992             0x0DCF,   // 0DCF..0DD4; SINHALA
4993             0x0DD5,   // 0DD5      ; UNKNOWN
4994             0x0DD6,   // 0DD6      ; SINHALA
4995             0x0DD7,   // 0DD7      ; UNKNOWN
4996             0x0DD8,   // 0DD8..0DDF; SINHALA
4997             0x0DE0,   // 0DE0..0DE5; UNKNOWN
4998             0x0DE6,   // 0DE6..0DEF; SINHALA
4999             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5000             0x0DF2,   // 0DF2..0DF4; SINHALA
5001             0x0DF5,   // 0DF5..0E00; UNKNOWN
5002             0x0E01,   // 0E01..0E3A; THAI
5003             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5004             0x0E3F,   // 0E3F      ; COMMON
5005             0x0E40,   // 0E40..0E5B; THAI
5006             0x0E5C,   // 0E5C..0E80; UNKNOWN
5007             0x0E81,   // 0E81..0E82; LAO
5008             0x0E83,   // 0E83      ; UNKNOWN
5009             0x0E84,   // 0E84      ; LAO
5010             0x0E85,   // 0E85..0E86; UNKNOWN
5011             0x0E87,   // 0E87..0E88; LAO
5012             0x0E89,   // 0E89      ; UNKNOWN
5013             0x0E8A,   // 0E8A      ; LAO
5014             0x0E8B,   // 0E8B..0E8C; UNKNOWN
5015             0x0E8D,   // 0E8D      ; LAO
5016             0x0E8E,   // 0E8E..0E93; UNKNOWN
5017             0x0E94,   // 0E94..0E97; LAO
5018             0x0E98,   // 0E98      ; UNKNOWN
5019             0x0E99,   // 0E99..0E9F; LAO
5020             0x0EA0,   // 0EA0      ; UNKNOWN
5021             0x0EA1,   // 0EA1..0EA3; LAO
5022             0x0EA4,   // 0EA4      ; UNKNOWN
5023             0x0EA5,   // 0EA5      ; LAO
5024             0x0EA6,   // 0EA6      ; UNKNOWN
5025             0x0EA7,   // 0EA7      ; LAO
5026             0x0EA8,   // 0EA8..0EA9; UNKNOWN
5027             0x0EAA,   // 0EAA..0EAB; LAO
5028             0x0EAC,   // 0EAC      ; UNKNOWN
5029             0x0EAD,   // 0EAD..0EB9; LAO
5030             0x0EBA,   // 0EBA      ; UNKNOWN
5031             0x0EBB,   // 0EBB..0EBD; LAO
5032             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5033             0x0EC0,   // 0EC0..0EC4; LAO
5034             0x0EC5,   // 0EC5      ; UNKNOWN
5035             0x0EC6,   // 0EC6      ; LAO
5036             0x0EC7,   // 0EC7      ; UNKNOWN
5037             0x0EC8,   // 0EC8..0ECD; LAO
5038             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5039             0x0ED0,   // 0ED0..0ED9; LAO
5040             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5041             0x0EDC,   // 0EDC..0EDF; LAO
5042             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5043             0x0F00,   // 0F00..0F47; TIBETAN
5044             0x0F48,   // 0F48      ; UNKNOWN
5045             0x0F49,   // 0F49..0F6C; TIBETAN
5046             0x0F6D,   // 0F6D..0F70; UNKNOWN
5047             0x0F71,   // 0F71..0F97; TIBETAN
5048             0x0F98,   // 0F98      ; UNKNOWN
5049             0x0F99,   // 0F99..0FBC; TIBETAN
5050             0x0FBD,   // 0FBD      ; UNKNOWN
5051             0x0FBE,   // 0FBE..0FCC; TIBETAN
5052             0x0FCD,   // 0FCD      ; UNKNOWN
5053             0x0FCE,   // 0FCE..0FD4; TIBETAN
5054             0x0FD5,   // 0FD5..0FD8; COMMON
5055             0x0FD9,   // 0FD9..0FDA; TIBETAN
5056             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5057             0x1000,   // 1000..109F; MYANMAR
5058             0x10A0,   // 10A0..10C5; GEORGIAN
5059             0x10C6,   // 10C6      ; UNKNOWN
5060             0x10C7,   // 10C7      ; GEORGIAN
5061             0x10C8,   // 10C8..10CC; UNKNOWN
5062             0x10CD,   // 10CD      ; GEORGIAN
5063             0x10CE,   // 10CE..10CF; UNKNOWN
5064             0x10D0,   // 10D0..10FA; GEORGIAN
5065             0x10FB,   // 10FB      ; COMMON
5066             0x10FC,   // 10FC..10FF; GEORGIAN
5067             0x1100,   // 1100..11FF; HANGUL
5068             0x1200,   // 1200..1248; ETHIOPIC
5069             0x1249,   // 1249      ; UNKNOWN
5070             0x124A,   // 124A..124D; ETHIOPIC
5071             0x124E,   // 124E..124F; UNKNOWN
5072             0x1250,   // 1250..1256; ETHIOPIC
5073             0x1257,   // 1257      ; UNKNOWN
5074             0x1258,   // 1258      ; ETHIOPIC
5075             0x1259,   // 1259      ; UNKNOWN
5076             0x125A,   // 125A..125D; ETHIOPIC
5077             0x125E,   // 125E..125F; UNKNOWN
5078             0x1260,   // 1260..1288; ETHIOPIC
5079             0x1289,   // 1289      ; UNKNOWN
5080             0x128A,   // 128A..128D; ETHIOPIC
5081             0x128E,   // 128E..128F; UNKNOWN
5082             0x1290,   // 1290..12B0; ETHIOPIC
5083             0x12B1,   // 12B1      ; UNKNOWN
5084             0x12B2,   // 12B2..12B5; ETHIOPIC
5085             0x12B6,   // 12B6..12B7; UNKNOWN
5086             0x12B8,   // 12B8..12BE; ETHIOPIC
5087             0x12BF,   // 12BF      ; UNKNOWN
5088             0x12C0,   // 12C0      ; ETHIOPIC
5089             0x12C1,   // 12C1      ; UNKNOWN
5090             0x12C2,   // 12C2..12C5; ETHIOPIC
5091             0x12C6,   // 12C6..12C7; UNKNOWN
5092             0x12C8,   // 12C8..12D6; ETHIOPIC
5093             0x12D7,   // 12D7      ; UNKNOWN
5094             0x12D8,   // 12D8..1310; ETHIOPIC
5095             0x1311,   // 1311      ; UNKNOWN
5096             0x1312,   // 1312..1315; ETHIOPIC
5097             0x1316,   // 1316..1317; UNKNOWN
5098             0x1318,   // 1318..135A; ETHIOPIC
5099             0x135B,   // 135B..135C; UNKNOWN
5100             0x135D,   // 135D..137C; ETHIOPIC
5101             0x137D,   // 137D..137F; UNKNOWN
5102             0x1380,   // 1380..1399; ETHIOPIC
5103             0x139A,   // 139A..139F; UNKNOWN
5104             0x13A0,   // 13A0..13F5; CHEROKEE
5105             0x13F6,   // 13F6..13F7; UNKNOWN
5106             0x13F8,   // 13F8..13FD; CHEROKEE
5107             0x13FE,   // 13FE..13FF; UNKNOWN
5108             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5109             0x1680,   // 1680..169C; OGHAM
5110             0x169D,   // 169D..169F; UNKNOWN
5111             0x16A0,   // 16A0..16EA; RUNIC
5112             0x16EB,   // 16EB..16ED; COMMON
5113             0x16EE,   // 16EE..16F8; RUNIC
5114             0x16F9,   // 16F9..16FF; UNKNOWN
5115             0x1700,   // 1700..170C; TAGALOG
5116             0x170D,   // 170D      ; UNKNOWN
5117             0x170E,   // 170E..1714; TAGALOG
5118             0x1715,   // 1715..171F; UNKNOWN
5119             0x1720,   // 1720..1734; HANUNOO
5120             0x1735,   // 1735..1736; COMMON
5121             0x1737,   // 1737..173F; UNKNOWN
5122             0x1740,   // 1740..1753; BUHID
5123             0x1754,   // 1754..175F; UNKNOWN
5124             0x1760,   // 1760..176C; TAGBANWA
5125             0x176D,   // 176D      ; UNKNOWN
5126             0x176E,   // 176E..1770; TAGBANWA
5127             0x1771,   // 1771      ; UNKNOWN
5128             0x1772,   // 1772..1773; TAGBANWA
5129             0x1774,   // 1774..177F; UNKNOWN
5130             0x1780,   // 1780..17DD; KHMER
5131             0x17DE,   // 17DE..17DF; UNKNOWN
5132             0x17E0,   // 17E0..17E9; KHMER
5133             0x17EA,   // 17EA..17EF; UNKNOWN
5134             0x17F0,   // 17F0..17F9; KHMER
5135             0x17FA,   // 17FA..17FF; UNKNOWN
5136             0x1800,   // 1800..1801; MONGOLIAN
5137             0x1802,   // 1802..1803; COMMON
5138             0x1804,   // 1804      ; MONGOLIAN
5139             0x1805,   // 1805      ; COMMON
5140             0x1806,   // 1806..180E; MONGOLIAN
5141             0x180F,   // 180F      ; UNKNOWN
5142             0x1810,   // 1810..1819; MONGOLIAN
5143             0x181A,   // 181A..181F; UNKNOWN
5144             0x1820,   // 1820..1877; MONGOLIAN
5145             0x1878,   // 1878..187F; UNKNOWN
5146             0x1880,   // 1880..18AA; MONGOLIAN
5147             0x18AB,   // 18AB..18AF; UNKNOWN
5148             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5149             0x18F6,   // 18F6..18FF; UNKNOWN
5150             0x1900,   // 1900..191E; LIMBU
5151             0x191F,   // 191F      ; UNKNOWN
5152             0x1920,   // 1920..192B; LIMBU
5153             0x192C,   // 192C..192F; UNKNOWN
5154             0x1930,   // 1930..193B; LIMBU
5155             0x193C,   // 193C..193F; UNKNOWN
5156             0x1940,   // 1940      ; LIMBU
5157             0x1941,   // 1941..1943; UNKNOWN
5158             0x1944,   // 1944..194F; LIMBU
5159             0x1950,   // 1950..196D; TAI_LE
5160             0x196E,   // 196E..196F; UNKNOWN
5161             0x1970,   // 1970..1974; TAI_LE
5162             0x1975,   // 1975..197F; UNKNOWN
5163             0x1980,   // 1980..19AB; NEW_TAI_LUE
5164             0x19AC,   // 19AC..19AF; UNKNOWN
5165             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5166             0x19CA,   // 19CA..19CF; UNKNOWN
5167             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5168             0x19DB,   // 19DB..19DD; UNKNOWN
5169             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5170             0x19E0,   // 19E0..19FF; KHMER
5171             0x1A00,   // 1A00..1A1B; BUGINESE
5172             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5173             0x1A1E,   // 1A1E..1A1F; BUGINESE
5174             0x1A20,   // 1A20..1A5E; TAI_THAM
5175             0x1A5F,   // 1A5F      ; UNKNOWN
5176             0x1A60,   // 1A60..1A7C; TAI_THAM
5177             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5178             0x1A7F,   // 1A7F..1A89; TAI_THAM
5179             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5180             0x1A90,   // 1A90..1A99; TAI_THAM
5181             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5182             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5183             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5184             0x1AB0,   // 1AB0..1ABE; INHERITED
5185             0x1ABF,   // 1ABF..1AFF; UNKNOWN
5186             0x1B00,   // 1B00..1B4B; BALINESE
5187             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5188             0x1B50,   // 1B50..1B7C; BALINESE
5189             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5190             0x1B80,   // 1B80..1BBF; SUNDANESE
5191             0x1BC0,   // 1BC0..1BF3; BATAK
5192             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5193             0x1BFC,   // 1BFC..1BFF; BATAK
5194             0x1C00,   // 1C00..1C37; LEPCHA
5195             0x1C38,   // 1C38..1C3A; UNKNOWN
5196             0x1C3B,   // 1C3B..1C49; LEPCHA
5197             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5198             0x1C4D,   // 1C4D..1C4F; LEPCHA
5199             0x1C50,   // 1C50..1C7F; OL_CHIKI
5200             0x1C80,   // 1C80..1C88; CYRILLIC
5201             0x1C89,   // 1C89..1CBF; UNKNOWN
5202             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5203             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5204             0x1CD0,   // 1CD0..1CD2; INHERITED
5205             0x1CD3,   // 1CD3      ; COMMON
5206             0x1CD4,   // 1CD4..1CE0; INHERITED
5207             0x1CE1,   // 1CE1      ; COMMON
5208             0x1CE2,   // 1CE2..1CE8; INHERITED
5209             0x1CE9,   // 1CE9..1CEC; COMMON
5210             0x1CED,   // 1CED      ; INHERITED
5211             0x1CEE,   // 1CEE..1CF3; COMMON
5212             0x1CF4,   // 1CF4      ; INHERITED
5213             0x1CF5,   // 1CF5..1CF7; COMMON
5214             0x1CF8,   // 1CF8..1CF9; INHERITED
5215             0x1CFA,   // 1CFA..1CFF; UNKNOWN
5216             0x1D00,   // 1D00..1D25; LATIN
5217             0x1D26,   // 1D26..1D2A; GREEK
5218             0x1D2B,   // 1D2B      ; CYRILLIC
5219             0x1D2C,   // 1D2C..1D5C; LATIN
5220             0x1D5D,   // 1D5D..1D61; GREEK
5221             0x1D62,   // 1D62..1D65; LATIN
5222             0x1D66,   // 1D66..1D6A; GREEK
5223             0x1D6B,   // 1D6B..1D77; LATIN
5224             0x1D78,   // 1D78      ; CYRILLIC
5225             0x1D79,   // 1D79..1DBE; LATIN
5226             0x1DBF,   // 1DBF      ; GREEK
5227             0x1DC0,   // 1DC0..1DF9; INHERITED
5228             0x1DFA,   // 1DFA      ; UNKNOWN
5229             0x1DFB,   // 1DFB..1DFF; INHERITED
5230             0x1E00,   // 1E00..1EFF; LATIN
5231             0x1F00,   // 1F00..1F15; GREEK
5232             0x1F16,   // 1F16..1F17; UNKNOWN
5233             0x1F18,   // 1F18..1F1D; GREEK
5234             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5235             0x1F20,   // 1F20..1F45; GREEK
5236             0x1F46,   // 1F46..1F47; UNKNOWN
5237             0x1F48,   // 1F48..1F4D; GREEK
5238             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5239             0x1F50,   // 1F50..1F57; GREEK
5240             0x1F58,   // 1F58      ; UNKNOWN
5241             0x1F59,   // 1F59      ; GREEK
5242             0x1F5A,   // 1F5A      ; UNKNOWN
5243             0x1F5B,   // 1F5B      ; GREEK
5244             0x1F5C,   // 1F5C      ; UNKNOWN
5245             0x1F5D,   // 1F5D      ; GREEK
5246             0x1F5E,   // 1F5E      ; UNKNOWN
5247             0x1F5F,   // 1F5F..1F7D; GREEK
5248             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5249             0x1F80,   // 1F80..1FB4; GREEK
5250             0x1FB5,   // 1FB5      ; UNKNOWN
5251             0x1FB6,   // 1FB6..1FC4; GREEK
5252             0x1FC5,   // 1FC5      ; UNKNOWN
5253             0x1FC6,   // 1FC6..1FD3; GREEK
5254             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5255             0x1FD6,   // 1FD6..1FDB; GREEK
5256             0x1FDC,   // 1FDC      ; UNKNOWN
5257             0x1FDD,   // 1FDD..1FEF; GREEK
5258             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5259             0x1FF2,   // 1FF2..1FF4; GREEK
5260             0x1FF5,   // 1FF5      ; UNKNOWN
5261             0x1FF6,   // 1FF6..1FFE; GREEK
5262             0x1FFF,   // 1FFF      ; UNKNOWN
5263             0x2000,   // 2000..200B; COMMON
5264             0x200C,   // 200C..200D; INHERITED
5265             0x200E,   // 200E..2064; COMMON
5266             0x2065,   // 2065      ; UNKNOWN
5267             0x2066,   // 2066..2070; COMMON
5268             0x2071,   // 2071      ; LATIN
5269             0x2072,   // 2072..2073; UNKNOWN
5270             0x2074,   // 2074..207E; COMMON
5271             0x207F,   // 207F      ; LATIN
5272             0x2080,   // 2080..208E; COMMON
5273             0x208F,   // 208F      ; UNKNOWN
5274             0x2090,   // 2090..209C; LATIN
5275             0x209D,   // 209D..209F; UNKNOWN
5276             0x20A0,   // 20A0..20BF; COMMON
5277             0x20C0,   // 20C0..20CF; UNKNOWN
5278             0x20D0,   // 20D0..20F0; INHERITED
5279             0x20F1,   // 20F1..20FF; UNKNOWN
5280             0x2100,   // 2100..2125; COMMON
5281             0x2126,   // 2126      ; GREEK
5282             0x2127,   // 2127..2129; COMMON
5283             0x212A,   // 212A..212B; LATIN
5284             0x212C,   // 212C..2131; COMMON
5285             0x2132,   // 2132      ; LATIN
5286             0x2133,   // 2133..214D; COMMON
5287             0x214E,   // 214E      ; LATIN
5288             0x214F,   // 214F..215F; COMMON
5289             0x2160,   // 2160..2188; LATIN
5290             0x2189,   // 2189..218B; COMMON
5291             0x218C,   // 218C..218F; UNKNOWN
5292             0x2190,   // 2190..2426; COMMON
5293             0x2427,   // 2427..243F; UNKNOWN
5294             0x2440,   // 2440..244A; COMMON
5295             0x244B,   // 244B..245F; UNKNOWN
5296             0x2460,   // 2460..27FF; COMMON
5297             0x2800,   // 2800..28FF; BRAILLE
5298             0x2900,   // 2900..2B73; COMMON
5299             0x2B74,   // 2B74..2B75; UNKNOWN
5300             0x2B76,   // 2B76..2B95; COMMON
5301             0x2B96,   // 2B96..2B97; UNKNOWN
5302             0x2B98,   // 2B98..2BB9; COMMON
5303             0x2BBA,   // 2BBA..2BBC; UNKNOWN
5304             0x2BBD,   // 2BBD..2BC8; COMMON
5305             0x2BC9,   // 2BC9      ; UNKNOWN
5306             0x2BCA,   // 2BCA..2BD2; COMMON
5307             0x2BD3,   // 2BD3..2BEB; UNKNOWN
5308             0x2BEC,   // 2BEC..2BEF; COMMON
5309             0x2BF0,   // 2BF0..2BFF; UNKNOWN
5310             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5311             0x2C2F,   // 2C2F      ; UNKNOWN
5312             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5313             0x2C5F,   // 2C5F      ; UNKNOWN
5314             0x2C60,   // 2C60..2C7F; LATIN
5315             0x2C80,   // 2C80..2CF3; COPTIC
5316             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5317             0x2CF9,   // 2CF9..2CFF; COPTIC
5318             0x2D00,   // 2D00..2D25; GEORGIAN
5319             0x2D26,   // 2D26      ; UNKNOWN
5320             0x2D27,   // 2D27      ; GEORGIAN
5321             0x2D28,   // 2D28..2D2C; UNKNOWN
5322             0x2D2D,   // 2D2D      ; GEORGIAN
5323             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5324             0x2D30,   // 2D30..2D67; TIFINAGH
5325             0x2D68,   // 2D68..2D6E; UNKNOWN
5326             0x2D6F,   // 2D6F..2D70; TIFINAGH
5327             0x2D71,   // 2D71..2D7E; UNKNOWN
5328             0x2D7F,   // 2D7F      ; TIFINAGH
5329             0x2D80,   // 2D80..2D96; ETHIOPIC
5330             0x2D97,   // 2D97..2D9F; UNKNOWN
5331             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5332             0x2DA7,   // 2DA7      ; UNKNOWN
5333             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5334             0x2DAF,   // 2DAF      ; UNKNOWN
5335             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5336             0x2DB7,   // 2DB7      ; UNKNOWN
5337             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5338             0x2DBF,   // 2DBF      ; UNKNOWN
5339             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5340             0x2DC7,   // 2DC7      ; UNKNOWN
5341             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5342             0x2DCF,   // 2DCF      ; UNKNOWN
5343             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5344             0x2DD7,   // 2DD7      ; UNKNOWN
5345             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5346             0x2DDF,   // 2DDF      ; UNKNOWN
5347             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5348             0x2E00,   // 2E00..2E49; COMMON
5349             0x2E4A,   // 2E4A..2E7F; UNKNOWN
5350             0x2E80,   // 2E80..2E99; HAN
5351             0x2E9A,   // 2E9A      ; UNKNOWN
5352             0x2E9B,   // 2E9B..2EF3; HAN
5353             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5354             0x2F00,   // 2F00..2FD5; HAN
5355             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5356             0x2FF0,   // 2FF0..2FFB; COMMON
5357             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5358             0x3000,   // 3000..3004; COMMON
5359             0x3005,   // 3005      ; HAN
5360             0x3006,   // 3006      ; COMMON
5361             0x3007,   // 3007      ; HAN
5362             0x3008,   // 3008..3020; COMMON
5363             0x3021,   // 3021..3029; HAN
5364             0x302A,   // 302A..302D; INHERITED
5365             0x302E,   // 302E..302F; HANGUL
5366             0x3030,   // 3030..3037; COMMON
5367             0x3038,   // 3038..303B; HAN
5368             0x303C,   // 303C..303F; COMMON
5369             0x3040,   // 3040      ; UNKNOWN
5370             0x3041,   // 3041..3096; HIRAGANA
5371             0x3097,   // 3097..3098; UNKNOWN
5372             0x3099,   // 3099..309A; INHERITED
5373             0x309B,   // 309B..309C; COMMON
5374             0x309D,   // 309D..309F; HIRAGANA
5375             0x30A0,   // 30A0      ; COMMON
5376             0x30A1,   // 30A1..30FA; KATAKANA
5377             0x30FB,   // 30FB..30FC; COMMON
5378             0x30FD,   // 30FD..30FF; KATAKANA
5379             0x3100,   // 3100..3104; UNKNOWN
5380             0x3105,   // 3105..312E; BOPOMOFO
5381             0x312F,   // 312F..3130; UNKNOWN
5382             0x3131,   // 3131..318E; HANGUL
5383             0x318F,   // 318F      ; UNKNOWN
5384             0x3190,   // 3190..319F; COMMON
5385             0x31A0,   // 31A0..31BA; BOPOMOFO
5386             0x31BB,   // 31BB..31BF; UNKNOWN
5387             0x31C0,   // 31C0..31E3; COMMON
5388             0x31E4,   // 31E4..31EF; UNKNOWN
5389             0x31F0,   // 31F0..31FF; KATAKANA
5390             0x3200,   // 3200..321E; HANGUL
5391             0x321F,   // 321F      ; UNKNOWN
5392             0x3220,   // 3220..325F; COMMON
5393             0x3260,   // 3260..327E; HANGUL
5394             0x327F,   // 327F..32CF; COMMON
5395             0x32D0,   // 32D0..32FE; KATAKANA
5396             0x32FF,   // 32FF      ; COMMON
5397             0x3300,   // 3300..3357; KATAKANA
5398             0x3358,   // 3358..33FF; COMMON
5399             0x3400,   // 3400..4DB5; HAN
5400             0x4DB6,   // 4DB6..4DBF; UNKNOWN
5401             0x4DC0,   // 4DC0..4DFF; COMMON
5402             0x4E00,   // 4E00..9FEA; HAN
5403             0x9FEB,   // 9FEB..9FFF; UNKNOWN
5404             0xA000,   // A000..A48C; YI
5405             0xA48D,   // A48D..A48F; UNKNOWN
5406             0xA490,   // A490..A4C6; YI
5407             0xA4C7,   // A4C7..A4CF; UNKNOWN
5408             0xA4D0,   // A4D0..A4FF; LISU
5409             0xA500,   // A500..A62B; VAI
5410             0xA62C,   // A62C..A63F; UNKNOWN
5411             0xA640,   // A640..A69F; CYRILLIC
5412             0xA6A0,   // A6A0..A6F7; BAMUM
5413             0xA6F8,   // A6F8..A6FF; UNKNOWN
5414             0xA700,   // A700..A721; COMMON
5415             0xA722,   // A722..A787; LATIN
5416             0xA788,   // A788..A78A; COMMON
5417             0xA78B,   // A78B..A7AE; LATIN
5418             0xA7AF,   // A7AF      ; UNKNOWN
5419             0xA7B0,   // A7B0..A7B7; LATIN
5420             0xA7B8,   // A7B8..A7F6; UNKNOWN
5421             0xA7F7,   // A7F7..A7FF; LATIN
5422             0xA800,   // A800..A82B; SYLOTI_NAGRI
5423             0xA82C,   // A82C..A82F; UNKNOWN
5424             0xA830,   // A830..A839; COMMON
5425             0xA83A,   // A83A..A83F; UNKNOWN
5426             0xA840,   // A840..A877; PHAGS_PA
5427             0xA878,   // A878..A87F; UNKNOWN
5428             0xA880,   // A880..A8C5; SAURASHTRA
5429             0xA8C6,   // A8C6..A8CD; UNKNOWN
5430             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5431             0xA8DA,   // A8DA..A8DF; UNKNOWN
5432             0xA8E0,   // A8E0..A8FD; DEVANAGARI
5433             0xA8FE,   // A8FE..A8FF; UNKNOWN
5434             0xA900,   // A900..A92D; KAYAH_LI
5435             0xA92E,   // A92E      ; COMMON
5436             0xA92F,   // A92F      ; KAYAH_LI
5437             0xA930,   // A930..A953; REJANG
5438             0xA954,   // A954..A95E; UNKNOWN
5439             0xA95F,   // A95F      ; REJANG
5440             0xA960,   // A960..A97C; HANGUL
5441             0xA97D,   // A97D..A97F; UNKNOWN
5442             0xA980,   // A980..A9CD; JAVANESE
5443             0xA9CE,   // A9CE      ; UNKNOWN
5444             0xA9CF,   // A9CF      ; COMMON
5445             0xA9D0,   // A9D0..A9D9; JAVANESE
5446             0xA9DA,   // A9DA..A9DD; UNKNOWN
5447             0xA9DE,   // A9DE..A9DF; JAVANESE
5448             0xA9E0,   // A9E0..A9FE; MYANMAR
5449             0xA9FF,   // A9FF      ; UNKNOWN
5450             0xAA00,   // AA00..AA36; CHAM
5451             0xAA37,   // AA37..AA3F; UNKNOWN
5452             0xAA40,   // AA40..AA4D; CHAM
5453             0xAA4E,   // AA4E..AA4F; UNKNOWN
5454             0xAA50,   // AA50..AA59; CHAM
5455             0xAA5A,   // AA5A..AA5B; UNKNOWN
5456             0xAA5C,   // AA5C..AA5F; CHAM
5457             0xAA60,   // AA60..AA7F; MYANMAR
5458             0xAA80,   // AA80..AAC2; TAI_VIET
5459             0xAAC3,   // AAC3..AADA; UNKNOWN
5460             0xAADB,   // AADB..AADF; TAI_VIET
5461             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5462             0xAAF7,   // AAF7..AB00; UNKNOWN
5463             0xAB01,   // AB01..AB06; ETHIOPIC
5464             0xAB07,   // AB07..AB08; UNKNOWN
5465             0xAB09,   // AB09..AB0E; ETHIOPIC
5466             0xAB0F,   // AB0F..AB10; UNKNOWN
5467             0xAB11,   // AB11..AB16; ETHIOPIC
5468             0xAB17,   // AB17..AB1F; UNKNOWN
5469             0xAB20,   // AB20..AB26; ETHIOPIC
5470             0xAB27,   // AB27      ; UNKNOWN
5471             0xAB28,   // AB28..AB2E; ETHIOPIC
5472             0xAB2F,   // AB2F      ; UNKNOWN
5473             0xAB30,   // AB30..AB5A; LATIN
5474             0xAB5B,   // AB5B      ; COMMON
5475             0xAB5C,   // AB5C..AB64; LATIN
5476             0xAB65,   // AB65      ; GREEK
5477             0xAB66,   // AB66..AB6F; UNKNOWN
5478             0xAB70,   // AB70..ABBF; CHEROKEE
5479             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5480             0xABEE,   // ABEE..ABEF; UNKNOWN
5481             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5482             0xABFA,   // ABFA..ABFF; UNKNOWN
5483             0xAC00,   // AC00..D7A3; HANGUL
5484             0xD7A4,   // D7A4..D7AF; UNKNOWN
5485             0xD7B0,   // D7B0..D7C6; HANGUL
5486             0xD7C7,   // D7C7..D7CA; UNKNOWN
5487             0xD7CB,   // D7CB..D7FB; HANGUL
5488             0xD7FC,   // D7FC..F8FF; UNKNOWN
5489             0xF900,   // F900..FA6D; HAN
5490             0xFA6E,   // FA6E..FA6F; UNKNOWN
5491             0xFA70,   // FA70..FAD9; HAN
5492             0xFADA,   // FADA..FAFF; UNKNOWN
5493             0xFB00,   // FB00..FB06; LATIN
5494             0xFB07,   // FB07..FB12; UNKNOWN
5495             0xFB13,   // FB13..FB17; ARMENIAN
5496             0xFB18,   // FB18..FB1C; UNKNOWN
5497             0xFB1D,   // FB1D..FB36; HEBREW
5498             0xFB37,   // FB37      ; UNKNOWN
5499             0xFB38,   // FB38..FB3C; HEBREW
5500             0xFB3D,   // FB3D      ; UNKNOWN
5501             0xFB3E,   // FB3E      ; HEBREW
5502             0xFB3F,   // FB3F      ; UNKNOWN
5503             0xFB40,   // FB40..FB41; HEBREW
5504             0xFB42,   // FB42      ; UNKNOWN
5505             0xFB43,   // FB43..FB44; HEBREW
5506             0xFB45,   // FB45      ; UNKNOWN
5507             0xFB46,   // FB46..FB4F; HEBREW
5508             0xFB50,   // FB50..FBC1; ARABIC
5509             0xFBC2,   // FBC2..FBD2; UNKNOWN
5510             0xFBD3,   // FBD3..FD3D; ARABIC
5511             0xFD3E,   // FD3E..FD3F; COMMON
5512             0xFD40,   // FD40..FD4F; UNKNOWN
5513             0xFD50,   // FD50..FD8F; ARABIC
5514             0xFD90,   // FD90..FD91; UNKNOWN
5515             0xFD92,   // FD92..FDC7; ARABIC
5516             0xFDC8,   // FDC8..FDEF; UNKNOWN
5517             0xFDF0,   // FDF0..FDFD; ARABIC
5518             0xFDFE,   // FDFE..FDFF; UNKNOWN
5519             0xFE00,   // FE00..FE0F; INHERITED
5520             0xFE10,   // FE10..FE19; COMMON
5521             0xFE1A,   // FE1A..FE1F; UNKNOWN
5522             0xFE20,   // FE20..FE2D; INHERITED
5523             0xFE2E,   // FE2E..FE2F; CYRILLIC
5524             0xFE30,   // FE30..FE52; COMMON
5525             0xFE53,   // FE53      ; UNKNOWN
5526             0xFE54,   // FE54..FE66; COMMON
5527             0xFE67,   // FE67      ; UNKNOWN
5528             0xFE68,   // FE68..FE6B; COMMON
5529             0xFE6C,   // FE6C..FE6F; UNKNOWN
5530             0xFE70,   // FE70..FE74; ARABIC
5531             0xFE75,   // FE75      ; UNKNOWN
5532             0xFE76,   // FE76..FEFC; ARABIC
5533             0xFEFD,   // FEFD..FEFE; UNKNOWN
5534             0xFEFF,   // FEFF      ; COMMON
5535             0xFF00,   // FF00      ; UNKNOWN
5536             0xFF01,   // FF01..FF20; COMMON
5537             0xFF21,   // FF21..FF3A; LATIN
5538             0xFF3B,   // FF3B..FF40; COMMON
5539             0xFF41,   // FF41..FF5A; LATIN
5540             0xFF5B,   // FF5B..FF65; COMMON
5541             0xFF66,   // FF66..FF6F; KATAKANA
5542             0xFF70,   // FF70      ; COMMON
5543             0xFF71,   // FF71..FF9D; KATAKANA
5544             0xFF9E,   // FF9E..FF9F; COMMON
5545             0xFFA0,   // FFA0..FFBE; HANGUL
5546             0xFFBF,   // FFBF..FFC1; UNKNOWN
5547             0xFFC2,   // FFC2..FFC7; HANGUL
5548             0xFFC8,   // FFC8..FFC9; UNKNOWN
5549             0xFFCA,   // FFCA..FFCF; HANGUL
5550             0xFFD0,   // FFD0..FFD1; UNKNOWN
5551             0xFFD2,   // FFD2..FFD7; HANGUL
5552             0xFFD8,   // FFD8..FFD9; UNKNOWN
5553             0xFFDA,   // FFDA..FFDC; HANGUL
5554             0xFFDD,   // FFDD..FFDF; UNKNOWN
5555             0xFFE0,   // FFE0..FFE6; COMMON
5556             0xFFE7,   // FFE7      ; UNKNOWN
5557             0xFFE8,   // FFE8..FFEE; COMMON
5558             0xFFEF,   // FFEF..FFF8; UNKNOWN
5559             0xFFF9,   // FFF9..FFFD; COMMON
5560             0xFFFE,   // FFFE..FFFF; UNKNOWN
5561             0x10000,  // 10000..1000B; LINEAR_B
5562             0x1000C,  // 1000C       ; UNKNOWN
5563             0x1000D,  // 1000D..10026; LINEAR_B
5564             0x10027,  // 10027       ; UNKNOWN
5565             0x10028,  // 10028..1003A; LINEAR_B
5566             0x1003B,  // 1003B       ; UNKNOWN
5567             0x1003C,  // 1003C..1003D; LINEAR_B
5568             0x1003E,  // 1003E       ; UNKNOWN
5569             0x1003F,  // 1003F..1004D; LINEAR_B
5570             0x1004E,  // 1004E..1004F; UNKNOWN
5571             0x10050,  // 10050..1005D; LINEAR_B
5572             0x1005E,  // 1005E..1007F; UNKNOWN
5573             0x10080,  // 10080..100FA; LINEAR_B
5574             0x100FB,  // 100FB..100FF; UNKNOWN
5575             0x10100,  // 10100..10102; COMMON
5576             0x10103,  // 10103..10106; UNKNOWN
5577             0x10107,  // 10107..10133; COMMON
5578             0x10134,  // 10134..10136; UNKNOWN
5579             0x10137,  // 10137..1013F; COMMON
5580             0x10140,  // 10140..1018E; GREEK
5581             0x1018F,  // 1018F       ; UNKNOWN
5582             0x10190,  // 10190..1019B; COMMON
5583             0x1019C,  // 1019C..1019F; UNKNOWN
5584             0x101A0,  // 101A0       ; GREEK
5585             0x101A1,  // 101A1..101CF; UNKNOWN
5586             0x101D0,  // 101D0..101FC; COMMON
5587             0x101FD,  // 101FD       ; INHERITED
5588             0x101FE,  // 101FE..1027F; UNKNOWN
5589             0x10280,  // 10280..1029C; LYCIAN
5590             0x1029D,  // 1029D..1029F; UNKNOWN
5591             0x102A0,  // 102A0..102D0; CARIAN
5592             0x102D1,  // 102D1..102DF; UNKNOWN
5593             0x102E0,  // 102E0       ; INHERITED
5594             0x102E1,  // 102E1..102FB; COMMON
5595             0x102FC,  // 102FC..102FF; UNKNOWN
5596             0x10300,  // 10300..10323; OLD_ITALIC
5597             0x10324,  // 10324..1032C; UNKNOWN
5598             0x1032D,  // 1032D..1032F; OLD_ITALIC
5599             0x10330,  // 10330..1034A; GOTHIC
5600             0x1034B,  // 1034B..1034F; UNKNOWN
5601             0x10350,  // 10350..1037A; OLD_PERMIC
5602             0x1037B,  // 1037B..1037F; UNKNOWN
5603             0x10380,  // 10380..1039D; UGARITIC
5604             0x1039E,  // 1039E       ; UNKNOWN
5605             0x1039F,  // 1039F       ; UGARITIC
5606             0x103A0,  // 103A0..103C3; OLD_PERSIAN
5607             0x103C4,  // 103C4..103C7; UNKNOWN
5608             0x103C8,  // 103C8..103D5; OLD_PERSIAN
5609             0x103D6,  // 103D6..103FF; UNKNOWN
5610             0x10400,  // 10400..1044F; DESERET
5611             0x10450,  // 10450..1047F; SHAVIAN
5612             0x10480,  // 10480..1049D; OSMANYA
5613             0x1049E,  // 1049E..1049F; UNKNOWN
5614             0x104A0,  // 104A0..104A9; OSMANYA
5615             0x104AA,  // 104AA..104AF; UNKNOWN
5616             0x104B0,  // 104B0..104D3; OSAGE
5617             0x104D4,  // 104D4..104D7; UNKNOWN
5618             0x104D8,  // 104D8..104FB; OSAGE
5619             0x104FC,  // 104FC..104FF; UNKNOWN
5620             0x10500,  // 10500..10527; ELBASAN
5621             0x10528,  // 10528..1052F; UNKNOWN
5622             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
5623             0x10564,  // 10564..1056E; UNKNOWN
5624             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
5625             0x10570,  // 10570..105FF; UNKNOWN
5626             0x10600,  // 10600..10736; LINEAR_A
5627             0x10737,  // 10737..1073F; UNKNOWN
5628             0x10740,  // 10740..10755; LINEAR_A
5629             0x10756,  // 10756..1075F; UNKNOWN
5630             0x10760,  // 10760..10767; LINEAR_A
5631             0x10768,  // 10768..107FF; UNKNOWN
5632             0x10800,  // 10800..10805; CYPRIOT
5633             0x10806,  // 10806..10807; UNKNOWN
5634             0x10808,  // 10808       ; CYPRIOT
5635             0x10809,  // 10809       ; UNKNOWN
5636             0x1080A,  // 1080A..10835; CYPRIOT
5637             0x10836,  // 10836       ; UNKNOWN
5638             0x10837,  // 10837..10838; CYPRIOT
5639             0x10839,  // 10839..1083B; UNKNOWN
5640             0x1083C,  // 1083C       ; CYPRIOT
5641             0x1083D,  // 1083D..1083E; UNKNOWN
5642             0x1083F,  // 1083F       ; CYPRIOT
5643             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
5644             0x10856,  // 10856       ; UNKNOWN
5645             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
5646             0x10860,  // 10860..1087F; PALMYRENE
5647             0x10880,  // 10880..1089E; NABATAEAN
5648             0x1089F,  // 1089F..108A6; UNKNOWN
5649             0x108A7,  // 108A7..108AF; NABATAEAN
5650             0x108B0,  // 108B0..108DF; UNKNOWN
5651             0x108E0,  // 108E0..108F2; HATRAN
5652             0x108F3,  // 108F3       ; UNKNOWN
5653             0x108F4,  // 108F4..108F5; HATRAN
5654             0x108F6,  // 108F6..108FA; UNKNOWN
5655             0x108FB,  // 108FB..108FF; HATRAN
5656             0x10900,  // 10900..1091B; PHOENICIAN
5657             0x1091C,  // 1091C..1091E; UNKNOWN
5658             0x1091F,  // 1091F       ; PHOENICIAN
5659             0x10920,  // 10920..10939; LYDIAN
5660             0x1093A,  // 1093A..1093E; UNKNOWN
5661             0x1093F,  // 1093F       ; LYDIAN
5662             0x10940,  // 10940..1097F; UNKNOWN
5663             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
5664             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
5665             0x109B8,  // 109B8..109BB; UNKNOWN
5666             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
5667             0x109D0,  // 109D0..109D1; UNKNOWN
5668             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
5669             0x10A00,  // 10A00..10A03; KHAROSHTHI
5670             0x10A04,  // 10A04       ; UNKNOWN
5671             0x10A05,  // 10A05..10A06; KHAROSHTHI
5672             0x10A07,  // 10A07..10A0B; UNKNOWN
5673             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
5674             0x10A14,  // 10A14       ; UNKNOWN
5675             0x10A15,  // 10A15..10A17; KHAROSHTHI
5676             0x10A18,  // 10A18       ; UNKNOWN
5677             0x10A19,  // 10A19..10A33; KHAROSHTHI
5678             0x10A34,  // 10A34..10A37; UNKNOWN
5679             0x10A38,  // 10A38..10A3A; KHAROSHTHI
5680             0x10A3B,  // 10A3B..10A3E; UNKNOWN
5681             0x10A3F,  // 10A3F..10A47; KHAROSHTHI
5682             0x10A48,  // 10A48..10A4F; UNKNOWN
5683             0x10A50,  // 10A50..10A58; KHAROSHTHI
5684             0x10A59,  // 10A59..10A5F; UNKNOWN
5685             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
5686             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
5687             0x10AA0,  // 10AA0..10ABF; UNKNOWN
5688             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
5689             0x10AE7,  // 10AE7..10AEA; UNKNOWN
5690             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
5691             0x10AF7,  // 10AF7..10AFF; UNKNOWN
5692             0x10B00,  // 10B00..10B35; AVESTAN
5693             0x10B36,  // 10B36..10B38; UNKNOWN
5694             0x10B39,  // 10B39..10B3F; AVESTAN
5695             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5696             0x10B56,  // 10B56..10B57; UNKNOWN
5697             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5698             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5699             0x10B73,  // 10B73..10B77; UNKNOWN
5700             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5701             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
5702             0x10B92,  // 10B92..10B98; UNKNOWN
5703             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
5704             0x10B9D,  // 10B9D..10BA8; UNKNOWN
5705             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
5706             0x10BB0,  // 10BB0..10BFF; UNKNOWN
5707             0x10C00,  // 10C00..10C48; OLD_TURKIC
5708             0x10C49,  // 10C49..10C7F; UNKNOWN
5709             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
5710             0x10CB3,  // 10CB3..10CBF; UNKNOWN
5711             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
5712             0x10CF3,  // 10CF3..10CF9; UNKNOWN
5713             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
5714             0x10D00,  // 10D00..10E5F; UNKNOWN
5715             0x10E60,  // 10E60..10E7E; ARABIC
5716             0x10E7F,  // 10E7F..10FFF; UNKNOWN
5717             0x11000,  // 11000..1104D; BRAHMI
5718             0x1104E,  // 1104E..11051; UNKNOWN
5719             0x11052,  // 11052..1106F; BRAHMI
5720             0x11070,  // 11070..1107E; UNKNOWN
5721             0x1107F,  // 1107F       ; BRAHMI
5722             0x11080,  // 11080..110C1; KAITHI
5723             0x110C2,  // 110C2..110CF; UNKNOWN
5724             0x110D0,  // 110D0..110E8; SORA_SOMPENG
5725             0x110E9,  // 110E9..110EF; UNKNOWN
5726             0x110F0,  // 110F0..110F9; SORA_SOMPENG
5727             0x110FA,  // 110FA..110FF; UNKNOWN
5728             0x11100,  // 11100..11134; CHAKMA
5729             0x11135,  // 11135       ; UNKNOWN
5730             0x11136,  // 11136..11143; CHAKMA
5731             0x11144,  // 11144..1114F; UNKNOWN
5732             0x11150,  // 11150..11176; MAHAJANI
5733             0x11177,  // 11177..1117F; UNKNOWN
5734             0x11180,  // 11180..111CD; SHARADA
5735             0x111CE,  // 111CE..111CF; UNKNOWN
5736             0x111D0,  // 111D0..111DF; SHARADA
5737             0x111E0,  // 111E0       ; UNKNOWN
5738             0x111E1,  // 111E1..111F4; SINHALA
5739             0x111F5,  // 111F5..111FF; UNKNOWN
5740             0x11200,  // 11200..11211; KHOJKI
5741             0x11212,  // 11212       ; UNKNOWN
5742             0x11213,  // 11213..1123E; KHOJKI
5743             0x1123F,  // 1123F..1127F; UNKNOWN
5744             0x11280,  // 11280..11286; MULTANI
5745             0x11287,  // 11287       ; UNKNOWN
5746             0x11288,  // 11288       ; MULTANI
5747             0x11289,  // 11289       ; UNKNOWN
5748             0x1128A,  // 1128A..1128D; MULTANI
5749             0x1128E,  // 1128E       ; UNKNOWN
5750             0x1128F,  // 1128F..1129D; MULTANI
5751             0x1129E,  // 1129E       ; UNKNOWN
5752             0x1129F,  // 1129F..112A9; MULTANI
5753             0x112AA,  // 112AA..112AF; UNKNOWN
5754             0x112B0,  // 112B0..112EA; KHUDAWADI
5755             0x112EB,  // 112EB..112EF; UNKNOWN
5756             0x112F0,  // 112F0..112F9; KHUDAWADI
5757             0x112FA,  // 112FA..112FF; UNKNOWN
5758             0x11300,  // 11300..11303; GRANTHA
5759             0x11304,  // 11304       ; UNKNOWN
5760             0x11305,  // 11305..1130C; GRANTHA
5761             0x1130D,  // 1130D..1130E; UNKNOWN
5762             0x1130F,  // 1130F..11310; GRANTHA
5763             0x11311,  // 11311..11312; UNKNOWN
5764             0x11313,  // 11313..11328; GRANTHA
5765             0x11329,  // 11329       ; UNKNOWN
5766             0x1132A,  // 1132A..11330; GRANTHA
5767             0x11331,  // 11331       ; UNKNOWN
5768             0x11332,  // 11332..11333; GRANTHA
5769             0x11334,  // 11334       ; UNKNOWN
5770             0x11335,  // 11335..11339; GRANTHA
5771             0x1133A,  // 1133A..1133B; UNKNOWN
5772             0x1133C,  // 1133C..11344; GRANTHA
5773             0x11345,  // 11345..11346; UNKNOWN
5774             0x11347,  // 11347..11348; GRANTHA
5775             0x11349,  // 11349..1134A; UNKNOWN
5776             0x1134B,  // 1134B..1134D; GRANTHA
5777             0x1134E,  // 1134E..1134F; UNKNOWN
5778             0x11350,  // 11350       ; GRANTHA
5779             0x11351,  // 11351..11356; UNKNOWN
5780             0x11357,  // 11357       ; GRANTHA
5781             0x11358,  // 11358..1135C; UNKNOWN
5782             0x1135D,  // 1135D..11363; GRANTHA
5783             0x11364,  // 11364..11365; UNKNOWN
5784             0x11366,  // 11366..1136C; GRANTHA
5785             0x1136D,  // 1136D..1136F; UNKNOWN
5786             0x11370,  // 11370..11374; GRANTHA
5787             0x11375,  // 11375..113FF; UNKNOWN
5788             0x11400,  // 11400..11459; NEWA
5789             0x1145A,  // 1145A       ; UNKNOWN
5790             0x1145B,  // 1145B       ; NEWA
5791             0x1145C,  // 1145C       ; UNKNOWN
5792             0x1145D,  // 1145D       ; NEWA
5793             0x1145E,  // 1145E..1147F; UNKNOWN
5794             0x11480,  // 11480..114C7; TIRHUTA
5795             0x114C8,  // 114C8..114CF; UNKNOWN
5796             0x114D0,  // 114D0..114D9; TIRHUTA
5797             0x114DA,  // 114DA..1157F; UNKNOWN
5798             0x11580,  // 11580..115B5; SIDDHAM
5799             0x115B6,  // 115B6..115B7; UNKNOWN
5800             0x115B8,  // 115B8..115DD; SIDDHAM
5801             0x115DE,  // 115DE..115FF; UNKNOWN
5802             0x11600,  // 11600..11644; MODI
5803             0x11645,  // 11645..1164F; UNKNOWN
5804             0x11650,  // 11650..11659; MODI
5805             0x1165A,  // 1165A..1165F; UNKNOWN
5806             0x11660,  // 11660..1166C; MONGOLIAN
5807             0X1166D,  // 1166D..1167F; UNKNOWN
5808             0x11680,  // 11680..116B7; TAKRI
5809             0x116B8,  // 116B8..116BF; UNKNOWN
5810             0x116C0,  // 116C0..116C9; TAKRI
5811             0x116CA,  // 116CA..116FF; UNKNOWN
5812             0x11700,  // 11700..11719; AHOM
5813             0x1171A,  // 1171A..1171C; UNKNOWN
5814             0x1171D,  // 1171D..1172B; AHOM
5815             0x1172C,  // 1172C..1172F; UNKNOWN
5816             0x11730,  // 11730..1173F; AHOM
5817             0x11740,  // 11740..1189F; UNKNOWN
5818             0x118A0,  // 118A0..118F2; WARANG_CITI
5819             0x118F3,  // 118F3..118FE; UNKNOWN
5820             0x118FF,  // 118FF       ; WARANG_CITI
5821             0x11900,  // 11900..119FF; UNKNOWN
5822             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
5823             0X11A48,  // 11A48..11A4F; UNKNOWN
5824             0x11A50,  // 11A50..11A83; SOYOMBO
5825             0x11A84,  // 11A84..11A85; UNKNOWN
5826             0x11A86,  // 11A86..11A9C; SOYOMBO
5827             0x11A9D,  // 11A9D       ; UNKNOWN
5828             0x11A9E,  // 11A9E..11AA2; SOYOMBO
5829             0x11AA3,  // 11AA3..11ABF; UNKNOWN
5830             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
5831             0x11AF9,  // 11AF9..11BFF; UNKNOWN
5832             0x11C00,  // 11C00..11C08; BHAIKSUKI
5833             0x11C09,  // 11C09       ; UNKNOWN
5834             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
5835             0x11C37,  // 11C37       ; UNKNOWN
5836             0x11C38,  // 11C38..11C45; BHAIKSUKI
5837             0x11C46,  // 11C46..11C4F; UNKNOWN
5838             0x11C50,  // 11C50..11C6C; BHAIKSUKI
5839             0x11C6D,  // 11C6D..11C6F; UNKNOWN
5840             0x11C70,  // 11C70..11C8F; MARCHEN
5841             0x11C90,  // 11C90..11C91; UNKNOWN
5842             0x11C92,  // 11C92..11CA7; MARCHEN
5843             0x11CA8,  // 11CA8       ; UNKNOWN
5844             0x11CA9,  // 11CA9..11CB6; MARCHEN
5845             0x11CB7,  // 11CB7..11CFF; UNKNOWN
5846             0x11D00,  // 11D00..11D06; MASARAM_GONDI
5847             0x11D07,  // 11D07       ; UNKNOWN
5848             0x11D08,  // 11D08..11D09; MASARAM_GONDI
5849             0x11D0A,  // 11D0A       ; UNKNOWN
5850             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
5851             0x11D37,  // 11D37..11D39; UNKNOWN
5852             0x11D3A,  // 11D3A       ; MASARAM_GONDI
5853             0x11D3B,  // 11D3B       ; UNKNOWN
5854             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
5855             0x11D3E,  // 11D3E       ; UNKNOWN
5856             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
5857             0x11D48,  // 11D48..11D4F; UNKNOWN
5858             0x11D50,  // 11D50..11D59; MASARAM_GONDI
5859             0x11D5A,  // 11D5A..11FFF; UNKNOWN
5860             0x12000,  // 12000..12399; CUNEIFORM
5861             0x1239A,  // 1239A..123FF; UNKNOWN
5862             0x12400,  // 12400..1246E; CUNEIFORM
5863             0x1246F,  // 1246F       ; UNKNOWN
5864             0x12470,  // 12470..12474; CUNEIFORM
5865             0x12475,  // 12475..1247F; UNKNOWN
5866             0x12480,  // 12480..12543; CUNEIFORM
5867             0x12544,  // 12544..12FFF; UNKNOWN
5868             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
5869             0x1342F,  // 1342F..143FF; UNKNOWN
5870             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
5871             0x14647,  // 14647..167FF; UNKNOWN
5872             0x16800,  // 16800..16A38; BAMUM
5873             0x16A39,  // 16A39..16A3F; UNKNOWN
5874             0x16A40,  // 16A40..16A5E; MRO
5875             0x16A5F,  // 16A5F       ; UNKNOWN
5876             0x16A60,  // 16A60..16A69; MRO
5877             0x16A6A,  // 16A6A..16A6D; UNKNOWN
5878             0x16A6E,  // 16A6E..16A6F; MRO
5879             0x16A70,  // 16A70..16ACF; UNKNOWN
5880             0x16AD0,  // 16AD0..16AED; BASSA_VAH
5881             0x16AEE,  // 16AEE..16AEF; UNKNOWN
5882             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
5883             0x16AF6,  // 16AF6..16AFF; UNKNOWN
5884             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
5885             0x16B46,  // 16B46..16B4F; UNKNOWN
5886             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
5887             0x16B5A,  // 16B5A       ; UNKNOWN
5888             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
5889             0x16B62,  // 16B62       ; UNKNOWN
5890             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
5891             0x16B78,  // 16B78..16B7C; UNKNOWN
5892             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
5893             0x16B90,  // 16B90..16EFF; UNKNOWN
5894             0x16F00,  // 16F00..16F44; MIAO
5895             0x16F45,  // 16F45..16F4F; UNKNOWN
5896             0x16F50,  // 16F50..16F7E; MIAO
5897             0x16F7F,  // 16F7F..16F8E; UNKNOWN
5898             0x16F8F,  // 16F8F..16F9F; MIAO
5899             0x16FA0,  // 16FA0..16FDF; UNKNOWN
5900             0x16FE0,  // 16FE0       ; TANGUT
5901             0x16FE1,  // 16FE1       ; NUSHU
5902             0x16FE2,  // 16FE2..16FFF; UNKNOWN
5903             0x17000,  // 17000..187EC; TANGUT
5904             0x187ED,  // 187ED..187FF; UNKNOWN
5905             0x18800,  // 18800..18AF2; TANGUT
5906             0x18AF3,  // 18AF3..1AFFF; UNKNOWN
5907             0x1B000,  // 1B000       ; KATAKANA
5908             0x1B001,  // 1B001..1B11E; HIRAGANA
5909             0x1B11F,  // 1B11F..1B16F; UNKNOWN
5910             0x1B170,  // 1B170..1B2FB; NUSHU
5911             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
5912             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
5913             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
5914             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
5915             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
5916             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
5917             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
5918             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
5919             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
5920             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
5921             0x1BCA0,  // 1BCA0..1BCA3; COMMON
5922             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
5923             0x1D000,  // 1D000..1D0F5; COMMON
5924             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
5925             0x1D100,  // 1D100..1D126; COMMON
5926             0x1D127,  // 1D127..1D128; UNKNOWN
5927             0x1D129,  // 1D129..1D166; COMMON
5928             0x1D167,  // 1D167..1D169; INHERITED
5929             0x1D16A,  // 1D16A..1D17A; COMMON
5930             0x1D17B,  // 1D17B..1D182; INHERITED
5931             0x1D183,  // 1D183..1D184; COMMON
5932             0x1D185,  // 1D185..1D18B; INHERITED
5933             0x1D18C,  // 1D18C..1D1A9; COMMON
5934             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
5935             0x1D1AE,  // 1D1AE..1D1E8; COMMON
5936             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
5937             0x1D200,  // 1D200..1D245; GREEK
5938             0x1D246,  // 1D246..1D2FF; UNKNOWN
5939             0x1D300,  // 1D300..1D356; COMMON
5940             0x1D357,  // 1D357..1D35F; UNKNOWN
5941             0x1D360,  // 1D360..1D371; COMMON
5942             0x1D372,  // 1D372..1D3FF; UNKNOWN
5943             0x1D400,  // 1D400..1D454; COMMON
5944             0x1D455,  // 1D455       ; UNKNOWN
5945             0x1D456,  // 1D456..1D49C; COMMON
5946             0x1D49D,  // 1D49D       ; UNKNOWN
5947             0x1D49E,  // 1D49E..1D49F; COMMON
5948             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
5949             0x1D4A2,  // 1D4A2       ; COMMON
5950             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
5951             0x1D4A5,  // 1D4A5..1D4A6; COMMON
5952             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
5953             0x1D4A9,  // 1D4A9..1D4AC; COMMON
5954             0x1D4AD,  // 1D4AD       ; UNKNOWN
5955             0x1D4AE,  // 1D4AE..1D4B9; COMMON
5956             0x1D4BA,  // 1D4BA       ; UNKNOWN
5957             0x1D4BB,  // 1D4BB       ; COMMON
5958             0x1D4BC,  // 1D4BC       ; UNKNOWN
5959             0x1D4BD,  // 1D4BD..1D4C3; COMMON
5960             0x1D4C4,  // 1D4C4       ; UNKNOWN
5961             0x1D4C5,  // 1D4C5..1D505; COMMON
5962             0x1D506,  // 1D506       ; UNKNOWN
5963             0x1D507,  // 1D507..1D50A; COMMON
5964             0x1D50B,  // 1D50B..1D50C; UNKNOWN
5965             0x1D50D,  // 1D50D..1D514; COMMON
5966             0x1D515,  // 1D515       ; UNKNOWN
5967             0x1D516,  // 1D516..1D51C; COMMON
5968             0x1D51D,  // 1D51D       ; UNKNOWN
5969             0x1D51E,  // 1D51E..1D539; COMMON
5970             0x1D53A,  // 1D53A       ; UNKNOWN
5971             0x1D53B,  // 1D53B..1D53E; COMMON
5972             0x1D53F,  // 1D53F       ; UNKNOWN
5973             0x1D540,  // 1D540..1D544; COMMON
5974             0x1D545,  // 1D545       ; UNKNOWN
5975             0x1D546,  // 1D546       ; COMMON
5976             0x1D547,  // 1D547..1D549; UNKNOWN
5977             0x1D54A,  // 1D54A..1D550; COMMON
5978             0x1D551,  // 1D551       ; UNKNOWN
5979             0x1D552,  // 1D552..1D6A5; COMMON
5980             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
5981             0x1D6A8,  // 1D6A8..1D7CB; COMMON
5982             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
5983             0x1D7CE,  // 1D7CE..1D7FF; COMMON
5984             0x1D800,  // 1D800..1DA8B; SIGNWRITING
5985             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
5986             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
5987             0x1DAA0,  // 1DAA0       ; UNKNOWN
5988             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
5989             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
5990             0x1E000,  // 1E000..1E006; GLAGOLITIC
5991             0x1E007,  // 1E007       ; UNKNOWN
5992             0x1E008,  // 1E008..1E018; GLAGOLITIC
5993             0x1E019,  // 1E019..1E01A; UNKNOWN
5994             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
5995             0x1E022,  // 1E022       ; UNKNOWN
5996             0x1E023,  // 1E023..1E024; GLAGOLITIC
5997             0x1E025,  // 1E025       ; UNKNOWN
5998             0x1E026,  // 1E026..1E02A; GLAGOLITIC
5999             0x1E02B,  // 1E02B..1E7FF; UNKNOWN
6000             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6001             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6002             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6003             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6004             0x1E900,  // 1E900..1E94A; ADLAM
6005             0x1E94B,  // 1E94B..1E94F; UNKNOWN
6006             0x1E950,  // 1E950..1E959; ADLAM
6007             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6008             0x1E95E,  // 1E95E..1E95F; ADLAM
6009             0x1E960,  // 1E960..1EDFF; UNKNOWN
6010             0x1EE00,  // 1EE00..1EE03; ARABIC
6011             0x1EE04,  // 1EE04       ; UNKNOWN
6012             0x1EE05,  // 1EE05..1EE1F; ARABIC
6013             0x1EE20,  // 1EE20       ; UNKNOWN
6014             0x1EE21,  // 1EE21..1EE22; ARABIC
6015             0x1EE23,  // 1EE23       ; UNKNOWN
6016             0x1EE24,  // 1EE24       ; ARABIC
6017             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6018             0x1EE27,  // 1EE27       ; ARABIC
6019             0x1EE28,  // 1EE28       ; UNKNOWN
6020             0x1EE29,  // 1EE29..1EE32; ARABIC
6021             0x1EE33,  // 1EE33       ; UNKNOWN
6022             0x1EE34,  // 1EE34..1EE37; ARABIC
6023             0x1EE38,  // 1EE38       ; UNKNOWN
6024             0x1EE39,  // 1EE39       ; ARABIC
6025             0x1EE3A,  // 1EE3A       ; UNKNOWN
6026             0x1EE3B,  // 1EE3B       ; ARABIC
6027             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6028             0x1EE42,  // 1EE42       ; ARABIC
6029             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6030             0x1EE47,  // 1EE47       ; ARABIC
6031             0x1EE48,  // 1EE48       ; UNKNOWN
6032             0x1EE49,  // 1EE49       ; ARABIC
6033             0x1EE4A,  // 1EE4A       ; UNKNOWN
6034             0x1EE4B,  // 1EE4B       ; ARABIC
6035             0x1EE4C,  // 1EE4C       ; UNKNOWN
6036             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6037             0x1EE50,  // 1EE50       ; UNKNOWN
6038             0x1EE51,  // 1EE51..1EE52; ARABIC
6039             0x1EE53,  // 1EE53       ; UNKNOWN
6040             0x1EE54,  // 1EE54       ; ARABIC
6041             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6042             0x1EE57,  // 1EE57       ; ARABIC
6043             0x1EE58,  // 1EE58       ; UNKNOWN
6044             0x1EE59,  // 1EE59       ; ARABIC
6045             0x1EE5A,  // 1EE5A       ; UNKNOWN
6046             0x1EE5B,  // 1EE5B       ; ARABIC
6047             0x1EE5C,  // 1EE5C       ; UNKNOWN
6048             0x1EE5D,  // 1EE5D       ; ARABIC
6049             0x1EE5E,  // 1EE5E       ; UNKNOWN
6050             0x1EE5F,  // 1EE5F       ; ARABIC
6051             0x1EE60,  // 1EE60       ; UNKNOWN
6052             0x1EE61,  // 1EE61..1EE62; ARABIC
6053             0x1EE63,  // 1EE63       ; UNKNOWN
6054             0x1EE64,  // 1EE64       ; ARABIC
6055             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6056             0x1EE67,  // 1EE67..1EE6A; ARABIC
6057             0x1EE6B,  // 1EE6B       ; UNKNOWN
6058             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6059             0x1EE73,  // 1EE73       ; UNKNOWN
6060             0x1EE74,  // 1EE74..1EE77; ARABIC
6061             0x1EE78,  // 1EE78       ; UNKNOWN
6062             0x1EE79,  // 1EE79..1EE7C; ARABIC
6063             0x1EE7D,  // 1EE7D       ; UNKNOWN
6064             0x1EE7E,  // 1EE7E       ; ARABIC
6065             0x1EE7F,  // 1EE7F       ; UNKNOWN
6066             0x1EE80,  // 1EE80..1EE89; ARABIC
6067             0x1EE8A,  // 1EE8A       ; UNKNOWN
6068             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6069             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6070             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6071             0x1EEA4,  // 1EEA4       ; UNKNOWN
6072             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6073             0x1EEAA,  // 1EEAA       ; UNKNOWN
6074             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6075             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6076             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6077             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6078             0x1F000,  // 1F000..1F02B; COMMON
6079             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6080             0x1F030,  // 1F030..1F093; COMMON
6081             0x1F094,  // 1F094..1F09F; UNKNOWN
6082             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6083             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6084             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6085             0x1F0C0,  // 1F0C0       ; UNKNOWN
6086             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6087             0x1F0D0,  // 1F0D0       ; UNKNOWN
6088             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6089             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6090             0x1F100,  // 1F100..1F10C; COMMON
6091             0x1F10D,  // 1F10D..1F10F; UNKNOWN
6092             0x1F110,  // 1F110..1F12E; COMMON
6093             0x1F12F,  // 1F12F       ; UNKNOWN
6094             0x1F130,  // 1F130..1F16B; COMMON
6095             0x1F16C,  // 1F16C..1F16F; UNKNOWN
6096             0x1F170,  // 1F170..1F1AC; COMMON
6097             0x1F1AD,  // 1F1AD..1F1E5; UNKNOWN
6098             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6099             0x1F200,  // 1F200       ; HIRAGANA
6100             0x1F201,  // 1F201..1F202; COMMON
6101             0x1F203,  // 1F203..1F20F; UNKNOWN
6102             0x1F210,  // 1F210..1F23B; COMMON
6103             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6104             0x1F240,  // 1F240..1F248; COMMON
6105             0x1F249,  // 1F249..1F24F; UNKNOWN
6106             0x1F250,  // 1F250..1F251; COMMON
6107             0x1F252,  // 1F252..1F25F; UNKNOWN
6108             0x1F260,  // 1F260..1F265; COMMON
6109             0x1F266,  // 1F266..1F2FF; UNKNOWN
6110             0x1F300,  // 1F300..1F6D4; COMMON
6111             0x1F6D5,  // 1F6D5..1F6DF; UNKNOWN
6112             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6113             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6114             0x1F6F0,  // 1F6F0..1F6F8; COMMON
6115             0x1F6F9,  // 1F6F9..1F6FF; UNKNOWN
6116             0x1F700,  // 1F700..1F773; COMMON
6117             0x1F774,  // 1F774..1F77F; UNKNOWN
6118             0x1F780,  // 1F780..1F7D4; COMMON
6119             0x1F7D5,  // 1F7D5..1F7FF; UNKNOWN
6120             0x1F800,  // 1F800..1F80B; COMMON
6121             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6122             0x1F810,  // 1F810..1F847; COMMON
6123             0x1F848,  // 1F848..1F84F; UNKNOWN
6124             0x1F850,  // 1F850..1F859; COMMON
6125             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6126             0x1F860,  // 1F860..1F887; COMMON
6127             0x1F888,  // 1F888..1F88F; UNKNOWN
6128             0x1F890,  // 1F890..1F8AD; COMMON
6129             0x1F8AE,  // 1F8AE..1F8FF; UNKNOWN
6130             0x1F900,  // 1F900..1F90B; COMMON
6131             0x1F90C,  // 1F90C..1F90F; UNKNOWN
6132             0x1F910,  // 1F910..1F93E; COMMON
6133             0x1F93F,  // 1F93F       ; UNKNOWN
6134             0x1F940,  // 1F940..1F94C; COMMON
6135             0x1F94D,  // 1F94D..1F94F; UNKNOWN
6136             0x1F950,  // 1F950..1F96B; COMMON
6137             0x1F96C,  // 1F96C..1F97F; UNKNOWN
6138             0x1F980,  // 1F980..1F997; COMMON
6139             0x1F998,  // 1F998..1F9BF; UNKNOWN
6140             0x1F9C0,  // 1F9C0       ; COMMON
6141             0x1F9C1,  // 1F9C1..1F9CF; UNKNOWN
6142             0x1F9D0,  // 1F9D0..1F9E6; COMMON
6143             0x1F9E7,  // 1F9E7..1FFFF; UNKNOWN
6144             0x20000,  // 20000..2A6D6; HAN
6145             0x2A6D7,  // 2A6D7..2A6FF; UNKNOWN
6146             0x2A700,  // 2A700..2B734; HAN
6147             0x2B735,  // 2B735..2B73F; UNKNOWN
6148             0x2B740,  // 2B740..2B81D; HAN
6149             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6150             0x2B820,  // 2B820..2CEA1; HAN
6151             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6152             0x2CEB0,  // 2CEB0..2EBE0; HAN
6153             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6154             0x2F800,  // 2F800..2FA1D; HAN
6155             0x2FA1E,  // 2FA1E..E0000; UNKNOWN
6156             0xE0001,  // E0001       ; COMMON
6157             0xE0002,  // E0002..E001F; UNKNOWN
6158             0xE0020,  // E0020..E007F; COMMON
6159             0xE0080,  // E0080..E00FF; UNKNOWN
6160             0xE0100,  // E0100..E01EF; INHERITED
6161             0xE01F0   // E01F0..10FFFF; UNKNOWN
6162         };
6163 
6164         private static final UnicodeScript[] scripts = {
6165             COMMON,                   // 0000..0040
6166             LATIN,                    // 0041..005A
6167             COMMON,                   // 005B..0060
6168             LATIN,                    // 0061..007A
6169             COMMON,                   // 007B..00A9
6170             LATIN,                    // 00AA
6171             COMMON,                   // 00AB..00B9
6172             LATIN,                    // 00BA
6173             COMMON,                   // 00BB..00BF
6174             LATIN,                    // 00C0..00D6
6175             COMMON,                   // 00D7
6176             LATIN,                    // 00D8..00F6
6177             COMMON,                   // 00F7
6178             LATIN,                    // 00F8..02B8
6179             COMMON,                   // 02B9..02DF
6180             LATIN,                    // 02E0..02E4
6181             COMMON,                   // 02E5..02E9
6182             BOPOMOFO,                 // 02EA..02EB
6183             COMMON,                   // 02EC..02FF
6184             INHERITED,                // 0300..036F
6185             GREEK,                    // 0370..0373
6186             COMMON,                   // 0374
6187             GREEK,                    // 0375..0377
6188             UNKNOWN,                  // 0378..0379
6189             GREEK,                    // 037A..037D
6190             COMMON,                   // 037E
6191             GREEK,                    // 037F
6192             UNKNOWN,                  // 0380..0383
6193             GREEK,                    // 0384
6194             COMMON,                   // 0385
6195             GREEK,                    // 0386
6196             COMMON,                   // 0387
6197             GREEK,                    // 0388..038A
6198             UNKNOWN,                  // 038B
6199             GREEK,                    // 038C
6200             UNKNOWN,                  // 038D
6201             GREEK,                    // 038E..03A1
6202             UNKNOWN,                  // 03A2
6203             GREEK,                    // 03A3..03E1
6204             COPTIC,                   // 03E2..03EF
6205             GREEK,                    // 03F0..03FF
6206             CYRILLIC,                 // 0400..0484
6207             INHERITED,                // 0485..0486
6208             CYRILLIC,                 // 0487..052F
6209             UNKNOWN,                  // 0530
6210             ARMENIAN,                 // 0531..0556
6211             UNKNOWN,                  // 0557..0558
6212             ARMENIAN,                 // 0559..055F
6213             UNKNOWN,                  // 0560
6214             ARMENIAN,                 // 0561..0587
6215             UNKNOWN,                  // 0588
6216             COMMON,                   // 0589
6217             ARMENIAN,                 // 058A
6218             UNKNOWN,                  // 058B..058C
6219             ARMENIAN,                 // 058D..058F
6220             UNKNOWN,                  // 0590
6221             HEBREW,                   // 0591..05C7
6222             UNKNOWN,                  // 05C8..05CF
6223             HEBREW,                   // 05D0..05EA
6224             UNKNOWN,                  // 05EB..05EF
6225             HEBREW,                   // 05F0..05F4
6226             UNKNOWN,                  // 05F5..05FF
6227             ARABIC,                   // 0600..0604
6228             COMMON,                   // 0605
6229             ARABIC,                   // 0606..060B
6230             COMMON,                   // 060C
6231             ARABIC,                   // 060D..061A
6232             COMMON,                   // 061B
6233             ARABIC,                   // 061C
6234             UNKNOWN,                  // 061D
6235             ARABIC,                   // 061E
6236             COMMON,                   // 061F
6237             ARABIC,                   // 0620..063F
6238             COMMON,                   // 0640
6239             ARABIC,                   // 0641..064A
6240             INHERITED,                // 064B..0655
6241             ARABIC,                   // 0656..066F
6242             INHERITED,                // 0670
6243             ARABIC,                   // 0671..06DC
6244             COMMON,                   // 06DD
6245             ARABIC,                   // 06DE..06FF
6246             SYRIAC,                   // 0700..070D
6247             UNKNOWN,                  // 070E
6248             SYRIAC,                   // 070F..074A
6249             UNKNOWN,                  // 074B..074C
6250             SYRIAC,                   // 074D..074F
6251             ARABIC,                   // 0750..077F
6252             THAANA,                   // 0780..07B1
6253             UNKNOWN,                  // 07B2..07BF
6254             NKO,                      // 07C0..07FA
6255             UNKNOWN,                  // 07FB..07FF
6256             SAMARITAN,                // 0800..082D
6257             UNKNOWN,                  // 082E..082F
6258             SAMARITAN,                // 0830..083E
6259             UNKNOWN,                  // 083F
6260             MANDAIC,                  // 0840..085B
6261             UNKNOWN,                  // 085C..085D
6262             MANDAIC,                  // 085E
6263             UNKNOWN,                  // 085F
6264             SYRIAC,                   // 0860..086A
6265             UNKNOWN,                  // 086B..089F
6266             ARABIC,                   // 08A0..08B4
6267             UNKNOWN,                  // 08B5
6268             ARABIC,                   // 08B6..08BD
6269             UNKNOWN,                  // 08BE..08D3
6270             ARABIC,                   // 08D4..08E1
6271             COMMON,                   // 08E2
6272             ARABIC,                   // 08E3..08FF
6273             DEVANAGARI,               // 0900..0950
6274             INHERITED,                // 0951..0952
6275             DEVANAGARI,               // 0953..0963
6276             COMMON,                   // 0964..0965
6277             DEVANAGARI,               // 0966..097F
6278             BENGALI,                  // 0980..0983
6279             UNKNOWN,                  // 0984
6280             BENGALI,                  // 0985..098C
6281             UNKNOWN,                  // 098D..098E
6282             BENGALI,                  // 098F..0990
6283             UNKNOWN,                  // 0991..0992
6284             BENGALI,                  // 0993..09A8
6285             UNKNOWN,                  // 09A9
6286             BENGALI,                  // 09AA..09B0
6287             UNKNOWN,                  // 09B1
6288             BENGALI,                  // 09B2
6289             UNKNOWN,                  // 09B3..09B5
6290             BENGALI,                  // 09B6..09B9
6291             UNKNOWN,                  // 09BA..09BB
6292             BENGALI,                  // 09BC..09C4
6293             UNKNOWN,                  // 09C5..09C6
6294             BENGALI,                  // 09C7..09C8
6295             UNKNOWN,                  // 09C9..09CA
6296             BENGALI,                  // 09CB..09CE
6297             UNKNOWN,                  // 09CF..09D6
6298             BENGALI,                  // 09D7
6299             UNKNOWN,                  // 09D8..09DB
6300             BENGALI,                  // 09DC..09DD
6301             UNKNOWN,                  // 09DE
6302             BENGALI,                  // 09DF..09E3
6303             UNKNOWN,                  // 09E4..09E5
6304             BENGALI,                  // 09E6..09FD
6305             UNKNOWN,                  // 09FE..0A00
6306             GURMUKHI,                 // 0A01..0A03
6307             UNKNOWN,                  // 0A04
6308             GURMUKHI,                 // 0A05..0A0A
6309             UNKNOWN,                  // 0A0B..0A0E
6310             GURMUKHI,                 // 0A0F..0A10
6311             UNKNOWN,                  // 0A11..0A12
6312             GURMUKHI,                 // 0A13..0A28
6313             UNKNOWN,                  // 0A29
6314             GURMUKHI,                 // 0A2A..0A30
6315             UNKNOWN,                  // 0A31
6316             GURMUKHI,                 // 0A32..0A33
6317             UNKNOWN,                  // 0A34
6318             GURMUKHI,                 // 0A35..0A36
6319             UNKNOWN,                  // 0A37
6320             GURMUKHI,                 // 0A38..0A39
6321             UNKNOWN,                  // 0A3A..0A3B
6322             GURMUKHI,                 // 0A3C
6323             UNKNOWN,                  // 0A3D
6324             GURMUKHI,                 // 0A3E..0A42
6325             UNKNOWN,                  // 0A43..0A46
6326             GURMUKHI,                 // 0A47..0A48
6327             UNKNOWN,                  // 0A49..0A4A
6328             GURMUKHI,                 // 0A4B..0A4D
6329             UNKNOWN,                  // 0A4E..0A50
6330             GURMUKHI,                 // 0A51
6331             UNKNOWN,                  // 0A52..0A58
6332             GURMUKHI,                 // 0A59..0A5C
6333             UNKNOWN,                  // 0A5D
6334             GURMUKHI,                 // 0A5E
6335             UNKNOWN,                  // 0A5F..0A65
6336             GURMUKHI,                 // 0A66..0A75
6337             UNKNOWN,                  // 0A76..0A80
6338             GUJARATI,                 // 0A81..0A83
6339             UNKNOWN,                  // 0A84
6340             GUJARATI,                 // 0A85..0A8D
6341             UNKNOWN,                  // 0A8E
6342             GUJARATI,                 // 0A8F..0A91
6343             UNKNOWN,                  // 0A92
6344             GUJARATI,                 // 0A93..0AA8
6345             UNKNOWN,                  // 0AA9
6346             GUJARATI,                 // 0AAA..0AB0
6347             UNKNOWN,                  // 0AB1
6348             GUJARATI,                 // 0AB2..0AB3
6349             UNKNOWN,                  // 0AB4
6350             GUJARATI,                 // 0AB5..0AB9
6351             UNKNOWN,                  // 0ABA..0ABB
6352             GUJARATI,                 // 0ABC..0AC5
6353             UNKNOWN,                  // 0AC6
6354             GUJARATI,                 // 0AC7..0AC9
6355             UNKNOWN,                  // 0ACA
6356             GUJARATI,                 // 0ACB..0ACD
6357             UNKNOWN,                  // 0ACE..0ACF
6358             GUJARATI,                 // 0AD0
6359             UNKNOWN,                  // 0AD1..0ADF
6360             GUJARATI,                 // 0AE0..0AE3
6361             UNKNOWN,                  // 0AE4..0AE5
6362             GUJARATI,                 // 0AE6..0AF1
6363             UNKNOWN,                  // 0AF2..0AF8
6364             GUJARATI,                 // 0AF9..0AFF
6365             UNKNOWN,                  // 0B00
6366             ORIYA,                    // 0B01..0B03
6367             UNKNOWN,                  // 0B04
6368             ORIYA,                    // 0B05..0B0C
6369             UNKNOWN,                  // 0B0D..0B0E
6370             ORIYA,                    // 0B0F..0B10
6371             UNKNOWN,                  // 0B11..0B12
6372             ORIYA,                    // 0B13..0B28
6373             UNKNOWN,                  // 0B29
6374             ORIYA,                    // 0B2A..0B30
6375             UNKNOWN,                  // 0B31
6376             ORIYA,                    // 0B32..0B33
6377             UNKNOWN,                  // 0B34
6378             ORIYA,                    // 0B35..0B39
6379             UNKNOWN,                  // 0B3A..0B3B
6380             ORIYA,                    // 0B3C..0B44
6381             UNKNOWN,                  // 0B45..0B46
6382             ORIYA,                    // 0B47..0B48
6383             UNKNOWN,                  // 0B49..0B4A
6384             ORIYA,                    // 0B4B..0B4D
6385             UNKNOWN,                  // 0B4E..0B55
6386             ORIYA,                    // 0B56..0B57
6387             UNKNOWN,                  // 0B58..0B5B
6388             ORIYA,                    // 0B5C..0B5D
6389             UNKNOWN,                  // 0B5E
6390             ORIYA,                    // 0B5F..0B63
6391             UNKNOWN,                  // 0B64..0B65
6392             ORIYA,                    // 0B66..0B77
6393             UNKNOWN,                  // 0B78..0B81
6394             TAMIL,                    // 0B82..0B83
6395             UNKNOWN,                  // 0B84
6396             TAMIL,                    // 0B85..0B8A
6397             UNKNOWN,                  // 0B8B..0B8D
6398             TAMIL,                    // 0B8E..0B90
6399             UNKNOWN,                  // 0B91
6400             TAMIL,                    // 0B92..0B95
6401             UNKNOWN,                  // 0B96..0B98
6402             TAMIL,                    // 0B99..0B9A
6403             UNKNOWN,                  // 0B9B
6404             TAMIL,                    // 0B9C
6405             UNKNOWN,                  // 0B9D
6406             TAMIL,                    // 0B9E..0B9F
6407             UNKNOWN,                  // 0BA0..0BA2
6408             TAMIL,                    // 0BA3..0BA4
6409             UNKNOWN,                  // 0BA5..0BA7
6410             TAMIL,                    // 0BA8..0BAA
6411             UNKNOWN,                  // 0BAB..0BAD
6412             TAMIL,                    // 0BAE..0BB9
6413             UNKNOWN,                  // 0BBA..0BBD
6414             TAMIL,                    // 0BBE..0BC2
6415             UNKNOWN,                  // 0BC3..0BC5
6416             TAMIL,                    // 0BC6..0BC8
6417             UNKNOWN,                  // 0BC9
6418             TAMIL,                    // 0BCA..0BCD
6419             UNKNOWN,                  // 0BCE..0BCF
6420             TAMIL,                    // 0BD0
6421             UNKNOWN,                  // 0BD1..0BD6
6422             TAMIL,                    // 0BD7
6423             UNKNOWN,                  // 0BD8..0BE5
6424             TAMIL,                    // 0BE6..0BFA
6425             UNKNOWN,                  // 0BFB..0BFF
6426             TELUGU,                   // 0C00..0C03
6427             UNKNOWN,                  // 0C04
6428             TELUGU,                   // 0C05..0C0C
6429             UNKNOWN,                  // 0C0D
6430             TELUGU,                   // 0C0E..0C10
6431             UNKNOWN,                  // 0C11
6432             TELUGU,                   // 0C12..0C28
6433             UNKNOWN,                  // 0C29
6434             TELUGU,                   // 0C2A..0C39
6435             UNKNOWN,                  // 0C3A..0C3C
6436             TELUGU,                   // 0C3D..0C44
6437             UNKNOWN,                  // 0C45
6438             TELUGU,                   // 0C46..0C48
6439             UNKNOWN,                  // 0C49
6440             TELUGU,                   // 0C4A..0C4D
6441             UNKNOWN,                  // 0C4E..0C54
6442             TELUGU,                   // 0C55..0C56
6443             UNKNOWN,                  // 0C57
6444             TELUGU,                   // 0C58..0C5A
6445             UNKNOWN,                  // 0C5B..0C5F
6446             TELUGU,                   // 0C60..0C63
6447             UNKNOWN,                  // 0C64..0C65
6448             TELUGU,                   // 0C66..0C6F
6449             UNKNOWN,                  // 0C70..0C77
6450             TELUGU,                   // 0C78..0C7F
6451             KANNADA,                  // 0C80..0C83
6452             UNKNOWN,                  // 0C84
6453             KANNADA,                  // 0C85..0C8C
6454             UNKNOWN,                  // 0C8D
6455             KANNADA,                  // 0C8E..0C90
6456             UNKNOWN,                  // 0C91
6457             KANNADA,                  // 0C92..0CA8
6458             UNKNOWN,                  // 0CA9
6459             KANNADA,                  // 0CAA..0CB3
6460             UNKNOWN,                  // 0CB4
6461             KANNADA,                  // 0CB5..0CB9
6462             UNKNOWN,                  // 0CBA..0CBB
6463             KANNADA,                  // 0CBC..0CC4
6464             UNKNOWN,                  // 0CC5
6465             KANNADA,                  // 0CC6..0CC8
6466             UNKNOWN,                  // 0CC9
6467             KANNADA,                  // 0CCA..0CCD
6468             UNKNOWN,                  // 0CCE..0CD4
6469             KANNADA,                  // 0CD5..0CD6
6470             UNKNOWN,                  // 0CD7..0CDD
6471             KANNADA,                  // 0CDE
6472             UNKNOWN,                  // 0CDF
6473             KANNADA,                  // 0CE0..0CE3
6474             UNKNOWN,                  // 0CE4..0CE5
6475             KANNADA,                  // 0CE6..0CEF
6476             UNKNOWN,                  // 0CF0
6477             KANNADA,                  // 0CF1..0CF2
6478             UNKNOWN,                  // 0CF3..0CFF
6479             MALAYALAM,                // 0D00..0D03
6480             UNKNOWN,                  // 0D04
6481             MALAYALAM,                // 0D05..0D0C
6482             UNKNOWN,                  // 0D0D
6483             MALAYALAM,                // 0D0E..0D10
6484             UNKNOWN,                  // 0D11
6485             MALAYALAM,                // 0D12..0D44
6486             UNKNOWN,                  // 0D45
6487             MALAYALAM,                // 0D46..0D48
6488             UNKNOWN,                  // 0D49
6489             MALAYALAM,                // 0D4A..0D4F
6490             UNKNOWN,                  // 0D50..0D53
6491             MALAYALAM,                // 0D54..0D63
6492             UNKNOWN,                  // 0D64..0D65
6493             MALAYALAM,                // 0D66..0D7F
6494             UNKNOWN,                  // 0D80..0D81
6495             SINHALA,                  // 0D82..0D83
6496             UNKNOWN,                  // 0D84
6497             SINHALA,                  // 0D85..0D96
6498             UNKNOWN,                  // 0D97..0D99
6499             SINHALA,                  // 0D9A..0DB1
6500             UNKNOWN,                  // 0DB2
6501             SINHALA,                  // 0DB3..0DBB
6502             UNKNOWN,                  // 0DBC
6503             SINHALA,                  // 0DBD
6504             UNKNOWN,                  // 0DBE..0DBF
6505             SINHALA,                  // 0DC0..0DC6
6506             UNKNOWN,                  // 0DC7..0DC9
6507             SINHALA,                  // 0DCA
6508             UNKNOWN,                  // 0DCB..0DCE
6509             SINHALA,                  // 0DCF..0DD4
6510             UNKNOWN,                  // 0DD5
6511             SINHALA,                  // 0DD6
6512             UNKNOWN,                  // 0DD7
6513             SINHALA,                  // 0DD8..0DDF
6514             UNKNOWN,                  // 0DE0..0DE5
6515             SINHALA,                  // 0DE6..0DEF
6516             UNKNOWN,                  // 0DF0..0DF1
6517             SINHALA,                  // 0DF2..0DF4
6518             UNKNOWN,                  // 0DF5..0E00
6519             THAI,                     // 0E01..0E3A
6520             UNKNOWN,                  // 0E3B..0E3E
6521             COMMON,                   // 0E3F
6522             THAI,                     // 0E40..0E5B
6523             UNKNOWN,                  // 0E5C..0E80
6524             LAO,                      // 0E81..0E82
6525             UNKNOWN,                  // 0E83
6526             LAO,                      // 0E84
6527             UNKNOWN,                  // 0E85..0E86
6528             LAO,                      // 0E87..0E88
6529             UNKNOWN,                  // 0E89
6530             LAO,                      // 0E8A
6531             UNKNOWN,                  // 0E8B..0E8C
6532             LAO,                      // 0E8D
6533             UNKNOWN,                  // 0E8E..0E93
6534             LAO,                      // 0E94..0E97
6535             UNKNOWN,                  // 0E98
6536             LAO,                      // 0E99..0E9F
6537             UNKNOWN,                  // 0EA0
6538             LAO,                      // 0EA1..0EA3
6539             UNKNOWN,                  // 0EA4
6540             LAO,                      // 0EA5
6541             UNKNOWN,                  // 0EA6
6542             LAO,                      // 0EA7
6543             UNKNOWN,                  // 0EA8..0EA9
6544             LAO,                      // 0EAA..0EAB
6545             UNKNOWN,                  // 0EAC
6546             LAO,                      // 0EAD..0EB9
6547             UNKNOWN,                  // 0EBA
6548             LAO,                      // 0EBB..0EBD
6549             UNKNOWN,                  // 0EBE..0EBF
6550             LAO,                      // 0EC0..0EC4
6551             UNKNOWN,                  // 0EC5
6552             LAO,                      // 0EC6
6553             UNKNOWN,                  // 0EC7
6554             LAO,                      // 0EC8..0ECD
6555             UNKNOWN,                  // 0ECE..0ECF
6556             LAO,                      // 0ED0..0ED9
6557             UNKNOWN,                  // 0EDA..0EDB
6558             LAO,                      // 0EDC..0EDF
6559             UNKNOWN,                  // 0EE0..0EFF
6560             TIBETAN,                  // 0F00..0F47
6561             UNKNOWN,                  // 0F48
6562             TIBETAN,                  // 0F49..0F6C
6563             UNKNOWN,                  // 0F6D..0F70
6564             TIBETAN,                  // 0F71..0F97
6565             UNKNOWN,                  // 0F98
6566             TIBETAN,                  // 0F99..0FBC
6567             UNKNOWN,                  // 0FBD
6568             TIBETAN,                  // 0FBE..0FCC
6569             UNKNOWN,                  // 0FCD
6570             TIBETAN,                  // 0FCE..0FD4
6571             COMMON,                   // 0FD5..0FD8
6572             TIBETAN,                  // 0FD9..0FDA
6573             UNKNOWN,                  // 0FDB..0FFF
6574             MYANMAR,                  // 1000..109F
6575             GEORGIAN,                 // 10A0..10C5
6576             UNKNOWN,                  // 10C6
6577             GEORGIAN,                 // 10C7
6578             UNKNOWN,                  // 10C8..10CC
6579             GEORGIAN,                 // 10CD
6580             UNKNOWN,                  // 10CE..10CF
6581             GEORGIAN,                 // 10D0..10FA
6582             COMMON,                   // 10FB
6583             GEORGIAN,                 // 10FC..10FF
6584             HANGUL,                   // 1100..11FF
6585             ETHIOPIC,                 // 1200..1248
6586             UNKNOWN,                  // 1249
6587             ETHIOPIC,                 // 124A..124D
6588             UNKNOWN,                  // 124E..124F
6589             ETHIOPIC,                 // 1250..1256
6590             UNKNOWN,                  // 1257
6591             ETHIOPIC,                 // 1258
6592             UNKNOWN,                  // 1259
6593             ETHIOPIC,                 // 125A..125D
6594             UNKNOWN,                  // 125E..125F
6595             ETHIOPIC,                 // 1260..1288
6596             UNKNOWN,                  // 1289
6597             ETHIOPIC,                 // 128A..128D
6598             UNKNOWN,                  // 128E..128F
6599             ETHIOPIC,                 // 1290..12B0
6600             UNKNOWN,                  // 12B1
6601             ETHIOPIC,                 // 12B2..12B5
6602             UNKNOWN,                  // 12B6..12B7
6603             ETHIOPIC,                 // 12B8..12BE
6604             UNKNOWN,                  // 12BF
6605             ETHIOPIC,                 // 12C0
6606             UNKNOWN,                  // 12C1
6607             ETHIOPIC,                 // 12C2..12C5
6608             UNKNOWN,                  // 12C6..12C7
6609             ETHIOPIC,                 // 12C8..12D6
6610             UNKNOWN,                  // 12D7
6611             ETHIOPIC,                 // 12D8..1310
6612             UNKNOWN,                  // 1311
6613             ETHIOPIC,                 // 1312..1315
6614             UNKNOWN,                  // 1316..1317
6615             ETHIOPIC,                 // 1318..135A
6616             UNKNOWN,                  // 135B..135C
6617             ETHIOPIC,                 // 135D..137C
6618             UNKNOWN,                  // 137D..137F
6619             ETHIOPIC,                 // 1380..1399
6620             UNKNOWN,                  // 139A..139F
6621             CHEROKEE,                 // 13A0..13F5
6622             UNKNOWN,                  // 13F6..13F7
6623             CHEROKEE,                 // 13F8..13FD
6624             UNKNOWN,                  // 13FE..13FF
6625             CANADIAN_ABORIGINAL,      // 1400..167F
6626             OGHAM,                    // 1680..169C
6627             UNKNOWN,                  // 169D..169F
6628             RUNIC,                    // 16A0..16EA
6629             COMMON,                   // 16EB..16ED
6630             RUNIC,                    // 16EE..16F8
6631             UNKNOWN,                  // 16F9..16FF
6632             TAGALOG,                  // 1700..170C
6633             UNKNOWN,                  // 170D
6634             TAGALOG,                  // 170E..1714
6635             UNKNOWN,                  // 1715..171F
6636             HANUNOO,                  // 1720..1734
6637             COMMON,                   // 1735..1736
6638             UNKNOWN,                  // 1737..173F
6639             BUHID,                    // 1740..1753
6640             UNKNOWN,                  // 1754..175F
6641             TAGBANWA,                 // 1760..176C
6642             UNKNOWN,                  // 176D
6643             TAGBANWA,                 // 176E..1770
6644             UNKNOWN,                  // 1771
6645             TAGBANWA,                 // 1772..1773
6646             UNKNOWN,                  // 1774..177F
6647             KHMER,                    // 1780..17DD
6648             UNKNOWN,                  // 17DE..17DF
6649             KHMER,                    // 17E0..17E9
6650             UNKNOWN,                  // 17EA..17EF
6651             KHMER,                    // 17F0..17F9
6652             UNKNOWN,                  // 17FA..17FF
6653             MONGOLIAN,                // 1800..1801
6654             COMMON,                   // 1802..1803
6655             MONGOLIAN,                // 1804
6656             COMMON,                   // 1805
6657             MONGOLIAN,                // 1806..180E
6658             UNKNOWN,                  // 180F
6659             MONGOLIAN,                // 1810..1819
6660             UNKNOWN,                  // 181A..181F
6661             MONGOLIAN,                // 1820..1877
6662             UNKNOWN,                  // 1878..187F
6663             MONGOLIAN,                // 1880..18AA
6664             UNKNOWN,                  // 18AB..18AF
6665             CANADIAN_ABORIGINAL,      // 18B0..18F5
6666             UNKNOWN,                  // 18F6..18FF
6667             LIMBU,                    // 1900..191E
6668             UNKNOWN,                  // 191F
6669             LIMBU,                    // 1920..192B
6670             UNKNOWN,                  // 192C..192F
6671             LIMBU,                    // 1930..193B
6672             UNKNOWN,                  // 193C..193F
6673             LIMBU,                    // 1940
6674             UNKNOWN,                  // 1941..1943
6675             LIMBU,                    // 1944..194F
6676             TAI_LE,                   // 1950..196D
6677             UNKNOWN,                  // 196E..196F
6678             TAI_LE,                   // 1970..1974
6679             UNKNOWN,                  // 1975..197F
6680             NEW_TAI_LUE,              // 1980..19AB
6681             UNKNOWN,                  // 19AC..19AF
6682             NEW_TAI_LUE,              // 19B0..19C9
6683             UNKNOWN,                  // 19CA..19CF
6684             NEW_TAI_LUE,              // 19D0..19DA
6685             UNKNOWN,                  // 19DB..19DD
6686             NEW_TAI_LUE,              // 19DE..19DF
6687             KHMER,                    // 19E0..19FF
6688             BUGINESE,                 // 1A00..1A1B
6689             UNKNOWN,                  // 1A1C..1A1D
6690             BUGINESE,                 // 1A1E..1A1F
6691             TAI_THAM,                 // 1A20..1A5E
6692             UNKNOWN,                  // 1A5F
6693             TAI_THAM,                 // 1A60..1A7C
6694             UNKNOWN,                  // 1A7D..1A7E
6695             TAI_THAM,                 // 1A7F..1A89
6696             UNKNOWN,                  // 1A8A..1A8F
6697             TAI_THAM,                 // 1A90..1A99
6698             UNKNOWN,                  // 1A9A..1A9F
6699             TAI_THAM,                 // 1AA0..1AAD
6700             UNKNOWN,                  // 1AAE..1AAF
6701             INHERITED,                // 1AB0..1ABE
6702             UNKNOWN,                  // 1ABF..1AFF
6703             BALINESE,                 // 1B00..1B4B
6704             UNKNOWN,                  // 1B4C..1B4F
6705             BALINESE,                 // 1B50..1B7C
6706             UNKNOWN,                  // 1B7D..1B7F
6707             SUNDANESE,                // 1B80..1BBF
6708             BATAK,                    // 1BC0..1BF3
6709             UNKNOWN,                  // 1BF4..1BFB
6710             BATAK,                    // 1BFC..1BFF
6711             LEPCHA,                   // 1C00..1C37
6712             UNKNOWN,                  // 1C38..1C3A
6713             LEPCHA,                   // 1C3B..1C49
6714             UNKNOWN,                  // 1C4A..1C4C
6715             LEPCHA,                   // 1C4D..1C4F
6716             OL_CHIKI,                 // 1C50..1C7F
6717             CYRILLIC,                 // 1C80..1C88
6718             UNKNOWN,                  // 1C89..1CBF
6719             SUNDANESE,                // 1CC0..1CC7
6720             UNKNOWN,                  // 1CC8..1CCF
6721             INHERITED,                // 1CD0..1CD2
6722             COMMON,                   // 1CD3
6723             INHERITED,                // 1CD4..1CE0
6724             COMMON,                   // 1CE1
6725             INHERITED,                // 1CE2..1CE8
6726             COMMON,                   // 1CE9..1CEC
6727             INHERITED,                // 1CED
6728             COMMON,                   // 1CEE..1CF3
6729             INHERITED,                // 1CF4
6730             COMMON,                   // 1CF5..1CF7
6731             INHERITED,                // 1CF8..1CF9
6732             UNKNOWN,                  // 1CFA..1CFF
6733             LATIN,                    // 1D00..1D25
6734             GREEK,                    // 1D26..1D2A
6735             CYRILLIC,                 // 1D2B
6736             LATIN,                    // 1D2C..1D5C
6737             GREEK,                    // 1D5D..1D61
6738             LATIN,                    // 1D62..1D65
6739             GREEK,                    // 1D66..1D6A
6740             LATIN,                    // 1D6B..1D77
6741             CYRILLIC,                 // 1D78
6742             LATIN,                    // 1D79..1DBE
6743             GREEK,                    // 1DBF
6744             INHERITED,                // 1DC0..1DF9
6745             UNKNOWN,                  // 1DFA
6746             INHERITED,                // 1DFB..1DFF
6747             LATIN,                    // 1E00..1EFF
6748             GREEK,                    // 1F00..1F15
6749             UNKNOWN,                  // 1F16..1F17
6750             GREEK,                    // 1F18..1F1D
6751             UNKNOWN,                  // 1F1E..1F1F
6752             GREEK,                    // 1F20..1F45
6753             UNKNOWN,                  // 1F46..1F47
6754             GREEK,                    // 1F48..1F4D
6755             UNKNOWN,                  // 1F4E..1F4F
6756             GREEK,                    // 1F50..1F57
6757             UNKNOWN,                  // 1F58
6758             GREEK,                    // 1F59
6759             UNKNOWN,                  // 1F5A
6760             GREEK,                    // 1F5B
6761             UNKNOWN,                  // 1F5C
6762             GREEK,                    // 1F5D
6763             UNKNOWN,                  // 1F5E
6764             GREEK,                    // 1F5F..1F7D
6765             UNKNOWN,                  // 1F7E..1F7F
6766             GREEK,                    // 1F80..1FB4
6767             UNKNOWN,                  // 1FB5
6768             GREEK,                    // 1FB6..1FC4
6769             UNKNOWN,                  // 1FC5
6770             GREEK,                    // 1FC6..1FD3
6771             UNKNOWN,                  // 1FD4..1FD5
6772             GREEK,                    // 1FD6..1FDB
6773             UNKNOWN,                  // 1FDC
6774             GREEK,                    // 1FDD..1FEF
6775             UNKNOWN,                  // 1FF0..1FF1
6776             GREEK,                    // 1FF2..1FF4
6777             UNKNOWN,                  // 1FF5
6778             GREEK,                    // 1FF6..1FFE
6779             UNKNOWN,                  // 1FFF
6780             COMMON,                   // 2000..200B
6781             INHERITED,                // 200C..200D
6782             COMMON,                   // 200E..2064
6783             UNKNOWN,                  // 2065
6784             COMMON,                   // 2066..2070
6785             LATIN,                    // 2071
6786             UNKNOWN,                  // 2072..2073
6787             COMMON,                   // 2074..207E
6788             LATIN,                    // 207F
6789             COMMON,                   // 2080..208E
6790             UNKNOWN,                  // 208F
6791             LATIN,                    // 2090..209C
6792             UNKNOWN,                  // 209D..209F
6793             COMMON,                   // 20A0..20BF
6794             UNKNOWN,                  // 20C0..20CF
6795             INHERITED,                // 20D0..20F0
6796             UNKNOWN,                  // 20F1..20FF
6797             COMMON,                   // 2100..2125
6798             GREEK,                    // 2126
6799             COMMON,                   // 2127..2129
6800             LATIN,                    // 212A..212B
6801             COMMON,                   // 212C..2131
6802             LATIN,                    // 2132
6803             COMMON,                   // 2133..214D
6804             LATIN,                    // 214E
6805             COMMON,                   // 214F..215F
6806             LATIN,                    // 2160..2188
6807             COMMON,                   // 2189..218B
6808             UNKNOWN,                  // 218C..218F
6809             COMMON,                   // 2190..2426
6810             UNKNOWN,                  // 2427..243F
6811             COMMON,                   // 2440..244A
6812             UNKNOWN,                  // 244B..245F
6813             COMMON,                   // 2460..27FF
6814             BRAILLE,                  // 2800..28FF
6815             COMMON,                   // 2900..2B73
6816             UNKNOWN,                  // 2B74..2B75
6817             COMMON,                   // 2B76..2B95
6818             UNKNOWN,                  // 2B96..2B97
6819             COMMON,                   // 2B98..2BB9
6820             UNKNOWN,                  // 2BBA..2BBC
6821             COMMON,                   // 2BBD..2BC8
6822             UNKNOWN,                  // 2BC9
6823             COMMON,                   // 2BCA..2BD2
6824             UNKNOWN,                  // 2BD3..2BEB
6825             COMMON,                   // 2BEC..2BEF
6826             UNKNOWN,                  // 2BF0..2BFF
6827             GLAGOLITIC,               // 2C00..2C2E
6828             UNKNOWN,                  // 2C2F
6829             GLAGOLITIC,               // 2C30..2C5E
6830             UNKNOWN,                  // 2C5F
6831             LATIN,                    // 2C60..2C7F
6832             COPTIC,                   // 2C80..2CF3
6833             UNKNOWN,                  // 2CF4..2CF8
6834             COPTIC,                   // 2CF9..2CFF
6835             GEORGIAN,                 // 2D00..2D25
6836             UNKNOWN,                  // 2D26
6837             GEORGIAN,                 // 2D27
6838             UNKNOWN,                  // 2D28..2D2C
6839             GEORGIAN,                 // 2D2D
6840             UNKNOWN,                  // 2D2E..2D2F
6841             TIFINAGH,                 // 2D30..2D67
6842             UNKNOWN,                  // 2D68..2D6E
6843             TIFINAGH,                 // 2D6F..2D70
6844             UNKNOWN,                  // 2D71..2D7E
6845             TIFINAGH,                 // 2D7F
6846             ETHIOPIC,                 // 2D80..2D96
6847             UNKNOWN,                  // 2D97..2D9F
6848             ETHIOPIC,                 // 2DA0..2DA6
6849             UNKNOWN,                  // 2DA7
6850             ETHIOPIC,                 // 2DA8..2DAE
6851             UNKNOWN,                  // 2DAF
6852             ETHIOPIC,                 // 2DB0..2DB6
6853             UNKNOWN,                  // 2DB7
6854             ETHIOPIC,                 // 2DB8..2DBE
6855             UNKNOWN,                  // 2DBF
6856             ETHIOPIC,                 // 2DC0..2DC6
6857             UNKNOWN,                  // 2DC7
6858             ETHIOPIC,                 // 2DC8..2DCE
6859             UNKNOWN,                  // 2DCF
6860             ETHIOPIC,                 // 2DD0..2DD6
6861             UNKNOWN,                  // 2DD7
6862             ETHIOPIC,                 // 2DD8..2DDE
6863             UNKNOWN,                  // 2DDF
6864             CYRILLIC,                 // 2DE0..2DFF
6865             COMMON,                   // 2E00..2E49
6866             UNKNOWN,                  // 2E4A..2E7F
6867             HAN,                      // 2E80..2E99
6868             UNKNOWN,                  // 2E9A
6869             HAN,                      // 2E9B..2EF3
6870             UNKNOWN,                  // 2EF4..2EFF
6871             HAN,                      // 2F00..2FD5
6872             UNKNOWN,                  // 2FD6..2FEF
6873             COMMON,                   // 2FF0..2FFB
6874             UNKNOWN,                  // 2FFC..2FFF
6875             COMMON,                   // 3000..3004
6876             HAN,                      // 3005
6877             COMMON,                   // 3006
6878             HAN,                      // 3007
6879             COMMON,                   // 3008..3020
6880             HAN,                      // 3021..3029
6881             INHERITED,                // 302A..302D
6882             HANGUL,                   // 302E..302F
6883             COMMON,                   // 3030..3037
6884             HAN,                      // 3038..303B
6885             COMMON,                   // 303C..303F
6886             UNKNOWN,                  // 3040
6887             HIRAGANA,                 // 3041..3096
6888             UNKNOWN,                  // 3097..3098
6889             INHERITED,                // 3099..309A
6890             COMMON,                   // 309B..309C
6891             HIRAGANA,                 // 309D..309F
6892             COMMON,                   // 30A0
6893             KATAKANA,                 // 30A1..30FA
6894             COMMON,                   // 30FB..30FC
6895             KATAKANA,                 // 30FD..30FF
6896             UNKNOWN,                  // 3100..3104
6897             BOPOMOFO,                 // 3105..312E
6898             UNKNOWN,                  // 312F..3130
6899             HANGUL,                   // 3131..318E
6900             UNKNOWN,                  // 318F
6901             COMMON,                   // 3190..319F
6902             BOPOMOFO,                 // 31A0..31BA
6903             UNKNOWN,                  // 31BB..31BF
6904             COMMON,                   // 31C0..31E3
6905             UNKNOWN,                  // 31E4..31EF
6906             KATAKANA,                 // 31F0..31FF
6907             HANGUL,                   // 3200..321E
6908             UNKNOWN,                  // 321F
6909             COMMON,                   // 3220..325F
6910             HANGUL,                   // 3260..327E
6911             COMMON,                   // 327F..32CF
6912             KATAKANA,                 // 32D0..32FE
6913             COMMON,                   // 32FF
6914             KATAKANA,                 // 3300..3357
6915             COMMON,                   // 3358..33FF
6916             HAN,                      // 3400..4DB5
6917             UNKNOWN,                  // 4DB6..4DBF
6918             COMMON,                   // 4DC0..4DFF
6919             HAN,                      // 4E00..9FEA
6920             UNKNOWN,                  // 9FEB..9FFF
6921             YI,                       // A000..A48C
6922             UNKNOWN,                  // A48D..A48F
6923             YI,                       // A490..A4C6
6924             UNKNOWN,                  // A4C7..A4CF
6925             LISU,                     // A4D0..A4FF
6926             VAI,                      // A500..A62B
6927             UNKNOWN,                  // A62C..A63F
6928             CYRILLIC,                 // A640..A69F
6929             BAMUM,                    // A6A0..A6F7
6930             UNKNOWN,                  // A6F8..A6FF
6931             COMMON,                   // A700..A721
6932             LATIN,                    // A722..A787
6933             COMMON,                   // A788..A78A
6934             LATIN,                    // A78B..A7AE
6935             UNKNOWN,                  // A7AF
6936             LATIN,                    // A7B0..A7B7
6937             UNKNOWN,                  // A7B8..A7F6
6938             LATIN,                    // A7F7..A7FF
6939             SYLOTI_NAGRI,             // A800..A82B
6940             UNKNOWN,                  // A82C..A82F
6941             COMMON,                   // A830..A839
6942             UNKNOWN,                  // A83A..A83F
6943             PHAGS_PA,                 // A840..A877
6944             UNKNOWN,                  // A878..A87F
6945             SAURASHTRA,               // A880..A8C5
6946             UNKNOWN,                  // A8C6..A8CD
6947             SAURASHTRA,               // A8CE..A8D9
6948             UNKNOWN,                  // A8DA..A8DF
6949             DEVANAGARI,               // A8E0..A8FD
6950             UNKNOWN,                  // A8FE..A8FF
6951             KAYAH_LI,                 // A900..A92D
6952             COMMON,                   // A92E
6953             KAYAH_LI,                 // A92F
6954             REJANG,                   // A930..A953
6955             UNKNOWN,                  // A954..A95E
6956             REJANG,                   // A95F
6957             HANGUL,                   // A960..A97C
6958             UNKNOWN,                  // A97D..A97F
6959             JAVANESE,                 // A980..A9CD
6960             UNKNOWN,                  // A9CE
6961             COMMON,                   // A9CF
6962             JAVANESE,                 // A9D0..A9D9
6963             UNKNOWN,                  // A9DA..A9DD
6964             JAVANESE,                 // A9DE..A9DF
6965             MYANMAR,                  // A9E0..A9FE
6966             UNKNOWN,                  // A9FF
6967             CHAM,                     // AA00..AA36
6968             UNKNOWN,                  // AA37..AA3F
6969             CHAM,                     // AA40..AA4D
6970             UNKNOWN,                  // AA4E..AA4F
6971             CHAM,                     // AA50..AA59
6972             UNKNOWN,                  // AA5A..AA5B
6973             CHAM,                     // AA5C..AA5F
6974             MYANMAR,                  // AA60..AA7F
6975             TAI_VIET,                 // AA80..AAC2
6976             UNKNOWN,                  // AAC3..AADA
6977             TAI_VIET,                 // AADB..AADF
6978             MEETEI_MAYEK,             // AAE0..AAF6
6979             UNKNOWN,                  // AAF7..AB00
6980             ETHIOPIC,                 // AB01..AB06
6981             UNKNOWN,                  // AB07..AB08
6982             ETHIOPIC,                 // AB09..AB0E
6983             UNKNOWN,                  // AB0F..AB10
6984             ETHIOPIC,                 // AB11..AB16
6985             UNKNOWN,                  // AB17..AB1F
6986             ETHIOPIC,                 // AB20..AB26
6987             UNKNOWN,                  // AB27
6988             ETHIOPIC,                 // AB28..AB2E
6989             UNKNOWN,                  // AB2F
6990             LATIN,                    // AB30..AB5A
6991             COMMON,                   // AB5B
6992             LATIN,                    // AB5C..AB64
6993             GREEK,                    // AB65
6994             UNKNOWN,                  // AB66..AB6F
6995             CHEROKEE,                 // AB70..ABBF
6996             MEETEI_MAYEK,             // ABC0..ABED
6997             UNKNOWN,                  // ABEE..ABEF
6998             MEETEI_MAYEK,             // ABF0..ABF9
6999             UNKNOWN,                  // ABFA..ABFF
7000             HANGUL,                   // AC00..D7A3
7001             UNKNOWN,                  // D7A4..D7AF
7002             HANGUL,                   // D7B0..D7C6
7003             UNKNOWN,                  // D7C7..D7CA
7004             HANGUL,                   // D7CB..D7FB
7005             UNKNOWN,                  // D7FC..F8FF
7006             HAN,                      // F900..FA6D
7007             UNKNOWN,                  // FA6E..FA6F
7008             HAN,                      // FA70..FAD9
7009             UNKNOWN,                  // FADA..FAFF
7010             LATIN,                    // FB00..FB06
7011             UNKNOWN,                  // FB07..FB12
7012             ARMENIAN,                 // FB13..FB17
7013             UNKNOWN,                  // FB18..FB1C
7014             HEBREW,                   // FB1D..FB36
7015             UNKNOWN,                  // FB37
7016             HEBREW,                   // FB38..FB3C
7017             UNKNOWN,                  // FB3D
7018             HEBREW,                   // FB3E
7019             UNKNOWN,                  // FB3F
7020             HEBREW,                   // FB40..FB41
7021             UNKNOWN,                  // FB42
7022             HEBREW,                   // FB43..FB44
7023             UNKNOWN,                  // FB45
7024             HEBREW,                   // FB46..FB4F
7025             ARABIC,                   // FB50..FBC1
7026             UNKNOWN,                  // FBC2..FBD2
7027             ARABIC,                   // FBD3..FD3D
7028             COMMON,                   // FD3E..FD3F
7029             UNKNOWN,                  // FD40..FD4F
7030             ARABIC,                   // FD50..FD8F
7031             UNKNOWN,                  // FD90..FD91
7032             ARABIC,                   // FD92..FDC7
7033             UNKNOWN,                  // FDC8..FDEF
7034             ARABIC,                   // FDF0..FDFD
7035             UNKNOWN,                  // FDFE..FDFF
7036             INHERITED,                // FE00..FE0F
7037             COMMON,                   // FE10..FE19
7038             UNKNOWN,                  // FE1A..FE1F
7039             INHERITED,                // FE20..FE2D
7040             CYRILLIC,                 // FE2E..FE2F
7041             COMMON,                   // FE30..FE52
7042             UNKNOWN,                  // FE53
7043             COMMON,                   // FE54..FE66
7044             UNKNOWN,                  // FE67
7045             COMMON,                   // FE68..FE6B
7046             UNKNOWN,                  // FE6C..FE6F
7047             ARABIC,                   // FE70..FE74
7048             UNKNOWN,                  // FE75
7049             ARABIC,                   // FE76..FEFC
7050             UNKNOWN,                  // FEFD..FEFE
7051             COMMON,                   // FEFF
7052             UNKNOWN,                  // FF00
7053             COMMON,                   // FF01..FF20
7054             LATIN,                    // FF21..FF3A
7055             COMMON,                   // FF3B..FF40
7056             LATIN,                    // FF41..FF5A
7057             COMMON,                   // FF5B..FF65
7058             KATAKANA,                 // FF66..FF6F
7059             COMMON,                   // FF70
7060             KATAKANA,                 // FF71..FF9D
7061             COMMON,                   // FF9E..FF9F
7062             HANGUL,                   // FFA0..FFBE
7063             UNKNOWN,                  // FFBF..FFC1
7064             HANGUL,                   // FFC2..FFC7
7065             UNKNOWN,                  // FFC8..FFC9
7066             HANGUL,                   // FFCA..FFCF
7067             UNKNOWN,                  // FFD0..FFD1
7068             HANGUL,                   // FFD2..FFD7
7069             UNKNOWN,                  // FFD8..FFD9
7070             HANGUL,                   // FFDA..FFDC
7071             UNKNOWN,                  // FFDD..FFDF
7072             COMMON,                   // FFE0..FFE6
7073             UNKNOWN,                  // FFE7
7074             COMMON,                   // FFE8..FFEE
7075             UNKNOWN,                  // FFEF..FFF8
7076             COMMON,                   // FFF9..FFFD
7077             UNKNOWN,                  // FFFE..FFFF
7078             LINEAR_B,                 // 10000..1000B
7079             UNKNOWN,                  // 1000C
7080             LINEAR_B,                 // 1000D..10026
7081             UNKNOWN,                  // 10027
7082             LINEAR_B,                 // 10028..1003A
7083             UNKNOWN,                  // 1003B
7084             LINEAR_B,                 // 1003C..1003D
7085             UNKNOWN,                  // 1003E
7086             LINEAR_B,                 // 1003F..1004D
7087             UNKNOWN,                  // 1004E..1004F
7088             LINEAR_B,                 // 10050..1005D
7089             UNKNOWN,                  // 1005E..1007F
7090             LINEAR_B,                 // 10080..100FA
7091             UNKNOWN,                  // 100FB..100FF
7092             COMMON,                   // 10100..10102
7093             UNKNOWN,                  // 10103..10106
7094             COMMON,                   // 10107..10133
7095             UNKNOWN,                  // 10134..10136
7096             COMMON,                   // 10137..1013F
7097             GREEK,                    // 10140..1018E
7098             UNKNOWN,                  // 1018F
7099             COMMON,                   // 10190..1019B
7100             UNKNOWN,                  // 1019C..1019F
7101             GREEK,                    // 101A0
7102             UNKNOWN,                  // 101A1..101CF
7103             COMMON,                   // 101D0..101FC
7104             INHERITED,                // 101FD
7105             UNKNOWN,                  // 101FE..1027F
7106             LYCIAN,                   // 10280..1029C
7107             UNKNOWN,                  // 1029D..1029F
7108             CARIAN,                   // 102A0..102D0
7109             UNKNOWN,                  // 102D1..102DF
7110             INHERITED,                // 102E0
7111             COMMON,                   // 102E1..102FB
7112             UNKNOWN,                  // 102FC..102FF
7113             OLD_ITALIC,               // 10300..10323
7114             UNKNOWN,                  // 10324..1032C
7115             OLD_ITALIC,               // 1032D..1032F
7116             GOTHIC,                   // 10330..1034A
7117             UNKNOWN,                  // 1034B..1034F
7118             OLD_PERMIC,               // 10350..1037A
7119             UNKNOWN,                  // 1037B..1037F
7120             UGARITIC,                 // 10380..1039D
7121             UNKNOWN,                  // 1039E
7122             UGARITIC,                 // 1039F
7123             OLD_PERSIAN,              // 103A0..103C3
7124             UNKNOWN,                  // 103C4..103C7
7125             OLD_PERSIAN,              // 103C8..103D5
7126             UNKNOWN,                  // 103D6..103FF
7127             DESERET,                  // 10400..1044F
7128             SHAVIAN,                  // 10450..1047F
7129             OSMANYA,                  // 10480..1049D
7130             UNKNOWN,                  // 1049E..1049F
7131             OSMANYA,                  // 104A0..104A9
7132             UNKNOWN,                  // 104AA..104AF
7133             OSAGE,                    // 104B0..104D3
7134             UNKNOWN,                  // 104D4..104D7
7135             OSAGE,                    // 104D8..104FB
7136             UNKNOWN,                  // 104FC..104FF
7137             ELBASAN,                  // 10500..10527
7138             UNKNOWN,                  // 10528..1052F
7139             CAUCASIAN_ALBANIAN,       // 10530..10563
7140             UNKNOWN,                  // 10564..1056E
7141             CAUCASIAN_ALBANIAN,       // 1056F
7142             UNKNOWN,                  // 10570..105FF
7143             LINEAR_A,                 // 10600..10736
7144             UNKNOWN,                  // 10737..1073F
7145             LINEAR_A,                 // 10740..10755
7146             UNKNOWN,                  // 10756..1075F
7147             LINEAR_A,                 // 10760..10767
7148             UNKNOWN,                  // 10768..107FF
7149             CYPRIOT,                  // 10800..10805
7150             UNKNOWN,                  // 10806..10807
7151             CYPRIOT,                  // 10808
7152             UNKNOWN,                  // 10809
7153             CYPRIOT,                  // 1080A..10835
7154             UNKNOWN,                  // 10836
7155             CYPRIOT,                  // 10837..10838
7156             UNKNOWN,                  // 10839..1083B
7157             CYPRIOT,                  // 1083C
7158             UNKNOWN,                  // 1083D..1083E
7159             CYPRIOT,                  // 1083F
7160             IMPERIAL_ARAMAIC,         // 10840..10855
7161             UNKNOWN,                  // 10856
7162             IMPERIAL_ARAMAIC,         // 10857..1085F
7163             PALMYRENE,                // 10860..1087F
7164             NABATAEAN,                // 10880..1089E
7165             UNKNOWN,                  // 1089F..108A6
7166             NABATAEAN,                // 108A7..108AF
7167             UNKNOWN,                  // 108B0..108DF
7168             HATRAN,                   // 108E0..108F2
7169             UNKNOWN,                  // 108F3
7170             HATRAN,                   // 108F4..108F5
7171             UNKNOWN,                  // 108F6..108FA
7172             HATRAN,                   // 108FB..108FF
7173             PHOENICIAN,               // 10900..1091B
7174             UNKNOWN,                  // 1091C..1091E
7175             PHOENICIAN,               // 1091F
7176             LYDIAN,                   // 10920..10939
7177             UNKNOWN,                  // 1093A..1093E
7178             LYDIAN,                   // 1093F
7179             UNKNOWN,                  // 10940..1097F
7180             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7181             MEROITIC_CURSIVE,         // 109A0..109B7
7182             UNKNOWN,                  // 109B8..109BB
7183             MEROITIC_CURSIVE,         // 109BC..109CF
7184             UNKNOWN,                  // 109D0..109D1
7185             MEROITIC_CURSIVE,         // 109D2..109FF
7186             KHAROSHTHI,               // 10A00..10A03
7187             UNKNOWN,                  // 10A04
7188             KHAROSHTHI,               // 10A05..10A06
7189             UNKNOWN,                  // 10A07..10A0B
7190             KHAROSHTHI,               // 10A0C..10A13
7191             UNKNOWN,                  // 10A14
7192             KHAROSHTHI,               // 10A15..10A17
7193             UNKNOWN,                  // 10A18
7194             KHAROSHTHI,               // 10A19..10A33
7195             UNKNOWN,                  // 10A34..10A37
7196             KHAROSHTHI,               // 10A38..10A3A
7197             UNKNOWN,                  // 10A3B..10A3E
7198             KHAROSHTHI,               // 10A3F..10A47
7199             UNKNOWN,                  // 10A48..10A4F
7200             KHAROSHTHI,               // 10A50..10A58
7201             UNKNOWN,                  // 10A59..10A5F
7202             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7203             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7204             UNKNOWN,                  // 10AA0..10ABF
7205             MANICHAEAN,               // 10AC0..10AE6
7206             UNKNOWN,                  // 10AE7..10AEA
7207             MANICHAEAN,               // 10AEB..10AF6
7208             UNKNOWN,                  // 10AF7..10AFF
7209             AVESTAN,                  // 10B00..10B35
7210             UNKNOWN,                  // 10B36..10B38
7211             AVESTAN,                  // 10B39..10B3F
7212             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7213             UNKNOWN,                  // 10B56..10B57
7214             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7215             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7216             UNKNOWN,                  // 10B73..10B77
7217             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7218             PSALTER_PAHLAVI,          // 10B80..10B91
7219             UNKNOWN,                  // 10B92..10B98
7220             PSALTER_PAHLAVI,          // 10B99..10B9C
7221             UNKNOWN,                  // 10B9D..10BA8
7222             PSALTER_PAHLAVI,          // 10BA9..10BAF
7223             UNKNOWN,                  // 10BB0..10BFF
7224             OLD_TURKIC,               // 10C00..10C48
7225             UNKNOWN,                  // 10C49..10C7F
7226             OLD_HUNGARIAN,            // 10C80..10CB2
7227             UNKNOWN,                  // 10CB3..10CBF
7228             OLD_HUNGARIAN,            // 10CC0..10CF2
7229             UNKNOWN,                  // 10CF3..10CF9
7230             OLD_HUNGARIAN,            // 10CFA..10CFF
7231             UNKNOWN,                  // 10D00..10E5F
7232             ARABIC,                   // 10E60..10E7E
7233             UNKNOWN,                  // 10E7F..10FFF
7234             BRAHMI,                   // 11000..1104D
7235             UNKNOWN,                  // 1104E..11051
7236             BRAHMI,                   // 11052..1106F
7237             UNKNOWN,                  // 11070..1107E
7238             BRAHMI,                   // 1107F
7239             KAITHI,                   // 11080..110C1
7240             UNKNOWN,                  // 110C2..110CF
7241             SORA_SOMPENG,             // 110D0..110E8
7242             UNKNOWN,                  // 110E9..110EF
7243             SORA_SOMPENG,             // 110F0..110F9
7244             UNKNOWN,                  // 110FA..110FF
7245             CHAKMA,                   // 11100..11134
7246             UNKNOWN,                  // 11135
7247             CHAKMA,                   // 11136..11143
7248             UNKNOWN,                  // 11144..1114F
7249             MAHAJANI,                 // 11150..11176
7250             UNKNOWN,                  // 11177..1117F
7251             SHARADA,                  // 11180..111CD
7252             UNKNOWN,                  // 111CE..111CF
7253             SHARADA,                  // 111D0..111DF
7254             UNKNOWN,                  // 111E0
7255             SINHALA,                  // 111E1..111F4
7256             UNKNOWN,                  // 111F5..111FF
7257             KHOJKI,                   // 11200..11211
7258             UNKNOWN,                  // 11212
7259             KHOJKI,                   // 11213..1123E
7260             UNKNOWN,                  // 1123F..1127F
7261             MULTANI,                  // 11280..11286
7262             UNKNOWN,                  // 11287
7263             MULTANI,                  // 11288
7264             UNKNOWN,                  // 11289
7265             MULTANI,                  // 1128A..1128D
7266             UNKNOWN,                  // 1128E
7267             MULTANI,                  // 1128F..1129D
7268             UNKNOWN,                  // 1129E
7269             MULTANI,                  // 1129F..112A9
7270             UNKNOWN,                  // 112AA..112AF
7271             KHUDAWADI,                // 112B0..112EA
7272             UNKNOWN,                  // 112EB..112EF
7273             KHUDAWADI,                // 112F0..112F9
7274             UNKNOWN,                  // 112FA..112FF
7275             GRANTHA,                  // 11300..11303
7276             UNKNOWN,                  // 11304
7277             GRANTHA,                  // 11305..1130C
7278             UNKNOWN,                  // 1130D..1130E
7279             GRANTHA,                  // 1130F..11310
7280             UNKNOWN,                  // 11311..11312
7281             GRANTHA,                  // 11313..11328
7282             UNKNOWN,                  // 11329
7283             GRANTHA,                  // 1132A..11330
7284             UNKNOWN,                  // 11331
7285             GRANTHA,                  // 11332..11333
7286             UNKNOWN,                  // 11334
7287             GRANTHA,                  // 11335..11339
7288             UNKNOWN,                  // 1133A..1133B
7289             GRANTHA,                  // 1133C..11344
7290             UNKNOWN,                  // 11345..11346
7291             GRANTHA,                  // 11347..11348
7292             UNKNOWN,                  // 11349..1134A
7293             GRANTHA,                  // 1134B..1134D
7294             UNKNOWN,                  // 1134E..1134F
7295             GRANTHA,                  // 11350
7296             UNKNOWN,                  // 11351..11356
7297             GRANTHA,                  // 11357
7298             UNKNOWN,                  // 11358..1135C
7299             GRANTHA,                  // 1135D..11363
7300             UNKNOWN,                  // 11364..11365
7301             GRANTHA,                  // 11366..1136C
7302             UNKNOWN,                  // 1136D..1136F
7303             GRANTHA,                  // 11370..11374
7304             UNKNOWN,                  // 11375..113FF
7305             NEWA,                     // 11400..11459
7306             UNKNOWN,                  // 1145A
7307             NEWA,                     // 1145B
7308             UNKNOWN,                  // 1145C
7309             NEWA,                     // 1145D
7310             UNKNOWN,                  // 1145E..1147F
7311             TIRHUTA,                  // 11480..114C7
7312             UNKNOWN,                  // 114C8..114CF
7313             TIRHUTA,                  // 114D0..114D9
7314             UNKNOWN,                  // 114DA..1157F
7315             SIDDHAM,                  // 11580..115B5
7316             UNKNOWN,                  // 115B6..115B7
7317             SIDDHAM,                  // 115B8..115DD
7318             UNKNOWN,                  // 115DE..115FF
7319             MODI,                     // 11600..11644
7320             UNKNOWN,                  // 11645..1164F
7321             MODI,                     // 11650..11659
7322             UNKNOWN,                  // 1165A..1165F
7323             MONGOLIAN,                // 11660..1166C
7324             UNKNOWN,                  // 1166D..1167F
7325             TAKRI,                    // 11680..116B7
7326             UNKNOWN,                  // 116B8..116BF
7327             TAKRI,                    // 116C0..116C9
7328             UNKNOWN,                  // 116CA..116FF
7329             AHOM,                     // 11700..11719
7330             UNKNOWN,                  // 1171A..1171C
7331             AHOM,                     // 1171D..1172B
7332             UNKNOWN,                  // 1172C..1172F
7333             AHOM,                     // 11730..1173F
7334             UNKNOWN,                  // 11740..1189F
7335             WARANG_CITI,              // 118A0..118F2
7336             UNKNOWN,                  // 118F3..118FE
7337             WARANG_CITI,              // 118FF
7338             UNKNOWN,                  // 11900..119FF
7339             ZANABAZAR_SQUARE,         // 11A00..11A47
7340             UNKNOWN,                  // 11A48..11A4F
7341             SOYOMBO,                  // 11A50..11A83
7342             UNKNOWN,                  // 11A84..11A85
7343             SOYOMBO,                  // 11A86..11A9C
7344             UNKNOWN,                  // 11A9D
7345             SOYOMBO,                  // 11A9E..11AA2
7346             UNKNOWN,                  // 11AA3..11ABF
7347             PAU_CIN_HAU,              // 11AC0..11AF8
7348             UNKNOWN,                  // 11AF9..11BFF
7349             BHAIKSUKI,                // 11C00..11C08
7350             UNKNOWN,                  // 11C09
7351             BHAIKSUKI,                // 11C0A..11C36
7352             UNKNOWN,                  // 11C37
7353             BHAIKSUKI,                // 11C38..11C45
7354             UNKNOWN,                  // 11C46..11C4F
7355             BHAIKSUKI,                // 11C50..11C6C
7356             UNKNOWN,                  // 11C6D..11C6F
7357             MARCHEN,                  // 11C70..11C8F
7358             UNKNOWN,                  // 11C90..11C91
7359             MARCHEN,                  // 11C92..11CA7
7360             UNKNOWN,                  // 11CA8
7361             MARCHEN,                  // 11CA9..11CB6
7362             UNKNOWN,                  // 11CB7..11CFF
7363             MASARAM_GONDI,            // 11D00..11D06
7364             UNKNOWN,                  // 11D07
7365             MASARAM_GONDI,            // 11D08..11D09
7366             UNKNOWN,                  // 11D0A
7367             MASARAM_GONDI,            // 11D0B..11D36
7368             UNKNOWN,                  // 11D37..11D39
7369             MASARAM_GONDI,            // 11D3A
7370             UNKNOWN,                  // 11D3B
7371             MASARAM_GONDI,            // 11D3C..11D3D
7372             UNKNOWN,                  // 11D3E
7373             MASARAM_GONDI,            // 11D3F..11D47
7374             UNKNOWN,                  // 11D48..11D4F
7375             MASARAM_GONDI,            // 11D50..11D59
7376             UNKNOWN,                  // 11D5A..11FFF
7377             CUNEIFORM,                // 12000..12399
7378             UNKNOWN,                  // 1239A..123FF
7379             CUNEIFORM,                // 12400..1246E
7380             UNKNOWN,                  // 1246F
7381             CUNEIFORM,                // 12470..12474
7382             UNKNOWN,                  // 12475..1247F
7383             CUNEIFORM,                // 12480..12543
7384             UNKNOWN,                  // 12544..12FFF
7385             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
7386             UNKNOWN,                  // 1342F..143FF
7387             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
7388             UNKNOWN,                  // 14647..167FF
7389             BAMUM,                    // 16800..16A38
7390             UNKNOWN,                  // 16A39..16A3F
7391             MRO,                      // 16A40..16A5E
7392             UNKNOWN,                  // 16A5F
7393             MRO,                      // 16A60..16A69
7394             UNKNOWN,                  // 16A6A..16A6D
7395             MRO,                      // 16A6E..16A6F
7396             UNKNOWN,                  // 16A70..16ACF
7397             BASSA_VAH,                // 16AD0..16AED
7398             UNKNOWN,                  // 16AEE..16AEF
7399             BASSA_VAH,                // 16AF0..16AF5
7400             UNKNOWN,                  // 16AF6..16AFF
7401             PAHAWH_HMONG,             // 16B00..16B45
7402             UNKNOWN,                  // 16B46..16B4F
7403             PAHAWH_HMONG,             // 16B50..16B59
7404             UNKNOWN,                  // 16B5A
7405             PAHAWH_HMONG,             // 16B5B..16B61
7406             UNKNOWN,                  // 16B62
7407             PAHAWH_HMONG,             // 16B63..16B77
7408             UNKNOWN,                  // 16B78..16B7C
7409             PAHAWH_HMONG,             // 16B7D..16B8F
7410             UNKNOWN,                  // 16B90..16EFF
7411             MIAO,                     // 16F00..16F44
7412             UNKNOWN,                  // 16F45..16F4F
7413             MIAO,                     // 16F50..16F7E
7414             UNKNOWN,                  // 16F7F..16F8E
7415             MIAO,                     // 16F8F..16F9F
7416             UNKNOWN,                  // 16FA0..16FDF
7417             TANGUT,                   // 16FE0
7418             NUSHU,                    // 16FE1
7419             UNKNOWN,                  // 16FE2..16FFF
7420             TANGUT,                   // 17000..187EC
7421             UNKNOWN,                  // 187ED..187FF
7422             TANGUT,                   // 18800..18AF2
7423             UNKNOWN,                  // 18AF3..1AFFF
7424             KATAKANA,                 // 1B000
7425             HIRAGANA,                 // 1B001..1B11E
7426             UNKNOWN,                  // 1B11F..1B16F
7427             NUSHU,                    // 1B170..1B2FB
7428             UNKNOWN,                  // 1B2FC..1BBFF
7429             DUPLOYAN,                 // 1BC00..1BC6A
7430             UNKNOWN,                  // 1BC6B..1BC6F
7431             DUPLOYAN,                 // 1BC70..1BC7C
7432             UNKNOWN,                  // 1BC7D..1BC7F
7433             DUPLOYAN,                 // 1BC80..1BC88
7434             UNKNOWN,                  // 1BC89..1BC8F
7435             DUPLOYAN,                 // 1BC90..1BC99
7436             UNKNOWN,                  // 1BC9A..1BC9B
7437             DUPLOYAN,                 // 1BC9C..1BC9F
7438             COMMON,                   // 1BCA0..1BCA3
7439             UNKNOWN,                  // 1BCA4..1CFFF
7440             COMMON,                   // 1D000..1D0F5
7441             UNKNOWN,                  // 1D0F6..1D0FF
7442             COMMON,                   // 1D100..1D126
7443             UNKNOWN,                  // 1D127..1D128
7444             COMMON,                   // 1D129..1D166
7445             INHERITED,                // 1D167..1D169
7446             COMMON,                   // 1D16A..1D17A
7447             INHERITED,                // 1D17B..1D182
7448             COMMON,                   // 1D183..1D184
7449             INHERITED,                // 1D185..1D18B
7450             COMMON,                   // 1D18C..1D1A9
7451             INHERITED,                // 1D1AA..1D1AD
7452             COMMON,                   // 1D1AE..1D1E8
7453             UNKNOWN,                  // 1D1E9..1D1FF
7454             GREEK,                    // 1D200..1D245
7455             UNKNOWN,                  // 1D246..1D2FF
7456             COMMON,                   // 1D300..1D356
7457             UNKNOWN,                  // 1D357..1D35F
7458             COMMON,                   // 1D360..1D371
7459             UNKNOWN,                  // 1D372..1D3FF
7460             COMMON,                   // 1D400..1D454
7461             UNKNOWN,                  // 1D455
7462             COMMON,                   // 1D456..1D49C
7463             UNKNOWN,                  // 1D49D
7464             COMMON,                   // 1D49E..1D49F
7465             UNKNOWN,                  // 1D4A0..1D4A1
7466             COMMON,                   // 1D4A2
7467             UNKNOWN,                  // 1D4A3..1D4A4
7468             COMMON,                   // 1D4A5..1D4A6
7469             UNKNOWN,                  // 1D4A7..1D4A8
7470             COMMON,                   // 1D4A9..1D4AC
7471             UNKNOWN,                  // 1D4AD
7472             COMMON,                   // 1D4AE..1D4B9
7473             UNKNOWN,                  // 1D4BA
7474             COMMON,                   // 1D4BB
7475             UNKNOWN,                  // 1D4BC
7476             COMMON,                   // 1D4BD..1D4C3
7477             UNKNOWN,                  // 1D4C4
7478             COMMON,                   // 1D4C5..1D505
7479             UNKNOWN,                  // 1D506
7480             COMMON,                   // 1D507..1D50A
7481             UNKNOWN,                  // 1D50B..1D50C
7482             COMMON,                   // 1D50D..1D514
7483             UNKNOWN,                  // 1D515
7484             COMMON,                   // 1D516..1D51C
7485             UNKNOWN,                  // 1D51D
7486             COMMON,                   // 1D51E..1D539
7487             UNKNOWN,                  // 1D53A
7488             COMMON,                   // 1D53B..1D53E
7489             UNKNOWN,                  // 1D53F
7490             COMMON,                   // 1D540..1D544
7491             UNKNOWN,                  // 1D545
7492             COMMON,                   // 1D546
7493             UNKNOWN,                  // 1D547..1D549
7494             COMMON,                   // 1D54A..1D550
7495             UNKNOWN,                  // 1D551
7496             COMMON,                   // 1D552..1D6A5
7497             UNKNOWN,                  // 1D6A6..1D6A7
7498             COMMON,                   // 1D6A8..1D7CB
7499             UNKNOWN,                  // 1D7CC..1D7CD
7500             COMMON,                   // 1D7CE..1D7FF
7501             SIGNWRITING,              // 1D800..1DA8B
7502             UNKNOWN,                  // 1DA8C..1DA9A
7503             SIGNWRITING,              // 1DA9B..1DA9F
7504             UNKNOWN,                  // 1DAA0
7505             SIGNWRITING,              // 1DAA1..1DAAF
7506             UNKNOWN,                  // 1DAB0..1DFFF
7507             GLAGOLITIC,               // 1E000..1E006
7508             UNKNOWN,                  // 1E007
7509             GLAGOLITIC,               // 1E008..1E018
7510             UNKNOWN,                  // 1E019..1E01A
7511             GLAGOLITIC,               // 1E01B..1E021
7512             UNKNOWN,                  // 1E022
7513             GLAGOLITIC,               // 1E023..1E024
7514             UNKNOWN,                  // 1E025
7515             GLAGOLITIC,               // 1E026..1E02A
7516             UNKNOWN,                  // 1E02B..1E7FF
7517             MENDE_KIKAKUI,            // 1E800..1E8C4
7518             UNKNOWN,                  // 1E8C5..1E8C6
7519             MENDE_KIKAKUI,            // 1E8C7..1E8D6
7520             UNKNOWN,                  // 1E8D7..1E8FF
7521             ADLAM,                    // 1E900..1E94A
7522             UNKNOWN,                  // 1E94B..1E94F
7523             ADLAM,                    // 1E950..1E959
7524             UNKNOWN,                  // 1E95A..1E95D
7525             ADLAM,                    // 1E95E..1E95F
7526             UNKNOWN,                  // 1E960..1EDFF
7527             ARABIC,                   // 1EE00..1EE03
7528             UNKNOWN,                  // 1EE04
7529             ARABIC,                   // 1EE05..1EE1F
7530             UNKNOWN,                  // 1EE20
7531             ARABIC,                   // 1EE21..1EE22
7532             UNKNOWN,                  // 1EE23
7533             ARABIC,                   // 1EE24
7534             UNKNOWN,                  // 1EE25..1EE26
7535             ARABIC,                   // 1EE27
7536             UNKNOWN,                  // 1EE28
7537             ARABIC,                   // 1EE29..1EE32
7538             UNKNOWN,                  // 1EE33
7539             ARABIC,                   // 1EE34..1EE37
7540             UNKNOWN,                  // 1EE38
7541             ARABIC,                   // 1EE39
7542             UNKNOWN,                  // 1EE3A
7543             ARABIC,                   // 1EE3B
7544             UNKNOWN,                  // 1EE3C..1EE41
7545             ARABIC,                   // 1EE42
7546             UNKNOWN,                  // 1EE43..1EE46
7547             ARABIC,                   // 1EE47
7548             UNKNOWN,                  // 1EE48
7549             ARABIC,                   // 1EE49
7550             UNKNOWN,                  // 1EE4A
7551             ARABIC,                   // 1EE4B
7552             UNKNOWN,                  // 1EE4C
7553             ARABIC,                   // 1EE4D..1EE4F
7554             UNKNOWN,                  // 1EE50
7555             ARABIC,                   // 1EE51..1EE52
7556             UNKNOWN,                  // 1EE53
7557             ARABIC,                   // 1EE54
7558             UNKNOWN,                  // 1EE55..1EE56
7559             ARABIC,                   // 1EE57
7560             UNKNOWN,                  // 1EE58
7561             ARABIC,                   // 1EE59
7562             UNKNOWN,                  // 1EE5A
7563             ARABIC,                   // 1EE5B
7564             UNKNOWN,                  // 1EE5C
7565             ARABIC,                   // 1EE5D
7566             UNKNOWN,                  // 1EE5E
7567             ARABIC,                   // 1EE5F
7568             UNKNOWN,                  // 1EE60
7569             ARABIC,                   // 1EE61..1EE62
7570             UNKNOWN,                  // 1EE63
7571             ARABIC,                   // 1EE64
7572             UNKNOWN,                  // 1EE65..1EE66
7573             ARABIC,                   // 1EE67..1EE6A
7574             UNKNOWN,                  // 1EE6B
7575             ARABIC,                   // 1EE6C..1EE72
7576             UNKNOWN,                  // 1EE73
7577             ARABIC,                   // 1EE74..1EE77
7578             UNKNOWN,                  // 1EE78
7579             ARABIC,                   // 1EE79..1EE7C
7580             UNKNOWN,                  // 1EE7D
7581             ARABIC,                   // 1EE7E
7582             UNKNOWN,                  // 1EE7F
7583             ARABIC,                   // 1EE80..1EE89
7584             UNKNOWN,                  // 1EE8A
7585             ARABIC,                   // 1EE8B..1EE9B
7586             UNKNOWN,                  // 1EE9C..1EEA0
7587             ARABIC,                   // 1EEA1..1EEA3
7588             UNKNOWN,                  // 1EEA4
7589             ARABIC,                   // 1EEA5..1EEA9
7590             UNKNOWN,                  // 1EEAA
7591             ARABIC,                   // 1EEAB..1EEBB
7592             UNKNOWN,                  // 1EEBC..1EEEF
7593             ARABIC,                   // 1EEF0..1EEF1
7594             UNKNOWN,                  // 1EEF2..1EFFF
7595             COMMON,                   // 1F000..1F02B
7596             UNKNOWN,                  // 1F02C..1F02F
7597             COMMON,                   // 1F030..1F093
7598             UNKNOWN,                  // 1F094..1F09F
7599             COMMON,                   // 1F0A0..1F0AE
7600             UNKNOWN,                  // 1F0AF..1F0B0
7601             COMMON,                   // 1F0B1..1F0BF
7602             UNKNOWN,                  // 1F0C0
7603             COMMON,                   // 1F0C1..1F0CF
7604             UNKNOWN,                  // 1F0D0
7605             COMMON,                   // 1F0D1..1F0F5
7606             UNKNOWN,                  // 1F0F6..1F0FF
7607             COMMON,                   // 1F100..1F10C
7608             UNKNOWN,                  // 1F10D..1F10F
7609             COMMON,                   // 1F110..1F12E
7610             UNKNOWN,                  // 1F12F
7611             COMMON,                   // 1F130..1F16B
7612             UNKNOWN,                  // 1F16C..1F16F
7613             COMMON,                   // 1F170..1F1AC
7614             UNKNOWN,                  // 1F1AD..1F1E5
7615             COMMON,                   // 1F1E6..1F1FF
7616             HIRAGANA,                 // 1F200
7617             COMMON,                   // 1F201..1F202
7618             UNKNOWN,                  // 1F203..1F20F
7619             COMMON,                   // 1F210..1F23B
7620             UNKNOWN,                  // 1F23C..1F23F
7621             COMMON,                   // 1F240..1F248
7622             UNKNOWN,                  // 1F249..1F24F
7623             COMMON,                   // 1F250..1F251
7624             UNKNOWN,                  // 1F252..1F25F
7625             COMMON,                   // 1F260..1F265
7626             UNKNOWN,                  // 1F266..1F2FF
7627             COMMON,                   // 1F300..1F6D4
7628             UNKNOWN,                  // 1F6D5..1F6DF
7629             COMMON,                   // 1F6E0..1F6EC
7630             UNKNOWN,                  // 1F6ED..1F6EF
7631             COMMON,                   // 1F6F0..1F6F8
7632             UNKNOWN,                  // 1F6F9..1F6FF
7633             COMMON,                   // 1F700..1F773
7634             UNKNOWN,                  // 1F774..1F77F
7635             COMMON,                   // 1F780..1F7D4
7636             UNKNOWN,                  // 1F7D5..1F7FF
7637             COMMON,                   // 1F800..1F80B
7638             UNKNOWN,                  // 1F80C..1F80F
7639             COMMON,                   // 1F810..1F847
7640             UNKNOWN,                  // 1F848..1F84F
7641             COMMON,                   // 1F850..1F859
7642             UNKNOWN,                  // 1F85A..1F85F
7643             COMMON,                   // 1F860..1F887
7644             UNKNOWN,                  // 1F888..1F88F
7645             COMMON,                   // 1F890..1F8AD
7646             UNKNOWN,                  // 1F8AE..1F8FF
7647             COMMON,                   // 1F900..1F90B
7648             UNKNOWN,                  // 1F90C..1F90F
7649             COMMON,                   // 1F910..1F93E
7650             UNKNOWN,                  // 1F93F
7651             COMMON,                   // 1F940..1F94C
7652             UNKNOWN,                  // 1F94D..1F94F
7653             COMMON,                   // 1F950..1F96B
7654             UNKNOWN,                  // 1F96C..1F97F
7655             COMMON,                   // 1F980..1F997
7656             UNKNOWN,                  // 1F998..1F9BF
7657             COMMON,                   // 1F9C0
7658             UNKNOWN,                  // 1F9C1..1F9CF
7659             COMMON,                   // 1F9D0..1F9E6
7660             UNKNOWN,                  // 1F9E7..1FFFF
7661             HAN,                      // 20000..2A6D6
7662             UNKNOWN,                  // 2A6D7..2A6FF
7663             HAN,                      // 2A700..2B734
7664             UNKNOWN,                  // 2B735..2B73F
7665             HAN,                      // 2B740..2B81D
7666             UNKNOWN,                  // 2B81E..2B81F
7667             HAN,                      // 2B820..2CEA1
7668             UNKNOWN,                  // 2CEA2..2CEAF
7669             HAN,                      // 2CEB0..2EBE0
7670             UNKNOWN,                  // 2EBE1..2F7FF
7671             HAN,                      // 2F800..2FA1D
7672             UNKNOWN,                  // 2FA1E..E0000
7673             COMMON,                   // E0001
7674             UNKNOWN,                  // E0002..E001F
7675             COMMON,                   // E0020..E007F
7676             UNKNOWN,                  // E0080..E00FF
7677             INHERITED,                // E0100..E01EF
7678             UNKNOWN                   // E01F0..10FFFF
7679         };
7680 
7681         private static HashMap<String, Character.UnicodeScript> aliases;
7682         static {
7683             aliases = new HashMap<>((int)(142 / 0.75f + 1.0f));
7684             aliases.put("ADLM", ADLAM);
7685             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7686             aliases.put("AHOM", AHOM);
7687             aliases.put("ARAB", ARABIC);
7688             aliases.put("ARMI", IMPERIAL_ARAMAIC);
7689             aliases.put("ARMN", ARMENIAN);
7690             aliases.put("AVST", AVESTAN);
7691             aliases.put("BALI", BALINESE);
7692             aliases.put("BAMU", BAMUM);
7693             aliases.put("BASS", BASSA_VAH);
7694             aliases.put("BATK", BATAK);
7695             aliases.put("BENG", BENGALI);
7696             aliases.put("BHKS", BHAIKSUKI);
7697             aliases.put("BOPO", BOPOMOFO);
7698             aliases.put("BRAH", BRAHMI);
7699             aliases.put("BRAI", BRAILLE);
7700             aliases.put("BUGI", BUGINESE);
7701             aliases.put("BUHD", BUHID);
7702             aliases.put("CAKM", CHAKMA);
7703             aliases.put("CANS", CANADIAN_ABORIGINAL);
7704             aliases.put("CARI", CARIAN);
7705             aliases.put("CHAM", CHAM);
7706             aliases.put("CHER", CHEROKEE);
7707             aliases.put("COPT", COPTIC);
7708             aliases.put("CPRT", CYPRIOT);
7709             aliases.put("CYRL", CYRILLIC);
7710             aliases.put("DEVA", DEVANAGARI);
7711             aliases.put("DSRT", DESERET);
7712             aliases.put("DUPL", DUPLOYAN);
7713             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7714             aliases.put("ELBA", ELBASAN);
7715             aliases.put("ETHI", ETHIOPIC);
7716             aliases.put("GEOR", GEORGIAN);
7717             aliases.put("GLAG", GLAGOLITIC);
7718             aliases.put("GONM", MASARAM_GONDI);
7719             aliases.put("GOTH", GOTHIC);
7720             aliases.put("GRAN", GRANTHA);
7721             aliases.put("GREK", GREEK);
7722             aliases.put("GUJR", GUJARATI);
7723             aliases.put("GURU", GURMUKHI);
7724             aliases.put("HANG", HANGUL);
7725             aliases.put("HANI", HAN);
7726             aliases.put("HANO", HANUNOO);
7727             aliases.put("HATR", HATRAN);
7728             aliases.put("HEBR", HEBREW);
7729             aliases.put("HIRA", HIRAGANA);
7730             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
7731             aliases.put("HMNG", PAHAWH_HMONG);
7732             // it appears we don't have the KATAKANA_OR_HIRAGANA
7733             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7734             aliases.put("HUNG", OLD_HUNGARIAN);
7735             aliases.put("ITAL", OLD_ITALIC);
7736             aliases.put("JAVA", JAVANESE);
7737             aliases.put("KALI", KAYAH_LI);
7738             aliases.put("KANA", KATAKANA);
7739             aliases.put("KHAR", KHAROSHTHI);
7740             aliases.put("KHMR", KHMER);
7741             aliases.put("KHOJ", KHOJKI);
7742             aliases.put("KNDA", KANNADA);
7743             aliases.put("KTHI", KAITHI);
7744             aliases.put("LANA", TAI_THAM);
7745             aliases.put("LAOO", LAO);
7746             aliases.put("LATN", LATIN);
7747             aliases.put("LEPC", LEPCHA);
7748             aliases.put("LIMB", LIMBU);
7749             aliases.put("LINA", LINEAR_A);
7750             aliases.put("LINB", LINEAR_B);
7751             aliases.put("LISU", LISU);
7752             aliases.put("LYCI", LYCIAN);
7753             aliases.put("LYDI", LYDIAN);
7754             aliases.put("MAHJ", MAHAJANI);
7755             aliases.put("MARC", MARCHEN);
7756             aliases.put("MAND", MANDAIC);
7757             aliases.put("MANI", MANICHAEAN);
7758             aliases.put("MEND", MENDE_KIKAKUI);
7759             aliases.put("MERC", MEROITIC_CURSIVE);
7760             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7761             aliases.put("MLYM", MALAYALAM);
7762             aliases.put("MODI", MODI);
7763             aliases.put("MONG", MONGOLIAN);
7764             aliases.put("MROO", MRO);
7765             aliases.put("MTEI", MEETEI_MAYEK);
7766             aliases.put("MULT", MULTANI);
7767             aliases.put("MYMR", MYANMAR);
7768             aliases.put("NARB", OLD_NORTH_ARABIAN);
7769             aliases.put("NBAT", NABATAEAN);
7770             aliases.put("NEWA", NEWA);
7771             aliases.put("NKOO", NKO);
7772             aliases.put("NSHU", NUSHU);
7773             aliases.put("OGAM", OGHAM);
7774             aliases.put("OLCK", OL_CHIKI);
7775             aliases.put("ORKH", OLD_TURKIC);
7776             aliases.put("ORYA", ORIYA);
7777             aliases.put("OSGE", OSAGE);
7778             aliases.put("OSMA", OSMANYA);
7779             aliases.put("PALM", PALMYRENE);
7780             aliases.put("PAUC", PAU_CIN_HAU);
7781             aliases.put("PERM", OLD_PERMIC);
7782             aliases.put("PHAG", PHAGS_PA);
7783             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
7784             aliases.put("PHLP", PSALTER_PAHLAVI);
7785             aliases.put("PHNX", PHOENICIAN);
7786             aliases.put("PLRD", MIAO);
7787             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
7788             aliases.put("RJNG", REJANG);
7789             aliases.put("RUNR", RUNIC);
7790             aliases.put("SAMR", SAMARITAN);
7791             aliases.put("SARB", OLD_SOUTH_ARABIAN);
7792             aliases.put("SAUR", SAURASHTRA);
7793             aliases.put("SGNW", SIGNWRITING);
7794             aliases.put("SHAW", SHAVIAN);
7795             aliases.put("SHRD", SHARADA);
7796             aliases.put("SIDD", SIDDHAM);
7797             aliases.put("SIND", KHUDAWADI);
7798             aliases.put("SINH", SINHALA);
7799             aliases.put("SORA", SORA_SOMPENG);
7800             aliases.put("SOYO", SOYOMBO);
7801             aliases.put("SUND", SUNDANESE);
7802             aliases.put("SYLO", SYLOTI_NAGRI);
7803             aliases.put("SYRC", SYRIAC);
7804             aliases.put("TAGB", TAGBANWA);
7805             aliases.put("TAKR", TAKRI);
7806             aliases.put("TALE", TAI_LE);
7807             aliases.put("TALU", NEW_TAI_LUE);
7808             aliases.put("TAML", TAMIL);
7809             aliases.put("TANG", TANGUT);
7810             aliases.put("TAVT", TAI_VIET);
7811             aliases.put("TELU", TELUGU);
7812             aliases.put("TFNG", TIFINAGH);
7813             aliases.put("TGLG", TAGALOG);
7814             aliases.put("THAA", THAANA);
7815             aliases.put("THAI", THAI);
7816             aliases.put("TIBT", TIBETAN);
7817             aliases.put("TIRH", TIRHUTA);
7818             aliases.put("UGAR", UGARITIC);
7819             aliases.put("VAII", VAI);
7820             aliases.put("WARA", WARANG_CITI);
7821             aliases.put("XPEO", OLD_PERSIAN);
7822             aliases.put("XSUX", CUNEIFORM);
7823             aliases.put("YIII", YI);
7824             aliases.put("ZANB", ZANABAZAR_SQUARE);
7825             aliases.put("ZINH", INHERITED);
7826             aliases.put("ZYYY", COMMON);
7827             aliases.put("ZZZZ", UNKNOWN);
7828         }
7829 
7830         /**
7831          * Returns the enum constant representing the Unicode script of which
7832          * the given character (Unicode code point) is assigned to.
7833          *
7834          * @param   codePoint the character (Unicode code point) in question.
7835          * @return  The {@code UnicodeScript} constant representing the
7836          *          Unicode script of which this character is assigned to.
7837          *
7838          * @throws  IllegalArgumentException if the specified
7839          * {@code codePoint} is an invalid Unicode code point.
7840          * @see Character#isValidCodePoint(int)
7841          *
7842          */
7843         public static UnicodeScript of(int codePoint) {
7844             if (!isValidCodePoint(codePoint))
7845                 throw new IllegalArgumentException(
7846                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
7847             int type = getType(codePoint);
7848             // leave SURROGATE and PRIVATE_USE for table lookup
7849             if (type == UNASSIGNED)
7850                 return UNKNOWN;
7851             int index = Arrays.binarySearch(scriptStarts, codePoint);
7852             if (index < 0)
7853                 index = -index - 2;
7854             return scripts[index];
7855         }
7856 
7857         /**
7858          * Returns the UnicodeScript constant with the given Unicode script
7859          * name or the script name alias. Script names and their aliases are
7860          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
7861          * and {@code PropertyValueAliases<version>.txt} define script names
7862          * and the script name aliases for a particular version of the
7863          * standard. The {@link Character} class specifies the version of
7864          * the standard that it supports.
7865          * <p>
7866          * Character case is ignored for all of the valid script names.
7867          * The en_US locale's case mapping rules are used to provide
7868          * case-insensitive string comparisons for script name validation.
7869          *
7870          * @param scriptName A {@code UnicodeScript} name.
7871          * @return The {@code UnicodeScript} constant identified
7872          *         by {@code scriptName}
7873          * @throws IllegalArgumentException if {@code scriptName} is an
7874          *         invalid name
7875          * @throws NullPointerException if {@code scriptName} is null
7876          */
7877         public static final UnicodeScript forName(String scriptName) {
7878             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
7879                                  //.replace(' ', '_'));
7880             UnicodeScript sc = aliases.get(scriptName);
7881             if (sc != null)
7882                 return sc;
7883             return valueOf(scriptName);
7884         }
7885     }
7886 
    /**
     * The value of the {@code Character}: the primitive {@code char}
     * wrapped by this object. The field is {@code final} and is assigned
     * by the constructor.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L;
7896 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     *
     * @deprecated
     * It is rarely appropriate to use this constructor. The static factory
     * {@link #valueOf(char)} is generally a better choice, as it is
     * likely to yield significantly better space and time performance.
     */
    @Deprecated(since="9")
    public Character(char value) {
        // The only place the final field is assigned.
        this.value = value;
    }
7913 
7914     private static class CharacterCache {
7915         private CharacterCache(){}
7916 
7917         static final Character cache[] = new Character[127 + 1];
7918 
7919         static {
7920             for (int i = 0; i < cache.length; i++)
7921                 cache[i] = new Character((char)i);
7922         }
7923     }
7924 
7925     /**
7926      * Returns a {@code Character} instance representing the specified
7927      * {@code char} value.
7928      * If a new {@code Character} instance is not required, this method
7929      * should generally be used in preference to the constructor
7930      * {@link #Character(char)}, as this method is likely to yield
7931      * significantly better space and time performance by caching
7932      * frequently requested values.
7933      *
7934      * This method will always cache values in the range {@code
7935      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
7936      * cache other values outside of this range.
7937      *
7938      * @param  c a char value.
7939      * @return a {@code Character} instance representing {@code c}.
7940      * @since  1.5
7941      */
7942     @HotSpotIntrinsicCandidate
7943     public static Character valueOf(char c) {
7944         if (c <= 127) { // must cache
7945             return CharacterCache.cache[(int)c];
7946         }
7947         return new Character(c);
7948     }
7949 
    /**
     * Returns the value of this {@code Character} object.
     *
     * @return  the primitive {@code char} value represented by
     *          this object.
     */
    @HotSpotIntrinsicCandidate
    public char charValue() {
        return value;
    }
7959 
    /**
     * Returns a hash code for this {@code Character}; equal to the result
     * of invoking {@code charValue()}.
     *
     * @return a hash code value for this {@code Character}
     */
    @Override
    public int hashCode() {
        // Delegates to the static overload so that both entry points
        // produce the same value for the same char.
        return Character.hashCode(value);
    }
7970 
7971     /**
7972      * Returns a hash code for a {@code char} value; compatible with
7973      * {@code Character.hashCode()}.
7974      *
7975      * @since 1.8
7976      *
7977      * @param value The {@code char} for which to return a hash code.
7978      * @return a hash code value for a {@code char} value.
7979      */
7980     public static int hashCode(char value) {
7981         return (int)value;
7982     }
7983 
7984     /**
7985      * Compares this object against the specified object.
7986      * The result is {@code true} if and only if the argument is not
7987      * {@code null} and is a {@code Character} object that
7988      * represents the same {@code char} value as this object.
7989      *
7990      * @param   obj   the object to compare with.
7991      * @return  {@code true} if the objects are the same;
7992      *          {@code false} otherwise.
7993      */
7994     public boolean equals(Object obj) {
7995         if (obj instanceof Character) {
7996             return value == ((Character)obj).charValue();
7997         }
7998         return false;
7999     }
8000 
8001     /**
8002      * Returns a {@code String} object representing this
8003      * {@code Character}'s value.  The result is a string of
8004      * length 1 whose sole component is the primitive
8005      * {@code char} value represented by this
8006      * {@code Character} object.
8007      *
8008      * @return  a string representation of this object.
8009      */
8010     public String toString() {
8011         char buf[] = {value};
8012         return String.valueOf(buf);
8013     }
8014 
8015     /**
8016      * Returns a {@code String} object representing the
8017      * specified {@code char}.  The result is a string of length
8018      * 1 consisting solely of the specified {@code char}.
8019      *
8020      * @apiNote This method cannot handle <a
8021      * href="#supplementary"> supplementary characters</a>. To support
8022      * all Unicode characters, including supplementary characters, use
8023      * the {@link #toString(int)} method.
8024      *
8025      * @param c the {@code char} to be converted
8026      * @return the string representation of the specified {@code char}
8027      * @since 1.4
8028      */
8029     public static String toString(char c) {
8030         return String.valueOf(c);
8031     }
8032 
8033     /**
8034      * Returns a {@code String} object representing the
8035      * specified character (Unicode code point).  The result is a string of
8036      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8037      *
8038      * @param codePoint the {@code codePoint} to be converted
8039      * @return the string representation of the specified {@code codePoint}
8040      * @throws IllegalArgumentException if the specified
8041      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8042      *      valid Unicode code point}.
8043      * @since 11
8044      */
8045     public static String toString(int codePoint) {
8046         return String.valueOfCodePoint(codePoint);
8047     }
8048 
8049     /**
8050      * Determines whether the specified code point is a valid
8051      * <a href="http://www.unicode.org/glossary/#code_point">
8052      * Unicode code point value</a>.
8053      *
8054      * @param  codePoint the Unicode code point to be tested
8055      * @return {@code true} if the specified code point value is between
8056      *         {@link #MIN_CODE_POINT} and
8057      *         {@link #MAX_CODE_POINT} inclusive;
8058      *         {@code false} otherwise.
8059      * @since  1.5
8060      */
8061     public static boolean isValidCodePoint(int codePoint) {
8062         // Optimized form of:
8063         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8064         int plane = codePoint >>> 16;
8065         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8066     }
8067 
8068     /**
8069      * Determines whether the specified character (Unicode code point)
8070      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8071      * Such code points can be represented using a single {@code char}.
8072      *
8073      * @param  codePoint the character (Unicode code point) to be tested
8074      * @return {@code true} if the specified code point is between
8075      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8076      *         {@code false} otherwise.
8077      * @since  1.7
8078      */
8079     public static boolean isBmpCodePoint(int codePoint) {
8080         return codePoint >>> 16 == 0;
8081         // Optimized form of:
8082         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8083         // We consistently use logical shift (>>>) to facilitate
8084         // additional runtime optimizations.
8085     }
8086 
8087     /**
8088      * Determines whether the specified character (Unicode code point)
8089      * is in the <a href="#supplementary">supplementary character</a> range.
8090      *
8091      * @param  codePoint the character (Unicode code point) to be tested
8092      * @return {@code true} if the specified code point is between
8093      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8094      *         {@link #MAX_CODE_POINT} inclusive;
8095      *         {@code false} otherwise.
8096      * @since  1.5
8097      */
8098     public static boolean isSupplementaryCodePoint(int codePoint) {
8099         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8100             && codePoint <  MAX_CODE_POINT + 1;
8101     }
8102 
8103     /**
8104      * Determines if the given {@code char} value is a
8105      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8106      * Unicode high-surrogate code unit</a>
8107      * (also known as <i>leading-surrogate code unit</i>).
8108      *
8109      * <p>Such values do not represent characters by themselves,
8110      * but are used in the representation of
8111      * <a href="#supplementary">supplementary characters</a>
8112      * in the UTF-16 encoding.
8113      *
8114      * @param  ch the {@code char} value to be tested.
8115      * @return {@code true} if the {@code char} value is between
8116      *         {@link #MIN_HIGH_SURROGATE} and
8117      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8118      *         {@code false} otherwise.
8119      * @see    Character#isLowSurrogate(char)
8120      * @see    Character.UnicodeBlock#of(int)
8121      * @since  1.5
8122      */
8123     public static boolean isHighSurrogate(char ch) {
8124         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8125         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8126     }
8127 
8128     /**
8129      * Determines if the given {@code char} value is a
8130      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8131      * Unicode low-surrogate code unit</a>
8132      * (also known as <i>trailing-surrogate code unit</i>).
8133      *
8134      * <p>Such values do not represent characters by themselves,
8135      * but are used in the representation of
8136      * <a href="#supplementary">supplementary characters</a>
8137      * in the UTF-16 encoding.
8138      *
8139      * @param  ch the {@code char} value to be tested.
8140      * @return {@code true} if the {@code char} value is between
8141      *         {@link #MIN_LOW_SURROGATE} and
8142      *         {@link #MAX_LOW_SURROGATE} inclusive;
8143      *         {@code false} otherwise.
8144      * @see    Character#isHighSurrogate(char)
8145      * @since  1.5
8146      */
8147     public static boolean isLowSurrogate(char ch) {
8148         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8149     }
8150 
8151     /**
8152      * Determines if the given {@code char} value is a Unicode
8153      * <i>surrogate code unit</i>.
8154      *
8155      * <p>Such values do not represent characters by themselves,
8156      * but are used in the representation of
8157      * <a href="#supplementary">supplementary characters</a>
8158      * in the UTF-16 encoding.
8159      *
8160      * <p>A char value is a surrogate code unit if and only if it is either
8161      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8162      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8163      *
8164      * @param  ch the {@code char} value to be tested.
8165      * @return {@code true} if the {@code char} value is between
8166      *         {@link #MIN_SURROGATE} and
8167      *         {@link #MAX_SURROGATE} inclusive;
8168      *         {@code false} otherwise.
8169      * @since  1.7
8170      */
8171     public static boolean isSurrogate(char ch) {
8172         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8173     }
8174 
8175     /**
8176      * Determines whether the specified pair of {@code char}
8177      * values is a valid
8178      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8179      * Unicode surrogate pair</a>.
8180      *
8181      * <p>This method is equivalent to the expression:
8182      * <blockquote><pre>{@code
8183      * isHighSurrogate(high) && isLowSurrogate(low)
8184      * }</pre></blockquote>
8185      *
8186      * @param  high the high-surrogate code value to be tested
8187      * @param  low the low-surrogate code value to be tested
8188      * @return {@code true} if the specified high and
8189      * low-surrogate code values represent a valid surrogate pair;
8190      * {@code false} otherwise.
8191      * @since  1.5
8192      */
8193     public static boolean isSurrogatePair(char high, char low) {
8194         return isHighSurrogate(high) && isLowSurrogate(low);
8195     }
8196 
8197     /**
8198      * Determines the number of {@code char} values needed to
8199      * represent the specified character (Unicode code point). If the
8200      * specified character is equal to or greater than 0x10000, then
8201      * the method returns 2. Otherwise, the method returns 1.
8202      *
8203      * <p>This method doesn't validate the specified character to be a
8204      * valid Unicode code point. The caller must validate the
8205      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8206      * if necessary.
8207      *
8208      * @param   codePoint the character (Unicode code point) to be tested.
8209      * @return  2 if the character is a valid supplementary character; 1 otherwise.
8210      * @see     Character#isSupplementaryCodePoint(int)
8211      * @since   1.5
8212      */
8213     public static int charCount(int codePoint) {
8214         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8215     }
8216 
8217     /**
8218      * Converts the specified surrogate pair to its supplementary code
8219      * point value. This method does not validate the specified
8220      * surrogate pair. The caller must validate it using {@link
8221      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8222      *
8223      * @param  high the high-surrogate code unit
8224      * @param  low the low-surrogate code unit
8225      * @return the supplementary code point composed from the
8226      *         specified surrogate pair.
8227      * @since  1.5
8228      */
8229     public static int toCodePoint(char high, char low) {
8230         // Optimized form of:
8231         // return ((high - MIN_HIGH_SURROGATE) << 10)
8232         //         + (low - MIN_LOW_SURROGATE)
8233         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8234         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8235                                        - (MIN_HIGH_SURROGATE << 10)
8236                                        - MIN_LOW_SURROGATE);
8237     }
8238 
8239     /**
8240      * Returns the code point at the given index of the
8241      * {@code CharSequence}. If the {@code char} value at
8242      * the given index in the {@code CharSequence} is in the
8243      * high-surrogate range, the following index is less than the
8244      * length of the {@code CharSequence}, and the
8245      * {@code char} value at the following index is in the
8246      * low-surrogate range, then the supplementary code point
8247      * corresponding to this surrogate pair is returned. Otherwise,
8248      * the {@code char} value at the given index is returned.
8249      *
8250      * @param seq a sequence of {@code char} values (Unicode code
8251      * units)
8252      * @param index the index to the {@code char} values (Unicode
8253      * code units) in {@code seq} to be converted
8254      * @return the Unicode code point at the given index
8255      * @throws NullPointerException if {@code seq} is null.
8256      * @throws IndexOutOfBoundsException if the value
8257      * {@code index} is negative or not less than
8258      * {@link CharSequence#length() seq.length()}.
8259      * @since  1.5
8260      */
8261     public static int codePointAt(CharSequence seq, int index) {
8262         char c1 = seq.charAt(index);
8263         if (isHighSurrogate(c1) && ++index < seq.length()) {
8264             char c2 = seq.charAt(index);
8265             if (isLowSurrogate(c2)) {
8266                 return toCodePoint(c1, c2);
8267             }
8268         }
8269         return c1;
8270     }
8271 
8272     /**
8273      * Returns the code point at the given index of the
8274      * {@code char} array. If the {@code char} value at
8275      * the given index in the {@code char} array is in the
8276      * high-surrogate range, the following index is less than the
8277      * length of the {@code char} array, and the
8278      * {@code char} value at the following index is in the
8279      * low-surrogate range, then the supplementary code point
8280      * corresponding to this surrogate pair is returned. Otherwise,
8281      * the {@code char} value at the given index is returned.
8282      *
8283      * @param a the {@code char} array
8284      * @param index the index to the {@code char} values (Unicode
8285      * code units) in the {@code char} array to be converted
8286      * @return the Unicode code point at the given index
8287      * @throws NullPointerException if {@code a} is null.
8288      * @throws IndexOutOfBoundsException if the value
8289      * {@code index} is negative or not less than
8290      * the length of the {@code char} array.
8291      * @since  1.5
8292      */
8293     public static int codePointAt(char[] a, int index) {
8294         return codePointAtImpl(a, index, a.length);
8295     }
8296 
8297     /**
8298      * Returns the code point at the given index of the
8299      * {@code char} array, where only array elements with
8300      * {@code index} less than {@code limit} can be used. If
8301      * the {@code char} value at the given index in the
8302      * {@code char} array is in the high-surrogate range, the
8303      * following index is less than the {@code limit}, and the
8304      * {@code char} value at the following index is in the
8305      * low-surrogate range, then the supplementary code point
8306      * corresponding to this surrogate pair is returned. Otherwise,
8307      * the {@code char} value at the given index is returned.
8308      *
8309      * @param a the {@code char} array
8310      * @param index the index to the {@code char} values (Unicode
8311      * code units) in the {@code char} array to be converted
8312      * @param limit the index after the last array element that
8313      * can be used in the {@code char} array
8314      * @return the Unicode code point at the given index
8315      * @throws NullPointerException if {@code a} is null.
8316      * @throws IndexOutOfBoundsException if the {@code index}
8317      * argument is negative or not less than the {@code limit}
8318      * argument, or if the {@code limit} argument is negative or
8319      * greater than the length of the {@code char} array.
8320      * @since  1.5
8321      */
8322     public static int codePointAt(char[] a, int index, int limit) {
8323         if (index >= limit || limit < 0 || limit > a.length) {
8324             throw new IndexOutOfBoundsException();
8325         }
8326         return codePointAtImpl(a, index, limit);
8327     }
8328 
8329     // throws ArrayIndexOutOfBoundsException if index out of bounds
8330     static int codePointAtImpl(char[] a, int index, int limit) {
8331         char c1 = a[index];
8332         if (isHighSurrogate(c1) && ++index < limit) {
8333             char c2 = a[index];
8334             if (isLowSurrogate(c2)) {
8335                 return toCodePoint(c1, c2);
8336             }
8337         }
8338         return c1;
8339     }
8340 
8341     /**
8342      * Returns the code point preceding the given index of the
8343      * {@code CharSequence}. If the {@code char} value at
8344      * {@code (index - 1)} in the {@code CharSequence} is in
8345      * the low-surrogate range, {@code (index - 2)} is not
8346      * negative, and the {@code char} value at {@code (index - 2)}
8347      * in the {@code CharSequence} is in the
8348      * high-surrogate range, then the supplementary code point
8349      * corresponding to this surrogate pair is returned. Otherwise,
8350      * the {@code char} value at {@code (index - 1)} is
8351      * returned.
8352      *
8353      * @param seq the {@code CharSequence} instance
8354      * @param index the index following the code point that should be returned
8355      * @return the Unicode code point value before the given index.
8356      * @throws NullPointerException if {@code seq} is null.
8357      * @throws IndexOutOfBoundsException if the {@code index}
8358      * argument is less than 1 or greater than {@link
8359      * CharSequence#length() seq.length()}.
8360      * @since  1.5
8361      */
8362     public static int codePointBefore(CharSequence seq, int index) {
8363         char c2 = seq.charAt(--index);
8364         if (isLowSurrogate(c2) && index > 0) {
8365             char c1 = seq.charAt(--index);
8366             if (isHighSurrogate(c1)) {
8367                 return toCodePoint(c1, c2);
8368             }
8369         }
8370         return c2;
8371     }
8372 
8373     /**
8374      * Returns the code point preceding the given index of the
8375      * {@code char} array. If the {@code char} value at
8376      * {@code (index - 1)} in the {@code char} array is in
8377      * the low-surrogate range, {@code (index - 2)} is not
8378      * negative, and the {@code char} value at {@code (index - 2)}
8379      * in the {@code char} array is in the
8380      * high-surrogate range, then the supplementary code point
8381      * corresponding to this surrogate pair is returned. Otherwise,
8382      * the {@code char} value at {@code (index - 1)} is
8383      * returned.
8384      *
8385      * @param a the {@code char} array
8386      * @param index the index following the code point that should be returned
8387      * @return the Unicode code point value before the given index.
8388      * @throws NullPointerException if {@code a} is null.
8389      * @throws IndexOutOfBoundsException if the {@code index}
8390      * argument is less than 1 or greater than the length of the
8391      * {@code char} array
8392      * @since  1.5
8393      */
8394     public static int codePointBefore(char[] a, int index) {
8395         return codePointBeforeImpl(a, index, 0);
8396     }
8397 
8398     /**
8399      * Returns the code point preceding the given index of the
8400      * {@code char} array, where only array elements with
8401      * {@code index} greater than or equal to {@code start}
8402      * can be used. If the {@code char} value at {@code (index - 1)}
8403      * in the {@code char} array is in the
8404      * low-surrogate range, {@code (index - 2)} is not less than
8405      * {@code start}, and the {@code char} value at
8406      * {@code (index - 2)} in the {@code char} array is in
8407      * the high-surrogate range, then the supplementary code point
8408      * corresponding to this surrogate pair is returned. Otherwise,
8409      * the {@code char} value at {@code (index - 1)} is
8410      * returned.
8411      *
8412      * @param a the {@code char} array
8413      * @param index the index following the code point that should be returned
8414      * @param start the index of the first array element in the
8415      * {@code char} array
8416      * @return the Unicode code point value before the given index.
8417      * @throws NullPointerException if {@code a} is null.
8418      * @throws IndexOutOfBoundsException if the {@code index}
8419      * argument is not greater than the {@code start} argument or
8420      * is greater than the length of the {@code char} array, or
8421      * if the {@code start} argument is negative or not less than
8422      * the length of the {@code char} array.
8423      * @since  1.5
8424      */
8425     public static int codePointBefore(char[] a, int index, int start) {
8426         if (index <= start || start < 0 || start >= a.length) {
8427             throw new IndexOutOfBoundsException();
8428         }
8429         return codePointBeforeImpl(a, index, start);
8430     }
8431 
8432     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8433     static int codePointBeforeImpl(char[] a, int index, int start) {
8434         char c2 = a[--index];
8435         if (isLowSurrogate(c2) && index > start) {
8436             char c1 = a[--index];
8437             if (isHighSurrogate(c1)) {
8438                 return toCodePoint(c1, c2);
8439             }
8440         }
8441         return c2;
8442     }
8443 
8444     /**
8445      * Returns the leading surrogate (a
8446      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8447      * high surrogate code unit</a>) of the
8448      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8449      * surrogate pair</a>
8450      * representing the specified supplementary character (Unicode
8451      * code point) in the UTF-16 encoding.  If the specified character
8452      * is not a
8453      * <a href="Character.html#supplementary">supplementary character</a>,
8454      * an unspecified {@code char} is returned.
8455      *
8456      * <p>If
8457      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8458      * is {@code true}, then
8459      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8460      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8461      * are also always {@code true}.
8462      *
8463      * @param   codePoint a supplementary character (Unicode code point)
8464      * @return  the leading surrogate code unit used to represent the
8465      *          character in the UTF-16 encoding
8466      * @since   1.7
8467      */
8468     public static char highSurrogate(int codePoint) {
8469         return (char) ((codePoint >>> 10)
8470             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8471     }
8472 
8473     /**
8474      * Returns the trailing surrogate (a
8475      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8476      * low surrogate code unit</a>) of the
8477      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8478      * surrogate pair</a>
8479      * representing the specified supplementary character (Unicode
8480      * code point) in the UTF-16 encoding.  If the specified character
8481      * is not a
8482      * <a href="Character.html#supplementary">supplementary character</a>,
8483      * an unspecified {@code char} is returned.
8484      *
8485      * <p>If
8486      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8487      * is {@code true}, then
8488      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8489      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8490      * are also always {@code true}.
8491      *
8492      * @param   codePoint a supplementary character (Unicode code point)
8493      * @return  the trailing surrogate code unit used to represent the
8494      *          character in the UTF-16 encoding
8495      * @since   1.7
8496      */
8497     public static char lowSurrogate(int codePoint) {
8498         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8499     }
8500 
8501     /**
8502      * Converts the specified character (Unicode code point) to its
8503      * UTF-16 representation. If the specified code point is a BMP
8504      * (Basic Multilingual Plane or Plane 0) value, the same value is
8505      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8506      * specified code point is a supplementary character, its
8507      * surrogate values are stored in {@code dst[dstIndex]}
8508      * (high-surrogate) and {@code dst[dstIndex+1]}
8509      * (low-surrogate), and 2 is returned.
8510      *
8511      * @param  codePoint the character (Unicode code point) to be converted.
8512      * @param  dst an array of {@code char} in which the
8513      * {@code codePoint}'s UTF-16 value is stored.
8514      * @param dstIndex the start index into the {@code dst}
8515      * array where the converted value is stored.
8516      * @return 1 if the code point is a BMP code point, 2 if the
8517      * code point is a supplementary code point.
8518      * @throws IllegalArgumentException if the specified
8519      * {@code codePoint} is not a valid Unicode code point.
8520      * @throws NullPointerException if the specified {@code dst} is null.
8521      * @throws IndexOutOfBoundsException if {@code dstIndex}
8522      * is negative or not less than {@code dst.length}, or if
8523      * {@code dst} at {@code dstIndex} doesn't have enough
8524      * array element(s) to store the resulting {@code char}
8525      * value(s). (If {@code dstIndex} is equal to
8526      * {@code dst.length-1} and the specified
8527      * {@code codePoint} is a supplementary character, the
8528      * high-surrogate value is not stored in
8529      * {@code dst[dstIndex]}.)
8530      * @since  1.5
8531      */
8532     public static int toChars(int codePoint, char[] dst, int dstIndex) {
8533         if (isBmpCodePoint(codePoint)) {
8534             dst[dstIndex] = (char) codePoint;
8535             return 1;
8536         } else if (isValidCodePoint(codePoint)) {
8537             toSurrogates(codePoint, dst, dstIndex);
8538             return 2;
8539         } else {
8540             throw new IllegalArgumentException(
8541                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8542         }
8543     }
8544 
8545     /**
8546      * Converts the specified character (Unicode code point) to its
8547      * UTF-16 representation stored in a {@code char} array. If
8548      * the specified code point is a BMP (Basic Multilingual Plane or
8549      * Plane 0) value, the resulting {@code char} array has
8550      * the same value as {@code codePoint}. If the specified code
8551      * point is a supplementary code point, the resulting
8552      * {@code char} array has the corresponding surrogate pair.
8553      *
8554      * @param  codePoint a Unicode code point
8555      * @return a {@code char} array having
8556      *         {@code codePoint}'s UTF-16 representation.
8557      * @throws IllegalArgumentException if the specified
8558      * {@code codePoint} is not a valid Unicode code point.
8559      * @since  1.5
8560      */
8561     public static char[] toChars(int codePoint) {
8562         if (isBmpCodePoint(codePoint)) {
8563             return new char[] { (char) codePoint };
8564         } else if (isValidCodePoint(codePoint)) {
8565             char[] result = new char[2];
8566             toSurrogates(codePoint, result, 0);
8567             return result;
8568         } else {
8569             throw new IllegalArgumentException(
8570                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8571         }
8572     }
8573 
8574     static void toSurrogates(int codePoint, char[] dst, int index) {
8575         // We write elements "backwards" to guarantee all-or-nothing
8576         dst[index+1] = lowSurrogate(codePoint);
8577         dst[index] = highSurrogate(codePoint);
8578     }
8579 
8580     /**
8581      * Returns the number of Unicode code points in the text range of
8582      * the specified char sequence. The text range begins at the
8583      * specified {@code beginIndex} and extends to the
8584      * {@code char} at index {@code endIndex - 1}. Thus the
8585      * length (in {@code char}s) of the text range is
8586      * {@code endIndex-beginIndex}. Unpaired surrogates within
8587      * the text range count as one code point each.
8588      *
8589      * @param seq the char sequence
8590      * @param beginIndex the index to the first {@code char} of
8591      * the text range.
8592      * @param endIndex the index after the last {@code char} of
8593      * the text range.
8594      * @return the number of Unicode code points in the specified text
8595      * range
8596      * @throws NullPointerException if {@code seq} is null.
8597      * @throws IndexOutOfBoundsException if the
8598      * {@code beginIndex} is negative, or {@code endIndex}
8599      * is larger than the length of the given sequence, or
8600      * {@code beginIndex} is larger than {@code endIndex}.
8601      * @since  1.5
8602      */
8603     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
8604         int length = seq.length();
8605         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
8606             throw new IndexOutOfBoundsException();
8607         }
8608         int n = endIndex - beginIndex;
8609         for (int i = beginIndex; i < endIndex; ) {
8610             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
8611                 isLowSurrogate(seq.charAt(i))) {
8612                 n--;
8613                 i++;
8614             }
8615         }
8616         return n;
8617     }
8618 
8619     /**
8620      * Returns the number of Unicode code points in a subarray of the
8621      * {@code char} array argument. The {@code offset}
8622      * argument is the index of the first {@code char} of the
8623      * subarray and the {@code count} argument specifies the
8624      * length of the subarray in {@code char}s. Unpaired
8625      * surrogates within the subarray count as one code point each.
8626      *
8627      * @param a the {@code char} array
8628      * @param offset the index of the first {@code char} in the
8629      * given {@code char} array
8630      * @param count the length of the subarray in {@code char}s
8631      * @return the number of Unicode code points in the specified subarray
8632      * @throws NullPointerException if {@code a} is null.
8633      * @throws IndexOutOfBoundsException if {@code offset} or
8634      * {@code count} is negative, or if {@code offset +
8635      * count} is larger than the length of the given array.
8636      * @since  1.5
8637      */
8638     public static int codePointCount(char[] a, int offset, int count) {
8639         if (count > a.length - offset || offset < 0 || count < 0) {
8640             throw new IndexOutOfBoundsException();
8641         }
8642         return codePointCountImpl(a, offset, count);
8643     }
8644 
8645     static int codePointCountImpl(char[] a, int offset, int count) {
8646         int endIndex = offset + count;
8647         int n = count;
8648         for (int i = offset; i < endIndex; ) {
8649             if (isHighSurrogate(a[i++]) && i < endIndex &&
8650                 isLowSurrogate(a[i])) {
8651                 n--;
8652                 i++;
8653             }
8654         }
8655         return n;
8656     }
8657 
8658     /**
8659      * Returns the index within the given char sequence that is offset
8660      * from the given {@code index} by {@code codePointOffset}
8661      * code points. Unpaired surrogates within the text range given by
8662      * {@code index} and {@code codePointOffset} count as
8663      * one code point each.
8664      *
8665      * @param seq the char sequence
8666      * @param index the index to be offset
8667      * @param codePointOffset the offset in code points
8668      * @return the index within the char sequence
8669      * @throws NullPointerException if {@code seq} is null.
8670      * @throws IndexOutOfBoundsException if {@code index}
8671      *   is negative or larger than the length of the char sequence,
8672      *   or if {@code codePointOffset} is positive and the
8673      *   subsequence starting with {@code index} has fewer than
8674      *   {@code codePointOffset} code points, or if
8675      *   {@code codePointOffset} is negative and the subsequence
8676      *   before {@code index} has fewer than the absolute value
8677      *   of {@code codePointOffset} code points.
8678      * @since 1.5
8679      */
8680     public static int offsetByCodePoints(CharSequence seq, int index,
8681                                          int codePointOffset) {
8682         int length = seq.length();
8683         if (index < 0 || index > length) {
8684             throw new IndexOutOfBoundsException();
8685         }
8686 
8687         int x = index;
8688         if (codePointOffset >= 0) {
8689             int i;
8690             for (i = 0; x < length && i < codePointOffset; i++) {
8691                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8692                     isLowSurrogate(seq.charAt(x))) {
8693                     x++;
8694                 }
8695             }
8696             if (i < codePointOffset) {
8697                 throw new IndexOutOfBoundsException();
8698             }
8699         } else {
8700             int i;
8701             for (i = codePointOffset; x > 0 && i < 0; i++) {
8702                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8703                     isHighSurrogate(seq.charAt(x-1))) {
8704                     x--;
8705                 }
8706             }
8707             if (i < 0) {
8708                 throw new IndexOutOfBoundsException();
8709             }
8710         }
8711         return x;
8712     }
8713 
8714     /**
8715      * Returns the index within the given {@code char} subarray
8716      * that is offset from the given {@code index} by
8717      * {@code codePointOffset} code points. The
8718      * {@code start} and {@code count} arguments specify a
8719      * subarray of the {@code char} array. Unpaired surrogates
8720      * within the text range given by {@code index} and
8721      * {@code codePointOffset} count as one code point each.
8722      *
8723      * @param a the {@code char} array
8724      * @param start the index of the first {@code char} of the
8725      * subarray
8726      * @param count the length of the subarray in {@code char}s
8727      * @param index the index to be offset
8728      * @param codePointOffset the offset in code points
8729      * @return the index within the subarray
8730      * @throws NullPointerException if {@code a} is null.
8731      * @throws IndexOutOfBoundsException
8732      *   if {@code start} or {@code count} is negative,
8733      *   or if {@code start + count} is larger than the length of
8734      *   the given array,
8735      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
8737      *   or if {@code codePointOffset} is positive and the text range
8738      *   starting with {@code index} and ending with {@code start + count - 1}
8739      *   has fewer than {@code codePointOffset} code
8740      *   points,
8741      *   or if {@code codePointOffset} is negative and the text range
8742      *   starting with {@code start} and ending with {@code index - 1}
8743      *   has fewer than the absolute value of
8744      *   {@code codePointOffset} code points.
8745      * @since 1.5
8746      */
8747     public static int offsetByCodePoints(char[] a, int start, int count,
8748                                          int index, int codePointOffset) {
8749         if (count > a.length-start || start < 0 || count < 0
8750             || index < start || index > start+count) {
8751             throw new IndexOutOfBoundsException();
8752         }
8753         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8754     }
8755 
8756     static int offsetByCodePointsImpl(char[]a, int start, int count,
8757                                       int index, int codePointOffset) {
8758         int x = index;
8759         if (codePointOffset >= 0) {
8760             int limit = start + count;
8761             int i;
8762             for (i = 0; x < limit && i < codePointOffset; i++) {
8763                 if (isHighSurrogate(a[x++]) && x < limit &&
8764                     isLowSurrogate(a[x])) {
8765                     x++;
8766                 }
8767             }
8768             if (i < codePointOffset) {
8769                 throw new IndexOutOfBoundsException();
8770             }
8771         } else {
8772             int i;
8773             for (i = codePointOffset; x > start && i < 0; i++) {
8774                 if (isLowSurrogate(a[--x]) && x > start &&
8775                     isHighSurrogate(a[x-1])) {
8776                     x--;
8777                 }
8778             }
8779             if (i < 0) {
8780                 throw new IndexOutOfBoundsException();
8781             }
8782         }
8783         return x;
8784     }
8785 
8786     /**
8787      * Determines if the specified character is a lowercase character.
8788      * <p>
8789      * A character is lowercase if its general category type, provided
8790      * by {@code Character.getType(ch)}, is
8791      * {@code LOWERCASE_LETTER}, or it has contributory property
8792      * Other_Lowercase as defined by the Unicode Standard.
8793      * <p>
8794      * The following are examples of lowercase characters:
8795      * <blockquote><pre>
8796      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8797      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8798      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8799      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8800      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8801      * </pre></blockquote>
8802      * <p> Many other Unicode characters are lowercase too.
8803      *
8804      * <p><b>Note:</b> This method cannot handle <a
8805      * href="#supplementary"> supplementary characters</a>. To support
8806      * all Unicode characters, including supplementary characters, use
8807      * the {@link #isLowerCase(int)} method.
8808      *
8809      * @param   ch   the character to be tested.
8810      * @return  {@code true} if the character is lowercase;
8811      *          {@code false} otherwise.
8812      * @see     Character#isLowerCase(char)
8813      * @see     Character#isTitleCase(char)
8814      * @see     Character#toLowerCase(char)
8815      * @see     Character#getType(char)
8816      */
8817     public static boolean isLowerCase(char ch) {
8818         return isLowerCase((int)ch);
8819     }
8820 
8821     /**
8822      * Determines if the specified character (Unicode code point) is a
8823      * lowercase character.
8824      * <p>
8825      * A character is lowercase if its general category type, provided
8826      * by {@link Character#getType getType(codePoint)}, is
8827      * {@code LOWERCASE_LETTER}, or it has contributory property
8828      * Other_Lowercase as defined by the Unicode Standard.
8829      * <p>
8830      * The following are examples of lowercase characters:
8831      * <blockquote><pre>
8832      * a b c d e f g h i j k l m n o p q r s t u v w x y z
8833      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8834      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8835      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8836      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8837      * </pre></blockquote>
8838      * <p> Many other Unicode characters are lowercase too.
8839      *
8840      * @param   codePoint the character (Unicode code point) to be tested.
8841      * @return  {@code true} if the character is lowercase;
8842      *          {@code false} otherwise.
8843      * @see     Character#isLowerCase(int)
8844      * @see     Character#isTitleCase(int)
8845      * @see     Character#toLowerCase(int)
8846      * @see     Character#getType(int)
8847      * @since   1.5
8848      */
8849     public static boolean isLowerCase(int codePoint) {
8850         return getType(codePoint) == Character.LOWERCASE_LETTER ||
8851                CharacterData.of(codePoint).isOtherLowercase(codePoint);
8852     }
8853 
8854     /**
8855      * Determines if the specified character is an uppercase character.
8856      * <p>
8857      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
8859      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8860      * <p>
8861      * The following are examples of uppercase characters:
8862      * <blockquote><pre>
8863      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8864      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8865      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8866      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8867      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8868      * </pre></blockquote>
8869      * <p> Many other Unicode characters are uppercase too.
8870      *
8871      * <p><b>Note:</b> This method cannot handle <a
8872      * href="#supplementary"> supplementary characters</a>. To support
8873      * all Unicode characters, including supplementary characters, use
8874      * the {@link #isUpperCase(int)} method.
8875      *
8876      * @param   ch   the character to be tested.
8877      * @return  {@code true} if the character is uppercase;
8878      *          {@code false} otherwise.
8879      * @see     Character#isLowerCase(char)
8880      * @see     Character#isTitleCase(char)
8881      * @see     Character#toUpperCase(char)
8882      * @see     Character#getType(char)
8883      * @since   1.0
8884      */
8885     public static boolean isUpperCase(char ch) {
8886         return isUpperCase((int)ch);
8887     }
8888 
8889     /**
8890      * Determines if the specified character (Unicode code point) is an uppercase character.
8891      * <p>
8892      * A character is uppercase if its general category type, provided by
8893      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
8894      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8895      * <p>
8896      * The following are examples of uppercase characters:
8897      * <blockquote><pre>
8898      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8899      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8900      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8901      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8902      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8903      * </pre></blockquote>
8904      * <p> Many other Unicode characters are uppercase too.
8905      *
8906      * @param   codePoint the character (Unicode code point) to be tested.
8907      * @return  {@code true} if the character is uppercase;
8908      *          {@code false} otherwise.
8909      * @see     Character#isLowerCase(int)
8910      * @see     Character#isTitleCase(int)
8911      * @see     Character#toUpperCase(int)
8912      * @see     Character#getType(int)
8913      * @since   1.5
8914      */
8915     public static boolean isUpperCase(int codePoint) {
8916         return getType(codePoint) == Character.UPPERCASE_LETTER ||
8917                CharacterData.of(codePoint).isOtherUppercase(codePoint);
8918     }
8919 
8920     /**
8921      * Determines if the specified character is a titlecase character.
8922      * <p>
8923      * A character is a titlecase character if its general
8924      * category type, provided by {@code Character.getType(ch)},
8925      * is {@code TITLECASE_LETTER}.
8926      * <p>
8927      * Some characters look like pairs of Latin letters. For example, there
8928      * is an uppercase letter that looks like "LJ" and has a corresponding
8929      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8930      * is the appropriate form to use when rendering a word in lowercase
8931      * with initial capitals, as for a book title.
8932      * <p>
8933      * These are some of the Unicode characters for which this method returns
8934      * {@code true}:
8935      * <ul>
8936      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8937      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8938      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8939      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8940      * </ul>
8941      * <p> Many other Unicode characters are titlecase too.
8942      *
8943      * <p><b>Note:</b> This method cannot handle <a
8944      * href="#supplementary"> supplementary characters</a>. To support
8945      * all Unicode characters, including supplementary characters, use
8946      * the {@link #isTitleCase(int)} method.
8947      *
8948      * @param   ch   the character to be tested.
8949      * @return  {@code true} if the character is titlecase;
8950      *          {@code false} otherwise.
8951      * @see     Character#isLowerCase(char)
8952      * @see     Character#isUpperCase(char)
8953      * @see     Character#toTitleCase(char)
8954      * @see     Character#getType(char)
8955      * @since   1.0.2
8956      */
8957     public static boolean isTitleCase(char ch) {
8958         return isTitleCase((int)ch);
8959     }
8960 
8961     /**
8962      * Determines if the specified character (Unicode code point) is a titlecase character.
8963      * <p>
8964      * A character is a titlecase character if its general
8965      * category type, provided by {@link Character#getType(int) getType(codePoint)},
8966      * is {@code TITLECASE_LETTER}.
8967      * <p>
8968      * Some characters look like pairs of Latin letters. For example, there
8969      * is an uppercase letter that looks like "LJ" and has a corresponding
8970      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8971      * is the appropriate form to use when rendering a word in lowercase
8972      * with initial capitals, as for a book title.
8973      * <p>
8974      * These are some of the Unicode characters for which this method returns
8975      * {@code true}:
8976      * <ul>
8977      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8978      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8979      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8980      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8981      * </ul>
8982      * <p> Many other Unicode characters are titlecase too.
8983      *
8984      * @param   codePoint the character (Unicode code point) to be tested.
8985      * @return  {@code true} if the character is titlecase;
8986      *          {@code false} otherwise.
8987      * @see     Character#isLowerCase(int)
8988      * @see     Character#isUpperCase(int)
8989      * @see     Character#toTitleCase(int)
8990      * @see     Character#getType(int)
8991      * @since   1.5
8992      */
8993     public static boolean isTitleCase(int codePoint) {
8994         return getType(codePoint) == Character.TITLECASE_LETTER;
8995     }
8996 
8997     /**
8998      * Determines if the specified character is a digit.
8999      * <p>
9000      * A character is a digit if its general category type, provided
9001      * by {@code Character.getType(ch)}, is
9002      * {@code DECIMAL_DIGIT_NUMBER}.
9003      * <p>
9004      * Some Unicode character ranges that contain digits:
9005      * <ul>
9006      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9007      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9008      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9009      *     Arabic-Indic digits
9010      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9011      *     Extended Arabic-Indic digits
9012      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9013      *     Devanagari digits
9014      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9015      *     Fullwidth digits
9016      * </ul>
9017      *
9018      * Many other character ranges contain digits as well.
9019      *
9020      * <p><b>Note:</b> This method cannot handle <a
9021      * href="#supplementary"> supplementary characters</a>. To support
9022      * all Unicode characters, including supplementary characters, use
9023      * the {@link #isDigit(int)} method.
9024      *
9025      * @param   ch   the character to be tested.
9026      * @return  {@code true} if the character is a digit;
9027      *          {@code false} otherwise.
9028      * @see     Character#digit(char, int)
9029      * @see     Character#forDigit(int, int)
9030      * @see     Character#getType(char)
9031      */
9032     public static boolean isDigit(char ch) {
9033         return isDigit((int)ch);
9034     }
9035 
9036     /**
9037      * Determines if the specified character (Unicode code point) is a digit.
9038      * <p>
9039      * A character is a digit if its general category type, provided
9040      * by {@link Character#getType(int) getType(codePoint)}, is
9041      * {@code DECIMAL_DIGIT_NUMBER}.
9042      * <p>
9043      * Some Unicode character ranges that contain digits:
9044      * <ul>
9045      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9046      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9047      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9048      *     Arabic-Indic digits
9049      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9050      *     Extended Arabic-Indic digits
9051      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9052      *     Devanagari digits
9053      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9054      *     Fullwidth digits
9055      * </ul>
9056      *
9057      * Many other character ranges contain digits as well.
9058      *
9059      * @param   codePoint the character (Unicode code point) to be tested.
9060      * @return  {@code true} if the character is a digit;
9061      *          {@code false} otherwise.
9062      * @see     Character#forDigit(int, int)
9063      * @see     Character#getType(int)
9064      * @since   1.5
9065      */
9066     public static boolean isDigit(int codePoint) {
9067         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
9068     }
9069 
9070     /**
9071      * Determines if a character is defined in Unicode.
9072      * <p>
9073      * A character is defined if at least one of the following is true:
9074      * <ul>
9075      * <li>It has an entry in the UnicodeData file.
9076      * <li>It has a value in a range defined by the UnicodeData file.
9077      * </ul>
9078      *
9079      * <p><b>Note:</b> This method cannot handle <a
9080      * href="#supplementary"> supplementary characters</a>. To support
9081      * all Unicode characters, including supplementary characters, use
9082      * the {@link #isDefined(int)} method.
9083      *
9084      * @param   ch   the character to be tested
9085      * @return  {@code true} if the character has a defined meaning
9086      *          in Unicode; {@code false} otherwise.
9087      * @see     Character#isDigit(char)
9088      * @see     Character#isLetter(char)
9089      * @see     Character#isLetterOrDigit(char)
9090      * @see     Character#isLowerCase(char)
9091      * @see     Character#isTitleCase(char)
9092      * @see     Character#isUpperCase(char)
9093      * @since   1.0.2
9094      */
9095     public static boolean isDefined(char ch) {
9096         return isDefined((int)ch);
9097     }
9098 
9099     /**
9100      * Determines if a character (Unicode code point) is defined in Unicode.
9101      * <p>
9102      * A character is defined if at least one of the following is true:
9103      * <ul>
9104      * <li>It has an entry in the UnicodeData file.
9105      * <li>It has a value in a range defined by the UnicodeData file.
9106      * </ul>
9107      *
9108      * @param   codePoint the character (Unicode code point) to be tested.
9109      * @return  {@code true} if the character has a defined meaning
9110      *          in Unicode; {@code false} otherwise.
9111      * @see     Character#isDigit(int)
9112      * @see     Character#isLetter(int)
9113      * @see     Character#isLetterOrDigit(int)
9114      * @see     Character#isLowerCase(int)
9115      * @see     Character#isTitleCase(int)
9116      * @see     Character#isUpperCase(int)
9117      * @since   1.5
9118      */
9119     public static boolean isDefined(int codePoint) {
9120         return getType(codePoint) != Character.UNASSIGNED;
9121     }
9122 
9123     /**
9124      * Determines if the specified character is a letter.
9125      * <p>
9126      * A character is considered to be a letter if its general
9127      * category type, provided by {@code Character.getType(ch)},
9128      * is any of the following:
9129      * <ul>
9130      * <li> {@code UPPERCASE_LETTER}
9131      * <li> {@code LOWERCASE_LETTER}
9132      * <li> {@code TITLECASE_LETTER}
9133      * <li> {@code MODIFIER_LETTER}
9134      * <li> {@code OTHER_LETTER}
9135      * </ul>
9136      *
9137      * Not all letters have case. Many characters are
9138      * letters but are neither uppercase nor lowercase nor titlecase.
9139      *
9140      * <p><b>Note:</b> This method cannot handle <a
9141      * href="#supplementary"> supplementary characters</a>. To support
9142      * all Unicode characters, including supplementary characters, use
9143      * the {@link #isLetter(int)} method.
9144      *
9145      * @param   ch   the character to be tested.
9146      * @return  {@code true} if the character is a letter;
9147      *          {@code false} otherwise.
9148      * @see     Character#isDigit(char)
9149      * @see     Character#isJavaIdentifierStart(char)
9150      * @see     Character#isJavaLetter(char)
9151      * @see     Character#isJavaLetterOrDigit(char)
9152      * @see     Character#isLetterOrDigit(char)
9153      * @see     Character#isLowerCase(char)
9154      * @see     Character#isTitleCase(char)
9155      * @see     Character#isUnicodeIdentifierStart(char)
9156      * @see     Character#isUpperCase(char)
9157      */
9158     public static boolean isLetter(char ch) {
9159         return isLetter((int)ch);
9160     }
9161 
9162     /**
9163      * Determines if the specified character (Unicode code point) is a letter.
9164      * <p>
9165      * A character is considered to be a letter if its general
9166      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9167      * is any of the following:
9168      * <ul>
9169      * <li> {@code UPPERCASE_LETTER}
9170      * <li> {@code LOWERCASE_LETTER}
9171      * <li> {@code TITLECASE_LETTER}
9172      * <li> {@code MODIFIER_LETTER}
9173      * <li> {@code OTHER_LETTER}
9174      * </ul>
9175      *
9176      * Not all letters have case. Many characters are
9177      * letters but are neither uppercase nor lowercase nor titlecase.
9178      *
9179      * @param   codePoint the character (Unicode code point) to be tested.
9180      * @return  {@code true} if the character is a letter;
9181      *          {@code false} otherwise.
9182      * @see     Character#isDigit(int)
9183      * @see     Character#isJavaIdentifierStart(int)
9184      * @see     Character#isLetterOrDigit(int)
9185      * @see     Character#isLowerCase(int)
9186      * @see     Character#isTitleCase(int)
9187      * @see     Character#isUnicodeIdentifierStart(int)
9188      * @see     Character#isUpperCase(int)
9189      * @since   1.5
9190      */
9191     public static boolean isLetter(int codePoint) {
9192         return ((((1 << Character.UPPERCASE_LETTER) |
9193             (1 << Character.LOWERCASE_LETTER) |
9194             (1 << Character.TITLECASE_LETTER) |
9195             (1 << Character.MODIFIER_LETTER) |
9196             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9197             != 0;
9198     }
9199 
9200     /**
9201      * Determines if the specified character is a letter or digit.
9202      * <p>
9203      * A character is considered to be a letter or digit if either
9204      * {@code Character.isLetter(char ch)} or
9205      * {@code Character.isDigit(char ch)} returns
9206      * {@code true} for the character.
9207      *
9208      * <p><b>Note:</b> This method cannot handle <a
9209      * href="#supplementary"> supplementary characters</a>. To support
9210      * all Unicode characters, including supplementary characters, use
9211      * the {@link #isLetterOrDigit(int)} method.
9212      *
9213      * @param   ch   the character to be tested.
9214      * @return  {@code true} if the character is a letter or digit;
9215      *          {@code false} otherwise.
9216      * @see     Character#isDigit(char)
9217      * @see     Character#isJavaIdentifierPart(char)
9218      * @see     Character#isJavaLetter(char)
9219      * @see     Character#isJavaLetterOrDigit(char)
9220      * @see     Character#isLetter(char)
9221      * @see     Character#isUnicodeIdentifierPart(char)
9222      * @since   1.0.2
9223      */
9224     public static boolean isLetterOrDigit(char ch) {
9225         return isLetterOrDigit((int)ch);
9226     }
9227 
9228     /**
9229      * Determines if the specified character (Unicode code point) is a letter or digit.
9230      * <p>
9231      * A character is considered to be a letter or digit if either
9232      * {@link #isLetter(int) isLetter(codePoint)} or
9233      * {@link #isDigit(int) isDigit(codePoint)} returns
9234      * {@code true} for the character.
9235      *
9236      * @param   codePoint the character (Unicode code point) to be tested.
9237      * @return  {@code true} if the character is a letter or digit;
9238      *          {@code false} otherwise.
9239      * @see     Character#isDigit(int)
9240      * @see     Character#isJavaIdentifierPart(int)
9241      * @see     Character#isLetter(int)
9242      * @see     Character#isUnicodeIdentifierPart(int)
9243      * @since   1.5
9244      */
9245     public static boolean isLetterOrDigit(int codePoint) {
9246         return ((((1 << Character.UPPERCASE_LETTER) |
9247             (1 << Character.LOWERCASE_LETTER) |
9248             (1 << Character.TITLECASE_LETTER) |
9249             (1 << Character.MODIFIER_LETTER) |
9250             (1 << Character.OTHER_LETTER) |
9251             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9252             != 0;
9253     }
9254 
9255     /**
9256      * Determines if the specified character is permissible as the first
9257      * character in a Java identifier.
9258      * <p>
9259      * A character may start a Java identifier if and only if
9260      * one of the following is true:
9261      * <ul>
9262      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9263      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9264      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9265      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9266      * </ul>
9267      *
9268      * @param   ch the character to be tested.
9269      * @return  {@code true} if the character may start a Java
9270      *          identifier; {@code false} otherwise.
9271      * @see     Character#isJavaLetterOrDigit(char)
9272      * @see     Character#isJavaIdentifierStart(char)
9273      * @see     Character#isJavaIdentifierPart(char)
9274      * @see     Character#isLetter(char)
9275      * @see     Character#isLetterOrDigit(char)
9276      * @see     Character#isUnicodeIdentifierStart(char)
9277      * @since   1.0.2
9278      * @deprecated Replaced by isJavaIdentifierStart(char).
9279      */
9280     @Deprecated(since="1.1")
9281     public static boolean isJavaLetter(char ch) {
9282         return isJavaIdentifierStart(ch);
9283     }
9284 
9285     /**
9286      * Determines if the specified character may be part of a Java
9287      * identifier as other than the first character.
9288      * <p>
9289      * A character may be part of a Java identifier if and only if any
9290      * of the following are true:
9291      * <ul>
9292      * <li>  it is a letter
9293      * <li>  it is a currency symbol (such as {@code '$'})
9294      * <li>  it is a connecting punctuation character (such as {@code '_'})
9295      * <li>  it is a digit
9296      * <li>  it is a numeric letter (such as a Roman numeral character)
9297      * <li>  it is a combining mark
9298      * <li>  it is a non-spacing mark
9299      * <li> {@code isIdentifierIgnorable} returns
9300      * {@code true} for the character.
9301      * </ul>
9302      *
9303      * @param   ch the character to be tested.
9304      * @return  {@code true} if the character may be part of a
9305      *          Java identifier; {@code false} otherwise.
9306      * @see     Character#isJavaLetter(char)
9307      * @see     Character#isJavaIdentifierStart(char)
9308      * @see     Character#isJavaIdentifierPart(char)
9309      * @see     Character#isLetter(char)
9310      * @see     Character#isLetterOrDigit(char)
9311      * @see     Character#isUnicodeIdentifierPart(char)
9312      * @see     Character#isIdentifierIgnorable(char)
9313      * @since   1.0.2
9314      * @deprecated Replaced by isJavaIdentifierPart(char).
9315      */
9316     @Deprecated(since="1.1")
9317     public static boolean isJavaLetterOrDigit(char ch) {
9318         return isJavaIdentifierPart(ch);
9319     }
9320 
9321     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
9323      * <p>
9324      * A character is considered to be alphabetic if its general category type,
9325      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9326      * the following:
9327      * <ul>
9328      * <li> {@code UPPERCASE_LETTER}
9329      * <li> {@code LOWERCASE_LETTER}
9330      * <li> {@code TITLECASE_LETTER}
9331      * <li> {@code MODIFIER_LETTER}
9332      * <li> {@code OTHER_LETTER}
9333      * <li> {@code LETTER_NUMBER}
9334      * </ul>
9335      * or it has contributory property Other_Alphabetic as defined by the
9336      * Unicode Standard.
9337      *
9338      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character is a Unicode alphabetic
9340      *          character, {@code false} otherwise.
9341      * @since   1.7
9342      */
9343     public static boolean isAlphabetic(int codePoint) {
9344         return (((((1 << Character.UPPERCASE_LETTER) |
9345             (1 << Character.LOWERCASE_LETTER) |
9346             (1 << Character.TITLECASE_LETTER) |
9347             (1 << Character.MODIFIER_LETTER) |
9348             (1 << Character.OTHER_LETTER) |
9349             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9350             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9351     }
9352 
9353     /**
9354      * Determines if the specified character (Unicode code point) is a CJKV
9355      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9356      * the Unicode Standard.
9357      *
9358      * @param   codePoint the character (Unicode code point) to be tested.
9359      * @return  {@code true} if the character is a Unicode ideograph
9360      *          character, {@code false} otherwise.
9361      * @since   1.7
9362      */
9363     public static boolean isIdeographic(int codePoint) {
9364         return CharacterData.of(codePoint).isIdeographic(codePoint);
9365     }
9366 
9367     /**
9368      * Determines if the specified character is
9369      * permissible as the first character in a Java identifier.
9370      * <p>
9371      * A character may start a Java identifier if and only if
9372      * one of the following conditions is true:
9373      * <ul>
9374      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9375      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9376      * <li> {@code ch} is a currency symbol (such as {@code '$'})
9377      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9378      * </ul>
9379      *
9380      * <p><b>Note:</b> This method cannot handle <a
9381      * href="#supplementary"> supplementary characters</a>. To support
9382      * all Unicode characters, including supplementary characters, use
9383      * the {@link #isJavaIdentifierStart(int)} method.
9384      *
9385      * @param   ch the character to be tested.
9386      * @return  {@code true} if the character may start a Java identifier;
9387      *          {@code false} otherwise.
9388      * @see     Character#isJavaIdentifierPart(char)
9389      * @see     Character#isLetter(char)
9390      * @see     Character#isUnicodeIdentifierStart(char)
9391      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9392      * @since   1.1
9393      */
9394     public static boolean isJavaIdentifierStart(char ch) {
9395         return isJavaIdentifierStart((int)ch);
9396     }
9397 
9398     /**
9399      * Determines if the character (Unicode code point) is
9400      * permissible as the first character in a Java identifier.
9401      * <p>
9402      * A character may start a Java identifier if and only if
9403      * one of the following conditions is true:
9404      * <ul>
9405      * <li> {@link #isLetter(int) isLetter(codePoint)}
9406      *      returns {@code true}
9407      * <li> {@link #getType(int) getType(codePoint)}
9408      *      returns {@code LETTER_NUMBER}
9409      * <li> the referenced character is a currency symbol (such as {@code '$'})
9410      * <li> the referenced character is a connecting punctuation character
9411      *      (such as {@code '_'}).
9412      * </ul>
9413      *
9414      * @param   codePoint the character (Unicode code point) to be tested.
9415      * @return  {@code true} if the character may start a Java identifier;
9416      *          {@code false} otherwise.
9417      * @see     Character#isJavaIdentifierPart(int)
9418      * @see     Character#isLetter(int)
9419      * @see     Character#isUnicodeIdentifierStart(int)
9420      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9421      * @since   1.5
9422      */
9423     public static boolean isJavaIdentifierStart(int codePoint) {
9424         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
9425     }
9426 
9427     /**
9428      * Determines if the specified character may be part of a Java
9429      * identifier as other than the first character.
9430      * <p>
9431      * A character may be part of a Java identifier if any of the following
9432      * are true:
9433      * <ul>
9434      * <li>  it is a letter
9435      * <li>  it is a currency symbol (such as {@code '$'})
9436      * <li>  it is a connecting punctuation character (such as {@code '_'})
9437      * <li>  it is a digit
9438      * <li>  it is a numeric letter (such as a Roman numeral character)
9439      * <li>  it is a combining mark
9440      * <li>  it is a non-spacing mark
9441      * <li> {@code isIdentifierIgnorable} returns
9442      * {@code true} for the character
9443      * </ul>
9444      *
9445      * <p><b>Note:</b> This method cannot handle <a
9446      * href="#supplementary"> supplementary characters</a>. To support
9447      * all Unicode characters, including supplementary characters, use
9448      * the {@link #isJavaIdentifierPart(int)} method.
9449      *
9450      * @param   ch      the character to be tested.
9451      * @return {@code true} if the character may be part of a
9452      *          Java identifier; {@code false} otherwise.
9453      * @see     Character#isIdentifierIgnorable(char)
9454      * @see     Character#isJavaIdentifierStart(char)
9455      * @see     Character#isLetterOrDigit(char)
9456      * @see     Character#isUnicodeIdentifierPart(char)
9457      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9458      * @since   1.1
9459      */
9460     public static boolean isJavaIdentifierPart(char ch) {
9461         return isJavaIdentifierPart((int)ch);
9462     }
9463 
9464     /**
9465      * Determines if the character (Unicode code point) may be part of a Java
9466      * identifier as other than the first character.
9467      * <p>
9468      * A character may be part of a Java identifier if any of the following
9469      * are true:
9470      * <ul>
9471      * <li>  it is a letter
9472      * <li>  it is a currency symbol (such as {@code '$'})
9473      * <li>  it is a connecting punctuation character (such as {@code '_'})
9474      * <li>  it is a digit
9475      * <li>  it is a numeric letter (such as a Roman numeral character)
9476      * <li>  it is a combining mark
9477      * <li>  it is a non-spacing mark
9478      * <li> {@link #isIdentifierIgnorable(int)
9479      * isIdentifierIgnorable(codePoint)} returns {@code true} for
9480      * the character
9481      * </ul>
9482      *
9483      * @param   codePoint the character (Unicode code point) to be tested.
9484      * @return {@code true} if the character may be part of a
9485      *          Java identifier; {@code false} otherwise.
9486      * @see     Character#isIdentifierIgnorable(int)
9487      * @see     Character#isJavaIdentifierStart(int)
9488      * @see     Character#isLetterOrDigit(int)
9489      * @see     Character#isUnicodeIdentifierPart(int)
9490      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9491      * @since   1.5
9492      */
9493     public static boolean isJavaIdentifierPart(int codePoint) {
9494         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
9495     }
9496 
9497     /**
9498      * Determines if the specified character is permissible as the
9499      * first character in a Unicode identifier.
9500      * <p>
9501      * A character may start a Unicode identifier if and only if
9502      * one of the following conditions is true:
9503      * <ul>
9504      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9505      * <li> {@link #getType(char) getType(ch)} returns
9506      *      {@code LETTER_NUMBER}.
9507      * </ul>
9508      *
9509      * <p><b>Note:</b> This method cannot handle <a
9510      * href="#supplementary"> supplementary characters</a>. To support
9511      * all Unicode characters, including supplementary characters, use
9512      * the {@link #isUnicodeIdentifierStart(int)} method.
9513      *
9514      * @param   ch      the character to be tested.
9515      * @return  {@code true} if the character may start a Unicode
9516      *          identifier; {@code false} otherwise.
9517      * @see     Character#isJavaIdentifierStart(char)
9518      * @see     Character#isLetter(char)
9519      * @see     Character#isUnicodeIdentifierPart(char)
9520      * @since   1.1
9521      */
9522     public static boolean isUnicodeIdentifierStart(char ch) {
9523         return isUnicodeIdentifierStart((int)ch);
9524     }
9525 
9526     /**
9527      * Determines if the specified character (Unicode code point) is permissible as the
9528      * first character in a Unicode identifier.
9529      * <p>
9530      * A character may start a Unicode identifier if and only if
9531      * one of the following conditions is true:
9532      * <ul>
9533      * <li> {@link #isLetter(int) isLetter(codePoint)}
9534      *      returns {@code true}
9535      * <li> {@link #getType(int) getType(codePoint)}
9536      *      returns {@code LETTER_NUMBER}.
9537      * </ul>
9538      * @param   codePoint the character (Unicode code point) to be tested.
9539      * @return  {@code true} if the character may start a Unicode
9540      *          identifier; {@code false} otherwise.
9541      * @see     Character#isJavaIdentifierStart(int)
9542      * @see     Character#isLetter(int)
9543      * @see     Character#isUnicodeIdentifierPart(int)
9544      * @since   1.5
9545      */
9546     public static boolean isUnicodeIdentifierStart(int codePoint) {
9547         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
9548     }
9549 
9550     /**
9551      * Determines if the specified character may be part of a Unicode
9552      * identifier as other than the first character.
9553      * <p>
9554      * A character may be part of a Unicode identifier if and only if
9555      * one of the following statements is true:
9556      * <ul>
9557      * <li>  it is a letter
9558      * <li>  it is a connecting punctuation character (such as {@code '_'})
9559      * <li>  it is a digit
9560      * <li>  it is a numeric letter (such as a Roman numeral character)
9561      * <li>  it is a combining mark
9562      * <li>  it is a non-spacing mark
9563      * <li> {@code isIdentifierIgnorable} returns
9564      * {@code true} for this character.
9565      * </ul>
9566      *
9567      * <p><b>Note:</b> This method cannot handle <a
9568      * href="#supplementary"> supplementary characters</a>. To support
9569      * all Unicode characters, including supplementary characters, use
9570      * the {@link #isUnicodeIdentifierPart(int)} method.
9571      *
9572      * @param   ch      the character to be tested.
9573      * @return  {@code true} if the character may be part of a
9574      *          Unicode identifier; {@code false} otherwise.
9575      * @see     Character#isIdentifierIgnorable(char)
9576      * @see     Character#isJavaIdentifierPart(char)
9577      * @see     Character#isLetterOrDigit(char)
9578      * @see     Character#isUnicodeIdentifierStart(char)
9579      * @since   1.1
9580      */
9581     public static boolean isUnicodeIdentifierPart(char ch) {
9582         return isUnicodeIdentifierPart((int)ch);
9583     }
9584 
9585     /**
9586      * Determines if the specified character (Unicode code point) may be part of a Unicode
9587      * identifier as other than the first character.
9588      * <p>
9589      * A character may be part of a Unicode identifier if and only if
9590      * one of the following statements is true:
9591      * <ul>
9592      * <li>  it is a letter
9593      * <li>  it is a connecting punctuation character (such as {@code '_'})
9594      * <li>  it is a digit
9595      * <li>  it is a numeric letter (such as a Roman numeral character)
9596      * <li>  it is a combining mark
9597      * <li>  it is a non-spacing mark
9598      * <li> {@code isIdentifierIgnorable} returns
9599      * {@code true} for this character.
9600      * </ul>
9601      * @param   codePoint the character (Unicode code point) to be tested.
9602      * @return  {@code true} if the character may be part of a
9603      *          Unicode identifier; {@code false} otherwise.
9604      * @see     Character#isIdentifierIgnorable(int)
9605      * @see     Character#isJavaIdentifierPart(int)
9606      * @see     Character#isLetterOrDigit(int)
9607      * @see     Character#isUnicodeIdentifierStart(int)
9608      * @since   1.5
9609      */
9610     public static boolean isUnicodeIdentifierPart(int codePoint) {
9611         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
9612     }
9613 
9614     /**
9615      * Determines if the specified character should be regarded as
9616      * an ignorable character in a Java identifier or a Unicode identifier.
9617      * <p>
9618      * The following Unicode characters are ignorable in a Java identifier
9619      * or a Unicode identifier:
9620      * <ul>
9621      * <li>ISO control characters that are not whitespace
9622      * <ul>
9623      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9624      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9625      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9626      * </ul>
9627      *
9628      * <li>all characters that have the {@code FORMAT} general
9629      * category value
9630      * </ul>
9631      *
9632      * <p><b>Note:</b> This method cannot handle <a
9633      * href="#supplementary"> supplementary characters</a>. To support
9634      * all Unicode characters, including supplementary characters, use
9635      * the {@link #isIdentifierIgnorable(int)} method.
9636      *
9637      * @param   ch      the character to be tested.
9638      * @return  {@code true} if the character is an ignorable control
9639      *          character that may be part of a Java or Unicode identifier;
9640      *           {@code false} otherwise.
9641      * @see     Character#isJavaIdentifierPart(char)
9642      * @see     Character#isUnicodeIdentifierPart(char)
9643      * @since   1.1
9644      */
9645     public static boolean isIdentifierIgnorable(char ch) {
9646         return isIdentifierIgnorable((int)ch);
9647     }
9648 
9649     /**
9650      * Determines if the specified character (Unicode code point) should be regarded as
9651      * an ignorable character in a Java identifier or a Unicode identifier.
9652      * <p>
9653      * The following Unicode characters are ignorable in a Java identifier
9654      * or a Unicode identifier:
9655      * <ul>
9656      * <li>ISO control characters that are not whitespace
9657      * <ul>
9658      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9659      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9660      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9661      * </ul>
9662      *
9663      * <li>all characters that have the {@code FORMAT} general
9664      * category value
9665      * </ul>
9666      *
9667      * @param   codePoint the character (Unicode code point) to be tested.
9668      * @return  {@code true} if the character is an ignorable control
9669      *          character that may be part of a Java or Unicode identifier;
9670      *          {@code false} otherwise.
9671      * @see     Character#isJavaIdentifierPart(int)
9672      * @see     Character#isUnicodeIdentifierPart(int)
9673      * @since   1.5
9674      */
9675     public static boolean isIdentifierIgnorable(int codePoint) {
9676         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
9677     }
9678 
9679     /**
9680      * Converts the character argument to lowercase using case
9681      * mapping information from the UnicodeData file.
9682      * <p>
9683      * Note that
9684      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
9685      * does not always return {@code true} for some ranges of
9686      * characters, particularly those that are symbols or ideographs.
9687      *
9688      * <p>In general, {@link String#toLowerCase()} should be used to map
9689      * characters to lowercase. {@code String} case mapping methods
9690      * have several benefits over {@code Character} case mapping methods.
9691      * {@code String} case mapping methods can perform locale-sensitive
9692      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9693      * the {@code Character} case mapping methods cannot.
9694      *
9695      * <p><b>Note:</b> This method cannot handle <a
9696      * href="#supplementary"> supplementary characters</a>. To support
9697      * all Unicode characters, including supplementary characters, use
9698      * the {@link #toLowerCase(int)} method.
9699      *
9700      * @param   ch   the character to be converted.
9701      * @return  the lowercase equivalent of the character, if any;
9702      *          otherwise, the character itself.
9703      * @see     Character#isLowerCase(char)
9704      * @see     String#toLowerCase()
9705      */
9706     public static char toLowerCase(char ch) {
9707         return (char)toLowerCase((int)ch);
9708     }
9709 
9710     /**
9711      * Converts the character (Unicode code point) argument to
9712      * lowercase using case mapping information from the UnicodeData
9713      * file.
9714      *
9715      * <p> Note that
9716      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9717      * does not always return {@code true} for some ranges of
9718      * characters, particularly those that are symbols or ideographs.
9719      *
9720      * <p>In general, {@link String#toLowerCase()} should be used to map
9721      * characters to lowercase. {@code String} case mapping methods
9722      * have several benefits over {@code Character} case mapping methods.
9723      * {@code String} case mapping methods can perform locale-sensitive
9724      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9725      * the {@code Character} case mapping methods cannot.
9726      *
9727      * @param   codePoint   the character (Unicode code point) to be converted.
9728      * @return  the lowercase equivalent of the character (Unicode code
9729      *          point), if any; otherwise, the character itself.
9730      * @see     Character#isLowerCase(int)
9731      * @see     String#toLowerCase()
9732      *
9733      * @since   1.5
9734      */
9735     public static int toLowerCase(int codePoint) {
9736         return CharacterData.of(codePoint).toLowerCase(codePoint);
9737     }
9738 
9739     /**
9740      * Converts the character argument to uppercase using case mapping
9741      * information from the UnicodeData file.
9742      * <p>
9743      * Note that
9744      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9745      * does not always return {@code true} for some ranges of
9746      * characters, particularly those that are symbols or ideographs.
9747      *
9748      * <p>In general, {@link String#toUpperCase()} should be used to map
9749      * characters to uppercase. {@code String} case mapping methods
9750      * have several benefits over {@code Character} case mapping methods.
9751      * {@code String} case mapping methods can perform locale-sensitive
9752      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9753      * the {@code Character} case mapping methods cannot.
9754      *
9755      * <p><b>Note:</b> This method cannot handle <a
9756      * href="#supplementary"> supplementary characters</a>. To support
9757      * all Unicode characters, including supplementary characters, use
9758      * the {@link #toUpperCase(int)} method.
9759      *
9760      * @param   ch   the character to be converted.
9761      * @return  the uppercase equivalent of the character, if any;
9762      *          otherwise, the character itself.
9763      * @see     Character#isUpperCase(char)
9764      * @see     String#toUpperCase()
9765      */
9766     public static char toUpperCase(char ch) {
9767         return (char)toUpperCase((int)ch);
9768     }
9769 
9770     /**
9771      * Converts the character (Unicode code point) argument to
9772      * uppercase using case mapping information from the UnicodeData
9773      * file.
9774      *
9775      * <p>Note that
9776      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
9777      * does not always return {@code true} for some ranges of
9778      * characters, particularly those that are symbols or ideographs.
9779      *
9780      * <p>In general, {@link String#toUpperCase()} should be used to map
9781      * characters to uppercase. {@code String} case mapping methods
9782      * have several benefits over {@code Character} case mapping methods.
9783      * {@code String} case mapping methods can perform locale-sensitive
9784      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9785      * the {@code Character} case mapping methods cannot.
9786      *
9787      * @param   codePoint   the character (Unicode code point) to be converted.
9788      * @return  the uppercase equivalent of the character, if any;
9789      *          otherwise, the character itself.
9790      * @see     Character#isUpperCase(int)
9791      * @see     String#toUpperCase()
9792      *
9793      * @since   1.5
9794      */
9795     public static int toUpperCase(int codePoint) {
9796         return CharacterData.of(codePoint).toUpperCase(codePoint);
9797     }
9798 
9799     /**
9800      * Converts the character argument to titlecase using case mapping
9801      * information from the UnicodeData file. If a character has no
9802      * explicit titlecase mapping and is not itself a titlecase char
9803      * according to UnicodeData, then the uppercase mapping is
9804      * returned as an equivalent titlecase mapping. If the
9805      * {@code char} argument is already a titlecase
9806      * {@code char}, the same {@code char} value will be
9807      * returned.
9808      * <p>
9809      * Note that
9810      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
9811      * does not always return {@code true} for some ranges of
9812      * characters.
9813      *
9814      * <p><b>Note:</b> This method cannot handle <a
9815      * href="#supplementary"> supplementary characters</a>. To support
9816      * all Unicode characters, including supplementary characters, use
9817      * the {@link #toTitleCase(int)} method.
9818      *
9819      * @param   ch   the character to be converted.
9820      * @return  the titlecase equivalent of the character, if any;
9821      *          otherwise, the character itself.
9822      * @see     Character#isTitleCase(char)
9823      * @see     Character#toLowerCase(char)
9824      * @see     Character#toUpperCase(char)
9825      * @since   1.0.2
9826      */
9827     public static char toTitleCase(char ch) {
9828         return (char)toTitleCase((int)ch);
9829     }
9830 
9831     /**
9832      * Converts the character (Unicode code point) argument to titlecase using case mapping
9833      * information from the UnicodeData file. If a character has no
9834      * explicit titlecase mapping and is not itself a titlecase char
9835      * according to UnicodeData, then the uppercase mapping is
9836      * returned as an equivalent titlecase mapping. If the
9837      * character argument is already a titlecase
9838      * character, the same character value will be
9839      * returned.
9840      *
9841      * <p>Note that
9842      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
9843      * does not always return {@code true} for some ranges of
9844      * characters.
9845      *
9846      * @param   codePoint   the character (Unicode code point) to be converted.
9847      * @return  the titlecase equivalent of the character, if any;
9848      *          otherwise, the character itself.
9849      * @see     Character#isTitleCase(int)
9850      * @see     Character#toLowerCase(int)
9851      * @see     Character#toUpperCase(int)
9852      * @since   1.5
9853      */
9854     public static int toTitleCase(int codePoint) {
9855         return CharacterData.of(codePoint).toTitleCase(codePoint);
9856     }
9857 
9858     /**
9859      * Returns the numeric value of the character {@code ch} in the
9860      * specified radix.
9861      * <p>
9862      * If the radix is not in the range {@code MIN_RADIX} &le;
9863      * {@code radix} &le; {@code MAX_RADIX} or if the
9864      * value of {@code ch} is not a valid digit in the specified
9865      * radix, {@code -1} is returned. A character is a valid digit
9866      * if at least one of the following is true:
9867      * <ul>
9868      * <li>The method {@code isDigit} is {@code true} of the character
9869      *     and the Unicode decimal digit value of the character (or its
9870      *     single-character decomposition) is less than the specified radix.
9871      *     In this case the decimal digit value is returned.
9872      * <li>The character is one of the uppercase Latin letters
9873      *     {@code 'A'} through {@code 'Z'} and its code is less than
9874      *     {@code radix + 'A' - 10}.
9875      *     In this case, {@code ch - 'A' + 10}
9876      *     is returned.
9877      * <li>The character is one of the lowercase Latin letters
9878      *     {@code 'a'} through {@code 'z'} and its code is less than
9879      *     {@code radix + 'a' - 10}.
9880      *     In this case, {@code ch - 'a' + 10}
9881      *     is returned.
9882      * <li>The character is one of the fullwidth uppercase Latin letters A
9883      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9884      *     and its code is less than
9885      *     {@code radix + '\u005CuFF21' - 10}.
9886      *     In this case, {@code ch - '\u005CuFF21' + 10}
9887      *     is returned.
9888      * <li>The character is one of the fullwidth lowercase Latin letters a
9889      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9890      *     and its code is less than
9891      *     {@code radix + '\u005CuFF41' - 10}.
9892      *     In this case, {@code ch - '\u005CuFF41' + 10}
9893      *     is returned.
9894      * </ul>
9895      *
9896      * <p><b>Note:</b> This method cannot handle <a
9897      * href="#supplementary"> supplementary characters</a>. To support
9898      * all Unicode characters, including supplementary characters, use
9899      * the {@link #digit(int, int)} method.
9900      *
9901      * @param   ch      the character to be converted.
9902      * @param   radix   the radix.
9903      * @return  the numeric value represented by the character in the
9904      *          specified radix.
9905      * @see     Character#forDigit(int, int)
9906      * @see     Character#isDigit(char)
9907      */
9908     public static int digit(char ch, int radix) {
9909         return digit((int)ch, radix);
9910     }
9911 
9912     /**
9913      * Returns the numeric value of the specified character (Unicode
9914      * code point) in the specified radix.
9915      *
9916      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
9917      * {@code radix} &le; {@code MAX_RADIX} or if the
9918      * character is not a valid digit in the specified
9919      * radix, {@code -1} is returned. A character is a valid digit
9920      * if at least one of the following is true:
9921      * <ul>
9922      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
9923      *     and the Unicode decimal digit value of the character (or its
9924      *     single-character decomposition) is less than the specified radix.
9925      *     In this case the decimal digit value is returned.
9926      * <li>The character is one of the uppercase Latin letters
9927      *     {@code 'A'} through {@code 'Z'} and its code is less than
9928      *     {@code radix + 'A' - 10}.
9929      *     In this case, {@code codePoint - 'A' + 10}
9930      *     is returned.
9931      * <li>The character is one of the lowercase Latin letters
9932      *     {@code 'a'} through {@code 'z'} and its code is less than
9933      *     {@code radix + 'a' - 10}.
9934      *     In this case, {@code codePoint - 'a' + 10}
9935      *     is returned.
9936      * <li>The character is one of the fullwidth uppercase Latin letters A
9937      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9938      *     and its code is less than
9939      *     {@code radix + '\u005CuFF21' - 10}.
9940      *     In this case,
9941      *     {@code codePoint - '\u005CuFF21' + 10}
9942      *     is returned.
9943      * <li>The character is one of the fullwidth lowercase Latin letters a
9944      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9945      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
9947      *     In this case,
9948      *     {@code codePoint - '\u005CuFF41' + 10}
9949      *     is returned.
9950      * </ul>
9951      *
9952      * @param   codePoint the character (Unicode code point) to be converted.
9953      * @param   radix   the radix.
9954      * @return  the numeric value represented by the character in the
9955      *          specified radix.
9956      * @see     Character#forDigit(int, int)
9957      * @see     Character#isDigit(int)
9958      * @since   1.5
9959      */
9960     public static int digit(int codePoint, int radix) {
9961         return CharacterData.of(codePoint).digit(codePoint, radix);
9962     }
9963 
9964     /**
9965      * Returns the {@code int} value that the specified Unicode
9966      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
9969      * <p>
9970      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9971      * {@code '\u005Cu005A'}), lowercase
9972      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9973      * full width variant ({@code '\u005CuFF21'} through
9974      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9975      * {@code '\u005CuFF5A'}) forms have numeric values from 10
9976      * through 35. This is independent of the Unicode specification,
9977      * which does not assign numeric values to these {@code char}
9978      * values.
9979      * <p>
9980      * If the character does not have a numeric value, then -1 is returned.
9981      * If the character has a numeric value that cannot be represented as a
9982      * nonnegative integer (for example, a fractional value), then -2
9983      * is returned.
9984      *
9985      * <p><b>Note:</b> This method cannot handle <a
9986      * href="#supplementary"> supplementary characters</a>. To support
9987      * all Unicode characters, including supplementary characters, use
9988      * the {@link #getNumericValue(int)} method.
9989      *
9990      * @param   ch      the character to be converted.
9991      * @return  the numeric value of the character, as a nonnegative {@code int}
9992      *          value; -2 if the character has a numeric value but the value
     *          cannot be represented as a nonnegative {@code int} value;
9994      *          -1 if the character has no numeric value.
9995      * @see     Character#forDigit(int, int)
9996      * @see     Character#isDigit(char)
9997      * @since   1.1
9998      */
9999     public static int getNumericValue(char ch) {
10000         return getNumericValue((int)ch);
10001     }
10002 
10003     /**
10004      * Returns the {@code int} value that the specified
10005      * character (Unicode code point) represents. For example, the character
10006      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
10007      * an {@code int} with a value of 50.
10008      * <p>
10009      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10010      * {@code '\u005Cu005A'}), lowercase
10011      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10012      * full width variant ({@code '\u005CuFF21'} through
10013      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10014      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10015      * through 35. This is independent of the Unicode specification,
10016      * which does not assign numeric values to these {@code char}
10017      * values.
10018      * <p>
10019      * If the character does not have a numeric value, then -1 is returned.
10020      * If the character has a numeric value that cannot be represented as a
10021      * nonnegative integer (for example, a fractional value), then -2
10022      * is returned.
10023      *
10024      * @param   codePoint the character (Unicode code point) to be converted.
10025      * @return  the numeric value of the character, as a nonnegative {@code int}
10026      *          value; -2 if the character has a numeric value but the value
     *          cannot be represented as a nonnegative {@code int} value;
10028      *          -1 if the character has no numeric value.
10029      * @see     Character#forDigit(int, int)
10030      * @see     Character#isDigit(int)
10031      * @since   1.5
10032      */
10033     public static int getNumericValue(int codePoint) {
10034         return CharacterData.of(codePoint).getNumericValue(codePoint);
10035     }
10036 
10037     /**
10038      * Determines if the specified character is ISO-LATIN-1 white space.
10039      * This method returns {@code true} for the following five
10040      * characters only:
10041      * <table class="striped">
10042      * <caption style="display:none">truechars</caption>
10043      * <thead>
10044      * <tr><th scope="col">Character
10045      *     <th scope="col">Code
10046      *     <th scope="col">Name
10047      * </thead>
10048      * <tbody>
10049      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
10050      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
10051      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
10052      *     <td>{@code NEW LINE}</td></tr>
10053      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
10054      *     <td>{@code FORM FEED}</td></tr>
10055      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
10056      *     <td>{@code CARRIAGE RETURN}</td></tr>
10057      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
10058      *     <td>{@code SPACE}</td></tr>
10059      * </tbody>
10060      * </table>
10061      *
10062      * @param      ch   the character to be tested.
10063      * @return     {@code true} if the character is ISO-LATIN-1 white
10064      *             space; {@code false} otherwise.
10065      * @see        Character#isSpaceChar(char)
10066      * @see        Character#isWhitespace(char)
10067      * @deprecated Replaced by isWhitespace(char).
10068      */
10069     @Deprecated(since="1.1")
10070     public static boolean isSpace(char ch) {
10071         return (ch <= 0x0020) &&
10072             (((((1L << 0x0009) |
10073             (1L << 0x000A) |
10074             (1L << 0x000C) |
10075             (1L << 0x000D) |
10076             (1L << 0x0020)) >> ch) & 1L) != 0);
10077     }
10078 
10079 
10080     /**
10081      * Determines if the specified character is a Unicode space character.
10082      * A character is considered to be a space character if and only if
10083      * it is specified to be a space character by the Unicode Standard. This
10084      * method returns true if the character's general category type is any of
10085      * the following:
10086      * <ul>
10087      * <li> {@code SPACE_SEPARATOR}
10088      * <li> {@code LINE_SEPARATOR}
10089      * <li> {@code PARAGRAPH_SEPARATOR}
10090      * </ul>
10091      *
10092      * <p><b>Note:</b> This method cannot handle <a
10093      * href="#supplementary"> supplementary characters</a>. To support
10094      * all Unicode characters, including supplementary characters, use
10095      * the {@link #isSpaceChar(int)} method.
10096      *
10097      * @param   ch      the character to be tested.
10098      * @return  {@code true} if the character is a space character;
10099      *          {@code false} otherwise.
10100      * @see     Character#isWhitespace(char)
10101      * @since   1.1
10102      */
10103     public static boolean isSpaceChar(char ch) {
10104         return isSpaceChar((int)ch);
10105     }
10106 
10107     /**
10108      * Determines if the specified character (Unicode code point) is a
10109      * Unicode space character.  A character is considered to be a
10110      * space character if and only if it is specified to be a space
10111      * character by the Unicode Standard. This method returns true if
10112      * the character's general category type is any of the following:
10113      *
10114      * <ul>
10115      * <li> {@link #SPACE_SEPARATOR}
10116      * <li> {@link #LINE_SEPARATOR}
10117      * <li> {@link #PARAGRAPH_SEPARATOR}
10118      * </ul>
10119      *
10120      * @param   codePoint the character (Unicode code point) to be tested.
10121      * @return  {@code true} if the character is a space character;
10122      *          {@code false} otherwise.
10123      * @see     Character#isWhitespace(int)
10124      * @since   1.5
10125      */
10126     public static boolean isSpaceChar(int codePoint) {
10127         return ((((1 << Character.SPACE_SEPARATOR) |
10128                   (1 << Character.LINE_SEPARATOR) |
10129                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10130             != 0;
10131     }
10132 
10133     /**
10134      * Determines if the specified character is white space according to Java.
10135      * A character is a Java whitespace character if and only if it satisfies
10136      * one of the following criteria:
10137      * <ul>
10138      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10139      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10140      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10141      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10142      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10143      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10144      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10145      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10146      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10147      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10148      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10149      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10150      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10151      * </ul>
10152      *
10153      * <p><b>Note:</b> This method cannot handle <a
10154      * href="#supplementary"> supplementary characters</a>. To support
10155      * all Unicode characters, including supplementary characters, use
10156      * the {@link #isWhitespace(int)} method.
10157      *
10158      * @param   ch the character to be tested.
10159      * @return  {@code true} if the character is a Java whitespace
10160      *          character; {@code false} otherwise.
10161      * @see     Character#isSpaceChar(char)
10162      * @since   1.1
10163      */
10164     public static boolean isWhitespace(char ch) {
10165         return isWhitespace((int)ch);
10166     }
10167 
10168     /**
10169      * Determines if the specified character (Unicode code point) is
10170      * white space according to Java.  A character is a Java
10171      * whitespace character if and only if it satisfies one of the
10172      * following criteria:
10173      * <ul>
10174      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10175      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10176      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
10177      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10178      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10179      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10180      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10181      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10182      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10183      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10184      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10185      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10186      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10187      * </ul>
10188      *
10189      * @param   codePoint the character (Unicode code point) to be tested.
10190      * @return  {@code true} if the character is a Java whitespace
10191      *          character; {@code false} otherwise.
10192      * @see     Character#isSpaceChar(int)
10193      * @since   1.5
10194      */
10195     public static boolean isWhitespace(int codePoint) {
10196         return CharacterData.of(codePoint).isWhitespace(codePoint);
10197     }
10198 
10199     /**
10200      * Determines if the specified character is an ISO control
10201      * character.  A character is considered to be an ISO control
10202      * character if its code is in the range {@code '\u005Cu0000'}
10203      * through {@code '\u005Cu001F'} or in the range
10204      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10205      *
10206      * <p><b>Note:</b> This method cannot handle <a
10207      * href="#supplementary"> supplementary characters</a>. To support
10208      * all Unicode characters, including supplementary characters, use
10209      * the {@link #isISOControl(int)} method.
10210      *
10211      * @param   ch      the character to be tested.
10212      * @return  {@code true} if the character is an ISO control character;
10213      *          {@code false} otherwise.
10214      *
10215      * @see     Character#isSpaceChar(char)
10216      * @see     Character#isWhitespace(char)
10217      * @since   1.1
10218      */
10219     public static boolean isISOControl(char ch) {
10220         return isISOControl((int)ch);
10221     }
10222 
10223     /**
10224      * Determines if the referenced character (Unicode code point) is an ISO control
10225      * character.  A character is considered to be an ISO control
10226      * character if its code is in the range {@code '\u005Cu0000'}
10227      * through {@code '\u005Cu001F'} or in the range
10228      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10229      *
10230      * @param   codePoint the character (Unicode code point) to be tested.
10231      * @return  {@code true} if the character is an ISO control character;
10232      *          {@code false} otherwise.
10233      * @see     Character#isSpaceChar(int)
10234      * @see     Character#isWhitespace(int)
10235      * @since   1.5
10236      */
10237     public static boolean isISOControl(int codePoint) {
10238         // Optimized form of:
10239         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
10240         //     (codePoint >= 0x7F && codePoint <= 0x9F);
10241         return codePoint <= 0x9F &&
10242             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10243     }
10244 
10245     /**
10246      * Returns a value indicating a character's general category.
10247      *
10248      * <p><b>Note:</b> This method cannot handle <a
10249      * href="#supplementary"> supplementary characters</a>. To support
10250      * all Unicode characters, including supplementary characters, use
10251      * the {@link #getType(int)} method.
10252      *
10253      * @param   ch      the character to be tested.
10254      * @return  a value of type {@code int} representing the
10255      *          character's general category.
10256      * @see     Character#COMBINING_SPACING_MARK
10257      * @see     Character#CONNECTOR_PUNCTUATION
10258      * @see     Character#CONTROL
10259      * @see     Character#CURRENCY_SYMBOL
10260      * @see     Character#DASH_PUNCTUATION
10261      * @see     Character#DECIMAL_DIGIT_NUMBER
10262      * @see     Character#ENCLOSING_MARK
10263      * @see     Character#END_PUNCTUATION
10264      * @see     Character#FINAL_QUOTE_PUNCTUATION
10265      * @see     Character#FORMAT
10266      * @see     Character#INITIAL_QUOTE_PUNCTUATION
10267      * @see     Character#LETTER_NUMBER
10268      * @see     Character#LINE_SEPARATOR
10269      * @see     Character#LOWERCASE_LETTER
10270      * @see     Character#MATH_SYMBOL
10271      * @see     Character#MODIFIER_LETTER
10272      * @see     Character#MODIFIER_SYMBOL
10273      * @see     Character#NON_SPACING_MARK
10274      * @see     Character#OTHER_LETTER
10275      * @see     Character#OTHER_NUMBER
10276      * @see     Character#OTHER_PUNCTUATION
10277      * @see     Character#OTHER_SYMBOL
10278      * @see     Character#PARAGRAPH_SEPARATOR
10279      * @see     Character#PRIVATE_USE
10280      * @see     Character#SPACE_SEPARATOR
10281      * @see     Character#START_PUNCTUATION
10282      * @see     Character#SURROGATE
10283      * @see     Character#TITLECASE_LETTER
10284      * @see     Character#UNASSIGNED
10285      * @see     Character#UPPERCASE_LETTER
10286      * @since   1.1
10287      */
10288     public static int getType(char ch) {
10289         return getType((int)ch);
10290     }
10291 
10292     /**
10293      * Returns a value indicating a character's general category.
10294      *
10295      * @param   codePoint the character (Unicode code point) to be tested.
10296      * @return  a value of type {@code int} representing the
10297      *          character's general category.
10298      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10299      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10300      * @see     Character#CONTROL CONTROL
10301      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10302      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
10303      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10304      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
10305      * @see     Character#END_PUNCTUATION END_PUNCTUATION
10306      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10307      * @see     Character#FORMAT FORMAT
10308      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10309      * @see     Character#LETTER_NUMBER LETTER_NUMBER
10310      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
10311      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
10312      * @see     Character#MATH_SYMBOL MATH_SYMBOL
10313      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
10314      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10315      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
10316      * @see     Character#OTHER_LETTER OTHER_LETTER
10317      * @see     Character#OTHER_NUMBER OTHER_NUMBER
10318      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10319      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
10320      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10321      * @see     Character#PRIVATE_USE PRIVATE_USE
10322      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
10323      * @see     Character#START_PUNCTUATION START_PUNCTUATION
10324      * @see     Character#SURROGATE SURROGATE
10325      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
10326      * @see     Character#UNASSIGNED UNASSIGNED
10327      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
10328      * @since   1.5
10329      */
10330     public static int getType(int codePoint) {
10331         return CharacterData.of(codePoint).getType(codePoint);
10332     }
10333 
10334     /**
10335      * Determines the character representation for a specific digit in
10336      * the specified radix. If the value of {@code radix} is not a
10337      * valid radix, or the value of {@code digit} is not a valid
10338      * digit in the specified radix, the null character
10339      * ({@code '\u005Cu0000'}) is returned.
10340      * <p>
10341      * The {@code radix} argument is valid if it is greater than or
10342      * equal to {@code MIN_RADIX} and less than or equal to
10343      * {@code MAX_RADIX}. The {@code digit} argument is valid if
10344      * {@code 0 <= digit < radix}.
10345      * <p>
10346      * If the digit is less than 10, then
10347      * {@code '0' + digit} is returned. Otherwise, the value
10348      * {@code 'a' + digit - 10} is returned.
10349      *
10350      * @param   digit   the number to convert to a character.
10351      * @param   radix   the radix.
10352      * @return  the {@code char} representation of the specified digit
10353      *          in the specified radix.
10354      * @see     Character#MIN_RADIX
10355      * @see     Character#MAX_RADIX
10356      * @see     Character#digit(char, int)
10357      */
10358     public static char forDigit(int digit, int radix) {
10359         if ((digit >= radix) || (digit < 0)) {
10360             return '\0';
10361         }
10362         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10363             return '\0';
10364         }
10365         if (digit < 10) {
10366             return (char)('0' + digit);
10367         }
10368         return (char)('a' - 10 + digit);
10369     }
10370 
10371     /**
10372      * Returns the Unicode directionality property for the given
10373      * character.  Character directionality is used to calculate the
10374      * visual ordering of text. The directionality value of undefined
10375      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10376      *
10377      * <p><b>Note:</b> This method cannot handle <a
10378      * href="#supplementary"> supplementary characters</a>. To support
10379      * all Unicode characters, including supplementary characters, use
10380      * the {@link #getDirectionality(int)} method.
10381      *
10382      * @param  ch {@code char} for which the directionality property
10383      *            is requested.
10384      * @return the directionality property of the {@code char} value.
10385      *
10386      * @see Character#DIRECTIONALITY_UNDEFINED
10387      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10388      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10389      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10390      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10391      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10392      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10393      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10394      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10395      * @see Character#DIRECTIONALITY_NONSPACING_MARK
10396      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10397      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10398      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10399      * @see Character#DIRECTIONALITY_WHITESPACE
10400      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10401      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10402      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10403      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10404      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10405      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10406      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10407      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10408      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10409      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10410      * @since 1.4
10411      */
10412     public static byte getDirectionality(char ch) {
10413         return getDirectionality((int)ch);
10414     }
10415 
10416     /**
10417      * Returns the Unicode directionality property for the given
10418      * character (Unicode code point).  Character directionality is
10419      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
10421      * #DIRECTIONALITY_UNDEFINED}.
10422      *
10423      * @param   codePoint the character (Unicode code point) for which
10424      *          the directionality property is requested.
10425      * @return the directionality property of the character.
10426      *
10427      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10428      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10429      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10430      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10431      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10432      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10433      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10434      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10435      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10436      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10437      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10438      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10439      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10440      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10441      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10442      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10443      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10444      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10445      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10446      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10447      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10448      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10449      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10450      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10451      * @since    1.5
10452      */
10453     public static byte getDirectionality(int codePoint) {
10454         return CharacterData.of(codePoint).getDirectionality(codePoint);
10455     }
10456 
10457     /**
10458      * Determines whether the character is mirrored according to the
10459      * Unicode specification.  Mirrored characters should have their
10460      * glyphs horizontally mirrored when displayed in text that is
10461      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
10462      * PARENTHESIS is semantically defined to be an <i>opening
10463      * parenthesis</i>.  This will appear as a "(" in text that is
10464      * left-to-right but as a ")" in text that is right-to-left.
10465      *
10466      * <p><b>Note:</b> This method cannot handle <a
10467      * href="#supplementary"> supplementary characters</a>. To support
10468      * all Unicode characters, including supplementary characters, use
10469      * the {@link #isMirrored(int)} method.
10470      *
10471      * @param  ch {@code char} for which the mirrored property is requested
10472      * @return {@code true} if the char is mirrored, {@code false}
10473      *         if the {@code char} is not mirrored or is not defined.
10474      * @since 1.4
10475      */
10476     public static boolean isMirrored(char ch) {
10477         return isMirrored((int)ch);
10478     }
10479 
10480     /**
10481      * Determines whether the specified character (Unicode code point)
10482      * is mirrored according to the Unicode specification.  Mirrored
10483      * characters should have their glyphs horizontally mirrored when
10484      * displayed in text that is right-to-left.  For example,
10485      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10486      * defined to be an <i>opening parenthesis</i>.  This will appear
10487      * as a "(" in text that is left-to-right but as a ")" in text
10488      * that is right-to-left.
10489      *
10490      * @param   codePoint the character (Unicode code point) to be tested.
10491      * @return  {@code true} if the character is mirrored, {@code false}
10492      *          if the character is not mirrored or is not defined.
10493      * @since   1.5
10494      */
10495     public static boolean isMirrored(int codePoint) {
10496         return CharacterData.of(codePoint).isMirrored(codePoint);
10497     }
10498 
10499     /**
10500      * Compares two {@code Character} objects numerically.
10501      *
10502      * @param   anotherCharacter   the {@code Character} to be compared.
     *
10504      * @return  the value {@code 0} if the argument {@code Character}
10505      *          is equal to this {@code Character}; a value less than
10506      *          {@code 0} if this {@code Character} is numerically less
10507      *          than the {@code Character} argument; and a value greater than
10508      *          {@code 0} if this {@code Character} is numerically greater
10509      *          than the {@code Character} argument (unsigned comparison).
10510      *          Note that this is strictly a numerical comparison; it is not
10511      *          locale-dependent.
10512      * @since   1.2
10513      */
10514     public int compareTo(Character anotherCharacter) {
10515         return compare(this.value, anotherCharacter.value);
10516     }
10517 
10518     /**
10519      * Compares two {@code char} values numerically.
10520      * The value returned is identical to what would be returned by:
10521      * <pre>
10522      *    Character.valueOf(x).compareTo(Character.valueOf(y))
10523      * </pre>
10524      *
10525      * @param  x the first {@code char} to compare
10526      * @param  y the second {@code char} to compare
10527      * @return the value {@code 0} if {@code x == y};
10528      *         a value less than {@code 0} if {@code x < y}; and
10529      *         a value greater than {@code 0} if {@code x > y}
10530      * @since 1.7
10531      */
10532     public static int compare(char x, char y) {
10533         return x - y;
10534     }
10535 
10536     /**
10537      * Converts the character (Unicode code point) argument to uppercase using
10538      * information from the UnicodeData file.
10539      *
10540      * @param   codePoint   the character (Unicode code point) to be converted.
10541      * @return  either the uppercase equivalent of the character, if
10542      *          any, or an error flag ({@code Character.ERROR})
10543      *          that indicates that a 1:M {@code char} mapping exists.
10544      * @see     Character#isLowerCase(char)
10545      * @see     Character#isUpperCase(char)
10546      * @see     Character#toLowerCase(char)
10547      * @see     Character#toTitleCase(char)
10548      * @since 1.4
10549      */
10550     static int toUpperCaseEx(int codePoint) {
10551         assert isValidCodePoint(codePoint);
10552         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
10553     }
10554 
10555     /**
10556      * Converts the character (Unicode code point) argument to uppercase using case
10557      * mapping information from the SpecialCasing file in the Unicode
10558      * specification. If a character has no explicit uppercase
10559      * mapping, then the {@code char} itself is returned in the
10560      * {@code char[]}.
10561      *
10562      * @param   codePoint   the character (Unicode code point) to be converted.
10563      * @return a {@code char[]} with the uppercased character.
10564      * @since 1.4
10565      */
10566     static char[] toUpperCaseCharArray(int codePoint) {
10567         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
10568         assert isBmpCodePoint(codePoint);
10569         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
10570     }
10571 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @see #BYTES
     * @since 1.5
     */
    public static final int SIZE = 16;
10579 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, constant {@code 2} (derived as {@code SIZE / Byte.SIZE}).
     *
     * @see #SIZE
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
10587 
10588     /**
10589      * Returns the value obtained by reversing the order of the bytes in the
10590      * specified {@code char} value.
10591      *
10592      * @param ch The {@code char} of which to reverse the byte order.
10593      * @return the value obtained by reversing (or, equivalently, swapping)
10594      *     the bytes in the specified {@code char} value.
10595      * @since 1.5
10596      */
10597     @HotSpotIntrinsicCandidate
10598     public static char reverseBytes(char ch) {
10599         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
10600     }
10601 
10602     /**
10603      * Returns the Unicode name of the specified character
10604      * {@code codePoint}, or null if the code point is
10605      * {@link #UNASSIGNED unassigned}.
10606      * <p>
10607      * Note: if the specified character is not assigned a name by
10608      * the <i>UnicodeData</i> file (part of the Unicode Character
10609      * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
10611      *
10612      * <blockquote>{@code
10613      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10614      *     + " "
10615      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10616      *
10617      * }</blockquote>
10618      *
10619      * @param  codePoint the character (Unicode code point)
10620      *
10621      * @return the Unicode name of the specified character, or null if
10622      *         the code point is unassigned.
10623      *
10624      * @throws IllegalArgumentException if the specified
10625      *            {@code codePoint} is not a valid Unicode
10626      *            code point.
10627      *
10628      * @since 1.7
10629      */
10630     public static String getName(int codePoint) {
10631         if (!isValidCodePoint(codePoint)) {
10632             throw new IllegalArgumentException(
10633                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10634         }
10635         String name = CharacterName.getInstance().getName(codePoint);
10636         if (name != null)
10637             return name;
10638         if (getType(codePoint) == UNASSIGNED)
10639             return null;
10640         UnicodeBlock block = UnicodeBlock.of(codePoint);
10641         if (block != null)
10642             return block.toString().replace('_', ' ') + " "
10643                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10644         // should never come here
10645         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10646     }
10647 
10648     /**
10649      * Returns the code point value of the Unicode character specified by
10650      * the given Unicode character name.
10651      * <p>
10652      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
10653      * file (part of the Unicode Character Database maintained by the Unicode
     * Consortium), its name is defined as the result of the expression:
10655      *
10656      * <blockquote>{@code
10657      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10658      *     + " "
10659      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10660      *
10661      * }</blockquote>
10662      * <p>
10663      * The {@code name} matching is case insensitive, with any leading and
     * trailing whitespace characters removed.
10665      *
10666      * @param  name the Unicode character name
10667      *
10668      * @return the code point value of the character specified by its name.
10669      *
10670      * @throws IllegalArgumentException if the specified {@code name}
10671      *         is not a valid Unicode character name.
10672      * @throws NullPointerException if {@code name} is {@code null}
10673      *
10674      * @since 9
10675      */
10676     public static int codePointOf(String name) {
10677         name = name.trim().toUpperCase(Locale.ROOT);
10678         int cp = CharacterName.getInstance().getCodePoint(name);
10679         if (cp != -1)
10680             return cp;
10681         try {
10682             int off = name.lastIndexOf(' ');
10683             if (off != -1) {
10684                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
10685                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
10686                     return cp;
10687             }
10688         } catch (Exception x) {}
10689         throw new IllegalArgumentException("Unrecognized character name :" + name);
10690     }
10691 }