1 /* 2 * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 import jdk.internal.HotSpotIntrinsicCandidate; 34 import jdk.internal.misc.VM; 35 36 /** 37 * The {@code Character} class wraps a value of the primitive 38 * type {@code char} in an object. An object of type 39 * {@code Character} contains a single field whose type is 40 * {@code char}. 41 * <p> 42 * In addition, this class provides several methods for determining 43 * a character's category (lowercase letter, digit, etc.) and for converting 44 * characters from uppercase to lowercase and vice versa. 
45 * <p> 46 * Character information is based on <a id="UnicodeVer">the Unicode Standard, 47 * version 11.0.0</a>. Additional currency symbols (and Japanese Era Square 48 * character) defined subsequent to that Unicode version may be present. 49 * <p> 50 * The methods and data of class {@code Character} are defined by 51 * the information in the <i>UnicodeData</i> file that is part of the 52 * Unicode Character Database maintained by the Unicode 53 * Consortium. This file specifies various properties including name 54 * and general category for every defined Unicode code point or 55 * character range. 56 * <p> 57 * The file and its description are available from the Unicode Consortium at: 58 * <ul> 59 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 60 * </ul> 61 * <p> 62 * The code point, U+32FF, is reserved by the Unicode Consortium 63 * to represent the Japanese square character for the new era that begins 64 * May 2019. Relevant methods in the Character class return the same 65 * properties as for the existing Japanese era characters (e.g., U+337E for 66 * "Meizi"). For the details of the code point, refer to 67 * <a href="http://blog.unicode.org/2018/09/new-japanese-era.html"> 68 * http://blog.unicode.org/2018/09/new-japanese-era.html</a>. 69 * <p> 70 * @implSpec The code points in {@link Character.UnicodeBlock#CURRENCY_SYMBOLS 71 * Currency Symbols} {@code UnicodeBlock} that are unassigned as of the 72 * <a href="#UnicodeVer">Unicode version noted above</a>, 73 * may be defined for currency symbols assigned by the Unicode 74 * Consortium from later updates. The definition of additionally assigned 75 * code points is implementation specific. 76 * 77 * <h3><a id="unicode">Unicode Character Representations</a></h3> 78 * 79 * <p>The {@code char} data type (and therefore the value that a 80 * {@code Character} object encapsulates) are based on the 81 * original Unicode specification, which defined characters as 82 * fixed-width 16-bit entities. 
The Unicode Standard has since been 83 * changed to allow for characters whose representation requires more 84 * than 16 bits. The range of legal <em>code point</em>s is now 85 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 86 * (Refer to the <a 87 * href="http://www.unicode.org/reports/tr27/#notation"><i> 88 * definition</i></a> of the U+<i>n</i> notation in the Unicode 89 * Standard.) 90 * 91 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 92 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 93 * <a id="supplementary">Characters</a> whose code points are greater 94 * than U+FFFF are called <em>supplementary character</em>s. The Java 95 * platform uses the UTF-16 representation in {@code char} arrays and 96 * in the {@code String} and {@code StringBuffer} classes. In 97 * this representation, supplementary characters are represented as a pair 98 * of {@code char} values, the first from the <em>high-surrogates</em> 99 * range, (\uD800-\uDBFF), the second from the 100 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 101 * 102 * <p>A {@code char} value, therefore, represents Basic 103 * Multilingual Plane (BMP) code points, including the surrogate 104 * code points, or code units of the UTF-16 encoding. An 105 * {@code int} value represents all Unicode code points, 106 * including supplementary code points. The lower (least significant) 107 * 21 bits of {@code int} are used to represent Unicode code 108 * points and the upper (most significant) 11 bits must be zero. 109 * Unless otherwise specified, the behavior with respect to 110 * supplementary characters and surrogate {@code char} values is 111 * as follows: 112 * 113 * <ul> 114 * <li>The methods that only accept a {@code char} value cannot support 115 * supplementary characters. They treat {@code char} values from the 116 * surrogate ranges as undefined characters. 
For example, 117 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 118 * this specific value if followed by any low-surrogate value in a string 119 * would represent a letter. 120 * 121 * <li>The methods that accept an {@code int} value support all 122 * Unicode characters, including supplementary characters. For 123 * example, {@code Character.isLetter(0x2F81A)} returns 124 * {@code true} because the code point value represents a letter 125 * (a CJK ideograph). 126 * </ul> 127 * 128 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 129 * used for character values in the range between U+0000 and U+10FFFF, 130 * and <em>Unicode code unit</em> is used for 16-bit 131 * {@code char} values that are code units of the <em>UTF-16</em> 132 * encoding. For more information on Unicode terminology, refer to the 133 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 134 * 135 * @author Lee Boynton 136 * @author Guy Steele 137 * @author Akira Tanaka 138 * @author Martin Buchholz 139 * @author Ulf Zibis 140 * @since 1.0 141 */ 142 public final 143 class Character implements java.io.Serializable, Comparable<Character> { 144 /** 145 * The minimum radix available for conversion to and from strings. 146 * The constant value of this field is the smallest value permitted 147 * for the radix argument in radix-conversion methods such as the 148 * {@code digit} method, the {@code forDigit} method, and the 149 * {@code toString} method of class {@code Integer}. 150 * 151 * @see Character#digit(char, int) 152 * @see Character#forDigit(int, int) 153 * @see Integer#toString(int, int) 154 * @see Integer#valueOf(String) 155 */ 156 public static final int MIN_RADIX = 2; 157 158 /** 159 * The maximum radix available for conversion to and from strings. 
160 * The constant value of this field is the largest value permitted 161 * for the radix argument in radix-conversion methods such as the 162 * {@code digit} method, the {@code forDigit} method, and the 163 * {@code toString} method of class {@code Integer}. 164 * 165 * @see Character#digit(char, int) 166 * @see Character#forDigit(int, int) 167 * @see Integer#toString(int, int) 168 * @see Integer#valueOf(String) 169 */ 170 public static final int MAX_RADIX = 36; 171 172 /** 173 * The constant value of this field is the smallest value of type 174 * {@code char}, {@code '\u005Cu0000'}. 175 * 176 * @since 1.0.2 177 */ 178 public static final char MIN_VALUE = '\u0000'; 179 180 /** 181 * The constant value of this field is the largest value of type 182 * {@code char}, {@code '\u005CuFFFF'}. 183 * 184 * @since 1.0.2 185 */ 186 public static final char MAX_VALUE = '\uFFFF'; 187 188 /** 189 * The {@code Class} instance representing the primitive type 190 * {@code char}. 191 * 192 * @since 1.1 193 */ 194 @SuppressWarnings("unchecked") 195 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 196 197 /* 198 * Normative general types 199 */ 200 201 /* 202 * General character types 203 */ 204 205 /** 206 * General category "Cn" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte UNASSIGNED = 0; 210 211 /** 212 * General category "Lu" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte UPPERCASE_LETTER = 1; 216 217 /** 218 * General category "Ll" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte LOWERCASE_LETTER = 2; 222 223 /** 224 * General category "Lt" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte TITLECASE_LETTER = 3; 228 229 /** 230 * General category "Lm" in the Unicode specification. 
231 * @since 1.1 232 */ 233 public static final byte MODIFIER_LETTER = 4; 234 235 /** 236 * General category "Lo" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte OTHER_LETTER = 5; 240 241 /** 242 * General category "Mn" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte NON_SPACING_MARK = 6; 246 247 /** 248 * General category "Me" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte ENCLOSING_MARK = 7; 252 253 /** 254 * General category "Mc" in the Unicode specification. 255 * @since 1.1 256 */ 257 public static final byte COMBINING_SPACING_MARK = 8; 258 259 /** 260 * General category "Nd" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte DECIMAL_DIGIT_NUMBER = 9; 264 265 /** 266 * General category "Nl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LETTER_NUMBER = 10; 270 271 /** 272 * General category "No" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte OTHER_NUMBER = 11; 276 277 /** 278 * General category "Zs" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte SPACE_SEPARATOR = 12; 282 283 /** 284 * General category "Zl" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte LINE_SEPARATOR = 13; 288 289 /** 290 * General category "Zp" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PARAGRAPH_SEPARATOR = 14; 294 295 /** 296 * General category "Cc" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte CONTROL = 15; 300 301 /** 302 * General category "Cf" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte FORMAT = 16; 306 307 /** 308 * General category "Co" in the Unicode specification. 
309 * @since 1.1 310 */ 311 public static final byte PRIVATE_USE = 18; 312 313 /** 314 * General category "Cs" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte SURROGATE = 19; 318 319 /** 320 * General category "Pd" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte DASH_PUNCTUATION = 20; 324 325 /** 326 * General category "Ps" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte START_PUNCTUATION = 21; 330 331 /** 332 * General category "Pe" in the Unicode specification. 333 * @since 1.1 334 */ 335 public static final byte END_PUNCTUATION = 22; 336 337 /** 338 * General category "Pc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CONNECTOR_PUNCTUATION = 23; 342 343 /** 344 * General category "Po" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte OTHER_PUNCTUATION = 24; 348 349 /** 350 * General category "Sm" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte MATH_SYMBOL = 25; 354 355 /** 356 * General category "Sc" in the Unicode specification. 357 * @since 1.1 358 */ 359 public static final byte CURRENCY_SYMBOL = 26; 360 361 /** 362 * General category "Sk" in the Unicode specification. 363 * @since 1.1 364 */ 365 public static final byte MODIFIER_SYMBOL = 27; 366 367 /** 368 * General category "So" in the Unicode specification. 369 * @since 1.1 370 */ 371 public static final byte OTHER_SYMBOL = 28; 372 373 /** 374 * General category "Pi" in the Unicode specification. 375 * @since 1.4 376 */ 377 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 378 379 /** 380 * General category "Pf" in the Unicode specification. 381 * @since 1.4 382 */ 383 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 384 385 /** 386 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 
387 */ 388 static final int ERROR = 0xFFFFFFFF; 389 390 391 /** 392 * Undefined bidirectional character type. Undefined {@code char} 393 * values have undefined directionality in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_UNDEFINED = -1; 397 398 /** 399 * Strong bidirectional character type "L" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 403 404 /** 405 * Strong bidirectional character type "R" in the Unicode specification. 406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 409 410 /** 411 * Strong bidirectional character type "AL" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 415 416 /** 417 * Weak bidirectional character type "EN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 421 422 /** 423 * Weak bidirectional character type "ES" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 427 428 /** 429 * Weak bidirectional character type "ET" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 433 434 /** 435 * Weak bidirectional character type "AN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 439 440 /** 441 * Weak bidirectional character type "CS" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 445 446 /** 447 * Weak bidirectional character type "NSM" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 451 452 /** 453 * Weak bidirectional character type "BN" in the Unicode specification. 
454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 457 458 /** 459 * Neutral bidirectional character type "B" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 463 464 /** 465 * Neutral bidirectional character type "S" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 469 470 /** 471 * Neutral bidirectional character type "WS" in the Unicode specification. 472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_WHITESPACE = 12; 475 476 /** 477 * Neutral bidirectional character type "ON" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 481 482 /** 483 * Strong bidirectional character type "LRE" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 487 488 /** 489 * Strong bidirectional character type "LRO" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 493 494 /** 495 * Strong bidirectional character type "RLE" in the Unicode specification. 496 * @since 1.4 497 */ 498 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 499 500 /** 501 * Strong bidirectional character type "RLO" in the Unicode specification. 502 * @since 1.4 503 */ 504 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 505 506 /** 507 * Weak bidirectional character type "PDF" in the Unicode specification. 508 * @since 1.4 509 */ 510 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 511 512 /** 513 * Weak bidirectional character type "LRI" in the Unicode specification. 
514 * @since 9 515 */ 516 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 517 518 /** 519 * Weak bidirectional character type "RLI" in the Unicode specification. 520 * @since 9 521 */ 522 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 523 524 /** 525 * Weak bidirectional character type "FSI" in the Unicode specification. 526 * @since 9 527 */ 528 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 529 530 /** 531 * Weak bidirectional character type "PDI" in the Unicode specification. 532 * @since 9 533 */ 534 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 535 536 /** 537 * The minimum value of a 538 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 539 * Unicode high-surrogate code unit</a> 540 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 541 * A high-surrogate is also known as a <i>leading-surrogate</i>. 542 * 543 * @since 1.5 544 */ 545 public static final char MIN_HIGH_SURROGATE = '\uD800'; 546 547 /** 548 * The maximum value of a 549 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 550 * Unicode high-surrogate code unit</a> 551 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 552 * A high-surrogate is also known as a <i>leading-surrogate</i>. 553 * 554 * @since 1.5 555 */ 556 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 557 558 /** 559 * The minimum value of a 560 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 561 * Unicode low-surrogate code unit</a> 562 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 563 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 564 * 565 * @since 1.5 566 */ 567 public static final char MIN_LOW_SURROGATE = '\uDC00'; 568 569 /** 570 * The maximum value of a 571 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 572 * Unicode low-surrogate code unit</a> 573 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 
574 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 575 * 576 * @since 1.5 577 */ 578 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 579 580 /** 581 * The minimum value of a Unicode surrogate code unit in the 582 * UTF-16 encoding, constant {@code '\u005CuD800'}. 583 * 584 * @since 1.5 585 */ 586 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 587 588 /** 589 * The maximum value of a Unicode surrogate code unit in the 590 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 591 * 592 * @since 1.5 593 */ 594 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 595 596 /** 597 * The minimum value of a 598 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 599 * Unicode supplementary code point</a>, constant {@code U+10000}. 600 * 601 * @since 1.5 602 */ 603 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 604 605 /** 606 * The minimum value of a 607 * <a href="http://www.unicode.org/glossary/#code_point"> 608 * Unicode code point</a>, constant {@code U+0000}. 609 * 610 * @since 1.5 611 */ 612 public static final int MIN_CODE_POINT = 0x000000; 613 614 /** 615 * The maximum value of a 616 * <a href="http://www.unicode.org/glossary/#code_point"> 617 * Unicode code point</a>, constant {@code U+10FFFF}. 618 * 619 * @since 1.5 620 */ 621 public static final int MAX_CODE_POINT = 0X10FFFF; 622 623 624 /** 625 * Instances of this class represent particular subsets of the Unicode 626 * character set. The only family of subsets defined in the 627 * {@code Character} class is {@link Character.UnicodeBlock}. 628 * Other portions of the Java API may define other subsets for their 629 * own purposes. 630 * 631 * @since 1.2 632 */ 633 public static class Subset { 634 635 private String name; 636 637 /** 638 * Constructs a new {@code Subset} instance. 
639 * 640 * @param name The name of this subset 641 * @throws NullPointerException if name is {@code null} 642 */ 643 protected Subset(String name) { 644 if (name == null) { 645 throw new NullPointerException("name"); 646 } 647 this.name = name; 648 } 649 650 /** 651 * Compares two {@code Subset} objects for equality. 652 * This method returns {@code true} if and only if 653 * {@code this} and the argument refer to the same 654 * object; since this method is {@code final}, this 655 * guarantee holds for all subclasses. 656 */ 657 public final boolean equals(Object obj) { 658 return (this == obj); 659 } 660 661 /** 662 * Returns the standard hash code as defined by the 663 * {@link Object#hashCode} method. This method 664 * is {@code final} in order to ensure that the 665 * {@code equals} and {@code hashCode} methods will 666 * be consistent in all subclasses. 667 */ 668 public final int hashCode() { 669 return super.hashCode(); 670 } 671 672 /** 673 * Returns the name of this subset. 674 */ 675 public final String toString() { 676 return name; 677 } 678 } 679 680 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 681 // for the latest specification of Unicode Blocks. 682 683 /** 684 * A family of character subsets representing the character blocks in the 685 * Unicode specification. Character blocks generally define characters 686 * used for a specific script or purpose. A character is contained by 687 * at most one Unicode block. 688 * 689 * @since 1.2 690 */ 691 public static final class UnicodeBlock extends Subset { 692 /** 693 * 667 - the expected number of entities 694 * 0.75 - the default load factor of HashMap 695 */ 696 private static final int NUM_ENTITIES = 667; 697 private static Map<String, UnicodeBlock> map = 698 new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f)); 699 700 /** 701 * Creates a UnicodeBlock with the given identifier name. 702 * This name must be the same as the block identifier. 
703 */ 704 private UnicodeBlock(String idName) { 705 super(idName); 706 map.put(idName, this); 707 } 708 709 /** 710 * Creates a UnicodeBlock with the given identifier name and 711 * alias name. 712 */ 713 private UnicodeBlock(String idName, String alias) { 714 this(idName); 715 map.put(alias, this); 716 } 717 718 /** 719 * Creates a UnicodeBlock with the given identifier name and 720 * alias names. 721 */ 722 private UnicodeBlock(String idName, String... aliases) { 723 this(idName); 724 for (String alias : aliases) 725 map.put(alias, this); 726 } 727 728 /** 729 * Constant for the "Basic Latin" Unicode character block. 730 * @since 1.2 731 */ 732 public static final UnicodeBlock BASIC_LATIN = 733 new UnicodeBlock("BASIC_LATIN", 734 "BASIC LATIN", 735 "BASICLATIN"); 736 737 /** 738 * Constant for the "Latin-1 Supplement" Unicode character block. 739 * @since 1.2 740 */ 741 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 742 new UnicodeBlock("LATIN_1_SUPPLEMENT", 743 "LATIN-1 SUPPLEMENT", 744 "LATIN-1SUPPLEMENT"); 745 746 /** 747 * Constant for the "Latin Extended-A" Unicode character block. 748 * @since 1.2 749 */ 750 public static final UnicodeBlock LATIN_EXTENDED_A = 751 new UnicodeBlock("LATIN_EXTENDED_A", 752 "LATIN EXTENDED-A", 753 "LATINEXTENDED-A"); 754 755 /** 756 * Constant for the "Latin Extended-B" Unicode character block. 757 * @since 1.2 758 */ 759 public static final UnicodeBlock LATIN_EXTENDED_B = 760 new UnicodeBlock("LATIN_EXTENDED_B", 761 "LATIN EXTENDED-B", 762 "LATINEXTENDED-B"); 763 764 /** 765 * Constant for the "IPA Extensions" Unicode character block. 766 * @since 1.2 767 */ 768 public static final UnicodeBlock IPA_EXTENSIONS = 769 new UnicodeBlock("IPA_EXTENSIONS", 770 "IPA EXTENSIONS", 771 "IPAEXTENSIONS"); 772 773 /** 774 * Constant for the "Spacing Modifier Letters" Unicode character block. 
775 * @since 1.2 776 */ 777 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 778 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 779 "SPACING MODIFIER LETTERS", 780 "SPACINGMODIFIERLETTERS"); 781 782 /** 783 * Constant for the "Combining Diacritical Marks" Unicode character block. 784 * @since 1.2 785 */ 786 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 787 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 788 "COMBINING DIACRITICAL MARKS", 789 "COMBININGDIACRITICALMARKS"); 790 791 /** 792 * Constant for the "Greek and Coptic" Unicode character block. 793 * <p> 794 * This block was previously known as the "Greek" block. 795 * 796 * @since 1.2 797 */ 798 public static final UnicodeBlock GREEK = 799 new UnicodeBlock("GREEK", 800 "GREEK AND COPTIC", 801 "GREEKANDCOPTIC"); 802 803 /** 804 * Constant for the "Cyrillic" Unicode character block. 805 * @since 1.2 806 */ 807 public static final UnicodeBlock CYRILLIC = 808 new UnicodeBlock("CYRILLIC"); 809 810 /** 811 * Constant for the "Armenian" Unicode character block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock ARMENIAN = 815 new UnicodeBlock("ARMENIAN"); 816 817 /** 818 * Constant for the "Hebrew" Unicode character block. 819 * @since 1.2 820 */ 821 public static final UnicodeBlock HEBREW = 822 new UnicodeBlock("HEBREW"); 823 824 /** 825 * Constant for the "Arabic" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock ARABIC = 829 new UnicodeBlock("ARABIC"); 830 831 /** 832 * Constant for the "Devanagari" Unicode character block. 833 * @since 1.2 834 */ 835 public static final UnicodeBlock DEVANAGARI = 836 new UnicodeBlock("DEVANAGARI"); 837 838 /** 839 * Constant for the "Bengali" Unicode character block. 840 * @since 1.2 841 */ 842 public static final UnicodeBlock BENGALI = 843 new UnicodeBlock("BENGALI"); 844 845 /** 846 * Constant for the "Gurmukhi" Unicode character block. 
847 * @since 1.2 848 */ 849 public static final UnicodeBlock GURMUKHI = 850 new UnicodeBlock("GURMUKHI"); 851 852 /** 853 * Constant for the "Gujarati" Unicode character block. 854 * @since 1.2 855 */ 856 public static final UnicodeBlock GUJARATI = 857 new UnicodeBlock("GUJARATI"); 858 859 /** 860 * Constant for the "Oriya" Unicode character block. 861 * @since 1.2 862 */ 863 public static final UnicodeBlock ORIYA = 864 new UnicodeBlock("ORIYA"); 865 866 /** 867 * Constant for the "Tamil" Unicode character block. 868 * @since 1.2 869 */ 870 public static final UnicodeBlock TAMIL = 871 new UnicodeBlock("TAMIL"); 872 873 /** 874 * Constant for the "Telugu" Unicode character block. 875 * @since 1.2 876 */ 877 public static final UnicodeBlock TELUGU = 878 new UnicodeBlock("TELUGU"); 879 880 /** 881 * Constant for the "Kannada" Unicode character block. 882 * @since 1.2 883 */ 884 public static final UnicodeBlock KANNADA = 885 new UnicodeBlock("KANNADA"); 886 887 /** 888 * Constant for the "Malayalam" Unicode character block. 889 * @since 1.2 890 */ 891 public static final UnicodeBlock MALAYALAM = 892 new UnicodeBlock("MALAYALAM"); 893 894 /** 895 * Constant for the "Thai" Unicode character block. 896 * @since 1.2 897 */ 898 public static final UnicodeBlock THAI = 899 new UnicodeBlock("THAI"); 900 901 /** 902 * Constant for the "Lao" Unicode character block. 903 * @since 1.2 904 */ 905 public static final UnicodeBlock LAO = 906 new UnicodeBlock("LAO"); 907 908 /** 909 * Constant for the "Tibetan" Unicode character block. 910 * @since 1.2 911 */ 912 public static final UnicodeBlock TIBETAN = 913 new UnicodeBlock("TIBETAN"); 914 915 /** 916 * Constant for the "Georgian" Unicode character block. 917 * @since 1.2 918 */ 919 public static final UnicodeBlock GEORGIAN = 920 new UnicodeBlock("GEORGIAN"); 921 922 /** 923 * Constant for the "Hangul Jamo" Unicode character block. 
924 * @since 1.2 925 */ 926 public static final UnicodeBlock HANGUL_JAMO = 927 new UnicodeBlock("HANGUL_JAMO", 928 "HANGUL JAMO", 929 "HANGULJAMO"); 930 931 /** 932 * Constant for the "Latin Extended Additional" Unicode character block. 933 * @since 1.2 934 */ 935 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 936 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 937 "LATIN EXTENDED ADDITIONAL", 938 "LATINEXTENDEDADDITIONAL"); 939 940 /** 941 * Constant for the "Greek Extended" Unicode character block. 942 * @since 1.2 943 */ 944 public static final UnicodeBlock GREEK_EXTENDED = 945 new UnicodeBlock("GREEK_EXTENDED", 946 "GREEK EXTENDED", 947 "GREEKEXTENDED"); 948 949 /** 950 * Constant for the "General Punctuation" Unicode character block. 951 * @since 1.2 952 */ 953 public static final UnicodeBlock GENERAL_PUNCTUATION = 954 new UnicodeBlock("GENERAL_PUNCTUATION", 955 "GENERAL PUNCTUATION", 956 "GENERALPUNCTUATION"); 957 958 /** 959 * Constant for the "Superscripts and Subscripts" Unicode character 960 * block. 961 * @since 1.2 962 */ 963 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 964 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 965 "SUPERSCRIPTS AND SUBSCRIPTS", 966 "SUPERSCRIPTSANDSUBSCRIPTS"); 967 968 /** 969 * Constant for the "Currency Symbols" Unicode character block. 970 * @since 1.2 971 */ 972 public static final UnicodeBlock CURRENCY_SYMBOLS = 973 new UnicodeBlock("CURRENCY_SYMBOLS", 974 "CURRENCY SYMBOLS", 975 "CURRENCYSYMBOLS"); 976 977 /** 978 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 979 * character block. 980 * <p> 981 * This block was previously known as "Combining Marks for Symbols". 
982 * @since 1.2 983 */ 984 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 985 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 986 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 987 "COMBININGDIACRITICALMARKSFORSYMBOLS", 988 "COMBINING MARKS FOR SYMBOLS", 989 "COMBININGMARKSFORSYMBOLS"); 990 991 /** 992 * Constant for the "Letterlike Symbols" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 996 new UnicodeBlock("LETTERLIKE_SYMBOLS", 997 "LETTERLIKE SYMBOLS", 998 "LETTERLIKESYMBOLS"); 999 1000 /** 1001 * Constant for the "Number Forms" Unicode character block. 1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock NUMBER_FORMS = 1005 new UnicodeBlock("NUMBER_FORMS", 1006 "NUMBER FORMS", 1007 "NUMBERFORMS"); 1008 1009 /** 1010 * Constant for the "Arrows" Unicode character block. 1011 * @since 1.2 1012 */ 1013 public static final UnicodeBlock ARROWS = 1014 new UnicodeBlock("ARROWS"); 1015 1016 /** 1017 * Constant for the "Mathematical Operators" Unicode character block. 1018 * @since 1.2 1019 */ 1020 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1021 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1022 "MATHEMATICAL OPERATORS", 1023 "MATHEMATICALOPERATORS"); 1024 1025 /** 1026 * Constant for the "Miscellaneous Technical" Unicode character block. 1027 * @since 1.2 1028 */ 1029 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1030 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1031 "MISCELLANEOUS TECHNICAL", 1032 "MISCELLANEOUSTECHNICAL"); 1033 1034 /** 1035 * Constant for the "Control Pictures" Unicode character block. 1036 * @since 1.2 1037 */ 1038 public static final UnicodeBlock CONTROL_PICTURES = 1039 new UnicodeBlock("CONTROL_PICTURES", 1040 "CONTROL PICTURES", 1041 "CONTROLPICTURES"); 1042 1043 /** 1044 * Constant for the "Optical Character Recognition" Unicode character block. 
1045 * @since 1.2 1046 */ 1047 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1048 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1049 "OPTICAL CHARACTER RECOGNITION", 1050 "OPTICALCHARACTERRECOGNITION"); 1051 1052 /** 1053 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1054 * @since 1.2 1055 */ 1056 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1057 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1058 "ENCLOSED ALPHANUMERICS", 1059 "ENCLOSEDALPHANUMERICS"); 1060 1061 /** 1062 * Constant for the "Box Drawing" Unicode character block. 1063 * @since 1.2 1064 */ 1065 public static final UnicodeBlock BOX_DRAWING = 1066 new UnicodeBlock("BOX_DRAWING", 1067 "BOX DRAWING", 1068 "BOXDRAWING"); 1069 1070 /** 1071 * Constant for the "Block Elements" Unicode character block. 1072 * @since 1.2 1073 */ 1074 public static final UnicodeBlock BLOCK_ELEMENTS = 1075 new UnicodeBlock("BLOCK_ELEMENTS", 1076 "BLOCK ELEMENTS", 1077 "BLOCKELEMENTS"); 1078 1079 /** 1080 * Constant for the "Geometric Shapes" Unicode character block. 1081 * @since 1.2 1082 */ 1083 public static final UnicodeBlock GEOMETRIC_SHAPES = 1084 new UnicodeBlock("GEOMETRIC_SHAPES", 1085 "GEOMETRIC SHAPES", 1086 "GEOMETRICSHAPES"); 1087 1088 /** 1089 * Constant for the "Miscellaneous Symbols" Unicode character block. 1090 * @since 1.2 1091 */ 1092 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1093 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1094 "MISCELLANEOUS SYMBOLS", 1095 "MISCELLANEOUSSYMBOLS"); 1096 1097 /** 1098 * Constant for the "Dingbats" Unicode character block. 1099 * @since 1.2 1100 */ 1101 public static final UnicodeBlock DINGBATS = 1102 new UnicodeBlock("DINGBATS"); 1103 1104 /** 1105 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1106 * @since 1.2 1107 */ 1108 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1109 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1110 "CJK SYMBOLS AND PUNCTUATION", 1111 "CJKSYMBOLSANDPUNCTUATION"); 1112 1113 /** 1114 * Constant for the "Hiragana" Unicode character block. 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock HIRAGANA = 1118 new UnicodeBlock("HIRAGANA"); 1119 1120 /** 1121 * Constant for the "Katakana" Unicode character block. 1122 * @since 1.2 1123 */ 1124 public static final UnicodeBlock KATAKANA = 1125 new UnicodeBlock("KATAKANA"); 1126 1127 /** 1128 * Constant for the "Bopomofo" Unicode character block. 1129 * @since 1.2 1130 */ 1131 public static final UnicodeBlock BOPOMOFO = 1132 new UnicodeBlock("BOPOMOFO"); 1133 1134 /** 1135 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1136 * @since 1.2 1137 */ 1138 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1139 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1140 "HANGUL COMPATIBILITY JAMO", 1141 "HANGULCOMPATIBILITYJAMO"); 1142 1143 /** 1144 * Constant for the "Kanbun" Unicode character block. 1145 * @since 1.2 1146 */ 1147 public static final UnicodeBlock KANBUN = 1148 new UnicodeBlock("KANBUN"); 1149 1150 /** 1151 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1152 * @since 1.2 1153 */ 1154 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1155 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1156 "ENCLOSED CJK LETTERS AND MONTHS", 1157 "ENCLOSEDCJKLETTERSANDMONTHS"); 1158 1159 /** 1160 * Constant for the "CJK Compatibility" Unicode character block. 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock CJK_COMPATIBILITY = 1164 new UnicodeBlock("CJK_COMPATIBILITY", 1165 "CJK COMPATIBILITY", 1166 "CJKCOMPATIBILITY"); 1167 1168 /** 1169 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1170 * @since 1.2 1171 */ 1172 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1173 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1174 "CJK UNIFIED IDEOGRAPHS", 1175 "CJKUNIFIEDIDEOGRAPHS"); 1176 1177 /** 1178 * Constant for the "Hangul Syllables" Unicode character block. 1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock HANGUL_SYLLABLES = 1182 new UnicodeBlock("HANGUL_SYLLABLES", 1183 "HANGUL SYLLABLES", 1184 "HANGULSYLLABLES"); 1185 1186 /** 1187 * Constant for the "Private Use Area" Unicode character block. 1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock PRIVATE_USE_AREA = 1191 new UnicodeBlock("PRIVATE_USE_AREA", 1192 "PRIVATE USE AREA", 1193 "PRIVATEUSEAREA"); 1194 1195 /** 1196 * Constant for the "CJK Compatibility Ideographs" Unicode character 1197 * block. 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1201 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1202 "CJK COMPATIBILITY IDEOGRAPHS", 1203 "CJKCOMPATIBILITYIDEOGRAPHS"); 1204 1205 /** 1206 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1210 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1211 "ALPHABETIC PRESENTATION FORMS", 1212 "ALPHABETICPRESENTATIONFORMS"); 1213 1214 /** 1215 * Constant for the "Arabic Presentation Forms-A" Unicode character 1216 * block. 1217 * @since 1.2 1218 */ 1219 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1220 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1221 "ARABIC PRESENTATION FORMS-A", 1222 "ARABICPRESENTATIONFORMS-A"); 1223 1224 /** 1225 * Constant for the "Combining Half Marks" Unicode character block. 
1226 * @since 1.2 1227 */ 1228 public static final UnicodeBlock COMBINING_HALF_MARKS = 1229 new UnicodeBlock("COMBINING_HALF_MARKS", 1230 "COMBINING HALF MARKS", 1231 "COMBININGHALFMARKS"); 1232 1233 /** 1234 * Constant for the "CJK Compatibility Forms" Unicode character block. 1235 * @since 1.2 1236 */ 1237 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1238 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1239 "CJK COMPATIBILITY FORMS", 1240 "CJKCOMPATIBILITYFORMS"); 1241 1242 /** 1243 * Constant for the "Small Form Variants" Unicode character block. 1244 * @since 1.2 1245 */ 1246 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1247 new UnicodeBlock("SMALL_FORM_VARIANTS", 1248 "SMALL FORM VARIANTS", 1249 "SMALLFORMVARIANTS"); 1250 1251 /** 1252 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1253 * @since 1.2 1254 */ 1255 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1256 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1257 "ARABIC PRESENTATION FORMS-B", 1258 "ARABICPRESENTATIONFORMS-B"); 1259 1260 /** 1261 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1262 * block. 1263 * @since 1.2 1264 */ 1265 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1266 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1267 "HALFWIDTH AND FULLWIDTH FORMS", 1268 "HALFWIDTHANDFULLWIDTHFORMS"); 1269 1270 /** 1271 * Constant for the "Specials" Unicode character block. 1272 * @since 1.2 1273 */ 1274 public static final UnicodeBlock SPECIALS = 1275 new UnicodeBlock("SPECIALS"); 1276 1277 /** 1278 * @deprecated 1279 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1280 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1281 * These constants match the block definitions of the Unicode Standard. 1282 * The {@link #of(char)} and {@link #of(int)} methods return the 1283 * standard constants. 
1284 */ 1285 @Deprecated(since="1.5") 1286 public static final UnicodeBlock SURROGATES_AREA = 1287 new UnicodeBlock("SURROGATES_AREA"); 1288 1289 /** 1290 * Constant for the "Syriac" Unicode character block. 1291 * @since 1.4 1292 */ 1293 public static final UnicodeBlock SYRIAC = 1294 new UnicodeBlock("SYRIAC"); 1295 1296 /** 1297 * Constant for the "Thaana" Unicode character block. 1298 * @since 1.4 1299 */ 1300 public static final UnicodeBlock THAANA = 1301 new UnicodeBlock("THAANA"); 1302 1303 /** 1304 * Constant for the "Sinhala" Unicode character block. 1305 * @since 1.4 1306 */ 1307 public static final UnicodeBlock SINHALA = 1308 new UnicodeBlock("SINHALA"); 1309 1310 /** 1311 * Constant for the "Myanmar" Unicode character block. 1312 * @since 1.4 1313 */ 1314 public static final UnicodeBlock MYANMAR = 1315 new UnicodeBlock("MYANMAR"); 1316 1317 /** 1318 * Constant for the "Ethiopic" Unicode character block. 1319 * @since 1.4 1320 */ 1321 public static final UnicodeBlock ETHIOPIC = 1322 new UnicodeBlock("ETHIOPIC"); 1323 1324 /** 1325 * Constant for the "Cherokee" Unicode character block. 1326 * @since 1.4 1327 */ 1328 public static final UnicodeBlock CHEROKEE = 1329 new UnicodeBlock("CHEROKEE"); 1330 1331 /** 1332 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1333 * @since 1.4 1334 */ 1335 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1336 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1337 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1338 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1339 1340 /** 1341 * Constant for the "Ogham" Unicode character block. 1342 * @since 1.4 1343 */ 1344 public static final UnicodeBlock OGHAM = 1345 new UnicodeBlock("OGHAM"); 1346 1347 /** 1348 * Constant for the "Runic" Unicode character block. 
1349 * @since 1.4 1350 */ 1351 public static final UnicodeBlock RUNIC = 1352 new UnicodeBlock("RUNIC"); 1353 1354 /** 1355 * Constant for the "Khmer" Unicode character block. 1356 * @since 1.4 1357 */ 1358 public static final UnicodeBlock KHMER = 1359 new UnicodeBlock("KHMER"); 1360 1361 /** 1362 * Constant for the "Mongolian" Unicode character block. 1363 * @since 1.4 1364 */ 1365 public static final UnicodeBlock MONGOLIAN = 1366 new UnicodeBlock("MONGOLIAN"); 1367 1368 /** 1369 * Constant for the "Braille Patterns" Unicode character block. 1370 * @since 1.4 1371 */ 1372 public static final UnicodeBlock BRAILLE_PATTERNS = 1373 new UnicodeBlock("BRAILLE_PATTERNS", 1374 "BRAILLE PATTERNS", 1375 "BRAILLEPATTERNS"); 1376 1377 /** 1378 * Constant for the "CJK Radicals Supplement" Unicode character block. 1379 * @since 1.4 1380 */ 1381 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1382 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1383 "CJK RADICALS SUPPLEMENT", 1384 "CJKRADICALSSUPPLEMENT"); 1385 1386 /** 1387 * Constant for the "Kangxi Radicals" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock KANGXI_RADICALS = 1391 new UnicodeBlock("KANGXI_RADICALS", 1392 "KANGXI RADICALS", 1393 "KANGXIRADICALS"); 1394 1395 /** 1396 * Constant for the "Ideographic Description Characters" Unicode character block. 1397 * @since 1.4 1398 */ 1399 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1400 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1401 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1402 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1403 1404 /** 1405 * Constant for the "Bopomofo Extended" Unicode character block. 1406 * @since 1.4 1407 */ 1408 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1409 new UnicodeBlock("BOPOMOFO_EXTENDED", 1410 "BOPOMOFO EXTENDED", 1411 "BOPOMOFOEXTENDED"); 1412 1413 /** 1414 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1415 * @since 1.4 1416 */ 1417 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1418 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1419 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1420 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1421 1422 /** 1423 * Constant for the "Yi Syllables" Unicode character block. 1424 * @since 1.4 1425 */ 1426 public static final UnicodeBlock YI_SYLLABLES = 1427 new UnicodeBlock("YI_SYLLABLES", 1428 "YI SYLLABLES", 1429 "YISYLLABLES"); 1430 1431 /** 1432 * Constant for the "Yi Radicals" Unicode character block. 1433 * @since 1.4 1434 */ 1435 public static final UnicodeBlock YI_RADICALS = 1436 new UnicodeBlock("YI_RADICALS", 1437 "YI RADICALS", 1438 "YIRADICALS"); 1439 1440 /** 1441 * Constant for the "Cyrillic Supplement" Unicode character block. 1442 * This block was previously known as the "Cyrillic Supplementary" block. 1443 * @since 1.5 1444 */ 1445 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1446 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1447 "CYRILLIC SUPPLEMENTARY", 1448 "CYRILLICSUPPLEMENTARY", 1449 "CYRILLIC SUPPLEMENT", 1450 "CYRILLICSUPPLEMENT"); 1451 1452 /** 1453 * Constant for the "Tagalog" Unicode character block. 1454 * @since 1.5 1455 */ 1456 public static final UnicodeBlock TAGALOG = 1457 new UnicodeBlock("TAGALOG"); 1458 1459 /** 1460 * Constant for the "Hanunoo" Unicode character block. 1461 * @since 1.5 1462 */ 1463 public static final UnicodeBlock HANUNOO = 1464 new UnicodeBlock("HANUNOO"); 1465 1466 /** 1467 * Constant for the "Buhid" Unicode character block. 1468 * @since 1.5 1469 */ 1470 public static final UnicodeBlock BUHID = 1471 new UnicodeBlock("BUHID"); 1472 1473 /** 1474 * Constant for the "Tagbanwa" Unicode character block. 1475 * @since 1.5 1476 */ 1477 public static final UnicodeBlock TAGBANWA = 1478 new UnicodeBlock("TAGBANWA"); 1479 1480 /** 1481 * Constant for the "Limbu" Unicode character block. 
1482 * @since 1.5 1483 */ 1484 public static final UnicodeBlock LIMBU = 1485 new UnicodeBlock("LIMBU"); 1486 1487 /** 1488 * Constant for the "Tai Le" Unicode character block. 1489 * @since 1.5 1490 */ 1491 public static final UnicodeBlock TAI_LE = 1492 new UnicodeBlock("TAI_LE", 1493 "TAI LE", 1494 "TAILE"); 1495 1496 /** 1497 * Constant for the "Khmer Symbols" Unicode character block. 1498 * @since 1.5 1499 */ 1500 public static final UnicodeBlock KHMER_SYMBOLS = 1501 new UnicodeBlock("KHMER_SYMBOLS", 1502 "KHMER SYMBOLS", 1503 "KHMERSYMBOLS"); 1504 1505 /** 1506 * Constant for the "Phonetic Extensions" Unicode character block. 1507 * @since 1.5 1508 */ 1509 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1510 new UnicodeBlock("PHONETIC_EXTENSIONS", 1511 "PHONETIC EXTENSIONS", 1512 "PHONETICEXTENSIONS"); 1513 1514 /** 1515 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1516 * @since 1.5 1517 */ 1518 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1519 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1520 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1521 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1522 1523 /** 1524 * Constant for the "Supplemental Arrows-A" Unicode character block. 1525 * @since 1.5 1526 */ 1527 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1528 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1529 "SUPPLEMENTAL ARROWS-A", 1530 "SUPPLEMENTALARROWS-A"); 1531 1532 /** 1533 * Constant for the "Supplemental Arrows-B" Unicode character block. 1534 * @since 1.5 1535 */ 1536 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1537 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1538 "SUPPLEMENTAL ARROWS-B", 1539 "SUPPLEMENTALARROWS-B"); 1540 1541 /** 1542 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1543 * character block. 
1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1547 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1548 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1549 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1550 1551 /** 1552 * Constant for the "Supplemental Mathematical Operators" Unicode 1553 * character block. 1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1557 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1558 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1559 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1560 1561 /** 1562 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1563 * block. 1564 * @since 1.5 1565 */ 1566 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1567 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1568 "MISCELLANEOUS SYMBOLS AND ARROWS", 1569 "MISCELLANEOUSSYMBOLSANDARROWS"); 1570 1571 /** 1572 * Constant for the "Katakana Phonetic Extensions" Unicode character 1573 * block. 1574 * @since 1.5 1575 */ 1576 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1577 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1578 "KATAKANA PHONETIC EXTENSIONS", 1579 "KATAKANAPHONETICEXTENSIONS"); 1580 1581 /** 1582 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1583 * @since 1.5 1584 */ 1585 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1586 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1587 "YIJING HEXAGRAM SYMBOLS", 1588 "YIJINGHEXAGRAMSYMBOLS"); 1589 1590 /** 1591 * Constant for the "Variation Selectors" Unicode character block. 1592 * @since 1.5 1593 */ 1594 public static final UnicodeBlock VARIATION_SELECTORS = 1595 new UnicodeBlock("VARIATION_SELECTORS", 1596 "VARIATION SELECTORS", 1597 "VARIATIONSELECTORS"); 1598 1599 /** 1600 * Constant for the "Linear B Syllabary" Unicode character block. 
1601 * @since 1.5 1602 */ 1603 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1604 new UnicodeBlock("LINEAR_B_SYLLABARY", 1605 "LINEAR B SYLLABARY", 1606 "LINEARBSYLLABARY"); 1607 1608 /** 1609 * Constant for the "Linear B Ideograms" Unicode character block. 1610 * @since 1.5 1611 */ 1612 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1613 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1614 "LINEAR B IDEOGRAMS", 1615 "LINEARBIDEOGRAMS"); 1616 1617 /** 1618 * Constant for the "Aegean Numbers" Unicode character block. 1619 * @since 1.5 1620 */ 1621 public static final UnicodeBlock AEGEAN_NUMBERS = 1622 new UnicodeBlock("AEGEAN_NUMBERS", 1623 "AEGEAN NUMBERS", 1624 "AEGEANNUMBERS"); 1625 1626 /** 1627 * Constant for the "Old Italic" Unicode character block. 1628 * @since 1.5 1629 */ 1630 public static final UnicodeBlock OLD_ITALIC = 1631 new UnicodeBlock("OLD_ITALIC", 1632 "OLD ITALIC", 1633 "OLDITALIC"); 1634 1635 /** 1636 * Constant for the "Gothic" Unicode character block. 1637 * @since 1.5 1638 */ 1639 public static final UnicodeBlock GOTHIC = 1640 new UnicodeBlock("GOTHIC"); 1641 1642 /** 1643 * Constant for the "Ugaritic" Unicode character block. 1644 * @since 1.5 1645 */ 1646 public static final UnicodeBlock UGARITIC = 1647 new UnicodeBlock("UGARITIC"); 1648 1649 /** 1650 * Constant for the "Deseret" Unicode character block. 1651 * @since 1.5 1652 */ 1653 public static final UnicodeBlock DESERET = 1654 new UnicodeBlock("DESERET"); 1655 1656 /** 1657 * Constant for the "Shavian" Unicode character block. 1658 * @since 1.5 1659 */ 1660 public static final UnicodeBlock SHAVIAN = 1661 new UnicodeBlock("SHAVIAN"); 1662 1663 /** 1664 * Constant for the "Osmanya" Unicode character block. 1665 * @since 1.5 1666 */ 1667 public static final UnicodeBlock OSMANYA = 1668 new UnicodeBlock("OSMANYA"); 1669 1670 /** 1671 * Constant for the "Cypriot Syllabary" Unicode character block. 
1672 * @since 1.5 1673 */ 1674 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1675 new UnicodeBlock("CYPRIOT_SYLLABARY", 1676 "CYPRIOT SYLLABARY", 1677 "CYPRIOTSYLLABARY"); 1678 1679 /** 1680 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1681 * @since 1.5 1682 */ 1683 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1684 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1685 "BYZANTINE MUSICAL SYMBOLS", 1686 "BYZANTINEMUSICALSYMBOLS"); 1687 1688 /** 1689 * Constant for the "Musical Symbols" Unicode character block. 1690 * @since 1.5 1691 */ 1692 public static final UnicodeBlock MUSICAL_SYMBOLS = 1693 new UnicodeBlock("MUSICAL_SYMBOLS", 1694 "MUSICAL SYMBOLS", 1695 "MUSICALSYMBOLS"); 1696 1697 /** 1698 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1702 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1703 "TAI XUAN JING SYMBOLS", 1704 "TAIXUANJINGSYMBOLS"); 1705 1706 /** 1707 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1708 * character block. 1709 * @since 1.5 1710 */ 1711 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1712 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1713 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1714 "MATHEMATICALALPHANUMERICSYMBOLS"); 1715 1716 /** 1717 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1718 * character block. 1719 * @since 1.5 1720 */ 1721 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1722 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1723 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1724 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1725 1726 /** 1727 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1728 * @since 1.5 1729 */ 1730 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1731 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1732 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1733 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1734 1735 /** 1736 * Constant for the "Tags" Unicode character block. 1737 * @since 1.5 1738 */ 1739 public static final UnicodeBlock TAGS = 1740 new UnicodeBlock("TAGS"); 1741 1742 /** 1743 * Constant for the "Variation Selectors Supplement" Unicode character 1744 * block. 1745 * @since 1.5 1746 */ 1747 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1748 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1749 "VARIATION SELECTORS SUPPLEMENT", 1750 "VARIATIONSELECTORSSUPPLEMENT"); 1751 1752 /** 1753 * Constant for the "Supplementary Private Use Area-A" Unicode character 1754 * block. 1755 * @since 1.5 1756 */ 1757 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1758 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1759 "SUPPLEMENTARY PRIVATE USE AREA-A", 1760 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1761 1762 /** 1763 * Constant for the "Supplementary Private Use Area-B" Unicode character 1764 * block. 1765 * @since 1.5 1766 */ 1767 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1768 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1769 "SUPPLEMENTARY PRIVATE USE AREA-B", 1770 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1771 1772 /** 1773 * Constant for the "High Surrogates" Unicode character block. 1774 * This block represents codepoint values in the high surrogate 1775 * range: U+D800 through U+DB7F 1776 * 1777 * @since 1.5 1778 */ 1779 public static final UnicodeBlock HIGH_SURROGATES = 1780 new UnicodeBlock("HIGH_SURROGATES", 1781 "HIGH SURROGATES", 1782 "HIGHSURROGATES"); 1783 1784 /** 1785 * Constant for the "High Private Use Surrogates" Unicode character 1786 * block. 
1787 * This block represents codepoint values in the private use high 1788 * surrogate range: U+DB80 through U+DBFF 1789 * 1790 * @since 1.5 1791 */ 1792 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1793 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1794 "HIGH PRIVATE USE SURROGATES", 1795 "HIGHPRIVATEUSESURROGATES"); 1796 1797 /** 1798 * Constant for the "Low Surrogates" Unicode character block. 1799 * This block represents codepoint values in the low surrogate 1800 * range: U+DC00 through U+DFFF 1801 * 1802 * @since 1.5 1803 */ 1804 public static final UnicodeBlock LOW_SURROGATES = 1805 new UnicodeBlock("LOW_SURROGATES", 1806 "LOW SURROGATES", 1807 "LOWSURROGATES"); 1808 1809 /** 1810 * Constant for the "Arabic Supplement" Unicode character block. 1811 * @since 1.7 1812 */ 1813 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1814 new UnicodeBlock("ARABIC_SUPPLEMENT", 1815 "ARABIC SUPPLEMENT", 1816 "ARABICSUPPLEMENT"); 1817 1818 /** 1819 * Constant for the "NKo" Unicode character block. 1820 * @since 1.7 1821 */ 1822 public static final UnicodeBlock NKO = 1823 new UnicodeBlock("NKO"); 1824 1825 /** 1826 * Constant for the "Samaritan" Unicode character block. 1827 * @since 1.7 1828 */ 1829 public static final UnicodeBlock SAMARITAN = 1830 new UnicodeBlock("SAMARITAN"); 1831 1832 /** 1833 * Constant for the "Mandaic" Unicode character block. 1834 * @since 1.7 1835 */ 1836 public static final UnicodeBlock MANDAIC = 1837 new UnicodeBlock("MANDAIC"); 1838 1839 /** 1840 * Constant for the "Ethiopic Supplement" Unicode character block. 1841 * @since 1.7 1842 */ 1843 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1844 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1845 "ETHIOPIC SUPPLEMENT", 1846 "ETHIOPICSUPPLEMENT"); 1847 1848 /** 1849 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1850 * Unicode character block. 
1851 * @since 1.7 1852 */ 1853 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1854 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1855 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1856 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1857 1858 /** 1859 * Constant for the "New Tai Lue" Unicode character block. 1860 * @since 1.7 1861 */ 1862 public static final UnicodeBlock NEW_TAI_LUE = 1863 new UnicodeBlock("NEW_TAI_LUE", 1864 "NEW TAI LUE", 1865 "NEWTAILUE"); 1866 1867 /** 1868 * Constant for the "Buginese" Unicode character block. 1869 * @since 1.7 1870 */ 1871 public static final UnicodeBlock BUGINESE = 1872 new UnicodeBlock("BUGINESE"); 1873 1874 /** 1875 * Constant for the "Tai Tham" Unicode character block. 1876 * @since 1.7 1877 */ 1878 public static final UnicodeBlock TAI_THAM = 1879 new UnicodeBlock("TAI_THAM", 1880 "TAI THAM", 1881 "TAITHAM"); 1882 1883 /** 1884 * Constant for the "Balinese" Unicode character block. 1885 * @since 1.7 1886 */ 1887 public static final UnicodeBlock BALINESE = 1888 new UnicodeBlock("BALINESE"); 1889 1890 /** 1891 * Constant for the "Sundanese" Unicode character block. 1892 * @since 1.7 1893 */ 1894 public static final UnicodeBlock SUNDANESE = 1895 new UnicodeBlock("SUNDANESE"); 1896 1897 /** 1898 * Constant for the "Batak" Unicode character block. 1899 * @since 1.7 1900 */ 1901 public static final UnicodeBlock BATAK = 1902 new UnicodeBlock("BATAK"); 1903 1904 /** 1905 * Constant for the "Lepcha" Unicode character block. 1906 * @since 1.7 1907 */ 1908 public static final UnicodeBlock LEPCHA = 1909 new UnicodeBlock("LEPCHA"); 1910 1911 /** 1912 * Constant for the "Ol Chiki" Unicode character block. 1913 * @since 1.7 1914 */ 1915 public static final UnicodeBlock OL_CHIKI = 1916 new UnicodeBlock("OL_CHIKI", 1917 "OL CHIKI", 1918 "OLCHIKI"); 1919 1920 /** 1921 * Constant for the "Vedic Extensions" Unicode character block. 
1922 * @since 1.7 1923 */ 1924 public static final UnicodeBlock VEDIC_EXTENSIONS = 1925 new UnicodeBlock("VEDIC_EXTENSIONS", 1926 "VEDIC EXTENSIONS", 1927 "VEDICEXTENSIONS"); 1928 1929 /** 1930 * Constant for the "Phonetic Extensions Supplement" Unicode character 1931 * block. 1932 * @since 1.7 1933 */ 1934 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1935 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1936 "PHONETIC EXTENSIONS SUPPLEMENT", 1937 "PHONETICEXTENSIONSSUPPLEMENT"); 1938 1939 /** 1940 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1941 * character block. 1942 * @since 1.7 1943 */ 1944 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1945 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1946 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1947 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1948 1949 /** 1950 * Constant for the "Glagolitic" Unicode character block. 1951 * @since 1.7 1952 */ 1953 public static final UnicodeBlock GLAGOLITIC = 1954 new UnicodeBlock("GLAGOLITIC"); 1955 1956 /** 1957 * Constant for the "Latin Extended-C" Unicode character block. 1958 * @since 1.7 1959 */ 1960 public static final UnicodeBlock LATIN_EXTENDED_C = 1961 new UnicodeBlock("LATIN_EXTENDED_C", 1962 "LATIN EXTENDED-C", 1963 "LATINEXTENDED-C"); 1964 1965 /** 1966 * Constant for the "Coptic" Unicode character block. 1967 * @since 1.7 1968 */ 1969 public static final UnicodeBlock COPTIC = 1970 new UnicodeBlock("COPTIC"); 1971 1972 /** 1973 * Constant for the "Georgian Supplement" Unicode character block. 1974 * @since 1.7 1975 */ 1976 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1977 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1978 "GEORGIAN SUPPLEMENT", 1979 "GEORGIANSUPPLEMENT"); 1980 1981 /** 1982 * Constant for the "Tifinagh" Unicode character block. 
1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock TIFINAGH = 1986 new UnicodeBlock("TIFINAGH"); 1987 1988 /** 1989 * Constant for the "Ethiopic Extended" Unicode character block. 1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1993 new UnicodeBlock("ETHIOPIC_EXTENDED", 1994 "ETHIOPIC EXTENDED", 1995 "ETHIOPICEXTENDED"); 1996 1997 /** 1998 * Constant for the "Cyrillic Extended-A" Unicode character block. 1999 * @since 1.7 2000 */ 2001 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2002 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2003 "CYRILLIC EXTENDED-A", 2004 "CYRILLICEXTENDED-A"); 2005 2006 /** 2007 * Constant for the "Supplemental Punctuation" Unicode character block. 2008 * @since 1.7 2009 */ 2010 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2011 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2012 "SUPPLEMENTAL PUNCTUATION", 2013 "SUPPLEMENTALPUNCTUATION"); 2014 2015 /** 2016 * Constant for the "CJK Strokes" Unicode character block. 2017 * @since 1.7 2018 */ 2019 public static final UnicodeBlock CJK_STROKES = 2020 new UnicodeBlock("CJK_STROKES", 2021 "CJK STROKES", 2022 "CJKSTROKES"); 2023 2024 /** 2025 * Constant for the "Lisu" Unicode character block. 2026 * @since 1.7 2027 */ 2028 public static final UnicodeBlock LISU = 2029 new UnicodeBlock("LISU"); 2030 2031 /** 2032 * Constant for the "Vai" Unicode character block. 2033 * @since 1.7 2034 */ 2035 public static final UnicodeBlock VAI = 2036 new UnicodeBlock("VAI"); 2037 2038 /** 2039 * Constant for the "Cyrillic Extended-B" Unicode character block. 2040 * @since 1.7 2041 */ 2042 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2043 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2044 "CYRILLIC EXTENDED-B", 2045 "CYRILLICEXTENDED-B"); 2046 2047 /** 2048 * Constant for the "Bamum" Unicode character block. 
2049 * @since 1.7 2050 */ 2051 public static final UnicodeBlock BAMUM = 2052 new UnicodeBlock("BAMUM"); 2053 2054 /** 2055 * Constant for the "Modifier Tone Letters" Unicode character block. 2056 * @since 1.7 2057 */ 2058 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2059 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2060 "MODIFIER TONE LETTERS", 2061 "MODIFIERTONELETTERS"); 2062 2063 /** 2064 * Constant for the "Latin Extended-D" Unicode character block. 2065 * @since 1.7 2066 */ 2067 public static final UnicodeBlock LATIN_EXTENDED_D = 2068 new UnicodeBlock("LATIN_EXTENDED_D", 2069 "LATIN EXTENDED-D", 2070 "LATINEXTENDED-D"); 2071 2072 /** 2073 * Constant for the "Syloti Nagri" Unicode character block. 2074 * @since 1.7 2075 */ 2076 public static final UnicodeBlock SYLOTI_NAGRI = 2077 new UnicodeBlock("SYLOTI_NAGRI", 2078 "SYLOTI NAGRI", 2079 "SYLOTINAGRI"); 2080 2081 /** 2082 * Constant for the "Common Indic Number Forms" Unicode character block. 2083 * @since 1.7 2084 */ 2085 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2086 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2087 "COMMON INDIC NUMBER FORMS", 2088 "COMMONINDICNUMBERFORMS"); 2089 2090 /** 2091 * Constant for the "Phags-pa" Unicode character block. 2092 * @since 1.7 2093 */ 2094 public static final UnicodeBlock PHAGS_PA = 2095 new UnicodeBlock("PHAGS_PA", 2096 "PHAGS-PA"); 2097 2098 /** 2099 * Constant for the "Saurashtra" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock SAURASHTRA = 2103 new UnicodeBlock("SAURASHTRA"); 2104 2105 /** 2106 * Constant for the "Devanagari Extended" Unicode character block. 2107 * @since 1.7 2108 */ 2109 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2110 new UnicodeBlock("DEVANAGARI_EXTENDED", 2111 "DEVANAGARI EXTENDED", 2112 "DEVANAGARIEXTENDED"); 2113 2114 /** 2115 * Constant for the "Kayah Li" Unicode character block. 
2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock KAYAH_LI = 2119 new UnicodeBlock("KAYAH_LI", 2120 "KAYAH LI", 2121 "KAYAHLI"); 2122 2123 /** 2124 * Constant for the "Rejang" Unicode character block. 2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock REJANG = 2128 new UnicodeBlock("REJANG"); 2129 2130 /** 2131 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2132 * @since 1.7 2133 */ 2134 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2135 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2136 "HANGUL JAMO EXTENDED-A", 2137 "HANGULJAMOEXTENDED-A"); 2138 2139 /** 2140 * Constant for the "Javanese" Unicode character block. 2141 * @since 1.7 2142 */ 2143 public static final UnicodeBlock JAVANESE = 2144 new UnicodeBlock("JAVANESE"); 2145 2146 /** 2147 * Constant for the "Cham" Unicode character block. 2148 * @since 1.7 2149 */ 2150 public static final UnicodeBlock CHAM = 2151 new UnicodeBlock("CHAM"); 2152 2153 /** 2154 * Constant for the "Myanmar Extended-A" Unicode character block. 2155 * @since 1.7 2156 */ 2157 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2158 new UnicodeBlock("MYANMAR_EXTENDED_A", 2159 "MYANMAR EXTENDED-A", 2160 "MYANMAREXTENDED-A"); 2161 2162 /** 2163 * Constant for the "Tai Viet" Unicode character block. 2164 * @since 1.7 2165 */ 2166 public static final UnicodeBlock TAI_VIET = 2167 new UnicodeBlock("TAI_VIET", 2168 "TAI VIET", 2169 "TAIVIET"); 2170 2171 /** 2172 * Constant for the "Ethiopic Extended-A" Unicode character block. 2173 * @since 1.7 2174 */ 2175 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2176 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2177 "ETHIOPIC EXTENDED-A", 2178 "ETHIOPICEXTENDED-A"); 2179 2180 /** 2181 * Constant for the "Meetei Mayek" Unicode character block. 
2182 * @since 1.7 2183 */ 2184 public static final UnicodeBlock MEETEI_MAYEK = 2185 new UnicodeBlock("MEETEI_MAYEK", 2186 "MEETEI MAYEK", 2187 "MEETEIMAYEK"); 2188 2189 /** 2190 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2191 * @since 1.7 2192 */ 2193 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2194 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2195 "HANGUL JAMO EXTENDED-B", 2196 "HANGULJAMOEXTENDED-B"); 2197 2198 /** 2199 * Constant for the "Vertical Forms" Unicode character block. 2200 * @since 1.7 2201 */ 2202 public static final UnicodeBlock VERTICAL_FORMS = 2203 new UnicodeBlock("VERTICAL_FORMS", 2204 "VERTICAL FORMS", 2205 "VERTICALFORMS"); 2206 2207 /** 2208 * Constant for the "Ancient Greek Numbers" Unicode character block. 2209 * @since 1.7 2210 */ 2211 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2212 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2213 "ANCIENT GREEK NUMBERS", 2214 "ANCIENTGREEKNUMBERS"); 2215 2216 /** 2217 * Constant for the "Ancient Symbols" Unicode character block. 2218 * @since 1.7 2219 */ 2220 public static final UnicodeBlock ANCIENT_SYMBOLS = 2221 new UnicodeBlock("ANCIENT_SYMBOLS", 2222 "ANCIENT SYMBOLS", 2223 "ANCIENTSYMBOLS"); 2224 2225 /** 2226 * Constant for the "Phaistos Disc" Unicode character block. 2227 * @since 1.7 2228 */ 2229 public static final UnicodeBlock PHAISTOS_DISC = 2230 new UnicodeBlock("PHAISTOS_DISC", 2231 "PHAISTOS DISC", 2232 "PHAISTOSDISC"); 2233 2234 /** 2235 * Constant for the "Lycian" Unicode character block. 2236 * @since 1.7 2237 */ 2238 public static final UnicodeBlock LYCIAN = 2239 new UnicodeBlock("LYCIAN"); 2240 2241 /** 2242 * Constant for the "Carian" Unicode character block. 2243 * @since 1.7 2244 */ 2245 public static final UnicodeBlock CARIAN = 2246 new UnicodeBlock("CARIAN"); 2247 2248 /** 2249 * Constant for the "Old Persian" Unicode character block. 
2250 * @since 1.7 2251 */ 2252 public static final UnicodeBlock OLD_PERSIAN = 2253 new UnicodeBlock("OLD_PERSIAN", 2254 "OLD PERSIAN", 2255 "OLDPERSIAN"); 2256 2257 /** 2258 * Constant for the "Imperial Aramaic" Unicode character block. 2259 * @since 1.7 2260 */ 2261 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2262 new UnicodeBlock("IMPERIAL_ARAMAIC", 2263 "IMPERIAL ARAMAIC", 2264 "IMPERIALARAMAIC"); 2265 2266 /** 2267 * Constant for the "Phoenician" Unicode character block. 2268 * @since 1.7 2269 */ 2270 public static final UnicodeBlock PHOENICIAN = 2271 new UnicodeBlock("PHOENICIAN"); 2272 2273 /** 2274 * Constant for the "Lydian" Unicode character block. 2275 * @since 1.7 2276 */ 2277 public static final UnicodeBlock LYDIAN = 2278 new UnicodeBlock("LYDIAN"); 2279 2280 /** 2281 * Constant for the "Kharoshthi" Unicode character block. 2282 * @since 1.7 2283 */ 2284 public static final UnicodeBlock KHAROSHTHI = 2285 new UnicodeBlock("KHAROSHTHI"); 2286 2287 /** 2288 * Constant for the "Old South Arabian" Unicode character block. 2289 * @since 1.7 2290 */ 2291 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2292 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2293 "OLD SOUTH ARABIAN", 2294 "OLDSOUTHARABIAN"); 2295 2296 /** 2297 * Constant for the "Avestan" Unicode character block. 2298 * @since 1.7 2299 */ 2300 public static final UnicodeBlock AVESTAN = 2301 new UnicodeBlock("AVESTAN"); 2302 2303 /** 2304 * Constant for the "Inscriptional Parthian" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2308 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2309 "INSCRIPTIONAL PARTHIAN", 2310 "INSCRIPTIONALPARTHIAN"); 2311 2312 /** 2313 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2314 * @since 1.7 2315 */ 2316 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2317 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2318 "INSCRIPTIONAL PAHLAVI", 2319 "INSCRIPTIONALPAHLAVI"); 2320 2321 /** 2322 * Constant for the "Old Turkic" Unicode character block. 2323 * @since 1.7 2324 */ 2325 public static final UnicodeBlock OLD_TURKIC = 2326 new UnicodeBlock("OLD_TURKIC", 2327 "OLD TURKIC", 2328 "OLDTURKIC"); 2329 2330 /** 2331 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2332 * @since 1.7 2333 */ 2334 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2335 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2336 "RUMI NUMERAL SYMBOLS", 2337 "RUMINUMERALSYMBOLS"); 2338 2339 /** 2340 * Constant for the "Brahmi" Unicode character block. 2341 * @since 1.7 2342 */ 2343 public static final UnicodeBlock BRAHMI = 2344 new UnicodeBlock("BRAHMI"); 2345 2346 /** 2347 * Constant for the "Kaithi" Unicode character block. 2348 * @since 1.7 2349 */ 2350 public static final UnicodeBlock KAITHI = 2351 new UnicodeBlock("KAITHI"); 2352 2353 /** 2354 * Constant for the "Cuneiform" Unicode character block. 2355 * @since 1.7 2356 */ 2357 public static final UnicodeBlock CUNEIFORM = 2358 new UnicodeBlock("CUNEIFORM"); 2359 2360 /** 2361 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2362 * character block. 2363 * @since 1.7 2364 */ 2365 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2366 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2367 "CUNEIFORM NUMBERS AND PUNCTUATION", 2368 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2369 2370 /** 2371 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2372 * @since 1.7 2373 */ 2374 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2375 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2376 "EGYPTIAN HIEROGLYPHS", 2377 "EGYPTIANHIEROGLYPHS"); 2378 2379 /** 2380 * Constant for the "Bamum Supplement" Unicode character block. 
2381 * @since 1.7 2382 */ 2383 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2384 new UnicodeBlock("BAMUM_SUPPLEMENT", 2385 "BAMUM SUPPLEMENT", 2386 "BAMUMSUPPLEMENT"); 2387 2388 /** 2389 * Constant for the "Kana Supplement" Unicode character block. 2390 * @since 1.7 2391 */ 2392 public static final UnicodeBlock KANA_SUPPLEMENT = 2393 new UnicodeBlock("KANA_SUPPLEMENT", 2394 "KANA SUPPLEMENT", 2395 "KANASUPPLEMENT"); 2396 2397 /** 2398 * Constant for the "Ancient Greek Musical Notation" Unicode character 2399 * block. 2400 * @since 1.7 2401 */ 2402 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2403 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2404 "ANCIENT GREEK MUSICAL NOTATION", 2405 "ANCIENTGREEKMUSICALNOTATION"); 2406 2407 /** 2408 * Constant for the "Counting Rod Numerals" Unicode character block. 2409 * @since 1.7 2410 */ 2411 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2412 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2413 "COUNTING ROD NUMERALS", 2414 "COUNTINGRODNUMERALS"); 2415 2416 /** 2417 * Constant for the "Mahjong Tiles" Unicode character block. 2418 * @since 1.7 2419 */ 2420 public static final UnicodeBlock MAHJONG_TILES = 2421 new UnicodeBlock("MAHJONG_TILES", 2422 "MAHJONG TILES", 2423 "MAHJONGTILES"); 2424 2425 /** 2426 * Constant for the "Domino Tiles" Unicode character block. 2427 * @since 1.7 2428 */ 2429 public static final UnicodeBlock DOMINO_TILES = 2430 new UnicodeBlock("DOMINO_TILES", 2431 "DOMINO TILES", 2432 "DOMINOTILES"); 2433 2434 /** 2435 * Constant for the "Playing Cards" Unicode character block. 2436 * @since 1.7 2437 */ 2438 public static final UnicodeBlock PLAYING_CARDS = 2439 new UnicodeBlock("PLAYING_CARDS", 2440 "PLAYING CARDS", 2441 "PLAYINGCARDS"); 2442 2443 /** 2444 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2445 * block. 
2446 * @since 1.7 2447 */ 2448 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2449 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2450 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2451 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2452 2453 /** 2454 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2455 * block. 2456 * @since 1.7 2457 */ 2458 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2459 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2460 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2461 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2462 2463 /** 2464 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2465 * character block. 2466 * @since 1.7 2467 */ 2468 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2469 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2470 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2471 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2472 2473 /** 2474 * Constant for the "Emoticons" Unicode character block. 2475 * @since 1.7 2476 */ 2477 public static final UnicodeBlock EMOTICONS = 2478 new UnicodeBlock("EMOTICONS"); 2479 2480 /** 2481 * Constant for the "Transport And Map Symbols" Unicode character block. 2482 * @since 1.7 2483 */ 2484 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2485 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2486 "TRANSPORT AND MAP SYMBOLS", 2487 "TRANSPORTANDMAPSYMBOLS"); 2488 2489 /** 2490 * Constant for the "Alchemical Symbols" Unicode character block. 2491 * @since 1.7 2492 */ 2493 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2494 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2495 "ALCHEMICAL SYMBOLS", 2496 "ALCHEMICALSYMBOLS"); 2497 2498 /** 2499 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2500 * character block. 
2501 * @since 1.7 2502 */ 2503 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2504 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2505 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2506 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2507 2508 /** 2509 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2510 * character block. 2511 * @since 1.7 2512 */ 2513 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2514 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2515 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2516 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2517 2518 /** 2519 * Constant for the "Arabic Extended-A" Unicode character block. 2520 * @since 1.8 2521 */ 2522 public static final UnicodeBlock ARABIC_EXTENDED_A = 2523 new UnicodeBlock("ARABIC_EXTENDED_A", 2524 "ARABIC EXTENDED-A", 2525 "ARABICEXTENDED-A"); 2526 2527 /** 2528 * Constant for the "Sundanese Supplement" Unicode character block. 2529 * @since 1.8 2530 */ 2531 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2532 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2533 "SUNDANESE SUPPLEMENT", 2534 "SUNDANESESUPPLEMENT"); 2535 2536 /** 2537 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2538 * @since 1.8 2539 */ 2540 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2541 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2542 "MEETEI MAYEK EXTENSIONS", 2543 "MEETEIMAYEKEXTENSIONS"); 2544 2545 /** 2546 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2547 * @since 1.8 2548 */ 2549 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2550 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2551 "MEROITIC HIEROGLYPHS", 2552 "MEROITICHIEROGLYPHS"); 2553 2554 /** 2555 * Constant for the "Meroitic Cursive" Unicode character block. 
2556 * @since 1.8 2557 */ 2558 public static final UnicodeBlock MEROITIC_CURSIVE = 2559 new UnicodeBlock("MEROITIC_CURSIVE", 2560 "MEROITIC CURSIVE", 2561 "MEROITICCURSIVE"); 2562 2563 /** 2564 * Constant for the "Sora Sompeng" Unicode character block. 2565 * @since 1.8 2566 */ 2567 public static final UnicodeBlock SORA_SOMPENG = 2568 new UnicodeBlock("SORA_SOMPENG", 2569 "SORA SOMPENG", 2570 "SORASOMPENG"); 2571 2572 /** 2573 * Constant for the "Chakma" Unicode character block. 2574 * @since 1.8 2575 */ 2576 public static final UnicodeBlock CHAKMA = 2577 new UnicodeBlock("CHAKMA"); 2578 2579 /** 2580 * Constant for the "Sharada" Unicode character block. 2581 * @since 1.8 2582 */ 2583 public static final UnicodeBlock SHARADA = 2584 new UnicodeBlock("SHARADA"); 2585 2586 /** 2587 * Constant for the "Takri" Unicode character block. 2588 * @since 1.8 2589 */ 2590 public static final UnicodeBlock TAKRI = 2591 new UnicodeBlock("TAKRI"); 2592 2593 /** 2594 * Constant for the "Miao" Unicode character block. 2595 * @since 1.8 2596 */ 2597 public static final UnicodeBlock MIAO = 2598 new UnicodeBlock("MIAO"); 2599 2600 /** 2601 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2602 * character block. 2603 * @since 1.8 2604 */ 2605 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2606 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2607 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2608 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2609 2610 /** 2611 * Constant for the "Combining Diacritical Marks Extended" Unicode 2612 * character block. 2613 * @since 9 2614 */ 2615 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2616 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2617 "COMBINING DIACRITICAL MARKS EXTENDED", 2618 "COMBININGDIACRITICALMARKSEXTENDED"); 2619 2620 /** 2621 * Constant for the "Myanmar Extended-B" Unicode character block. 
2622 * @since 9 2623 */ 2624 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2625 new UnicodeBlock("MYANMAR_EXTENDED_B", 2626 "MYANMAR EXTENDED-B", 2627 "MYANMAREXTENDED-B"); 2628 2629 /** 2630 * Constant for the "Latin Extended-E" Unicode character block. 2631 * @since 9 2632 */ 2633 public static final UnicodeBlock LATIN_EXTENDED_E = 2634 new UnicodeBlock("LATIN_EXTENDED_E", 2635 "LATIN EXTENDED-E", 2636 "LATINEXTENDED-E"); 2637 2638 /** 2639 * Constant for the "Coptic Epact Numbers" Unicode character block. 2640 * @since 9 2641 */ 2642 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2643 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2644 "COPTIC EPACT NUMBERS", 2645 "COPTICEPACTNUMBERS"); 2646 2647 /** 2648 * Constant for the "Old Permic" Unicode character block. 2649 * @since 9 2650 */ 2651 public static final UnicodeBlock OLD_PERMIC = 2652 new UnicodeBlock("OLD_PERMIC", 2653 "OLD PERMIC", 2654 "OLDPERMIC"); 2655 2656 /** 2657 * Constant for the "Elbasan" Unicode character block. 2658 * @since 9 2659 */ 2660 public static final UnicodeBlock ELBASAN = 2661 new UnicodeBlock("ELBASAN"); 2662 2663 /** 2664 * Constant for the "Caucasian Albanian" Unicode character block. 2665 * @since 9 2666 */ 2667 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2668 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2669 "CAUCASIAN ALBANIAN", 2670 "CAUCASIANALBANIAN"); 2671 2672 /** 2673 * Constant for the "Linear A" Unicode character block. 2674 * @since 9 2675 */ 2676 public static final UnicodeBlock LINEAR_A = 2677 new UnicodeBlock("LINEAR_A", 2678 "LINEAR A", 2679 "LINEARA"); 2680 2681 /** 2682 * Constant for the "Palmyrene" Unicode character block. 2683 * @since 9 2684 */ 2685 public static final UnicodeBlock PALMYRENE = 2686 new UnicodeBlock("PALMYRENE"); 2687 2688 /** 2689 * Constant for the "Nabataean" Unicode character block. 
2690 * @since 9 2691 */ 2692 public static final UnicodeBlock NABATAEAN = 2693 new UnicodeBlock("NABATAEAN"); 2694 2695 /** 2696 * Constant for the "Old North Arabian" Unicode character block. 2697 * @since 9 2698 */ 2699 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2700 new UnicodeBlock("OLD_NORTH_ARABIAN", 2701 "OLD NORTH ARABIAN", 2702 "OLDNORTHARABIAN"); 2703 2704 /** 2705 * Constant for the "Manichaean" Unicode character block. 2706 * @since 9 2707 */ 2708 public static final UnicodeBlock MANICHAEAN = 2709 new UnicodeBlock("MANICHAEAN"); 2710 2711 /** 2712 * Constant for the "Psalter Pahlavi" Unicode character block. 2713 * @since 9 2714 */ 2715 public static final UnicodeBlock PSALTER_PAHLAVI = 2716 new UnicodeBlock("PSALTER_PAHLAVI", 2717 "PSALTER PAHLAVI", 2718 "PSALTERPAHLAVI"); 2719 2720 /** 2721 * Constant for the "Mahajani" Unicode character block. 2722 * @since 9 2723 */ 2724 public static final UnicodeBlock MAHAJANI = 2725 new UnicodeBlock("MAHAJANI"); 2726 2727 /** 2728 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2729 * @since 9 2730 */ 2731 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2732 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2733 "SINHALA ARCHAIC NUMBERS", 2734 "SINHALAARCHAICNUMBERS"); 2735 2736 /** 2737 * Constant for the "Khojki" Unicode character block. 2738 * @since 9 2739 */ 2740 public static final UnicodeBlock KHOJKI = 2741 new UnicodeBlock("KHOJKI"); 2742 2743 /** 2744 * Constant for the "Khudawadi" Unicode character block. 2745 * @since 9 2746 */ 2747 public static final UnicodeBlock KHUDAWADI = 2748 new UnicodeBlock("KHUDAWADI"); 2749 2750 /** 2751 * Constant for the "Grantha" Unicode character block. 2752 * @since 9 2753 */ 2754 public static final UnicodeBlock GRANTHA = 2755 new UnicodeBlock("GRANTHA"); 2756 2757 /** 2758 * Constant for the "Tirhuta" Unicode character block. 
2759 * @since 9 2760 */ 2761 public static final UnicodeBlock TIRHUTA = 2762 new UnicodeBlock("TIRHUTA"); 2763 2764 /** 2765 * Constant for the "Siddham" Unicode character block. 2766 * @since 9 2767 */ 2768 public static final UnicodeBlock SIDDHAM = 2769 new UnicodeBlock("SIDDHAM"); 2770 2771 /** 2772 * Constant for the "Modi" Unicode character block. 2773 * @since 9 2774 */ 2775 public static final UnicodeBlock MODI = 2776 new UnicodeBlock("MODI"); 2777 2778 /** 2779 * Constant for the "Warang Citi" Unicode character block. 2780 * @since 9 2781 */ 2782 public static final UnicodeBlock WARANG_CITI = 2783 new UnicodeBlock("WARANG_CITI", 2784 "WARANG CITI", 2785 "WARANGCITI"); 2786 2787 /** 2788 * Constant for the "Pau Cin Hau" Unicode character block. 2789 * @since 9 2790 */ 2791 public static final UnicodeBlock PAU_CIN_HAU = 2792 new UnicodeBlock("PAU_CIN_HAU", 2793 "PAU CIN HAU", 2794 "PAUCINHAU"); 2795 2796 /** 2797 * Constant for the "Mro" Unicode character block. 2798 * @since 9 2799 */ 2800 public static final UnicodeBlock MRO = 2801 new UnicodeBlock("MRO"); 2802 2803 /** 2804 * Constant for the "Bassa Vah" Unicode character block. 2805 * @since 9 2806 */ 2807 public static final UnicodeBlock BASSA_VAH = 2808 new UnicodeBlock("BASSA_VAH", 2809 "BASSA VAH", 2810 "BASSAVAH"); 2811 2812 /** 2813 * Constant for the "Pahawh Hmong" Unicode character block. 2814 * @since 9 2815 */ 2816 public static final UnicodeBlock PAHAWH_HMONG = 2817 new UnicodeBlock("PAHAWH_HMONG", 2818 "PAHAWH HMONG", 2819 "PAHAWHHMONG"); 2820 2821 /** 2822 * Constant for the "Duployan" Unicode character block. 2823 * @since 9 2824 */ 2825 public static final UnicodeBlock DUPLOYAN = 2826 new UnicodeBlock("DUPLOYAN"); 2827 2828 /** 2829 * Constant for the "Shorthand Format Controls" Unicode character block. 
2830 * @since 9 2831 */ 2832 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2833 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2834 "SHORTHAND FORMAT CONTROLS", 2835 "SHORTHANDFORMATCONTROLS"); 2836 2837 /** 2838 * Constant for the "Mende Kikakui" Unicode character block. 2839 * @since 9 2840 */ 2841 public static final UnicodeBlock MENDE_KIKAKUI = 2842 new UnicodeBlock("MENDE_KIKAKUI", 2843 "MENDE KIKAKUI", 2844 "MENDEKIKAKUI"); 2845 2846 /** 2847 * Constant for the "Ornamental Dingbats" Unicode character block. 2848 * @since 9 2849 */ 2850 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2851 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2852 "ORNAMENTAL DINGBATS", 2853 "ORNAMENTALDINGBATS"); 2854 2855 /** 2856 * Constant for the "Geometric Shapes Extended" Unicode character block. 2857 * @since 9 2858 */ 2859 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2860 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2861 "GEOMETRIC SHAPES EXTENDED", 2862 "GEOMETRICSHAPESEXTENDED"); 2863 2864 /** 2865 * Constant for the "Supplemental Arrows-C" Unicode character block. 2866 * @since 9 2867 */ 2868 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2869 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2870 "SUPPLEMENTAL ARROWS-C", 2871 "SUPPLEMENTALARROWS-C"); 2872 2873 /** 2874 * Constant for the "Cherokee Supplement" Unicode character block. 2875 * @since 9 2876 */ 2877 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2878 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2879 "CHEROKEE SUPPLEMENT", 2880 "CHEROKEESUPPLEMENT"); 2881 2882 /** 2883 * Constant for the "Hatran" Unicode character block. 2884 * @since 9 2885 */ 2886 public static final UnicodeBlock HATRAN = 2887 new UnicodeBlock("HATRAN"); 2888 2889 /** 2890 * Constant for the "Old Hungarian" Unicode character block. 
2891 * @since 9 2892 */ 2893 public static final UnicodeBlock OLD_HUNGARIAN = 2894 new UnicodeBlock("OLD_HUNGARIAN", 2895 "OLD HUNGARIAN", 2896 "OLDHUNGARIAN"); 2897 2898 /** 2899 * Constant for the "Multani" Unicode character block. 2900 * @since 9 2901 */ 2902 public static final UnicodeBlock MULTANI = 2903 new UnicodeBlock("MULTANI"); 2904 2905 /** 2906 * Constant for the "Ahom" Unicode character block. 2907 * @since 9 2908 */ 2909 public static final UnicodeBlock AHOM = 2910 new UnicodeBlock("AHOM"); 2911 2912 /** 2913 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2914 * @since 9 2915 */ 2916 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2917 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2918 "EARLY DYNASTIC CUNEIFORM", 2919 "EARLYDYNASTICCUNEIFORM"); 2920 2921 /** 2922 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2923 * @since 9 2924 */ 2925 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2926 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2927 "ANATOLIAN HIEROGLYPHS", 2928 "ANATOLIANHIEROGLYPHS"); 2929 2930 /** 2931 * Constant for the "Sutton SignWriting" Unicode character block. 2932 * @since 9 2933 */ 2934 public static final UnicodeBlock SUTTON_SIGNWRITING = 2935 new UnicodeBlock("SUTTON_SIGNWRITING", 2936 "SUTTON SIGNWRITING", 2937 "SUTTONSIGNWRITING"); 2938 2939 /** 2940 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2941 * character block. 2942 * @since 9 2943 */ 2944 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2945 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2946 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2947 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 2948 2949 /** 2950 * Constant for the "CJK Unified Ideographs Extension E" Unicode 2951 * character block. 
2952 * @since 9 2953 */ 2954 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2955 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2956 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 2957 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 2958 2959 /** 2960 * Constant for the "Syriac Supplement" Unicode 2961 * character block. 2962 * @since 11 2963 */ 2964 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2965 new UnicodeBlock("SYRIAC_SUPPLEMENT", 2966 "SYRIAC SUPPLEMENT", 2967 "SYRIACSUPPLEMENT"); 2968 2969 /** 2970 * Constant for the "Cyrillic Extended-C" Unicode 2971 * character block. 2972 * @since 11 2973 */ 2974 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2975 new UnicodeBlock("CYRILLIC_EXTENDED_C", 2976 "CYRILLIC EXTENDED-C", 2977 "CYRILLICEXTENDED-C"); 2978 2979 /** 2980 * Constant for the "Osage" Unicode 2981 * character block. 2982 * @since 11 2983 */ 2984 public static final UnicodeBlock OSAGE = 2985 new UnicodeBlock("OSAGE"); 2986 2987 /** 2988 * Constant for the "Newa" Unicode 2989 * character block. 2990 * @since 11 2991 */ 2992 public static final UnicodeBlock NEWA = 2993 new UnicodeBlock("NEWA"); 2994 2995 /** 2996 * Constant for the "Mongolian Supplement" Unicode 2997 * character block. 2998 * @since 11 2999 */ 3000 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3001 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3002 "MONGOLIAN SUPPLEMENT", 3003 "MONGOLIANSUPPLEMENT"); 3004 3005 /** 3006 * Constant for the "Marchen" Unicode 3007 * character block. 3008 * @since 11 3009 */ 3010 public static final UnicodeBlock MARCHEN = 3011 new UnicodeBlock("MARCHEN"); 3012 3013 /** 3014 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3015 * character block. 
3016 * @since 11 3017 */ 3018 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3019 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3020 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3021 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3022 3023 /** 3024 * Constant for the "Tangut" Unicode 3025 * character block. 3026 * @since 11 3027 */ 3028 public static final UnicodeBlock TANGUT = 3029 new UnicodeBlock("TANGUT"); 3030 3031 /** 3032 * Constant for the "Tangut Components" Unicode 3033 * character block. 3034 * @since 11 3035 */ 3036 public static final UnicodeBlock TANGUT_COMPONENTS = 3037 new UnicodeBlock("TANGUT_COMPONENTS", 3038 "TANGUT COMPONENTS", 3039 "TANGUTCOMPONENTS"); 3040 3041 /** 3042 * Constant for the "Kana Extended-A" Unicode 3043 * character block. 3044 * @since 11 3045 */ 3046 public static final UnicodeBlock KANA_EXTENDED_A = 3047 new UnicodeBlock("KANA_EXTENDED_A", 3048 "KANA EXTENDED-A", 3049 "KANAEXTENDED-A"); 3050 /** 3051 * Constant for the "Glagolitic Supplement" Unicode 3052 * character block. 3053 * @since 11 3054 */ 3055 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3056 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3057 "GLAGOLITIC SUPPLEMENT", 3058 "GLAGOLITICSUPPLEMENT"); 3059 /** 3060 * Constant for the "Adlam" Unicode 3061 * character block. 3062 * @since 11 3063 */ 3064 public static final UnicodeBlock ADLAM = 3065 new UnicodeBlock("ADLAM"); 3066 3067 /** 3068 * Constant for the "Masaram Gondi" Unicode 3069 * character block. 3070 * @since 11 3071 */ 3072 public static final UnicodeBlock MASARAM_GONDI = 3073 new UnicodeBlock("MASARAM_GONDI", 3074 "MASARAM GONDI", 3075 "MASARAMGONDI"); 3076 3077 /** 3078 * Constant for the "Zanabazar Square" Unicode 3079 * character block. 
3080 * @since 11 3081 */ 3082 public static final UnicodeBlock ZANABAZAR_SQUARE = 3083 new UnicodeBlock("ZANABAZAR_SQUARE", 3084 "ZANABAZAR SQUARE", 3085 "ZANABAZARSQUARE"); 3086 3087 /** 3088 * Constant for the "Nushu" Unicode 3089 * character block. 3090 * @since 11 3091 */ 3092 public static final UnicodeBlock NUSHU = 3093 new UnicodeBlock("NUSHU"); 3094 3095 /** 3096 * Constant for the "Soyombo" Unicode 3097 * character block. 3098 * @since 11 3099 */ 3100 public static final UnicodeBlock SOYOMBO = 3101 new UnicodeBlock("SOYOMBO"); 3102 3103 /** 3104 * Constant for the "Bhaiksuki" Unicode 3105 * character block. 3106 * @since 11 3107 */ 3108 public static final UnicodeBlock BHAIKSUKI = 3109 new UnicodeBlock("BHAIKSUKI"); 3110 3111 /** 3112 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3113 * character block. 3114 * @since 11 3115 */ 3116 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3117 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3118 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3119 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3120 /** 3121 * Constant for the "Georgian Extended" Unicode 3122 * character block. 3123 * @since 12 3124 */ 3125 public static final UnicodeBlock GEORGIAN_EXTENDED = 3126 new UnicodeBlock("GEORGIAN_EXTENDED", 3127 "GEORGIAN EXTENDED", 3128 "GEORGIANEXTENDED"); 3129 3130 /** 3131 * Constant for the "Hanifi Rohingya" Unicode 3132 * character block. 3133 * @since 12 3134 */ 3135 public static final UnicodeBlock HANIFI_ROHINGYA = 3136 new UnicodeBlock("HANIFI_ROHINGYA", 3137 "HANIFI ROHINGYA", 3138 "HANIFIROHINGYA"); 3139 3140 /** 3141 * Constant for the "Old Sogdian" Unicode 3142 * character block. 3143 * @since 12 3144 */ 3145 public static final UnicodeBlock OLD_SOGDIAN = 3146 new UnicodeBlock("OLD_SOGDIAN", 3147 "OLD SOGDIAN", 3148 "OLDSOGDIAN"); 3149 3150 /** 3151 * Constant for the "Sogdian" Unicode 3152 * character block. 
3153 * @since 12 3154 */ 3155 public static final UnicodeBlock SOGDIAN = 3156 new UnicodeBlock("SOGDIAN"); 3157 3158 /** 3159 * Constant for the "Dogra" Unicode 3160 * character block. 3161 * @since 12 3162 */ 3163 public static final UnicodeBlock DOGRA = 3164 new UnicodeBlock("DOGRA"); 3165 3166 /** 3167 * Constant for the "Gunjala Gondi" Unicode 3168 * character block. 3169 * @since 12 3170 */ 3171 public static final UnicodeBlock GUNJALA_GONDI = 3172 new UnicodeBlock("GUNJALA_GONDI", 3173 "GUNJALA GONDI", 3174 "GUNJALAGONDI"); 3175 3176 /** 3177 * Constant for the "Makasar" Unicode 3178 * character block. 3179 * @since 12 3180 */ 3181 public static final UnicodeBlock MAKASAR = 3182 new UnicodeBlock("MAKASAR"); 3183 3184 /** 3185 * Constant for the "Medefaidrin" Unicode 3186 * character block. 3187 * @since 12 3188 */ 3189 public static final UnicodeBlock MEDEFAIDRIN = 3190 new UnicodeBlock("MEDEFAIDRIN"); 3191 3192 /** 3193 * Constant for the "Mayan Numerals" Unicode 3194 * character block. 3195 * @since 12 3196 */ 3197 public static final UnicodeBlock MAYAN_NUMERALS = 3198 new UnicodeBlock("MAYAN_NUMERALS", 3199 "MAYAN NUMERALS", 3200 "MAYANNUMERALS"); 3201 3202 /** 3203 * Constant for the "Indic Siyaq Numbers" Unicode 3204 * character block. 3205 * @since 12 3206 */ 3207 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3208 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3209 "INDIC SIYAQ NUMBERS", 3210 "INDICSIYAQNUMBERS"); 3211 3212 /** 3213 * Constant for the "Chess Symbols" Unicode 3214 * character block. 
3215 * @since 12 3216 */ 3217 public static final UnicodeBlock CHESS_SYMBOLS = 3218 new UnicodeBlock("CHESS_SYMBOLS", 3219 "CHESS SYMBOLS", 3220 "CHESSSYMBOLS"); 3221 3222 3223 private static final int blockStarts[] = { 3224 0x0000, // 0000..007F; Basic Latin 3225 0x0080, // 0080..00FF; Latin-1 Supplement 3226 0x0100, // 0100..017F; Latin Extended-A 3227 0x0180, // 0180..024F; Latin Extended-B 3228 0x0250, // 0250..02AF; IPA Extensions 3229 0x02B0, // 02B0..02FF; Spacing Modifier Letters 3230 0x0300, // 0300..036F; Combining Diacritical Marks 3231 0x0370, // 0370..03FF; Greek and Coptic 3232 0x0400, // 0400..04FF; Cyrillic 3233 0x0500, // 0500..052F; Cyrillic Supplement 3234 0x0530, // 0530..058F; Armenian 3235 0x0590, // 0590..05FF; Hebrew 3236 0x0600, // 0600..06FF; Arabic 3237 0x0700, // 0700..074F; Syriac 3238 0x0750, // 0750..077F; Arabic Supplement 3239 0x0780, // 0780..07BF; Thaana 3240 0x07C0, // 07C0..07FF; NKo 3241 0x0800, // 0800..083F; Samaritan 3242 0x0840, // 0840..085F; Mandaic 3243 0x0860, // 0860..086F; Syriac Supplement 3244 0x0870, // unassigned 3245 0x08A0, // 08A0..08FF; Arabic Extended-A 3246 0x0900, // 0900..097F; Devanagari 3247 0x0980, // 0980..09FF; Bengali 3248 0x0A00, // 0A00..0A7F; Gurmukhi 3249 0x0A80, // 0A80..0AFF; Gujarati 3250 0x0B00, // 0B00..0B7F; Oriya 3251 0x0B80, // 0B80..0BFF; Tamil 3252 0x0C00, // 0C00..0C7F; Telugu 3253 0x0C80, // 0C80..0CFF; Kannada 3254 0x0D00, // 0D00..0D7F; Malayalam 3255 0x0D80, // 0D80..0DFF; Sinhala 3256 0x0E00, // 0E00..0E7F; Thai 3257 0x0E80, // 0E80..0EFF; Lao 3258 0x0F00, // 0F00..0FFF; Tibetan 3259 0x1000, // 1000..109F; Myanmar 3260 0x10A0, // 10A0..10FF; Georgian 3261 0x1100, // 1100..11FF; Hangul Jamo 3262 0x1200, // 1200..137F; Ethiopic 3263 0x1380, // 1380..139F; Ethiopic Supplement 3264 0x13A0, // 13A0..13FF; Cherokee 3265 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3266 0x1680, // 1680..169F; Ogham 3267 0x16A0, // 16A0..16FF; Runic 3268 0x1700, // 1700..171F; Tagalog 3269 
0x1720, // 1720..173F; Hanunoo 3270 0x1740, // 1740..175F; Buhid 3271 0x1760, // 1760..177F; Tagbanwa 3272 0x1780, // 1780..17FF; Khmer 3273 0x1800, // 1800..18AF; Mongolian 3274 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3275 0x1900, // 1900..194F; Limbu 3276 0x1950, // 1950..197F; Tai Le 3277 0x1980, // 1980..19DF; New Tai Lue 3278 0x19E0, // 19E0..19FF; Khmer Symbols 3279 0x1A00, // 1A00..1A1F; Buginese 3280 0x1A20, // 1A20..1AAF; Tai Tham 3281 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3282 0x1B00, // 1B00..1B7F; Balinese 3283 0x1B80, // 1B80..1BBF; Sundanese 3284 0x1BC0, // 1BC0..1BFF; Batak 3285 0x1C00, // 1C00..1C4F; Lepcha 3286 0x1C50, // 1C50..1C7F; Ol Chiki 3287 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3288 0x1C90, // 1C90..1CBF; Georgian Extended 3289 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3290 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3291 0x1D00, // 1D00..1D7F; Phonetic Extensions 3292 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3293 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3294 0x1E00, // 1E00..1EFF; Latin Extended Additional 3295 0x1F00, // 1F00..1FFF; Greek Extended 3296 0x2000, // 2000..206F; General Punctuation 3297 0x2070, // 2070..209F; Superscripts and Subscripts 3298 0x20A0, // 20A0..20CF; Currency Symbols 3299 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3300 0x2100, // 2100..214F; Letterlike Symbols 3301 0x2150, // 2150..218F; Number Forms 3302 0x2190, // 2190..21FF; Arrows 3303 0x2200, // 2200..22FF; Mathematical Operators 3304 0x2300, // 2300..23FF; Miscellaneous Technical 3305 0x2400, // 2400..243F; Control Pictures 3306 0x2440, // 2440..245F; Optical Character Recognition 3307 0x2460, // 2460..24FF; Enclosed Alphanumerics 3308 0x2500, // 2500..257F; Box Drawing 3309 0x2580, // 2580..259F; Block Elements 3310 0x25A0, // 25A0..25FF; Geometric Shapes 3311 0x2600, // 2600..26FF; Miscellaneous Symbols 3312 0x2700, // 2700..27BF; Dingbats 3313 
0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3314 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3315 0x2800, // 2800..28FF; Braille Patterns 3316 0x2900, // 2900..297F; Supplemental Arrows-B 3317 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3318 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3319 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3320 0x2C00, // 2C00..2C5F; Glagolitic 3321 0x2C60, // 2C60..2C7F; Latin Extended-C 3322 0x2C80, // 2C80..2CFF; Coptic 3323 0x2D00, // 2D00..2D2F; Georgian Supplement 3324 0x2D30, // 2D30..2D7F; Tifinagh 3325 0x2D80, // 2D80..2DDF; Ethiopic Extended 3326 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3327 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3328 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3329 0x2F00, // 2F00..2FDF; Kangxi Radicals 3330 0x2FE0, // unassigned 3331 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3332 0x3000, // 3000..303F; CJK Symbols and Punctuation 3333 0x3040, // 3040..309F; Hiragana 3334 0x30A0, // 30A0..30FF; Katakana 3335 0x3100, // 3100..312F; Bopomofo 3336 0x3130, // 3130..318F; Hangul Compatibility Jamo 3337 0x3190, // 3190..319F; Kanbun 3338 0x31A0, // 31A0..31BF; Bopomofo Extended 3339 0x31C0, // 31C0..31EF; CJK Strokes 3340 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3341 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3342 0x3300, // 3300..33FF; CJK Compatibility 3343 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3344 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3345 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3346 0xA000, // A000..A48F; Yi Syllables 3347 0xA490, // A490..A4CF; Yi Radicals 3348 0xA4D0, // A4D0..A4FF; Lisu 3349 0xA500, // A500..A63F; Vai 3350 0xA640, // A640..A69F; Cyrillic Extended-B 3351 0xA6A0, // A6A0..A6FF; Bamum 3352 0xA700, // A700..A71F; Modifier Tone Letters 3353 0xA720, // A720..A7FF; Latin Extended-D 3354 0xA800, // A800..A82F; Syloti Nagri 3355 0xA830, // A830..A83F; 
Common Indic Number Forms 3356 0xA840, // A840..A87F; Phags-pa 3357 0xA880, // A880..A8DF; Saurashtra 3358 0xA8E0, // A8E0..A8FF; Devanagari Extended 3359 0xA900, // A900..A92F; Kayah Li 3360 0xA930, // A930..A95F; Rejang 3361 0xA960, // A960..A97F; Hangul Jamo Extended-A 3362 0xA980, // A980..A9DF; Javanese 3363 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3364 0xAA00, // AA00..AA5F; Cham 3365 0xAA60, // AA60..AA7F; Myanmar Extended-A 3366 0xAA80, // AA80..AADF; Tai Viet 3367 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3368 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3369 0xAB30, // AB30..AB6F; Latin Extended-E 3370 0xAB70, // AB70..ABBF; Cherokee Supplement 3371 0xABC0, // ABC0..ABFF; Meetei Mayek 3372 0xAC00, // AC00..D7AF; Hangul Syllables 3373 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3374 0xD800, // D800..DB7F; High Surrogates 3375 0xDB80, // DB80..DBFF; High Private Use Surrogates 3376 0xDC00, // DC00..DFFF; Low Surrogates 3377 0xE000, // E000..F8FF; Private Use Area 3378 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3379 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3380 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3381 0xFE00, // FE00..FE0F; Variation Selectors 3382 0xFE10, // FE10..FE1F; Vertical Forms 3383 0xFE20, // FE20..FE2F; Combining Half Marks 3384 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3385 0xFE50, // FE50..FE6F; Small Form Variants 3386 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3387 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3388 0xFFF0, // FFF0..FFFF; Specials 3389 0x10000, // 10000..1007F; Linear B Syllabary 3390 0x10080, // 10080..100FF; Linear B Ideograms 3391 0x10100, // 10100..1013F; Aegean Numbers 3392 0x10140, // 10140..1018F; Ancient Greek Numbers 3393 0x10190, // 10190..101CF; Ancient Symbols 3394 0x101D0, // 101D0..101FF; Phaistos Disc 3395 0x10200, // unassigned 3396 0x10280, // 10280..1029F; Lycian 3397 0x102A0, // 102A0..102DF; Carian 3398 0x102E0, // 102E0..102FF; Coptic Epact 
Numbers 3399 0x10300, // 10300..1032F; Old Italic 3400 0x10330, // 10330..1034F; Gothic 3401 0x10350, // 10350..1037F; Old Permic 3402 0x10380, // 10380..1039F; Ugaritic 3403 0x103A0, // 103A0..103DF; Old Persian 3404 0x103E0, // unassigned 3405 0x10400, // 10400..1044F; Deseret 3406 0x10450, // 10450..1047F; Shavian 3407 0x10480, // 10480..104AF; Osmanya 3408 0x104B0, // 104B0..104FF; Osage 3409 0x10500, // 10500..1052F; Elbasan 3410 0x10530, // 10530..1056F; Caucasian Albanian 3411 0x10570, // unassigned 3412 0x10600, // 10600..1077F; Linear A 3413 0x10780, // unassigned 3414 0x10800, // 10800..1083F; Cypriot Syllabary 3415 0x10840, // 10840..1085F; Imperial Aramaic 3416 0x10860, // 10860..1087F; Palmyrene 3417 0x10880, // 10880..108AF; Nabataean 3418 0x108B0, // unassigned 3419 0x108E0, // 108E0..108FF; Hatran 3420 0x10900, // 10900..1091F; Phoenician 3421 0x10920, // 10920..1093F; Lydian 3422 0x10940, // unassigned 3423 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3424 0x109A0, // 109A0..109FF; Meroitic Cursive 3425 0x10A00, // 10A00..10A5F; Kharoshthi 3426 0x10A60, // 10A60..10A7F; Old South Arabian 3427 0x10A80, // 10A80..10A9F; Old North Arabian 3428 0x10AA0, // unassigned 3429 0x10AC0, // 10AC0..10AFF; Manichaean 3430 0x10B00, // 10B00..10B3F; Avestan 3431 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3432 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3433 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3434 0x10BB0, // unassigned 3435 0x10C00, // 10C00..10C4F; Old Turkic 3436 0x10C50, // unassigned 3437 0x10C80, // 10C80..10CFF; Old Hungarian 3438 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3439 0x10D40, // unassigned 3440 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3441 0x10E80, // unassigned 3442 0x10F00, // 10F00..10F2F; Old Sogdian 3443 0x10F30, // 10F30..10F6F; Sogdian 3444 0x10F70, // unassigned 3445 0x11000, // 11000..1107F; Brahmi 3446 0x11080, // 11080..110CF; Kaithi 3447 0x110D0, // 110D0..110FF; Sora Sompeng 3448 0x11100, // 11100..1114F; 
Chakma 3449 0x11150, // 11150..1117F; Mahajani 3450 0x11180, // 11180..111DF; Sharada 3451 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3452 0x11200, // 11200..1124F; Khojki 3453 0x11250, // unassigned 3454 0x11280, // 11280..112AF; Multani 3455 0x112B0, // 112B0..112FF; Khudawadi 3456 0x11300, // 11300..1137F; Grantha 3457 0x11380, // unassigned 3458 0x11400, // 11400..1147F; Newa 3459 0x11480, // 11480..114DF; Tirhuta 3460 0x114E0, // unassigned 3461 0x11580, // 11580..115FF; Siddham 3462 0x11600, // 11600..1165F; Modi 3463 0x11660, // 11660..1167F; Mongolian Supplement 3464 0x11680, // 11680..116CF; Takri 3465 0x116D0, // unassigned 3466 0x11700, // 11700..1173F; Ahom 3467 0x11740, // unassigned 3468 0x11800, // 11800..1184F; Dogra 3469 0x11850, // unassigned 3470 0x118A0, // 118A0..118FF; Warang Citi 3471 0x11900, // unassigned 3472 0x11A00, // 11A00..11A4F; Zanabazar Square 3473 0x11A50, // 11A50..11AAF; Soyombo 3474 0x11AB0, // unassigned 3475 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3476 0x11B00, // unassigned 3477 0x11C00, // 11C00..11C6F; Bhaiksuki 3478 0x11C70, // 11C70..11CBF; Marchen 3479 0x11CC0, // unassigned 3480 0x11D00, // 11D00..11D5F; Masaram Gondi 3481 0x11D60, // 11D60..11DAF; Gunjala Gondi 3482 0x11DB0, // unassigned 3483 0x11EE0, // 11EE0..11EFF; Makasar 3484 0x11F00, // unassigned 3485 0x12000, // 12000..123FF; Cuneiform 3486 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3487 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3488 0x12550, // unassigned 3489 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3490 0x13430, // unassigned 3491 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3492 0x14680, // unassigned 3493 0x16800, // 16800..16A3F; Bamum Supplement 3494 0x16A40, // 16A40..16A6F; Mro 3495 0x16A70, // unassigned 3496 0x16AD0, // 16AD0..16AFF; Bassa Vah 3497 0x16B00, // 16B00..16B8F; Pahawh Hmong 3498 0x16B90, // unassigned 3499 0x16E40, // 16E40..16E9F; Medefaidrin 3500 0x16EA0, // unassigned 3501 0x16F00, // 
16F00..16F9F; Miao 3502 0x16FA0, // unassigned 3503 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3504 0x17000, // 17000..187FF; Tangut 3505 0x18800, // 18800..18AFF; Tangut Components 3506 0x18B00, // unassigned 3507 0x1B000, // 1B000..1B0FF; Kana Supplement 3508 0x1B100, // 1B100..1B12F; Kana Extended-A 3509 0x1B130, // unassigned 3510 0x1B170, // 1B170..1B2FF; Nushu 3511 0x1B300, // unassigned 3512 0x1BC00, // 1BC00..1BC9F; Duployan 3513 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3514 0x1BCB0, // unassigned 3515 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3516 0x1D100, // 1D100..1D1FF; Musical Symbols 3517 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3518 0x1D250, // unassigned 3519 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3520 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3521 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3522 0x1D380, // unassigned 3523 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3524 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3525 0x1DAB0, // unassigned 3526 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3527 0x1E030, // unassigned 3528 0x1E800, // 1E800..1E8DF; Mende Kikakui 3529 0x1E8E0, // unassigned 3530 0x1E900, // 1E900..1E95F; Adlam 3531 0x1E960, // unassigned 3532 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3533 0x1ECC0, // unassigned 3534 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3535 0x1EF00, // unassigned 3536 0x1F000, // 1F000..1F02F; Mahjong Tiles 3537 0x1F030, // 1F030..1F09F; Domino Tiles 3538 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3539 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3540 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3541 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3542 0x1F600, // 1F600..1F64F; Emoticons 3543 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3544 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3545 0x1F700, // 1F700..1F77F; Alchemical Symbols 3546 0x1F780, // 
1F780..1F7FF; Geometric Shapes Extended 3547 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C 3548 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3549 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3550 0x1FA70, // unassigned 3551 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3552 0x2A6E0, // unassigned 3553 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3554 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3555 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 3556 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 3557 0x2EBF0, // unassigned 3558 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 3559 0x2FA20, // unassigned 3560 0xE0000, // E0000..E007F; Tags 3561 0xE0080, // unassigned 3562 0xE0100, // E0100..E01EF; Variation Selectors Supplement 3563 0xE01F0, // unassigned 3564 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 3565 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 3566 }; 3567 3568 private static final UnicodeBlock[] blocks = { 3569 BASIC_LATIN, 3570 LATIN_1_SUPPLEMENT, 3571 LATIN_EXTENDED_A, 3572 LATIN_EXTENDED_B, 3573 IPA_EXTENSIONS, 3574 SPACING_MODIFIER_LETTERS, 3575 COMBINING_DIACRITICAL_MARKS, 3576 GREEK, 3577 CYRILLIC, 3578 CYRILLIC_SUPPLEMENTARY, 3579 ARMENIAN, 3580 HEBREW, 3581 ARABIC, 3582 SYRIAC, 3583 ARABIC_SUPPLEMENT, 3584 THAANA, 3585 NKO, 3586 SAMARITAN, 3587 MANDAIC, 3588 SYRIAC_SUPPLEMENT, 3589 null, 3590 ARABIC_EXTENDED_A, 3591 DEVANAGARI, 3592 BENGALI, 3593 GURMUKHI, 3594 GUJARATI, 3595 ORIYA, 3596 TAMIL, 3597 TELUGU, 3598 KANNADA, 3599 MALAYALAM, 3600 SINHALA, 3601 THAI, 3602 LAO, 3603 TIBETAN, 3604 MYANMAR, 3605 GEORGIAN, 3606 HANGUL_JAMO, 3607 ETHIOPIC, 3608 ETHIOPIC_SUPPLEMENT, 3609 CHEROKEE, 3610 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 3611 OGHAM, 3612 RUNIC, 3613 TAGALOG, 3614 HANUNOO, 3615 BUHID, 3616 TAGBANWA, 3617 KHMER, 3618 MONGOLIAN, 3619 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 3620 LIMBU, 3621 TAI_LE, 
3622 NEW_TAI_LUE, 3623 KHMER_SYMBOLS, 3624 BUGINESE, 3625 TAI_THAM, 3626 COMBINING_DIACRITICAL_MARKS_EXTENDED, 3627 BALINESE, 3628 SUNDANESE, 3629 BATAK, 3630 LEPCHA, 3631 OL_CHIKI, 3632 CYRILLIC_EXTENDED_C, 3633 GEORGIAN_EXTENDED, 3634 SUNDANESE_SUPPLEMENT, 3635 VEDIC_EXTENSIONS, 3636 PHONETIC_EXTENSIONS, 3637 PHONETIC_EXTENSIONS_SUPPLEMENT, 3638 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 3639 LATIN_EXTENDED_ADDITIONAL, 3640 GREEK_EXTENDED, 3641 GENERAL_PUNCTUATION, 3642 SUPERSCRIPTS_AND_SUBSCRIPTS, 3643 CURRENCY_SYMBOLS, 3644 COMBINING_MARKS_FOR_SYMBOLS, 3645 LETTERLIKE_SYMBOLS, 3646 NUMBER_FORMS, 3647 ARROWS, 3648 MATHEMATICAL_OPERATORS, 3649 MISCELLANEOUS_TECHNICAL, 3650 CONTROL_PICTURES, 3651 OPTICAL_CHARACTER_RECOGNITION, 3652 ENCLOSED_ALPHANUMERICS, 3653 BOX_DRAWING, 3654 BLOCK_ELEMENTS, 3655 GEOMETRIC_SHAPES, 3656 MISCELLANEOUS_SYMBOLS, 3657 DINGBATS, 3658 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 3659 SUPPLEMENTAL_ARROWS_A, 3660 BRAILLE_PATTERNS, 3661 SUPPLEMENTAL_ARROWS_B, 3662 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 3663 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 3664 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 3665 GLAGOLITIC, 3666 LATIN_EXTENDED_C, 3667 COPTIC, 3668 GEORGIAN_SUPPLEMENT, 3669 TIFINAGH, 3670 ETHIOPIC_EXTENDED, 3671 CYRILLIC_EXTENDED_A, 3672 SUPPLEMENTAL_PUNCTUATION, 3673 CJK_RADICALS_SUPPLEMENT, 3674 KANGXI_RADICALS, 3675 null, 3676 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 3677 CJK_SYMBOLS_AND_PUNCTUATION, 3678 HIRAGANA, 3679 KATAKANA, 3680 BOPOMOFO, 3681 HANGUL_COMPATIBILITY_JAMO, 3682 KANBUN, 3683 BOPOMOFO_EXTENDED, 3684 CJK_STROKES, 3685 KATAKANA_PHONETIC_EXTENSIONS, 3686 ENCLOSED_CJK_LETTERS_AND_MONTHS, 3687 CJK_COMPATIBILITY, 3688 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 3689 YIJING_HEXAGRAM_SYMBOLS, 3690 CJK_UNIFIED_IDEOGRAPHS, 3691 YI_SYLLABLES, 3692 YI_RADICALS, 3693 LISU, 3694 VAI, 3695 CYRILLIC_EXTENDED_B, 3696 BAMUM, 3697 MODIFIER_TONE_LETTERS, 3698 LATIN_EXTENDED_D, 3699 SYLOTI_NAGRI, 3700 COMMON_INDIC_NUMBER_FORMS, 3701 PHAGS_PA, 3702 SAURASHTRA, 3703 
DEVANAGARI_EXTENDED, 3704 KAYAH_LI, 3705 REJANG, 3706 HANGUL_JAMO_EXTENDED_A, 3707 JAVANESE, 3708 MYANMAR_EXTENDED_B, 3709 CHAM, 3710 MYANMAR_EXTENDED_A, 3711 TAI_VIET, 3712 MEETEI_MAYEK_EXTENSIONS, 3713 ETHIOPIC_EXTENDED_A, 3714 LATIN_EXTENDED_E, 3715 CHEROKEE_SUPPLEMENT, 3716 MEETEI_MAYEK, 3717 HANGUL_SYLLABLES, 3718 HANGUL_JAMO_EXTENDED_B, 3719 HIGH_SURROGATES, 3720 HIGH_PRIVATE_USE_SURROGATES, 3721 LOW_SURROGATES, 3722 PRIVATE_USE_AREA, 3723 CJK_COMPATIBILITY_IDEOGRAPHS, 3724 ALPHABETIC_PRESENTATION_FORMS, 3725 ARABIC_PRESENTATION_FORMS_A, 3726 VARIATION_SELECTORS, 3727 VERTICAL_FORMS, 3728 COMBINING_HALF_MARKS, 3729 CJK_COMPATIBILITY_FORMS, 3730 SMALL_FORM_VARIANTS, 3731 ARABIC_PRESENTATION_FORMS_B, 3732 HALFWIDTH_AND_FULLWIDTH_FORMS, 3733 SPECIALS, 3734 LINEAR_B_SYLLABARY, 3735 LINEAR_B_IDEOGRAMS, 3736 AEGEAN_NUMBERS, 3737 ANCIENT_GREEK_NUMBERS, 3738 ANCIENT_SYMBOLS, 3739 PHAISTOS_DISC, 3740 null, 3741 LYCIAN, 3742 CARIAN, 3743 COPTIC_EPACT_NUMBERS, 3744 OLD_ITALIC, 3745 GOTHIC, 3746 OLD_PERMIC, 3747 UGARITIC, 3748 OLD_PERSIAN, 3749 null, 3750 DESERET, 3751 SHAVIAN, 3752 OSMANYA, 3753 OSAGE, 3754 ELBASAN, 3755 CAUCASIAN_ALBANIAN, 3756 null, 3757 LINEAR_A, 3758 null, 3759 CYPRIOT_SYLLABARY, 3760 IMPERIAL_ARAMAIC, 3761 PALMYRENE, 3762 NABATAEAN, 3763 null, 3764 HATRAN, 3765 PHOENICIAN, 3766 LYDIAN, 3767 null, 3768 MEROITIC_HIEROGLYPHS, 3769 MEROITIC_CURSIVE, 3770 KHAROSHTHI, 3771 OLD_SOUTH_ARABIAN, 3772 OLD_NORTH_ARABIAN, 3773 null, 3774 MANICHAEAN, 3775 AVESTAN, 3776 INSCRIPTIONAL_PARTHIAN, 3777 INSCRIPTIONAL_PAHLAVI, 3778 PSALTER_PAHLAVI, 3779 null, 3780 OLD_TURKIC, 3781 null, 3782 OLD_HUNGARIAN, 3783 HANIFI_ROHINGYA, 3784 null, 3785 RUMI_NUMERAL_SYMBOLS, 3786 null, 3787 OLD_SOGDIAN, 3788 SOGDIAN, 3789 null, 3790 BRAHMI, 3791 KAITHI, 3792 SORA_SOMPENG, 3793 CHAKMA, 3794 MAHAJANI, 3795 SHARADA, 3796 SINHALA_ARCHAIC_NUMBERS, 3797 KHOJKI, 3798 null, 3799 MULTANI, 3800 KHUDAWADI, 3801 GRANTHA, 3802 null, 3803 NEWA, 3804 TIRHUTA, 3805 null, 3806 SIDDHAM, 3807 
MODI, 3808 MONGOLIAN_SUPPLEMENT, 3809 TAKRI, 3810 null, 3811 AHOM, 3812 null, 3813 DOGRA, 3814 null, 3815 WARANG_CITI, 3816 null, 3817 ZANABAZAR_SQUARE, 3818 SOYOMBO, 3819 null, 3820 PAU_CIN_HAU, 3821 null, 3822 BHAIKSUKI, 3823 MARCHEN, 3824 null, 3825 MASARAM_GONDI, 3826 GUNJALA_GONDI, 3827 null, 3828 MAKASAR, 3829 null, 3830 CUNEIFORM, 3831 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3832 EARLY_DYNASTIC_CUNEIFORM, 3833 null, 3834 EGYPTIAN_HIEROGLYPHS, 3835 null, 3836 ANATOLIAN_HIEROGLYPHS, 3837 null, 3838 BAMUM_SUPPLEMENT, 3839 MRO, 3840 null, 3841 BASSA_VAH, 3842 PAHAWH_HMONG, 3843 null, 3844 MEDEFAIDRIN, 3845 null, 3846 MIAO, 3847 null, 3848 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 3849 TANGUT, 3850 TANGUT_COMPONENTS, 3851 null, 3852 KANA_SUPPLEMENT, 3853 KANA_EXTENDED_A, 3854 null, 3855 NUSHU, 3856 null, 3857 DUPLOYAN, 3858 SHORTHAND_FORMAT_CONTROLS, 3859 null, 3860 BYZANTINE_MUSICAL_SYMBOLS, 3861 MUSICAL_SYMBOLS, 3862 ANCIENT_GREEK_MUSICAL_NOTATION, 3863 null, 3864 MAYAN_NUMERALS, 3865 TAI_XUAN_JING_SYMBOLS, 3866 COUNTING_ROD_NUMERALS, 3867 null, 3868 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3869 SUTTON_SIGNWRITING, 3870 null, 3871 GLAGOLITIC_SUPPLEMENT, 3872 null, 3873 MENDE_KIKAKUI, 3874 null, 3875 ADLAM, 3876 null, 3877 INDIC_SIYAQ_NUMBERS, 3878 null, 3879 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3880 null, 3881 MAHJONG_TILES, 3882 DOMINO_TILES, 3883 PLAYING_CARDS, 3884 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3885 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3886 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3887 EMOTICONS, 3888 ORNAMENTAL_DINGBATS, 3889 TRANSPORT_AND_MAP_SYMBOLS, 3890 ALCHEMICAL_SYMBOLS, 3891 GEOMETRIC_SHAPES_EXTENDED, 3892 SUPPLEMENTAL_ARROWS_C, 3893 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 3894 CHESS_SYMBOLS, 3895 null, 3896 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3897 null, 3898 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3899 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3900 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 3901 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 3902 null, 3903 
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3904 null, 3905 TAGS, 3906 null, 3907 VARIATION_SELECTORS_SUPPLEMENT, 3908 null, 3909 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3910 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3911 }; 3912 3913 3914 /** 3915 * Returns the object representing the Unicode block containing the 3916 * given character, or {@code null} if the character is not a 3917 * member of a defined block. 3918 * 3919 * <p><b>Note:</b> This method cannot handle 3920 * <a href="Character.html#supplementary"> supplementary 3921 * characters</a>. To support all Unicode characters, including 3922 * supplementary characters, use the {@link #of(int)} method. 3923 * 3924 * @param c The character in question 3925 * @return The {@code UnicodeBlock} instance representing the 3926 * Unicode block of which this character is a member, or 3927 * {@code null} if the character is not a member of any 3928 * Unicode block 3929 */ 3930 public static UnicodeBlock of(char c) { 3931 return of((int)c); 3932 } 3933 3934 /** 3935 * Returns the object representing the Unicode block 3936 * containing the given character (Unicode code point), or 3937 * {@code null} if the character is not a member of a 3938 * defined block. 3939 * 3940 * @param codePoint the character (Unicode code point) in question. 3941 * @return The {@code UnicodeBlock} instance representing the 3942 * Unicode block of which this character is a member, or 3943 * {@code null} if the character is not a member of any 3944 * Unicode block 3945 * @throws IllegalArgumentException if the specified 3946 * {@code codePoint} is an invalid Unicode code point. 
3947 * @see Character#isValidCodePoint(int) 3948 * @since 1.5 3949 */ 3950 public static UnicodeBlock of(int codePoint) { 3951 if (!isValidCodePoint(codePoint)) { 3952 throw new IllegalArgumentException( 3953 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 3954 } 3955 3956 int top, bottom, current; 3957 bottom = 0; 3958 top = blockStarts.length; 3959 current = top/2; 3960 3961 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3962 while (top - bottom > 1) { 3963 if (codePoint >= blockStarts[current]) { 3964 bottom = current; 3965 } else { 3966 top = current; 3967 } 3968 current = (top + bottom) / 2; 3969 } 3970 return blocks[current]; 3971 } 3972 3973 /** 3974 * Returns the UnicodeBlock with the given name. Block 3975 * names are determined by The Unicode Standard. The file 3976 * {@code Blocks-<version>.txt} defines blocks for a particular 3977 * version of the standard. The {@link Character} class specifies 3978 * the version of the standard that it supports. 3979 * <p> 3980 * This method accepts block names in the following forms: 3981 * <ol> 3982 * <li> Canonical block names as defined by the Unicode Standard. 3983 * For example, the standard defines a "Basic Latin" block. Therefore, this 3984 * method accepts "Basic Latin" as a valid block name. The documentation of 3985 * each UnicodeBlock provides the canonical name. 3986 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3987 * is a valid block name for the "Basic Latin" block. 3988 * <li>The text representation of each constant UnicodeBlock identifier. 3989 * For example, this method will return the {@link #BASIC_LATIN} block if 3990 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3991 * hyphens in the canonical name with underscores. 3992 * </ol> 3993 * Finally, character case is ignored for all of the valid block name forms. 
3994 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3995 * The en_US locale's case mapping rules are used to provide case-insensitive 3996 * string comparisons for block name validation. 3997 * <p> 3998 * If the Unicode Standard changes block names, both the previous and 3999 * current names will be accepted. 4000 * 4001 * @param blockName A {@code UnicodeBlock} name. 4002 * @return The {@code UnicodeBlock} instance identified 4003 * by {@code blockName} 4004 * @throws IllegalArgumentException if {@code blockName} is an 4005 * invalid name 4006 * @throws NullPointerException if {@code blockName} is null 4007 * @since 1.5 4008 */ 4009 public static final UnicodeBlock forName(String blockName) { 4010 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4011 if (block == null) { 4012 throw new IllegalArgumentException("Not a valid block name: " 4013 + blockName); 4014 } 4015 return block; 4016 } 4017 } 4018 4019 4020 /** 4021 * A family of character subsets representing the character scripts 4022 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4023 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4024 * character is assigned to a single Unicode script, either a specific 4025 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4026 * one of the following three special values, 4027 * {@link Character.UnicodeScript#INHERITED Inherited}, 4028 * {@link Character.UnicodeScript#COMMON Common} or 4029 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4030 * 4031 * @since 1.7 4032 */ 4033 public static enum UnicodeScript { 4034 /** 4035 * Unicode script "Common". 4036 */ 4037 COMMON, 4038 4039 /** 4040 * Unicode script "Latin". 4041 */ 4042 LATIN, 4043 4044 /** 4045 * Unicode script "Greek". 4046 */ 4047 GREEK, 4048 4049 /** 4050 * Unicode script "Cyrillic". 4051 */ 4052 CYRILLIC, 4053 4054 /** 4055 * Unicode script "Armenian". 
4056 */ 4057 ARMENIAN, 4058 4059 /** 4060 * Unicode script "Hebrew". 4061 */ 4062 HEBREW, 4063 4064 /** 4065 * Unicode script "Arabic". 4066 */ 4067 ARABIC, 4068 4069 /** 4070 * Unicode script "Syriac". 4071 */ 4072 SYRIAC, 4073 4074 /** 4075 * Unicode script "Thaana". 4076 */ 4077 THAANA, 4078 4079 /** 4080 * Unicode script "Devanagari". 4081 */ 4082 DEVANAGARI, 4083 4084 /** 4085 * Unicode script "Bengali". 4086 */ 4087 BENGALI, 4088 4089 /** 4090 * Unicode script "Gurmukhi". 4091 */ 4092 GURMUKHI, 4093 4094 /** 4095 * Unicode script "Gujarati". 4096 */ 4097 GUJARATI, 4098 4099 /** 4100 * Unicode script "Oriya". 4101 */ 4102 ORIYA, 4103 4104 /** 4105 * Unicode script "Tamil". 4106 */ 4107 TAMIL, 4108 4109 /** 4110 * Unicode script "Telugu". 4111 */ 4112 TELUGU, 4113 4114 /** 4115 * Unicode script "Kannada". 4116 */ 4117 KANNADA, 4118 4119 /** 4120 * Unicode script "Malayalam". 4121 */ 4122 MALAYALAM, 4123 4124 /** 4125 * Unicode script "Sinhala". 4126 */ 4127 SINHALA, 4128 4129 /** 4130 * Unicode script "Thai". 4131 */ 4132 THAI, 4133 4134 /** 4135 * Unicode script "Lao". 4136 */ 4137 LAO, 4138 4139 /** 4140 * Unicode script "Tibetan". 4141 */ 4142 TIBETAN, 4143 4144 /** 4145 * Unicode script "Myanmar". 4146 */ 4147 MYANMAR, 4148 4149 /** 4150 * Unicode script "Georgian". 4151 */ 4152 GEORGIAN, 4153 4154 /** 4155 * Unicode script "Hangul". 4156 */ 4157 HANGUL, 4158 4159 /** 4160 * Unicode script "Ethiopic". 4161 */ 4162 ETHIOPIC, 4163 4164 /** 4165 * Unicode script "Cherokee". 4166 */ 4167 CHEROKEE, 4168 4169 /** 4170 * Unicode script "Canadian_Aboriginal". 4171 */ 4172 CANADIAN_ABORIGINAL, 4173 4174 /** 4175 * Unicode script "Ogham". 4176 */ 4177 OGHAM, 4178 4179 /** 4180 * Unicode script "Runic". 4181 */ 4182 RUNIC, 4183 4184 /** 4185 * Unicode script "Khmer". 4186 */ 4187 KHMER, 4188 4189 /** 4190 * Unicode script "Mongolian". 4191 */ 4192 MONGOLIAN, 4193 4194 /** 4195 * Unicode script "Hiragana". 
4196 */ 4197 HIRAGANA, 4198 4199 /** 4200 * Unicode script "Katakana". 4201 */ 4202 KATAKANA, 4203 4204 /** 4205 * Unicode script "Bopomofo". 4206 */ 4207 BOPOMOFO, 4208 4209 /** 4210 * Unicode script "Han". 4211 */ 4212 HAN, 4213 4214 /** 4215 * Unicode script "Yi". 4216 */ 4217 YI, 4218 4219 /** 4220 * Unicode script "Old_Italic". 4221 */ 4222 OLD_ITALIC, 4223 4224 /** 4225 * Unicode script "Gothic". 4226 */ 4227 GOTHIC, 4228 4229 /** 4230 * Unicode script "Deseret". 4231 */ 4232 DESERET, 4233 4234 /** 4235 * Unicode script "Inherited". 4236 */ 4237 INHERITED, 4238 4239 /** 4240 * Unicode script "Tagalog". 4241 */ 4242 TAGALOG, 4243 4244 /** 4245 * Unicode script "Hanunoo". 4246 */ 4247 HANUNOO, 4248 4249 /** 4250 * Unicode script "Buhid". 4251 */ 4252 BUHID, 4253 4254 /** 4255 * Unicode script "Tagbanwa". 4256 */ 4257 TAGBANWA, 4258 4259 /** 4260 * Unicode script "Limbu". 4261 */ 4262 LIMBU, 4263 4264 /** 4265 * Unicode script "Tai_Le". 4266 */ 4267 TAI_LE, 4268 4269 /** 4270 * Unicode script "Linear_B". 4271 */ 4272 LINEAR_B, 4273 4274 /** 4275 * Unicode script "Ugaritic". 4276 */ 4277 UGARITIC, 4278 4279 /** 4280 * Unicode script "Shavian". 4281 */ 4282 SHAVIAN, 4283 4284 /** 4285 * Unicode script "Osmanya". 4286 */ 4287 OSMANYA, 4288 4289 /** 4290 * Unicode script "Cypriot". 4291 */ 4292 CYPRIOT, 4293 4294 /** 4295 * Unicode script "Braille". 4296 */ 4297 BRAILLE, 4298 4299 /** 4300 * Unicode script "Buginese". 4301 */ 4302 BUGINESE, 4303 4304 /** 4305 * Unicode script "Coptic". 4306 */ 4307 COPTIC, 4308 4309 /** 4310 * Unicode script "New_Tai_Lue". 4311 */ 4312 NEW_TAI_LUE, 4313 4314 /** 4315 * Unicode script "Glagolitic". 4316 */ 4317 GLAGOLITIC, 4318 4319 /** 4320 * Unicode script "Tifinagh". 4321 */ 4322 TIFINAGH, 4323 4324 /** 4325 * Unicode script "Syloti_Nagri". 4326 */ 4327 SYLOTI_NAGRI, 4328 4329 /** 4330 * Unicode script "Old_Persian". 4331 */ 4332 OLD_PERSIAN, 4333 4334 /** 4335 * Unicode script "Kharoshthi". 
4336 */ 4337 KHAROSHTHI, 4338 4339 /** 4340 * Unicode script "Balinese". 4341 */ 4342 BALINESE, 4343 4344 /** 4345 * Unicode script "Cuneiform". 4346 */ 4347 CUNEIFORM, 4348 4349 /** 4350 * Unicode script "Phoenician". 4351 */ 4352 PHOENICIAN, 4353 4354 /** 4355 * Unicode script "Phags_Pa". 4356 */ 4357 PHAGS_PA, 4358 4359 /** 4360 * Unicode script "Nko". 4361 */ 4362 NKO, 4363 4364 /** 4365 * Unicode script "Sundanese". 4366 */ 4367 SUNDANESE, 4368 4369 /** 4370 * Unicode script "Batak". 4371 */ 4372 BATAK, 4373 4374 /** 4375 * Unicode script "Lepcha". 4376 */ 4377 LEPCHA, 4378 4379 /** 4380 * Unicode script "Ol_Chiki". 4381 */ 4382 OL_CHIKI, 4383 4384 /** 4385 * Unicode script "Vai". 4386 */ 4387 VAI, 4388 4389 /** 4390 * Unicode script "Saurashtra". 4391 */ 4392 SAURASHTRA, 4393 4394 /** 4395 * Unicode script "Kayah_Li". 4396 */ 4397 KAYAH_LI, 4398 4399 /** 4400 * Unicode script "Rejang". 4401 */ 4402 REJANG, 4403 4404 /** 4405 * Unicode script "Lycian". 4406 */ 4407 LYCIAN, 4408 4409 /** 4410 * Unicode script "Carian". 4411 */ 4412 CARIAN, 4413 4414 /** 4415 * Unicode script "Lydian". 4416 */ 4417 LYDIAN, 4418 4419 /** 4420 * Unicode script "Cham". 4421 */ 4422 CHAM, 4423 4424 /** 4425 * Unicode script "Tai_Tham". 4426 */ 4427 TAI_THAM, 4428 4429 /** 4430 * Unicode script "Tai_Viet". 4431 */ 4432 TAI_VIET, 4433 4434 /** 4435 * Unicode script "Avestan". 4436 */ 4437 AVESTAN, 4438 4439 /** 4440 * Unicode script "Egyptian_Hieroglyphs". 4441 */ 4442 EGYPTIAN_HIEROGLYPHS, 4443 4444 /** 4445 * Unicode script "Samaritan". 4446 */ 4447 SAMARITAN, 4448 4449 /** 4450 * Unicode script "Mandaic". 4451 */ 4452 MANDAIC, 4453 4454 /** 4455 * Unicode script "Lisu". 4456 */ 4457 LISU, 4458 4459 /** 4460 * Unicode script "Bamum". 4461 */ 4462 BAMUM, 4463 4464 /** 4465 * Unicode script "Javanese". 4466 */ 4467 JAVANESE, 4468 4469 /** 4470 * Unicode script "Meetei_Mayek". 4471 */ 4472 MEETEI_MAYEK, 4473 4474 /** 4475 * Unicode script "Imperial_Aramaic". 
4476 */ 4477 IMPERIAL_ARAMAIC, 4478 4479 /** 4480 * Unicode script "Old_South_Arabian". 4481 */ 4482 OLD_SOUTH_ARABIAN, 4483 4484 /** 4485 * Unicode script "Inscriptional_Parthian". 4486 */ 4487 INSCRIPTIONAL_PARTHIAN, 4488 4489 /** 4490 * Unicode script "Inscriptional_Pahlavi". 4491 */ 4492 INSCRIPTIONAL_PAHLAVI, 4493 4494 /** 4495 * Unicode script "Old_Turkic". 4496 */ 4497 OLD_TURKIC, 4498 4499 /** 4500 * Unicode script "Brahmi". 4501 */ 4502 BRAHMI, 4503 4504 /** 4505 * Unicode script "Kaithi". 4506 */ 4507 KAITHI, 4508 4509 /** 4510 * Unicode script "Meroitic Hieroglyphs". 4511 * @since 1.8 4512 */ 4513 MEROITIC_HIEROGLYPHS, 4514 4515 /** 4516 * Unicode script "Meroitic Cursive". 4517 * @since 1.8 4518 */ 4519 MEROITIC_CURSIVE, 4520 4521 /** 4522 * Unicode script "Sora Sompeng". 4523 * @since 1.8 4524 */ 4525 SORA_SOMPENG, 4526 4527 /** 4528 * Unicode script "Chakma". 4529 * @since 1.8 4530 */ 4531 CHAKMA, 4532 4533 /** 4534 * Unicode script "Sharada". 4535 * @since 1.8 4536 */ 4537 SHARADA, 4538 4539 /** 4540 * Unicode script "Takri". 4541 * @since 1.8 4542 */ 4543 TAKRI, 4544 4545 /** 4546 * Unicode script "Miao". 4547 * @since 1.8 4548 */ 4549 MIAO, 4550 4551 /** 4552 * Unicode script "Caucasian Albanian". 4553 * @since 9 4554 */ 4555 CAUCASIAN_ALBANIAN, 4556 4557 /** 4558 * Unicode script "Bassa Vah". 4559 * @since 9 4560 */ 4561 BASSA_VAH, 4562 4563 /** 4564 * Unicode script "Duployan". 4565 * @since 9 4566 */ 4567 DUPLOYAN, 4568 4569 /** 4570 * Unicode script "Elbasan". 4571 * @since 9 4572 */ 4573 ELBASAN, 4574 4575 /** 4576 * Unicode script "Grantha". 4577 * @since 9 4578 */ 4579 GRANTHA, 4580 4581 /** 4582 * Unicode script "Pahawh Hmong". 4583 * @since 9 4584 */ 4585 PAHAWH_HMONG, 4586 4587 /** 4588 * Unicode script "Khojki". 4589 * @since 9 4590 */ 4591 KHOJKI, 4592 4593 /** 4594 * Unicode script "Linear A". 4595 * @since 9 4596 */ 4597 LINEAR_A, 4598 4599 /** 4600 * Unicode script "Mahajani". 
4601 * @since 9 4602 */ 4603 MAHAJANI, 4604 4605 /** 4606 * Unicode script "Manichaean". 4607 * @since 9 4608 */ 4609 MANICHAEAN, 4610 4611 /** 4612 * Unicode script "Mende Kikakui". 4613 * @since 9 4614 */ 4615 MENDE_KIKAKUI, 4616 4617 /** 4618 * Unicode script "Modi". 4619 * @since 9 4620 */ 4621 MODI, 4622 4623 /** 4624 * Unicode script "Mro". 4625 * @since 9 4626 */ 4627 MRO, 4628 4629 /** 4630 * Unicode script "Old North Arabian". 4631 * @since 9 4632 */ 4633 OLD_NORTH_ARABIAN, 4634 4635 /** 4636 * Unicode script "Nabataean". 4637 * @since 9 4638 */ 4639 NABATAEAN, 4640 4641 /** 4642 * Unicode script "Palmyrene". 4643 * @since 9 4644 */ 4645 PALMYRENE, 4646 4647 /** 4648 * Unicode script "Pau Cin Hau". 4649 * @since 9 4650 */ 4651 PAU_CIN_HAU, 4652 4653 /** 4654 * Unicode script "Old Permic". 4655 * @since 9 4656 */ 4657 OLD_PERMIC, 4658 4659 /** 4660 * Unicode script "Psalter Pahlavi". 4661 * @since 9 4662 */ 4663 PSALTER_PAHLAVI, 4664 4665 /** 4666 * Unicode script "Siddham". 4667 * @since 9 4668 */ 4669 SIDDHAM, 4670 4671 /** 4672 * Unicode script "Khudawadi". 4673 * @since 9 4674 */ 4675 KHUDAWADI, 4676 4677 /** 4678 * Unicode script "Tirhuta". 4679 * @since 9 4680 */ 4681 TIRHUTA, 4682 4683 /** 4684 * Unicode script "Warang Citi". 4685 * @since 9 4686 */ 4687 WARANG_CITI, 4688 4689 /** 4690 * Unicode script "Ahom". 4691 * @since 9 4692 */ 4693 AHOM, 4694 4695 /** 4696 * Unicode script "Anatolian Hieroglyphs". 4697 * @since 9 4698 */ 4699 ANATOLIAN_HIEROGLYPHS, 4700 4701 /** 4702 * Unicode script "Hatran". 4703 * @since 9 4704 */ 4705 HATRAN, 4706 4707 /** 4708 * Unicode script "Multani". 4709 * @since 9 4710 */ 4711 MULTANI, 4712 4713 /** 4714 * Unicode script "Old Hungarian". 4715 * @since 9 4716 */ 4717 OLD_HUNGARIAN, 4718 4719 /** 4720 * Unicode script "SignWriting". 4721 * @since 9 4722 */ 4723 SIGNWRITING, 4724 4725 /** 4726 * Unicode script "Adlam". 4727 * @since 11 4728 */ 4729 ADLAM, 4730 4731 /** 4732 * Unicode script "Bhaiksuki". 
4733 * @since 11 4734 */ 4735 BHAIKSUKI, 4736 4737 /** 4738 * Unicode script "Marchen". 4739 * @since 11 4740 */ 4741 MARCHEN, 4742 4743 /** 4744 * Unicode script "Newa". 4745 * @since 11 4746 */ 4747 NEWA, 4748 4749 /** 4750 * Unicode script "Osage". 4751 * @since 11 4752 */ 4753 OSAGE, 4754 4755 /** 4756 * Unicode script "Tangut". 4757 * @since 11 4758 */ 4759 TANGUT, 4760 4761 /** 4762 * Unicode script "Masaram Gondi". 4763 * @since 11 4764 */ 4765 MASARAM_GONDI, 4766 4767 /** 4768 * Unicode script "Nushu". 4769 * @since 11 4770 */ 4771 NUSHU, 4772 4773 /** 4774 * Unicode script "Soyombo". 4775 * @since 11 4776 */ 4777 SOYOMBO, 4778 4779 /** 4780 * Unicode script "Zanabazar Square". 4781 * @since 11 4782 */ 4783 ZANABAZAR_SQUARE, 4784 4785 /** 4786 * Unicode script "Hanifi Rohingya". 4787 * @since 12 4788 */ 4789 HANIFI_ROHINGYA, 4790 4791 /** 4792 * Unicode script "Old Sogdian". 4793 * @since 12 4794 */ 4795 OLD_SOGDIAN, 4796 4797 /** 4798 * Unicode script "Sogdian". 4799 * @since 12 4800 */ 4801 SOGDIAN, 4802 4803 /** 4804 * Unicode script "Dogra". 4805 * @since 12 4806 */ 4807 DOGRA, 4808 4809 /** 4810 * Unicode script "Gunjala Gondi". 4811 * @since 12 4812 */ 4813 GUNJALA_GONDI, 4814 4815 /** 4816 * Unicode script "Makasar". 4817 * @since 12 4818 */ 4819 MAKASAR, 4820 4821 /** 4822 * Unicode script "Medefaidrin". 4823 * @since 12 4824 */ 4825 MEDEFAIDRIN, 4826 4827 /** 4828 * Unicode script "Unknown". 
4829 */ 4830 UNKNOWN; 4831 4832 private static final int[] scriptStarts = { 4833 0x0000, // 0000..0040; COMMON 4834 0x0041, // 0041..005A; LATIN 4835 0x005B, // 005B..0060; COMMON 4836 0x0061, // 0061..007A; LATIN 4837 0x007B, // 007B..00A9; COMMON 4838 0x00AA, // 00AA ; LATIN 4839 0x00AB, // 00AB..00B9; COMMON 4840 0x00BA, // 00BA ; LATIN 4841 0x00BB, // 00BB..00BF; COMMON 4842 0x00C0, // 00C0..00D6; LATIN 4843 0x00D7, // 00D7 ; COMMON 4844 0x00D8, // 00D8..00F6; LATIN 4845 0x00F7, // 00F7 ; COMMON 4846 0x00F8, // 00F8..02B8; LATIN 4847 0x02B9, // 02B9..02DF; COMMON 4848 0x02E0, // 02E0..02E4; LATIN 4849 0x02E5, // 02E5..02E9; COMMON 4850 0x02EA, // 02EA..02EB; BOPOMOFO 4851 0x02EC, // 02EC..02FF; COMMON 4852 0x0300, // 0300..036F; INHERITED 4853 0x0370, // 0370..0373; GREEK 4854 0x0374, // 0374 ; COMMON 4855 0x0375, // 0375..0377; GREEK 4856 0x0378, // 0378..0379; UNKNOWN 4857 0x037A, // 037A..037D; GREEK 4858 0x037E, // 037E ; COMMON 4859 0x037F, // 037F ; GREEK 4860 0x0380, // 0380..0383; UNKNOWN 4861 0x0384, // 0384 ; GREEK 4862 0x0385, // 0385 ; COMMON 4863 0x0386, // 0386 ; GREEK 4864 0x0387, // 0387 ; COMMON 4865 0x0388, // 0388..038A; GREEK 4866 0x038B, // 038B ; UNKNOWN 4867 0x038C, // 038C ; GREEK 4868 0x038D, // 038D ; UNKNOWN 4869 0x038E, // 038E..03A1; GREEK 4870 0x03A2, // 03A2 ; UNKNOWN 4871 0x03A3, // 03A3..03E1; GREEK 4872 0x03E2, // 03E2..03EF; COPTIC 4873 0x03F0, // 03F0..03FF; GREEK 4874 0x0400, // 0400..0484; CYRILLIC 4875 0x0485, // 0485..0486; INHERITED 4876 0x0487, // 0487..052F; CYRILLIC 4877 0x0530, // 0530 ; UNKNOWN 4878 0x0531, // 0531..0556; ARMENIAN 4879 0x0557, // 0557..0558; UNKNOWN 4880 0x0559, // 0559..0588; ARMENIAN 4881 0x0589, // 0589 ; COMMON 4882 0x058A, // 058A ; ARMENIAN 4883 0x058B, // 058B..058C; UNKNOWN 4884 0x058D, // 058D..058F; ARMENIAN 4885 0x0590, // 0590 ; UNKNOWN 4886 0x0591, // 0591..05C7; HEBREW 4887 0x05C8, // 05C8..05CF; UNKNOWN 4888 0x05D0, // 05D0..05EA; HEBREW 4889 0x05EB, // 05EB..05EE; UNKNOWN 4890 
0x05EF, // 05EF..05F4; HEBREW 4891 0x05F5, // 05F5..05FF; UNKNOWN 4892 0x0600, // 0600..0604; ARABIC 4893 0x0605, // 0605 ; COMMON 4894 0x0606, // 0606..060B; ARABIC 4895 0x060C, // 060C ; COMMON 4896 0x060D, // 060D..061A; ARABIC 4897 0x061B, // 061B ; COMMON 4898 0x061C, // 061C ; ARABIC 4899 0x061D, // 061D ; UNKNOWN 4900 0x061E, // 061E ; ARABIC 4901 0x061F, // 061F ; COMMON 4902 0x0620, // 0620..063F; ARABIC 4903 0x0640, // 0640 ; COMMON 4904 0x0641, // 0641..064A; ARABIC 4905 0x064B, // 064B..0655; INHERITED 4906 0x0656, // 0656..066F; ARABIC 4907 0x0670, // 0670 ; INHERITED 4908 0x0671, // 0671..06DC; ARABIC 4909 0x06DD, // 06DD ; COMMON 4910 0x06DE, // 06DE..06FF; ARABIC 4911 0x0700, // 0700..070D; SYRIAC 4912 0x070E, // 070E ; UNKNOWN 4913 0x070F, // 070F..074A; SYRIAC 4914 0x074B, // 074B..074C; UNKNOWN 4915 0x074D, // 074D..074F; SYRIAC 4916 0x0750, // 0750..077F; ARABIC 4917 0x0780, // 0780..07B1; THAANA 4918 0x07B2, // 07B2..07BF; UNKNOWN 4919 0x07C0, // 07C0..07FA; NKO 4920 0x07FB, // 07FB..07FC; UNKNOWN 4921 0X07FD, // 07FD..07FF; NKO 4922 0x0800, // 0800..082D; SAMARITAN 4923 0x082E, // 082E..082F; UNKNOWN 4924 0x0830, // 0830..083E; SAMARITAN 4925 0x083F, // 083F ; UNKNOWN 4926 0x0840, // 0840..085B; MANDAIC 4927 0x085C, // 085C..085D; UNKNOWN 4928 0x085E, // 085E ; MANDAIC 4929 0x085F, // 085F ; UNKNOWN 4930 0x0860, // 0860..086A; SYRIAC 4931 0x086B, // 086B..089F; UNKNOWN 4932 0x08A0, // 08A0..08B4; ARABIC 4933 0x08B5, // 08B5 ; UNKNOWN 4934 0x08B6, // 08B6..08BD; ARABIC 4935 0x08BE, // 08BE..08D2; UNKNOWN 4936 0x08D3, // 08D3..08E1; ARABIC 4937 0x08E2, // 08E2 ; COMMON 4938 0x08E3, // 08E3..08FF; ARABIC 4939 0x0900, // 0900..0950; DEVANAGARI 4940 0x0951, // 0951..0952; INHERITED 4941 0x0953, // 0953..0963; DEVANAGARI 4942 0x0964, // 0964..0965; COMMON 4943 0x0966, // 0966..097F; DEVANAGARI 4944 0x0980, // 0980..0983; BENGALI 4945 0x0984, // 0984 ; UNKNOWN 4946 0x0985, // 0985..098C; BENGALI 4947 0x098D, // 098D..098E; UNKNOWN 4948 0x098F, // 
098F..0990; BENGALI 4949 0x0991, // 0991..0992; UNKNOWN 4950 0x0993, // 0993..09A8; BENGALI 4951 0x09A9, // 09A9 ; UNKNOWN 4952 0x09AA, // 09AA..09B0; BENGALI 4953 0x09B1, // 09B1 ; UNKNOWN 4954 0x09B2, // 09B2 ; BENGALI 4955 0x09B3, // 09B3..09B5; UNKNOWN 4956 0x09B6, // 09B6..09B9; BENGALI 4957 0x09BA, // 09BA..09BB; UNKNOWN 4958 0x09BC, // 09BC..09C4; BENGALI 4959 0x09C5, // 09C5..09C6; UNKNOWN 4960 0x09C7, // 09C7..09C8; BENGALI 4961 0x09C9, // 09C9..09CA; UNKNOWN 4962 0x09CB, // 09CB..09CE; BENGALI 4963 0x09CF, // 09CF..09D6; UNKNOWN 4964 0x09D7, // 09D7 ; BENGALI 4965 0x09D8, // 09D8..09DB; UNKNOWN 4966 0x09DC, // 09DC..09DD; BENGALI 4967 0x09DE, // 09DE ; UNKNOWN 4968 0x09DF, // 09DF..09E3; BENGALI 4969 0x09E4, // 09E4..09E5; UNKNOWN 4970 0x09E6, // 09E6..09FE; BENGALI 4971 0x09FF, // 09FF..0A00; UNKNOWN 4972 0x0A01, // 0A01..0A03; GURMUKHI 4973 0x0A04, // 0A04 ; UNKNOWN 4974 0x0A05, // 0A05..0A0A; GURMUKHI 4975 0x0A0B, // 0A0B..0A0E; UNKNOWN 4976 0x0A0F, // 0A0F..0A10; GURMUKHI 4977 0x0A11, // 0A11..0A12; UNKNOWN 4978 0x0A13, // 0A13..0A28; GURMUKHI 4979 0x0A29, // 0A29 ; UNKNOWN 4980 0x0A2A, // 0A2A..0A30; GURMUKHI 4981 0x0A31, // 0A31 ; UNKNOWN 4982 0x0A32, // 0A32..0A33; GURMUKHI 4983 0x0A34, // 0A34 ; UNKNOWN 4984 0x0A35, // 0A35..0A36; GURMUKHI 4985 0x0A37, // 0A37 ; UNKNOWN 4986 0x0A38, // 0A38..0A39; GURMUKHI 4987 0x0A3A, // 0A3A..0A3B; UNKNOWN 4988 0x0A3C, // 0A3C ; GURMUKHI 4989 0x0A3D, // 0A3D ; UNKNOWN 4990 0x0A3E, // 0A3E..0A42; GURMUKHI 4991 0x0A43, // 0A43..0A46; UNKNOWN 4992 0x0A47, // 0A47..0A48; GURMUKHI 4993 0x0A49, // 0A49..0A4A; UNKNOWN 4994 0x0A4B, // 0A4B..0A4D; GURMUKHI 4995 0x0A4E, // 0A4E..0A50; UNKNOWN 4996 0x0A51, // 0A51 ; GURMUKHI 4997 0x0A52, // 0A52..0A58; UNKNOWN 4998 0x0A59, // 0A59..0A5C; GURMUKHI 4999 0x0A5D, // 0A5D ; UNKNOWN 5000 0x0A5E, // 0A5E ; GURMUKHI 5001 0x0A5F, // 0A5F..0A65; UNKNOWN 5002 0x0A66, // 0A66..0A76; GURMUKHI 5003 0x0A77, // 0A77..0A80; UNKNOWN 5004 0x0A81, // 0A81..0A83; GUJARATI 5005 0x0A84, // 0A84 
; UNKNOWN 5006 0x0A85, // 0A85..0A8D; GUJARATI 5007 0x0A8E, // 0A8E ; UNKNOWN 5008 0x0A8F, // 0A8F..0A91; GUJARATI 5009 0x0A92, // 0A92 ; UNKNOWN 5010 0x0A93, // 0A93..0AA8; GUJARATI 5011 0x0AA9, // 0AA9 ; UNKNOWN 5012 0x0AAA, // 0AAA..0AB0; GUJARATI 5013 0x0AB1, // 0AB1 ; UNKNOWN 5014 0x0AB2, // 0AB2..0AB3; GUJARATI 5015 0x0AB4, // 0AB4 ; UNKNOWN 5016 0x0AB5, // 0AB5..0AB9; GUJARATI 5017 0x0ABA, // 0ABA..0ABB; UNKNOWN 5018 0x0ABC, // 0ABC..0AC5; GUJARATI 5019 0x0AC6, // 0AC6 ; UNKNOWN 5020 0x0AC7, // 0AC7..0AC9; GUJARATI 5021 0x0ACA, // 0ACA ; UNKNOWN 5022 0x0ACB, // 0ACB..0ACD; GUJARATI 5023 0x0ACE, // 0ACE..0ACF; UNKNOWN 5024 0x0AD0, // 0AD0 ; GUJARATI 5025 0x0AD1, // 0AD1..0ADF; UNKNOWN 5026 0x0AE0, // 0AE0..0AE3; GUJARATI 5027 0x0AE4, // 0AE4..0AE5; UNKNOWN 5028 0x0AE6, // 0AE6..0AF1; GUJARATI 5029 0x0AF2, // 0AF2..0AF8; UNKNOWN 5030 0x0AF9, // 0AF9..0AFF; GUJARATI 5031 0x0B00, // 0B00 ; UNKNOWN 5032 0x0B01, // 0B01..0B03; ORIYA 5033 0x0B04, // 0B04 ; UNKNOWN 5034 0x0B05, // 0B05..0B0C; ORIYA 5035 0x0B0D, // 0B0D..0B0E; UNKNOWN 5036 0x0B0F, // 0B0F..0B10; ORIYA 5037 0x0B11, // 0B11..0B12; UNKNOWN 5038 0x0B13, // 0B13..0B28; ORIYA 5039 0x0B29, // 0B29 ; UNKNOWN 5040 0x0B2A, // 0B2A..0B30; ORIYA 5041 0x0B31, // 0B31 ; UNKNOWN 5042 0x0B32, // 0B32..0B33; ORIYA 5043 0x0B34, // 0B34 ; UNKNOWN 5044 0x0B35, // 0B35..0B39; ORIYA 5045 0x0B3A, // 0B3A..0B3B; UNKNOWN 5046 0x0B3C, // 0B3C..0B44; ORIYA 5047 0x0B45, // 0B45..0B46; UNKNOWN 5048 0x0B47, // 0B47..0B48; ORIYA 5049 0x0B49, // 0B49..0B4A; UNKNOWN 5050 0x0B4B, // 0B4B..0B4D; ORIYA 5051 0x0B4E, // 0B4E..0B55; UNKNOWN 5052 0x0B56, // 0B56..0B57; ORIYA 5053 0x0B58, // 0B58..0B5B; UNKNOWN 5054 0x0B5C, // 0B5C..0B5D; ORIYA 5055 0x0B5E, // 0B5E ; UNKNOWN 5056 0x0B5F, // 0B5F..0B63; ORIYA 5057 0x0B64, // 0B64..0B65; UNKNOWN 5058 0x0B66, // 0B66..0B77; ORIYA 5059 0x0B78, // 0B78..0B81; UNKNOWN 5060 0x0B82, // 0B82..0B83; TAMIL 5061 0x0B84, // 0B84 ; UNKNOWN 5062 0x0B85, // 0B85..0B8A; TAMIL 5063 0x0B8B, // 0B8B..0B8D; 
UNKNOWN 5064 0x0B8E, // 0B8E..0B90; TAMIL 5065 0x0B91, // 0B91 ; UNKNOWN 5066 0x0B92, // 0B92..0B95; TAMIL 5067 0x0B96, // 0B96..0B98; UNKNOWN 5068 0x0B99, // 0B99..0B9A; TAMIL 5069 0x0B9B, // 0B9B ; UNKNOWN 5070 0x0B9C, // 0B9C ; TAMIL 5071 0x0B9D, // 0B9D ; UNKNOWN 5072 0x0B9E, // 0B9E..0B9F; TAMIL 5073 0x0BA0, // 0BA0..0BA2; UNKNOWN 5074 0x0BA3, // 0BA3..0BA4; TAMIL 5075 0x0BA5, // 0BA5..0BA7; UNKNOWN 5076 0x0BA8, // 0BA8..0BAA; TAMIL 5077 0x0BAB, // 0BAB..0BAD; UNKNOWN 5078 0x0BAE, // 0BAE..0BB9; TAMIL 5079 0x0BBA, // 0BBA..0BBD; UNKNOWN 5080 0x0BBE, // 0BBE..0BC2; TAMIL 5081 0x0BC3, // 0BC3..0BC5; UNKNOWN 5082 0x0BC6, // 0BC6..0BC8; TAMIL 5083 0x0BC9, // 0BC9 ; UNKNOWN 5084 0x0BCA, // 0BCA..0BCD; TAMIL 5085 0x0BCE, // 0BCE..0BCF; UNKNOWN 5086 0x0BD0, // 0BD0 ; TAMIL 5087 0x0BD1, // 0BD1..0BD6; UNKNOWN 5088 0x0BD7, // 0BD7 ; TAMIL 5089 0x0BD8, // 0BD8..0BE5; UNKNOWN 5090 0x0BE6, // 0BE6..0BFA; TAMIL 5091 0x0BFB, // 0BFB..0BFF; UNKNOWN 5092 0x0C00, // 0C00..0C0C; TELUGU 5093 0x0C0D, // 0C0D ; UNKNOWN 5094 0x0C0E, // 0C0E..0C10; TELUGU 5095 0x0C11, // 0C11 ; UNKNOWN 5096 0x0C12, // 0C12..0C28; TELUGU 5097 0x0C29, // 0C29 ; UNKNOWN 5098 0x0C2A, // 0C2A..0C39; TELUGU 5099 0x0C3A, // 0C3A..0C3C; UNKNOWN 5100 0x0C3D, // 0C3D..0C44; TELUGU 5101 0x0C45, // 0C45 ; UNKNOWN 5102 0x0C46, // 0C46..0C48; TELUGU 5103 0x0C49, // 0C49 ; UNKNOWN 5104 0x0C4A, // 0C4A..0C4D; TELUGU 5105 0x0C4E, // 0C4E..0C54; UNKNOWN 5106 0x0C55, // 0C55..0C56; TELUGU 5107 0x0C57, // 0C57 ; UNKNOWN 5108 0x0C58, // 0C58..0C5A; TELUGU 5109 0x0C5B, // 0C5B..0C5F; UNKNOWN 5110 0x0C60, // 0C60..0C63; TELUGU 5111 0x0C64, // 0C64..0C65; UNKNOWN 5112 0x0C66, // 0C66..0C6F; TELUGU 5113 0x0C70, // 0C70..0C77; UNKNOWN 5114 0x0C78, // 0C78..0C7F; TELUGU 5115 0x0C80, // 0C80..0C8C; KANNADA 5116 0x0C8D, // 0C8D ; UNKNOWN 5117 0x0C8E, // 0C8E..0C90; KANNADA 5118 0x0C91, // 0C91 ; UNKNOWN 5119 0x0C92, // 0C92..0CA8; KANNADA 5120 0x0CA9, // 0CA9 ; UNKNOWN 5121 0x0CAA, // 0CAA..0CB3; KANNADA 5122 0x0CB4, // 0CB4 ; 
UNKNOWN 5123 0x0CB5, // 0CB5..0CB9; KANNADA 5124 0x0CBA, // 0CBA..0CBB; UNKNOWN 5125 0x0CBC, // 0CBC..0CC4; KANNADA 5126 0x0CC5, // 0CC5 ; UNKNOWN 5127 0x0CC6, // 0CC6..0CC8; KANNADA 5128 0x0CC9, // 0CC9 ; UNKNOWN 5129 0x0CCA, // 0CCA..0CCD; KANNADA 5130 0x0CCE, // 0CCE..0CD4; UNKNOWN 5131 0x0CD5, // 0CD5..0CD6; KANNADA 5132 0x0CD7, // 0CD7..0CDD; UNKNOWN 5133 0x0CDE, // 0CDE ; KANNADA 5134 0x0CDF, // 0CDF ; UNKNOWN 5135 0x0CE0, // 0CE0..0CE3; KANNADA 5136 0x0CE4, // 0CE4..0CE5; UNKNOWN 5137 0x0CE6, // 0CE6..0CEF; KANNADA 5138 0x0CF0, // 0CF0 ; UNKNOWN 5139 0x0CF1, // 0CF1..0CF2; KANNADA 5140 0x0CF3, // 0CF3..0CFF; UNKNOWN 5141 0x0D00, // 0D00..0D03; MALAYALAM 5142 0x0D04, // 0D04 ; UNKNOWN 5143 0x0D05, // 0D05..0D0C; MALAYALAM 5144 0x0D0D, // 0D0D ; UNKNOWN 5145 0x0D0E, // 0D0E..0D10; MALAYALAM 5146 0x0D11, // 0D11 ; UNKNOWN 5147 0x0D12, // 0D12..0D44; MALAYALAM 5148 0x0D45, // 0D45 ; UNKNOWN 5149 0x0D46, // 0D46..0D48; MALAYALAM 5150 0x0D49, // 0D49 ; UNKNOWN 5151 0x0D4A, // 0D4A..0D4F; MALAYALAM 5152 0x0D50, // 0D50..0D53; UNKNOWN 5153 0x0D54, // 0D54..0D63; MALAYALAM 5154 0x0D64, // 0D64..0D65; UNKNOWN 5155 0x0D66, // 0D66..0D7F; MALAYALAM 5156 0x0D80, // 0D80..0D81; UNKNOWN 5157 0x0D82, // 0D82..0D83; SINHALA 5158 0x0D84, // 0D84 ; UNKNOWN 5159 0x0D85, // 0D85..0D96; SINHALA 5160 0x0D97, // 0D97..0D99; UNKNOWN 5161 0x0D9A, // 0D9A..0DB1; SINHALA 5162 0x0DB2, // 0DB2 ; UNKNOWN 5163 0x0DB3, // 0DB3..0DBB; SINHALA 5164 0x0DBC, // 0DBC ; UNKNOWN 5165 0x0DBD, // 0DBD ; SINHALA 5166 0x0DBE, // 0DBE..0DBF; UNKNOWN 5167 0x0DC0, // 0DC0..0DC6; SINHALA 5168 0x0DC7, // 0DC7..0DC9; UNKNOWN 5169 0x0DCA, // 0DCA ; SINHALA 5170 0x0DCB, // 0DCB..0DCE; UNKNOWN 5171 0x0DCF, // 0DCF..0DD4; SINHALA 5172 0x0DD5, // 0DD5 ; UNKNOWN 5173 0x0DD6, // 0DD6 ; SINHALA 5174 0x0DD7, // 0DD7 ; UNKNOWN 5175 0x0DD8, // 0DD8..0DDF; SINHALA 5176 0x0DE0, // 0DE0..0DE5; UNKNOWN 5177 0x0DE6, // 0DE6..0DEF; SINHALA 5178 0x0DF0, // 0DF0..0DF1; UNKNOWN 5179 0x0DF2, // 0DF2..0DF4; SINHALA 5180 0x0DF5, 
// 0DF5..0E00; UNKNOWN 5181 0x0E01, // 0E01..0E3A; THAI 5182 0x0E3B, // 0E3B..0E3E; UNKNOWN 5183 0x0E3F, // 0E3F ; COMMON 5184 0x0E40, // 0E40..0E5B; THAI 5185 0x0E5C, // 0E5C..0E80; UNKNOWN 5186 0x0E81, // 0E81..0E82; LAO 5187 0x0E83, // 0E83 ; UNKNOWN 5188 0x0E84, // 0E84 ; LAO 5189 0x0E85, // 0E85..0E86; UNKNOWN 5190 0x0E87, // 0E87..0E88; LAO 5191 0x0E89, // 0E89 ; UNKNOWN 5192 0x0E8A, // 0E8A ; LAO 5193 0x0E8B, // 0E8B..0E8C; UNKNOWN 5194 0x0E8D, // 0E8D ; LAO 5195 0x0E8E, // 0E8E..0E93; UNKNOWN 5196 0x0E94, // 0E94..0E97; LAO 5197 0x0E98, // 0E98 ; UNKNOWN 5198 0x0E99, // 0E99..0E9F; LAO 5199 0x0EA0, // 0EA0 ; UNKNOWN 5200 0x0EA1, // 0EA1..0EA3; LAO 5201 0x0EA4, // 0EA4 ; UNKNOWN 5202 0x0EA5, // 0EA5 ; LAO 5203 0x0EA6, // 0EA6 ; UNKNOWN 5204 0x0EA7, // 0EA7 ; LAO 5205 0x0EA8, // 0EA8..0EA9; UNKNOWN 5206 0x0EAA, // 0EAA..0EAB; LAO 5207 0x0EAC, // 0EAC ; UNKNOWN 5208 0x0EAD, // 0EAD..0EB9; LAO 5209 0x0EBA, // 0EBA ; UNKNOWN 5210 0x0EBB, // 0EBB..0EBD; LAO 5211 0x0EBE, // 0EBE..0EBF; UNKNOWN 5212 0x0EC0, // 0EC0..0EC4; LAO 5213 0x0EC5, // 0EC5 ; UNKNOWN 5214 0x0EC6, // 0EC6 ; LAO 5215 0x0EC7, // 0EC7 ; UNKNOWN 5216 0x0EC8, // 0EC8..0ECD; LAO 5217 0x0ECE, // 0ECE..0ECF; UNKNOWN 5218 0x0ED0, // 0ED0..0ED9; LAO 5219 0x0EDA, // 0EDA..0EDB; UNKNOWN 5220 0x0EDC, // 0EDC..0EDF; LAO 5221 0x0EE0, // 0EE0..0EFF; UNKNOWN 5222 0x0F00, // 0F00..0F47; TIBETAN 5223 0x0F48, // 0F48 ; UNKNOWN 5224 0x0F49, // 0F49..0F6C; TIBETAN 5225 0x0F6D, // 0F6D..0F70; UNKNOWN 5226 0x0F71, // 0F71..0F97; TIBETAN 5227 0x0F98, // 0F98 ; UNKNOWN 5228 0x0F99, // 0F99..0FBC; TIBETAN 5229 0x0FBD, // 0FBD ; UNKNOWN 5230 0x0FBE, // 0FBE..0FCC; TIBETAN 5231 0x0FCD, // 0FCD ; UNKNOWN 5232 0x0FCE, // 0FCE..0FD4; TIBETAN 5233 0x0FD5, // 0FD5..0FD8; COMMON 5234 0x0FD9, // 0FD9..0FDA; TIBETAN 5235 0x0FDB, // 0FDB..0FFF; UNKNOWN 5236 0x1000, // 1000..109F; MYANMAR 5237 0x10A0, // 10A0..10C5; GEORGIAN 5238 0x10C6, // 10C6 ; UNKNOWN 5239 0x10C7, // 10C7 ; GEORGIAN 5240 0x10C8, // 10C8..10CC; UNKNOWN 5241 
0x10CD, // 10CD ; GEORGIAN 5242 0x10CE, // 10CE..10CF; UNKNOWN 5243 0x10D0, // 10D0..10FA; GEORGIAN 5244 0x10FB, // 10FB ; COMMON 5245 0x10FC, // 10FC..10FF; GEORGIAN 5246 0x1100, // 1100..11FF; HANGUL 5247 0x1200, // 1200..1248; ETHIOPIC 5248 0x1249, // 1249 ; UNKNOWN 5249 0x124A, // 124A..124D; ETHIOPIC 5250 0x124E, // 124E..124F; UNKNOWN 5251 0x1250, // 1250..1256; ETHIOPIC 5252 0x1257, // 1257 ; UNKNOWN 5253 0x1258, // 1258 ; ETHIOPIC 5254 0x1259, // 1259 ; UNKNOWN 5255 0x125A, // 125A..125D; ETHIOPIC 5256 0x125E, // 125E..125F; UNKNOWN 5257 0x1260, // 1260..1288; ETHIOPIC 5258 0x1289, // 1289 ; UNKNOWN 5259 0x128A, // 128A..128D; ETHIOPIC 5260 0x128E, // 128E..128F; UNKNOWN 5261 0x1290, // 1290..12B0; ETHIOPIC 5262 0x12B1, // 12B1 ; UNKNOWN 5263 0x12B2, // 12B2..12B5; ETHIOPIC 5264 0x12B6, // 12B6..12B7; UNKNOWN 5265 0x12B8, // 12B8..12BE; ETHIOPIC 5266 0x12BF, // 12BF ; UNKNOWN 5267 0x12C0, // 12C0 ; ETHIOPIC 5268 0x12C1, // 12C1 ; UNKNOWN 5269 0x12C2, // 12C2..12C5; ETHIOPIC 5270 0x12C6, // 12C6..12C7; UNKNOWN 5271 0x12C8, // 12C8..12D6; ETHIOPIC 5272 0x12D7, // 12D7 ; UNKNOWN 5273 0x12D8, // 12D8..1310; ETHIOPIC 5274 0x1311, // 1311 ; UNKNOWN 5275 0x1312, // 1312..1315; ETHIOPIC 5276 0x1316, // 1316..1317; UNKNOWN 5277 0x1318, // 1318..135A; ETHIOPIC 5278 0x135B, // 135B..135C; UNKNOWN 5279 0x135D, // 135D..137C; ETHIOPIC 5280 0x137D, // 137D..137F; UNKNOWN 5281 0x1380, // 1380..1399; ETHIOPIC 5282 0x139A, // 139A..139F; UNKNOWN 5283 0x13A0, // 13A0..13F5; CHEROKEE 5284 0x13F6, // 13F6..13F7; UNKNOWN 5285 0x13F8, // 13F8..13FD; CHEROKEE 5286 0x13FE, // 13FE..13FF; UNKNOWN 5287 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5288 0x1680, // 1680..169C; OGHAM 5289 0x169D, // 169D..169F; UNKNOWN 5290 0x16A0, // 16A0..16EA; RUNIC 5291 0x16EB, // 16EB..16ED; COMMON 5292 0x16EE, // 16EE..16F8; RUNIC 5293 0x16F9, // 16F9..16FF; UNKNOWN 5294 0x1700, // 1700..170C; TAGALOG 5295 0x170D, // 170D ; UNKNOWN 5296 0x170E, // 170E..1714; TAGALOG 5297 0x1715, // 1715..171F; 
UNKNOWN 5298 0x1720, // 1720..1734; HANUNOO 5299 0x1735, // 1735..1736; COMMON 5300 0x1737, // 1737..173F; UNKNOWN 5301 0x1740, // 1740..1753; BUHID 5302 0x1754, // 1754..175F; UNKNOWN 5303 0x1760, // 1760..176C; TAGBANWA 5304 0x176D, // 176D ; UNKNOWN 5305 0x176E, // 176E..1770; TAGBANWA 5306 0x1771, // 1771 ; UNKNOWN 5307 0x1772, // 1772..1773; TAGBANWA 5308 0x1774, // 1774..177F; UNKNOWN 5309 0x1780, // 1780..17DD; KHMER 5310 0x17DE, // 17DE..17DF; UNKNOWN 5311 0x17E0, // 17E0..17E9; KHMER 5312 0x17EA, // 17EA..17EF; UNKNOWN 5313 0x17F0, // 17F0..17F9; KHMER 5314 0x17FA, // 17FA..17FF; UNKNOWN 5315 0x1800, // 1800..1801; MONGOLIAN 5316 0x1802, // 1802..1803; COMMON 5317 0x1804, // 1804 ; MONGOLIAN 5318 0x1805, // 1805 ; COMMON 5319 0x1806, // 1806..180E; MONGOLIAN 5320 0x180F, // 180F ; UNKNOWN 5321 0x1810, // 1810..1819; MONGOLIAN 5322 0x181A, // 181A..181F; UNKNOWN 5323 0x1820, // 1820..1878; MONGOLIAN 5324 0x1879, // 1879..187F; UNKNOWN 5325 0x1880, // 1880..18AA; MONGOLIAN 5326 0x18AB, // 18AB..18AF; UNKNOWN 5327 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5328 0x18F6, // 18F6..18FF; UNKNOWN 5329 0x1900, // 1900..191E; LIMBU 5330 0x191F, // 191F ; UNKNOWN 5331 0x1920, // 1920..192B; LIMBU 5332 0x192C, // 192C..192F; UNKNOWN 5333 0x1930, // 1930..193B; LIMBU 5334 0x193C, // 193C..193F; UNKNOWN 5335 0x1940, // 1940 ; LIMBU 5336 0x1941, // 1941..1943; UNKNOWN 5337 0x1944, // 1944..194F; LIMBU 5338 0x1950, // 1950..196D; TAI_LE 5339 0x196E, // 196E..196F; UNKNOWN 5340 0x1970, // 1970..1974; TAI_LE 5341 0x1975, // 1975..197F; UNKNOWN 5342 0x1980, // 1980..19AB; NEW_TAI_LUE 5343 0x19AC, // 19AC..19AF; UNKNOWN 5344 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5345 0x19CA, // 19CA..19CF; UNKNOWN 5346 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5347 0x19DB, // 19DB..19DD; UNKNOWN 5348 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5349 0x19E0, // 19E0..19FF; KHMER 5350 0x1A00, // 1A00..1A1B; BUGINESE 5351 0x1A1C, // 1A1C..1A1D; UNKNOWN 5352 0x1A1E, // 1A1E..1A1F; BUGINESE 5353 0x1A20, // 
1A20..1A5E; TAI_THAM 5354 0x1A5F, // 1A5F ; UNKNOWN 5355 0x1A60, // 1A60..1A7C; TAI_THAM 5356 0x1A7D, // 1A7D..1A7E; UNKNOWN 5357 0x1A7F, // 1A7F..1A89; TAI_THAM 5358 0x1A8A, // 1A8A..1A8F; UNKNOWN 5359 0x1A90, // 1A90..1A99; TAI_THAM 5360 0x1A9A, // 1A9A..1A9F; UNKNOWN 5361 0x1AA0, // 1AA0..1AAD; TAI_THAM 5362 0x1AAE, // 1AAE..1AAF; UNKNOWN 5363 0x1AB0, // 1AB0..1ABE; INHERITED 5364 0x1ABF, // 1ABF..1AFF; UNKNOWN 5365 0x1B00, // 1B00..1B4B; BALINESE 5366 0x1B4C, // 1B4C..1B4F; UNKNOWN 5367 0x1B50, // 1B50..1B7C; BALINESE 5368 0x1B7D, // 1B7D..1B7F; UNKNOWN 5369 0x1B80, // 1B80..1BBF; SUNDANESE 5370 0x1BC0, // 1BC0..1BF3; BATAK 5371 0x1BF4, // 1BF4..1BFB; UNKNOWN 5372 0x1BFC, // 1BFC..1BFF; BATAK 5373 0x1C00, // 1C00..1C37; LEPCHA 5374 0x1C38, // 1C38..1C3A; UNKNOWN 5375 0x1C3B, // 1C3B..1C49; LEPCHA 5376 0x1C4A, // 1C4A..1C4C; UNKNOWN 5377 0x1C4D, // 1C4D..1C4F; LEPCHA 5378 0x1C50, // 1C50..1C7F; OL_CHIKI 5379 0x1C80, // 1C80..1C88; CYRILLIC 5380 0x1C89, // 1C89..1C8F; UNKNOWN 5381 0x1C90, // 1C90..1CBA; GEORGIAN 5382 0x1CBB, // 1CBB..1CBC; UNKNOWN 5383 0x1CBD, // 1CBD..1CBF; GEORGIAN 5384 0x1CC0, // 1CC0..1CC7; SUNDANESE 5385 0x1CC8, // 1CC8..1CCF; UNKNOWN 5386 0x1CD0, // 1CD0..1CD2; INHERITED 5387 0x1CD3, // 1CD3 ; COMMON 5388 0x1CD4, // 1CD4..1CE0; INHERITED 5389 0x1CE1, // 1CE1 ; COMMON 5390 0x1CE2, // 1CE2..1CE8; INHERITED 5391 0x1CE9, // 1CE9..1CEC; COMMON 5392 0x1CED, // 1CED ; INHERITED 5393 0x1CEE, // 1CEE..1CF3; COMMON 5394 0x1CF4, // 1CF4 ; INHERITED 5395 0x1CF5, // 1CF5..1CF7; COMMON 5396 0x1CF8, // 1CF8..1CF9; INHERITED 5397 0x1CFA, // 1CFA..1CFF; UNKNOWN 5398 0x1D00, // 1D00..1D25; LATIN 5399 0x1D26, // 1D26..1D2A; GREEK 5400 0x1D2B, // 1D2B ; CYRILLIC 5401 0x1D2C, // 1D2C..1D5C; LATIN 5402 0x1D5D, // 1D5D..1D61; GREEK 5403 0x1D62, // 1D62..1D65; LATIN 5404 0x1D66, // 1D66..1D6A; GREEK 5405 0x1D6B, // 1D6B..1D77; LATIN 5406 0x1D78, // 1D78 ; CYRILLIC 5407 0x1D79, // 1D79..1DBE; LATIN 5408 0x1DBF, // 1DBF ; GREEK 5409 0x1DC0, // 1DC0..1DF9; INHERITED 5410 
0x1DFA, // 1DFA ; UNKNOWN 5411 0x1DFB, // 1DFB..1DFF; INHERITED 5412 0x1E00, // 1E00..1EFF; LATIN 5413 0x1F00, // 1F00..1F15; GREEK 5414 0x1F16, // 1F16..1F17; UNKNOWN 5415 0x1F18, // 1F18..1F1D; GREEK 5416 0x1F1E, // 1F1E..1F1F; UNKNOWN 5417 0x1F20, // 1F20..1F45; GREEK 5418 0x1F46, // 1F46..1F47; UNKNOWN 5419 0x1F48, // 1F48..1F4D; GREEK 5420 0x1F4E, // 1F4E..1F4F; UNKNOWN 5421 0x1F50, // 1F50..1F57; GREEK 5422 0x1F58, // 1F58 ; UNKNOWN 5423 0x1F59, // 1F59 ; GREEK 5424 0x1F5A, // 1F5A ; UNKNOWN 5425 0x1F5B, // 1F5B ; GREEK 5426 0x1F5C, // 1F5C ; UNKNOWN 5427 0x1F5D, // 1F5D ; GREEK 5428 0x1F5E, // 1F5E ; UNKNOWN 5429 0x1F5F, // 1F5F..1F7D; GREEK 5430 0x1F7E, // 1F7E..1F7F; UNKNOWN 5431 0x1F80, // 1F80..1FB4; GREEK 5432 0x1FB5, // 1FB5 ; UNKNOWN 5433 0x1FB6, // 1FB6..1FC4; GREEK 5434 0x1FC5, // 1FC5 ; UNKNOWN 5435 0x1FC6, // 1FC6..1FD3; GREEK 5436 0x1FD4, // 1FD4..1FD5; UNKNOWN 5437 0x1FD6, // 1FD6..1FDB; GREEK 5438 0x1FDC, // 1FDC ; UNKNOWN 5439 0x1FDD, // 1FDD..1FEF; GREEK 5440 0x1FF0, // 1FF0..1FF1; UNKNOWN 5441 0x1FF2, // 1FF2..1FF4; GREEK 5442 0x1FF5, // 1FF5 ; UNKNOWN 5443 0x1FF6, // 1FF6..1FFE; GREEK 5444 0x1FFF, // 1FFF ; UNKNOWN 5445 0x2000, // 2000..200B; COMMON 5446 0x200C, // 200C..200D; INHERITED 5447 0x200E, // 200E..2064; COMMON 5448 0x2065, // 2065 ; UNKNOWN 5449 0x2066, // 2066..2070; COMMON 5450 0x2071, // 2071 ; LATIN 5451 0x2072, // 2072..2073; UNKNOWN 5452 0x2074, // 2074..207E; COMMON 5453 0x207F, // 207F ; LATIN 5454 0x2080, // 2080..208E; COMMON 5455 0x208F, // 208F ; UNKNOWN 5456 0x2090, // 2090..209C; LATIN 5457 0x209D, // 209D..209F; UNKNOWN 5458 0x20A0, // 20A0..20BF; COMMON 5459 0x20C0, // 20C0..20CF; UNKNOWN 5460 0x20D0, // 20D0..20F0; INHERITED 5461 0x20F1, // 20F1..20FF; UNKNOWN 5462 0x2100, // 2100..2125; COMMON 5463 0x2126, // 2126 ; GREEK 5464 0x2127, // 2127..2129; COMMON 5465 0x212A, // 212A..212B; LATIN 5466 0x212C, // 212C..2131; COMMON 5467 0x2132, // 2132 ; LATIN 5468 0x2133, // 2133..214D; COMMON 5469 0x214E, // 214E ; 
LATIN 5470 0x214F, // 214F..215F; COMMON 5471 0x2160, // 2160..2188; LATIN 5472 0x2189, // 2189..218B; COMMON 5473 0x218C, // 218C..218F; UNKNOWN 5474 0x2190, // 2190..2426; COMMON 5475 0x2427, // 2427..243F; UNKNOWN 5476 0x2440, // 2440..244A; COMMON 5477 0x244B, // 244B..245F; UNKNOWN 5478 0x2460, // 2460..27FF; COMMON 5479 0x2800, // 2800..28FF; BRAILLE 5480 0x2900, // 2900..2B73; COMMON 5481 0x2B74, // 2B74..2B75; UNKNOWN 5482 0x2B76, // 2B76..2B95; COMMON 5483 0x2B96, // 2B96..2B97; UNKNOWN 5484 0x2B98, // 2B98..2BC8; COMMON 5485 0x2BC9, // 2BC9 ; UNKNOWN 5486 0x2BCA, // 2BCA..2BFE; COMMON 5487 0x2BFF, // 2BFF; UNKNOWN 5488 0x2C00, // 2C00..2C2E; GLAGOLITIC 5489 0x2C2F, // 2C2F ; UNKNOWN 5490 0x2C30, // 2C30..2C5E; GLAGOLITIC 5491 0x2C5F, // 2C5F ; UNKNOWN 5492 0x2C60, // 2C60..2C7F; LATIN 5493 0x2C80, // 2C80..2CF3; COPTIC 5494 0x2CF4, // 2CF4..2CF8; UNKNOWN 5495 0x2CF9, // 2CF9..2CFF; COPTIC 5496 0x2D00, // 2D00..2D25; GEORGIAN 5497 0x2D26, // 2D26 ; UNKNOWN 5498 0x2D27, // 2D27 ; GEORGIAN 5499 0x2D28, // 2D28..2D2C; UNKNOWN 5500 0x2D2D, // 2D2D ; GEORGIAN 5501 0x2D2E, // 2D2E..2D2F; UNKNOWN 5502 0x2D30, // 2D30..2D67; TIFINAGH 5503 0x2D68, // 2D68..2D6E; UNKNOWN 5504 0x2D6F, // 2D6F..2D70; TIFINAGH 5505 0x2D71, // 2D71..2D7E; UNKNOWN 5506 0x2D7F, // 2D7F ; TIFINAGH 5507 0x2D80, // 2D80..2D96; ETHIOPIC 5508 0x2D97, // 2D97..2D9F; UNKNOWN 5509 0x2DA0, // 2DA0..2DA6; ETHIOPIC 5510 0x2DA7, // 2DA7 ; UNKNOWN 5511 0x2DA8, // 2DA8..2DAE; ETHIOPIC 5512 0x2DAF, // 2DAF ; UNKNOWN 5513 0x2DB0, // 2DB0..2DB6; ETHIOPIC 5514 0x2DB7, // 2DB7 ; UNKNOWN 5515 0x2DB8, // 2DB8..2DBE; ETHIOPIC 5516 0x2DBF, // 2DBF ; UNKNOWN 5517 0x2DC0, // 2DC0..2DC6; ETHIOPIC 5518 0x2DC7, // 2DC7 ; UNKNOWN 5519 0x2DC8, // 2DC8..2DCE; ETHIOPIC 5520 0x2DCF, // 2DCF ; UNKNOWN 5521 0x2DD0, // 2DD0..2DD6; ETHIOPIC 5522 0x2DD7, // 2DD7 ; UNKNOWN 5523 0x2DD8, // 2DD8..2DDE; ETHIOPIC 5524 0x2DDF, // 2DDF ; UNKNOWN 5525 0x2DE0, // 2DE0..2DFF; CYRILLIC 5526 0x2E00, // 2E00..2E4E; COMMON 5527 0x2E4F, // 
2E4F..2E7F; UNKNOWN 5528 0x2E80, // 2E80..2E99; HAN 5529 0x2E9A, // 2E9A ; UNKNOWN 5530 0x2E9B, // 2E9B..2EF3; HAN 5531 0x2EF4, // 2EF4..2EFF; UNKNOWN 5532 0x2F00, // 2F00..2FD5; HAN 5533 0x2FD6, // 2FD6..2FEF; UNKNOWN 5534 0x2FF0, // 2FF0..2FFB; COMMON 5535 0x2FFC, // 2FFC..2FFF; UNKNOWN 5536 0x3000, // 3000..3004; COMMON 5537 0x3005, // 3005 ; HAN 5538 0x3006, // 3006 ; COMMON 5539 0x3007, // 3007 ; HAN 5540 0x3008, // 3008..3020; COMMON 5541 0x3021, // 3021..3029; HAN 5542 0x302A, // 302A..302D; INHERITED 5543 0x302E, // 302E..302F; HANGUL 5544 0x3030, // 3030..3037; COMMON 5545 0x3038, // 3038..303B; HAN 5546 0x303C, // 303C..303F; COMMON 5547 0x3040, // 3040 ; UNKNOWN 5548 0x3041, // 3041..3096; HIRAGANA 5549 0x3097, // 3097..3098; UNKNOWN 5550 0x3099, // 3099..309A; INHERITED 5551 0x309B, // 309B..309C; COMMON 5552 0x309D, // 309D..309F; HIRAGANA 5553 0x30A0, // 30A0 ; COMMON 5554 0x30A1, // 30A1..30FA; KATAKANA 5555 0x30FB, // 30FB..30FC; COMMON 5556 0x30FD, // 30FD..30FF; KATAKANA 5557 0x3100, // 3100..3104; UNKNOWN 5558 0x3105, // 3105..312F; BOPOMOFO 5559 0x3130, // 3130; UNKNOWN 5560 0x3131, // 3131..318E; HANGUL 5561 0x318F, // 318F ; UNKNOWN 5562 0x3190, // 3190..319F; COMMON 5563 0x31A0, // 31A0..31BA; BOPOMOFO 5564 0x31BB, // 31BB..31BF; UNKNOWN 5565 0x31C0, // 31C0..31E3; COMMON 5566 0x31E4, // 31E4..31EF; UNKNOWN 5567 0x31F0, // 31F0..31FF; KATAKANA 5568 0x3200, // 3200..321E; HANGUL 5569 0x321F, // 321F ; UNKNOWN 5570 0x3220, // 3220..325F; COMMON 5571 0x3260, // 3260..327E; HANGUL 5572 0x327F, // 327F..32CF; COMMON 5573 0x32D0, // 32D0..32FE; KATAKANA 5574 0x32FF, // 32FF ; COMMON 5575 0x3300, // 3300..3357; KATAKANA 5576 0x3358, // 3358..33FF; COMMON 5577 0x3400, // 3400..4DB5; HAN 5578 0x4DB6, // 4DB6..4DBF; UNKNOWN 5579 0x4DC0, // 4DC0..4DFF; COMMON 5580 0x4E00, // 4E00..9FEF; HAN 5581 0x9FF0, // 9FF0..9FFF; UNKNOWN 5582 0xA000, // A000..A48C; YI 5583 0xA48D, // A48D..A48F; UNKNOWN 5584 0xA490, // A490..A4C6; YI 5585 0xA4C7, // A4C7..A4CF; 
UNKNOWN 5586 0xA4D0, // A4D0..A4FF; LISU 5587 0xA500, // A500..A62B; VAI 5588 0xA62C, // A62C..A63F; UNKNOWN 5589 0xA640, // A640..A69F; CYRILLIC 5590 0xA6A0, // A6A0..A6F7; BAMUM 5591 0xA6F8, // A6F8..A6FF; UNKNOWN 5592 0xA700, // A700..A721; COMMON 5593 0xA722, // A722..A787; LATIN 5594 0xA788, // A788..A78A; COMMON 5595 0xA78B, // A78B..A7B9; LATIN 5596 0xA7C0, // A7C0..A7F6; UNKNOWN 5597 0xA7F7, // A7F7..A7FF; LATIN 5598 0xA800, // A800..A82B; SYLOTI_NAGRI 5599 0xA82C, // A82C..A82F; UNKNOWN 5600 0xA830, // A830..A839; COMMON 5601 0xA83A, // A83A..A83F; UNKNOWN 5602 0xA840, // A840..A877; PHAGS_PA 5603 0xA878, // A878..A87F; UNKNOWN 5604 0xA880, // A880..A8C5; SAURASHTRA 5605 0xA8C6, // A8C6..A8CD; UNKNOWN 5606 0xA8CE, // A8CE..A8D9; SAURASHTRA 5607 0xA8DA, // A8DA..A8DF; UNKNOWN 5608 0xA8E0, // A8E0..A8FF; DEVANAGARI 5609 0xA900, // A900..A92D; KAYAH_LI 5610 0xA92E, // A92E ; COMMON 5611 0xA92F, // A92F ; KAYAH_LI 5612 0xA930, // A930..A953; REJANG 5613 0xA954, // A954..A95E; UNKNOWN 5614 0xA95F, // A95F ; REJANG 5615 0xA960, // A960..A97C; HANGUL 5616 0xA97D, // A97D..A97F; UNKNOWN 5617 0xA980, // A980..A9CD; JAVANESE 5618 0xA9CE, // A9CE ; UNKNOWN 5619 0xA9CF, // A9CF ; COMMON 5620 0xA9D0, // A9D0..A9D9; JAVANESE 5621 0xA9DA, // A9DA..A9DD; UNKNOWN 5622 0xA9DE, // A9DE..A9DF; JAVANESE 5623 0xA9E0, // A9E0..A9FE; MYANMAR 5624 0xA9FF, // A9FF ; UNKNOWN 5625 0xAA00, // AA00..AA36; CHAM 5626 0xAA37, // AA37..AA3F; UNKNOWN 5627 0xAA40, // AA40..AA4D; CHAM 5628 0xAA4E, // AA4E..AA4F; UNKNOWN 5629 0xAA50, // AA50..AA59; CHAM 5630 0xAA5A, // AA5A..AA5B; UNKNOWN 5631 0xAA5C, // AA5C..AA5F; CHAM 5632 0xAA60, // AA60..AA7F; MYANMAR 5633 0xAA80, // AA80..AAC2; TAI_VIET 5634 0xAAC3, // AAC3..AADA; UNKNOWN 5635 0xAADB, // AADB..AADF; TAI_VIET 5636 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5637 0xAAF7, // AAF7..AB00; UNKNOWN 5638 0xAB01, // AB01..AB06; ETHIOPIC 5639 0xAB07, // AB07..AB08; UNKNOWN 5640 0xAB09, // AB09..AB0E; ETHIOPIC 5641 0xAB0F, // AB0F..AB10; UNKNOWN 5642 
0xAB11, // AB11..AB16; ETHIOPIC 5643 0xAB17, // AB17..AB1F; UNKNOWN 5644 0xAB20, // AB20..AB26; ETHIOPIC 5645 0xAB27, // AB27 ; UNKNOWN 5646 0xAB28, // AB28..AB2E; ETHIOPIC 5647 0xAB2F, // AB2F ; UNKNOWN 5648 0xAB30, // AB30..AB5A; LATIN 5649 0xAB5B, // AB5B ; COMMON 5650 0xAB5C, // AB5C..AB64; LATIN 5651 0xAB65, // AB65 ; GREEK 5652 0xAB66, // AB66..AB6F; UNKNOWN 5653 0xAB70, // AB70..ABBF; CHEROKEE 5654 0xABC0, // ABC0..ABED; MEETEI_MAYEK 5655 0xABEE, // ABEE..ABEF; UNKNOWN 5656 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5657 0xABFA, // ABFA..ABFF; UNKNOWN 5658 0xAC00, // AC00..D7A3; HANGUL 5659 0xD7A4, // D7A4..D7AF; UNKNOWN 5660 0xD7B0, // D7B0..D7C6; HANGUL 5661 0xD7C7, // D7C7..D7CA; UNKNOWN 5662 0xD7CB, // D7CB..D7FB; HANGUL 5663 0xD7FC, // D7FC..F8FF; UNKNOWN 5664 0xF900, // F900..FA6D; HAN 5665 0xFA6E, // FA6E..FA6F; UNKNOWN 5666 0xFA70, // FA70..FAD9; HAN 5667 0xFADA, // FADA..FAFF; UNKNOWN 5668 0xFB00, // FB00..FB06; LATIN 5669 0xFB07, // FB07..FB12; UNKNOWN 5670 0xFB13, // FB13..FB17; ARMENIAN 5671 0xFB18, // FB18..FB1C; UNKNOWN 5672 0xFB1D, // FB1D..FB36; HEBREW 5673 0xFB37, // FB37 ; UNKNOWN 5674 0xFB38, // FB38..FB3C; HEBREW 5675 0xFB3D, // FB3D ; UNKNOWN 5676 0xFB3E, // FB3E ; HEBREW 5677 0xFB3F, // FB3F ; UNKNOWN 5678 0xFB40, // FB40..FB41; HEBREW 5679 0xFB42, // FB42 ; UNKNOWN 5680 0xFB43, // FB43..FB44; HEBREW 5681 0xFB45, // FB45 ; UNKNOWN 5682 0xFB46, // FB46..FB4F; HEBREW 5683 0xFB50, // FB50..FBC1; ARABIC 5684 0xFBC2, // FBC2..FBD2; UNKNOWN 5685 0xFBD3, // FBD3..FD3D; ARABIC 5686 0xFD3E, // FD3E..FD3F; COMMON 5687 0xFD40, // FD40..FD4F; UNKNOWN 5688 0xFD50, // FD50..FD8F; ARABIC 5689 0xFD90, // FD90..FD91; UNKNOWN 5690 0xFD92, // FD92..FDC7; ARABIC 5691 0xFDC8, // FDC8..FDEF; UNKNOWN 5692 0xFDF0, // FDF0..FDFD; ARABIC 5693 0xFDFE, // FDFE..FDFF; UNKNOWN 5694 0xFE00, // FE00..FE0F; INHERITED 5695 0xFE10, // FE10..FE19; COMMON 5696 0xFE1A, // FE1A..FE1F; UNKNOWN 5697 0xFE20, // FE20..FE2D; INHERITED 5698 0xFE2E, // FE2E..FE2F; CYRILLIC 5699 0xFE30, // 
FE30..FE52; COMMON 5700 0xFE53, // FE53 ; UNKNOWN 5701 0xFE54, // FE54..FE66; COMMON 5702 0xFE67, // FE67 ; UNKNOWN 5703 0xFE68, // FE68..FE6B; COMMON 5704 0xFE6C, // FE6C..FE6F; UNKNOWN 5705 0xFE70, // FE70..FE74; ARABIC 5706 0xFE75, // FE75 ; UNKNOWN 5707 0xFE76, // FE76..FEFC; ARABIC 5708 0xFEFD, // FEFD..FEFE; UNKNOWN 5709 0xFEFF, // FEFF ; COMMON 5710 0xFF00, // FF00 ; UNKNOWN 5711 0xFF01, // FF01..FF20; COMMON 5712 0xFF21, // FF21..FF3A; LATIN 5713 0xFF3B, // FF3B..FF40; COMMON 5714 0xFF41, // FF41..FF5A; LATIN 5715 0xFF5B, // FF5B..FF65; COMMON 5716 0xFF66, // FF66..FF6F; KATAKANA 5717 0xFF70, // FF70 ; COMMON 5718 0xFF71, // FF71..FF9D; KATAKANA 5719 0xFF9E, // FF9E..FF9F; COMMON 5720 0xFFA0, // FFA0..FFBE; HANGUL 5721 0xFFBF, // FFBF..FFC1; UNKNOWN 5722 0xFFC2, // FFC2..FFC7; HANGUL 5723 0xFFC8, // FFC8..FFC9; UNKNOWN 5724 0xFFCA, // FFCA..FFCF; HANGUL 5725 0xFFD0, // FFD0..FFD1; UNKNOWN 5726 0xFFD2, // FFD2..FFD7; HANGUL 5727 0xFFD8, // FFD8..FFD9; UNKNOWN 5728 0xFFDA, // FFDA..FFDC; HANGUL 5729 0xFFDD, // FFDD..FFDF; UNKNOWN 5730 0xFFE0, // FFE0..FFE6; COMMON 5731 0xFFE7, // FFE7 ; UNKNOWN 5732 0xFFE8, // FFE8..FFEE; COMMON 5733 0xFFEF, // FFEF..FFF8; UNKNOWN 5734 0xFFF9, // FFF9..FFFD; COMMON 5735 0xFFFE, // FFFE..FFFF; UNKNOWN 5736 0x10000, // 10000..1000B; LINEAR_B 5737 0x1000C, // 1000C ; UNKNOWN 5738 0x1000D, // 1000D..10026; LINEAR_B 5739 0x10027, // 10027 ; UNKNOWN 5740 0x10028, // 10028..1003A; LINEAR_B 5741 0x1003B, // 1003B ; UNKNOWN 5742 0x1003C, // 1003C..1003D; LINEAR_B 5743 0x1003E, // 1003E ; UNKNOWN 5744 0x1003F, // 1003F..1004D; LINEAR_B 5745 0x1004E, // 1004E..1004F; UNKNOWN 5746 0x10050, // 10050..1005D; LINEAR_B 5747 0x1005E, // 1005E..1007F; UNKNOWN 5748 0x10080, // 10080..100FA; LINEAR_B 5749 0x100FB, // 100FB..100FF; UNKNOWN 5750 0x10100, // 10100..10102; COMMON 5751 0x10103, // 10103..10106; UNKNOWN 5752 0x10107, // 10107..10133; COMMON 5753 0x10134, // 10134..10136; UNKNOWN 5754 0x10137, // 10137..1013F; COMMON 5755 0x10140, // 
10140..1018E; GREEK 5756 0x1018F, // 1018F ; UNKNOWN 5757 0x10190, // 10190..1019B; COMMON 5758 0x1019C, // 1019C..1019F; UNKNOWN 5759 0x101A0, // 101A0 ; GREEK 5760 0x101A1, // 101A1..101CF; UNKNOWN 5761 0x101D0, // 101D0..101FC; COMMON 5762 0x101FD, // 101FD ; INHERITED 5763 0x101FE, // 101FE..1027F; UNKNOWN 5764 0x10280, // 10280..1029C; LYCIAN 5765 0x1029D, // 1029D..1029F; UNKNOWN 5766 0x102A0, // 102A0..102D0; CARIAN 5767 0x102D1, // 102D1..102DF; UNKNOWN 5768 0x102E0, // 102E0 ; INHERITED 5769 0x102E1, // 102E1..102FB; COMMON 5770 0x102FC, // 102FC..102FF; UNKNOWN 5771 0x10300, // 10300..10323; OLD_ITALIC 5772 0x10324, // 10324..1032C; UNKNOWN 5773 0x1032D, // 1032D..1032F; OLD_ITALIC 5774 0x10330, // 10330..1034A; GOTHIC 5775 0x1034B, // 1034B..1034F; UNKNOWN 5776 0x10350, // 10350..1037A; OLD_PERMIC 5777 0x1037B, // 1037B..1037F; UNKNOWN 5778 0x10380, // 10380..1039D; UGARITIC 5779 0x1039E, // 1039E ; UNKNOWN 5780 0x1039F, // 1039F ; UGARITIC 5781 0x103A0, // 103A0..103C3; OLD_PERSIAN 5782 0x103C4, // 103C4..103C7; UNKNOWN 5783 0x103C8, // 103C8..103D5; OLD_PERSIAN 5784 0x103D6, // 103D6..103FF; UNKNOWN 5785 0x10400, // 10400..1044F; DESERET 5786 0x10450, // 10450..1047F; SHAVIAN 5787 0x10480, // 10480..1049D; OSMANYA 5788 0x1049E, // 1049E..1049F; UNKNOWN 5789 0x104A0, // 104A0..104A9; OSMANYA 5790 0x104AA, // 104AA..104AF; UNKNOWN 5791 0x104B0, // 104B0..104D3; OSAGE 5792 0x104D4, // 104D4..104D7; UNKNOWN 5793 0x104D8, // 104D8..104FB; OSAGE 5794 0x104FC, // 104FC..104FF; UNKNOWN 5795 0x10500, // 10500..10527; ELBASAN 5796 0x10528, // 10528..1052F; UNKNOWN 5797 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 5798 0x10564, // 10564..1056E; UNKNOWN 5799 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 5800 0x10570, // 10570..105FF; UNKNOWN 5801 0x10600, // 10600..10736; LINEAR_A 5802 0x10737, // 10737..1073F; UNKNOWN 5803 0x10740, // 10740..10755; LINEAR_A 5804 0x10756, // 10756..1075F; UNKNOWN 5805 0x10760, // 10760..10767; LINEAR_A 5806 0x10768, // 10768..107FF; 
UNKNOWN 5807 0x10800, // 10800..10805; CYPRIOT 5808 0x10806, // 10806..10807; UNKNOWN 5809 0x10808, // 10808 ; CYPRIOT 5810 0x10809, // 10809 ; UNKNOWN 5811 0x1080A, // 1080A..10835; CYPRIOT 5812 0x10836, // 10836 ; UNKNOWN 5813 0x10837, // 10837..10838; CYPRIOT 5814 0x10839, // 10839..1083B; UNKNOWN 5815 0x1083C, // 1083C ; CYPRIOT 5816 0x1083D, // 1083D..1083E; UNKNOWN 5817 0x1083F, // 1083F ; CYPRIOT 5818 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 5819 0x10856, // 10856 ; UNKNOWN 5820 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 5821 0x10860, // 10860..1087F; PALMYRENE 5822 0x10880, // 10880..1089E; NABATAEAN 5823 0x1089F, // 1089F..108A6; UNKNOWN 5824 0x108A7, // 108A7..108AF; NABATAEAN 5825 0x108B0, // 108B0..108DF; UNKNOWN 5826 0x108E0, // 108E0..108F2; HATRAN 5827 0x108F3, // 108F3 ; UNKNOWN 5828 0x108F4, // 108F4..108F5; HATRAN 5829 0x108F6, // 108F6..108FA; UNKNOWN 5830 0x108FB, // 108FB..108FF; HATRAN 5831 0x10900, // 10900..1091B; PHOENICIAN 5832 0x1091C, // 1091C..1091E; UNKNOWN 5833 0x1091F, // 1091F ; PHOENICIAN 5834 0x10920, // 10920..10939; LYDIAN 5835 0x1093A, // 1093A..1093E; UNKNOWN 5836 0x1093F, // 1093F ; LYDIAN 5837 0x10940, // 10940..1097F; UNKNOWN 5838 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 5839 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 5840 0x109B8, // 109B8..109BB; UNKNOWN 5841 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 5842 0x109D0, // 109D0..109D1; UNKNOWN 5843 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 5844 0x10A00, // 10A00..10A03; KHAROSHTHI 5845 0x10A04, // 10A04 ; UNKNOWN 5846 0x10A05, // 10A05..10A06; KHAROSHTHI 5847 0x10A07, // 10A07..10A0B; UNKNOWN 5848 0x10A0C, // 10A0C..10A13; KHAROSHTHI 5849 0x10A14, // 10A14 ; UNKNOWN 5850 0x10A15, // 10A15..10A17; KHAROSHTHI 5851 0x10A18, // 10A18 ; UNKNOWN 5852 0x10A19, // 10A19..10A35; KHAROSHTHI 5853 0x10A36, // 10A36..10A37; UNKNOWN 5854 0x10A38, // 10A38..10A3A; KHAROSHTHI 5855 0x10A3B, // 10A3B..10A3E; UNKNOWN 5856 0x10A3F, // 10A3F..10A48; KHAROSHTHI 5857 0x10A49, // 
10A49..10A4F; UNKNOWN 5858 0x10A50, // 10A50..10A58; KHAROSHTHI 5859 0x10A59, // 10A59..10A5F; UNKNOWN 5860 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 5861 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 5862 0x10AA0, // 10AA0..10ABF; UNKNOWN 5863 0x10AC0, // 10AC0..10AE6; MANICHAEAN 5864 0x10AE7, // 10AE7..10AEA; UNKNOWN 5865 0x10AEB, // 10AEB..10AF6; MANICHAEAN 5866 0x10AF7, // 10AF7..10AFF; UNKNOWN 5867 0x10B00, // 10B00..10B35; AVESTAN 5868 0x10B36, // 10B36..10B38; UNKNOWN 5869 0x10B39, // 10B39..10B3F; AVESTAN 5870 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 5871 0x10B56, // 10B56..10B57; UNKNOWN 5872 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 5873 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 5874 0x10B73, // 10B73..10B77; UNKNOWN 5875 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 5876 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 5877 0x10B92, // 10B92..10B98; UNKNOWN 5878 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 5879 0x10B9D, // 10B9D..10BA8; UNKNOWN 5880 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 5881 0x10BB0, // 10BB0..10BFF; UNKNOWN 5882 0x10C00, // 10C00..10C48; OLD_TURKIC 5883 0x10C49, // 10C49..10C7F; UNKNOWN 5884 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 5885 0x10CB3, // 10CB3..10CBF; UNKNOWN 5886 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 5887 0x10CF3, // 10CF3..10CF9; UNKNOWN 5888 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 5889 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 5890 0x10D28, // 10D28..10D2F; UNKNOWN 5891 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 5892 0x10D3A, // 10D3A..10E5F; UNKNOWN 5893 0x10E60, // 10E60..10E7E; ARABIC 5894 0x10E7F, // 10E7F..10EFF; UNKNOWN 5895 0x10F00, // 10F00..10F27; OLD_SOGDIAN 5896 0x10F28, // 10F28..10F2F; UNKNOWN 5897 0x10F30, // 10F30..10F59; SOGDIAN 5898 0x10F5A, // 10F5A..10FFF; UNKNOWN 5899 0x11000, // 11000..1104D; BRAHMI 5900 0x1104E, // 1104E..11051; UNKNOWN 5901 0x11052, // 11052..1106F; BRAHMI 5902 0x11070, // 11070..1107E; UNKNOWN 5903 0x1107F, // 1107F ; BRAHMI 5904 0x11080, // 11080..110C1; KAITHI
5905 0x110C2, // 110C2..110CC; UNKNOWN 5906 0x110CD, // 110CD ; KAITHI 5907 0x110CE, // 110CE..110CF; UNKNOWN 5908 0x110D0, // 110D0..110E8; SORA_SOMPENG 5909 0x110E9, // 110E9..110EF; UNKNOWN 5910 0x110F0, // 110F0..110F9; SORA_SOMPENG 5911 0x110FA, // 110FA..110FF; UNKNOWN 5912 0x11100, // 11100..11134; CHAKMA 5913 0x11135, // 11135 ; UNKNOWN 5914 0x11136, // 11136..11146; CHAKMA 5915 0x11147, // 11147..1114F; UNKNOWN 5916 0x11150, // 11150..11176; MAHAJANI 5917 0x11177, // 11177..1117F; UNKNOWN 5918 0x11180, // 11180..111CD; SHARADA 5919 0x111CE, // 111CE..111CF; UNKNOWN 5920 0x111D0, // 111D0..111DF; SHARADA 5921 0x111E0, // 111E0 ; UNKNOWN 5922 0x111E1, // 111E1..111F4; SINHALA 5923 0x111F5, // 111F5..111FF; UNKNOWN 5924 0x11200, // 11200..11211; KHOJKI 5925 0x11212, // 11212 ; UNKNOWN 5926 0x11213, // 11213..1123E; KHOJKI 5927 0x1123F, // 1123F..1127F; UNKNOWN 5928 0x11280, // 11280..11286; MULTANI 5929 0x11287, // 11287 ; UNKNOWN 5930 0x11288, // 11288 ; MULTANI 5931 0x11289, // 11289 ; UNKNOWN 5932 0x1128A, // 1128A..1128D; MULTANI 5933 0x1128E, // 1128E ; UNKNOWN 5934 0x1128F, // 1128F..1129D; MULTANI 5935 0x1129E, // 1129E ; UNKNOWN 5936 0x1129F, // 1129F..112A9; MULTANI 5937 0x112AA, // 112AA..112AF; UNKNOWN 5938 0x112B0, // 112B0..112EA; KHUDAWADI 5939 0x112EB, // 112EB..112EF; UNKNOWN 5940 0x112F0, // 112F0..112F9; KHUDAWADI 5941 0x112FA, // 112FA..112FF; UNKNOWN 5942 0x11300, // 11300..11303; GRANTHA 5943 0x11304, // 11304 ; UNKNOWN 5944 0x11305, // 11305..1130C; GRANTHA 5945 0x1130D, // 1130D..1130E; UNKNOWN 5946 0x1130F, // 1130F..11310; GRANTHA 5947 0x11311, // 11311..11312; UNKNOWN 5948 0x11313, // 11313..11328; GRANTHA 5949 0x11329, // 11329 ; UNKNOWN 5950 0x1132A, // 1132A..11330; GRANTHA 5951 0x11331, // 11331 ; UNKNOWN 5952 0x11332, // 11332..11333; GRANTHA 5953 0x11334, // 11334 ; UNKNOWN 5954 0x11335, // 11335..11339; GRANTHA 5955 0x1133A, // 1133A ; UNKNOWN 5956 0x1133B, // 1133B ; INHERITED 5957 0x1133C, // 1133C..11344; GRANTHA 5958 
0x11345, // 11345..11346; UNKNOWN 5959 0x11347, // 11347..11348; GRANTHA 5960 0x11349, // 11349..1134A; UNKNOWN 5961 0x1134B, // 1134B..1134D; GRANTHA 5962 0x1134E, // 1134E..1134F; UNKNOWN 5963 0x11350, // 11350 ; GRANTHA 5964 0x11351, // 11351..11356; UNKNOWN 5965 0x11357, // 11357 ; GRANTHA 5966 0x11358, // 11358..1135C; UNKNOWN 5967 0x1135D, // 1135D..11363; GRANTHA 5968 0x11364, // 11364..11365; UNKNOWN 5969 0x11366, // 11366..1136C; GRANTHA 5970 0x1136D, // 1136D..1136F; UNKNOWN 5971 0x11370, // 11370..11374; GRANTHA 5972 0x11375, // 11375..113FF; UNKNOWN 5973 0x11400, // 11400..11459; NEWA 5974 0x1145A, // 1145A ; UNKNOWN 5975 0x1145B, // 1145B ; NEWA 5976 0x1145C, // 1145C ; UNKNOWN 5977 0x1145D, // 1145D..1145E; NEWA 5978 0x1145F, // 1145F..1147F; UNKNOWN 5979 0x11480, // 11480..114C7; TIRHUTA 5980 0x114C8, // 114C8..114CF; UNKNOWN 5981 0x114D0, // 114D0..114D9; TIRHUTA 5982 0x114DA, // 114DA..1157F; UNKNOWN 5983 0x11580, // 11580..115B5; SIDDHAM 5984 0x115B6, // 115B6..115B7; UNKNOWN 5985 0x115B8, // 115B8..115DD; SIDDHAM 5986 0x115DE, // 115DE..115FF; UNKNOWN 5987 0x11600, // 11600..11644; MODI 5988 0x11645, // 11645..1164F; UNKNOWN 5989 0x11650, // 11650..11659; MODI 5990 0x1165A, // 1165A..1165F; UNKNOWN 5991 0x11660, // 11660..1166C; MONGOLIAN 5992 0X1166D, // 1166D..1167F; UNKNOWN 5993 0x11680, // 11680..116B7; TAKRI 5994 0x116B8, // 116B8..116BF; UNKNOWN 5995 0x116C0, // 116C0..116C9; TAKRI 5996 0x116CA, // 116CA..116FF; UNKNOWN 5997 0x11700, // 11700..1171A; AHOM 5998 0x1171B, // 1171B..1171C; UNKNOWN 5999 0x1171D, // 1171D..1172B; AHOM 6000 0x1172C, // 1172C..1172F; UNKNOWN 6001 0x11730, // 11730..1173F; AHOM 6002 0x11740, // 11740..117FF; UNKNOWN 6003 0x11800, // 11800..1183B; DOGRA 6004 0x1183C, // 1183C..1189F; UNKNOWN 6005 0x118A0, // 118A0..118F2; WARANG_CITI 6006 0x118F3, // 118F3..118FE; UNKNOWN 6007 0x118FF, // 118FF ; WARANG_CITI 6008 0x11900, // 11900..119FF; UNKNOWN 6009 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6010 0X11A48, // 
11A48..11A4F; UNKNOWN 6011 0x11A50, // 11A50..11A83; SOYOMBO 6012 0x11A84, // 11A84..11A85; UNKNOWN 6013 0x11A86, // 11A86..11AA2; SOYOMBO 6014 0x11AA3, // 11AA3..11ABF; UNKNOWN 6015 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6016 0x11AF9, // 11AF9..11BFF; UNKNOWN 6017 0x11C00, // 11C00..11C08; BHAIKSUKI 6018 0x11C09, // 11C09 ; UNKNOWN 6019 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6020 0x11C37, // 11C37 ; UNKNOWN 6021 0x11C38, // 11C38..11C45; BHAIKSUKI 6022 0x11C46, // 11C46..11C4F; UNKNOWN 6023 0x11C50, // 11C50..11C6C; BHAIKSUKI 6024 0x11C6D, // 11C6D..11C6F; UNKNOWN 6025 0x11C70, // 11C70..11C8F; MARCHEN 6026 0x11C90, // 11C90..11C91; UNKNOWN 6027 0x11C92, // 11C92..11CA7; MARCHEN 6028 0x11CA8, // 11CA8 ; UNKNOWN 6029 0x11CA9, // 11CA9..11CB6; MARCHEN 6030 0x11CB7, // 11CB7..11CFF; UNKNOWN 6031 0x11D00, // 11D00..11D06; MASARAM_GONDI 6032 0x11D07, // 11D07 ; UNKNOWN 6033 0x11D08, // 11D08..11D09; MASARAM_GONDI 6034 0x11D0A, // 11D0A ; UNKNOWN 6035 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6036 0x11D37, // 11D37..11D39; UNKNOWN 6037 0x11D3A, // 11D3A ; MASARAM_GONDI 6038 0x11D3B, // 11D3B ; UNKNOWN 6039 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6040 0x11D3E, // 11D3E ; UNKNOWN 6041 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6042 0x11D48, // 11D48..11D4F; UNKNOWN 6043 0x11D50, // 11D50..11D59; MASARAM_GONDI 6044 0x11D5A, // 11D5A..11D5F; UNKNOWN 6045 0x11D60, // 11D60..11D68; GUNJALA_GONDI 6046 0x11D69, // 11D69 ; UNKNOWN 6047 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6048 0x11D8F, // 11D8F ; UNKNOWN 6049 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6050 0x11D92, // 11D92 ; UNKNOWN 6051 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6052 0x11D99, // 11D99..11D9F; UNKNOWN 6053 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6054 0x11DAA, // 11DAA..11EDF; UNKNOWN 6055 0x11EE0, // 11EE0..11EF8; MAKASAR 6056 0x11EF9, // 11EF9..11FFF; UNKNOWN 6057 0x12000, // 12000..12399; CUNEIFORM 6058 0x1239A, // 1239A..123FF; UNKNOWN 6059 0x12400, // 12400..1246E; CUNEIFORM 6060 0x1246F, // 1246F ; UNKNOWN 6061 0x12470, // 12470..12474;
CUNEIFORM 6062 0x12475, // 12475..1247F; UNKNOWN 6063 0x12480, // 12480..12543; CUNEIFORM 6064 0x12544, // 12544..12FFF; UNKNOWN 6065 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 6066 0x1342F, // 1342F..143FF; UNKNOWN 6067 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6068 0x14647, // 14647..167FF; UNKNOWN 6069 0x16800, // 16800..16A38; BAMUM 6070 0x16A39, // 16A39..16A3F; UNKNOWN 6071 0x16A40, // 16A40..16A5E; MRO 6072 0x16A5F, // 16A5F ; UNKNOWN 6073 0x16A60, // 16A60..16A69; MRO 6074 0x16A6A, // 16A6A..16A6D; UNKNOWN 6075 0x16A6E, // 16A6E..16A6F; MRO 6076 0x16A70, // 16A70..16ACF; UNKNOWN 6077 0x16AD0, // 16AD0..16AED; BASSA_VAH 6078 0x16AEE, // 16AEE..16AEF; UNKNOWN 6079 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6080 0x16AF6, // 16AF6..16AFF; UNKNOWN 6081 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6082 0x16B46, // 16B46..16B4F; UNKNOWN 6083 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6084 0x16B5A, // 16B5A ; UNKNOWN 6085 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6086 0x16B62, // 16B62 ; UNKNOWN 6087 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6088 0x16B78, // 16B78..16B7C; UNKNOWN 6089 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6090 0x16B90, // 16B90..16E3F; UNKNOWN 6091 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6092 0x16E9B, // 16E9B..16EFF; UNKNOWN 6093 0x16F00, // 16F00..16F44; MIAO 6094 0x16F45, // 16F45..16F4F; UNKNOWN 6095 0x16F50, // 16F50..16F7E; MIAO 6096 0x16F7F, // 16F7F..16F8E; UNKNOWN 6097 0x16F8F, // 16F8F..16F9F; MIAO 6098 0x16FA0, // 16FA0..16FDF; UNKNOWN 6099 0x16FE0, // 16FE0 ; TANGUT 6100 0x16FE1, // 16FE1 ; NUSHU 6101 0x16FE2, // 16FE2..16FFF; UNKNOWN 6102 0x17000, // 17000..187F1; TANGUT 6103 0x187F2, // 187F2..187FF; UNKNOWN 6104 0x18800, // 18800..18AF2; TANGUT 6105 0x18AF3, // 18AF3..1AFFF; UNKNOWN 6106 0x1B000, // 1B000 ; KATAKANA 6107 0x1B001, // 1B001..1B11E; HIRAGANA 6108 0x1B11F, // 1B11F..1B16F; UNKNOWN 6109 0x1B170, // 1B170..1B2FB; NUSHU 6110 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6111 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6112 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 
6113 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6114 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6115 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6116 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6117 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6118 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6119 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6120 0x1BCA0, // 1BCA0..1BCA3; COMMON 6121 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 6122 0x1D000, // 1D000..1D0F5; COMMON 6123 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6124 0x1D100, // 1D100..1D126; COMMON 6125 0x1D127, // 1D127..1D128; UNKNOWN 6126 0x1D129, // 1D129..1D166; COMMON 6127 0x1D167, // 1D167..1D169; INHERITED 6128 0x1D16A, // 1D16A..1D17A; COMMON 6129 0x1D17B, // 1D17B..1D182; INHERITED 6130 0x1D183, // 1D183..1D184; COMMON 6131 0x1D185, // 1D185..1D18B; INHERITED 6132 0x1D18C, // 1D18C..1D1A9; COMMON 6133 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6134 0x1D1AE, // 1D1AE..1D1E8; COMMON 6135 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN 6136 0x1D200, // 1D200..1D245; GREEK 6137 0x1D246, // 1D246..1D2DF; UNKNOWN 6138 0x1D2E0, // 1D2E0..1D2F3; COMMON 6139 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6140 0x1D300, // 1D300..1D356; COMMON 6141 0x1D357, // 1D357..1D35F; UNKNOWN 6142 0x1D360, // 1D360..1D378; COMMON 6143 0x1D379, // 1D379..1D3FF; UNKNOWN 6144 0x1D400, // 1D400..1D454; COMMON 6145 0x1D455, // 1D455 ; UNKNOWN 6146 0x1D456, // 1D456..1D49C; COMMON 6147 0x1D49D, // 1D49D ; UNKNOWN 6148 0x1D49E, // 1D49E..1D49F; COMMON 6149 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6150 0x1D4A2, // 1D4A2 ; COMMON 6151 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6152 0x1D4A5, // 1D4A5..1D4A6; COMMON 6153 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6154 0x1D4A9, // 1D4A9..1D4AC; COMMON 6155 0x1D4AD, // 1D4AD ; UNKNOWN 6156 0x1D4AE, // 1D4AE..1D4B9; COMMON 6157 0x1D4BA, // 1D4BA ; UNKNOWN 6158 0x1D4BB, // 1D4BB ; COMMON 6159 0x1D4BC, // 1D4BC ; UNKNOWN 6160 0x1D4BD, // 1D4BD..1D4C3; COMMON 6161 0x1D4C4, // 1D4C4 ; UNKNOWN 6162 0x1D4C5, // 1D4C5..1D505; COMMON 6163 0x1D506, // 1D506 ; UNKNOWN 6164 0x1D507, // 1D507..1D50A; COMMON 6165 0x1D50B, // 1D50B..1D50C; 
UNKNOWN 6166 0x1D50D, // 1D50D..1D514; COMMON 6167 0x1D515, // 1D515 ; UNKNOWN 6168 0x1D516, // 1D516..1D51C; COMMON 6169 0x1D51D, // 1D51D ; UNKNOWN 6170 0x1D51E, // 1D51E..1D539; COMMON 6171 0x1D53A, // 1D53A ; UNKNOWN 6172 0x1D53B, // 1D53B..1D53E; COMMON 6173 0x1D53F, // 1D53F ; UNKNOWN 6174 0x1D540, // 1D540..1D544; COMMON 6175 0x1D545, // 1D545 ; UNKNOWN 6176 0x1D546, // 1D546 ; COMMON 6177 0x1D547, // 1D547..1D549; UNKNOWN 6178 0x1D54A, // 1D54A..1D550; COMMON 6179 0x1D551, // 1D551 ; UNKNOWN 6180 0x1D552, // 1D552..1D6A5; COMMON 6181 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6182 0x1D6A8, // 1D6A8..1D7CB; COMMON 6183 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6184 0x1D7CE, // 1D7CE..1D7FF; COMMON 6185 0x1D800, // 1D800..1DA8B; SIGNWRITING 6186 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6187 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6188 0x1DAA0, // 1DAA0 ; UNKNOWN 6189 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6190 0x1DAB0, // 1DAB0..1DFFF; UNKNOWN 6191 0x1E000, // 1E000..1E006; GLAGOLITIC 6192 0x1E007, // 1E007 ; UNKNOWN 6193 0x1E008, // 1E008..1E018; GLAGOLITIC 6194 0x1E019, // 1E019..1E01A; UNKNOWN 6195 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6196 0x1E022, // 1E022 ; UNKNOWN 6197 0x1E023, // 1E023..1E024; GLAGOLITIC 6198 0x1E025, // 1E025 ; UNKNOWN 6199 0x1E026, // 1E026..1E02A; GLAGOLITIC 6200 0x1E02B, // 1E02B..1E7FF; UNKNOWN 6201 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6202 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6203 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6204 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6205 0x1E900, // 1E900..1E94A; ADLAM 6206 0x1E94B, // 1E94B..1E94F; UNKNOWN 6207 0x1E950, // 1E950..1E959; ADLAM 6208 0x1E95A, // 1E95A..1E95D; UNKNOWN 6209 0x1E95E, // 1E95E..1E95F; ADLAM 6210 0x1E960, // 1E960..1EC70; UNKNOWN 6211 0x1EC71, // 1EC71..1ECB4; COMMON 6212 0x1ECB5, // 1ECB5..1EDFF; UNKNOWN 6213 0x1EE00, // 1EE00..1EE03; ARABIC 6214 0x1EE04, // 1EE04 ; UNKNOWN 6215 0x1EE05, // 1EE05..1EE1F; ARABIC 6216 0x1EE20, // 1EE20 ; UNKNOWN 6217 0x1EE21, // 1EE21..1EE22; ARABIC 6218 0x1EE23, // 1EE23 
; UNKNOWN 6219 0x1EE24, // 1EE24 ; ARABIC 6220 0x1EE25, // 1EE25..1EE26; UNKNOWN 6221 0x1EE27, // 1EE27 ; ARABIC 6222 0x1EE28, // 1EE28 ; UNKNOWN 6223 0x1EE29, // 1EE29..1EE32; ARABIC 6224 0x1EE33, // 1EE33 ; UNKNOWN 6225 0x1EE34, // 1EE34..1EE37; ARABIC 6226 0x1EE38, // 1EE38 ; UNKNOWN 6227 0x1EE39, // 1EE39 ; ARABIC 6228 0x1EE3A, // 1EE3A ; UNKNOWN 6229 0x1EE3B, // 1EE3B ; ARABIC 6230 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6231 0x1EE42, // 1EE42 ; ARABIC 6232 0x1EE43, // 1EE43..1EE46; UNKNOWN 6233 0x1EE47, // 1EE47 ; ARABIC 6234 0x1EE48, // 1EE48 ; UNKNOWN 6235 0x1EE49, // 1EE49 ; ARABIC 6236 0x1EE4A, // 1EE4A ; UNKNOWN 6237 0x1EE4B, // 1EE4B ; ARABIC 6238 0x1EE4C, // 1EE4C ; UNKNOWN 6239 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6240 0x1EE50, // 1EE50 ; UNKNOWN 6241 0x1EE51, // 1EE51..1EE52; ARABIC 6242 0x1EE53, // 1EE53 ; UNKNOWN 6243 0x1EE54, // 1EE54 ; ARABIC 6244 0x1EE55, // 1EE55..1EE56; UNKNOWN 6245 0x1EE57, // 1EE57 ; ARABIC 6246 0x1EE58, // 1EE58 ; UNKNOWN 6247 0x1EE59, // 1EE59 ; ARABIC 6248 0x1EE5A, // 1EE5A ; UNKNOWN 6249 0x1EE5B, // 1EE5B ; ARABIC 6250 0x1EE5C, // 1EE5C ; UNKNOWN 6251 0x1EE5D, // 1EE5D ; ARABIC 6252 0x1EE5E, // 1EE5E ; UNKNOWN 6253 0x1EE5F, // 1EE5F ; ARABIC 6254 0x1EE60, // 1EE60 ; UNKNOWN 6255 0x1EE61, // 1EE61..1EE62; ARABIC 6256 0x1EE63, // 1EE63 ; UNKNOWN 6257 0x1EE64, // 1EE64 ; ARABIC 6258 0x1EE65, // 1EE65..1EE66; UNKNOWN 6259 0x1EE67, // 1EE67..1EE6A; ARABIC 6260 0x1EE6B, // 1EE6B ; UNKNOWN 6261 0x1EE6C, // 1EE6C..1EE72; ARABIC 6262 0x1EE73, // 1EE73 ; UNKNOWN 6263 0x1EE74, // 1EE74..1EE77; ARABIC 6264 0x1EE78, // 1EE78 ; UNKNOWN 6265 0x1EE79, // 1EE79..1EE7C; ARABIC 6266 0x1EE7D, // 1EE7D ; UNKNOWN 6267 0x1EE7E, // 1EE7E ; ARABIC 6268 0x1EE7F, // 1EE7F ; UNKNOWN 6269 0x1EE80, // 1EE80..1EE89; ARABIC 6270 0x1EE8A, // 1EE8A ; UNKNOWN 6271 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6272 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6273 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6274 0x1EEA4, // 1EEA4 ; UNKNOWN 6275 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6276 0x1EEAA, // 1EEAA 
; UNKNOWN 6277 0x1EEAB, // 1EEAB..1EEBB; ARABIC 6278 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6279 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6280 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6281 0x1F000, // 1F000..1F02B; COMMON 6282 0x1F02C, // 1F02C..1F02F; UNKNOWN 6283 0x1F030, // 1F030..1F093; COMMON 6284 0x1F094, // 1F094..1F09F; UNKNOWN 6285 0x1F0A0, // 1F0A0..1F0AE; COMMON 6286 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6287 0x1F0B1, // 1F0B1..1F0BF; COMMON 6288 0x1F0C0, // 1F0C0 ; UNKNOWN 6289 0x1F0C1, // 1F0C1..1F0CF; COMMON 6290 0x1F0D0, // 1F0D0 ; UNKNOWN 6291 0x1F0D1, // 1F0D1..1F0F5; COMMON 6292 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6293 0x1F100, // 1F100..1F10C; COMMON 6294 0x1F10D, // 1F10D..1F10F; UNKNOWN 6295 0x1F110, // 1F110..1F16B; COMMON 6296 0x1F16C, // 1F16C..1F16F; UNKNOWN 6297 0x1F170, // 1F170..1F1AC; COMMON 6298 0x1F1AD, // 1F1AD..1F1E5; UNKNOWN 6299 0x1F1E6, // 1F1E6..1F1FF; COMMON 6300 0x1F200, // 1F200 ; HIRAGANA 6301 0x1F201, // 1F201..1F202; COMMON 6302 0x1F203, // 1F203..1F20F; UNKNOWN 6303 0x1F210, // 1F210..1F23B; COMMON 6304 0x1F23C, // 1F23C..1F23F; UNKNOWN 6305 0x1F240, // 1F240..1F248; COMMON 6306 0x1F249, // 1F249..1F24F; UNKNOWN 6307 0x1F250, // 1F250..1F251; COMMON 6308 0x1F252, // 1F252..1F25F; UNKNOWN 6309 0x1F260, // 1F260..1F265; COMMON 6310 0x1F266, // 1F266..1F2FF; UNKNOWN 6311 0x1F300, // 1F300..1F6D4; COMMON 6312 0x1F6D5, // 1F6D5..1F6DF; UNKNOWN 6313 0x1F6E0, // 1F6E0..1F6EC; COMMON 6314 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6315 0x1F6F0, // 1F6F0..1F6F9; COMMON 6316 0x1F6FA, // 1F6FA..1F6FF; UNKNOWN 6317 0x1F700, // 1F700..1F773; COMMON 6318 0x1F774, // 1F774..1F77F; UNKNOWN 6319 0x1F780, // 1F780..1F7D8; COMMON 6320 0x1F7D9, // 1F7D9..1F7FF; UNKNOWN 6321 0x1F800, // 1F800..1F80B; COMMON 6322 0x1F80C, // 1F80C..1F80F; UNKNOWN 6323 0x1F810, // 1F810..1F847; COMMON 6324 0x1F848, // 1F848..1F84F; UNKNOWN 6325 0x1F850, // 1F850..1F859; COMMON 6326 0x1F85A, // 1F85A..1F85F; UNKNOWN 6327 0x1F860, // 1F860..1F887; COMMON 6328 0x1F888, // 1F888..1F88F; UNKNOWN 6329 
0x1F890, // 1F890..1F8AD; COMMON 6330 0x1F8AE, // 1F8AE..1F8FF; UNKNOWN 6331 0x1F900, // 1F900..1F90B; COMMON 6332 0x1F90C, // 1F90C..1F90F; UNKNOWN 6333 0x1F910, // 1F910..1F93E; COMMON 6334 0x1F93F, // 1F93F ; UNKNOWN 6335 0x1F940, // 1F940..1F970; COMMON 6336 0x1F971, // 1F971..1F972; UNKNOWN 6337 0x1F973, // 1F973..1F976; COMMON 6338 0x1F977, // 1F977..1F979; UNKNOWN 6339 0x1F97A, // 1F97A ; COMMON 6340 0x1F97B, // 1F97B ; UNKNOWN 6341 0x1F97C, // 1F97C..1F9A2; COMMON 6342 0x1F9A3, // 1F9A3..1F9AF; UNKNOWN 6343 0x1F9B0, // 1F9B0..1F9B9; COMMON 6344 0x1F9BA, // 1F9BA..1F9BF; UNKNOWN 6345 0x1F9C0, // 1F9C0..1F9C2; COMMON 6346 0x1F9C3, // 1F9C3..1F9CF; UNKNOWN 6347 0x1F9D0, // 1F9D0..1F9FF; COMMON 6348 0x1FA00, // 1FA00..1FA5F; UNKNOWN 6349 0x1FA60, // 1FA60..1FA6D; COMMON 6350 0x1FA6E, // 1FA6E..1FFFF; UNKNOWN 6351 0x20000, // 20000..2A6D6; HAN 6352 0x2A6D7, // 2A6D7..2A6FF; UNKNOWN 6353 0x2A700, // 2A700..2B734; HAN 6354 0x2B735, // 2B735..2B73F; UNKNOWN 6355 0x2B740, // 2B740..2B81D; HAN 6356 0x2B81E, // 2B81E..2B81F; UNKNOWN 6357 0x2B820, // 2B820..2CEA1; HAN 6358 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 6359 0x2CEB0, // 2CEB0..2EBE0; HAN 6360 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN 6361 0x2F800, // 2F800..2FA1D; HAN 6362 0x2FA1E, // 2FA1E..E0000; UNKNOWN 6363 0xE0001, // E0001 ; COMMON 6364 0xE0002, // E0002..E001F; UNKNOWN 6365 0xE0020, // E0020..E007F; COMMON 6366 0xE0080, // E0080..E00FF; UNKNOWN 6367 0xE0100, // E0100..E01EF; INHERITED 6368 0xE01F0 // E01F0..10FFFF; UNKNOWN 6369 }; 6370 6371 private static final UnicodeScript[] scripts = { 6372 COMMON, // 0000..0040 6373 LATIN, // 0041..005A 6374 COMMON, // 005B..0060 6375 LATIN, // 0061..007A 6376 COMMON, // 007B..00A9 6377 LATIN, // 00AA 6378 COMMON, // 00AB..00B9 6379 LATIN, // 00BA 6380 COMMON, // 00BB..00BF 6381 LATIN, // 00C0..00D6 6382 COMMON, // 00D7 6383 LATIN, // 00D8..00F6 6384 COMMON, // 00F7 6385 LATIN, // 00F8..02B8 6386 COMMON, // 02B9..02DF 6387 LATIN, // 02E0..02E4 6388 COMMON, // 02E5..02E9 6389 
BOPOMOFO, // 02EA..02EB 6390 COMMON, // 02EC..02FF 6391 INHERITED, // 0300..036F 6392 GREEK, // 0370..0373 6393 COMMON, // 0374 6394 GREEK, // 0375..0377 6395 UNKNOWN, // 0378..0379 6396 GREEK, // 037A..037D 6397 COMMON, // 037E 6398 GREEK, // 037F 6399 UNKNOWN, // 0380..0383 6400 GREEK, // 0384 6401 COMMON, // 0385 6402 GREEK, // 0386 6403 COMMON, // 0387 6404 GREEK, // 0388..038A 6405 UNKNOWN, // 038B 6406 GREEK, // 038C 6407 UNKNOWN, // 038D 6408 GREEK, // 038E..03A1 6409 UNKNOWN, // 03A2 6410 GREEK, // 03A3..03E1 6411 COPTIC, // 03E2..03EF 6412 GREEK, // 03F0..03FF 6413 CYRILLIC, // 0400..0484 6414 INHERITED, // 0485..0486 6415 CYRILLIC, // 0487..052F 6416 UNKNOWN, // 0530 6417 ARMENIAN, // 0531..0556 6418 UNKNOWN, // 0557..0558 6419 ARMENIAN, // 0559..0588 6420 COMMON, // 0589 6421 ARMENIAN, // 058A 6422 UNKNOWN, // 058B..058C 6423 ARMENIAN, // 058D..058F 6424 UNKNOWN, // 0590 6425 HEBREW, // 0591..05C7 6426 UNKNOWN, // 05C8..05CF 6427 HEBREW, // 05D0..05EA 6428 UNKNOWN, // 05EB..05EE 6429 HEBREW, // 05EF..05F4 6430 UNKNOWN, // 05F5..05FF 6431 ARABIC, // 0600..0604 6432 COMMON, // 0605 6433 ARABIC, // 0606..060B 6434 COMMON, // 060C 6435 ARABIC, // 060D..061A 6436 COMMON, // 061B 6437 ARABIC, // 061C 6438 UNKNOWN, // 061D 6439 ARABIC, // 061E 6440 COMMON, // 061F 6441 ARABIC, // 0620..063F 6442 COMMON, // 0640 6443 ARABIC, // 0641..064A 6444 INHERITED, // 064B..0655 6445 ARABIC, // 0656..066F 6446 INHERITED, // 0670 6447 ARABIC, // 0671..06DC 6448 COMMON, // 06DD 6449 ARABIC, // 06DE..06FF 6450 SYRIAC, // 0700..070D 6451 UNKNOWN, // 070E 6452 SYRIAC, // 070F..074A 6453 UNKNOWN, // 074B..074C 6454 SYRIAC, // 074D..074F 6455 ARABIC, // 0750..077F 6456 THAANA, // 0780..07B1 6457 UNKNOWN, // 07B2..07BF 6458 NKO, // 07C0..07FA 6459 UNKNOWN, // 07FB..07FC 6460 NKO, // 07FD..07FF 6461 SAMARITAN, // 0800..082D 6462 UNKNOWN, // 082E..082F 6463 SAMARITAN, // 0830..083E 6464 UNKNOWN, // 083F 6465 MANDAIC, // 0840..085B 6466 UNKNOWN, // 085C..085D 6467 MANDAIC, // 085E 
6468 UNKNOWN, // 085F 6469 SYRIAC, // 0860..086A 6470 UNKNOWN, // 086B..089F 6471 ARABIC, // 08A0..08B4 6472 UNKNOWN, // 08B5 6473 ARABIC, // 08B6..08BD 6474 UNKNOWN, // 08BE..08D2 6475 ARABIC, // 08D3..08E1 6476 COMMON, // 08E2 6477 ARABIC, // 08E3..08FF 6478 DEVANAGARI, // 0900..0950 6479 INHERITED, // 0951..0952 6480 DEVANAGARI, // 0953..0963 6481 COMMON, // 0964..0965 6482 DEVANAGARI, // 0966..097F 6483 BENGALI, // 0980..0983 6484 UNKNOWN, // 0984 6485 BENGALI, // 0985..098C 6486 UNKNOWN, // 098D..098E 6487 BENGALI, // 098F..0990 6488 UNKNOWN, // 0991..0992 6489 BENGALI, // 0993..09A8 6490 UNKNOWN, // 09A9 6491 BENGALI, // 09AA..09B0 6492 UNKNOWN, // 09B1 6493 BENGALI, // 09B2 6494 UNKNOWN, // 09B3..09B5 6495 BENGALI, // 09B6..09B9 6496 UNKNOWN, // 09BA..09BB 6497 BENGALI, // 09BC..09C4 6498 UNKNOWN, // 09C5..09C6 6499 BENGALI, // 09C7..09C8 6500 UNKNOWN, // 09C9..09CA 6501 BENGALI, // 09CB..09CE 6502 UNKNOWN, // 09CF..09D6 6503 BENGALI, // 09D7 6504 UNKNOWN, // 09D8..09DB 6505 BENGALI, // 09DC..09DD 6506 UNKNOWN, // 09DE 6507 BENGALI, // 09DF..09E3 6508 UNKNOWN, // 09E4..09E5 6509 BENGALI, // 09E6..09FE 6510 UNKNOWN, // 09FF..0A00 6511 GURMUKHI, // 0A01..0A03 6512 UNKNOWN, // 0A04 6513 GURMUKHI, // 0A05..0A0A 6514 UNKNOWN, // 0A0B..0A0E 6515 GURMUKHI, // 0A0F..0A10 6516 UNKNOWN, // 0A11..0A12 6517 GURMUKHI, // 0A13..0A28 6518 UNKNOWN, // 0A29 6519 GURMUKHI, // 0A2A..0A30 6520 UNKNOWN, // 0A31 6521 GURMUKHI, // 0A32..0A33 6522 UNKNOWN, // 0A34 6523 GURMUKHI, // 0A35..0A36 6524 UNKNOWN, // 0A37 6525 GURMUKHI, // 0A38..0A39 6526 UNKNOWN, // 0A3A..0A3B 6527 GURMUKHI, // 0A3C 6528 UNKNOWN, // 0A3D 6529 GURMUKHI, // 0A3E..0A42 6530 UNKNOWN, // 0A43..0A46 6531 GURMUKHI, // 0A47..0A48 6532 UNKNOWN, // 0A49..0A4A 6533 GURMUKHI, // 0A4B..0A4D 6534 UNKNOWN, // 0A4E..0A50 6535 GURMUKHI, // 0A51 6536 UNKNOWN, // 0A52..0A58 6537 GURMUKHI, // 0A59..0A5C 6538 UNKNOWN, // 0A5D 6539 GURMUKHI, // 0A5E 6540 UNKNOWN, // 0A5F..0A65 6541 GURMUKHI, // 0A66..0A76 6542 UNKNOWN, // 
0A77..0A80 6543 GUJARATI, // 0A81..0A83 6544 UNKNOWN, // 0A84 6545 GUJARATI, // 0A85..0A8D 6546 UNKNOWN, // 0A8E 6547 GUJARATI, // 0A8F..0A91 6548 UNKNOWN, // 0A92 6549 GUJARATI, // 0A93..0AA8 6550 UNKNOWN, // 0AA9 6551 GUJARATI, // 0AAA..0AB0 6552 UNKNOWN, // 0AB1 6553 GUJARATI, // 0AB2..0AB3 6554 UNKNOWN, // 0AB4 6555 GUJARATI, // 0AB5..0AB9 6556 UNKNOWN, // 0ABA..0ABB 6557 GUJARATI, // 0ABC..0AC5 6558 UNKNOWN, // 0AC6 6559 GUJARATI, // 0AC7..0AC9 6560 UNKNOWN, // 0ACA 6561 GUJARATI, // 0ACB..0ACD 6562 UNKNOWN, // 0ACE..0ACF 6563 GUJARATI, // 0AD0 6564 UNKNOWN, // 0AD1..0ADF 6565 GUJARATI, // 0AE0..0AE3 6566 UNKNOWN, // 0AE4..0AE5 6567 GUJARATI, // 0AE6..0AF1 6568 UNKNOWN, // 0AF2..0AF8 6569 GUJARATI, // 0AF9..0AFF 6570 UNKNOWN, // 0B00 6571 ORIYA, // 0B01..0B03 6572 UNKNOWN, // 0B04 6573 ORIYA, // 0B05..0B0C 6574 UNKNOWN, // 0B0D..0B0E 6575 ORIYA, // 0B0F..0B10 6576 UNKNOWN, // 0B11..0B12 6577 ORIYA, // 0B13..0B28 6578 UNKNOWN, // 0B29 6579 ORIYA, // 0B2A..0B30 6580 UNKNOWN, // 0B31 6581 ORIYA, // 0B32..0B33 6582 UNKNOWN, // 0B34 6583 ORIYA, // 0B35..0B39 6584 UNKNOWN, // 0B3A..0B3B 6585 ORIYA, // 0B3C..0B44 6586 UNKNOWN, // 0B45..0B46 6587 ORIYA, // 0B47..0B48 6588 UNKNOWN, // 0B49..0B4A 6589 ORIYA, // 0B4B..0B4D 6590 UNKNOWN, // 0B4E..0B55 6591 ORIYA, // 0B56..0B57 6592 UNKNOWN, // 0B58..0B5B 6593 ORIYA, // 0B5C..0B5D 6594 UNKNOWN, // 0B5E 6595 ORIYA, // 0B5F..0B63 6596 UNKNOWN, // 0B64..0B65 6597 ORIYA, // 0B66..0B77 6598 UNKNOWN, // 0B78..0B81 6599 TAMIL, // 0B82..0B83 6600 UNKNOWN, // 0B84 6601 TAMIL, // 0B85..0B8A 6602 UNKNOWN, // 0B8B..0B8D 6603 TAMIL, // 0B8E..0B90 6604 UNKNOWN, // 0B91 6605 TAMIL, // 0B92..0B95 6606 UNKNOWN, // 0B96..0B98 6607 TAMIL, // 0B99..0B9A 6608 UNKNOWN, // 0B9B 6609 TAMIL, // 0B9C 6610 UNKNOWN, // 0B9D 6611 TAMIL, // 0B9E..0B9F 6612 UNKNOWN, // 0BA0..0BA2 6613 TAMIL, // 0BA3..0BA4 6614 UNKNOWN, // 0BA5..0BA7 6615 TAMIL, // 0BA8..0BAA 6616 UNKNOWN, // 0BAB..0BAD 6617 TAMIL, // 0BAE..0BB9 6618 UNKNOWN, // 0BBA..0BBD 6619 TAMIL, // 
0BBE..0BC2 6620 UNKNOWN, // 0BC3..0BC5 6621 TAMIL, // 0BC6..0BC8 6622 UNKNOWN, // 0BC9 6623 TAMIL, // 0BCA..0BCD 6624 UNKNOWN, // 0BCE..0BCF 6625 TAMIL, // 0BD0 6626 UNKNOWN, // 0BD1..0BD6 6627 TAMIL, // 0BD7 6628 UNKNOWN, // 0BD8..0BE5 6629 TAMIL, // 0BE6..0BFA 6630 UNKNOWN, // 0BFB..0BFF 6631 TELUGU, // 0C00..0C0C 6632 UNKNOWN, // 0C0D 6633 TELUGU, // 0C0E..0C10 6634 UNKNOWN, // 0C11 6635 TELUGU, // 0C12..0C28 6636 UNKNOWN, // 0C29 6637 TELUGU, // 0C2A..0C39 6638 UNKNOWN, // 0C3A..0C3C 6639 TELUGU, // 0C3D..0C44 6640 UNKNOWN, // 0C45 6641 TELUGU, // 0C46..0C48 6642 UNKNOWN, // 0C49 6643 TELUGU, // 0C4A..0C4D 6644 UNKNOWN, // 0C4E..0C54 6645 TELUGU, // 0C55..0C56 6646 UNKNOWN, // 0C57 6647 TELUGU, // 0C58..0C5A 6648 UNKNOWN, // 0C5B..0C5F 6649 TELUGU, // 0C60..0C63 6650 UNKNOWN, // 0C64..0C65 6651 TELUGU, // 0C66..0C6F 6652 UNKNOWN, // 0C70..0C77 6653 TELUGU, // 0C78..0C7F 6654 KANNADA, // 0C80..0C8C 6655 UNKNOWN, // 0C8D 6656 KANNADA, // 0C8E..0C90 6657 UNKNOWN, // 0C91 6658 KANNADA, // 0C92..0CA8 6659 UNKNOWN, // 0CA9 6660 KANNADA, // 0CAA..0CB3 6661 UNKNOWN, // 0CB4 6662 KANNADA, // 0CB5..0CB9 6663 UNKNOWN, // 0CBA..0CBB 6664 KANNADA, // 0CBC..0CC4 6665 UNKNOWN, // 0CC5 6666 KANNADA, // 0CC6..0CC8 6667 UNKNOWN, // 0CC9 6668 KANNADA, // 0CCA..0CCD 6669 UNKNOWN, // 0CCE..0CD4 6670 KANNADA, // 0CD5..0CD6 6671 UNKNOWN, // 0CD7..0CDD 6672 KANNADA, // 0CDE 6673 UNKNOWN, // 0CDF 6674 KANNADA, // 0CE0..0CE3 6675 UNKNOWN, // 0CE4..0CE5 6676 KANNADA, // 0CE6..0CEF 6677 UNKNOWN, // 0CF0 6678 KANNADA, // 0CF1..0CF2 6679 UNKNOWN, // 0CF3..0CFF 6680 MALAYALAM, // 0D00..0D03 6681 UNKNOWN, // 0D04 6682 MALAYALAM, // 0D05..0D0C 6683 UNKNOWN, // 0D0D 6684 MALAYALAM, // 0D0E..0D10 6685 UNKNOWN, // 0D11 6686 MALAYALAM, // 0D12..0D44 6687 UNKNOWN, // 0D45 6688 MALAYALAM, // 0D46..0D48 6689 UNKNOWN, // 0D49 6690 MALAYALAM, // 0D4A..0D4F 6691 UNKNOWN, // 0D50..0D53 6692 MALAYALAM, // 0D54..0D63 6693 UNKNOWN, // 0D64..0D65 6694 MALAYALAM, // 0D66..0D7F 6695 UNKNOWN, // 0D80..0D81 6696 
SINHALA, // 0D82..0D83 6697 UNKNOWN, // 0D84 6698 SINHALA, // 0D85..0D96 6699 UNKNOWN, // 0D97..0D99 6700 SINHALA, // 0D9A..0DB1 6701 UNKNOWN, // 0DB2 6702 SINHALA, // 0DB3..0DBB 6703 UNKNOWN, // 0DBC 6704 SINHALA, // 0DBD 6705 UNKNOWN, // 0DBE..0DBF 6706 SINHALA, // 0DC0..0DC6 6707 UNKNOWN, // 0DC7..0DC9 6708 SINHALA, // 0DCA 6709 UNKNOWN, // 0DCB..0DCE 6710 SINHALA, // 0DCF..0DD4 6711 UNKNOWN, // 0DD5 6712 SINHALA, // 0DD6 6713 UNKNOWN, // 0DD7 6714 SINHALA, // 0DD8..0DDF 6715 UNKNOWN, // 0DE0..0DE5 6716 SINHALA, // 0DE6..0DEF 6717 UNKNOWN, // 0DF0..0DF1 6718 SINHALA, // 0DF2..0DF4 6719 UNKNOWN, // 0DF5..0E00 6720 THAI, // 0E01..0E3A 6721 UNKNOWN, // 0E3B..0E3E 6722 COMMON, // 0E3F 6723 THAI, // 0E40..0E5B 6724 UNKNOWN, // 0E5C..0E80 6725 LAO, // 0E81..0E82 6726 UNKNOWN, // 0E83 6727 LAO, // 0E84 6728 UNKNOWN, // 0E85..0E86 6729 LAO, // 0E87..0E88 6730 UNKNOWN, // 0E89 6731 LAO, // 0E8A 6732 UNKNOWN, // 0E8B..0E8C 6733 LAO, // 0E8D 6734 UNKNOWN, // 0E8E..0E93 6735 LAO, // 0E94..0E97 6736 UNKNOWN, // 0E98 6737 LAO, // 0E99..0E9F 6738 UNKNOWN, // 0EA0 6739 LAO, // 0EA1..0EA3 6740 UNKNOWN, // 0EA4 6741 LAO, // 0EA5 6742 UNKNOWN, // 0EA6 6743 LAO, // 0EA7 6744 UNKNOWN, // 0EA8..0EA9 6745 LAO, // 0EAA..0EAB 6746 UNKNOWN, // 0EAC 6747 LAO, // 0EAD..0EB9 6748 UNKNOWN, // 0EBA 6749 LAO, // 0EBB..0EBD 6750 UNKNOWN, // 0EBE..0EBF 6751 LAO, // 0EC0..0EC4 6752 UNKNOWN, // 0EC5 6753 LAO, // 0EC6 6754 UNKNOWN, // 0EC7 6755 LAO, // 0EC8..0ECD 6756 UNKNOWN, // 0ECE..0ECF 6757 LAO, // 0ED0..0ED9 6758 UNKNOWN, // 0EDA..0EDB 6759 LAO, // 0EDC..0EDF 6760 UNKNOWN, // 0EE0..0EFF 6761 TIBETAN, // 0F00..0F47 6762 UNKNOWN, // 0F48 6763 TIBETAN, // 0F49..0F6C 6764 UNKNOWN, // 0F6D..0F70 6765 TIBETAN, // 0F71..0F97 6766 UNKNOWN, // 0F98 6767 TIBETAN, // 0F99..0FBC 6768 UNKNOWN, // 0FBD 6769 TIBETAN, // 0FBE..0FCC 6770 UNKNOWN, // 0FCD 6771 TIBETAN, // 0FCE..0FD4 6772 COMMON, // 0FD5..0FD8 6773 TIBETAN, // 0FD9..0FDA 6774 UNKNOWN, // 0FDB..FFF 6775 MYANMAR, // 1000..109F 6776 GEORGIAN, // 
10A0..10C5 6777 UNKNOWN, // 10C6 6778 GEORGIAN, // 10C7 6779 UNKNOWN, // 10C8..10CC 6780 GEORGIAN, // 10CD 6781 UNKNOWN, // 10CE..10CF 6782 GEORGIAN, // 10D0..10FA 6783 COMMON, // 10FB 6784 GEORGIAN, // 10FC..10FF 6785 HANGUL, // 1100..11FF 6786 ETHIOPIC, // 1200..1248 6787 UNKNOWN, // 1249 6788 ETHIOPIC, // 124A..124D 6789 UNKNOWN, // 124E..124F 6790 ETHIOPIC, // 1250..1256 6791 UNKNOWN, // 1257 6792 ETHIOPIC, // 1258 6793 UNKNOWN, // 1259 6794 ETHIOPIC, // 125A..125D 6795 UNKNOWN, // 125E..125F 6796 ETHIOPIC, // 1260..1288 6797 UNKNOWN, // 1289 6798 ETHIOPIC, // 128A..128D 6799 UNKNOWN, // 128E..128F 6800 ETHIOPIC, // 1290..12B0 6801 UNKNOWN, // 12B1 6802 ETHIOPIC, // 12B2..12B5 6803 UNKNOWN, // 12B6..12B7 6804 ETHIOPIC, // 12B8..12BE 6805 UNKNOWN, // 12BF 6806 ETHIOPIC, // 12C0 6807 UNKNOWN, // 12C1 6808 ETHIOPIC, // 12C2..12C5 6809 UNKNOWN, // 12C6..12C7 6810 ETHIOPIC, // 12C8..12D6 6811 UNKNOWN, // 12D7 6812 ETHIOPIC, // 12D8..1310 6813 UNKNOWN, // 1311 6814 ETHIOPIC, // 1312..1315 6815 UNKNOWN, // 1316..1317 6816 ETHIOPIC, // 1318..135A 6817 UNKNOWN, // 135B..135C 6818 ETHIOPIC, // 135D..137C 6819 UNKNOWN, // 137D..137F 6820 ETHIOPIC, // 1380..1399 6821 UNKNOWN, // 139A..139F 6822 CHEROKEE, // 13A0..13F5 6823 UNKNOWN, // 13F6..13F7 6824 CHEROKEE, // 13F8..13FD 6825 UNKNOWN, // 13FE..13FF 6826 CANADIAN_ABORIGINAL, // 1400..167F 6827 OGHAM, // 1680..169C 6828 UNKNOWN, // 169D..169F 6829 RUNIC, // 16A0..16EA 6830 COMMON, // 16EB..16ED 6831 RUNIC, // 16EE..16F8 6832 UNKNOWN, // 16F9..16FF 6833 TAGALOG, // 1700..170C 6834 UNKNOWN, // 170D 6835 TAGALOG, // 170E..1714 6836 UNKNOWN, // 1715..171F 6837 HANUNOO, // 1720..1734 6838 COMMON, // 1735..1736 6839 UNKNOWN, // 1737..173F 6840 BUHID, // 1740..1753 6841 UNKNOWN, // 1754..175F 6842 TAGBANWA, // 1760..176C 6843 UNKNOWN, // 176D 6844 TAGBANWA, // 176E..1770 6845 UNKNOWN, // 1771 6846 TAGBANWA, // 1772..1773 6847 UNKNOWN, // 1774..177F 6848 KHMER, // 1780..17DD 6849 UNKNOWN, // 17DE..17DF 6850 KHMER, // 17E0..17E9 
6851 UNKNOWN, // 17EA..17EF 6852 KHMER, // 17F0..17F9 6853 UNKNOWN, // 17FA..17FF 6854 MONGOLIAN, // 1800..1801 6855 COMMON, // 1802..1803 6856 MONGOLIAN, // 1804 6857 COMMON, // 1805 6858 MONGOLIAN, // 1806..180E 6859 UNKNOWN, // 180F 6860 MONGOLIAN, // 1810..1819 6861 UNKNOWN, // 181A..181F 6862 MONGOLIAN, // 1820..1878 6863 UNKNOWN, // 1879..187F 6864 MONGOLIAN, // 1880..18AA 6865 UNKNOWN, // 18AB..18AF 6866 CANADIAN_ABORIGINAL, // 18B0..18F5 6867 UNKNOWN, // 18F6..18FF 6868 LIMBU, // 1900..191E 6869 UNKNOWN, // 191F 6870 LIMBU, // 1920..192B 6871 UNKNOWN, // 192C..192F 6872 LIMBU, // 1930..193B 6873 UNKNOWN, // 193C..193F 6874 LIMBU, // 1940 6875 UNKNOWN, // 1941..1943 6876 LIMBU, // 1944..194F 6877 TAI_LE, // 1950..196D 6878 UNKNOWN, // 196E..196F 6879 TAI_LE, // 1970..1974 6880 UNKNOWN, // 1975..197F 6881 NEW_TAI_LUE, // 1980..19AB 6882 UNKNOWN, // 19AC..19AF 6883 NEW_TAI_LUE, // 19B0..19C9 6884 UNKNOWN, // 19CA..19CF 6885 NEW_TAI_LUE, // 19D0..19DA 6886 UNKNOWN, // 19DB..19DD 6887 NEW_TAI_LUE, // 19DE..19DF 6888 KHMER, // 19E0..19FF 6889 BUGINESE, // 1A00..1A1B 6890 UNKNOWN, // 1A1C..1A1D 6891 BUGINESE, // 1A1E..1A1F 6892 TAI_THAM, // 1A20..1A5E 6893 UNKNOWN, // 1A5F 6894 TAI_THAM, // 1A60..1A7C 6895 UNKNOWN, // 1A7D..1A7E 6896 TAI_THAM, // 1A7F..1A89 6897 UNKNOWN, // 1A8A..1A8F 6898 TAI_THAM, // 1A90..1A99 6899 UNKNOWN, // 1A9A..1A9F 6900 TAI_THAM, // 1AA0..1AAD 6901 UNKNOWN, // 1AAE..1AAF 6902 INHERITED, // 1AB0..1ABE 6903 UNKNOWN, // 1ABF..1AFF 6904 BALINESE, // 1B00..1B4B 6905 UNKNOWN, // 1B4C..1B4F 6906 BALINESE, // 1B50..1B7C 6907 UNKNOWN, // 1B7D..1B7F 6908 SUNDANESE, // 1B80..1BBF 6909 BATAK, // 1BC0..1BF3 6910 UNKNOWN, // 1BF4..1BFB 6911 BATAK, // 1BFC..1BFF 6912 LEPCHA, // 1C00..1C37 6913 UNKNOWN, // 1C38..1C3A 6914 LEPCHA, // 1C3B..1C49 6915 UNKNOWN, // 1C4A..1C4C 6916 LEPCHA, // 1C4D..1C4F 6917 OL_CHIKI, // 1C50..1C7F 6918 CYRILLIC, // 1C80..1C88 6919 UNKNOWN, // 1C89 6920 GEORGIAN, // 1C90..1CBA 6921 UNKNOWN, // 1CBB..1CBC 6922 GEORGIAN, // 
1CBD..1CBF 6923 SUNDANESE, // 1CC0..1CC7 6924 UNKNOWN, // 1CC8..1CCF 6925 INHERITED, // 1CD0..1CD2 6926 COMMON, // 1CD3 6927 INHERITED, // 1CD4..1CE0 6928 COMMON, // 1CE1 6929 INHERITED, // 1CE2..1CE8 6930 COMMON, // 1CE9..1CEC 6931 INHERITED, // 1CED 6932 COMMON, // 1CEE..1CF3 6933 INHERITED, // 1CF4 6934 COMMON, // 1CF5..1CF7 6935 INHERITED, // 1CF8..1CF9 6936 UNKNOWN, // 1CFA..1CFF 6937 LATIN, // 1D00..1D25 6938 GREEK, // 1D26..1D2A 6939 CYRILLIC, // 1D2B 6940 LATIN, // 1D2C..1D5C 6941 GREEK, // 1D5D..1D61 6942 LATIN, // 1D62..1D65 6943 GREEK, // 1D66..1D6A 6944 LATIN, // 1D6B..1D77 6945 CYRILLIC, // 1D78 6946 LATIN, // 1D79..1DBE 6947 GREEK, // 1DBF 6948 INHERITED, // 1DC0..1DF9 6949 UNKNOWN, // 1DFA 6950 INHERITED, // 1DFB..1DFF 6951 LATIN, // 1E00..1EFF 6952 GREEK, // 1F00..1F15 6953 UNKNOWN, // 1F16..1F17 6954 GREEK, // 1F18..1F1D 6955 UNKNOWN, // 1F1E..1F1F 6956 GREEK, // 1F20..1F45 6957 UNKNOWN, // 1F46..1F47 6958 GREEK, // 1F48..1F4D 6959 UNKNOWN, // 1F4E..1F4F 6960 GREEK, // 1F50..1F57 6961 UNKNOWN, // 1F58 6962 GREEK, // 1F59 6963 UNKNOWN, // 1F5A 6964 GREEK, // 1F5B 6965 UNKNOWN, // 1F5C 6966 GREEK, // 1F5D 6967 UNKNOWN, // 1F5E 6968 GREEK, // 1F5F..1F7D 6969 UNKNOWN, // 1F7E..1F7F 6970 GREEK, // 1F80..1FB4 6971 UNKNOWN, // 1FB5 6972 GREEK, // 1FB6..1FC4 6973 UNKNOWN, // 1FC5 6974 GREEK, // 1FC6..1FD3 6975 UNKNOWN, // 1FD4..1FD5 6976 GREEK, // 1FD6..1FDB 6977 UNKNOWN, // 1FDC 6978 GREEK, // 1FDD..1FEF 6979 UNKNOWN, // 1FF0..1FF1 6980 GREEK, // 1FF2..1FF4 6981 UNKNOWN, // 1FF5 6982 GREEK, // 1FF6..1FFE 6983 UNKNOWN, // 1FFF 6984 COMMON, // 2000..200B 6985 INHERITED, // 200C..200D 6986 COMMON, // 200E..2064 6987 UNKNOWN, // 2065 6988 COMMON, // 2066..2070 6989 LATIN, // 2071 6990 UNKNOWN, // 2072..2073 6991 COMMON, // 2074..207E 6992 LATIN, // 207F 6993 COMMON, // 2080..208E 6994 UNKNOWN, // 208F 6995 LATIN, // 2090..209C 6996 UNKNOWN, // 209D..209F 6997 COMMON, // 20A0..20BF 6998 UNKNOWN, // 20C0..20CF 6999 INHERITED, // 20D0..20F0 7000 UNKNOWN, // 
20F1..20FF 7001 COMMON, // 2100..2125 7002 GREEK, // 2126 7003 COMMON, // 2127..2129 7004 LATIN, // 212A..212B 7005 COMMON, // 212C..2131 7006 LATIN, // 2132 7007 COMMON, // 2133..214D 7008 LATIN, // 214E 7009 COMMON, // 214F..215F 7010 LATIN, // 2160..2188 7011 COMMON, // 2189..218B 7012 UNKNOWN, // 218C..218F 7013 COMMON, // 2190..2426 7014 UNKNOWN, // 2427..243F 7015 COMMON, // 2440..244A 7016 UNKNOWN, // 244B..245F 7017 COMMON, // 2460..27FF 7018 BRAILLE, // 2800..28FF 7019 COMMON, // 2900..2B73 7020 UNKNOWN, // 2B74..2B75 7021 COMMON, // 2B76..2B95 7022 UNKNOWN, // 2B96..2B97 7023 COMMON, // 2B98..2BC8 7024 UNKNOWN, // 2BC9 7025 COMMON, // 2BCA..2BFE 7026 UNKNOWN, // 0x2BFF 7027 GLAGOLITIC, // 2C00..2C2E 7028 UNKNOWN, // 2C2F 7029 GLAGOLITIC, // 2C30..2C5E 7030 UNKNOWN, // 2C5F 7031 LATIN, // 2C60..2C7F 7032 COPTIC, // 2C80..2CF3 7033 UNKNOWN, // 2CF4..2CF8 7034 COPTIC, // 2CF9..2CFF 7035 GEORGIAN, // 2D00..2D25 7036 UNKNOWN, // 2D26 7037 GEORGIAN, // 2D27 7038 UNKNOWN, // 2D28..2D2C 7039 GEORGIAN, // 2D2D 7040 UNKNOWN, // 2D2E..2D2F 7041 TIFINAGH, // 2D30..2D67 7042 UNKNOWN, // 2D68..2D6E 7043 TIFINAGH, // 2D6F..2D70 7044 UNKNOWN, // 2D71..2D7E 7045 TIFINAGH, // 2D7F 7046 ETHIOPIC, // 2D80..2D96 7047 UNKNOWN, // 2D97..2D9F 7048 ETHIOPIC, // 2DA0..2DA6 7049 UNKNOWN, // 2DA7 7050 ETHIOPIC, // 2DA8..2DAE 7051 UNKNOWN, // 2DAF 7052 ETHIOPIC, // 2DB0..2DB6 7053 UNKNOWN, // 2DB7 7054 ETHIOPIC, // 2DB8..2DBE 7055 UNKNOWN, // 2DBF 7056 ETHIOPIC, // 2DC0..2DC6 7057 UNKNOWN, // 2DC7 7058 ETHIOPIC, // 2DC8..2DCE 7059 UNKNOWN, // 2DCF 7060 ETHIOPIC, // 2DD0..2DD6 7061 UNKNOWN, // 2DD7 7062 ETHIOPIC, // 2DD8..2DDE 7063 UNKNOWN, // 2DDF 7064 CYRILLIC, // 2DE0..2DFF 7065 COMMON, // 2E00..2E4E 7066 UNKNOWN, // 2E4F..2E7F 7067 HAN, // 2E80..2E99 7068 UNKNOWN, // 2E9A 7069 HAN, // 2E9B..2EF3 7070 UNKNOWN, // 2EF4..2EFF 7071 HAN, // 2F00..2FD5 7072 UNKNOWN, // 2FD6..2FEF 7073 COMMON, // 2FF0..2FFB 7074 UNKNOWN, // 2FFC..2FFF 7075 COMMON, // 3000..3004 7076 HAN, // 3005 7077 
COMMON, // 3006 7078 HAN, // 3007 7079 COMMON, // 3008..3020 7080 HAN, // 3021..3029 7081 INHERITED, // 302A..302D 7082 HANGUL, // 302E..302F 7083 COMMON, // 3030..3037 7084 HAN, // 3038..303B 7085 COMMON, // 303C..303F 7086 UNKNOWN, // 3040 7087 HIRAGANA, // 3041..3096 7088 UNKNOWN, // 3097..3098 7089 INHERITED, // 3099..309A 7090 COMMON, // 309B..309C 7091 HIRAGANA, // 309D..309F 7092 COMMON, // 30A0 7093 KATAKANA, // 30A1..30FA 7094 COMMON, // 30FB..30FC 7095 KATAKANA, // 30FD..30FF 7096 UNKNOWN, // 3100..3104 7097 BOPOMOFO, // 3105..312F 7098 UNKNOWN, // 3130 7099 HANGUL, // 3131..318E 7100 UNKNOWN, // 318F 7101 COMMON, // 3190..319F 7102 BOPOMOFO, // 31A0..31BA 7103 UNKNOWN, // 31BB..31BF 7104 COMMON, // 31C0..31E3 7105 UNKNOWN, // 31E4..31EF 7106 KATAKANA, // 31F0..31FF 7107 HANGUL, // 3200..321E 7108 UNKNOWN, // 321F 7109 COMMON, // 3220..325F 7110 HANGUL, // 3260..327E 7111 COMMON, // 327F..32CF 7112 KATAKANA, // 32D0..32FE 7113 COMMON, // 32FF 7114 KATAKANA, // 3300..3357 7115 COMMON, // 3358..33FF 7116 HAN, // 3400..4DB5 7117 UNKNOWN, // 4DB6..4DBF 7118 COMMON, // 4DC0..4DFF 7119 HAN, // 4E00..9FEF 7120 UNKNOWN, // 9FF0..9FFF 7121 YI, // A000..A48C 7122 UNKNOWN, // A48D..A48F 7123 YI, // A490..A4C6 7124 UNKNOWN, // A4C7..A4CF 7125 LISU, // A4D0..A4FF 7126 VAI, // A500..A62B 7127 UNKNOWN, // A62C..A63F 7128 CYRILLIC, // A640..A69F 7129 BAMUM, // A6A0..A6F7 7130 UNKNOWN, // A6F8..A6FF 7131 COMMON, // A700..A721 7132 LATIN, // A722..A787 7133 COMMON, // A788..A78A 7134 LATIN, // A78B..A7B9 7135 UNKNOWN, // A7C0..A7F6 7136 LATIN, // A7F7..A7FF 7137 SYLOTI_NAGRI, // A800..A82B 7138 UNKNOWN, // A82C..A82F 7139 COMMON, // A830..A839 7140 UNKNOWN, // A83A..A83F 7141 PHAGS_PA, // A840..A877 7142 UNKNOWN, // A878..A87F 7143 SAURASHTRA, // A880..A8C5 7144 UNKNOWN, // A8C6..A8CD 7145 SAURASHTRA, // A8CE..A8D9 7146 UNKNOWN, // A8DA..A8DF 7147 DEVANAGARI, // A8E0..A8FF 7148 KAYAH_LI, // A900..A92D 7149 COMMON, // A92E 7150 KAYAH_LI, // A92F 7151 REJANG, // A930..A953 
7152 UNKNOWN, // A954..A95E 7153 REJANG, // A95F 7154 HANGUL, // A960..A97C 7155 UNKNOWN, // A97D..A97F 7156 JAVANESE, // A980..A9CD 7157 UNKNOWN, // A9CE 7158 COMMON, // A9CF 7159 JAVANESE, // A9D0..A9D9 7160 UNKNOWN, // A9DA..A9DD 7161 JAVANESE, // A9DE..A9DF 7162 MYANMAR, // A9E0..A9FE 7163 UNKNOWN, // A9FF 7164 CHAM, // AA00..AA36 7165 UNKNOWN, // AA37..AA3F 7166 CHAM, // AA40..AA4D 7167 UNKNOWN, // AA4E..AA4F 7168 CHAM, // AA50..AA59 7169 UNKNOWN, // AA5A..AA5B 7170 CHAM, // AA5C..AA5F 7171 MYANMAR, // AA60..AA7F 7172 TAI_VIET, // AA80..AAC2 7173 UNKNOWN, // AAC3..AADA 7174 TAI_VIET, // AADB..AADF 7175 MEETEI_MAYEK, // AAE0..AAF6 7176 UNKNOWN, // AAF7..AB00 7177 ETHIOPIC, // AB01..AB06 7178 UNKNOWN, // AB07..AB08 7179 ETHIOPIC, // AB09..AB0E 7180 UNKNOWN, // AB0F..AB10 7181 ETHIOPIC, // AB11..AB16 7182 UNKNOWN, // AB17..AB1F 7183 ETHIOPIC, // AB20..AB26 7184 UNKNOWN, // AB27 7185 ETHIOPIC, // AB28..AB2E 7186 UNKNOWN, // AB2F 7187 LATIN, // AB30..AB5A 7188 COMMON, // AB5B 7189 LATIN, // AB5C..AB64 7190 GREEK, // AB65 7191 UNKNOWN, // AB66..AB6F 7192 CHEROKEE, // AB70..ABBF 7193 MEETEI_MAYEK, // ABC0..ABED 7194 UNKNOWN, // ABEE..ABEF 7195 MEETEI_MAYEK, // ABF0..ABF9 7196 UNKNOWN, // ABFA..ABFF 7197 HANGUL, // AC00..D7A3 7198 UNKNOWN, // D7A4..D7AF 7199 HANGUL, // D7B0..D7C6 7200 UNKNOWN, // D7C7..D7CA 7201 HANGUL, // D7CB..D7FB 7202 UNKNOWN, // D7FC..F8FF 7203 HAN, // F900..FA6D 7204 UNKNOWN, // FA6E..FA6F 7205 HAN, // FA70..FAD9 7206 UNKNOWN, // FADA..FAFF 7207 LATIN, // FB00..FB06 7208 UNKNOWN, // FB07..FB12 7209 ARMENIAN, // FB13..FB17 7210 UNKNOWN, // FB18..FB1C 7211 HEBREW, // FB1D..FB36 7212 UNKNOWN, // FB37 7213 HEBREW, // FB38..FB3C 7214 UNKNOWN, // FB3D 7215 HEBREW, // FB3E 7216 UNKNOWN, // FB3F 7217 HEBREW, // FB40..FB41 7218 UNKNOWN, // FB42 7219 HEBREW, // FB43..FB44 7220 UNKNOWN, // FB45 7221 HEBREW, // FB46..FB4F 7222 ARABIC, // FB50..FBC1 7223 UNKNOWN, // FBC2..FBD2 7224 ARABIC, // FBD3..FD3D 7225 COMMON, // FD3E..FD3F 7226 UNKNOWN, // FD40..FD4F 
7227 ARABIC, // FD50..FD8F 7228 UNKNOWN, // FD90..FD91 7229 ARABIC, // FD92..FDC7 7230 UNKNOWN, // FDC8..FDEF 7231 ARABIC, // FDF0..FDFD 7232 UNKNOWN, // FDFE..FDFF 7233 INHERITED, // FE00..FE0F 7234 COMMON, // FE10..FE19 7235 UNKNOWN, // FE1A..FE1F 7236 INHERITED, // FE20..FE2D 7237 CYRILLIC, // FE2E..FE2F 7238 COMMON, // FE30..FE52 7239 UNKNOWN, // FE53 7240 COMMON, // FE54..FE66 7241 UNKNOWN, // FE67 7242 COMMON, // FE68..FE6B 7243 UNKNOWN, // FE6C..FE6F 7244 ARABIC, // FE70..FE74 7245 UNKNOWN, // FE75 7246 ARABIC, // FE76..FEFC 7247 UNKNOWN, // FEFD..FEFE 7248 COMMON, // FEFF 7249 UNKNOWN, // FF00 7250 COMMON, // FF01..FF20 7251 LATIN, // FF21..FF3A 7252 COMMON, // FF3B..FF40 7253 LATIN, // FF41..FF5A 7254 COMMON, // FF5B..FF65 7255 KATAKANA, // FF66..FF6F 7256 COMMON, // FF70 7257 KATAKANA, // FF71..FF9D 7258 COMMON, // FF9E..FF9F 7259 HANGUL, // FFA0..FFBE 7260 UNKNOWN, // FFBF..FFC1 7261 HANGUL, // FFC2..FFC7 7262 UNKNOWN, // FFC8..FFC9 7263 HANGUL, // FFCA..FFCF 7264 UNKNOWN, // FFD0..FFD1 7265 HANGUL, // FFD2..FFD7 7266 UNKNOWN, // FFD8..FFD9 7267 HANGUL, // FFDA..FFDC 7268 UNKNOWN, // FFDD..FFDF 7269 COMMON, // FFE0..FFE6 7270 UNKNOWN, // FFE7 7271 COMMON, // FFE8..FFEE 7272 UNKNOWN, // FFEF..FFF8 7273 COMMON, // FFF9..FFFD 7274 UNKNOWN, // FFFE..FFFF 7275 LINEAR_B, // 10000..1000B 7276 UNKNOWN, // 1000C 7277 LINEAR_B, // 1000D..10026 7278 UNKNOWN, // 10027 7279 LINEAR_B, // 10028..1003A 7280 UNKNOWN, // 1003B 7281 LINEAR_B, // 1003C..1003D 7282 UNKNOWN, // 1003E 7283 LINEAR_B, // 1003F..1004D 7284 UNKNOWN, // 1004E..1004F 7285 LINEAR_B, // 10050..1005D 7286 UNKNOWN, // 1005E..1007F 7287 LINEAR_B, // 10080..100FA 7288 UNKNOWN, // 100FB..100FF 7289 COMMON, // 10100..10102 7290 UNKNOWN, // 10103..10106 7291 COMMON, // 10107..10133 7292 UNKNOWN, // 10134..10136 7293 COMMON, // 10137..1013F 7294 GREEK, // 10140..1018E 7295 UNKNOWN, // 1018F 7296 COMMON, // 10190..1019B 7297 UNKNOWN, // 1019C..1019F 7298 GREEK, // 101A0 7299 UNKNOWN, // 101A1..101CF 7300 
COMMON, // 101D0..101FC 7301 INHERITED, // 101FD 7302 UNKNOWN, // 101FE..1027F 7303 LYCIAN, // 10280..1029C 7304 UNKNOWN, // 1029D..1029F 7305 CARIAN, // 102A0..102D0 7306 UNKNOWN, // 102D1..102DF 7307 INHERITED, // 102E0 7308 COMMON, // 102E1..102FB 7309 UNKNOWN, // 102FC..102FF 7310 OLD_ITALIC, // 10300..10323 7311 UNKNOWN, // 10324..1032C 7312 OLD_ITALIC, // 1032D..1032F 7313 GOTHIC, // 10330..1034A 7314 UNKNOWN, // 1034B..1034F 7315 OLD_PERMIC, // 10350..1037A 7316 UNKNOWN, // 1037B..1037F 7317 UGARITIC, // 10380..1039D 7318 UNKNOWN, // 1039E 7319 UGARITIC, // 1039F 7320 OLD_PERSIAN, // 103A0..103C3 7321 UNKNOWN, // 103C4..103C7 7322 OLD_PERSIAN, // 103C8..103D5 7323 UNKNOWN, // 103D6..103FF 7324 DESERET, // 10400..1044F 7325 SHAVIAN, // 10450..1047F 7326 OSMANYA, // 10480..1049D 7327 UNKNOWN, // 1049E..1049F 7328 OSMANYA, // 104A0..104A9 7329 UNKNOWN, // 104AA..104AF 7330 OSAGE, // 104B0..104D3; 7331 UNKNOWN, // 104D4..104D7; 7332 OSAGE, // 104D8..104FB; 7333 UNKNOWN, // 104FC..104FF; 7334 ELBASAN, // 10500..10527 7335 UNKNOWN, // 10528..1052F 7336 CAUCASIAN_ALBANIAN, // 10530..10563 7337 UNKNOWN, // 10564..1056E 7338 CAUCASIAN_ALBANIAN, // 1056F 7339 UNKNOWN, // 10570..105FF 7340 LINEAR_A, // 10600..10736 7341 UNKNOWN, // 10737..1073F 7342 LINEAR_A, // 10740..10755 7343 UNKNOWN, // 10756..1075F 7344 LINEAR_A, // 10760..10767 7345 UNKNOWN, // 10768..107FF 7346 CYPRIOT, // 10800..10805 7347 UNKNOWN, // 10806..10807 7348 CYPRIOT, // 10808 7349 UNKNOWN, // 10809 7350 CYPRIOT, // 1080A..10835 7351 UNKNOWN, // 10836 7352 CYPRIOT, // 10837..10838 7353 UNKNOWN, // 10839..1083B 7354 CYPRIOT, // 1083C 7355 UNKNOWN, // 1083D..1083E 7356 CYPRIOT, // 1083F 7357 IMPERIAL_ARAMAIC, // 10840..10855 7358 UNKNOWN, // 10856 7359 IMPERIAL_ARAMAIC, // 10857..1085F 7360 PALMYRENE, // 10860..1087F 7361 NABATAEAN, // 10880..1089E 7362 UNKNOWN, // 1089F..108A6 7363 NABATAEAN, // 108A7..108AF 7364 UNKNOWN, // 108B0..108DF 7365 HATRAN, // 108E0..108F2 7366 UNKNOWN, // 108F3 7367 HATRAN, 
// 108F4..108F5 7368 UNKNOWN, // 108F6..108FA 7369 HATRAN, // 108FB..108FF 7370 PHOENICIAN, // 10900..1091B 7371 UNKNOWN, // 1091C..1091E 7372 PHOENICIAN, // 1091F 7373 LYDIAN, // 10920..10939 7374 UNKNOWN, // 1093A..1093E 7375 LYDIAN, // 1093F 7376 UNKNOWN, // 10940..1097F 7377 MEROITIC_HIEROGLYPHS, // 10980..1099F 7378 MEROITIC_CURSIVE, // 109A0..109B7 7379 UNKNOWN, // 109B8..109BB 7380 MEROITIC_CURSIVE, // 109BC..109CF 7381 UNKNOWN, // 109D0..109D1 7382 MEROITIC_CURSIVE, // 109D2..109FF 7383 KHAROSHTHI, // 10A00..10A03 7384 UNKNOWN, // 10A04 7385 KHAROSHTHI, // 10A05..10A06 7386 UNKNOWN, // 10A07..10A0B 7387 KHAROSHTHI, // 10A0C..10A13 7388 UNKNOWN, // 10A14 7389 KHAROSHTHI, // 10A15..10A17 7390 UNKNOWN, // 10A18 7391 KHAROSHTHI, // 10A19..10A35 7392 UNKNOWN, // 10A36..10A37 7393 KHAROSHTHI, // 10A38..10A3A 7394 UNKNOWN, // 10A3B..10A3E 7395 KHAROSHTHI, // 10A3F..10A48 7396 UNKNOWN, // 10A49..10A4F 7397 KHAROSHTHI, // 10A50..10A58 7398 UNKNOWN, // 10A59..10A5F 7399 OLD_SOUTH_ARABIAN, // 10A60..10A7F 7400 OLD_NORTH_ARABIAN, // 10A80..10A9F 7401 UNKNOWN, // 10AA0..10ABF 7402 MANICHAEAN, // 10AC0..10AE6 7403 UNKNOWN, // 10AE7..10AEA 7404 MANICHAEAN, // 10AEB..10AF6 7405 UNKNOWN, // 10AF7..10AFF 7406 AVESTAN, // 10B00..10B35 7407 UNKNOWN, // 10B36..10B38 7408 AVESTAN, // 10B39..10B3F 7409 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 7410 UNKNOWN, // 10B56..10B57 7411 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 7412 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 7413 UNKNOWN, // 10B73..10B77 7414 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 7415 PSALTER_PAHLAVI, // 10B80..10B91 7416 UNKNOWN, // 10B92..10B98 7417 PSALTER_PAHLAVI, // 10B99..10B9C 7418 UNKNOWN, // 10B9D..10BA8 7419 PSALTER_PAHLAVI, // 10BA9..10BAF 7420 UNKNOWN, // 10BB0..10BFF 7421 OLD_TURKIC, // 10C00..10C48 7422 UNKNOWN, // 10C49..10C7F 7423 OLD_HUNGARIAN, // 10C80..10CB2 7424 UNKNOWN, // 10CB3..10CBF 7425 OLD_HUNGARIAN, // 10CC0..10CF2 7426 UNKNOWN, // 10CF3..10CF9 7427 OLD_HUNGARIAN, // 10CFA..10CFF 7428 HANIFI_ROHINGYA, 
// 10D00..10D27 7429 UNKNOWN, // 10D28..10D29 7430 HANIFI_ROHINGYA, // 10D30..10D39 7431 UNKNOWN, // 10D3A..10E5F 7432 ARABIC, // 10E60..10E7E 7433 UNKNOWN, // 10E7F..10EFF 7434 OLD_SOGDIAN, // 10F00..10F27 7435 UNKNOWN, // 10F28..10F2F 7436 SOGDIAN, // 10F30..10F59 7437 UNKNOWN, // 10F5A..10FFF 7438 BRAHMI, // 11000..1104D 7439 UNKNOWN, // 1104E..11051 7440 BRAHMI, // 11052..1106F 7441 UNKNOWN, // 11070..1107E 7442 BRAHMI, // 1107F 7443 KAITHI, // 11080..110C1 7444 UNKNOWN, // 110C2..110CC 7445 KAITHI, // 110CD 7446 UNKNOWN, // 110CE..110CF 7447 SORA_SOMPENG, // 110D0..110E8 7448 UNKNOWN, // 110E9..110EF 7449 SORA_SOMPENG, // 110F0..110F9 7450 UNKNOWN, // 110FA..110FF 7451 CHAKMA, // 11100..11134 7452 UNKNOWN, // 11135 7453 CHAKMA, // 11136..11146 7454 UNKNOWN, // 11147..1114F 7455 MAHAJANI, // 11150..11176 7456 UNKNOWN, // 11177..1117F 7457 SHARADA, // 11180..111CD 7458 UNKNOWN, // 111CE..111CF 7459 SHARADA, // 111D0..111DF 7460 UNKNOWN, // 111E0 7461 SINHALA, // 111E1..111F4 7462 UNKNOWN, // 111F5..111FF 7463 KHOJKI, // 11200..11211 7464 UNKNOWN, // 11212 7465 KHOJKI, // 11213..1123E 7466 UNKNOWN, // 1123F..1127F 7467 MULTANI, // 11280..11286 7468 UNKNOWN, // 11287 7469 MULTANI, // 11288 7470 UNKNOWN, // 11289 7471 MULTANI, // 1128A..1128D 7472 UNKNOWN, // 1128E 7473 MULTANI, // 1128F..1129D 7474 UNKNOWN, // 1129E 7475 MULTANI, // 1129F..112A9 7476 UNKNOWN, // 112AA..112AF 7477 KHUDAWADI, // 112B0..112EA 7478 UNKNOWN, // 112EB..112EF 7479 KHUDAWADI, // 112F0..112F9 7480 UNKNOWN, // 112FA..112FF 7481 GRANTHA, // 11300..11303 7482 UNKNOWN, // 11304 7483 GRANTHA, // 11305..1130C 7484 UNKNOWN, // 1130D..1130E 7485 GRANTHA, // 1130F..11310 7486 UNKNOWN, // 11311..11312 7487 GRANTHA, // 11313..11328 7488 UNKNOWN, // 11329 7489 GRANTHA, // 1132A..11330 7490 UNKNOWN, // 11331 7491 GRANTHA, // 11332..11333 7492 UNKNOWN, // 11334 7493 GRANTHA, // 11335..11339 7494 UNKNOWN, // 1133A 7495 INHERITED, // 1133B 7496 GRANTHA, // 1133C..11344 7497 UNKNOWN, // 11345..11346 7498 
GRANTHA, // 11347..11348 7499 UNKNOWN, // 11349..1134A 7500 GRANTHA, // 1134B..1134D 7501 UNKNOWN, // 1134E..1134F 7502 GRANTHA, // 11350 7503 UNKNOWN, // 11351..11356 7504 GRANTHA, // 11357 7505 UNKNOWN, // 11358..1135C 7506 GRANTHA, // 1135D..11363 7507 UNKNOWN, // 11364..11365 7508 GRANTHA, // 11366..1136C 7509 UNKNOWN, // 1136D..1136F 7510 GRANTHA, // 11370..11374 7511 UNKNOWN, // 11375..113FF 7512 NEWA, // 11400..11459 7513 UNKNOWN, // 1145A 7514 NEWA, // 1145B 7515 UNKNOWN, // 1145C 7516 NEWA, // 1145D..1145E 7517 UNKNOWN, // 1145F..1147F 7518 TIRHUTA, // 11480..114C7 7519 UNKNOWN, // 114C8..114CF 7520 TIRHUTA, // 114D0..114D9 7521 UNKNOWN, // 114DA..1157F 7522 SIDDHAM, // 11580..115B5 7523 UNKNOWN, // 115B6..115B7 7524 SIDDHAM, // 115B8..115DD 7525 UNKNOWN, // 115DE..115FF 7526 MODI, // 11600..11644 7527 UNKNOWN, // 11645..1164F 7528 MODI, // 11650..11659 7529 UNKNOWN, // 1165A..1165F 7530 MONGOLIAN, // 11660..1166C 7531 UNKNOWN, // 1166D..1167F 7532 TAKRI, // 11680..116B7 7533 UNKNOWN, // 116B8..116BF 7534 TAKRI, // 116C0..116C9 7535 UNKNOWN, // 116CA..116FF 7536 AHOM, // 11700..1171A 7537 UNKNOWN, // 1171B..1171C 7538 AHOM, // 1171D..1172B 7539 UNKNOWN, // 1172C..1172F 7540 AHOM, // 11730..1173F 7541 UNKNOWN, // 11740..117FF 7542 DOGRA, // 11800..1183B 7543 UNKNOWN, // 1183C..1189F 7544 WARANG_CITI, // 118A0..118F2 7545 UNKNOWN, // 118F3..118FE 7546 WARANG_CITI, // 118FF 7547 UNKNOWN, // 11900..119FF 7548 ZANABAZAR_SQUARE, // 11A00..11A47 7549 UNKNOWN, // 11A48..11A4F 7550 SOYOMBO, // 11A50..11A83 7551 UNKNOWN, // 11A84..11A85 7552 SOYOMBO, // 11A86..11AA2 7553 UNKNOWN, // 11AA3..11ABF 7554 PAU_CIN_HAU, // 11AC0..11AF8 7555 UNKNOWN, // 11AF9..11BFF 7556 BHAIKSUKI, // 11C00..11C08 7557 UNKNOWN, // 11C09 7558 BHAIKSUKI, // 11C0A..11C36 7559 UNKNOWN, // 11C37 7560 BHAIKSUKI, // 11C38..11C45 7561 UNKNOWN, // 11C46..11C49 7562 BHAIKSUKI, // 11C50..11C6C 7563 UNKNOWN, // 11C6D..11C6F 7564 MARCHEN, // 11C70..11C8F 7565 UNKNOWN, // 11C90..11C91 7566 MARCHEN, // 
11C92..11CA7 7567 UNKNOWN, // 11CA8 7568 MARCHEN, // 11CA9..11CB6 7569 UNKNOWN, // 11CB7..11CFF 7570 MASARAM_GONDI, // 11D00..11D06 7571 UNKNOWN, // 11D07 7572 MASARAM_GONDI, // 11D08..11D09 7573 UNKNOWN, // 11D0A 7574 MASARAM_GONDI, // 11D0B..11D36 7575 UNKNOWN, // 11D37..11D39 7576 MASARAM_GONDI, // 11D3A 7577 UNKNOWN, // 11D3B 7578 MASARAM_GONDI, // 11D3C..11D3D 7579 UNKNOWN, // 11D3E 7580 MASARAM_GONDI, // 11D3F..11D47 7581 UNKNOWN, // 11D48..11D49 7582 MASARAM_GONDI, // 11D50..11D59 7583 UNKNOWN, // 11D5A..11D5F 7584 GUNJALA_GONDI, // 11D60..11D68 7585 UNKNOWN, // 11D69 7586 GUNJALA_GONDI, // 11D6A..11D8E 7587 UNKNOWN, // 11D8F 7588 GUNJALA_GONDI, // 11D90..11D91 7589 UNKNOWN, // 11D92 7590 GUNJALA_GONDI, // 11D93..11D98 7591 UNKNOWN, // 11D99 7592 GUNJALA_GONDI, // 11DA0..11DA9 7593 UNKNOWN, // 11DAA..11DFF 7594 MAKASAR, // 11EE0..11EF8 7595 UNKNOWN, // 11EF9..11FFF 7596 CUNEIFORM, // 12000..12399 7597 UNKNOWN, // 1239A..123FF 7598 CUNEIFORM, // 12400..1246E 7599 UNKNOWN, // 1246F 7600 CUNEIFORM, // 12470..12474 7601 UNKNOWN, // 12475..1247F 7602 CUNEIFORM, // 12480..12543 7603 UNKNOWN, // 12544..12FFF 7604 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 7605 UNKNOWN, // 1342F..143FF 7606 ANATOLIAN_HIEROGLYPHS, // 14400..14646 7607 UNKNOWN, // 14647..167FF 7608 BAMUM, // 16800..16A38 7609 UNKNOWN, // 16A39..16A3F 7610 MRO, // 16A40..16A5E 7611 UNKNOWN, // 16A5F 7612 MRO, // 16A60..16A69 7613 UNKNOWN, // 16A6A..16A6D 7614 MRO, // 16A6E..16A6F 7615 UNKNOWN, // 16A70..16ACF 7616 BASSA_VAH, // 16AD0..16AED 7617 UNKNOWN, // 16AEE..16AEF 7618 BASSA_VAH, // 16AF0..16AF5 7619 UNKNOWN, // 16AF6..16AFF 7620 PAHAWH_HMONG, // 16B00..16B45 7621 UNKNOWN, // 16B46..16B4F 7622 PAHAWH_HMONG, // 16B50..16B59 7623 UNKNOWN, // 16B5A 7624 PAHAWH_HMONG, // 16B5B..16B61 7625 UNKNOWN, // 16B62 7626 PAHAWH_HMONG, // 16B63..16B77 7627 UNKNOWN, // 16B78..16B7C 7628 PAHAWH_HMONG, // 16B7D..16B8F 7629 UNKNOWN, // 16B90..16E3F 7630 MEDEFAIDRIN, // 16E40..16E9A 7631 UNKNOWN, // 16E9B..16EFF 7632 
MIAO, // 16F00..16F44 7633 UNKNOWN, // 16F45..16F4F 7634 MIAO, // 16F50..16F7E 7635 UNKNOWN, // 16F7F..16F8E 7636 MIAO, // 16F8F..16F9F 7637 UNKNOWN, // 16FA0..16FDF 7638 TANGUT, // 16FE0 7639 NUSHU, // 16FE1 7640 UNKNOWN, // 16FE2..16FFF 7641 TANGUT, // 17000..187F1 7642 UNKNOWN, // 187F2..187FF 7643 TANGUT, // 18800..18AF2 7644 UNKNOWN, // 18AF3..1AFFF 7645 KATAKANA, // 1B000 7646 HIRAGANA, // 1B001..1B11E 7647 UNKNOWN, // 1B11F..1B16F 7648 NUSHU, // 1B170..1B2FB 7649 UNKNOWN, // 1B2FC..1BBFF 7650 DUPLOYAN, // 1BC00..1BC6A 7651 UNKNOWN, // 1BC6B..1BC6F 7652 DUPLOYAN, // 1BC70..1BC7C 7653 UNKNOWN, // 1BC7D..1BC7F 7654 DUPLOYAN, // 1BC80..1BC88 7655 UNKNOWN, // 1BC89..1BC8F 7656 DUPLOYAN, // 1BC90..1BC99 7657 UNKNOWN, // 1BC9A..1BC9B 7658 DUPLOYAN, // 1BC9C..1BC9F 7659 COMMON, // 1BCA0..1BCA3 7660 UNKNOWN, // 1BCA4..1CFFF 7661 COMMON, // 1D000..1D0F5 7662 UNKNOWN, // 1D0F6..1D0FF 7663 COMMON, // 1D100..1D126 7664 UNKNOWN, // 1D127..1D128 7665 COMMON, // 1D129..1D166 7666 INHERITED, // 1D167..1D169 7667 COMMON, // 1D16A..1D17A 7668 INHERITED, // 1D17B..1D182 7669 COMMON, // 1D183..1D184 7670 INHERITED, // 1D185..1D18B 7671 COMMON, // 1D18C..1D1A9 7672 INHERITED, // 1D1AA..1D1AD 7673 COMMON, // 1D1AE..1D1E8 7674 UNKNOWN, // 1D1E9..1D1FF 7675 GREEK, // 1D200..1D245 7676 UNKNOWN, // 1D246..1D2DF 7677 COMMON, // 1D2E0..1D2F3 7678 UNKNOWN, // 1D2F4..1D2FF 7679 COMMON, // 1D300..1D356 7680 UNKNOWN, // 1D357..1D35F 7681 COMMON, // 1D360..1D378 7682 UNKNOWN, // 1D379..1D3FF 7683 COMMON, // 1D400..1D454 7684 UNKNOWN, // 1D455 7685 COMMON, // 1D456..1D49C 7686 UNKNOWN, // 1D49D 7687 COMMON, // 1D49E..1D49F 7688 UNKNOWN, // 1D4A0..1D4A1 7689 COMMON, // 1D4A2 7690 UNKNOWN, // 1D4A3..1D4A4 7691 COMMON, // 1D4A5..1D4A6 7692 UNKNOWN, // 1D4A7..1D4A8 7693 COMMON, // 1D4A9..1D4AC 7694 UNKNOWN, // 1D4AD 7695 COMMON, // 1D4AE..1D4B9 7696 UNKNOWN, // 1D4BA 7697 COMMON, // 1D4BB 7698 UNKNOWN, // 1D4BC 7699 COMMON, // 1D4BD..1D4C3 7700 UNKNOWN, // 1D4C4 7701 COMMON, // 1D4C5..1D505 7702 
UNKNOWN, // 1D506 7703 COMMON, // 1D507..1D50A 7704 UNKNOWN, // 1D50B..1D50C 7705 COMMON, // 1D50D..1D514 7706 UNKNOWN, // 1D515 7707 COMMON, // 1D516..1D51C 7708 UNKNOWN, // 1D51D 7709 COMMON, // 1D51E..1D539 7710 UNKNOWN, // 1D53A 7711 COMMON, // 1D53B..1D53E 7712 UNKNOWN, // 1D53F 7713 COMMON, // 1D540..1D544 7714 UNKNOWN, // 1D545 7715 COMMON, // 1D546 7716 UNKNOWN, // 1D547..1D549 7717 COMMON, // 1D54A..1D550 7718 UNKNOWN, // 1D551 7719 COMMON, // 1D552..1D6A5 7720 UNKNOWN, // 1D6A6..1D6A7 7721 COMMON, // 1D6A8..1D7CB 7722 UNKNOWN, // 1D7CC..1D7CD 7723 COMMON, // 1D7CE..1D7FF 7724 SIGNWRITING, // 1D800..1DA8B 7725 UNKNOWN, // 1DA8C..1DA9A 7726 SIGNWRITING, // 1DA9B..1DA9F 7727 UNKNOWN, // 1DAA0 7728 SIGNWRITING, // 1DAA1..1DAAF 7729 UNKNOWN, // 1DAB0..1DFFF 7730 GLAGOLITIC, // 1E000..1E006 7731 UNKNOWN, // 1E007 7732 GLAGOLITIC, // 1E008..1E018 7733 UNKNOWN, // 1E019..1E01A 7734 GLAGOLITIC, // 1E01B..1E021 7735 UNKNOWN, // 1E022 7736 GLAGOLITIC, // 1E023..1E024 7737 UNKNOWN, // 1E025 7738 GLAGOLITIC, // 1E026..1E02A 7739 UNKNOWN, // 1E02B..1E7FF 7740 MENDE_KIKAKUI, // 1E800..1E8C4 7741 UNKNOWN, // 1E8C5..1E8C6 7742 MENDE_KIKAKUI, // 1E8C7..1E8D6 7743 UNKNOWN, // 1E8D7..1E8FF 7744 ADLAM, // 1E900..1E94A 7745 UNKNOWN, // 1E94B..1E94F 7746 ADLAM, // 1E950..1E959 7747 UNKNOWN, // 1E95A..1E95D 7748 ADLAM, // 1E95E..1E95F 7749 UNKNOWN, // 1E960..1EC70 7750 COMMON, // 1EC71..1ECB4 7751 UNKNOWN, // 1ECB5..1EDFF 7752 ARABIC, // 1EE00..1EE03 7753 UNKNOWN, // 1EE04 7754 ARABIC, // 1EE05..1EE1F 7755 UNKNOWN, // 1EE20 7756 ARABIC, // 1EE21..1EE22 7757 UNKNOWN, // 1EE23 7758 ARABIC, // 1EE24 7759 UNKNOWN, // 1EE25..1EE26 7760 ARABIC, // 1EE27 7761 UNKNOWN, // 1EE28 7762 ARABIC, // 1EE29..1EE32 7763 UNKNOWN, // 1EE33 7764 ARABIC, // 1EE34..1EE37 7765 UNKNOWN, // 1EE38 7766 ARABIC, // 1EE39 7767 UNKNOWN, // 1EE3A 7768 ARABIC, // 1EE3B 7769 UNKNOWN, // 1EE3C..1EE41 7770 ARABIC, // 1EE42 7771 UNKNOWN, // 1EE43..1EE46 7772 ARABIC, // 1EE47 7773 UNKNOWN, // 1EE48 7774 ARABIC, // 
1EE49 7775 UNKNOWN, // 1EE4A 7776 ARABIC, // 1EE4B 7777 UNKNOWN, // 1EE4C 7778 ARABIC, // 1EE4D..1EE4F 7779 UNKNOWN, // 1EE50 7780 ARABIC, // 1EE51..1EE52 7781 UNKNOWN, // 1EE53 7782 ARABIC, // 1EE54 7783 UNKNOWN, // 1EE55..1EE56 7784 ARABIC, // 1EE57 7785 UNKNOWN, // 1EE58 7786 ARABIC, // 1EE59 7787 UNKNOWN, // 1EE5A 7788 ARABIC, // 1EE5B 7789 UNKNOWN, // 1EE5C 7790 ARABIC, // 1EE5D 7791 UNKNOWN, // 1EE5E 7792 ARABIC, // 1EE5F 7793 UNKNOWN, // 1EE60 7794 ARABIC, // 1EE61..1EE62 7795 UNKNOWN, // 1EE63 7796 ARABIC, // 1EE64 7797 UNKNOWN, // 1EE65..1EE66 7798 ARABIC, // 1EE67..1EE6A 7799 UNKNOWN, // 1EE6B 7800 ARABIC, // 1EE6C..1EE72 7801 UNKNOWN, // 1EE73 7802 ARABIC, // 1EE74..1EE77 7803 UNKNOWN, // 1EE78 7804 ARABIC, // 1EE79..1EE7C 7805 UNKNOWN, // 1EE7D 7806 ARABIC, // 1EE7E 7807 UNKNOWN, // 1EE7F 7808 ARABIC, // 1EE80..1EE89 7809 UNKNOWN, // 1EE8A 7810 ARABIC, // 1EE8B..1EE9B 7811 UNKNOWN, // 1EE9C..1EEA0 7812 ARABIC, // 1EEA1..1EEA3 7813 UNKNOWN, // 1EEA4 7814 ARABIC, // 1EEA5..1EEA9 7815 UNKNOWN, // 1EEAA 7816 ARABIC, // 1EEAB..1EEBB 7817 UNKNOWN, // 1EEBC..1EEEF 7818 ARABIC, // 1EEF0..1EEF1 7819 UNKNOWN, // 1EEF2..1EFFF 7820 COMMON, // 1F000..1F02B 7821 UNKNOWN, // 1F02C..1F02F 7822 COMMON, // 1F030..1F093 7823 UNKNOWN, // 1F094..1F09F 7824 COMMON, // 1F0A0..1F0AE 7825 UNKNOWN, // 1F0AF..1F0B0 7826 COMMON, // 1F0B1..1F0BF 7827 UNKNOWN, // 1F0C0 7828 COMMON, // 1F0C1..1F0CF 7829 UNKNOWN, // 1F0D0 7830 COMMON, // 1F0D1..1F0F5 7831 UNKNOWN, // 1F0F6..1F0FF 7832 COMMON, // 1F100..1F10C 7833 UNKNOWN, // 1F10D..1F10F 7834 COMMON, // 1F110..1F16B 7835 UNKNOWN, // 1F16C..1F16F 7836 COMMON, // 1F170..1F1AC 7837 UNKNOWN, // 1F1AD..1F1E5 7838 COMMON, // 1F1E6..1F1FF 7839 HIRAGANA, // 1F200 7840 COMMON, // 1F201..1F202 7841 UNKNOWN, // 1F203..1F20F 7842 COMMON, // 1F210..1F23B 7843 UNKNOWN, // 1F23C..1F23F 7844 COMMON, // 1F240..1F248 7845 UNKNOWN, // 1F249..1F24F 7846 COMMON, // 1F250..1F251 7847 UNKNOWN, // 1F252..1F25F 7848 COMMON, // 1F260..1F265 7849 UNKNOWN, // 
1F266..1F2FF 7850 COMMON, // 1F300..1F6D4 7851 UNKNOWN, // 1F6D5..1F6DF 7852 COMMON, // 1F6E0..1F6EC 7853 UNKNOWN, // 1F6ED..1F6EF 7854 COMMON, // 1F6F0..1F6F9 7855 UNKNOWN, // 1F6FA..1F6FF 7856 COMMON, // 1F700..1F773 7857 UNKNOWN, // 1F774..1F77F 7858 COMMON, // 1F780..1F7D8 7859 UNKNOWN, // 1F7D9..1F7FF 7860 COMMON, // 1F800..1F80B 7861 UNKNOWN, // 1F80C..1F80F 7862 COMMON, // 1F810..1F847 7863 UNKNOWN, // 1F848..1F84F 7864 COMMON, // 1F850..1F859 7865 UNKNOWN, // 1F85A..1F85F 7866 COMMON, // 1F860..1F887 7867 UNKNOWN, // 1F888..1F88F 7868 COMMON, // 1F890..1F8AD 7869 UNKNOWN, // 1F8AE..1F8FF 7870 COMMON, // 1F900..1F90B 7871 UNKNOWN, // 1F90C..1F90F 7872 COMMON, // 1F910..1F93E 7873 UNKNOWN, // 1F93F 7874 COMMON, // 1F940..1F970 7875 UNKNOWN, // 1F971..1F972 7876 COMMON, // 1F973..1F976 7877 UNKNOWN, // 1F977..1F979 7878 COMMON, // 1F97A 7879 UNKNOWN, // 1F97B 7880 COMMON, // 1F97C..1F9A2 7881 UNKNOWN, // 1F9A3..1F9AF 7882 COMMON, // 1F9B0..1F9B9 7883 UNKNOWN, // 1F9BA..1F9BF 7884 COMMON, // 1F9C0..1F9C2 7885 UNKNOWN, // 1F9C3..1F9CF 7886 COMMON, // 1F9D0..1F9FF 7887 UNKNOWN, // 1FA00..1FA5F 7888 COMMON, // 1FA60..1FA6D 7889 UNKNOWN, // 1FA6E..1FFFF 7890 HAN, // 20000..2A6D6 7891 UNKNOWN, // 2A6D7..2A6FF 7892 HAN, // 2A700..2B734 7893 UNKNOWN, // 2B735..2B73F 7894 HAN, // 2B740..2B81D 7895 UNKNOWN, // 2B81E..2B81F 7896 HAN, // 2B820..2CEA1 7897 UNKNOWN, // 2CEA2..2CEAF 7898 HAN, // 2CEB0..2EBE0 7899 UNKNOWN, // 2EBE1..2F7FF 7900 HAN, // 2F800..2FA1D 7901 UNKNOWN, // 2FA1E..E0000 7902 COMMON, // E0001 7903 UNKNOWN, // E0002..E001F 7904 COMMON, // E0020..E007F 7905 UNKNOWN, // E0080..E00FF 7906 INHERITED, // E0100..E01EF 7907 UNKNOWN // E01F0..10FFFF 7908 }; 7909 7910 private static HashMap<String, Character.UnicodeScript> aliases; 7911 static { 7912 aliases = new HashMap<>((int)(149 / 0.75f + 1.0f)); 7913 aliases.put("ADLM", ADLAM); 7914 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 7915 aliases.put("AHOM", AHOM); 7916 aliases.put("ARAB", ARABIC); 7917 
aliases.put("ARMI", IMPERIAL_ARAMAIC); 7918 aliases.put("ARMN", ARMENIAN); 7919 aliases.put("AVST", AVESTAN); 7920 aliases.put("BALI", BALINESE); 7921 aliases.put("BAMU", BAMUM); 7922 aliases.put("BASS", BASSA_VAH); 7923 aliases.put("BATK", BATAK); 7924 aliases.put("BENG", BENGALI); 7925 aliases.put("BHKS", BHAIKSUKI); 7926 aliases.put("BOPO", BOPOMOFO); 7927 aliases.put("BRAH", BRAHMI); 7928 aliases.put("BRAI", BRAILLE); 7929 aliases.put("BUGI", BUGINESE); 7930 aliases.put("BUHD", BUHID); 7931 aliases.put("CAKM", CHAKMA); 7932 aliases.put("CANS", CANADIAN_ABORIGINAL); 7933 aliases.put("CARI", CARIAN); 7934 aliases.put("CHAM", CHAM); 7935 aliases.put("CHER", CHEROKEE); 7936 aliases.put("COPT", COPTIC); 7937 aliases.put("CPRT", CYPRIOT); 7938 aliases.put("CYRL", CYRILLIC); 7939 aliases.put("DEVA", DEVANAGARI); 7940 aliases.put("DOGR", DOGRA); 7941 aliases.put("DSRT", DESERET); 7942 aliases.put("DUPL", DUPLOYAN); 7943 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 7944 aliases.put("ELBA", ELBASAN); 7945 aliases.put("ETHI", ETHIOPIC); 7946 aliases.put("GEOR", GEORGIAN); 7947 aliases.put("GLAG", GLAGOLITIC); 7948 aliases.put("GONM", MASARAM_GONDI); 7949 aliases.put("GOTH", GOTHIC); 7950 aliases.put("GONG", GUNJALA_GONDI); 7951 aliases.put("GRAN", GRANTHA); 7952 aliases.put("GREK", GREEK); 7953 aliases.put("GUJR", GUJARATI); 7954 aliases.put("GURU", GURMUKHI); 7955 aliases.put("HANG", HANGUL); 7956 aliases.put("HANI", HAN); 7957 aliases.put("HANO", HANUNOO); 7958 aliases.put("HATR", HATRAN); 7959 aliases.put("HEBR", HEBREW); 7960 aliases.put("HIRA", HIRAGANA); 7961 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 7962 aliases.put("HMNG", PAHAWH_HMONG); 7963 // it appears we don't have the KATAKANA_OR_HIRAGANA 7964 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 7965 aliases.put("HUNG", OLD_HUNGARIAN); 7966 aliases.put("ITAL", OLD_ITALIC); 7967 aliases.put("JAVA", JAVANESE); 7968 aliases.put("KALI", KAYAH_LI); 7969 aliases.put("KANA", KATAKANA); 7970 aliases.put("KHAR", KHAROSHTHI); 
7971 aliases.put("KHMR", KHMER); 7972 aliases.put("KHOJ", KHOJKI); 7973 aliases.put("KNDA", KANNADA); 7974 aliases.put("KTHI", KAITHI); 7975 aliases.put("LANA", TAI_THAM); 7976 aliases.put("LAOO", LAO); 7977 aliases.put("LATN", LATIN); 7978 aliases.put("LEPC", LEPCHA); 7979 aliases.put("LIMB", LIMBU); 7980 aliases.put("LINA", LINEAR_A); 7981 aliases.put("LINB", LINEAR_B); 7982 aliases.put("LISU", LISU); 7983 aliases.put("LYCI", LYCIAN); 7984 aliases.put("LYDI", LYDIAN); 7985 aliases.put("MAHJ", MAHAJANI); 7986 aliases.put("MAKA", MAKASAR); 7987 aliases.put("MARC", MARCHEN); 7988 aliases.put("MAND", MANDAIC); 7989 aliases.put("MANI", MANICHAEAN); 7990 aliases.put("MEDF", MEDEFAIDRIN); 7991 aliases.put("MEND", MENDE_KIKAKUI); 7992 aliases.put("MERC", MEROITIC_CURSIVE); 7993 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 7994 aliases.put("MLYM", MALAYALAM); 7995 aliases.put("MODI", MODI); 7996 aliases.put("MONG", MONGOLIAN); 7997 aliases.put("MROO", MRO); 7998 aliases.put("MTEI", MEETEI_MAYEK); 7999 aliases.put("MULT", MULTANI); 8000 aliases.put("MYMR", MYANMAR); 8001 aliases.put("NARB", OLD_NORTH_ARABIAN); 8002 aliases.put("NBAT", NABATAEAN); 8003 aliases.put("NEWA", NEWA); 8004 aliases.put("NKOO", NKO); 8005 aliases.put("NSHU", NUSHU); 8006 aliases.put("OGAM", OGHAM); 8007 aliases.put("OLCK", OL_CHIKI); 8008 aliases.put("ORKH", OLD_TURKIC); 8009 aliases.put("ORYA", ORIYA); 8010 aliases.put("OSGE", OSAGE); 8011 aliases.put("OSMA", OSMANYA); 8012 aliases.put("PALM", PALMYRENE); 8013 aliases.put("PAUC", PAU_CIN_HAU); 8014 aliases.put("PERM", OLD_PERMIC); 8015 aliases.put("PHAG", PHAGS_PA); 8016 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8017 aliases.put("PHLP", PSALTER_PAHLAVI); 8018 aliases.put("PHNX", PHOENICIAN); 8019 aliases.put("PLRD", MIAO); 8020 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 8021 aliases.put("RJNG", REJANG); 8022 aliases.put("ROHG", HANIFI_ROHINGYA); 8023 aliases.put("RUNR", RUNIC); 8024 aliases.put("SAMR", SAMARITAN); 8025 aliases.put("SARB", 
OLD_SOUTH_ARABIAN); 8026 aliases.put("SAUR", SAURASHTRA); 8027 aliases.put("SGNW", SIGNWRITING); 8028 aliases.put("SHAW", SHAVIAN); 8029 aliases.put("SHRD", SHARADA); 8030 aliases.put("SIDD", SIDDHAM); 8031 aliases.put("SIND", KHUDAWADI); 8032 aliases.put("SINH", SINHALA); 8033 aliases.put("SOGD", SOGDIAN); 8034 aliases.put("SOGO", OLD_SOGDIAN); 8035 aliases.put("SORA", SORA_SOMPENG); 8036 aliases.put("SOYO", SOYOMBO); 8037 aliases.put("SUND", SUNDANESE); 8038 aliases.put("SYLO", SYLOTI_NAGRI); 8039 aliases.put("SYRC", SYRIAC); 8040 aliases.put("TAGB", TAGBANWA); 8041 aliases.put("TAKR", TAKRI); 8042 aliases.put("TALE", TAI_LE); 8043 aliases.put("TALU", NEW_TAI_LUE); 8044 aliases.put("TAML", TAMIL); 8045 aliases.put("TANG", TANGUT); 8046 aliases.put("TAVT", TAI_VIET); 8047 aliases.put("TELU", TELUGU); 8048 aliases.put("TFNG", TIFINAGH); 8049 aliases.put("TGLG", TAGALOG); 8050 aliases.put("THAA", THAANA); 8051 aliases.put("THAI", THAI); 8052 aliases.put("TIBT", TIBETAN); 8053 aliases.put("TIRH", TIRHUTA); 8054 aliases.put("UGAR", UGARITIC); 8055 aliases.put("VAII", VAI); 8056 aliases.put("WARA", WARANG_CITI); 8057 aliases.put("XPEO", OLD_PERSIAN); 8058 aliases.put("XSUX", CUNEIFORM); 8059 aliases.put("YIII", YI); 8060 aliases.put("ZANB", ZANABAZAR_SQUARE); 8061 aliases.put("ZINH", INHERITED); 8062 aliases.put("ZYYY", COMMON); 8063 aliases.put("ZZZZ", UNKNOWN); 8064 } 8065 8066 /** 8067 * Returns the enum constant representing the Unicode script of which 8068 * the given character (Unicode code point) is assigned to. 8069 * 8070 * @param codePoint the character (Unicode code point) in question. 8071 * @return The {@code UnicodeScript} constant representing the 8072 * Unicode script of which this character is assigned to. 8073 * 8074 * @throws IllegalArgumentException if the specified 8075 * {@code codePoint} is an invalid Unicode code point. 
8076 * @see Character#isValidCodePoint(int) 8077 * 8078 */ 8079 public static UnicodeScript of(int codePoint) { 8080 if (!isValidCodePoint(codePoint)) 8081 throw new IllegalArgumentException( 8082 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8083 int type = getType(codePoint); 8084 // leave SURROGATE and PRIVATE_USE for table lookup 8085 if (type == UNASSIGNED) 8086 return UNKNOWN; 8087 int index = Arrays.binarySearch(scriptStarts, codePoint); 8088 if (index < 0) 8089 index = -index - 2; 8090 return scripts[index]; 8091 } 8092 8093 /** 8094 * Returns the UnicodeScript constant with the given Unicode script 8095 * name or the script name alias. Script names and their aliases are 8096 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 8097 * and {@code PropertyValueAliases<version>.txt} define script names 8098 * and the script name aliases for a particular version of the 8099 * standard. The {@link Character} class specifies the version of 8100 * the standard that it supports. 8101 * <p> 8102 * Character case is ignored for all of the valid script names. 8103 * The en_US locale's case mapping rules are used to provide 8104 * case-insensitive string comparisons for script name validation. 8105 * 8106 * @param scriptName A {@code UnicodeScript} name. 8107 * @return The {@code UnicodeScript} constant identified 8108 * by {@code scriptName} 8109 * @throws IllegalArgumentException if {@code scriptName} is an 8110 * invalid name 8111 * @throws NullPointerException if {@code scriptName} is null 8112 */ 8113 public static final UnicodeScript forName(String scriptName) { 8114 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8115 //.replace(' ', '_')); 8116 UnicodeScript sc = aliases.get(scriptName); 8117 if (sc != null) 8118 return sc; 8119 return valueOf(scriptName); 8120 } 8121 } 8122 8123 /** 8124 * The value of the {@code Character}. 
8125 * 8126 * @serial 8127 */ 8128 private final char value; 8129 8130 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8131 private static final long serialVersionUID = 3786198910865385080L; 8132 8133 /** 8134 * Constructs a newly allocated {@code Character} object that 8135 * represents the specified {@code char} value. 8136 * 8137 * @param value the value to be represented by the 8138 * {@code Character} object. 8139 * 8140 * @deprecated 8141 * It is rarely appropriate to use this constructor. The static factory 8142 * {@link #valueOf(char)} is generally a better choice, as it is 8143 * likely to yield significantly better space and time performance. 8144 */ 8145 @Deprecated(since="9") 8146 public Character(char value) { 8147 this.value = value; 8148 } 8149 8150 private static class CharacterCache { 8151 private CharacterCache(){} 8152 8153 static final Character[] cache; 8154 static Character[] archivedCache; 8155 8156 static { 8157 int size = 127 + 1; 8158 8159 // Load and use the archived cache if it exists 8160 VM.initializeFromArchive(CharacterCache.class); 8161 if (archivedCache == null || archivedCache.length != size) { 8162 Character[] c = new Character[size]; 8163 for (int i = 0; i < size; i++) { 8164 c[i] = new Character((char) i); 8165 } 8166 archivedCache = c; 8167 } 8168 cache = archivedCache; 8169 } 8170 } 8171 8172 /** 8173 * Returns a {@code Character} instance representing the specified 8174 * {@code char} value. 8175 * If a new {@code Character} instance is not required, this method 8176 * should generally be used in preference to the constructor 8177 * {@link #Character(char)}, as this method is likely to yield 8178 * significantly better space and time performance by caching 8179 * frequently requested values. 8180 * 8181 * This method will always cache values in the range {@code 8182 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 8183 * cache other values outside of this range. 
8184 * 8185 * @param c a char value. 8186 * @return a {@code Character} instance representing {@code c}. 8187 * @since 1.5 8188 */ 8189 @HotSpotIntrinsicCandidate 8190 public static Character valueOf(char c) { 8191 if (c <= 127) { // must cache 8192 return CharacterCache.cache[(int)c]; 8193 } 8194 return new Character(c); 8195 } 8196 8197 /** 8198 * Returns the value of this {@code Character} object. 8199 * @return the primitive {@code char} value represented by 8200 * this object. 8201 */ 8202 @HotSpotIntrinsicCandidate 8203 public char charValue() { 8204 return value; 8205 } 8206 8207 /** 8208 * Returns a hash code for this {@code Character}; equal to the result 8209 * of invoking {@code charValue()}. 8210 * 8211 * @return a hash code value for this {@code Character} 8212 */ 8213 @Override 8214 public int hashCode() { 8215 return Character.hashCode(value); 8216 } 8217 8218 /** 8219 * Returns a hash code for a {@code char} value; compatible with 8220 * {@code Character.hashCode()}. 8221 * 8222 * @since 1.8 8223 * 8224 * @param value The {@code char} for which to return a hash code. 8225 * @return a hash code value for a {@code char} value. 8226 */ 8227 public static int hashCode(char value) { 8228 return (int)value; 8229 } 8230 8231 /** 8232 * Compares this object against the specified object. 8233 * The result is {@code true} if and only if the argument is not 8234 * {@code null} and is a {@code Character} object that 8235 * represents the same {@code char} value as this object. 8236 * 8237 * @param obj the object to compare with. 8238 * @return {@code true} if the objects are the same; 8239 * {@code false} otherwise. 8240 */ 8241 public boolean equals(Object obj) { 8242 if (obj instanceof Character) { 8243 return value == ((Character)obj).charValue(); 8244 } 8245 return false; 8246 } 8247 8248 /** 8249 * Returns a {@code String} object representing this 8250 * {@code Character}'s value. 
The result is a string of 8251 * length 1 whose sole component is the primitive 8252 * {@code char} value represented by this 8253 * {@code Character} object. 8254 * 8255 * @return a string representation of this object. 8256 */ 8257 public String toString() { 8258 char buf[] = {value}; 8259 return String.valueOf(buf); 8260 } 8261 8262 /** 8263 * Returns a {@code String} object representing the 8264 * specified {@code char}. The result is a string of length 8265 * 1 consisting solely of the specified {@code char}. 8266 * 8267 * @apiNote This method cannot handle <a 8268 * href="#supplementary"> supplementary characters</a>. To support 8269 * all Unicode characters, including supplementary characters, use 8270 * the {@link #toString(int)} method. 8271 * 8272 * @param c the {@code char} to be converted 8273 * @return the string representation of the specified {@code char} 8274 * @since 1.4 8275 */ 8276 public static String toString(char c) { 8277 return String.valueOf(c); 8278 } 8279 8280 /** 8281 * Returns a {@code String} object representing the 8282 * specified character (Unicode code point). The result is a string of 8283 * length 1 or 2, consisting solely of the specified {@code codePoint}. 8284 * 8285 * @param codePoint the {@code codePoint} to be converted 8286 * @return the string representation of the specified {@code codePoint} 8287 * @throws IllegalArgumentException if the specified 8288 * {@code codePoint} is not a {@linkplain #isValidCodePoint 8289 * valid Unicode code point}. 8290 * @since 11 8291 */ 8292 public static String toString(int codePoint) { 8293 return String.valueOfCodePoint(codePoint); 8294 } 8295 8296 /** 8297 * Determines whether the specified code point is a valid 8298 * <a href="http://www.unicode.org/glossary/#code_point"> 8299 * Unicode code point value</a>. 
8300 * 8301 * @param codePoint the Unicode code point to be tested 8302 * @return {@code true} if the specified code point value is between 8303 * {@link #MIN_CODE_POINT} and 8304 * {@link #MAX_CODE_POINT} inclusive; 8305 * {@code false} otherwise. 8306 * @since 1.5 8307 */ 8308 public static boolean isValidCodePoint(int codePoint) { 8309 // Optimized form of: 8310 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 8311 int plane = codePoint >>> 16; 8312 return plane < ((MAX_CODE_POINT + 1) >>> 16); 8313 } 8314 8315 /** 8316 * Determines whether the specified character (Unicode code point) 8317 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 8318 * Such code points can be represented using a single {@code char}. 8319 * 8320 * @param codePoint the character (Unicode code point) to be tested 8321 * @return {@code true} if the specified code point is between 8322 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 8323 * {@code false} otherwise. 8324 * @since 1.7 8325 */ 8326 public static boolean isBmpCodePoint(int codePoint) { 8327 return codePoint >>> 16 == 0; 8328 // Optimized form of: 8329 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 8330 // We consistently use logical shift (>>>) to facilitate 8331 // additional runtime optimizations. 8332 } 8333 8334 /** 8335 * Determines whether the specified character (Unicode code point) 8336 * is in the <a href="#supplementary">supplementary character</a> range. 8337 * 8338 * @param codePoint the character (Unicode code point) to be tested 8339 * @return {@code true} if the specified code point is between 8340 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 8341 * {@link #MAX_CODE_POINT} inclusive; 8342 * {@code false} otherwise. 
8343 * @since 1.5 8344 */ 8345 public static boolean isSupplementaryCodePoint(int codePoint) { 8346 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 8347 && codePoint < MAX_CODE_POINT + 1; 8348 } 8349 8350 /** 8351 * Determines if the given {@code char} value is a 8352 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8353 * Unicode high-surrogate code unit</a> 8354 * (also known as <i>leading-surrogate code unit</i>). 8355 * 8356 * <p>Such values do not represent characters by themselves, 8357 * but are used in the representation of 8358 * <a href="#supplementary">supplementary characters</a> 8359 * in the UTF-16 encoding. 8360 * 8361 * @param ch the {@code char} value to be tested. 8362 * @return {@code true} if the {@code char} value is between 8363 * {@link #MIN_HIGH_SURROGATE} and 8364 * {@link #MAX_HIGH_SURROGATE} inclusive; 8365 * {@code false} otherwise. 8366 * @see Character#isLowSurrogate(char) 8367 * @see Character.UnicodeBlock#of(int) 8368 * @since 1.5 8369 */ 8370 public static boolean isHighSurrogate(char ch) { 8371 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 8372 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 8373 } 8374 8375 /** 8376 * Determines if the given {@code char} value is a 8377 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8378 * Unicode low-surrogate code unit</a> 8379 * (also known as <i>trailing-surrogate code unit</i>). 8380 * 8381 * <p>Such values do not represent characters by themselves, 8382 * but are used in the representation of 8383 * <a href="#supplementary">supplementary characters</a> 8384 * in the UTF-16 encoding. 8385 * 8386 * @param ch the {@code char} value to be tested. 8387 * @return {@code true} if the {@code char} value is between 8388 * {@link #MIN_LOW_SURROGATE} and 8389 * {@link #MAX_LOW_SURROGATE} inclusive; 8390 * {@code false} otherwise. 
8391 * @see Character#isHighSurrogate(char) 8392 * @since 1.5 8393 */ 8394 public static boolean isLowSurrogate(char ch) { 8395 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 8396 } 8397 8398 /** 8399 * Determines if the given {@code char} value is a Unicode 8400 * <i>surrogate code unit</i>. 8401 * 8402 * <p>Such values do not represent characters by themselves, 8403 * but are used in the representation of 8404 * <a href="#supplementary">supplementary characters</a> 8405 * in the UTF-16 encoding. 8406 * 8407 * <p>A char value is a surrogate code unit if and only if it is either 8408 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 8409 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 8410 * 8411 * @param ch the {@code char} value to be tested. 8412 * @return {@code true} if the {@code char} value is between 8413 * {@link #MIN_SURROGATE} and 8414 * {@link #MAX_SURROGATE} inclusive; 8415 * {@code false} otherwise. 8416 * @since 1.7 8417 */ 8418 public static boolean isSurrogate(char ch) { 8419 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 8420 } 8421 8422 /** 8423 * Determines whether the specified pair of {@code char} 8424 * values is a valid 8425 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8426 * Unicode surrogate pair</a>. 8427 8428 * <p>This method is equivalent to the expression: 8429 * <blockquote><pre>{@code 8430 * isHighSurrogate(high) && isLowSurrogate(low) 8431 * }</pre></blockquote> 8432 * 8433 * @param high the high-surrogate code value to be tested 8434 * @param low the low-surrogate code value to be tested 8435 * @return {@code true} if the specified high and 8436 * low-surrogate code values represent a valid surrogate pair; 8437 * {@code false} otherwise. 
8438 * @since 1.5 8439 */ 8440 public static boolean isSurrogatePair(char high, char low) { 8441 return isHighSurrogate(high) && isLowSurrogate(low); 8442 } 8443 8444 /** 8445 * Determines the number of {@code char} values needed to 8446 * represent the specified character (Unicode code point). If the 8447 * specified character is equal to or greater than 0x10000, then 8448 * the method returns 2. Otherwise, the method returns 1. 8449 * 8450 * <p>This method doesn't validate the specified character to be a 8451 * valid Unicode code point. The caller must validate the 8452 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 8453 * if necessary. 8454 * 8455 * @param codePoint the character (Unicode code point) to be tested. 8456 * @return 2 if the character is a valid supplementary character; 1 otherwise. 8457 * @see Character#isSupplementaryCodePoint(int) 8458 * @since 1.5 8459 */ 8460 public static int charCount(int codePoint) { 8461 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 8462 } 8463 8464 /** 8465 * Converts the specified surrogate pair to its supplementary code 8466 * point value. This method does not validate the specified 8467 * surrogate pair. The caller must validate it using {@link 8468 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 8469 * 8470 * @param high the high-surrogate code unit 8471 * @param low the low-surrogate code unit 8472 * @return the supplementary code point composed from the 8473 * specified surrogate pair. 8474 * @since 1.5 8475 */ 8476 public static int toCodePoint(char high, char low) { 8477 // Optimized form of: 8478 // return ((high - MIN_HIGH_SURROGATE) << 10) 8479 // + (low - MIN_LOW_SURROGATE) 8480 // + MIN_SUPPLEMENTARY_CODE_POINT; 8481 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 8482 - (MIN_HIGH_SURROGATE << 10) 8483 - MIN_LOW_SURROGATE); 8484 } 8485 8486 /** 8487 * Returns the code point at the given index of the 8488 * {@code CharSequence}. 
If the {@code char} value at 8489 * the given index in the {@code CharSequence} is in the 8490 * high-surrogate range, the following index is less than the 8491 * length of the {@code CharSequence}, and the 8492 * {@code char} value at the following index is in the 8493 * low-surrogate range, then the supplementary code point 8494 * corresponding to this surrogate pair is returned. Otherwise, 8495 * the {@code char} value at the given index is returned. 8496 * 8497 * @param seq a sequence of {@code char} values (Unicode code 8498 * units) 8499 * @param index the index to the {@code char} values (Unicode 8500 * code units) in {@code seq} to be converted 8501 * @return the Unicode code point at the given index 8502 * @throws NullPointerException if {@code seq} is null. 8503 * @throws IndexOutOfBoundsException if the value 8504 * {@code index} is negative or not less than 8505 * {@link CharSequence#length() seq.length()}. 8506 * @since 1.5 8507 */ 8508 public static int codePointAt(CharSequence seq, int index) { 8509 char c1 = seq.charAt(index); 8510 if (isHighSurrogate(c1) && ++index < seq.length()) { 8511 char c2 = seq.charAt(index); 8512 if (isLowSurrogate(c2)) { 8513 return toCodePoint(c1, c2); 8514 } 8515 } 8516 return c1; 8517 } 8518 8519 /** 8520 * Returns the code point at the given index of the 8521 * {@code char} array. If the {@code char} value at 8522 * the given index in the {@code char} array is in the 8523 * high-surrogate range, the following index is less than the 8524 * length of the {@code char} array, and the 8525 * {@code char} value at the following index is in the 8526 * low-surrogate range, then the supplementary code point 8527 * corresponding to this surrogate pair is returned. Otherwise, 8528 * the {@code char} value at the given index is returned. 
8529 * 8530 * @param a the {@code char} array 8531 * @param index the index to the {@code char} values (Unicode 8532 * code units) in the {@code char} array to be converted 8533 * @return the Unicode code point at the given index 8534 * @throws NullPointerException if {@code a} is null. 8535 * @throws IndexOutOfBoundsException if the value 8536 * {@code index} is negative or not less than 8537 * the length of the {@code char} array. 8538 * @since 1.5 8539 */ 8540 public static int codePointAt(char[] a, int index) { 8541 return codePointAtImpl(a, index, a.length); 8542 } 8543 8544 /** 8545 * Returns the code point at the given index of the 8546 * {@code char} array, where only array elements with 8547 * {@code index} less than {@code limit} can be used. If 8548 * the {@code char} value at the given index in the 8549 * {@code char} array is in the high-surrogate range, the 8550 * following index is less than the {@code limit}, and the 8551 * {@code char} value at the following index is in the 8552 * low-surrogate range, then the supplementary code point 8553 * corresponding to this surrogate pair is returned. Otherwise, 8554 * the {@code char} value at the given index is returned. 8555 * 8556 * @param a the {@code char} array 8557 * @param index the index to the {@code char} values (Unicode 8558 * code units) in the {@code char} array to be converted 8559 * @param limit the index after the last array element that 8560 * can be used in the {@code char} array 8561 * @return the Unicode code point at the given index 8562 * @throws NullPointerException if {@code a} is null. 8563 * @throws IndexOutOfBoundsException if the {@code index} 8564 * argument is negative or not less than the {@code limit} 8565 * argument, or if the {@code limit} argument is negative or 8566 * greater than the length of the {@code char} array. 
8567 * @since 1.5 8568 */ 8569 public static int codePointAt(char[] a, int index, int limit) { 8570 if (index >= limit || limit < 0 || limit > a.length) { 8571 throw new IndexOutOfBoundsException(); 8572 } 8573 return codePointAtImpl(a, index, limit); 8574 } 8575 8576 // throws ArrayIndexOutOfBoundsException if index out of bounds 8577 static int codePointAtImpl(char[] a, int index, int limit) { 8578 char c1 = a[index]; 8579 if (isHighSurrogate(c1) && ++index < limit) { 8580 char c2 = a[index]; 8581 if (isLowSurrogate(c2)) { 8582 return toCodePoint(c1, c2); 8583 } 8584 } 8585 return c1; 8586 } 8587 8588 /** 8589 * Returns the code point preceding the given index of the 8590 * {@code CharSequence}. If the {@code char} value at 8591 * {@code (index - 1)} in the {@code CharSequence} is in 8592 * the low-surrogate range, {@code (index - 2)} is not 8593 * negative, and the {@code char} value at {@code (index - 2)} 8594 * in the {@code CharSequence} is in the 8595 * high-surrogate range, then the supplementary code point 8596 * corresponding to this surrogate pair is returned. Otherwise, 8597 * the {@code char} value at {@code (index - 1)} is 8598 * returned. 8599 * 8600 * @param seq the {@code CharSequence} instance 8601 * @param index the index following the code point that should be returned 8602 * @return the Unicode code point value before the given index. 8603 * @throws NullPointerException if {@code seq} is null. 8604 * @throws IndexOutOfBoundsException if the {@code index} 8605 * argument is less than 1 or greater than {@link 8606 * CharSequence#length() seq.length()}. 
8607 * @since 1.5 8608 */ 8609 public static int codePointBefore(CharSequence seq, int index) { 8610 char c2 = seq.charAt(--index); 8611 if (isLowSurrogate(c2) && index > 0) { 8612 char c1 = seq.charAt(--index); 8613 if (isHighSurrogate(c1)) { 8614 return toCodePoint(c1, c2); 8615 } 8616 } 8617 return c2; 8618 } 8619 8620 /** 8621 * Returns the code point preceding the given index of the 8622 * {@code char} array. If the {@code char} value at 8623 * {@code (index - 1)} in the {@code char} array is in 8624 * the low-surrogate range, {@code (index - 2)} is not 8625 * negative, and the {@code char} value at {@code (index - 2)} 8626 * in the {@code char} array is in the 8627 * high-surrogate range, then the supplementary code point 8628 * corresponding to this surrogate pair is returned. Otherwise, 8629 * the {@code char} value at {@code (index - 1)} is 8630 * returned. 8631 * 8632 * @param a the {@code char} array 8633 * @param index the index following the code point that should be returned 8634 * @return the Unicode code point value before the given index. 8635 * @throws NullPointerException if {@code a} is null. 8636 * @throws IndexOutOfBoundsException if the {@code index} 8637 * argument is less than 1 or greater than the length of the 8638 * {@code char} array 8639 * @since 1.5 8640 */ 8641 public static int codePointBefore(char[] a, int index) { 8642 return codePointBeforeImpl(a, index, 0); 8643 } 8644 8645 /** 8646 * Returns the code point preceding the given index of the 8647 * {@code char} array, where only array elements with 8648 * {@code index} greater than or equal to {@code start} 8649 * can be used. 
If the {@code char} value at {@code (index - 1)} 8650 * in the {@code char} array is in the 8651 * low-surrogate range, {@code (index - 2)} is not less than 8652 * {@code start}, and the {@code char} value at 8653 * {@code (index - 2)} in the {@code char} array is in 8654 * the high-surrogate range, then the supplementary code point 8655 * corresponding to this surrogate pair is returned. Otherwise, 8656 * the {@code char} value at {@code (index - 1)} is 8657 * returned. 8658 * 8659 * @param a the {@code char} array 8660 * @param index the index following the code point that should be returned 8661 * @param start the index of the first array element in the 8662 * {@code char} array 8663 * @return the Unicode code point value before the given index. 8664 * @throws NullPointerException if {@code a} is null. 8665 * @throws IndexOutOfBoundsException if the {@code index} 8666 * argument is not greater than the {@code start} argument or 8667 * is greater than the length of the {@code char} array, or 8668 * if the {@code start} argument is negative or not less than 8669 * the length of the {@code char} array. 
8670 * @since 1.5 8671 */ 8672 public static int codePointBefore(char[] a, int index, int start) { 8673 if (index <= start || start < 0 || start >= a.length) { 8674 throw new IndexOutOfBoundsException(); 8675 } 8676 return codePointBeforeImpl(a, index, start); 8677 } 8678 8679 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 8680 static int codePointBeforeImpl(char[] a, int index, int start) { 8681 char c2 = a[--index]; 8682 if (isLowSurrogate(c2) && index > start) { 8683 char c1 = a[--index]; 8684 if (isHighSurrogate(c1)) { 8685 return toCodePoint(c1, c2); 8686 } 8687 } 8688 return c2; 8689 } 8690 8691 /** 8692 * Returns the leading surrogate (a 8693 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8694 * high surrogate code unit</a>) of the 8695 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8696 * surrogate pair</a> 8697 * representing the specified supplementary character (Unicode 8698 * code point) in the UTF-16 encoding. If the specified character 8699 * is not a 8700 * <a href="Character.html#supplementary">supplementary character</a>, 8701 * an unspecified {@code char} is returned. 8702 * 8703 * <p>If 8704 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 8705 * is {@code true}, then 8706 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 8707 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 8708 * are also always {@code true}. 
8709 * 8710 * @param codePoint a supplementary character (Unicode code point) 8711 * @return the leading surrogate code unit used to represent the 8712 * character in the UTF-16 encoding 8713 * @since 1.7 8714 */ 8715 public static char highSurrogate(int codePoint) { 8716 return (char) ((codePoint >>> 10) 8717 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 8718 } 8719 8720 /** 8721 * Returns the trailing surrogate (a 8722 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8723 * low surrogate code unit</a>) of the 8724 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8725 * surrogate pair</a> 8726 * representing the specified supplementary character (Unicode 8727 * code point) in the UTF-16 encoding. If the specified character 8728 * is not a 8729 * <a href="Character.html#supplementary">supplementary character</a>, 8730 * an unspecified {@code char} is returned. 8731 * 8732 * <p>If 8733 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 8734 * is {@code true}, then 8735 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 8736 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 8737 * are also always {@code true}. 8738 * 8739 * @param codePoint a supplementary character (Unicode code point) 8740 * @return the trailing surrogate code unit used to represent the 8741 * character in the UTF-16 encoding 8742 * @since 1.7 8743 */ 8744 public static char lowSurrogate(int codePoint) { 8745 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 8746 } 8747 8748 /** 8749 * Converts the specified character (Unicode code point) to its 8750 * UTF-16 representation. If the specified code point is a BMP 8751 * (Basic Multilingual Plane or Plane 0) value, the same value is 8752 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 8753 * specified code point is a supplementary character, its 8754 * surrogate values are stored in {@code dst[dstIndex]} 8755 * (high-surrogate) and {@code dst[dstIndex+1]} 8756 * (low-surrogate), and 2 is returned. 8757 * 8758 * @param codePoint the character (Unicode code point) to be converted. 8759 * @param dst an array of {@code char} in which the 8760 * {@code codePoint}'s UTF-16 value is stored. 8761 * @param dstIndex the start index into the {@code dst} 8762 * array where the converted value is stored. 8763 * @return 1 if the code point is a BMP code point, 2 if the 8764 * code point is a supplementary code point. 8765 * @throws IllegalArgumentException if the specified 8766 * {@code codePoint} is not a valid Unicode code point. 8767 * @throws NullPointerException if the specified {@code dst} is null. 8768 * @throws IndexOutOfBoundsException if {@code dstIndex} 8769 * is negative or not less than {@code dst.length}, or if 8770 * {@code dst} at {@code dstIndex} doesn't have enough 8771 * array element(s) to store the resulting {@code char} 8772 * value(s). (If {@code dstIndex} is equal to 8773 * {@code dst.length-1} and the specified 8774 * {@code codePoint} is a supplementary character, the 8775 * high-surrogate value is not stored in 8776 * {@code dst[dstIndex]}.) 8777 * @since 1.5 8778 */ 8779 public static int toChars(int codePoint, char[] dst, int dstIndex) { 8780 if (isBmpCodePoint(codePoint)) { 8781 dst[dstIndex] = (char) codePoint; 8782 return 1; 8783 } else if (isValidCodePoint(codePoint)) { 8784 toSurrogates(codePoint, dst, dstIndex); 8785 return 2; 8786 } else { 8787 throw new IllegalArgumentException( 8788 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8789 } 8790 } 8791 8792 /** 8793 * Converts the specified character (Unicode code point) to its 8794 * UTF-16 representation stored in a {@code char} array. 
If 8795 * the specified code point is a BMP (Basic Multilingual Plane or 8796 * Plane 0) value, the resulting {@code char} array has 8797 * the same value as {@code codePoint}. If the specified code 8798 * point is a supplementary code point, the resulting 8799 * {@code char} array has the corresponding surrogate pair. 8800 * 8801 * @param codePoint a Unicode code point 8802 * @return a {@code char} array having 8803 * {@code codePoint}'s UTF-16 representation. 8804 * @throws IllegalArgumentException if the specified 8805 * {@code codePoint} is not a valid Unicode code point. 8806 * @since 1.5 8807 */ 8808 public static char[] toChars(int codePoint) { 8809 if (isBmpCodePoint(codePoint)) { 8810 return new char[] { (char) codePoint }; 8811 } else if (isValidCodePoint(codePoint)) { 8812 char[] result = new char[2]; 8813 toSurrogates(codePoint, result, 0); 8814 return result; 8815 } else { 8816 throw new IllegalArgumentException( 8817 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8818 } 8819 } 8820 8821 static void toSurrogates(int codePoint, char[] dst, int index) { 8822 // We write elements "backwards" to guarantee all-or-nothing 8823 dst[index+1] = lowSurrogate(codePoint); 8824 dst[index] = highSurrogate(codePoint); 8825 } 8826 8827 /** 8828 * Returns the number of Unicode code points in the text range of 8829 * the specified char sequence. The text range begins at the 8830 * specified {@code beginIndex} and extends to the 8831 * {@code char} at index {@code endIndex - 1}. Thus the 8832 * length (in {@code char}s) of the text range is 8833 * {@code endIndex-beginIndex}. Unpaired surrogates within 8834 * the text range count as one code point each. 8835 * 8836 * @param seq the char sequence 8837 * @param beginIndex the index to the first {@code char} of 8838 * the text range. 8839 * @param endIndex the index after the last {@code char} of 8840 * the text range. 
8841 * @return the number of Unicode code points in the specified text 8842 * range 8843 * @throws NullPointerException if {@code seq} is null. 8844 * @throws IndexOutOfBoundsException if the 8845 * {@code beginIndex} is negative, or {@code endIndex} 8846 * is larger than the length of the given sequence, or 8847 * {@code beginIndex} is larger than {@code endIndex}. 8848 * @since 1.5 8849 */ 8850 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 8851 int length = seq.length(); 8852 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 8853 throw new IndexOutOfBoundsException(); 8854 } 8855 int n = endIndex - beginIndex; 8856 for (int i = beginIndex; i < endIndex; ) { 8857 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 8858 isLowSurrogate(seq.charAt(i))) { 8859 n--; 8860 i++; 8861 } 8862 } 8863 return n; 8864 } 8865 8866 /** 8867 * Returns the number of Unicode code points in a subarray of the 8868 * {@code char} array argument. The {@code offset} 8869 * argument is the index of the first {@code char} of the 8870 * subarray and the {@code count} argument specifies the 8871 * length of the subarray in {@code char}s. Unpaired 8872 * surrogates within the subarray count as one code point each. 8873 * 8874 * @param a the {@code char} array 8875 * @param offset the index of the first {@code char} in the 8876 * given {@code char} array 8877 * @param count the length of the subarray in {@code char}s 8878 * @return the number of Unicode code points in the specified subarray 8879 * @throws NullPointerException if {@code a} is null. 8880 * @throws IndexOutOfBoundsException if {@code offset} or 8881 * {@code count} is negative, or if {@code offset + 8882 * count} is larger than the length of the given array. 
8883 * @since 1.5 8884 */ 8885 public static int codePointCount(char[] a, int offset, int count) { 8886 if (count > a.length - offset || offset < 0 || count < 0) { 8887 throw new IndexOutOfBoundsException(); 8888 } 8889 return codePointCountImpl(a, offset, count); 8890 } 8891 8892 static int codePointCountImpl(char[] a, int offset, int count) { 8893 int endIndex = offset + count; 8894 int n = count; 8895 for (int i = offset; i < endIndex; ) { 8896 if (isHighSurrogate(a[i++]) && i < endIndex && 8897 isLowSurrogate(a[i])) { 8898 n--; 8899 i++; 8900 } 8901 } 8902 return n; 8903 } 8904 8905 /** 8906 * Returns the index within the given char sequence that is offset 8907 * from the given {@code index} by {@code codePointOffset} 8908 * code points. Unpaired surrogates within the text range given by 8909 * {@code index} and {@code codePointOffset} count as 8910 * one code point each. 8911 * 8912 * @param seq the char sequence 8913 * @param index the index to be offset 8914 * @param codePointOffset the offset in code points 8915 * @return the index within the char sequence 8916 * @throws NullPointerException if {@code seq} is null. 8917 * @throws IndexOutOfBoundsException if {@code index} 8918 * is negative or larger then the length of the char sequence, 8919 * or if {@code codePointOffset} is positive and the 8920 * subsequence starting with {@code index} has fewer than 8921 * {@code codePointOffset} code points, or if 8922 * {@code codePointOffset} is negative and the subsequence 8923 * before {@code index} has fewer than the absolute value 8924 * of {@code codePointOffset} code points. 
8925 * @since 1.5 8926 */ 8927 public static int offsetByCodePoints(CharSequence seq, int index, 8928 int codePointOffset) { 8929 int length = seq.length(); 8930 if (index < 0 || index > length) { 8931 throw new IndexOutOfBoundsException(); 8932 } 8933 8934 int x = index; 8935 if (codePointOffset >= 0) { 8936 int i; 8937 for (i = 0; x < length && i < codePointOffset; i++) { 8938 if (isHighSurrogate(seq.charAt(x++)) && x < length && 8939 isLowSurrogate(seq.charAt(x))) { 8940 x++; 8941 } 8942 } 8943 if (i < codePointOffset) { 8944 throw new IndexOutOfBoundsException(); 8945 } 8946 } else { 8947 int i; 8948 for (i = codePointOffset; x > 0 && i < 0; i++) { 8949 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 8950 isHighSurrogate(seq.charAt(x-1))) { 8951 x--; 8952 } 8953 } 8954 if (i < 0) { 8955 throw new IndexOutOfBoundsException(); 8956 } 8957 } 8958 return x; 8959 } 8960 8961 /** 8962 * Returns the index within the given {@code char} subarray 8963 * that is offset from the given {@code index} by 8964 * {@code codePointOffset} code points. The 8965 * {@code start} and {@code count} arguments specify a 8966 * subarray of the {@code char} array. Unpaired surrogates 8967 * within the text range given by {@code index} and 8968 * {@code codePointOffset} count as one code point each. 8969 * 8970 * @param a the {@code char} array 8971 * @param start the index of the first {@code char} of the 8972 * subarray 8973 * @param count the length of the subarray in {@code char}s 8974 * @param index the index to be offset 8975 * @param codePointOffset the offset in code points 8976 * @return the index within the subarray 8977 * @throws NullPointerException if {@code a} is null. 
8978 * @throws IndexOutOfBoundsException 8979 * if {@code start} or {@code count} is negative, 8980 * or if {@code start + count} is larger than the length of 8981 * the given array, 8982 * or if {@code index} is less than {@code start} or 8983 * larger then {@code start + count}, 8984 * or if {@code codePointOffset} is positive and the text range 8985 * starting with {@code index} and ending with {@code start + count - 1} 8986 * has fewer than {@code codePointOffset} code 8987 * points, 8988 * or if {@code codePointOffset} is negative and the text range 8989 * starting with {@code start} and ending with {@code index - 1} 8990 * has fewer than the absolute value of 8991 * {@code codePointOffset} code points. 8992 * @since 1.5 8993 */ 8994 public static int offsetByCodePoints(char[] a, int start, int count, 8995 int index, int codePointOffset) { 8996 if (count > a.length-start || start < 0 || count < 0 8997 || index < start || index > start+count) { 8998 throw new IndexOutOfBoundsException(); 8999 } 9000 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9001 } 9002 9003 static int offsetByCodePointsImpl(char[]a, int start, int count, 9004 int index, int codePointOffset) { 9005 int x = index; 9006 if (codePointOffset >= 0) { 9007 int limit = start + count; 9008 int i; 9009 for (i = 0; x < limit && i < codePointOffset; i++) { 9010 if (isHighSurrogate(a[x++]) && x < limit && 9011 isLowSurrogate(a[x])) { 9012 x++; 9013 } 9014 } 9015 if (i < codePointOffset) { 9016 throw new IndexOutOfBoundsException(); 9017 } 9018 } else { 9019 int i; 9020 for (i = codePointOffset; x > start && i < 0; i++) { 9021 if (isLowSurrogate(a[--x]) && x > start && 9022 isHighSurrogate(a[x-1])) { 9023 x--; 9024 } 9025 } 9026 if (i < 0) { 9027 throw new IndexOutOfBoundsException(); 9028 } 9029 } 9030 return x; 9031 } 9032 9033 /** 9034 * Determines if the specified character is a lowercase character. 
9035 * <p> 9036 * A character is lowercase if its general category type, provided 9037 * by {@code Character.getType(ch)}, is 9038 * {@code LOWERCASE_LETTER}, or it has contributory property 9039 * Other_Lowercase as defined by the Unicode Standard. 9040 * <p> 9041 * The following are examples of lowercase characters: 9042 * <blockquote><pre> 9043 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9044 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9045 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9046 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9047 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9048 * </pre></blockquote> 9049 * <p> Many other Unicode characters are lowercase too. 9050 * 9051 * <p><b>Note:</b> This method cannot handle <a 9052 * href="#supplementary"> supplementary characters</a>. To support 9053 * all Unicode characters, including supplementary characters, use 9054 * the {@link #isLowerCase(int)} method. 9055 * 9056 * @param ch the character to be tested. 9057 * @return {@code true} if the character is lowercase; 9058 * {@code false} otherwise. 9059 * @see Character#isLowerCase(char) 9060 * @see Character#isTitleCase(char) 9061 * @see Character#toLowerCase(char) 9062 * @see Character#getType(char) 9063 */ 9064 public static boolean isLowerCase(char ch) { 9065 return isLowerCase((int)ch); 9066 } 9067 9068 /** 9069 * Determines if the specified character (Unicode code point) is a 9070 * lowercase character. 9071 * <p> 9072 * A character is lowercase if its general category type, provided 9073 * by {@link Character#getType getType(codePoint)}, is 9074 * {@code LOWERCASE_LETTER}, or it has contributory property 9075 * Other_Lowercase as defined by the Unicode Standard. 
9076 * <p> 9077 * The following are examples of lowercase characters: 9078 * <blockquote><pre> 9079 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9080 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9081 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9082 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9083 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9084 * </pre></blockquote> 9085 * <p> Many other Unicode characters are lowercase too. 9086 * 9087 * @param codePoint the character (Unicode code point) to be tested. 9088 * @return {@code true} if the character is lowercase; 9089 * {@code false} otherwise. 9090 * @see Character#isLowerCase(int) 9091 * @see Character#isTitleCase(int) 9092 * @see Character#toLowerCase(int) 9093 * @see Character#getType(int) 9094 * @since 1.5 9095 */ 9096 public static boolean isLowerCase(int codePoint) { 9097 return CharacterData.of(codePoint).isLowerCase(codePoint) || 9098 CharacterData.of(codePoint).isOtherLowercase(codePoint); 9099 } 9100 9101 /** 9102 * Determines if the specified character is an uppercase character. 9103 * <p> 9104 * A character is uppercase if its general category type, provided by 9105 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9106 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9107 * <p> 9108 * The following are examples of uppercase characters: 9109 * <blockquote><pre> 9110 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9111 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9112 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9113 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9114 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9115 * </pre></blockquote> 9116 * <p> Many other Unicode characters are uppercase too. 
9117 * 9118 * <p><b>Note:</b> This method cannot handle <a 9119 * href="#supplementary"> supplementary characters</a>. To support 9120 * all Unicode characters, including supplementary characters, use 9121 * the {@link #isUpperCase(int)} method. 9122 * 9123 * @param ch the character to be tested. 9124 * @return {@code true} if the character is uppercase; 9125 * {@code false} otherwise. 9126 * @see Character#isLowerCase(char) 9127 * @see Character#isTitleCase(char) 9128 * @see Character#toUpperCase(char) 9129 * @see Character#getType(char) 9130 * @since 1.0 9131 */ 9132 public static boolean isUpperCase(char ch) { 9133 return isUpperCase((int)ch); 9134 } 9135 9136 /** 9137 * Determines if the specified character (Unicode code point) is an uppercase character. 9138 * <p> 9139 * A character is uppercase if its general category type, provided by 9140 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9141 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9142 * <p> 9143 * The following are examples of uppercase characters: 9144 * <blockquote><pre> 9145 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9146 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9147 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9148 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9149 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9150 * </pre></blockquote> 9151 * <p> Many other Unicode characters are uppercase too. 9152 * 9153 * @param codePoint the character (Unicode code point) to be tested. 9154 * @return {@code true} if the character is uppercase; 9155 * {@code false} otherwise. 
9156 * @see Character#isLowerCase(int) 9157 * @see Character#isTitleCase(int) 9158 * @see Character#toUpperCase(int) 9159 * @see Character#getType(int) 9160 * @since 1.5 9161 */ 9162 public static boolean isUpperCase(int codePoint) { 9163 return CharacterData.of(codePoint).isUpperCase(codePoint) || 9164 CharacterData.of(codePoint).isOtherUppercase(codePoint); 9165 } 9166 9167 /** 9168 * Determines if the specified character is a titlecase character. 9169 * <p> 9170 * A character is a titlecase character if its general 9171 * category type, provided by {@code Character.getType(ch)}, 9172 * is {@code TITLECASE_LETTER}. 9173 * <p> 9174 * Some characters look like pairs of Latin letters. For example, there 9175 * is an uppercase letter that looks like "LJ" and has a corresponding 9176 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9177 * is the appropriate form to use when rendering a word in lowercase 9178 * with initial capitals, as for a book title. 9179 * <p> 9180 * These are some of the Unicode characters for which this method returns 9181 * {@code true}: 9182 * <ul> 9183 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9184 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9185 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9186 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9187 * </ul> 9188 * <p> Many other Unicode characters are titlecase too. 9189 * 9190 * <p><b>Note:</b> This method cannot handle <a 9191 * href="#supplementary"> supplementary characters</a>. To support 9192 * all Unicode characters, including supplementary characters, use 9193 * the {@link #isTitleCase(int)} method. 9194 * 9195 * @param ch the character to be tested. 9196 * @return {@code true} if the character is titlecase; 9197 * {@code false} otherwise. 
9198 * @see Character#isLowerCase(char) 9199 * @see Character#isUpperCase(char) 9200 * @see Character#toTitleCase(char) 9201 * @see Character#getType(char) 9202 * @since 1.0.2 9203 */ 9204 public static boolean isTitleCase(char ch) { 9205 return isTitleCase((int)ch); 9206 } 9207 9208 /** 9209 * Determines if the specified character (Unicode code point) is a titlecase character. 9210 * <p> 9211 * A character is a titlecase character if its general 9212 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9213 * is {@code TITLECASE_LETTER}. 9214 * <p> 9215 * Some characters look like pairs of Latin letters. For example, there 9216 * is an uppercase letter that looks like "LJ" and has a corresponding 9217 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9218 * is the appropriate form to use when rendering a word in lowercase 9219 * with initial capitals, as for a book title. 9220 * <p> 9221 * These are some of the Unicode characters for which this method returns 9222 * {@code true}: 9223 * <ul> 9224 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9225 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9226 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9227 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9228 * </ul> 9229 * <p> Many other Unicode characters are titlecase too. 9230 * 9231 * @param codePoint the character (Unicode code point) to be tested. 9232 * @return {@code true} if the character is titlecase; 9233 * {@code false} otherwise. 9234 * @see Character#isLowerCase(int) 9235 * @see Character#isUpperCase(int) 9236 * @see Character#toTitleCase(int) 9237 * @see Character#getType(int) 9238 * @since 1.5 9239 */ 9240 public static boolean isTitleCase(int codePoint) { 9241 return getType(codePoint) == Character.TITLECASE_LETTER; 9242 } 9243 9244 /** 9245 * Determines if the specified character is a digit. 
9246 * <p> 9247 * A character is a digit if its general category type, provided 9248 * by {@code Character.getType(ch)}, is 9249 * {@code DECIMAL_DIGIT_NUMBER}. 9250 * <p> 9251 * Some Unicode character ranges that contain digits: 9252 * <ul> 9253 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9254 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9255 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9256 * Arabic-Indic digits 9257 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9258 * Extended Arabic-Indic digits 9259 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9260 * Devanagari digits 9261 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9262 * Fullwidth digits 9263 * </ul> 9264 * 9265 * Many other character ranges contain digits as well. 9266 * 9267 * <p><b>Note:</b> This method cannot handle <a 9268 * href="#supplementary"> supplementary characters</a>. To support 9269 * all Unicode characters, including supplementary characters, use 9270 * the {@link #isDigit(int)} method. 9271 * 9272 * @param ch the character to be tested. 9273 * @return {@code true} if the character is a digit; 9274 * {@code false} otherwise. 9275 * @see Character#digit(char, int) 9276 * @see Character#forDigit(int, int) 9277 * @see Character#getType(char) 9278 */ 9279 public static boolean isDigit(char ch) { 9280 return isDigit((int)ch); 9281 } 9282 9283 /** 9284 * Determines if the specified character (Unicode code point) is a digit. 9285 * <p> 9286 * A character is a digit if its general category type, provided 9287 * by {@link Character#getType(int) getType(codePoint)}, is 9288 * {@code DECIMAL_DIGIT_NUMBER}. 
9289 * <p> 9290 * Some Unicode character ranges that contain digits: 9291 * <ul> 9292 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9293 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9294 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9295 * Arabic-Indic digits 9296 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9297 * Extended Arabic-Indic digits 9298 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9299 * Devanagari digits 9300 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9301 * Fullwidth digits 9302 * </ul> 9303 * 9304 * Many other character ranges contain digits as well. 9305 * 9306 * @param codePoint the character (Unicode code point) to be tested. 9307 * @return {@code true} if the character is a digit; 9308 * {@code false} otherwise. 9309 * @see Character#forDigit(int, int) 9310 * @see Character#getType(int) 9311 * @since 1.5 9312 */ 9313 public static boolean isDigit(int codePoint) { 9314 return CharacterData.of(codePoint).isDigit(codePoint); 9315 } 9316 9317 /** 9318 * Determines if a character is defined in Unicode. 9319 * <p> 9320 * A character is defined if at least one of the following is true: 9321 * <ul> 9322 * <li>It has an entry in the UnicodeData file. 9323 * <li>It has a value in a range defined by the UnicodeData file. 9324 * </ul> 9325 * 9326 * <p><b>Note:</b> This method cannot handle <a 9327 * href="#supplementary"> supplementary characters</a>. To support 9328 * all Unicode characters, including supplementary characters, use 9329 * the {@link #isDefined(int)} method. 9330 * 9331 * @param ch the character to be tested 9332 * @return {@code true} if the character has a defined meaning 9333 * in Unicode; {@code false} otherwise. 
9334 * @see Character#isDigit(char) 9335 * @see Character#isLetter(char) 9336 * @see Character#isLetterOrDigit(char) 9337 * @see Character#isLowerCase(char) 9338 * @see Character#isTitleCase(char) 9339 * @see Character#isUpperCase(char) 9340 * @since 1.0.2 9341 */ 9342 public static boolean isDefined(char ch) { 9343 return isDefined((int)ch); 9344 } 9345 9346 /** 9347 * Determines if a character (Unicode code point) is defined in Unicode. 9348 * <p> 9349 * A character is defined if at least one of the following is true: 9350 * <ul> 9351 * <li>It has an entry in the UnicodeData file. 9352 * <li>It has a value in a range defined by the UnicodeData file. 9353 * </ul> 9354 * 9355 * @param codePoint the character (Unicode code point) to be tested. 9356 * @return {@code true} if the character has a defined meaning 9357 * in Unicode; {@code false} otherwise. 9358 * @see Character#isDigit(int) 9359 * @see Character#isLetter(int) 9360 * @see Character#isLetterOrDigit(int) 9361 * @see Character#isLowerCase(int) 9362 * @see Character#isTitleCase(int) 9363 * @see Character#isUpperCase(int) 9364 * @since 1.5 9365 */ 9366 public static boolean isDefined(int codePoint) { 9367 return getType(codePoint) != Character.UNASSIGNED; 9368 } 9369 9370 /** 9371 * Determines if the specified character is a letter. 9372 * <p> 9373 * A character is considered to be a letter if its general 9374 * category type, provided by {@code Character.getType(ch)}, 9375 * is any of the following: 9376 * <ul> 9377 * <li> {@code UPPERCASE_LETTER} 9378 * <li> {@code LOWERCASE_LETTER} 9379 * <li> {@code TITLECASE_LETTER} 9380 * <li> {@code MODIFIER_LETTER} 9381 * <li> {@code OTHER_LETTER} 9382 * </ul> 9383 * 9384 * Not all letters have case. Many characters are 9385 * letters but are neither uppercase nor lowercase nor titlecase. 9386 * 9387 * <p><b>Note:</b> This method cannot handle <a 9388 * href="#supplementary"> supplementary characters</a>. 
To support 9389 * all Unicode characters, including supplementary characters, use 9390 * the {@link #isLetter(int)} method. 9391 * 9392 * @param ch the character to be tested. 9393 * @return {@code true} if the character is a letter; 9394 * {@code false} otherwise. 9395 * @see Character#isDigit(char) 9396 * @see Character#isJavaIdentifierStart(char) 9397 * @see Character#isJavaLetter(char) 9398 * @see Character#isJavaLetterOrDigit(char) 9399 * @see Character#isLetterOrDigit(char) 9400 * @see Character#isLowerCase(char) 9401 * @see Character#isTitleCase(char) 9402 * @see Character#isUnicodeIdentifierStart(char) 9403 * @see Character#isUpperCase(char) 9404 */ 9405 public static boolean isLetter(char ch) { 9406 return isLetter((int)ch); 9407 } 9408 9409 /** 9410 * Determines if the specified character (Unicode code point) is a letter. 9411 * <p> 9412 * A character is considered to be a letter if its general 9413 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9414 * is any of the following: 9415 * <ul> 9416 * <li> {@code UPPERCASE_LETTER} 9417 * <li> {@code LOWERCASE_LETTER} 9418 * <li> {@code TITLECASE_LETTER} 9419 * <li> {@code MODIFIER_LETTER} 9420 * <li> {@code OTHER_LETTER} 9421 * </ul> 9422 * 9423 * Not all letters have case. Many characters are 9424 * letters but are neither uppercase nor lowercase nor titlecase. 9425 * 9426 * @param codePoint the character (Unicode code point) to be tested. 9427 * @return {@code true} if the character is a letter; 9428 * {@code false} otherwise. 
9429 * @see Character#isDigit(int) 9430 * @see Character#isJavaIdentifierStart(int) 9431 * @see Character#isLetterOrDigit(int) 9432 * @see Character#isLowerCase(int) 9433 * @see Character#isTitleCase(int) 9434 * @see Character#isUnicodeIdentifierStart(int) 9435 * @see Character#isUpperCase(int) 9436 * @since 1.5 9437 */ 9438 public static boolean isLetter(int codePoint) { 9439 return ((((1 << Character.UPPERCASE_LETTER) | 9440 (1 << Character.LOWERCASE_LETTER) | 9441 (1 << Character.TITLECASE_LETTER) | 9442 (1 << Character.MODIFIER_LETTER) | 9443 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 9444 != 0; 9445 } 9446 9447 /** 9448 * Determines if the specified character is a letter or digit. 9449 * <p> 9450 * A character is considered to be a letter or digit if either 9451 * {@code Character.isLetter(char ch)} or 9452 * {@code Character.isDigit(char ch)} returns 9453 * {@code true} for the character. 9454 * 9455 * <p><b>Note:</b> This method cannot handle <a 9456 * href="#supplementary"> supplementary characters</a>. To support 9457 * all Unicode characters, including supplementary characters, use 9458 * the {@link #isLetterOrDigit(int)} method. 9459 * 9460 * @param ch the character to be tested. 9461 * @return {@code true} if the character is a letter or digit; 9462 * {@code false} otherwise. 9463 * @see Character#isDigit(char) 9464 * @see Character#isJavaIdentifierPart(char) 9465 * @see Character#isJavaLetter(char) 9466 * @see Character#isJavaLetterOrDigit(char) 9467 * @see Character#isLetter(char) 9468 * @see Character#isUnicodeIdentifierPart(char) 9469 * @since 1.0.2 9470 */ 9471 public static boolean isLetterOrDigit(char ch) { 9472 return isLetterOrDigit((int)ch); 9473 } 9474 9475 /** 9476 * Determines if the specified character (Unicode code point) is a letter or digit. 
9477 * <p> 9478 * A character is considered to be a letter or digit if either 9479 * {@link #isLetter(int) isLetter(codePoint)} or 9480 * {@link #isDigit(int) isDigit(codePoint)} returns 9481 * {@code true} for the character. 9482 * 9483 * @param codePoint the character (Unicode code point) to be tested. 9484 * @return {@code true} if the character is a letter or digit; 9485 * {@code false} otherwise. 9486 * @see Character#isDigit(int) 9487 * @see Character#isJavaIdentifierPart(int) 9488 * @see Character#isLetter(int) 9489 * @see Character#isUnicodeIdentifierPart(int) 9490 * @since 1.5 9491 */ 9492 public static boolean isLetterOrDigit(int codePoint) { 9493 return ((((1 << Character.UPPERCASE_LETTER) | 9494 (1 << Character.LOWERCASE_LETTER) | 9495 (1 << Character.TITLECASE_LETTER) | 9496 (1 << Character.MODIFIER_LETTER) | 9497 (1 << Character.OTHER_LETTER) | 9498 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 9499 != 0; 9500 } 9501 9502 /** 9503 * Determines if the specified character is permissible as the first 9504 * character in a Java identifier. 9505 * <p> 9506 * A character may start a Java identifier if and only if 9507 * one of the following is true: 9508 * <ul> 9509 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9510 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 9511 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 9512 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 9513 * </ul> 9514 * 9515 * @param ch the character to be tested. 9516 * @return {@code true} if the character may start a Java 9517 * identifier; {@code false} otherwise. 
9518 * @see Character#isJavaLetterOrDigit(char) 9519 * @see Character#isJavaIdentifierStart(char) 9520 * @see Character#isJavaIdentifierPart(char) 9521 * @see Character#isLetter(char) 9522 * @see Character#isLetterOrDigit(char) 9523 * @see Character#isUnicodeIdentifierStart(char) 9524 * @since 1.0.2 9525 * @deprecated Replaced by isJavaIdentifierStart(char). 9526 */ 9527 @Deprecated(since="1.1") 9528 public static boolean isJavaLetter(char ch) { 9529 return isJavaIdentifierStart(ch); 9530 } 9531 9532 /** 9533 * Determines if the specified character may be part of a Java 9534 * identifier as other than the first character. 9535 * <p> 9536 * A character may be part of a Java identifier if and only if any 9537 * of the following are true: 9538 * <ul> 9539 * <li> it is a letter 9540 * <li> it is a currency symbol (such as {@code '$'}) 9541 * <li> it is a connecting punctuation character (such as {@code '_'}) 9542 * <li> it is a digit 9543 * <li> it is a numeric letter (such as a Roman numeral character) 9544 * <li> it is a combining mark 9545 * <li> it is a non-spacing mark 9546 * <li> {@code isIdentifierIgnorable} returns 9547 * {@code true} for the character. 9548 * </ul> 9549 * 9550 * @param ch the character to be tested. 9551 * @return {@code true} if the character may be part of a 9552 * Java identifier; {@code false} otherwise. 9553 * @see Character#isJavaLetter(char) 9554 * @see Character#isJavaIdentifierStart(char) 9555 * @see Character#isJavaIdentifierPart(char) 9556 * @see Character#isLetter(char) 9557 * @see Character#isLetterOrDigit(char) 9558 * @see Character#isUnicodeIdentifierPart(char) 9559 * @see Character#isIdentifierIgnorable(char) 9560 * @since 1.0.2 9561 * @deprecated Replaced by isJavaIdentifierPart(char). 9562 */ 9563 @Deprecated(since="1.1") 9564 public static boolean isJavaLetterOrDigit(char ch) { 9565 return isJavaIdentifierPart(ch); 9566 } 9567 9568 /** 9569 * Determines if the specified character (Unicode code point) is an alphabet. 
9570 * <p> 9571 * A character is considered to be alphabetic if its general category type, 9572 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 9573 * the following: 9574 * <ul> 9575 * <li> {@code UPPERCASE_LETTER} 9576 * <li> {@code LOWERCASE_LETTER} 9577 * <li> {@code TITLECASE_LETTER} 9578 * <li> {@code MODIFIER_LETTER} 9579 * <li> {@code OTHER_LETTER} 9580 * <li> {@code LETTER_NUMBER} 9581 * </ul> 9582 * or it has contributory property Other_Alphabetic as defined by the 9583 * Unicode Standard. 9584 * 9585 * @param codePoint the character (Unicode code point) to be tested. 9586 * @return {@code true} if the character is a Unicode alphabet 9587 * character, {@code false} otherwise. 9588 * @since 1.7 9589 */ 9590 public static boolean isAlphabetic(int codePoint) { 9591 return (((((1 << Character.UPPERCASE_LETTER) | 9592 (1 << Character.LOWERCASE_LETTER) | 9593 (1 << Character.TITLECASE_LETTER) | 9594 (1 << Character.MODIFIER_LETTER) | 9595 (1 << Character.OTHER_LETTER) | 9596 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 9597 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 9598 } 9599 9600 /** 9601 * Determines if the specified character (Unicode code point) is a CJKV 9602 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 9603 * the Unicode Standard. 9604 * 9605 * @param codePoint the character (Unicode code point) to be tested. 9606 * @return {@code true} if the character is a Unicode ideograph 9607 * character, {@code false} otherwise. 9608 * @since 1.7 9609 */ 9610 public static boolean isIdeographic(int codePoint) { 9611 return CharacterData.of(codePoint).isIdeographic(codePoint); 9612 } 9613 9614 /** 9615 * Determines if the specified character is 9616 * permissible as the first character in a Java identifier. 
9617 * <p> 9618 * A character may start a Java identifier if and only if 9619 * one of the following conditions is true: 9620 * <ul> 9621 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9622 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 9623 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 9624 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 9625 * </ul> 9626 * 9627 * <p><b>Note:</b> This method cannot handle <a 9628 * href="#supplementary"> supplementary characters</a>. To support 9629 * all Unicode characters, including supplementary characters, use 9630 * the {@link #isJavaIdentifierStart(int)} method. 9631 * 9632 * @param ch the character to be tested. 9633 * @return {@code true} if the character may start a Java identifier; 9634 * {@code false} otherwise. 9635 * @see Character#isJavaIdentifierPart(char) 9636 * @see Character#isLetter(char) 9637 * @see Character#isUnicodeIdentifierStart(char) 9638 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9639 * @since 1.1 9640 */ 9641 public static boolean isJavaIdentifierStart(char ch) { 9642 return isJavaIdentifierStart((int)ch); 9643 } 9644 9645 /** 9646 * Determines if the character (Unicode code point) is 9647 * permissible as the first character in a Java identifier. 9648 * <p> 9649 * A character may start a Java identifier if and only if 9650 * one of the following conditions is true: 9651 * <ul> 9652 * <li> {@link #isLetter(int) isLetter(codePoint)} 9653 * returns {@code true} 9654 * <li> {@link #getType(int) getType(codePoint)} 9655 * returns {@code LETTER_NUMBER} 9656 * <li> the referenced character is a currency symbol (such as {@code '$'}) 9657 * <li> the referenced character is a connecting punctuation character 9658 * (such as {@code '_'}). 9659 * </ul> 9660 * 9661 * @param codePoint the character (Unicode code point) to be tested. 
9662 * @return {@code true} if the character may start a Java identifier; 9663 * {@code false} otherwise. 9664 * @see Character#isJavaIdentifierPart(int) 9665 * @see Character#isLetter(int) 9666 * @see Character#isUnicodeIdentifierStart(int) 9667 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9668 * @since 1.5 9669 */ 9670 public static boolean isJavaIdentifierStart(int codePoint) { 9671 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 9672 } 9673 9674 /** 9675 * Determines if the specified character may be part of a Java 9676 * identifier as other than the first character. 9677 * <p> 9678 * A character may be part of a Java identifier if any of the following 9679 * are true: 9680 * <ul> 9681 * <li> it is a letter 9682 * <li> it is a currency symbol (such as {@code '$'}) 9683 * <li> it is a connecting punctuation character (such as {@code '_'}) 9684 * <li> it is a digit 9685 * <li> it is a numeric letter (such as a Roman numeral character) 9686 * <li> it is a combining mark 9687 * <li> it is a non-spacing mark 9688 * <li> {@code isIdentifierIgnorable} returns 9689 * {@code true} for the character 9690 * </ul> 9691 * 9692 * <p><b>Note:</b> This method cannot handle <a 9693 * href="#supplementary"> supplementary characters</a>. To support 9694 * all Unicode characters, including supplementary characters, use 9695 * the {@link #isJavaIdentifierPart(int)} method. 9696 * 9697 * @param ch the character to be tested. 9698 * @return {@code true} if the character may be part of a 9699 * Java identifier; {@code false} otherwise. 
9700 * @see Character#isIdentifierIgnorable(char) 9701 * @see Character#isJavaIdentifierStart(char) 9702 * @see Character#isLetterOrDigit(char) 9703 * @see Character#isUnicodeIdentifierPart(char) 9704 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9705 * @since 1.1 9706 */ 9707 public static boolean isJavaIdentifierPart(char ch) { 9708 return isJavaIdentifierPart((int)ch); 9709 } 9710 9711 /** 9712 * Determines if the character (Unicode code point) may be part of a Java 9713 * identifier as other than the first character. 9714 * <p> 9715 * A character may be part of a Java identifier if any of the following 9716 * are true: 9717 * <ul> 9718 * <li> it is a letter 9719 * <li> it is a currency symbol (such as {@code '$'}) 9720 * <li> it is a connecting punctuation character (such as {@code '_'}) 9721 * <li> it is a digit 9722 * <li> it is a numeric letter (such as a Roman numeral character) 9723 * <li> it is a combining mark 9724 * <li> it is a non-spacing mark 9725 * <li> {@link #isIdentifierIgnorable(int) 9726 * isIdentifierIgnorable(codePoint)} returns {@code true} for 9727 * the character 9728 * </ul> 9729 * 9730 * @param codePoint the character (Unicode code point) to be tested. 9731 * @return {@code true} if the character may be part of a 9732 * Java identifier; {@code false} otherwise. 9733 * @see Character#isIdentifierIgnorable(int) 9734 * @see Character#isJavaIdentifierStart(int) 9735 * @see Character#isLetterOrDigit(int) 9736 * @see Character#isUnicodeIdentifierPart(int) 9737 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9738 * @since 1.5 9739 */ 9740 public static boolean isJavaIdentifierPart(int codePoint) { 9741 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 9742 } 9743 9744 /** 9745 * Determines if the specified character is permissible as the 9746 * first character in a Unicode identifier. 
9747 * <p> 9748 * A character may start a Unicode identifier if and only if 9749 * one of the following conditions is true: 9750 * <ul> 9751 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9752 * <li> {@link #getType(char) getType(ch)} returns 9753 * {@code LETTER_NUMBER}. 9754 * </ul> 9755 * 9756 * <p><b>Note:</b> This method cannot handle <a 9757 * href="#supplementary"> supplementary characters</a>. To support 9758 * all Unicode characters, including supplementary characters, use 9759 * the {@link #isUnicodeIdentifierStart(int)} method. 9760 * 9761 * @param ch the character to be tested. 9762 * @return {@code true} if the character may start a Unicode 9763 * identifier; {@code false} otherwise. 9764 * @see Character#isJavaIdentifierStart(char) 9765 * @see Character#isLetter(char) 9766 * @see Character#isUnicodeIdentifierPart(char) 9767 * @since 1.1 9768 */ 9769 public static boolean isUnicodeIdentifierStart(char ch) { 9770 return isUnicodeIdentifierStart((int)ch); 9771 } 9772 9773 /** 9774 * Determines if the specified character (Unicode code point) is permissible as the 9775 * first character in a Unicode identifier. 9776 * <p> 9777 * A character may start a Unicode identifier if and only if 9778 * one of the following conditions is true: 9779 * <ul> 9780 * <li> {@link #isLetter(int) isLetter(codePoint)} 9781 * returns {@code true} 9782 * <li> {@link #getType(int) getType(codePoint)} 9783 * returns {@code LETTER_NUMBER}. 9784 * </ul> 9785 * @param codePoint the character (Unicode code point) to be tested. 9786 * @return {@code true} if the character may start a Unicode 9787 * identifier; {@code false} otherwise. 
9788 * @see Character#isJavaIdentifierStart(int) 9789 * @see Character#isLetter(int) 9790 * @see Character#isUnicodeIdentifierPart(int) 9791 * @since 1.5 9792 */ 9793 public static boolean isUnicodeIdentifierStart(int codePoint) { 9794 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 9795 } 9796 9797 /** 9798 * Determines if the specified character may be part of a Unicode 9799 * identifier as other than the first character. 9800 * <p> 9801 * A character may be part of a Unicode identifier if and only if 9802 * one of the following statements is true: 9803 * <ul> 9804 * <li> it is a letter 9805 * <li> it is a connecting punctuation character (such as {@code '_'}) 9806 * <li> it is a digit 9807 * <li> it is a numeric letter (such as a Roman numeral character) 9808 * <li> it is a combining mark 9809 * <li> it is a non-spacing mark 9810 * <li> {@code isIdentifierIgnorable} returns 9811 * {@code true} for this character. 9812 * </ul> 9813 * 9814 * <p><b>Note:</b> This method cannot handle <a 9815 * href="#supplementary"> supplementary characters</a>. To support 9816 * all Unicode characters, including supplementary characters, use 9817 * the {@link #isUnicodeIdentifierPart(int)} method. 9818 * 9819 * @param ch the character to be tested. 9820 * @return {@code true} if the character may be part of a 9821 * Unicode identifier; {@code false} otherwise. 9822 * @see Character#isIdentifierIgnorable(char) 9823 * @see Character#isJavaIdentifierPart(char) 9824 * @see Character#isLetterOrDigit(char) 9825 * @see Character#isUnicodeIdentifierStart(char) 9826 * @since 1.1 9827 */ 9828 public static boolean isUnicodeIdentifierPart(char ch) { 9829 return isUnicodeIdentifierPart((int)ch); 9830 } 9831 9832 /** 9833 * Determines if the specified character (Unicode code point) may be part of a Unicode 9834 * identifier as other than the first character. 
9835 * <p> 9836 * A character may be part of a Unicode identifier if and only if 9837 * one of the following statements is true: 9838 * <ul> 9839 * <li> it is a letter 9840 * <li> it is a connecting punctuation character (such as {@code '_'}) 9841 * <li> it is a digit 9842 * <li> it is a numeric letter (such as a Roman numeral character) 9843 * <li> it is a combining mark 9844 * <li> it is a non-spacing mark 9845 * <li> {@code isIdentifierIgnorable} returns 9846 * {@code true} for this character. 9847 * </ul> 9848 * @param codePoint the character (Unicode code point) to be tested. 9849 * @return {@code true} if the character may be part of a 9850 * Unicode identifier; {@code false} otherwise. 9851 * @see Character#isIdentifierIgnorable(int) 9852 * @see Character#isJavaIdentifierPart(int) 9853 * @see Character#isLetterOrDigit(int) 9854 * @see Character#isUnicodeIdentifierStart(int) 9855 * @since 1.5 9856 */ 9857 public static boolean isUnicodeIdentifierPart(int codePoint) { 9858 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 9859 } 9860 9861 /** 9862 * Determines if the specified character should be regarded as 9863 * an ignorable character in a Java identifier or a Unicode identifier. 9864 * <p> 9865 * The following Unicode characters are ignorable in a Java identifier 9866 * or a Unicode identifier: 9867 * <ul> 9868 * <li>ISO control characters that are not whitespace 9869 * <ul> 9870 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 9871 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 9872 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 9873 * </ul> 9874 * 9875 * <li>all characters that have the {@code FORMAT} general 9876 * category value 9877 * </ul> 9878 * 9879 * <p><b>Note:</b> This method cannot handle <a 9880 * href="#supplementary"> supplementary characters</a>. To support 9881 * all Unicode characters, including supplementary characters, use 9882 * the {@link #isIdentifierIgnorable(int)} method. 
9883 * 9884 * @param ch the character to be tested. 9885 * @return {@code true} if the character is an ignorable control 9886 * character that may be part of a Java or Unicode identifier; 9887 * {@code false} otherwise. 9888 * @see Character#isJavaIdentifierPart(char) 9889 * @see Character#isUnicodeIdentifierPart(char) 9890 * @since 1.1 9891 */ 9892 public static boolean isIdentifierIgnorable(char ch) { 9893 return isIdentifierIgnorable((int)ch); 9894 } 9895 9896 /** 9897 * Determines if the specified character (Unicode code point) should be regarded as 9898 * an ignorable character in a Java identifier or a Unicode identifier. 9899 * <p> 9900 * The following Unicode characters are ignorable in a Java identifier 9901 * or a Unicode identifier: 9902 * <ul> 9903 * <li>ISO control characters that are not whitespace 9904 * <ul> 9905 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 9906 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 9907 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 9908 * </ul> 9909 * 9910 * <li>all characters that have the {@code FORMAT} general 9911 * category value 9912 * </ul> 9913 * 9914 * @param codePoint the character (Unicode code point) to be tested. 9915 * @return {@code true} if the character is an ignorable control 9916 * character that may be part of a Java or Unicode identifier; 9917 * {@code false} otherwise. 9918 * @see Character#isJavaIdentifierPart(int) 9919 * @see Character#isUnicodeIdentifierPart(int) 9920 * @since 1.5 9921 */ 9922 public static boolean isIdentifierIgnorable(int codePoint) { 9923 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 9924 } 9925 9926 /** 9927 * Converts the character argument to lowercase using case 9928 * mapping information from the UnicodeData file. 
9929 * <p> 9930 * Note that 9931 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 9932 * does not always return {@code true} for some ranges of 9933 * characters, particularly those that are symbols or ideographs. 9934 * 9935 * <p>In general, {@link String#toLowerCase()} should be used to map 9936 * characters to lowercase. {@code String} case mapping methods 9937 * have several benefits over {@code Character} case mapping methods. 9938 * {@code String} case mapping methods can perform locale-sensitive 9939 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9940 * the {@code Character} case mapping methods cannot. 9941 * 9942 * <p><b>Note:</b> This method cannot handle <a 9943 * href="#supplementary"> supplementary characters</a>. To support 9944 * all Unicode characters, including supplementary characters, use 9945 * the {@link #toLowerCase(int)} method. 9946 * 9947 * @param ch the character to be converted. 9948 * @return the lowercase equivalent of the character, if any; 9949 * otherwise, the character itself. 9950 * @see Character#isLowerCase(char) 9951 * @see String#toLowerCase() 9952 */ 9953 public static char toLowerCase(char ch) { 9954 return (char)toLowerCase((int)ch); 9955 } 9956 9957 /** 9958 * Converts the character (Unicode code point) argument to 9959 * lowercase using case mapping information from the UnicodeData 9960 * file. 9961 * 9962 * <p> Note that 9963 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 9964 * does not always return {@code true} for some ranges of 9965 * characters, particularly those that are symbols or ideographs. 9966 * 9967 * <p>In general, {@link String#toLowerCase()} should be used to map 9968 * characters to lowercase. {@code String} case mapping methods 9969 * have several benefits over {@code Character} case mapping methods. 
9970 * {@code String} case mapping methods can perform locale-sensitive 9971 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9972 * the {@code Character} case mapping methods cannot. 9973 * 9974 * @param codePoint the character (Unicode code point) to be converted. 9975 * @return the lowercase equivalent of the character (Unicode code 9976 * point), if any; otherwise, the character itself. 9977 * @see Character#isLowerCase(int) 9978 * @see String#toLowerCase() 9979 * 9980 * @since 1.5 9981 */ 9982 public static int toLowerCase(int codePoint) { 9983 return CharacterData.of(codePoint).toLowerCase(codePoint); 9984 } 9985 9986 /** 9987 * Converts the character argument to uppercase using case mapping 9988 * information from the UnicodeData file. 9989 * <p> 9990 * Note that 9991 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 9992 * does not always return {@code true} for some ranges of 9993 * characters, particularly those that are symbols or ideographs. 9994 * 9995 * <p>In general, {@link String#toUpperCase()} should be used to map 9996 * characters to uppercase. {@code String} case mapping methods 9997 * have several benefits over {@code Character} case mapping methods. 9998 * {@code String} case mapping methods can perform locale-sensitive 9999 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10000 * the {@code Character} case mapping methods cannot. 10001 * 10002 * <p><b>Note:</b> This method cannot handle <a 10003 * href="#supplementary"> supplementary characters</a>. To support 10004 * all Unicode characters, including supplementary characters, use 10005 * the {@link #toUpperCase(int)} method. 10006 * 10007 * @param ch the character to be converted. 10008 * @return the uppercase equivalent of the character, if any; 10009 * otherwise, the character itself. 
10010 * @see Character#isUpperCase(char) 10011 * @see String#toUpperCase() 10012 */ 10013 public static char toUpperCase(char ch) { 10014 return (char)toUpperCase((int)ch); 10015 } 10016 10017 /** 10018 * Converts the character (Unicode code point) argument to 10019 * uppercase using case mapping information from the UnicodeData 10020 * file. 10021 * 10022 * <p>Note that 10023 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 10024 * does not always return {@code true} for some ranges of 10025 * characters, particularly those that are symbols or ideographs. 10026 * 10027 * <p>In general, {@link String#toUpperCase()} should be used to map 10028 * characters to uppercase. {@code String} case mapping methods 10029 * have several benefits over {@code Character} case mapping methods. 10030 * {@code String} case mapping methods can perform locale-sensitive 10031 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10032 * the {@code Character} case mapping methods cannot. 10033 * 10034 * @param codePoint the character (Unicode code point) to be converted. 10035 * @return the uppercase equivalent of the character, if any; 10036 * otherwise, the character itself. 10037 * @see Character#isUpperCase(int) 10038 * @see String#toUpperCase() 10039 * 10040 * @since 1.5 10041 */ 10042 public static int toUpperCase(int codePoint) { 10043 return CharacterData.of(codePoint).toUpperCase(codePoint); 10044 } 10045 10046 /** 10047 * Converts the character argument to titlecase using case mapping 10048 * information from the UnicodeData file. If a character has no 10049 * explicit titlecase mapping and is not itself a titlecase char 10050 * according to UnicodeData, then the uppercase mapping is 10051 * returned as an equivalent titlecase mapping. If the 10052 * {@code char} argument is already a titlecase 10053 * {@code char}, the same {@code char} value will be 10054 * returned. 
10055 * <p> 10056 * Note that 10057 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 10058 * does not always return {@code true} for some ranges of 10059 * characters. 10060 * 10061 * <p><b>Note:</b> This method cannot handle <a 10062 * href="#supplementary"> supplementary characters</a>. To support 10063 * all Unicode characters, including supplementary characters, use 10064 * the {@link #toTitleCase(int)} method. 10065 * 10066 * @param ch the character to be converted. 10067 * @return the titlecase equivalent of the character, if any; 10068 * otherwise, the character itself. 10069 * @see Character#isTitleCase(char) 10070 * @see Character#toLowerCase(char) 10071 * @see Character#toUpperCase(char) 10072 * @since 1.0.2 10073 */ 10074 public static char toTitleCase(char ch) { 10075 return (char)toTitleCase((int)ch); 10076 } 10077 10078 /** 10079 * Converts the character (Unicode code point) argument to titlecase using case mapping 10080 * information from the UnicodeData file. If a character has no 10081 * explicit titlecase mapping and is not itself a titlecase char 10082 * according to UnicodeData, then the uppercase mapping is 10083 * returned as an equivalent titlecase mapping. If the 10084 * character argument is already a titlecase 10085 * character, the same character value will be 10086 * returned. 10087 * 10088 * <p>Note that 10089 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 10090 * does not always return {@code true} for some ranges of 10091 * characters. 10092 * 10093 * @param codePoint the character (Unicode code point) to be converted. 10094 * @return the titlecase equivalent of the character, if any; 10095 * otherwise, the character itself. 
10096 * @see Character#isTitleCase(int) 10097 * @see Character#toLowerCase(int) 10098 * @see Character#toUpperCase(int) 10099 * @since 1.5 10100 */ 10101 public static int toTitleCase(int codePoint) { 10102 return CharacterData.of(codePoint).toTitleCase(codePoint); 10103 } 10104 10105 /** 10106 * Returns the numeric value of the character {@code ch} in the 10107 * specified radix. 10108 * <p> 10109 * If the radix is not in the range {@code MIN_RADIX} ≤ 10110 * {@code radix} ≤ {@code MAX_RADIX} or if the 10111 * value of {@code ch} is not a valid digit in the specified 10112 * radix, {@code -1} is returned. A character is a valid digit 10113 * if at least one of the following is true: 10114 * <ul> 10115 * <li>The method {@code isDigit} is {@code true} of the character 10116 * and the Unicode decimal digit value of the character (or its 10117 * single-character decomposition) is less than the specified radix. 10118 * In this case the decimal digit value is returned. 10119 * <li>The character is one of the uppercase Latin letters 10120 * {@code 'A'} through {@code 'Z'} and its code is less than 10121 * {@code radix + 'A' - 10}. 10122 * In this case, {@code ch - 'A' + 10} 10123 * is returned. 10124 * <li>The character is one of the lowercase Latin letters 10125 * {@code 'a'} through {@code 'z'} and its code is less than 10126 * {@code radix + 'a' - 10}. 10127 * In this case, {@code ch - 'a' + 10} 10128 * is returned. 10129 * <li>The character is one of the fullwidth uppercase Latin letters A 10130 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10131 * and its code is less than 10132 * {@code radix + '\u005CuFF21' - 10}. 10133 * In this case, {@code ch - '\u005CuFF21' + 10} 10134 * is returned. 10135 * <li>The character is one of the fullwidth lowercase Latin letters a 10136 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10137 * and its code is less than 10138 * {@code radix + '\u005CuFF41' - 10}. 
10139 * In this case, {@code ch - '\u005CuFF41' + 10} 10140 * is returned. 10141 * </ul> 10142 * 10143 * <p><b>Note:</b> This method cannot handle <a 10144 * href="#supplementary"> supplementary characters</a>. To support 10145 * all Unicode characters, including supplementary characters, use 10146 * the {@link #digit(int, int)} method. 10147 * 10148 * @param ch the character to be converted. 10149 * @param radix the radix. 10150 * @return the numeric value represented by the character in the 10151 * specified radix. 10152 * @see Character#forDigit(int, int) 10153 * @see Character#isDigit(char) 10154 */ 10155 public static int digit(char ch, int radix) { 10156 return digit((int)ch, radix); 10157 } 10158 10159 /** 10160 * Returns the numeric value of the specified character (Unicode 10161 * code point) in the specified radix. 10162 * 10163 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 10164 * {@code radix} ≤ {@code MAX_RADIX} or if the 10165 * character is not a valid digit in the specified 10166 * radix, {@code -1} is returned. A character is a valid digit 10167 * if at least one of the following is true: 10168 * <ul> 10169 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 10170 * and the Unicode decimal digit value of the character (or its 10171 * single-character decomposition) is less than the specified radix. 10172 * In this case the decimal digit value is returned. 10173 * <li>The character is one of the uppercase Latin letters 10174 * {@code 'A'} through {@code 'Z'} and its code is less than 10175 * {@code radix + 'A' - 10}. 10176 * In this case, {@code codePoint - 'A' + 10} 10177 * is returned. 10178 * <li>The character is one of the lowercase Latin letters 10179 * {@code 'a'} through {@code 'z'} and its code is less than 10180 * {@code radix + 'a' - 10}. 10181 * In this case, {@code codePoint - 'a' + 10} 10182 * is returned. 
10183 * <li>The character is one of the fullwidth uppercase Latin letters A 10184 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10185 * and its code is less than 10186 * {@code radix + '\u005CuFF21' - 10}. 10187 * In this case, 10188 * {@code codePoint - '\u005CuFF21' + 10} 10189 * is returned. 10190 * <li>The character is one of the fullwidth lowercase Latin letters a 10191 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10192 * and its code is less than 10193 * {@code radix + '\u005CuFF41'- 10}. 10194 * In this case, 10195 * {@code codePoint - '\u005CuFF41' + 10} 10196 * is returned. 10197 * </ul> 10198 * 10199 * @param codePoint the character (Unicode code point) to be converted. 10200 * @param radix the radix. 10201 * @return the numeric value represented by the character in the 10202 * specified radix. 10203 * @see Character#forDigit(int, int) 10204 * @see Character#isDigit(int) 10205 * @since 1.5 10206 */ 10207 public static int digit(int codePoint, int radix) { 10208 return CharacterData.of(codePoint).digit(codePoint, radix); 10209 } 10210 10211 /** 10212 * Returns the {@code int} value that the specified Unicode 10213 * character represents. For example, the character 10214 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 10215 * an int with a value of 50. 10216 * <p> 10217 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10218 * {@code '\u005Cu005A'}), lowercase 10219 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10220 * full width variant ({@code '\u005CuFF21'} through 10221 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10222 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10223 * through 35. This is independent of the Unicode specification, 10224 * which does not assign numeric values to these {@code char} 10225 * values. 10226 * <p> 10227 * If the character does not have a numeric value, then -1 is returned. 
10228 * If the character has a numeric value that cannot be represented as a 10229 * nonnegative integer (for example, a fractional value), then -2 10230 * is returned. 10231 * 10232 * <p><b>Note:</b> This method cannot handle <a 10233 * href="#supplementary"> supplementary characters</a>. To support 10234 * all Unicode characters, including supplementary characters, use 10235 * the {@link #getNumericValue(int)} method. 10236 * 10237 * @param ch the character to be converted. 10238 * @return the numeric value of the character, as a nonnegative {@code int} 10239 * value; -2 if the character has a numeric value but the value 10240 * can not be represented as a nonnegative {@code int} value; 10241 * -1 if the character has no numeric value. 10242 * @see Character#forDigit(int, int) 10243 * @see Character#isDigit(char) 10244 * @since 1.1 10245 */ 10246 public static int getNumericValue(char ch) { 10247 return getNumericValue((int)ch); 10248 } 10249 10250 /** 10251 * Returns the {@code int} value that the specified 10252 * character (Unicode code point) represents. For example, the character 10253 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 10254 * an {@code int} with a value of 50. 10255 * <p> 10256 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10257 * {@code '\u005Cu005A'}), lowercase 10258 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10259 * full width variant ({@code '\u005CuFF21'} through 10260 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10261 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10262 * through 35. This is independent of the Unicode specification, 10263 * which does not assign numeric values to these {@code char} 10264 * values. 10265 * <p> 10266 * If the character does not have a numeric value, then -1 is returned. 
10267 * If the character has a numeric value that cannot be represented as a 10268 * nonnegative integer (for example, a fractional value), then -2 10269 * is returned. 10270 * 10271 * @param codePoint the character (Unicode code point) to be converted. 10272 * @return the numeric value of the character, as a nonnegative {@code int} 10273 * value; -2 if the character has a numeric value but the value 10274 * can not be represented as a nonnegative {@code int} value; 10275 * -1 if the character has no numeric value. 10276 * @see Character#forDigit(int, int) 10277 * @see Character#isDigit(int) 10278 * @since 1.5 10279 */ 10280 public static int getNumericValue(int codePoint) { 10281 return CharacterData.of(codePoint).getNumericValue(codePoint); 10282 } 10283 10284 /** 10285 * Determines if the specified character is ISO-LATIN-1 white space. 10286 * This method returns {@code true} for the following five 10287 * characters only: 10288 * <table class="striped"> 10289 * <caption style="display:none">truechars</caption> 10290 * <thead> 10291 * <tr><th scope="col">Character 10292 * <th scope="col">Code 10293 * <th scope="col">Name 10294 * </thead> 10295 * <tbody> 10296 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 10297 * <td>{@code HORIZONTAL TABULATION}</td></tr> 10298 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 10299 * <td>{@code NEW LINE}</td></tr> 10300 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 10301 * <td>{@code FORM FEED}</td></tr> 10302 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 10303 * <td>{@code CARRIAGE RETURN}</td></tr> 10304 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 10305 * <td>{@code SPACE}</td></tr> 10306 * </tbody> 10307 * </table> 10308 * 10309 * @param ch the character to be tested. 10310 * @return {@code true} if the character is ISO-LATIN-1 white 10311 * space; {@code false} otherwise. 
10312 * @see Character#isSpaceChar(char) 10313 * @see Character#isWhitespace(char) 10314 * @deprecated Replaced by isWhitespace(char). 10315 */ 10316 @Deprecated(since="1.1") 10317 public static boolean isSpace(char ch) { 10318 return (ch <= 0x0020) && 10319 (((((1L << 0x0009) | 10320 (1L << 0x000A) | 10321 (1L << 0x000C) | 10322 (1L << 0x000D) | 10323 (1L << 0x0020)) >> ch) & 1L) != 0); 10324 } 10325 10326 10327 /** 10328 * Determines if the specified character is a Unicode space character. 10329 * A character is considered to be a space character if and only if 10330 * it is specified to be a space character by the Unicode Standard. This 10331 * method returns true if the character's general category type is any of 10332 * the following: 10333 * <ul> 10334 * <li> {@code SPACE_SEPARATOR} 10335 * <li> {@code LINE_SEPARATOR} 10336 * <li> {@code PARAGRAPH_SEPARATOR} 10337 * </ul> 10338 * 10339 * <p><b>Note:</b> This method cannot handle <a 10340 * href="#supplementary"> supplementary characters</a>. To support 10341 * all Unicode characters, including supplementary characters, use 10342 * the {@link #isSpaceChar(int)} method. 10343 * 10344 * @param ch the character to be tested. 10345 * @return {@code true} if the character is a space character; 10346 * {@code false} otherwise. 10347 * @see Character#isWhitespace(char) 10348 * @since 1.1 10349 */ 10350 public static boolean isSpaceChar(char ch) { 10351 return isSpaceChar((int)ch); 10352 } 10353 10354 /** 10355 * Determines if the specified character (Unicode code point) is a 10356 * Unicode space character. A character is considered to be a 10357 * space character if and only if it is specified to be a space 10358 * character by the Unicode Standard. 
This method returns true if 10359 * the character's general category type is any of the following: 10360 * 10361 * <ul> 10362 * <li> {@link #SPACE_SEPARATOR} 10363 * <li> {@link #LINE_SEPARATOR} 10364 * <li> {@link #PARAGRAPH_SEPARATOR} 10365 * </ul> 10366 * 10367 * @param codePoint the character (Unicode code point) to be tested. 10368 * @return {@code true} if the character is a space character; 10369 * {@code false} otherwise. 10370 * @see Character#isWhitespace(int) 10371 * @since 1.5 10372 */ 10373 public static boolean isSpaceChar(int codePoint) { 10374 return ((((1 << Character.SPACE_SEPARATOR) | 10375 (1 << Character.LINE_SEPARATOR) | 10376 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 10377 != 0; 10378 } 10379 10380 /** 10381 * Determines if the specified character is white space according to Java. 10382 * A character is a Java whitespace character if and only if it satisfies 10383 * one of the following criteria: 10384 * <ul> 10385 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 10386 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 10387 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 10388 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 10389 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 10390 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 10391 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 10392 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 10393 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 10394 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 10395 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 10396 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 10397 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 10398 * </ul> 10399 * 10400 * <p><b>Note:</b> This method cannot handle <a 10401 * href="#supplementary"> supplementary characters</a>. 
To support 10402 * all Unicode characters, including supplementary characters, use 10403 * the {@link #isWhitespace(int)} method. 10404 * 10405 * @param ch the character to be tested. 10406 * @return {@code true} if the character is a Java whitespace 10407 * character; {@code false} otherwise. 10408 * @see Character#isSpaceChar(char) 10409 * @since 1.1 10410 */ 10411 public static boolean isWhitespace(char ch) { 10412 return isWhitespace((int)ch); 10413 } 10414 10415 /** 10416 * Determines if the specified character (Unicode code point) is 10417 * white space according to Java. A character is a Java 10418 * whitespace character if and only if it satisfies one of the 10419 * following criteria: 10420 * <ul> 10421 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 10422 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 10423 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 10424 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 10425 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 10426 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 10427 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 10428 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 10429 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 10430 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 10431 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 10432 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 10433 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 10434 * </ul> 10435 * 10436 * @param codePoint the character (Unicode code point) to be tested. 10437 * @return {@code true} if the character is a Java whitespace 10438 * character; {@code false} otherwise. 
10439 * @see Character#isSpaceChar(int) 10440 * @since 1.5 10441 */ 10442 public static boolean isWhitespace(int codePoint) { 10443 return CharacterData.of(codePoint).isWhitespace(codePoint); 10444 } 10445 10446 /** 10447 * Determines if the specified character is an ISO control 10448 * character. A character is considered to be an ISO control 10449 * character if its code is in the range {@code '\u005Cu0000'} 10450 * through {@code '\u005Cu001F'} or in the range 10451 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 10452 * 10453 * <p><b>Note:</b> This method cannot handle <a 10454 * href="#supplementary"> supplementary characters</a>. To support 10455 * all Unicode characters, including supplementary characters, use 10456 * the {@link #isISOControl(int)} method. 10457 * 10458 * @param ch the character to be tested. 10459 * @return {@code true} if the character is an ISO control character; 10460 * {@code false} otherwise. 10461 * 10462 * @see Character#isSpaceChar(char) 10463 * @see Character#isWhitespace(char) 10464 * @since 1.1 10465 */ 10466 public static boolean isISOControl(char ch) { 10467 return isISOControl((int)ch); 10468 } 10469 10470 /** 10471 * Determines if the referenced character (Unicode code point) is an ISO control 10472 * character. A character is considered to be an ISO control 10473 * character if its code is in the range {@code '\u005Cu0000'} 10474 * through {@code '\u005Cu001F'} or in the range 10475 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 10476 * 10477 * @param codePoint the character (Unicode code point) to be tested. 10478 * @return {@code true} if the character is an ISO control character; 10479 * {@code false} otherwise. 
10480 * @see Character#isSpaceChar(int) 10481 * @see Character#isWhitespace(int) 10482 * @since 1.5 10483 */ 10484 public static boolean isISOControl(int codePoint) { 10485 // Optimized form of: 10486 // (codePoint >= 0x00 && codePoint <= 0x1F) || 10487 // (codePoint >= 0x7F && codePoint <= 0x9F); 10488 return codePoint <= 0x9F && 10489 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 10490 } 10491 10492 /** 10493 * Returns a value indicating a character's general category. 10494 * 10495 * <p><b>Note:</b> This method cannot handle <a 10496 * href="#supplementary"> supplementary characters</a>. To support 10497 * all Unicode characters, including supplementary characters, use 10498 * the {@link #getType(int)} method. 10499 * 10500 * @param ch the character to be tested. 10501 * @return a value of type {@code int} representing the 10502 * character's general category. 10503 * @see Character#COMBINING_SPACING_MARK 10504 * @see Character#CONNECTOR_PUNCTUATION 10505 * @see Character#CONTROL 10506 * @see Character#CURRENCY_SYMBOL 10507 * @see Character#DASH_PUNCTUATION 10508 * @see Character#DECIMAL_DIGIT_NUMBER 10509 * @see Character#ENCLOSING_MARK 10510 * @see Character#END_PUNCTUATION 10511 * @see Character#FINAL_QUOTE_PUNCTUATION 10512 * @see Character#FORMAT 10513 * @see Character#INITIAL_QUOTE_PUNCTUATION 10514 * @see Character#LETTER_NUMBER 10515 * @see Character#LINE_SEPARATOR 10516 * @see Character#LOWERCASE_LETTER 10517 * @see Character#MATH_SYMBOL 10518 * @see Character#MODIFIER_LETTER 10519 * @see Character#MODIFIER_SYMBOL 10520 * @see Character#NON_SPACING_MARK 10521 * @see Character#OTHER_LETTER 10522 * @see Character#OTHER_NUMBER 10523 * @see Character#OTHER_PUNCTUATION 10524 * @see Character#OTHER_SYMBOL 10525 * @see Character#PARAGRAPH_SEPARATOR 10526 * @see Character#PRIVATE_USE 10527 * @see Character#SPACE_SEPARATOR 10528 * @see Character#START_PUNCTUATION 10529 * @see Character#SURROGATE 10530 * @see Character#TITLECASE_LETTER 10531 * @see 
Character#UNASSIGNED 10532 * @see Character#UPPERCASE_LETTER 10533 * @since 1.1 10534 */ 10535 public static int getType(char ch) { 10536 return getType((int)ch); 10537 } 10538 10539 /** 10540 * Returns a value indicating a character's general category. 10541 * 10542 * @param codePoint the character (Unicode code point) to be tested. 10543 * @return a value of type {@code int} representing the 10544 * character's general category. 10545 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 10546 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 10547 * @see Character#CONTROL CONTROL 10548 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 10549 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 10550 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 10551 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 10552 * @see Character#END_PUNCTUATION END_PUNCTUATION 10553 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 10554 * @see Character#FORMAT FORMAT 10555 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 10556 * @see Character#LETTER_NUMBER LETTER_NUMBER 10557 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 10558 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 10559 * @see Character#MATH_SYMBOL MATH_SYMBOL 10560 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 10561 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 10562 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 10563 * @see Character#OTHER_LETTER OTHER_LETTER 10564 * @see Character#OTHER_NUMBER OTHER_NUMBER 10565 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 10566 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 10567 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 10568 * @see Character#PRIVATE_USE PRIVATE_USE 10569 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 10570 * @see Character#START_PUNCTUATION START_PUNCTUATION 10571 * @see Character#SURROGATE SURROGATE 10572 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 10573 * @see 
Character#UNASSIGNED UNASSIGNED 10574 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 10575 * @since 1.5 10576 */ 10577 public static int getType(int codePoint) { 10578 return CharacterData.of(codePoint).getType(codePoint); 10579 } 10580 10581 /** 10582 * Determines the character representation for a specific digit in 10583 * the specified radix. If the value of {@code radix} is not a 10584 * valid radix, or the value of {@code digit} is not a valid 10585 * digit in the specified radix, the null character 10586 * ({@code '\u005Cu0000'}) is returned. 10587 * <p> 10588 * The {@code radix} argument is valid if it is greater than or 10589 * equal to {@code MIN_RADIX} and less than or equal to 10590 * {@code MAX_RADIX}. The {@code digit} argument is valid if 10591 * {@code 0 <= digit < radix}. 10592 * <p> 10593 * If the digit is less than 10, then 10594 * {@code '0' + digit} is returned. Otherwise, the value 10595 * {@code 'a' + digit - 10} is returned. 10596 * 10597 * @param digit the number to convert to a character. 10598 * @param radix the radix. 10599 * @return the {@code char} representation of the specified digit 10600 * in the specified radix. 10601 * @see Character#MIN_RADIX 10602 * @see Character#MAX_RADIX 10603 * @see Character#digit(char, int) 10604 */ 10605 public static char forDigit(int digit, int radix) { 10606 if ((digit >= radix) || (digit < 0)) { 10607 return '\0'; 10608 } 10609 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 10610 return '\0'; 10611 } 10612 if (digit < 10) { 10613 return (char)('0' + digit); 10614 } 10615 return (char)('a' - 10 + digit); 10616 } 10617 10618 /** 10619 * Returns the Unicode directionality property for the given 10620 * character. Character directionality is used to calculate the 10621 * visual ordering of text. The directionality value of undefined 10622 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 
10623 * 10624 * <p><b>Note:</b> This method cannot handle <a 10625 * href="#supplementary"> supplementary characters</a>. To support 10626 * all Unicode characters, including supplementary characters, use 10627 * the {@link #getDirectionality(int)} method. 10628 * 10629 * @param ch {@code char} for which the directionality property 10630 * is requested. 10631 * @return the directionality property of the {@code char} value. 10632 * 10633 * @see Character#DIRECTIONALITY_UNDEFINED 10634 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 10635 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 10636 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 10637 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 10638 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 10639 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 10640 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 10641 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 10642 * @see Character#DIRECTIONALITY_NONSPACING_MARK 10643 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 10644 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 10645 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 10646 * @see Character#DIRECTIONALITY_WHITESPACE 10647 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 10648 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 10649 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 10650 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 10651 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 10652 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 10653 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 10654 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 10655 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 10656 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 10657 * @since 1.4 10658 */ 10659 public static byte getDirectionality(char ch) { 10660 return getDirectionality((int)ch); 10661 } 10662 10663 /** 10664 * Returns the Unicode 
directionality property for the given 10665 * character (Unicode code point). Character directionality is 10666 * used to calculate the visual ordering of text. The 10667 * directionality value of undefined character is {@link 10668 * #DIRECTIONALITY_UNDEFINED}. 10669 * 10670 * @param codePoint the character (Unicode code point) for which 10671 * the directionality property is requested. 10672 * @return the directionality property of the character. 10673 * 10674 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 10675 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 10676 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 10677 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 10678 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 10679 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 10680 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 10681 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 10682 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 10683 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 10684 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 10685 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 10686 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 10687 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 10688 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 10689 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 10690 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 10691 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 10692 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 10693 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 10694 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 10695 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 10696 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 10697 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 10698 * @since 1.5 10699 */ 10700 public static byte getDirectionality(int codePoint) { 10701 return CharacterData.of(codePoint).getDirectionality(codePoint); 10702 } 10703 10704 /** 10705 * Determines whether the character is mirrored according to the 10706 * Unicode specification. Mirrored characters should have their 10707 * glyphs horizontally mirrored when displayed in text that is 10708 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 10709 * PARENTHESIS is semantically defined to be an <i>opening 10710 * parenthesis</i>. This will appear as a "(" in text that is 10711 * left-to-right but as a ")" in text that is right-to-left. 10712 * 10713 * <p><b>Note:</b> This method cannot handle <a 10714 * href="#supplementary"> supplementary characters</a>. To support 10715 * all Unicode characters, including supplementary characters, use 10716 * the {@link #isMirrored(int)} method. 10717 * 10718 * @param ch {@code char} for which the mirrored property is requested 10719 * @return {@code true} if the char is mirrored, {@code false} 10720 * if the {@code char} is not mirrored or is not defined. 
10721 * @since 1.4 10722 */ 10723 public static boolean isMirrored(char ch) { 10724 return isMirrored((int)ch); 10725 } 10726 10727 /** 10728 * Determines whether the specified character (Unicode code point) 10729 * is mirrored according to the Unicode specification. Mirrored 10730 * characters should have their glyphs horizontally mirrored when 10731 * displayed in text that is right-to-left. For example, 10732 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 10733 * defined to be an <i>opening parenthesis</i>. This will appear 10734 * as a "(" in text that is left-to-right but as a ")" in text 10735 * that is right-to-left. 10736 * 10737 * @param codePoint the character (Unicode code point) to be tested. 10738 * @return {@code true} if the character is mirrored, {@code false} 10739 * if the character is not mirrored or is not defined. 10740 * @since 1.5 10741 */ 10742 public static boolean isMirrored(int codePoint) { 10743 return CharacterData.of(codePoint).isMirrored(codePoint); 10744 } 10745 10746 /** 10747 * Compares two {@code Character} objects numerically. 10748 * 10749 * @param anotherCharacter the {@code Character} to be compared. 10750 10751 * @return the value {@code 0} if the argument {@code Character} 10752 * is equal to this {@code Character}; a value less than 10753 * {@code 0} if this {@code Character} is numerically less 10754 * than the {@code Character} argument; and a value greater than 10755 * {@code 0} if this {@code Character} is numerically greater 10756 * than the {@code Character} argument (unsigned comparison). 10757 * Note that this is strictly a numerical comparison; it is not 10758 * locale-dependent. 10759 * @since 1.2 10760 */ 10761 public int compareTo(Character anotherCharacter) { 10762 return compare(this.value, anotherCharacter.value); 10763 } 10764 10765 /** 10766 * Compares two {@code char} values numerically. 
10767 * The value returned is identical to what would be returned by: 10768 * <pre> 10769 * Character.valueOf(x).compareTo(Character.valueOf(y)) 10770 * </pre> 10771 * 10772 * @param x the first {@code char} to compare 10773 * @param y the second {@code char} to compare 10774 * @return the value {@code 0} if {@code x == y}; 10775 * a value less than {@code 0} if {@code x < y}; and 10776 * a value greater than {@code 0} if {@code x > y} 10777 * @since 1.7 10778 */ 10779 public static int compare(char x, char y) { 10780 return x - y; 10781 } 10782 10783 /** 10784 * Converts the character (Unicode code point) argument to uppercase using 10785 * information from the UnicodeData file. 10786 * 10787 * @param codePoint the character (Unicode code point) to be converted. 10788 * @return either the uppercase equivalent of the character, if 10789 * any, or an error flag ({@code Character.ERROR}) 10790 * that indicates that a 1:M {@code char} mapping exists. 10791 * @see Character#isLowerCase(char) 10792 * @see Character#isUpperCase(char) 10793 * @see Character#toLowerCase(char) 10794 * @see Character#toTitleCase(char) 10795 * @since 1.4 10796 */ 10797 static int toUpperCaseEx(int codePoint) { 10798 assert isValidCodePoint(codePoint); 10799 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 10800 } 10801 10802 /** 10803 * Converts the character (Unicode code point) argument to uppercase using case 10804 * mapping information from the SpecialCasing file in the Unicode 10805 * specification. If a character has no explicit uppercase 10806 * mapping, then the {@code char} itself is returned in the 10807 * {@code char[]}. 10808 * 10809 * @param codePoint the character (Unicode code point) to be converted. 10810 * @return a {@code char[]} with the uppercased character. 10811 * @since 1.4 10812 */ 10813 static char[] toUpperCaseCharArray(int codePoint) { 10814 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
10815 assert isBmpCodePoint(codePoint); 10816 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 10817 } 10818 10819 /** 10820 * The number of bits used to represent a {@code char} value in unsigned 10821 * binary form, constant {@code 16}. 10822 * 10823 * @since 1.5 10824 */ 10825 public static final int SIZE = 16; 10826 10827 /** 10828 * The number of bytes used to represent a {@code char} value in unsigned 10829 * binary form. 10830 * 10831 * @since 1.8 10832 */ 10833 public static final int BYTES = SIZE / Byte.SIZE; 10834 10835 /** 10836 * Returns the value obtained by reversing the order of the bytes in the 10837 * specified {@code char} value. 10838 * 10839 * @param ch The {@code char} of which to reverse the byte order. 10840 * @return the value obtained by reversing (or, equivalently, swapping) 10841 * the bytes in the specified {@code char} value. 10842 * @since 1.5 10843 */ 10844 @HotSpotIntrinsicCandidate 10845 public static char reverseBytes(char ch) { 10846 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 10847 } 10848 10849 /** 10850 * Returns the Unicode name of the specified character 10851 * {@code codePoint}, or null if the code point is 10852 * {@link #UNASSIGNED unassigned}. 10853 * <p> 10854 * Note: if the specified character is not assigned a name by 10855 * the <i>UnicodeData</i> file (part of the Unicode Character 10856 * Database maintained by the Unicode Consortium), the returned 10857 * name is the same as the result of expression. 10858 * 10859 * <blockquote>{@code 10860 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 10861 * + " " 10862 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10863 * 10864 * }</blockquote> 10865 * 10866 * @param codePoint the character (Unicode code point) 10867 * 10868 * @return the Unicode name of the specified character, or null if 10869 * the code point is unassigned. 
10870 * 10871 * @throws IllegalArgumentException if the specified 10872 * {@code codePoint} is not a valid Unicode 10873 * code point. 10874 * 10875 * @since 1.7 10876 */ 10877 public static String getName(int codePoint) { 10878 if (!isValidCodePoint(codePoint)) { 10879 throw new IllegalArgumentException( 10880 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 10881 } 10882 String name = CharacterName.getInstance().getName(codePoint); 10883 if (name != null) 10884 return name; 10885 if (getType(codePoint) == UNASSIGNED) 10886 return null; 10887 UnicodeBlock block = UnicodeBlock.of(codePoint); 10888 if (block != null) 10889 return block.toString().replace('_', ' ') + " " 10890 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10891 // should never come here 10892 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10893 } 10894 10895 /** 10896 * Returns the code point value of the Unicode character specified by 10897 * the given Unicode character name. 10898 * <p> 10899 * Note: if a character is not assigned a name by the <i>UnicodeData</i> 10900 * file (part of the Unicode Character Database maintained by the Unicode 10901 * Consortium), its name is defined as the result of expression 10902 * 10903 * <blockquote>{@code 10904 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 10905 * + " " 10906 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10907 * 10908 * }</blockquote> 10909 * <p> 10910 * The {@code name} matching is case insensitive, with any leading and 10911 * trailing whitespace character removed. 10912 * 10913 * @param name the Unicode character name 10914 * 10915 * @return the code point value of the character specified by its name. 10916 * 10917 * @throws IllegalArgumentException if the specified {@code name} 10918 * is not a valid Unicode character name. 
10919 * @throws NullPointerException if {@code name} is {@code null} 10920 * 10921 * @since 9 10922 */ 10923 public static int codePointOf(String name) { 10924 name = name.trim().toUpperCase(Locale.ROOT); 10925 int cp = CharacterName.getInstance().getCodePoint(name); 10926 if (cp != -1) 10927 return cp; 10928 try { 10929 int off = name.lastIndexOf(' '); 10930 if (off != -1) { 10931 cp = Integer.parseInt(name, off + 1, name.length(), 16); 10932 if (isValidCodePoint(cp) && name.equals(getName(cp))) 10933 return cp; 10934 } 10935 } catch (Exception x) {} 10936 throw new IllegalArgumentException("Unrecognized character name :" + name); 10937 } 10938 }