1 /* 2 * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 import jdk.internal.HotSpotIntrinsicCandidate; 34 import jdk.internal.misc.VM; 35 36 /** 37 * The {@code Character} class wraps a value of the primitive 38 * type {@code char} in an object. An object of class 39 * {@code Character} contains a single field whose type is 40 * {@code char}. 41 * <p> 42 * In addition, this class provides a large number of static methods for 43 * determining a character's category (lowercase letter, digit, etc.) 44 * and for converting characters from uppercase to lowercase and vice 45 * versa. 
46 * 47 * <h2><a id="conformance">Unicode Conformance</a></h2> 48 * <p> 49 * The fields and methods of class {@code Character} are defined in terms 50 * of character information from the Unicode Standard, specifically the 51 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 52 * This file specifies properties including name and category for every 53 * assigned Unicode code point or character range. The file is available 54 * from the Unicode Consortium at 55 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 56 * <p> 57 * Character information is based on the Unicode Standard, version 12.1. 58 * 59 * <h2><a id="unicode">Unicode Character Representations</a></h2> 60 * 61 * <p>The {@code char} data type (and therefore the value that a 62 * {@code Character} object encapsulates) are based on the 63 * original Unicode specification, which defined characters as 64 * fixed-width 16-bit entities. The Unicode Standard has since been 65 * changed to allow for characters whose representation requires more 66 * than 16 bits. The range of legal <em>code point</em>s is now 67 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 68 * (Refer to the <a 69 * href="http://www.unicode.org/reports/tr27/#notation"><i> 70 * definition</i></a> of the U+<i>n</i> notation in the Unicode 71 * Standard.) 72 * 73 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 74 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 75 * <a id="supplementary">Characters</a> whose code points are greater 76 * than U+FFFF are called <em>supplementary character</em>s. The Java 77 * platform uses the UTF-16 representation in {@code char} arrays and 78 * in the {@code String} and {@code StringBuffer} classes. 
In 79 * this representation, supplementary characters are represented as a pair 80 * of {@code char} values, the first from the <em>high-surrogates</em> 81 * range, (\uD800-\uDBFF), the second from the 82 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 83 * 84 * <p>A {@code char} value, therefore, represents Basic 85 * Multilingual Plane (BMP) code points, including the surrogate 86 * code points, or code units of the UTF-16 encoding. An 87 * {@code int} value represents all Unicode code points, 88 * including supplementary code points. The lower (least significant) 89 * 21 bits of {@code int} are used to represent Unicode code 90 * points and the upper (most significant) 11 bits must be zero. 91 * Unless otherwise specified, the behavior with respect to 92 * supplementary characters and surrogate {@code char} values is 93 * as follows: 94 * 95 * <ul> 96 * <li>The methods that only accept a {@code char} value cannot support 97 * supplementary characters. They treat {@code char} values from the 98 * surrogate ranges as undefined characters. For example, 99 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 100 * this specific value if followed by any low-surrogate value in a string 101 * would represent a letter. 102 * 103 * <li>The methods that accept an {@code int} value support all 104 * Unicode characters, including supplementary characters. For 105 * example, {@code Character.isLetter(0x2F81A)} returns 106 * {@code true} because the code point value represents a letter 107 * (a CJK ideograph). 108 * </ul> 109 * 110 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 111 * used for character values in the range between U+0000 and U+10FFFF, 112 * and <em>Unicode code unit</em> is used for 16-bit 113 * {@code char} values that are code units of the <em>UTF-16</em> 114 * encoding. For more information on Unicode terminology, refer to the 115 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
116 * 117 * @author Lee Boynton 118 * @author Guy Steele 119 * @author Akira Tanaka 120 * @author Martin Buchholz 121 * @author Ulf Zibis 122 * @since 1.0 123 */ 124 public final 125 class Character implements java.io.Serializable, Comparable<Character> { 126 /** 127 * The minimum radix available for conversion to and from strings. 128 * The constant value of this field is the smallest value permitted 129 * for the radix argument in radix-conversion methods such as the 130 * {@code digit} method, the {@code forDigit} method, and the 131 * {@code toString} method of class {@code Integer}. 132 * 133 * @see Character#digit(char, int) 134 * @see Character#forDigit(int, int) 135 * @see Integer#toString(int, int) 136 * @see Integer#valueOf(String) 137 */ 138 public static final int MIN_RADIX = 2; 139 140 /** 141 * The maximum radix available for conversion to and from strings. 142 * The constant value of this field is the largest value permitted 143 * for the radix argument in radix-conversion methods such as the 144 * {@code digit} method, the {@code forDigit} method, and the 145 * {@code toString} method of class {@code Integer}. 146 * 147 * @see Character#digit(char, int) 148 * @see Character#forDigit(int, int) 149 * @see Integer#toString(int, int) 150 * @see Integer#valueOf(String) 151 */ 152 public static final int MAX_RADIX = 36; 153 154 /** 155 * The constant value of this field is the smallest value of type 156 * {@code char}, {@code '\u005Cu0000'}. 157 * 158 * @since 1.0.2 159 */ 160 public static final char MIN_VALUE = '\u0000'; 161 162 /** 163 * The constant value of this field is the largest value of type 164 * {@code char}, {@code '\u005CuFFFF'}. 165 * 166 * @since 1.0.2 167 */ 168 public static final char MAX_VALUE = '\uFFFF'; 169 170 /** 171 * The {@code Class} instance representing the primitive type 172 * {@code char}. 
173 * 174 * @since 1.1 175 */ 176 @SuppressWarnings("unchecked") 177 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 178 179 /* 180 * Normative general types 181 */ 182 183 /* 184 * General character types 185 */ 186 187 /** 188 * General category "Cn" in the Unicode specification. 189 * @since 1.1 190 */ 191 public static final byte UNASSIGNED = 0; 192 193 /** 194 * General category "Lu" in the Unicode specification. 195 * @since 1.1 196 */ 197 public static final byte UPPERCASE_LETTER = 1; 198 199 /** 200 * General category "Ll" in the Unicode specification. 201 * @since 1.1 202 */ 203 public static final byte LOWERCASE_LETTER = 2; 204 205 /** 206 * General category "Lt" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte MODIFIER_LETTER = 4; 216 217 /** 218 * General category "Lo" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte OTHER_LETTER = 5; 222 223 /** 224 * General category "Mn" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte NON_SPACING_MARK = 6; 228 229 /** 230 * General category "Me" in the Unicode specification. 231 * @since 1.1 232 */ 233 public static final byte ENCLOSING_MARK = 7; 234 235 /** 236 * General category "Mc" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte COMBINING_SPACING_MARK = 8; 240 241 /** 242 * General category "Nd" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte DECIMAL_DIGIT_NUMBER = 9; 246 247 /** 248 * General category "Nl" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte LETTER_NUMBER = 10; 252 253 /** 254 * General category "No" in the Unicode specification. 
255 * @since 1.1 256 */ 257 public static final byte OTHER_NUMBER = 11; 258 259 /** 260 * General category "Zs" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte SPACE_SEPARATOR = 12; 264 265 /** 266 * General category "Zl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LINE_SEPARATOR = 13; 270 271 /** 272 * General category "Zp" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte PARAGRAPH_SEPARATOR = 14; 276 277 /** 278 * General category "Cc" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte CONTROL = 15; 282 283 /** 284 * General category "Cf" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte FORMAT = 16; 288 289 /** 290 * General category "Co" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PRIVATE_USE = 18; 294 295 /** 296 * General category "Cs" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte SURROGATE = 19; 300 301 /** 302 * General category "Pd" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte DASH_PUNCTUATION = 20; 306 307 /** 308 * General category "Ps" in the Unicode specification. 309 * @since 1.1 310 */ 311 public static final byte START_PUNCTUATION = 21; 312 313 /** 314 * General category "Pe" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte END_PUNCTUATION = 22; 318 319 /** 320 * General category "Pc" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte CONNECTOR_PUNCTUATION = 23; 324 325 /** 326 * General category "Po" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte OTHER_PUNCTUATION = 24; 330 331 /** 332 * General category "Sm" in the Unicode specification. 
333 * @since 1.1 334 */ 335 public static final byte MATH_SYMBOL = 25; 336 337 /** 338 * General category "Sc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CURRENCY_SYMBOL = 26; 342 343 /** 344 * General category "Sk" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte MODIFIER_SYMBOL = 27; 348 349 /** 350 * General category "So" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte OTHER_SYMBOL = 28; 354 355 /** 356 * General category "Pi" in the Unicode specification. 357 * @since 1.4 358 */ 359 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 360 361 /** 362 * General category "Pf" in the Unicode specification. 363 * @since 1.4 364 */ 365 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 366 367 /** 368 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 369 */ 370 static final int ERROR = 0xFFFFFFFF; 371 372 373 /** 374 * Undefined bidirectional character type. Undefined {@code char} 375 * values have undefined directionality in the Unicode specification. 376 * @since 1.4 377 */ 378 public static final byte DIRECTIONALITY_UNDEFINED = -1; 379 380 /** 381 * Strong bidirectional character type "L" in the Unicode specification. 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 385 386 /** 387 * Strong bidirectional character type "R" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 391 392 /** 393 * Strong bidirectional character type "AL" in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 397 398 /** 399 * Weak bidirectional character type "EN" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 403 404 /** 405 * Weak bidirectional character type "ES" in the Unicode specification. 
406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 409 410 /** 411 * Weak bidirectional character type "ET" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 415 416 /** 417 * Weak bidirectional character type "AN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 421 422 /** 423 * Weak bidirectional character type "CS" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 427 428 /** 429 * Weak bidirectional character type "NSM" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 433 434 /** 435 * Weak bidirectional character type "BN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 439 440 /** 441 * Neutral bidirectional character type "B" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 445 446 /** 447 * Neutral bidirectional character type "S" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 451 452 /** 453 * Neutral bidirectional character type "WS" in the Unicode specification. 454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_WHITESPACE = 12; 457 458 /** 459 * Neutral bidirectional character type "ON" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 463 464 /** 465 * Strong bidirectional character type "LRE" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 469 470 /** 471 * Strong bidirectional character type "LRO" in the Unicode specification. 
472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 475 476 /** 477 * Strong bidirectional character type "RLE" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 481 482 /** 483 * Strong bidirectional character type "RLO" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 487 488 /** 489 * Weak bidirectional character type "PDF" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 493 494 /** 495 * Weak bidirectional character type "LRI" in the Unicode specification. 496 * @since 9 497 */ 498 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 499 500 /** 501 * Weak bidirectional character type "RLI" in the Unicode specification. 502 * @since 9 503 */ 504 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 505 506 /** 507 * Weak bidirectional character type "FSI" in the Unicode specification. 508 * @since 9 509 */ 510 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 511 512 /** 513 * Weak bidirectional character type "PDI" in the Unicode specification. 514 * @since 9 515 */ 516 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 517 518 /** 519 * The minimum value of a 520 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 521 * Unicode high-surrogate code unit</a> 522 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 523 * A high-surrogate is also known as a <i>leading-surrogate</i>. 524 * 525 * @since 1.5 526 */ 527 public static final char MIN_HIGH_SURROGATE = '\uD800'; 528 529 /** 530 * The maximum value of a 531 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 532 * Unicode high-surrogate code unit</a> 533 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
534 * A high-surrogate is also known as a <i>leading-surrogate</i>. 535 * 536 * @since 1.5 537 */ 538 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 539 540 /** 541 * The minimum value of a 542 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 543 * Unicode low-surrogate code unit</a> 544 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 545 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 546 * 547 * @since 1.5 548 */ 549 public static final char MIN_LOW_SURROGATE = '\uDC00'; 550 551 /** 552 * The maximum value of a 553 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 554 * Unicode low-surrogate code unit</a> 555 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 556 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 557 * 558 * @since 1.5 559 */ 560 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 561 562 /** 563 * The minimum value of a Unicode surrogate code unit in the 564 * UTF-16 encoding, constant {@code '\u005CuD800'}. 565 * 566 * @since 1.5 567 */ 568 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 569 570 /** 571 * The maximum value of a Unicode surrogate code unit in the 572 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 573 * 574 * @since 1.5 575 */ 576 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 577 578 /** 579 * The minimum value of a 580 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 581 * Unicode supplementary code point</a>, constant {@code U+10000}. 582 * 583 * @since 1.5 584 */ 585 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 586 587 /** 588 * The minimum value of a 589 * <a href="http://www.unicode.org/glossary/#code_point"> 590 * Unicode code point</a>, constant {@code U+0000}. 
*
 * @since 1.5
 */
public static final int MIN_CODE_POINT = 0x000000;

/**
 * The maximum value of a
 * <a href="http://www.unicode.org/glossary/#code_point">
 * Unicode code point</a>, constant {@code U+10FFFF}.
 *
 * @since 1.5
 */
public static final int MAX_CODE_POINT = 0X10FFFF;


/**
 * Instances of this class represent particular subsets of the Unicode
 * character set.  The only family of subsets defined in the
 * {@code Character} class is {@link Character.UnicodeBlock}.
 * Other portions of the Java API may define other subsets for their
 * own purposes.
 *
 * @since 1.2
 */
public static class Subset {

    /** The name of this subset; set once by the constructor and never {@code null}. */
    private final String name;

    /**
     * Constructs a new {@code Subset} instance.
     *
     * @param  name  The name of this subset
     * @throws NullPointerException if name is {@code null}
     */
    protected Subset(String name) {
        // Explicit check keeps the exception message ("name") stable for callers.
        if (name == null) {
            throw new NullPointerException("name");
        }
        this.name = name;
    }

    /**
     * Compares two {@code Subset} objects for equality.
     * This method returns {@code true} if and only if
     * {@code this} and the argument refer to the same
     * object; since this method is {@code final}, this
     * guarantee holds for all subclasses.
     *
     * @param  obj  the reference to compare with this subset
     * @return {@code true} if {@code obj} is this very object;
     *         {@code false} otherwise
     */
    @Override
    public final boolean equals(Object obj) {
        return this == obj;
    }

    /**
     * Returns the standard hash code as defined by the
     * {@link Object#hashCode} method.  This method
     * is {@code final} in order to ensure that the
     * {@code equals} and {@code hashCode} methods will
     * be consistent in all subclasses.
     *
     * @return the hash code produced by {@link Object#hashCode}
     */
    @Override
    public final int hashCode() {
        return super.hashCode();
    }

    /**
     * Returns the name of this subset.
     *
     * @return the name that was passed to the constructor
     */
    @Override
    public final String toString() {
        return name;
    }
}

// See http://www.unicode.org/Public/UNIDATA/Blocks.txt
// for the latest specification of Unicode Blocks.
664 665 /** 666 * A family of character subsets representing the character blocks in the 667 * Unicode specification. Character blocks generally define characters 668 * used for a specific script or purpose. A character is contained by 669 * at most one Unicode block. 670 * 671 * @since 1.2 672 */ 673 public static final class UnicodeBlock extends Subset { 674 /** 675 * 676 - the expected number of entities 676 * 0.75 - the default load factor of HashMap 677 */ 678 private static final int NUM_ENTITIES = 676; 679 private static Map<String, UnicodeBlock> map = 680 new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f)); 681 682 /** 683 * Creates a UnicodeBlock with the given identifier name. 684 * This name must be the same as the block identifier. 685 */ 686 private UnicodeBlock(String idName) { 687 super(idName); 688 map.put(idName, this); 689 } 690 691 /** 692 * Creates a UnicodeBlock with the given identifier name and 693 * alias name. 694 */ 695 private UnicodeBlock(String idName, String alias) { 696 this(idName); 697 map.put(alias, this); 698 } 699 700 /** 701 * Creates a UnicodeBlock with the given identifier name and 702 * alias names. 703 */ 704 private UnicodeBlock(String idName, String... aliases) { 705 this(idName); 706 for (String alias : aliases) 707 map.put(alias, this); 708 } 709 710 /** 711 * Constant for the "Basic Latin" Unicode character block. 712 * @since 1.2 713 */ 714 public static final UnicodeBlock BASIC_LATIN = 715 new UnicodeBlock("BASIC_LATIN", 716 "BASIC LATIN", 717 "BASICLATIN"); 718 719 /** 720 * Constant for the "Latin-1 Supplement" Unicode character block. 721 * @since 1.2 722 */ 723 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 724 new UnicodeBlock("LATIN_1_SUPPLEMENT", 725 "LATIN-1 SUPPLEMENT", 726 "LATIN-1SUPPLEMENT"); 727 728 /** 729 * Constant for the "Latin Extended-A" Unicode character block. 
730 * @since 1.2 731 */ 732 public static final UnicodeBlock LATIN_EXTENDED_A = 733 new UnicodeBlock("LATIN_EXTENDED_A", 734 "LATIN EXTENDED-A", 735 "LATINEXTENDED-A"); 736 737 /** 738 * Constant for the "Latin Extended-B" Unicode character block. 739 * @since 1.2 740 */ 741 public static final UnicodeBlock LATIN_EXTENDED_B = 742 new UnicodeBlock("LATIN_EXTENDED_B", 743 "LATIN EXTENDED-B", 744 "LATINEXTENDED-B"); 745 746 /** 747 * Constant for the "IPA Extensions" Unicode character block. 748 * @since 1.2 749 */ 750 public static final UnicodeBlock IPA_EXTENSIONS = 751 new UnicodeBlock("IPA_EXTENSIONS", 752 "IPA EXTENSIONS", 753 "IPAEXTENSIONS"); 754 755 /** 756 * Constant for the "Spacing Modifier Letters" Unicode character block. 757 * @since 1.2 758 */ 759 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 760 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 761 "SPACING MODIFIER LETTERS", 762 "SPACINGMODIFIERLETTERS"); 763 764 /** 765 * Constant for the "Combining Diacritical Marks" Unicode character block. 766 * @since 1.2 767 */ 768 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 769 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 770 "COMBINING DIACRITICAL MARKS", 771 "COMBININGDIACRITICALMARKS"); 772 773 /** 774 * Constant for the "Greek and Coptic" Unicode character block. 775 * <p> 776 * This block was previously known as the "Greek" block. 777 * 778 * @since 1.2 779 */ 780 public static final UnicodeBlock GREEK = 781 new UnicodeBlock("GREEK", 782 "GREEK AND COPTIC", 783 "GREEKANDCOPTIC"); 784 785 /** 786 * Constant for the "Cyrillic" Unicode character block. 787 * @since 1.2 788 */ 789 public static final UnicodeBlock CYRILLIC = 790 new UnicodeBlock("CYRILLIC"); 791 792 /** 793 * Constant for the "Armenian" Unicode character block. 794 * @since 1.2 795 */ 796 public static final UnicodeBlock ARMENIAN = 797 new UnicodeBlock("ARMENIAN"); 798 799 /** 800 * Constant for the "Hebrew" Unicode character block. 
801 * @since 1.2 802 */ 803 public static final UnicodeBlock HEBREW = 804 new UnicodeBlock("HEBREW"); 805 806 /** 807 * Constant for the "Arabic" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock ARABIC = 811 new UnicodeBlock("ARABIC"); 812 813 /** 814 * Constant for the "Devanagari" Unicode character block. 815 * @since 1.2 816 */ 817 public static final UnicodeBlock DEVANAGARI = 818 new UnicodeBlock("DEVANAGARI"); 819 820 /** 821 * Constant for the "Bengali" Unicode character block. 822 * @since 1.2 823 */ 824 public static final UnicodeBlock BENGALI = 825 new UnicodeBlock("BENGALI"); 826 827 /** 828 * Constant for the "Gurmukhi" Unicode character block. 829 * @since 1.2 830 */ 831 public static final UnicodeBlock GURMUKHI = 832 new UnicodeBlock("GURMUKHI"); 833 834 /** 835 * Constant for the "Gujarati" Unicode character block. 836 * @since 1.2 837 */ 838 public static final UnicodeBlock GUJARATI = 839 new UnicodeBlock("GUJARATI"); 840 841 /** 842 * Constant for the "Oriya" Unicode character block. 843 * @since 1.2 844 */ 845 public static final UnicodeBlock ORIYA = 846 new UnicodeBlock("ORIYA"); 847 848 /** 849 * Constant for the "Tamil" Unicode character block. 850 * @since 1.2 851 */ 852 public static final UnicodeBlock TAMIL = 853 new UnicodeBlock("TAMIL"); 854 855 /** 856 * Constant for the "Telugu" Unicode character block. 857 * @since 1.2 858 */ 859 public static final UnicodeBlock TELUGU = 860 new UnicodeBlock("TELUGU"); 861 862 /** 863 * Constant for the "Kannada" Unicode character block. 864 * @since 1.2 865 */ 866 public static final UnicodeBlock KANNADA = 867 new UnicodeBlock("KANNADA"); 868 869 /** 870 * Constant for the "Malayalam" Unicode character block. 871 * @since 1.2 872 */ 873 public static final UnicodeBlock MALAYALAM = 874 new UnicodeBlock("MALAYALAM"); 875 876 /** 877 * Constant for the "Thai" Unicode character block. 
878 * @since 1.2 879 */ 880 public static final UnicodeBlock THAI = 881 new UnicodeBlock("THAI"); 882 883 /** 884 * Constant for the "Lao" Unicode character block. 885 * @since 1.2 886 */ 887 public static final UnicodeBlock LAO = 888 new UnicodeBlock("LAO"); 889 890 /** 891 * Constant for the "Tibetan" Unicode character block. 892 * @since 1.2 893 */ 894 public static final UnicodeBlock TIBETAN = 895 new UnicodeBlock("TIBETAN"); 896 897 /** 898 * Constant for the "Georgian" Unicode character block. 899 * @since 1.2 900 */ 901 public static final UnicodeBlock GEORGIAN = 902 new UnicodeBlock("GEORGIAN"); 903 904 /** 905 * Constant for the "Hangul Jamo" Unicode character block. 906 * @since 1.2 907 */ 908 public static final UnicodeBlock HANGUL_JAMO = 909 new UnicodeBlock("HANGUL_JAMO", 910 "HANGUL JAMO", 911 "HANGULJAMO"); 912 913 /** 914 * Constant for the "Latin Extended Additional" Unicode character block. 915 * @since 1.2 916 */ 917 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 918 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 919 "LATIN EXTENDED ADDITIONAL", 920 "LATINEXTENDEDADDITIONAL"); 921 922 /** 923 * Constant for the "Greek Extended" Unicode character block. 924 * @since 1.2 925 */ 926 public static final UnicodeBlock GREEK_EXTENDED = 927 new UnicodeBlock("GREEK_EXTENDED", 928 "GREEK EXTENDED", 929 "GREEKEXTENDED"); 930 931 /** 932 * Constant for the "General Punctuation" Unicode character block. 933 * @since 1.2 934 */ 935 public static final UnicodeBlock GENERAL_PUNCTUATION = 936 new UnicodeBlock("GENERAL_PUNCTUATION", 937 "GENERAL PUNCTUATION", 938 "GENERALPUNCTUATION"); 939 940 /** 941 * Constant for the "Superscripts and Subscripts" Unicode character 942 * block. 
943 * @since 1.2 944 */ 945 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 946 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 947 "SUPERSCRIPTS AND SUBSCRIPTS", 948 "SUPERSCRIPTSANDSUBSCRIPTS"); 949 950 /** 951 * Constant for the "Currency Symbols" Unicode character block. 952 * @since 1.2 953 */ 954 public static final UnicodeBlock CURRENCY_SYMBOLS = 955 new UnicodeBlock("CURRENCY_SYMBOLS", 956 "CURRENCY SYMBOLS", 957 "CURRENCYSYMBOLS"); 958 959 /** 960 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 961 * character block. 962 * <p> 963 * This block was previously known as "Combining Marks for Symbols". 964 * @since 1.2 965 */ 966 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 967 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 968 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 969 "COMBININGDIACRITICALMARKSFORSYMBOLS", 970 "COMBINING MARKS FOR SYMBOLS", 971 "COMBININGMARKSFORSYMBOLS"); 972 973 /** 974 * Constant for the "Letterlike Symbols" Unicode character block. 975 * @since 1.2 976 */ 977 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 978 new UnicodeBlock("LETTERLIKE_SYMBOLS", 979 "LETTERLIKE SYMBOLS", 980 "LETTERLIKESYMBOLS"); 981 982 /** 983 * Constant for the "Number Forms" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock NUMBER_FORMS = 987 new UnicodeBlock("NUMBER_FORMS", 988 "NUMBER FORMS", 989 "NUMBERFORMS"); 990 991 /** 992 * Constant for the "Arrows" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock ARROWS = 996 new UnicodeBlock("ARROWS"); 997 998 /** 999 * Constant for the "Mathematical Operators" Unicode character block. 1000 * @since 1.2 1001 */ 1002 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1003 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1004 "MATHEMATICAL OPERATORS", 1005 "MATHEMATICALOPERATORS"); 1006 1007 /** 1008 * Constant for the "Miscellaneous Technical" Unicode character block. 
1009 * @since 1.2 1010 */ 1011 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1012 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1013 "MISCELLANEOUS TECHNICAL", 1014 "MISCELLANEOUSTECHNICAL"); 1015 1016 /** 1017 * Constant for the "Control Pictures" Unicode character block. 1018 * @since 1.2 1019 */ 1020 public static final UnicodeBlock CONTROL_PICTURES = 1021 new UnicodeBlock("CONTROL_PICTURES", 1022 "CONTROL PICTURES", 1023 "CONTROLPICTURES"); 1024 1025 /** 1026 * Constant for the "Optical Character Recognition" Unicode character block. 1027 * @since 1.2 1028 */ 1029 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1030 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1031 "OPTICAL CHARACTER RECOGNITION", 1032 "OPTICALCHARACTERRECOGNITION"); 1033 1034 /** 1035 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1036 * @since 1.2 1037 */ 1038 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1039 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1040 "ENCLOSED ALPHANUMERICS", 1041 "ENCLOSEDALPHANUMERICS"); 1042 1043 /** 1044 * Constant for the "Box Drawing" Unicode character block. 1045 * @since 1.2 1046 */ 1047 public static final UnicodeBlock BOX_DRAWING = 1048 new UnicodeBlock("BOX_DRAWING", 1049 "BOX DRAWING", 1050 "BOXDRAWING"); 1051 1052 /** 1053 * Constant for the "Block Elements" Unicode character block. 1054 * @since 1.2 1055 */ 1056 public static final UnicodeBlock BLOCK_ELEMENTS = 1057 new UnicodeBlock("BLOCK_ELEMENTS", 1058 "BLOCK ELEMENTS", 1059 "BLOCKELEMENTS"); 1060 1061 /** 1062 * Constant for the "Geometric Shapes" Unicode character block. 1063 * @since 1.2 1064 */ 1065 public static final UnicodeBlock GEOMETRIC_SHAPES = 1066 new UnicodeBlock("GEOMETRIC_SHAPES", 1067 "GEOMETRIC SHAPES", 1068 "GEOMETRICSHAPES"); 1069 1070 /** 1071 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1072 * @since 1.2 1073 */ 1074 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1075 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1076 "MISCELLANEOUS SYMBOLS", 1077 "MISCELLANEOUSSYMBOLS"); 1078 1079 /** 1080 * Constant for the "Dingbats" Unicode character block. 1081 * @since 1.2 1082 */ 1083 public static final UnicodeBlock DINGBATS = 1084 new UnicodeBlock("DINGBATS"); 1085 1086 /** 1087 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1088 * @since 1.2 1089 */ 1090 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1091 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1092 "CJK SYMBOLS AND PUNCTUATION", 1093 "CJKSYMBOLSANDPUNCTUATION"); 1094 1095 /** 1096 * Constant for the "Hiragana" Unicode character block. 1097 * @since 1.2 1098 */ 1099 public static final UnicodeBlock HIRAGANA = 1100 new UnicodeBlock("HIRAGANA"); 1101 1102 /** 1103 * Constant for the "Katakana" Unicode character block. 1104 * @since 1.2 1105 */ 1106 public static final UnicodeBlock KATAKANA = 1107 new UnicodeBlock("KATAKANA"); 1108 1109 /** 1110 * Constant for the "Bopomofo" Unicode character block. 1111 * @since 1.2 1112 */ 1113 public static final UnicodeBlock BOPOMOFO = 1114 new UnicodeBlock("BOPOMOFO"); 1115 1116 /** 1117 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1118 * @since 1.2 1119 */ 1120 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1121 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1122 "HANGUL COMPATIBILITY JAMO", 1123 "HANGULCOMPATIBILITYJAMO"); 1124 1125 /** 1126 * Constant for the "Kanbun" Unicode character block. 1127 * @since 1.2 1128 */ 1129 public static final UnicodeBlock KANBUN = 1130 new UnicodeBlock("KANBUN"); 1131 1132 /** 1133 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1134 * @since 1.2 1135 */ 1136 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1137 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1138 "ENCLOSED CJK LETTERS AND MONTHS", 1139 "ENCLOSEDCJKLETTERSANDMONTHS"); 1140 1141 /** 1142 * Constant for the "CJK Compatibility" Unicode character block. 1143 * @since 1.2 1144 */ 1145 public static final UnicodeBlock CJK_COMPATIBILITY = 1146 new UnicodeBlock("CJK_COMPATIBILITY", 1147 "CJK COMPATIBILITY", 1148 "CJKCOMPATIBILITY"); 1149 1150 /** 1151 * Constant for the "CJK Unified Ideographs" Unicode character block. 1152 * @since 1.2 1153 */ 1154 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1155 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1156 "CJK UNIFIED IDEOGRAPHS", 1157 "CJKUNIFIEDIDEOGRAPHS"); 1158 1159 /** 1160 * Constant for the "Hangul Syllables" Unicode character block. 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock HANGUL_SYLLABLES = 1164 new UnicodeBlock("HANGUL_SYLLABLES", 1165 "HANGUL SYLLABLES", 1166 "HANGULSYLLABLES"); 1167 1168 /** 1169 * Constant for the "Private Use Area" Unicode character block. 1170 * @since 1.2 1171 */ 1172 public static final UnicodeBlock PRIVATE_USE_AREA = 1173 new UnicodeBlock("PRIVATE_USE_AREA", 1174 "PRIVATE USE AREA", 1175 "PRIVATEUSEAREA"); 1176 1177 /** 1178 * Constant for the "CJK Compatibility Ideographs" Unicode character 1179 * block. 1180 * @since 1.2 1181 */ 1182 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1183 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1184 "CJK COMPATIBILITY IDEOGRAPHS", 1185 "CJKCOMPATIBILITYIDEOGRAPHS"); 1186 1187 /** 1188 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1189 * @since 1.2 1190 */ 1191 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1192 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1193 "ALPHABETIC PRESENTATION FORMS", 1194 "ALPHABETICPRESENTATIONFORMS"); 1195 1196 /** 1197 * Constant for the "Arabic Presentation Forms-A" Unicode character 1198 * block. 1199 * @since 1.2 1200 */ 1201 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1202 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1203 "ARABIC PRESENTATION FORMS-A", 1204 "ARABICPRESENTATIONFORMS-A"); 1205 1206 /** 1207 * Constant for the "Combining Half Marks" Unicode character block. 1208 * @since 1.2 1209 */ 1210 public static final UnicodeBlock COMBINING_HALF_MARKS = 1211 new UnicodeBlock("COMBINING_HALF_MARKS", 1212 "COMBINING HALF MARKS", 1213 "COMBININGHALFMARKS"); 1214 1215 /** 1216 * Constant for the "CJK Compatibility Forms" Unicode character block. 1217 * @since 1.2 1218 */ 1219 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1220 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1221 "CJK COMPATIBILITY FORMS", 1222 "CJKCOMPATIBILITYFORMS"); 1223 1224 /** 1225 * Constant for the "Small Form Variants" Unicode character block. 1226 * @since 1.2 1227 */ 1228 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1229 new UnicodeBlock("SMALL_FORM_VARIANTS", 1230 "SMALL FORM VARIANTS", 1231 "SMALLFORMVARIANTS"); 1232 1233 /** 1234 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1235 * @since 1.2 1236 */ 1237 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1238 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1239 "ARABIC PRESENTATION FORMS-B", 1240 "ARABICPRESENTATIONFORMS-B"); 1241 1242 /** 1243 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1244 * block. 
1245 * @since 1.2 1246 */ 1247 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1248 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1249 "HALFWIDTH AND FULLWIDTH FORMS", 1250 "HALFWIDTHANDFULLWIDTHFORMS"); 1251 1252 /** 1253 * Constant for the "Specials" Unicode character block. 1254 * @since 1.2 1255 */ 1256 public static final UnicodeBlock SPECIALS = 1257 new UnicodeBlock("SPECIALS"); 1258 1259 /** 1260 * @deprecated 1261 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1262 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1263 * These constants match the block definitions of the Unicode Standard. 1264 * The {@link #of(char)} and {@link #of(int)} methods return the 1265 * standard constants. 1266 */ 1267 @Deprecated(since="1.5") 1268 public static final UnicodeBlock SURROGATES_AREA = 1269 new UnicodeBlock("SURROGATES_AREA"); 1270 1271 /** 1272 * Constant for the "Syriac" Unicode character block. 1273 * @since 1.4 1274 */ 1275 public static final UnicodeBlock SYRIAC = 1276 new UnicodeBlock("SYRIAC"); 1277 1278 /** 1279 * Constant for the "Thaana" Unicode character block. 1280 * @since 1.4 1281 */ 1282 public static final UnicodeBlock THAANA = 1283 new UnicodeBlock("THAANA"); 1284 1285 /** 1286 * Constant for the "Sinhala" Unicode character block. 1287 * @since 1.4 1288 */ 1289 public static final UnicodeBlock SINHALA = 1290 new UnicodeBlock("SINHALA"); 1291 1292 /** 1293 * Constant for the "Myanmar" Unicode character block. 1294 * @since 1.4 1295 */ 1296 public static final UnicodeBlock MYANMAR = 1297 new UnicodeBlock("MYANMAR"); 1298 1299 /** 1300 * Constant for the "Ethiopic" Unicode character block. 1301 * @since 1.4 1302 */ 1303 public static final UnicodeBlock ETHIOPIC = 1304 new UnicodeBlock("ETHIOPIC"); 1305 1306 /** 1307 * Constant for the "Cherokee" Unicode character block. 
1308 * @since 1.4 1309 */ 1310 public static final UnicodeBlock CHEROKEE = 1311 new UnicodeBlock("CHEROKEE"); 1312 1313 /** 1314 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1315 * @since 1.4 1316 */ 1317 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1318 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1319 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1320 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1321 1322 /** 1323 * Constant for the "Ogham" Unicode character block. 1324 * @since 1.4 1325 */ 1326 public static final UnicodeBlock OGHAM = 1327 new UnicodeBlock("OGHAM"); 1328 1329 /** 1330 * Constant for the "Runic" Unicode character block. 1331 * @since 1.4 1332 */ 1333 public static final UnicodeBlock RUNIC = 1334 new UnicodeBlock("RUNIC"); 1335 1336 /** 1337 * Constant for the "Khmer" Unicode character block. 1338 * @since 1.4 1339 */ 1340 public static final UnicodeBlock KHMER = 1341 new UnicodeBlock("KHMER"); 1342 1343 /** 1344 * Constant for the "Mongolian" Unicode character block. 1345 * @since 1.4 1346 */ 1347 public static final UnicodeBlock MONGOLIAN = 1348 new UnicodeBlock("MONGOLIAN"); 1349 1350 /** 1351 * Constant for the "Braille Patterns" Unicode character block. 1352 * @since 1.4 1353 */ 1354 public static final UnicodeBlock BRAILLE_PATTERNS = 1355 new UnicodeBlock("BRAILLE_PATTERNS", 1356 "BRAILLE PATTERNS", 1357 "BRAILLEPATTERNS"); 1358 1359 /** 1360 * Constant for the "CJK Radicals Supplement" Unicode character block. 1361 * @since 1.4 1362 */ 1363 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1364 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1365 "CJK RADICALS SUPPLEMENT", 1366 "CJKRADICALSSUPPLEMENT"); 1367 1368 /** 1369 * Constant for the "Kangxi Radicals" Unicode character block. 
1370 * @since 1.4 1371 */ 1372 public static final UnicodeBlock KANGXI_RADICALS = 1373 new UnicodeBlock("KANGXI_RADICALS", 1374 "KANGXI RADICALS", 1375 "KANGXIRADICALS"); 1376 1377 /** 1378 * Constant for the "Ideographic Description Characters" Unicode character block. 1379 * @since 1.4 1380 */ 1381 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1382 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1383 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1384 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1385 1386 /** 1387 * Constant for the "Bopomofo Extended" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1391 new UnicodeBlock("BOPOMOFO_EXTENDED", 1392 "BOPOMOFO EXTENDED", 1393 "BOPOMOFOEXTENDED"); 1394 1395 /** 1396 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1397 * @since 1.4 1398 */ 1399 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1400 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1401 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1402 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1403 1404 /** 1405 * Constant for the "Yi Syllables" Unicode character block. 1406 * @since 1.4 1407 */ 1408 public static final UnicodeBlock YI_SYLLABLES = 1409 new UnicodeBlock("YI_SYLLABLES", 1410 "YI SYLLABLES", 1411 "YISYLLABLES"); 1412 1413 /** 1414 * Constant for the "Yi Radicals" Unicode character block. 1415 * @since 1.4 1416 */ 1417 public static final UnicodeBlock YI_RADICALS = 1418 new UnicodeBlock("YI_RADICALS", 1419 "YI RADICALS", 1420 "YIRADICALS"); 1421 1422 /** 1423 * Constant for the "Cyrillic Supplement" Unicode character block. 1424 * This block was previously known as the "Cyrillic Supplementary" block. 
1425 * @since 1.5 1426 */ 1427 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1428 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1429 "CYRILLIC SUPPLEMENTARY", 1430 "CYRILLICSUPPLEMENTARY", 1431 "CYRILLIC SUPPLEMENT", 1432 "CYRILLICSUPPLEMENT"); 1433 1434 /** 1435 * Constant for the "Tagalog" Unicode character block. 1436 * @since 1.5 1437 */ 1438 public static final UnicodeBlock TAGALOG = 1439 new UnicodeBlock("TAGALOG"); 1440 1441 /** 1442 * Constant for the "Hanunoo" Unicode character block. 1443 * @since 1.5 1444 */ 1445 public static final UnicodeBlock HANUNOO = 1446 new UnicodeBlock("HANUNOO"); 1447 1448 /** 1449 * Constant for the "Buhid" Unicode character block. 1450 * @since 1.5 1451 */ 1452 public static final UnicodeBlock BUHID = 1453 new UnicodeBlock("BUHID"); 1454 1455 /** 1456 * Constant for the "Tagbanwa" Unicode character block. 1457 * @since 1.5 1458 */ 1459 public static final UnicodeBlock TAGBANWA = 1460 new UnicodeBlock("TAGBANWA"); 1461 1462 /** 1463 * Constant for the "Limbu" Unicode character block. 1464 * @since 1.5 1465 */ 1466 public static final UnicodeBlock LIMBU = 1467 new UnicodeBlock("LIMBU"); 1468 1469 /** 1470 * Constant for the "Tai Le" Unicode character block. 1471 * @since 1.5 1472 */ 1473 public static final UnicodeBlock TAI_LE = 1474 new UnicodeBlock("TAI_LE", 1475 "TAI LE", 1476 "TAILE"); 1477 1478 /** 1479 * Constant for the "Khmer Symbols" Unicode character block. 1480 * @since 1.5 1481 */ 1482 public static final UnicodeBlock KHMER_SYMBOLS = 1483 new UnicodeBlock("KHMER_SYMBOLS", 1484 "KHMER SYMBOLS", 1485 "KHMERSYMBOLS"); 1486 1487 /** 1488 * Constant for the "Phonetic Extensions" Unicode character block. 1489 * @since 1.5 1490 */ 1491 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1492 new UnicodeBlock("PHONETIC_EXTENSIONS", 1493 "PHONETIC EXTENSIONS", 1494 "PHONETICEXTENSIONS"); 1495 1496 /** 1497 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1498 * @since 1.5 1499 */ 1500 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1501 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1502 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1503 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1504 1505 /** 1506 * Constant for the "Supplemental Arrows-A" Unicode character block. 1507 * @since 1.5 1508 */ 1509 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1510 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1511 "SUPPLEMENTAL ARROWS-A", 1512 "SUPPLEMENTALARROWS-A"); 1513 1514 /** 1515 * Constant for the "Supplemental Arrows-B" Unicode character block. 1516 * @since 1.5 1517 */ 1518 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1519 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1520 "SUPPLEMENTAL ARROWS-B", 1521 "SUPPLEMENTALARROWS-B"); 1522 1523 /** 1524 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1525 * character block. 1526 * @since 1.5 1527 */ 1528 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1529 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1530 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1531 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1532 1533 /** 1534 * Constant for the "Supplemental Mathematical Operators" Unicode 1535 * character block. 1536 * @since 1.5 1537 */ 1538 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1539 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1540 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1541 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1542 1543 /** 1544 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1545 * block. 1546 * @since 1.5 1547 */ 1548 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1549 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1550 "MISCELLANEOUS SYMBOLS AND ARROWS", 1551 "MISCELLANEOUSSYMBOLSANDARROWS"); 1552 1553 /** 1554 * Constant for the "Katakana Phonetic Extensions" Unicode character 1555 * block. 
1556 * @since 1.5 1557 */ 1558 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1559 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1560 "KATAKANA PHONETIC EXTENSIONS", 1561 "KATAKANAPHONETICEXTENSIONS"); 1562 1563 /** 1564 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1565 * @since 1.5 1566 */ 1567 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1568 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1569 "YIJING HEXAGRAM SYMBOLS", 1570 "YIJINGHEXAGRAMSYMBOLS"); 1571 1572 /** 1573 * Constant for the "Variation Selectors" Unicode character block. 1574 * @since 1.5 1575 */ 1576 public static final UnicodeBlock VARIATION_SELECTORS = 1577 new UnicodeBlock("VARIATION_SELECTORS", 1578 "VARIATION SELECTORS", 1579 "VARIATIONSELECTORS"); 1580 1581 /** 1582 * Constant for the "Linear B Syllabary" Unicode character block. 1583 * @since 1.5 1584 */ 1585 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1586 new UnicodeBlock("LINEAR_B_SYLLABARY", 1587 "LINEAR B SYLLABARY", 1588 "LINEARBSYLLABARY"); 1589 1590 /** 1591 * Constant for the "Linear B Ideograms" Unicode character block. 1592 * @since 1.5 1593 */ 1594 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1595 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1596 "LINEAR B IDEOGRAMS", 1597 "LINEARBIDEOGRAMS"); 1598 1599 /** 1600 * Constant for the "Aegean Numbers" Unicode character block. 1601 * @since 1.5 1602 */ 1603 public static final UnicodeBlock AEGEAN_NUMBERS = 1604 new UnicodeBlock("AEGEAN_NUMBERS", 1605 "AEGEAN NUMBERS", 1606 "AEGEANNUMBERS"); 1607 1608 /** 1609 * Constant for the "Old Italic" Unicode character block. 1610 * @since 1.5 1611 */ 1612 public static final UnicodeBlock OLD_ITALIC = 1613 new UnicodeBlock("OLD_ITALIC", 1614 "OLD ITALIC", 1615 "OLDITALIC"); 1616 1617 /** 1618 * Constant for the "Gothic" Unicode character block. 
1619 * @since 1.5 1620 */ 1621 public static final UnicodeBlock GOTHIC = 1622 new UnicodeBlock("GOTHIC"); 1623 1624 /** 1625 * Constant for the "Ugaritic" Unicode character block. 1626 * @since 1.5 1627 */ 1628 public static final UnicodeBlock UGARITIC = 1629 new UnicodeBlock("UGARITIC"); 1630 1631 /** 1632 * Constant for the "Deseret" Unicode character block. 1633 * @since 1.5 1634 */ 1635 public static final UnicodeBlock DESERET = 1636 new UnicodeBlock("DESERET"); 1637 1638 /** 1639 * Constant for the "Shavian" Unicode character block. 1640 * @since 1.5 1641 */ 1642 public static final UnicodeBlock SHAVIAN = 1643 new UnicodeBlock("SHAVIAN"); 1644 1645 /** 1646 * Constant for the "Osmanya" Unicode character block. 1647 * @since 1.5 1648 */ 1649 public static final UnicodeBlock OSMANYA = 1650 new UnicodeBlock("OSMANYA"); 1651 1652 /** 1653 * Constant for the "Cypriot Syllabary" Unicode character block. 1654 * @since 1.5 1655 */ 1656 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1657 new UnicodeBlock("CYPRIOT_SYLLABARY", 1658 "CYPRIOT SYLLABARY", 1659 "CYPRIOTSYLLABARY"); 1660 1661 /** 1662 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1663 * @since 1.5 1664 */ 1665 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1666 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1667 "BYZANTINE MUSICAL SYMBOLS", 1668 "BYZANTINEMUSICALSYMBOLS"); 1669 1670 /** 1671 * Constant for the "Musical Symbols" Unicode character block. 1672 * @since 1.5 1673 */ 1674 public static final UnicodeBlock MUSICAL_SYMBOLS = 1675 new UnicodeBlock("MUSICAL_SYMBOLS", 1676 "MUSICAL SYMBOLS", 1677 "MUSICALSYMBOLS"); 1678 1679 /** 1680 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1681 * @since 1.5 1682 */ 1683 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1684 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1685 "TAI XUAN JING SYMBOLS", 1686 "TAIXUANJINGSYMBOLS"); 1687 1688 /** 1689 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1690 * character block. 1691 * @since 1.5 1692 */ 1693 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1694 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1695 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1696 "MATHEMATICALALPHANUMERICSYMBOLS"); 1697 1698 /** 1699 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1700 * character block. 1701 * @since 1.5 1702 */ 1703 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1704 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1705 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1706 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1707 1708 /** 1709 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1710 * @since 1.5 1711 */ 1712 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1713 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1714 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1715 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1716 1717 /** 1718 * Constant for the "Tags" Unicode character block. 1719 * @since 1.5 1720 */ 1721 public static final UnicodeBlock TAGS = 1722 new UnicodeBlock("TAGS"); 1723 1724 /** 1725 * Constant for the "Variation Selectors Supplement" Unicode character 1726 * block. 1727 * @since 1.5 1728 */ 1729 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1730 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1731 "VARIATION SELECTORS SUPPLEMENT", 1732 "VARIATIONSELECTORSSUPPLEMENT"); 1733 1734 /** 1735 * Constant for the "Supplementary Private Use Area-A" Unicode character 1736 * block. 
1737 * @since 1.5 1738 */ 1739 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1740 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1741 "SUPPLEMENTARY PRIVATE USE AREA-A", 1742 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1743 1744 /** 1745 * Constant for the "Supplementary Private Use Area-B" Unicode character 1746 * block. 1747 * @since 1.5 1748 */ 1749 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1750 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1751 "SUPPLEMENTARY PRIVATE USE AREA-B", 1752 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1753 1754 /** 1755 * Constant for the "High Surrogates" Unicode character block. 1756 * This block represents codepoint values in the high surrogate 1757 * range: U+D800 through U+DB7F 1758 * 1759 * @since 1.5 1760 */ 1761 public static final UnicodeBlock HIGH_SURROGATES = 1762 new UnicodeBlock("HIGH_SURROGATES", 1763 "HIGH SURROGATES", 1764 "HIGHSURROGATES"); 1765 1766 /** 1767 * Constant for the "High Private Use Surrogates" Unicode character 1768 * block. 1769 * This block represents codepoint values in the private use high 1770 * surrogate range: U+DB80 through U+DBFF 1771 * 1772 * @since 1.5 1773 */ 1774 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1775 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1776 "HIGH PRIVATE USE SURROGATES", 1777 "HIGHPRIVATEUSESURROGATES"); 1778 1779 /** 1780 * Constant for the "Low Surrogates" Unicode character block. 1781 * This block represents codepoint values in the low surrogate 1782 * range: U+DC00 through U+DFFF 1783 * 1784 * @since 1.5 1785 */ 1786 public static final UnicodeBlock LOW_SURROGATES = 1787 new UnicodeBlock("LOW_SURROGATES", 1788 "LOW SURROGATES", 1789 "LOWSURROGATES"); 1790 1791 /** 1792 * Constant for the "Arabic Supplement" Unicode character block. 
1793 * @since 1.7 1794 */ 1795 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1796 new UnicodeBlock("ARABIC_SUPPLEMENT", 1797 "ARABIC SUPPLEMENT", 1798 "ARABICSUPPLEMENT"); 1799 1800 /** 1801 * Constant for the "NKo" Unicode character block. 1802 * @since 1.7 1803 */ 1804 public static final UnicodeBlock NKO = 1805 new UnicodeBlock("NKO"); 1806 1807 /** 1808 * Constant for the "Samaritan" Unicode character block. 1809 * @since 1.7 1810 */ 1811 public static final UnicodeBlock SAMARITAN = 1812 new UnicodeBlock("SAMARITAN"); 1813 1814 /** 1815 * Constant for the "Mandaic" Unicode character block. 1816 * @since 1.7 1817 */ 1818 public static final UnicodeBlock MANDAIC = 1819 new UnicodeBlock("MANDAIC"); 1820 1821 /** 1822 * Constant for the "Ethiopic Supplement" Unicode character block. 1823 * @since 1.7 1824 */ 1825 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1826 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1827 "ETHIOPIC SUPPLEMENT", 1828 "ETHIOPICSUPPLEMENT"); 1829 1830 /** 1831 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1832 * Unicode character block. 1833 * @since 1.7 1834 */ 1835 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1836 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1837 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1838 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1839 1840 /** 1841 * Constant for the "New Tai Lue" Unicode character block. 1842 * @since 1.7 1843 */ 1844 public static final UnicodeBlock NEW_TAI_LUE = 1845 new UnicodeBlock("NEW_TAI_LUE", 1846 "NEW TAI LUE", 1847 "NEWTAILUE"); 1848 1849 /** 1850 * Constant for the "Buginese" Unicode character block. 1851 * @since 1.7 1852 */ 1853 public static final UnicodeBlock BUGINESE = 1854 new UnicodeBlock("BUGINESE"); 1855 1856 /** 1857 * Constant for the "Tai Tham" Unicode character block. 
1858 * @since 1.7 1859 */ 1860 public static final UnicodeBlock TAI_THAM = 1861 new UnicodeBlock("TAI_THAM", 1862 "TAI THAM", 1863 "TAITHAM"); 1864 1865 /** 1866 * Constant for the "Balinese" Unicode character block. 1867 * @since 1.7 1868 */ 1869 public static final UnicodeBlock BALINESE = 1870 new UnicodeBlock("BALINESE"); 1871 1872 /** 1873 * Constant for the "Sundanese" Unicode character block. 1874 * @since 1.7 1875 */ 1876 public static final UnicodeBlock SUNDANESE = 1877 new UnicodeBlock("SUNDANESE"); 1878 1879 /** 1880 * Constant for the "Batak" Unicode character block. 1881 * @since 1.7 1882 */ 1883 public static final UnicodeBlock BATAK = 1884 new UnicodeBlock("BATAK"); 1885 1886 /** 1887 * Constant for the "Lepcha" Unicode character block. 1888 * @since 1.7 1889 */ 1890 public static final UnicodeBlock LEPCHA = 1891 new UnicodeBlock("LEPCHA"); 1892 1893 /** 1894 * Constant for the "Ol Chiki" Unicode character block. 1895 * @since 1.7 1896 */ 1897 public static final UnicodeBlock OL_CHIKI = 1898 new UnicodeBlock("OL_CHIKI", 1899 "OL CHIKI", 1900 "OLCHIKI"); 1901 1902 /** 1903 * Constant for the "Vedic Extensions" Unicode character block. 1904 * @since 1.7 1905 */ 1906 public static final UnicodeBlock VEDIC_EXTENSIONS = 1907 new UnicodeBlock("VEDIC_EXTENSIONS", 1908 "VEDIC EXTENSIONS", 1909 "VEDICEXTENSIONS"); 1910 1911 /** 1912 * Constant for the "Phonetic Extensions Supplement" Unicode character 1913 * block. 1914 * @since 1.7 1915 */ 1916 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1917 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1918 "PHONETIC EXTENSIONS SUPPLEMENT", 1919 "PHONETICEXTENSIONSSUPPLEMENT"); 1920 1921 /** 1922 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1923 * character block. 
1924 * @since 1.7 1925 */ 1926 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1927 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1928 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1929 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1930 1931 /** 1932 * Constant for the "Glagolitic" Unicode character block. 1933 * @since 1.7 1934 */ 1935 public static final UnicodeBlock GLAGOLITIC = 1936 new UnicodeBlock("GLAGOLITIC"); 1937 1938 /** 1939 * Constant for the "Latin Extended-C" Unicode character block. 1940 * @since 1.7 1941 */ 1942 public static final UnicodeBlock LATIN_EXTENDED_C = 1943 new UnicodeBlock("LATIN_EXTENDED_C", 1944 "LATIN EXTENDED-C", 1945 "LATINEXTENDED-C"); 1946 1947 /** 1948 * Constant for the "Coptic" Unicode character block. 1949 * @since 1.7 1950 */ 1951 public static final UnicodeBlock COPTIC = 1952 new UnicodeBlock("COPTIC"); 1953 1954 /** 1955 * Constant for the "Georgian Supplement" Unicode character block. 1956 * @since 1.7 1957 */ 1958 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1959 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1960 "GEORGIAN SUPPLEMENT", 1961 "GEORGIANSUPPLEMENT"); 1962 1963 /** 1964 * Constant for the "Tifinagh" Unicode character block. 1965 * @since 1.7 1966 */ 1967 public static final UnicodeBlock TIFINAGH = 1968 new UnicodeBlock("TIFINAGH"); 1969 1970 /** 1971 * Constant for the "Ethiopic Extended" Unicode character block. 1972 * @since 1.7 1973 */ 1974 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1975 new UnicodeBlock("ETHIOPIC_EXTENDED", 1976 "ETHIOPIC EXTENDED", 1977 "ETHIOPICEXTENDED"); 1978 1979 /** 1980 * Constant for the "Cyrillic Extended-A" Unicode character block. 1981 * @since 1.7 1982 */ 1983 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1984 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1985 "CYRILLIC EXTENDED-A", 1986 "CYRILLICEXTENDED-A"); 1987 1988 /** 1989 * Constant for the "Supplemental Punctuation" Unicode character block. 
1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1993 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1994 "SUPPLEMENTAL PUNCTUATION", 1995 "SUPPLEMENTALPUNCTUATION"); 1996 1997 /** 1998 * Constant for the "CJK Strokes" Unicode character block. 1999 * @since 1.7 2000 */ 2001 public static final UnicodeBlock CJK_STROKES = 2002 new UnicodeBlock("CJK_STROKES", 2003 "CJK STROKES", 2004 "CJKSTROKES"); 2005 2006 /** 2007 * Constant for the "Lisu" Unicode character block. 2008 * @since 1.7 2009 */ 2010 public static final UnicodeBlock LISU = 2011 new UnicodeBlock("LISU"); 2012 2013 /** 2014 * Constant for the "Vai" Unicode character block. 2015 * @since 1.7 2016 */ 2017 public static final UnicodeBlock VAI = 2018 new UnicodeBlock("VAI"); 2019 2020 /** 2021 * Constant for the "Cyrillic Extended-B" Unicode character block. 2022 * @since 1.7 2023 */ 2024 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2025 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2026 "CYRILLIC EXTENDED-B", 2027 "CYRILLICEXTENDED-B"); 2028 2029 /** 2030 * Constant for the "Bamum" Unicode character block. 2031 * @since 1.7 2032 */ 2033 public static final UnicodeBlock BAMUM = 2034 new UnicodeBlock("BAMUM"); 2035 2036 /** 2037 * Constant for the "Modifier Tone Letters" Unicode character block. 2038 * @since 1.7 2039 */ 2040 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2041 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2042 "MODIFIER TONE LETTERS", 2043 "MODIFIERTONELETTERS"); 2044 2045 /** 2046 * Constant for the "Latin Extended-D" Unicode character block. 2047 * @since 1.7 2048 */ 2049 public static final UnicodeBlock LATIN_EXTENDED_D = 2050 new UnicodeBlock("LATIN_EXTENDED_D", 2051 "LATIN EXTENDED-D", 2052 "LATINEXTENDED-D"); 2053 2054 /** 2055 * Constant for the "Syloti Nagri" Unicode character block. 
2056 * @since 1.7 2057 */ 2058 public static final UnicodeBlock SYLOTI_NAGRI = 2059 new UnicodeBlock("SYLOTI_NAGRI", 2060 "SYLOTI NAGRI", 2061 "SYLOTINAGRI"); 2062 2063 /** 2064 * Constant for the "Common Indic Number Forms" Unicode character block. 2065 * @since 1.7 2066 */ 2067 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2068 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2069 "COMMON INDIC NUMBER FORMS", 2070 "COMMONINDICNUMBERFORMS"); 2071 2072 /** 2073 * Constant for the "Phags-pa" Unicode character block. 2074 * @since 1.7 2075 */ 2076 public static final UnicodeBlock PHAGS_PA = 2077 new UnicodeBlock("PHAGS_PA", 2078 "PHAGS-PA"); 2079 2080 /** 2081 * Constant for the "Saurashtra" Unicode character block. 2082 * @since 1.7 2083 */ 2084 public static final UnicodeBlock SAURASHTRA = 2085 new UnicodeBlock("SAURASHTRA"); 2086 2087 /** 2088 * Constant for the "Devanagari Extended" Unicode character block. 2089 * @since 1.7 2090 */ 2091 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2092 new UnicodeBlock("DEVANAGARI_EXTENDED", 2093 "DEVANAGARI EXTENDED", 2094 "DEVANAGARIEXTENDED"); 2095 2096 /** 2097 * Constant for the "Kayah Li" Unicode character block. 2098 * @since 1.7 2099 */ 2100 public static final UnicodeBlock KAYAH_LI = 2101 new UnicodeBlock("KAYAH_LI", 2102 "KAYAH LI", 2103 "KAYAHLI"); 2104 2105 /** 2106 * Constant for the "Rejang" Unicode character block. 2107 * @since 1.7 2108 */ 2109 public static final UnicodeBlock REJANG = 2110 new UnicodeBlock("REJANG"); 2111 2112 /** 2113 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2114 * @since 1.7 2115 */ 2116 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2117 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2118 "HANGUL JAMO EXTENDED-A", 2119 "HANGULJAMOEXTENDED-A"); 2120 2121 /** 2122 * Constant for the "Javanese" Unicode character block. 
2123 * @since 1.7 2124 */ 2125 public static final UnicodeBlock JAVANESE = 2126 new UnicodeBlock("JAVANESE"); 2127 2128 /** 2129 * Constant for the "Cham" Unicode character block. 2130 * @since 1.7 2131 */ 2132 public static final UnicodeBlock CHAM = 2133 new UnicodeBlock("CHAM"); 2134 2135 /** 2136 * Constant for the "Myanmar Extended-A" Unicode character block. 2137 * @since 1.7 2138 */ 2139 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2140 new UnicodeBlock("MYANMAR_EXTENDED_A", 2141 "MYANMAR EXTENDED-A", 2142 "MYANMAREXTENDED-A"); 2143 2144 /** 2145 * Constant for the "Tai Viet" Unicode character block. 2146 * @since 1.7 2147 */ 2148 public static final UnicodeBlock TAI_VIET = 2149 new UnicodeBlock("TAI_VIET", 2150 "TAI VIET", 2151 "TAIVIET"); 2152 2153 /** 2154 * Constant for the "Ethiopic Extended-A" Unicode character block. 2155 * @since 1.7 2156 */ 2157 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2158 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2159 "ETHIOPIC EXTENDED-A", 2160 "ETHIOPICEXTENDED-A"); 2161 2162 /** 2163 * Constant for the "Meetei Mayek" Unicode character block. 2164 * @since 1.7 2165 */ 2166 public static final UnicodeBlock MEETEI_MAYEK = 2167 new UnicodeBlock("MEETEI_MAYEK", 2168 "MEETEI MAYEK", 2169 "MEETEIMAYEK"); 2170 2171 /** 2172 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2173 * @since 1.7 2174 */ 2175 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2176 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2177 "HANGUL JAMO EXTENDED-B", 2178 "HANGULJAMOEXTENDED-B"); 2179 2180 /** 2181 * Constant for the "Vertical Forms" Unicode character block. 2182 * @since 1.7 2183 */ 2184 public static final UnicodeBlock VERTICAL_FORMS = 2185 new UnicodeBlock("VERTICAL_FORMS", 2186 "VERTICAL FORMS", 2187 "VERTICALFORMS"); 2188 2189 /** 2190 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2191 * @since 1.7 2192 */ 2193 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2194 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2195 "ANCIENT GREEK NUMBERS", 2196 "ANCIENTGREEKNUMBERS"); 2197 2198 /** 2199 * Constant for the "Ancient Symbols" Unicode character block. 2200 * @since 1.7 2201 */ 2202 public static final UnicodeBlock ANCIENT_SYMBOLS = 2203 new UnicodeBlock("ANCIENT_SYMBOLS", 2204 "ANCIENT SYMBOLS", 2205 "ANCIENTSYMBOLS"); 2206 2207 /** 2208 * Constant for the "Phaistos Disc" Unicode character block. 2209 * @since 1.7 2210 */ 2211 public static final UnicodeBlock PHAISTOS_DISC = 2212 new UnicodeBlock("PHAISTOS_DISC", 2213 "PHAISTOS DISC", 2214 "PHAISTOSDISC"); 2215 2216 /** 2217 * Constant for the "Lycian" Unicode character block. 2218 * @since 1.7 2219 */ 2220 public static final UnicodeBlock LYCIAN = 2221 new UnicodeBlock("LYCIAN"); 2222 2223 /** 2224 * Constant for the "Carian" Unicode character block. 2225 * @since 1.7 2226 */ 2227 public static final UnicodeBlock CARIAN = 2228 new UnicodeBlock("CARIAN"); 2229 2230 /** 2231 * Constant for the "Old Persian" Unicode character block. 2232 * @since 1.7 2233 */ 2234 public static final UnicodeBlock OLD_PERSIAN = 2235 new UnicodeBlock("OLD_PERSIAN", 2236 "OLD PERSIAN", 2237 "OLDPERSIAN"); 2238 2239 /** 2240 * Constant for the "Imperial Aramaic" Unicode character block. 2241 * @since 1.7 2242 */ 2243 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2244 new UnicodeBlock("IMPERIAL_ARAMAIC", 2245 "IMPERIAL ARAMAIC", 2246 "IMPERIALARAMAIC"); 2247 2248 /** 2249 * Constant for the "Phoenician" Unicode character block. 2250 * @since 1.7 2251 */ 2252 public static final UnicodeBlock PHOENICIAN = 2253 new UnicodeBlock("PHOENICIAN"); 2254 2255 /** 2256 * Constant for the "Lydian" Unicode character block. 2257 * @since 1.7 2258 */ 2259 public static final UnicodeBlock LYDIAN = 2260 new UnicodeBlock("LYDIAN"); 2261 2262 /** 2263 * Constant for the "Kharoshthi" Unicode character block. 
2264 * @since 1.7 2265 */ 2266 public static final UnicodeBlock KHAROSHTHI = 2267 new UnicodeBlock("KHAROSHTHI"); 2268 2269 /** 2270 * Constant for the "Old South Arabian" Unicode character block. 2271 * @since 1.7 2272 */ 2273 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2274 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2275 "OLD SOUTH ARABIAN", 2276 "OLDSOUTHARABIAN"); 2277 2278 /** 2279 * Constant for the "Avestan" Unicode character block. 2280 * @since 1.7 2281 */ 2282 public static final UnicodeBlock AVESTAN = 2283 new UnicodeBlock("AVESTAN"); 2284 2285 /** 2286 * Constant for the "Inscriptional Parthian" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2290 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2291 "INSCRIPTIONAL PARTHIAN", 2292 "INSCRIPTIONALPARTHIAN"); 2293 2294 /** 2295 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2296 * @since 1.7 2297 */ 2298 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2299 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2300 "INSCRIPTIONAL PAHLAVI", 2301 "INSCRIPTIONALPAHLAVI"); 2302 2303 /** 2304 * Constant for the "Old Turkic" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock OLD_TURKIC = 2308 new UnicodeBlock("OLD_TURKIC", 2309 "OLD TURKIC", 2310 "OLDTURKIC"); 2311 2312 /** 2313 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2314 * @since 1.7 2315 */ 2316 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2317 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2318 "RUMI NUMERAL SYMBOLS", 2319 "RUMINUMERALSYMBOLS"); 2320 2321 /** 2322 * Constant for the "Brahmi" Unicode character block. 2323 * @since 1.7 2324 */ 2325 public static final UnicodeBlock BRAHMI = 2326 new UnicodeBlock("BRAHMI"); 2327 2328 /** 2329 * Constant for the "Kaithi" Unicode character block. 
2330 * @since 1.7 2331 */ 2332 public static final UnicodeBlock KAITHI = 2333 new UnicodeBlock("KAITHI"); 2334 2335 /** 2336 * Constant for the "Cuneiform" Unicode character block. 2337 * @since 1.7 2338 */ 2339 public static final UnicodeBlock CUNEIFORM = 2340 new UnicodeBlock("CUNEIFORM"); 2341 2342 /** 2343 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2344 * character block. 2345 * @since 1.7 2346 */ 2347 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2348 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2349 "CUNEIFORM NUMBERS AND PUNCTUATION", 2350 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2351 2352 /** 2353 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2354 * @since 1.7 2355 */ 2356 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2357 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2358 "EGYPTIAN HIEROGLYPHS", 2359 "EGYPTIANHIEROGLYPHS"); 2360 2361 /** 2362 * Constant for the "Bamum Supplement" Unicode character block. 2363 * @since 1.7 2364 */ 2365 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2366 new UnicodeBlock("BAMUM_SUPPLEMENT", 2367 "BAMUM SUPPLEMENT", 2368 "BAMUMSUPPLEMENT"); 2369 2370 /** 2371 * Constant for the "Kana Supplement" Unicode character block. 2372 * @since 1.7 2373 */ 2374 public static final UnicodeBlock KANA_SUPPLEMENT = 2375 new UnicodeBlock("KANA_SUPPLEMENT", 2376 "KANA SUPPLEMENT", 2377 "KANASUPPLEMENT"); 2378 2379 /** 2380 * Constant for the "Ancient Greek Musical Notation" Unicode character 2381 * block. 2382 * @since 1.7 2383 */ 2384 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2385 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2386 "ANCIENT GREEK MUSICAL NOTATION", 2387 "ANCIENTGREEKMUSICALNOTATION"); 2388 2389 /** 2390 * Constant for the "Counting Rod Numerals" Unicode character block. 
2391 * @since 1.7 2392 */ 2393 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2394 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2395 "COUNTING ROD NUMERALS", 2396 "COUNTINGRODNUMERALS"); 2397 2398 /** 2399 * Constant for the "Mahjong Tiles" Unicode character block. 2400 * @since 1.7 2401 */ 2402 public static final UnicodeBlock MAHJONG_TILES = 2403 new UnicodeBlock("MAHJONG_TILES", 2404 "MAHJONG TILES", 2405 "MAHJONGTILES"); 2406 2407 /** 2408 * Constant for the "Domino Tiles" Unicode character block. 2409 * @since 1.7 2410 */ 2411 public static final UnicodeBlock DOMINO_TILES = 2412 new UnicodeBlock("DOMINO_TILES", 2413 "DOMINO TILES", 2414 "DOMINOTILES"); 2415 2416 /** 2417 * Constant for the "Playing Cards" Unicode character block. 2418 * @since 1.7 2419 */ 2420 public static final UnicodeBlock PLAYING_CARDS = 2421 new UnicodeBlock("PLAYING_CARDS", 2422 "PLAYING CARDS", 2423 "PLAYINGCARDS"); 2424 2425 /** 2426 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2427 * block. 2428 * @since 1.7 2429 */ 2430 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2431 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2432 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2433 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2434 2435 /** 2436 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2437 * block. 2438 * @since 1.7 2439 */ 2440 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2441 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2442 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2443 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2444 2445 /** 2446 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2447 * character block. 
2448 * @since 1.7 2449 */ 2450 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2451 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2452 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2453 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2454 2455 /** 2456 * Constant for the "Emoticons" Unicode character block. 2457 * @since 1.7 2458 */ 2459 public static final UnicodeBlock EMOTICONS = 2460 new UnicodeBlock("EMOTICONS"); 2461 2462 /** 2463 * Constant for the "Transport And Map Symbols" Unicode character block. 2464 * @since 1.7 2465 */ 2466 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2467 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2468 "TRANSPORT AND MAP SYMBOLS", 2469 "TRANSPORTANDMAPSYMBOLS"); 2470 2471 /** 2472 * Constant for the "Alchemical Symbols" Unicode character block. 2473 * @since 1.7 2474 */ 2475 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2476 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2477 "ALCHEMICAL SYMBOLS", 2478 "ALCHEMICALSYMBOLS"); 2479 2480 /** 2481 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2482 * character block. 2483 * @since 1.7 2484 */ 2485 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2486 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2487 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2488 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2489 2490 /** 2491 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2492 * character block. 2493 * @since 1.7 2494 */ 2495 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2496 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2497 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2498 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2499 2500 /** 2501 * Constant for the "Arabic Extended-A" Unicode character block. 
2502 * @since 1.8 2503 */ 2504 public static final UnicodeBlock ARABIC_EXTENDED_A = 2505 new UnicodeBlock("ARABIC_EXTENDED_A", 2506 "ARABIC EXTENDED-A", 2507 "ARABICEXTENDED-A"); 2508 2509 /** 2510 * Constant for the "Sundanese Supplement" Unicode character block. 2511 * @since 1.8 2512 */ 2513 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2514 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2515 "SUNDANESE SUPPLEMENT", 2516 "SUNDANESESUPPLEMENT"); 2517 2518 /** 2519 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2520 * @since 1.8 2521 */ 2522 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2523 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2524 "MEETEI MAYEK EXTENSIONS", 2525 "MEETEIMAYEKEXTENSIONS"); 2526 2527 /** 2528 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2529 * @since 1.8 2530 */ 2531 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2532 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2533 "MEROITIC HIEROGLYPHS", 2534 "MEROITICHIEROGLYPHS"); 2535 2536 /** 2537 * Constant for the "Meroitic Cursive" Unicode character block. 2538 * @since 1.8 2539 */ 2540 public static final UnicodeBlock MEROITIC_CURSIVE = 2541 new UnicodeBlock("MEROITIC_CURSIVE", 2542 "MEROITIC CURSIVE", 2543 "MEROITICCURSIVE"); 2544 2545 /** 2546 * Constant for the "Sora Sompeng" Unicode character block. 2547 * @since 1.8 2548 */ 2549 public static final UnicodeBlock SORA_SOMPENG = 2550 new UnicodeBlock("SORA_SOMPENG", 2551 "SORA SOMPENG", 2552 "SORASOMPENG"); 2553 2554 /** 2555 * Constant for the "Chakma" Unicode character block. 2556 * @since 1.8 2557 */ 2558 public static final UnicodeBlock CHAKMA = 2559 new UnicodeBlock("CHAKMA"); 2560 2561 /** 2562 * Constant for the "Sharada" Unicode character block. 2563 * @since 1.8 2564 */ 2565 public static final UnicodeBlock SHARADA = 2566 new UnicodeBlock("SHARADA"); 2567 2568 /** 2569 * Constant for the "Takri" Unicode character block. 
2570 * @since 1.8 2571 */ 2572 public static final UnicodeBlock TAKRI = 2573 new UnicodeBlock("TAKRI"); 2574 2575 /** 2576 * Constant for the "Miao" Unicode character block. 2577 * @since 1.8 2578 */ 2579 public static final UnicodeBlock MIAO = 2580 new UnicodeBlock("MIAO"); 2581 2582 /** 2583 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2584 * character block. 2585 * @since 1.8 2586 */ 2587 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2588 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2589 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2590 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2591 2592 /** 2593 * Constant for the "Combining Diacritical Marks Extended" Unicode 2594 * character block. 2595 * @since 9 2596 */ 2597 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2598 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2599 "COMBINING DIACRITICAL MARKS EXTENDED", 2600 "COMBININGDIACRITICALMARKSEXTENDED"); 2601 2602 /** 2603 * Constant for the "Myanmar Extended-B" Unicode character block. 2604 * @since 9 2605 */ 2606 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2607 new UnicodeBlock("MYANMAR_EXTENDED_B", 2608 "MYANMAR EXTENDED-B", 2609 "MYANMAREXTENDED-B"); 2610 2611 /** 2612 * Constant for the "Latin Extended-E" Unicode character block. 2613 * @since 9 2614 */ 2615 public static final UnicodeBlock LATIN_EXTENDED_E = 2616 new UnicodeBlock("LATIN_EXTENDED_E", 2617 "LATIN EXTENDED-E", 2618 "LATINEXTENDED-E"); 2619 2620 /** 2621 * Constant for the "Coptic Epact Numbers" Unicode character block. 2622 * @since 9 2623 */ 2624 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2625 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2626 "COPTIC EPACT NUMBERS", 2627 "COPTICEPACTNUMBERS"); 2628 2629 /** 2630 * Constant for the "Old Permic" Unicode character block. 
2631 * @since 9 2632 */ 2633 public static final UnicodeBlock OLD_PERMIC = 2634 new UnicodeBlock("OLD_PERMIC", 2635 "OLD PERMIC", 2636 "OLDPERMIC"); 2637 2638 /** 2639 * Constant for the "Elbasan" Unicode character block. 2640 * @since 9 2641 */ 2642 public static final UnicodeBlock ELBASAN = 2643 new UnicodeBlock("ELBASAN"); 2644 2645 /** 2646 * Constant for the "Caucasian Albanian" Unicode character block. 2647 * @since 9 2648 */ 2649 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2650 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2651 "CAUCASIAN ALBANIAN", 2652 "CAUCASIANALBANIAN"); 2653 2654 /** 2655 * Constant for the "Linear A" Unicode character block. 2656 * @since 9 2657 */ 2658 public static final UnicodeBlock LINEAR_A = 2659 new UnicodeBlock("LINEAR_A", 2660 "LINEAR A", 2661 "LINEARA"); 2662 2663 /** 2664 * Constant for the "Palmyrene" Unicode character block. 2665 * @since 9 2666 */ 2667 public static final UnicodeBlock PALMYRENE = 2668 new UnicodeBlock("PALMYRENE"); 2669 2670 /** 2671 * Constant for the "Nabataean" Unicode character block. 2672 * @since 9 2673 */ 2674 public static final UnicodeBlock NABATAEAN = 2675 new UnicodeBlock("NABATAEAN"); 2676 2677 /** 2678 * Constant for the "Old North Arabian" Unicode character block. 2679 * @since 9 2680 */ 2681 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2682 new UnicodeBlock("OLD_NORTH_ARABIAN", 2683 "OLD NORTH ARABIAN", 2684 "OLDNORTHARABIAN"); 2685 2686 /** 2687 * Constant for the "Manichaean" Unicode character block. 2688 * @since 9 2689 */ 2690 public static final UnicodeBlock MANICHAEAN = 2691 new UnicodeBlock("MANICHAEAN"); 2692 2693 /** 2694 * Constant for the "Psalter Pahlavi" Unicode character block. 2695 * @since 9 2696 */ 2697 public static final UnicodeBlock PSALTER_PAHLAVI = 2698 new UnicodeBlock("PSALTER_PAHLAVI", 2699 "PSALTER PAHLAVI", 2700 "PSALTERPAHLAVI"); 2701 2702 /** 2703 * Constant for the "Mahajani" Unicode character block. 
2704 * @since 9 2705 */ 2706 public static final UnicodeBlock MAHAJANI = 2707 new UnicodeBlock("MAHAJANI"); 2708 2709 /** 2710 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2711 * @since 9 2712 */ 2713 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2714 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2715 "SINHALA ARCHAIC NUMBERS", 2716 "SINHALAARCHAICNUMBERS"); 2717 2718 /** 2719 * Constant for the "Khojki" Unicode character block. 2720 * @since 9 2721 */ 2722 public static final UnicodeBlock KHOJKI = 2723 new UnicodeBlock("KHOJKI"); 2724 2725 /** 2726 * Constant for the "Khudawadi" Unicode character block. 2727 * @since 9 2728 */ 2729 public static final UnicodeBlock KHUDAWADI = 2730 new UnicodeBlock("KHUDAWADI"); 2731 2732 /** 2733 * Constant for the "Grantha" Unicode character block. 2734 * @since 9 2735 */ 2736 public static final UnicodeBlock GRANTHA = 2737 new UnicodeBlock("GRANTHA"); 2738 2739 /** 2740 * Constant for the "Tirhuta" Unicode character block. 2741 * @since 9 2742 */ 2743 public static final UnicodeBlock TIRHUTA = 2744 new UnicodeBlock("TIRHUTA"); 2745 2746 /** 2747 * Constant for the "Siddham" Unicode character block. 2748 * @since 9 2749 */ 2750 public static final UnicodeBlock SIDDHAM = 2751 new UnicodeBlock("SIDDHAM"); 2752 2753 /** 2754 * Constant for the "Modi" Unicode character block. 2755 * @since 9 2756 */ 2757 public static final UnicodeBlock MODI = 2758 new UnicodeBlock("MODI"); 2759 2760 /** 2761 * Constant for the "Warang Citi" Unicode character block. 2762 * @since 9 2763 */ 2764 public static final UnicodeBlock WARANG_CITI = 2765 new UnicodeBlock("WARANG_CITI", 2766 "WARANG CITI", 2767 "WARANGCITI"); 2768 2769 /** 2770 * Constant for the "Pau Cin Hau" Unicode character block. 2771 * @since 9 2772 */ 2773 public static final UnicodeBlock PAU_CIN_HAU = 2774 new UnicodeBlock("PAU_CIN_HAU", 2775 "PAU CIN HAU", 2776 "PAUCINHAU"); 2777 2778 /** 2779 * Constant for the "Mro" Unicode character block. 
2780 * @since 9 2781 */ 2782 public static final UnicodeBlock MRO = 2783 new UnicodeBlock("MRO"); 2784 2785 /** 2786 * Constant for the "Bassa Vah" Unicode character block. 2787 * @since 9 2788 */ 2789 public static final UnicodeBlock BASSA_VAH = 2790 new UnicodeBlock("BASSA_VAH", 2791 "BASSA VAH", 2792 "BASSAVAH"); 2793 2794 /** 2795 * Constant for the "Pahawh Hmong" Unicode character block. 2796 * @since 9 2797 */ 2798 public static final UnicodeBlock PAHAWH_HMONG = 2799 new UnicodeBlock("PAHAWH_HMONG", 2800 "PAHAWH HMONG", 2801 "PAHAWHHMONG"); 2802 2803 /** 2804 * Constant for the "Duployan" Unicode character block. 2805 * @since 9 2806 */ 2807 public static final UnicodeBlock DUPLOYAN = 2808 new UnicodeBlock("DUPLOYAN"); 2809 2810 /** 2811 * Constant for the "Shorthand Format Controls" Unicode character block. 2812 * @since 9 2813 */ 2814 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2815 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2816 "SHORTHAND FORMAT CONTROLS", 2817 "SHORTHANDFORMATCONTROLS"); 2818 2819 /** 2820 * Constant for the "Mende Kikakui" Unicode character block. 2821 * @since 9 2822 */ 2823 public static final UnicodeBlock MENDE_KIKAKUI = 2824 new UnicodeBlock("MENDE_KIKAKUI", 2825 "MENDE KIKAKUI", 2826 "MENDEKIKAKUI"); 2827 2828 /** 2829 * Constant for the "Ornamental Dingbats" Unicode character block. 2830 * @since 9 2831 */ 2832 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2833 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2834 "ORNAMENTAL DINGBATS", 2835 "ORNAMENTALDINGBATS"); 2836 2837 /** 2838 * Constant for the "Geometric Shapes Extended" Unicode character block. 2839 * @since 9 2840 */ 2841 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2842 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2843 "GEOMETRIC SHAPES EXTENDED", 2844 "GEOMETRICSHAPESEXTENDED"); 2845 2846 /** 2847 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2848 * @since 9 2849 */ 2850 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2851 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2852 "SUPPLEMENTAL ARROWS-C", 2853 "SUPPLEMENTALARROWS-C"); 2854 2855 /** 2856 * Constant for the "Cherokee Supplement" Unicode character block. 2857 * @since 9 2858 */ 2859 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2860 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2861 "CHEROKEE SUPPLEMENT", 2862 "CHEROKEESUPPLEMENT"); 2863 2864 /** 2865 * Constant for the "Hatran" Unicode character block. 2866 * @since 9 2867 */ 2868 public static final UnicodeBlock HATRAN = 2869 new UnicodeBlock("HATRAN"); 2870 2871 /** 2872 * Constant for the "Old Hungarian" Unicode character block. 2873 * @since 9 2874 */ 2875 public static final UnicodeBlock OLD_HUNGARIAN = 2876 new UnicodeBlock("OLD_HUNGARIAN", 2877 "OLD HUNGARIAN", 2878 "OLDHUNGARIAN"); 2879 2880 /** 2881 * Constant for the "Multani" Unicode character block. 2882 * @since 9 2883 */ 2884 public static final UnicodeBlock MULTANI = 2885 new UnicodeBlock("MULTANI"); 2886 2887 /** 2888 * Constant for the "Ahom" Unicode character block. 2889 * @since 9 2890 */ 2891 public static final UnicodeBlock AHOM = 2892 new UnicodeBlock("AHOM"); 2893 2894 /** 2895 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2896 * @since 9 2897 */ 2898 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2899 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2900 "EARLY DYNASTIC CUNEIFORM", 2901 "EARLYDYNASTICCUNEIFORM"); 2902 2903 /** 2904 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2905 * @since 9 2906 */ 2907 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2908 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2909 "ANATOLIAN HIEROGLYPHS", 2910 "ANATOLIANHIEROGLYPHS"); 2911 2912 /** 2913 * Constant for the "Sutton SignWriting" Unicode character block. 
2914 * @since 9 2915 */ 2916 public static final UnicodeBlock SUTTON_SIGNWRITING = 2917 new UnicodeBlock("SUTTON_SIGNWRITING", 2918 "SUTTON SIGNWRITING", 2919 "SUTTONSIGNWRITING"); 2920 2921 /** 2922 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2923 * character block. 2924 * @since 9 2925 */ 2926 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2927 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2928 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2929 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 2930 2931 /** 2932 * Constant for the "CJK Unified Ideographs Extension E" Unicode 2933 * character block. 2934 * @since 9 2935 */ 2936 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2937 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2938 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 2939 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 2940 2941 /** 2942 * Constant for the "Syriac Supplement" Unicode 2943 * character block. 2944 * @since 11 2945 */ 2946 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2947 new UnicodeBlock("SYRIAC_SUPPLEMENT", 2948 "SYRIAC SUPPLEMENT", 2949 "SYRIACSUPPLEMENT"); 2950 2951 /** 2952 * Constant for the "Cyrillic Extended-C" Unicode 2953 * character block. 2954 * @since 11 2955 */ 2956 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2957 new UnicodeBlock("CYRILLIC_EXTENDED_C", 2958 "CYRILLIC EXTENDED-C", 2959 "CYRILLICEXTENDED-C"); 2960 2961 /** 2962 * Constant for the "Osage" Unicode 2963 * character block. 2964 * @since 11 2965 */ 2966 public static final UnicodeBlock OSAGE = 2967 new UnicodeBlock("OSAGE"); 2968 2969 /** 2970 * Constant for the "Newa" Unicode 2971 * character block. 2972 * @since 11 2973 */ 2974 public static final UnicodeBlock NEWA = 2975 new UnicodeBlock("NEWA"); 2976 2977 /** 2978 * Constant for the "Mongolian Supplement" Unicode 2979 * character block. 
2980 * @since 11 2981 */ 2982 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2983 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 2984 "MONGOLIAN SUPPLEMENT", 2985 "MONGOLIANSUPPLEMENT"); 2986 2987 /** 2988 * Constant for the "Marchen" Unicode 2989 * character block. 2990 * @since 11 2991 */ 2992 public static final UnicodeBlock MARCHEN = 2993 new UnicodeBlock("MARCHEN"); 2994 2995 /** 2996 * Constant for the "Ideographic Symbols and Punctuation" Unicode 2997 * character block. 2998 * @since 11 2999 */ 3000 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3001 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3002 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3003 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3004 3005 /** 3006 * Constant for the "Tangut" Unicode 3007 * character block. 3008 * @since 11 3009 */ 3010 public static final UnicodeBlock TANGUT = 3011 new UnicodeBlock("TANGUT"); 3012 3013 /** 3014 * Constant for the "Tangut Components" Unicode 3015 * character block. 3016 * @since 11 3017 */ 3018 public static final UnicodeBlock TANGUT_COMPONENTS = 3019 new UnicodeBlock("TANGUT_COMPONENTS", 3020 "TANGUT COMPONENTS", 3021 "TANGUTCOMPONENTS"); 3022 3023 /** 3024 * Constant for the "Kana Extended-A" Unicode 3025 * character block. 3026 * @since 11 3027 */ 3028 public static final UnicodeBlock KANA_EXTENDED_A = 3029 new UnicodeBlock("KANA_EXTENDED_A", 3030 "KANA EXTENDED-A", 3031 "KANAEXTENDED-A"); 3032 /** 3033 * Constant for the "Glagolitic Supplement" Unicode 3034 * character block. 3035 * @since 11 3036 */ 3037 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3038 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3039 "GLAGOLITIC SUPPLEMENT", 3040 "GLAGOLITICSUPPLEMENT"); 3041 /** 3042 * Constant for the "Adlam" Unicode 3043 * character block. 3044 * @since 11 3045 */ 3046 public static final UnicodeBlock ADLAM = 3047 new UnicodeBlock("ADLAM"); 3048 3049 /** 3050 * Constant for the "Masaram Gondi" Unicode 3051 * character block. 
3052 * @since 11 3053 */ 3054 public static final UnicodeBlock MASARAM_GONDI = 3055 new UnicodeBlock("MASARAM_GONDI", 3056 "MASARAM GONDI", 3057 "MASARAMGONDI"); 3058 3059 /** 3060 * Constant for the "Zanabazar Square" Unicode 3061 * character block. 3062 * @since 11 3063 */ 3064 public static final UnicodeBlock ZANABAZAR_SQUARE = 3065 new UnicodeBlock("ZANABAZAR_SQUARE", 3066 "ZANABAZAR SQUARE", 3067 "ZANABAZARSQUARE"); 3068 3069 /** 3070 * Constant for the "Nushu" Unicode 3071 * character block. 3072 * @since 11 3073 */ 3074 public static final UnicodeBlock NUSHU = 3075 new UnicodeBlock("NUSHU"); 3076 3077 /** 3078 * Constant for the "Soyombo" Unicode 3079 * character block. 3080 * @since 11 3081 */ 3082 public static final UnicodeBlock SOYOMBO = 3083 new UnicodeBlock("SOYOMBO"); 3084 3085 /** 3086 * Constant for the "Bhaiksuki" Unicode 3087 * character block. 3088 * @since 11 3089 */ 3090 public static final UnicodeBlock BHAIKSUKI = 3091 new UnicodeBlock("BHAIKSUKI"); 3092 3093 /** 3094 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3095 * character block. 3096 * @since 11 3097 */ 3098 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3099 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3100 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3101 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3102 /** 3103 * Constant for the "Georgian Extended" Unicode 3104 * character block. 3105 * @since 12 3106 */ 3107 public static final UnicodeBlock GEORGIAN_EXTENDED = 3108 new UnicodeBlock("GEORGIAN_EXTENDED", 3109 "GEORGIAN EXTENDED", 3110 "GEORGIANEXTENDED"); 3111 3112 /** 3113 * Constant for the "Hanifi Rohingya" Unicode 3114 * character block. 3115 * @since 12 3116 */ 3117 public static final UnicodeBlock HANIFI_ROHINGYA = 3118 new UnicodeBlock("HANIFI_ROHINGYA", 3119 "HANIFI ROHINGYA", 3120 "HANIFIROHINGYA"); 3121 3122 /** 3123 * Constant for the "Old Sogdian" Unicode 3124 * character block. 
3125 * @since 12 3126 */ 3127 public static final UnicodeBlock OLD_SOGDIAN = 3128 new UnicodeBlock("OLD_SOGDIAN", 3129 "OLD SOGDIAN", 3130 "OLDSOGDIAN"); 3131 3132 /** 3133 * Constant for the "Sogdian" Unicode 3134 * character block. 3135 * @since 12 3136 */ 3137 public static final UnicodeBlock SOGDIAN = 3138 new UnicodeBlock("SOGDIAN"); 3139 3140 /** 3141 * Constant for the "Dogra" Unicode 3142 * character block. 3143 * @since 12 3144 */ 3145 public static final UnicodeBlock DOGRA = 3146 new UnicodeBlock("DOGRA"); 3147 3148 /** 3149 * Constant for the "Gunjala Gondi" Unicode 3150 * character block. 3151 * @since 12 3152 */ 3153 public static final UnicodeBlock GUNJALA_GONDI = 3154 new UnicodeBlock("GUNJALA_GONDI", 3155 "GUNJALA GONDI", 3156 "GUNJALAGONDI"); 3157 3158 /** 3159 * Constant for the "Makasar" Unicode 3160 * character block. 3161 * @since 12 3162 */ 3163 public static final UnicodeBlock MAKASAR = 3164 new UnicodeBlock("MAKASAR"); 3165 3166 /** 3167 * Constant for the "Medefaidrin" Unicode 3168 * character block. 3169 * @since 12 3170 */ 3171 public static final UnicodeBlock MEDEFAIDRIN = 3172 new UnicodeBlock("MEDEFAIDRIN"); 3173 3174 /** 3175 * Constant for the "Mayan Numerals" Unicode 3176 * character block. 3177 * @since 12 3178 */ 3179 public static final UnicodeBlock MAYAN_NUMERALS = 3180 new UnicodeBlock("MAYAN_NUMERALS", 3181 "MAYAN NUMERALS", 3182 "MAYANNUMERALS"); 3183 3184 /** 3185 * Constant for the "Indic Siyaq Numbers" Unicode 3186 * character block. 3187 * @since 12 3188 */ 3189 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3190 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3191 "INDIC SIYAQ NUMBERS", 3192 "INDICSIYAQNUMBERS"); 3193 3194 /** 3195 * Constant for the "Chess Symbols" Unicode 3196 * character block. 
3197 * @since 12 3198 */ 3199 public static final UnicodeBlock CHESS_SYMBOLS = 3200 new UnicodeBlock("CHESS_SYMBOLS", 3201 "CHESS SYMBOLS", 3202 "CHESSSYMBOLS"); 3203 3204 /** 3205 * Constant for the "Elymaic" Unicode 3206 * character block. 3207 * @since 13 3208 */ 3209 public static final UnicodeBlock ELYMAIC = 3210 new UnicodeBlock("ELYMAIC"); 3211 3212 /** 3213 * Constant for the "Nandinagari" Unicode 3214 * character block. 3215 * @since 13 3216 */ 3217 public static final UnicodeBlock NANDINAGARI = 3218 new UnicodeBlock("NANDINAGARI"); 3219 3220 /** 3221 * Constant for the "Tamil Supplement" Unicode 3222 * character block. 3223 * @since 13 3224 */ 3225 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3226 new UnicodeBlock("TAMIL_SUPPLEMENT", 3227 "TAMIL SUPPLEMENT", 3228 "TAMILSUPPLEMENT"); 3229 3230 /** 3231 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3232 * character block. 3233 * @since 13 3234 */ 3235 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3236 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3237 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3238 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3239 3240 /** 3241 * Constant for the "Small Kana Extension" Unicode 3242 * character block. 3243 * @since 13 3244 */ 3245 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3246 new UnicodeBlock("SMALL_KANA_EXTENSION", 3247 "SMALL KANA EXTENSION", 3248 "SMALLKANAEXTENSION"); 3249 3250 /** 3251 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3252 * character block. 3253 * @since 13 3254 */ 3255 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3256 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3257 "NYIAKENG PUACHUE HMONG", 3258 "NYIAKENGPUACHUEHMONG"); 3259 3260 /** 3261 * Constant for the "Wancho" Unicode 3262 * character block. 
* @since 13
         */
        public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO");

        /**
         * Constant for the "Ottoman Siyaq Numbers" Unicode character block.
         * @since 13
         */
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
            new UnicodeBlock(
                "OTTOMAN_SIYAQ_NUMBERS",
                "OTTOMAN SIYAQ NUMBERS",
                "OTTOMANSIYAQNUMBERS");

        /**
         * Constant for the "Symbols and Pictographs Extended-A" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
            new UnicodeBlock(
                "SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
                "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
                "SYMBOLSANDPICTOGRAPHSEXTENDED-A");

        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   // 0860..086F; Syriac Supplement
            0x0870,   // unassigned
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
0x0D00, // 0D00..0D7F; Malayalam 3320 0x0D80, // 0D80..0DFF; Sinhala 3321 0x0E00, // 0E00..0E7F; Thai 3322 0x0E80, // 0E80..0EFF; Lao 3323 0x0F00, // 0F00..0FFF; Tibetan 3324 0x1000, // 1000..109F; Myanmar 3325 0x10A0, // 10A0..10FF; Georgian 3326 0x1100, // 1100..11FF; Hangul Jamo 3327 0x1200, // 1200..137F; Ethiopic 3328 0x1380, // 1380..139F; Ethiopic Supplement 3329 0x13A0, // 13A0..13FF; Cherokee 3330 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3331 0x1680, // 1680..169F; Ogham 3332 0x16A0, // 16A0..16FF; Runic 3333 0x1700, // 1700..171F; Tagalog 3334 0x1720, // 1720..173F; Hanunoo 3335 0x1740, // 1740..175F; Buhid 3336 0x1760, // 1760..177F; Tagbanwa 3337 0x1780, // 1780..17FF; Khmer 3338 0x1800, // 1800..18AF; Mongolian 3339 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3340 0x1900, // 1900..194F; Limbu 3341 0x1950, // 1950..197F; Tai Le 3342 0x1980, // 1980..19DF; New Tai Lue 3343 0x19E0, // 19E0..19FF; Khmer Symbols 3344 0x1A00, // 1A00..1A1F; Buginese 3345 0x1A20, // 1A20..1AAF; Tai Tham 3346 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3347 0x1B00, // 1B00..1B7F; Balinese 3348 0x1B80, // 1B80..1BBF; Sundanese 3349 0x1BC0, // 1BC0..1BFF; Batak 3350 0x1C00, // 1C00..1C4F; Lepcha 3351 0x1C50, // 1C50..1C7F; Ol Chiki 3352 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3353 0x1C90, // 1C90..1CBF; Georgian Extended 3354 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3355 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3356 0x1D00, // 1D00..1D7F; Phonetic Extensions 3357 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3358 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3359 0x1E00, // 1E00..1EFF; Latin Extended Additional 3360 0x1F00, // 1F00..1FFF; Greek Extended 3361 0x2000, // 2000..206F; General Punctuation 3362 0x2070, // 2070..209F; Superscripts and Subscripts 3363 0x20A0, // 20A0..20CF; Currency Symbols 3364 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3365 0x2100, // 
2100..214F; Letterlike Symbols 3366 0x2150, // 2150..218F; Number Forms 3367 0x2190, // 2190..21FF; Arrows 3368 0x2200, // 2200..22FF; Mathematical Operators 3369 0x2300, // 2300..23FF; Miscellaneous Technical 3370 0x2400, // 2400..243F; Control Pictures 3371 0x2440, // 2440..245F; Optical Character Recognition 3372 0x2460, // 2460..24FF; Enclosed Alphanumerics 3373 0x2500, // 2500..257F; Box Drawing 3374 0x2580, // 2580..259F; Block Elements 3375 0x25A0, // 25A0..25FF; Geometric Shapes 3376 0x2600, // 2600..26FF; Miscellaneous Symbols 3377 0x2700, // 2700..27BF; Dingbats 3378 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3379 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3380 0x2800, // 2800..28FF; Braille Patterns 3381 0x2900, // 2900..297F; Supplemental Arrows-B 3382 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3383 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3384 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3385 0x2C00, // 2C00..2C5F; Glagolitic 3386 0x2C60, // 2C60..2C7F; Latin Extended-C 3387 0x2C80, // 2C80..2CFF; Coptic 3388 0x2D00, // 2D00..2D2F; Georgian Supplement 3389 0x2D30, // 2D30..2D7F; Tifinagh 3390 0x2D80, // 2D80..2DDF; Ethiopic Extended 3391 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3392 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3393 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3394 0x2F00, // 2F00..2FDF; Kangxi Radicals 3395 0x2FE0, // unassigned 3396 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3397 0x3000, // 3000..303F; CJK Symbols and Punctuation 3398 0x3040, // 3040..309F; Hiragana 3399 0x30A0, // 30A0..30FF; Katakana 3400 0x3100, // 3100..312F; Bopomofo 3401 0x3130, // 3130..318F; Hangul Compatibility Jamo 3402 0x3190, // 3190..319F; Kanbun 3403 0x31A0, // 31A0..31BF; Bopomofo Extended 3404 0x31C0, // 31C0..31EF; CJK Strokes 3405 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3406 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3407 0x3300, // 
3300..33FF; CJK Compatibility 3408 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3409 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3410 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3411 0xA000, // A000..A48F; Yi Syllables 3412 0xA490, // A490..A4CF; Yi Radicals 3413 0xA4D0, // A4D0..A4FF; Lisu 3414 0xA500, // A500..A63F; Vai 3415 0xA640, // A640..A69F; Cyrillic Extended-B 3416 0xA6A0, // A6A0..A6FF; Bamum 3417 0xA700, // A700..A71F; Modifier Tone Letters 3418 0xA720, // A720..A7FF; Latin Extended-D 3419 0xA800, // A800..A82F; Syloti Nagri 3420 0xA830, // A830..A83F; Common Indic Number Forms 3421 0xA840, // A840..A87F; Phags-pa 3422 0xA880, // A880..A8DF; Saurashtra 3423 0xA8E0, // A8E0..A8FF; Devanagari Extended 3424 0xA900, // A900..A92F; Kayah Li 3425 0xA930, // A930..A95F; Rejang 3426 0xA960, // A960..A97F; Hangul Jamo Extended-A 3427 0xA980, // A980..A9DF; Javanese 3428 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3429 0xAA00, // AA00..AA5F; Cham 3430 0xAA60, // AA60..AA7F; Myanmar Extended-A 3431 0xAA80, // AA80..AADF; Tai Viet 3432 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3433 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3434 0xAB30, // AB30..AB6F; Latin Extended-E 3435 0xAB70, // AB70..ABBF; Cherokee Supplement 3436 0xABC0, // ABC0..ABFF; Meetei Mayek 3437 0xAC00, // AC00..D7AF; Hangul Syllables 3438 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3439 0xD800, // D800..DB7F; High Surrogates 3440 0xDB80, // DB80..DBFF; High Private Use Surrogates 3441 0xDC00, // DC00..DFFF; Low Surrogates 3442 0xE000, // E000..F8FF; Private Use Area 3443 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3444 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3445 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3446 0xFE00, // FE00..FE0F; Variation Selectors 3447 0xFE10, // FE10..FE1F; Vertical Forms 3448 0xFE20, // FE20..FE2F; Combining Half Marks 3449 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3450 0xFE50, // FE50..FE6F; Small Form Variants 3451 
0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3452 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3453 0xFFF0, // FFF0..FFFF; Specials 3454 0x10000, // 10000..1007F; Linear B Syllabary 3455 0x10080, // 10080..100FF; Linear B Ideograms 3456 0x10100, // 10100..1013F; Aegean Numbers 3457 0x10140, // 10140..1018F; Ancient Greek Numbers 3458 0x10190, // 10190..101CF; Ancient Symbols 3459 0x101D0, // 101D0..101FF; Phaistos Disc 3460 0x10200, // unassigned 3461 0x10280, // 10280..1029F; Lycian 3462 0x102A0, // 102A0..102DF; Carian 3463 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3464 0x10300, // 10300..1032F; Old Italic 3465 0x10330, // 10330..1034F; Gothic 3466 0x10350, // 10350..1037F; Old Permic 3467 0x10380, // 10380..1039F; Ugaritic 3468 0x103A0, // 103A0..103DF; Old Persian 3469 0x103E0, // unassigned 3470 0x10400, // 10400..1044F; Deseret 3471 0x10450, // 10450..1047F; Shavian 3472 0x10480, // 10480..104AF; Osmanya 3473 0x104B0, // 104B0..104FF; Osage 3474 0x10500, // 10500..1052F; Elbasan 3475 0x10530, // 10530..1056F; Caucasian Albanian 3476 0x10570, // unassigned 3477 0x10600, // 10600..1077F; Linear A 3478 0x10780, // unassigned 3479 0x10800, // 10800..1083F; Cypriot Syllabary 3480 0x10840, // 10840..1085F; Imperial Aramaic 3481 0x10860, // 10860..1087F; Palmyrene 3482 0x10880, // 10880..108AF; Nabataean 3483 0x108B0, // unassigned 3484 0x108E0, // 108E0..108FF; Hatran 3485 0x10900, // 10900..1091F; Phoenician 3486 0x10920, // 10920..1093F; Lydian 3487 0x10940, // unassigned 3488 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3489 0x109A0, // 109A0..109FF; Meroitic Cursive 3490 0x10A00, // 10A00..10A5F; Kharoshthi 3491 0x10A60, // 10A60..10A7F; Old South Arabian 3492 0x10A80, // 10A80..10A9F; Old North Arabian 3493 0x10AA0, // unassigned 3494 0x10AC0, // 10AC0..10AFF; Manichaean 3495 0x10B00, // 10B00..10B3F; Avestan 3496 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3497 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3498 0x10B80, // 
10B80..10BAF; Psalter Pahlavi 3499 0x10BB0, // unassigned 3500 0x10C00, // 10C00..10C4F; Old Turkic 3501 0x10C50, // unassigned 3502 0x10C80, // 10C80..10CFF; Old Hungarian 3503 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3504 0x10D40, // unassigned 3505 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3506 0x10E80, // unassigned 3507 0x10F00, // 10F00..10F2F; Old Sogdian 3508 0x10F30, // 10F30..10F6F; Sogdian 3509 0x10F70, // unassigned 3510 0x10FE0, // 10FE0..10FFF; Elymaic 3511 0x11000, // 11000..1107F; Brahmi 3512 0x11080, // 11080..110CF; Kaithi 3513 0x110D0, // 110D0..110FF; Sora Sompeng 3514 0x11100, // 11100..1114F; Chakma 3515 0x11150, // 11150..1117F; Mahajani 3516 0x11180, // 11180..111DF; Sharada 3517 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3518 0x11200, // 11200..1124F; Khojki 3519 0x11250, // unassigned 3520 0x11280, // 11280..112AF; Multani 3521 0x112B0, // 112B0..112FF; Khudawadi 3522 0x11300, // 11300..1137F; Grantha 3523 0x11380, // unassigned 3524 0x11400, // 11400..1147F; Newa 3525 0x11480, // 11480..114DF; Tirhuta 3526 0x114E0, // unassigned 3527 0x11580, // 11580..115FF; Siddham 3528 0x11600, // 11600..1165F; Modi 3529 0x11660, // 11660..1167F; Mongolian Supplement 3530 0x11680, // 11680..116CF; Takri 3531 0x116D0, // unassigned 3532 0x11700, // 11700..1173F; Ahom 3533 0x11740, // unassigned 3534 0x11800, // 11800..1184F; Dogra 3535 0x11850, // unassigned 3536 0x118A0, // 118A0..118FF; Warang Citi 3537 0x11900, // unassigned 3538 0x119A0, // 119A0..119FF; Nandinagari 3539 0x11A00, // 11A00..11A4F; Zanabazar Square 3540 0x11A50, // 11A50..11AAF; Soyombo 3541 0x11AB0, // unassigned 3542 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3543 0x11B00, // unassigned 3544 0x11C00, // 11C00..11C6F; Bhaiksuki 3545 0x11C70, // 11C70..11CBF; Marchen 3546 0x11CC0, // unassigned 3547 0x11D00, // 11D00..11D5F; Masaram Gondi 3548 0x11D60, // 11D60..11DAF; Gunjala Gondi 3549 0x11DB0, // unassigned 3550 0x11EE0, // 11EE0..11EFF; Makasar 3551 0x11F00, // unassigned 
3552 0x11FC0, // 11FC0..11FFF; Tamil Supplement 3553 0x12000, // 12000..123FF; Cuneiform 3554 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3555 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3556 0x12550, // unassigned 3557 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3558 0x13430, // 13430..1343F; Egyptian Hieroglyph Format Controls 3559 0x13440, // unassigned 3560 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3561 0x14680, // unassigned 3562 0x16800, // 16800..16A3F; Bamum Supplement 3563 0x16A40, // 16A40..16A6F; Mro 3564 0x16A70, // unassigned 3565 0x16AD0, // 16AD0..16AFF; Bassa Vah 3566 0x16B00, // 16B00..16B8F; Pahawh Hmong 3567 0x16B90, // unassigned 3568 0x16E40, // 16E40..16E9F; Medefaidrin 3569 0x16EA0, // unassigned 3570 0x16F00, // 16F00..16F9F; Miao 3571 0x16FA0, // unassigned 3572 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3573 0x17000, // 17000..187FF; Tangut 3574 0x18800, // 18800..18AFF; Tangut Components 3575 0x18B00, // unassigned 3576 0x1B000, // 1B000..1B0FF; Kana Supplement 3577 0x1B100, // 1B100..1B12F; Kana Extended-A 3578 0x1B130, // 1B130..1B16F; Small Kana Extension 3579 0x1B170, // 1B170..1B2FF; Nushu 3580 0x1B300, // unassigned 3581 0x1BC00, // 1BC00..1BC9F; Duployan 3582 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3583 0x1BCB0, // unassigned 3584 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3585 0x1D100, // 1D100..1D1FF; Musical Symbols 3586 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3587 0x1D250, // unassigned 3588 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3589 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3590 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3591 0x1D380, // unassigned 3592 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3593 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3594 0x1DAB0, // unassigned 3595 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3596 0x1E030, // unassigned 3597 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 3598 
0x1E150, // unassigned 3599 0x1E2C0, // 1E2C0..1E2FF; Wancho 3600 0x1E300, // unassigned 3601 0x1E800, // 1E800..1E8DF; Mende Kikakui 3602 0x1E8E0, // unassigned 3603 0x1E900, // 1E900..1E95F; Adlam 3604 0x1E960, // unassigned 3605 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3606 0x1ECC0, // unassigned 3607 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 3608 0x1ED50, // unassigned 3609 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3610 0x1EF00, // unassigned 3611 0x1F000, // 1F000..1F02F; Mahjong Tiles 3612 0x1F030, // 1F030..1F09F; Domino Tiles 3613 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3614 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3615 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3616 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3617 0x1F600, // 1F600..1F64F; Emoticons 3618 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3619 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3620 0x1F700, // 1F700..1F77F; Alchemical Symbols 3621 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3622 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C 3623 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3624 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3625 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 3626 0x1FB00, // unassigned 3627 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3628 0x2A6E0, // unassigned 3629 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3630 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3631 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 3632 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 3633 0x2EBF0, // unassigned 3634 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 3635 0x2FA20, // unassigned 3636 0xE0000, // E0000..E007F; Tags 3637 0xE0080, // unassigned 3638 0xE0100, // E0100..E01EF; Variation Selectors Supplement 3639 0xE01F0, // unassigned 3640 0xF0000, // 
F0000..FFFFF; Supplementary Private Use Area-A 3641 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 3642 }; 3643 3644 private static final UnicodeBlock[] blocks = { 3645 BASIC_LATIN, 3646 LATIN_1_SUPPLEMENT, 3647 LATIN_EXTENDED_A, 3648 LATIN_EXTENDED_B, 3649 IPA_EXTENSIONS, 3650 SPACING_MODIFIER_LETTERS, 3651 COMBINING_DIACRITICAL_MARKS, 3652 GREEK, 3653 CYRILLIC, 3654 CYRILLIC_SUPPLEMENTARY, 3655 ARMENIAN, 3656 HEBREW, 3657 ARABIC, 3658 SYRIAC, 3659 ARABIC_SUPPLEMENT, 3660 THAANA, 3661 NKO, 3662 SAMARITAN, 3663 MANDAIC, 3664 SYRIAC_SUPPLEMENT, 3665 null, 3666 ARABIC_EXTENDED_A, 3667 DEVANAGARI, 3668 BENGALI, 3669 GURMUKHI, 3670 GUJARATI, 3671 ORIYA, 3672 TAMIL, 3673 TELUGU, 3674 KANNADA, 3675 MALAYALAM, 3676 SINHALA, 3677 THAI, 3678 LAO, 3679 TIBETAN, 3680 MYANMAR, 3681 GEORGIAN, 3682 HANGUL_JAMO, 3683 ETHIOPIC, 3684 ETHIOPIC_SUPPLEMENT, 3685 CHEROKEE, 3686 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 3687 OGHAM, 3688 RUNIC, 3689 TAGALOG, 3690 HANUNOO, 3691 BUHID, 3692 TAGBANWA, 3693 KHMER, 3694 MONGOLIAN, 3695 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 3696 LIMBU, 3697 TAI_LE, 3698 NEW_TAI_LUE, 3699 KHMER_SYMBOLS, 3700 BUGINESE, 3701 TAI_THAM, 3702 COMBINING_DIACRITICAL_MARKS_EXTENDED, 3703 BALINESE, 3704 SUNDANESE, 3705 BATAK, 3706 LEPCHA, 3707 OL_CHIKI, 3708 CYRILLIC_EXTENDED_C, 3709 GEORGIAN_EXTENDED, 3710 SUNDANESE_SUPPLEMENT, 3711 VEDIC_EXTENSIONS, 3712 PHONETIC_EXTENSIONS, 3713 PHONETIC_EXTENSIONS_SUPPLEMENT, 3714 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 3715 LATIN_EXTENDED_ADDITIONAL, 3716 GREEK_EXTENDED, 3717 GENERAL_PUNCTUATION, 3718 SUPERSCRIPTS_AND_SUBSCRIPTS, 3719 CURRENCY_SYMBOLS, 3720 COMBINING_MARKS_FOR_SYMBOLS, 3721 LETTERLIKE_SYMBOLS, 3722 NUMBER_FORMS, 3723 ARROWS, 3724 MATHEMATICAL_OPERATORS, 3725 MISCELLANEOUS_TECHNICAL, 3726 CONTROL_PICTURES, 3727 OPTICAL_CHARACTER_RECOGNITION, 3728 ENCLOSED_ALPHANUMERICS, 3729 BOX_DRAWING, 3730 BLOCK_ELEMENTS, 3731 GEOMETRIC_SHAPES, 3732 MISCELLANEOUS_SYMBOLS, 3733 DINGBATS, 3734 
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 3735 SUPPLEMENTAL_ARROWS_A, 3736 BRAILLE_PATTERNS, 3737 SUPPLEMENTAL_ARROWS_B, 3738 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 3739 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 3740 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 3741 GLAGOLITIC, 3742 LATIN_EXTENDED_C, 3743 COPTIC, 3744 GEORGIAN_SUPPLEMENT, 3745 TIFINAGH, 3746 ETHIOPIC_EXTENDED, 3747 CYRILLIC_EXTENDED_A, 3748 SUPPLEMENTAL_PUNCTUATION, 3749 CJK_RADICALS_SUPPLEMENT, 3750 KANGXI_RADICALS, 3751 null, 3752 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 3753 CJK_SYMBOLS_AND_PUNCTUATION, 3754 HIRAGANA, 3755 KATAKANA, 3756 BOPOMOFO, 3757 HANGUL_COMPATIBILITY_JAMO, 3758 KANBUN, 3759 BOPOMOFO_EXTENDED, 3760 CJK_STROKES, 3761 KATAKANA_PHONETIC_EXTENSIONS, 3762 ENCLOSED_CJK_LETTERS_AND_MONTHS, 3763 CJK_COMPATIBILITY, 3764 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 3765 YIJING_HEXAGRAM_SYMBOLS, 3766 CJK_UNIFIED_IDEOGRAPHS, 3767 YI_SYLLABLES, 3768 YI_RADICALS, 3769 LISU, 3770 VAI, 3771 CYRILLIC_EXTENDED_B, 3772 BAMUM, 3773 MODIFIER_TONE_LETTERS, 3774 LATIN_EXTENDED_D, 3775 SYLOTI_NAGRI, 3776 COMMON_INDIC_NUMBER_FORMS, 3777 PHAGS_PA, 3778 SAURASHTRA, 3779 DEVANAGARI_EXTENDED, 3780 KAYAH_LI, 3781 REJANG, 3782 HANGUL_JAMO_EXTENDED_A, 3783 JAVANESE, 3784 MYANMAR_EXTENDED_B, 3785 CHAM, 3786 MYANMAR_EXTENDED_A, 3787 TAI_VIET, 3788 MEETEI_MAYEK_EXTENSIONS, 3789 ETHIOPIC_EXTENDED_A, 3790 LATIN_EXTENDED_E, 3791 CHEROKEE_SUPPLEMENT, 3792 MEETEI_MAYEK, 3793 HANGUL_SYLLABLES, 3794 HANGUL_JAMO_EXTENDED_B, 3795 HIGH_SURROGATES, 3796 HIGH_PRIVATE_USE_SURROGATES, 3797 LOW_SURROGATES, 3798 PRIVATE_USE_AREA, 3799 CJK_COMPATIBILITY_IDEOGRAPHS, 3800 ALPHABETIC_PRESENTATION_FORMS, 3801 ARABIC_PRESENTATION_FORMS_A, 3802 VARIATION_SELECTORS, 3803 VERTICAL_FORMS, 3804 COMBINING_HALF_MARKS, 3805 CJK_COMPATIBILITY_FORMS, 3806 SMALL_FORM_VARIANTS, 3807 ARABIC_PRESENTATION_FORMS_B, 3808 HALFWIDTH_AND_FULLWIDTH_FORMS, 3809 SPECIALS, 3810 LINEAR_B_SYLLABARY, 3811 LINEAR_B_IDEOGRAMS, 3812 AEGEAN_NUMBERS, 3813 ANCIENT_GREEK_NUMBERS, 3814 
ANCIENT_SYMBOLS, 3815 PHAISTOS_DISC, 3816 null, 3817 LYCIAN, 3818 CARIAN, 3819 COPTIC_EPACT_NUMBERS, 3820 OLD_ITALIC, 3821 GOTHIC, 3822 OLD_PERMIC, 3823 UGARITIC, 3824 OLD_PERSIAN, 3825 null, 3826 DESERET, 3827 SHAVIAN, 3828 OSMANYA, 3829 OSAGE, 3830 ELBASAN, 3831 CAUCASIAN_ALBANIAN, 3832 null, 3833 LINEAR_A, 3834 null, 3835 CYPRIOT_SYLLABARY, 3836 IMPERIAL_ARAMAIC, 3837 PALMYRENE, 3838 NABATAEAN, 3839 null, 3840 HATRAN, 3841 PHOENICIAN, 3842 LYDIAN, 3843 null, 3844 MEROITIC_HIEROGLYPHS, 3845 MEROITIC_CURSIVE, 3846 KHAROSHTHI, 3847 OLD_SOUTH_ARABIAN, 3848 OLD_NORTH_ARABIAN, 3849 null, 3850 MANICHAEAN, 3851 AVESTAN, 3852 INSCRIPTIONAL_PARTHIAN, 3853 INSCRIPTIONAL_PAHLAVI, 3854 PSALTER_PAHLAVI, 3855 null, 3856 OLD_TURKIC, 3857 null, 3858 OLD_HUNGARIAN, 3859 HANIFI_ROHINGYA, 3860 null, 3861 RUMI_NUMERAL_SYMBOLS, 3862 null, 3863 OLD_SOGDIAN, 3864 SOGDIAN, 3865 null, 3866 ELYMAIC, 3867 BRAHMI, 3868 KAITHI, 3869 SORA_SOMPENG, 3870 CHAKMA, 3871 MAHAJANI, 3872 SHARADA, 3873 SINHALA_ARCHAIC_NUMBERS, 3874 KHOJKI, 3875 null, 3876 MULTANI, 3877 KHUDAWADI, 3878 GRANTHA, 3879 null, 3880 NEWA, 3881 TIRHUTA, 3882 null, 3883 SIDDHAM, 3884 MODI, 3885 MONGOLIAN_SUPPLEMENT, 3886 TAKRI, 3887 null, 3888 AHOM, 3889 null, 3890 DOGRA, 3891 null, 3892 WARANG_CITI, 3893 null, 3894 NANDINAGARI, 3895 ZANABAZAR_SQUARE, 3896 SOYOMBO, 3897 null, 3898 PAU_CIN_HAU, 3899 null, 3900 BHAIKSUKI, 3901 MARCHEN, 3902 null, 3903 MASARAM_GONDI, 3904 GUNJALA_GONDI, 3905 null, 3906 MAKASAR, 3907 null, 3908 TAMIL_SUPPLEMENT, 3909 CUNEIFORM, 3910 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3911 EARLY_DYNASTIC_CUNEIFORM, 3912 null, 3913 EGYPTIAN_HIEROGLYPHS, 3914 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 3915 null, 3916 ANATOLIAN_HIEROGLYPHS, 3917 null, 3918 BAMUM_SUPPLEMENT, 3919 MRO, 3920 null, 3921 BASSA_VAH, 3922 PAHAWH_HMONG, 3923 null, 3924 MEDEFAIDRIN, 3925 null, 3926 MIAO, 3927 null, 3928 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 3929 TANGUT, 3930 TANGUT_COMPONENTS, 3931 null, 3932 KANA_SUPPLEMENT, 3933 KANA_EXTENDED_A, 
3934 SMALL_KANA_EXTENSION, 3935 NUSHU, 3936 null, 3937 DUPLOYAN, 3938 SHORTHAND_FORMAT_CONTROLS, 3939 null, 3940 BYZANTINE_MUSICAL_SYMBOLS, 3941 MUSICAL_SYMBOLS, 3942 ANCIENT_GREEK_MUSICAL_NOTATION, 3943 null, 3944 MAYAN_NUMERALS, 3945 TAI_XUAN_JING_SYMBOLS, 3946 COUNTING_ROD_NUMERALS, 3947 null, 3948 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3949 SUTTON_SIGNWRITING, 3950 null, 3951 GLAGOLITIC_SUPPLEMENT, 3952 null, 3953 NYIAKENG_PUACHUE_HMONG, 3954 null, 3955 WANCHO, 3956 null, 3957 MENDE_KIKAKUI, 3958 null, 3959 ADLAM, 3960 null, 3961 INDIC_SIYAQ_NUMBERS, 3962 null, 3963 OTTOMAN_SIYAQ_NUMBERS, 3964 null, 3965 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3966 null, 3967 MAHJONG_TILES, 3968 DOMINO_TILES, 3969 PLAYING_CARDS, 3970 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3971 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3972 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3973 EMOTICONS, 3974 ORNAMENTAL_DINGBATS, 3975 TRANSPORT_AND_MAP_SYMBOLS, 3976 ALCHEMICAL_SYMBOLS, 3977 GEOMETRIC_SHAPES_EXTENDED, 3978 SUPPLEMENTAL_ARROWS_C, 3979 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 3980 CHESS_SYMBOLS, 3981 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 3982 null, 3983 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3984 null, 3985 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3986 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3987 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 3988 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 3989 null, 3990 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3991 null, 3992 TAGS, 3993 null, 3994 VARIATION_SELECTORS_SUPPLEMENT, 3995 null, 3996 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3997 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 3998 }; 3999 4000 4001 /** 4002 * Returns the object representing the Unicode block containing the 4003 * given character, or {@code null} if the character is not a 4004 * member of a defined block. 4005 * 4006 * <p><b>Note:</b> This method cannot handle 4007 * <a href="Character.html#supplementary"> supplementary 4008 * characters</a>. To support all Unicode characters, including 4009 * supplementary characters, use the {@link #of(int)} method. 
4010 * 4011 * @param c The character in question 4012 * @return The {@code UnicodeBlock} instance representing the 4013 * Unicode block of which this character is a member, or 4014 * {@code null} if the character is not a member of any 4015 * Unicode block 4016 */ 4017 public static UnicodeBlock of(char c) { 4018 return of((int)c); 4019 } 4020 4021 /** 4022 * Returns the object representing the Unicode block 4023 * containing the given character (Unicode code point), or 4024 * {@code null} if the character is not a member of a 4025 * defined block. 4026 * 4027 * @param codePoint the character (Unicode code point) in question. 4028 * @return The {@code UnicodeBlock} instance representing the 4029 * Unicode block of which this character is a member, or 4030 * {@code null} if the character is not a member of any 4031 * Unicode block 4032 * @throws IllegalArgumentException if the specified 4033 * {@code codePoint} is an invalid Unicode code point. 4034 * @see Character#isValidCodePoint(int) 4035 * @since 1.5 4036 */ 4037 public static UnicodeBlock of(int codePoint) { 4038 if (!isValidCodePoint(codePoint)) { 4039 throw new IllegalArgumentException( 4040 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4041 } 4042 4043 int top, bottom, current; 4044 bottom = 0; 4045 top = blockStarts.length; 4046 current = top/2; 4047 4048 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4049 while (top - bottom > 1) { 4050 if (codePoint >= blockStarts[current]) { 4051 bottom = current; 4052 } else { 4053 top = current; 4054 } 4055 current = (top + bottom) / 2; 4056 } 4057 return blocks[current]; 4058 } 4059 4060 /** 4061 * Returns the UnicodeBlock with the given name. Block 4062 * names are determined by The Unicode Standard. The file 4063 * {@code Blocks-<version>.txt} defines blocks for a particular 4064 * version of the standard. The {@link Character} class specifies 4065 * the version of the standard that it supports. 
4066 * <p> 4067 * This method accepts block names in the following forms: 4068 * <ol> 4069 * <li> Canonical block names as defined by the Unicode Standard. 4070 * For example, the standard defines a "Basic Latin" block. Therefore, this 4071 * method accepts "Basic Latin" as a valid block name. The documentation of 4072 * each UnicodeBlock provides the canonical name. 4073 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4074 * is a valid block name for the "Basic Latin" block. 4075 * <li>The text representation of each constant UnicodeBlock identifier. 4076 * For example, this method will return the {@link #BASIC_LATIN} block if 4077 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4078 * hyphens in the canonical name with underscores. 4079 * </ol> 4080 * Finally, character case is ignored for all of the valid block name forms. 4081 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4082 * The en_US locale's case mapping rules are used to provide case-insensitive 4083 * string comparisons for block name validation. 4084 * <p> 4085 * If the Unicode Standard changes block names, both the previous and 4086 * current names will be accepted. 4087 * 4088 * @param blockName A {@code UnicodeBlock} name. 
4089 * @return The {@code UnicodeBlock} instance identified 4090 * by {@code blockName} 4091 * @throws IllegalArgumentException if {@code blockName} is an 4092 * invalid name 4093 * @throws NullPointerException if {@code blockName} is null 4094 * @since 1.5 4095 */ 4096 public static final UnicodeBlock forName(String blockName) { 4097 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4098 if (block == null) { 4099 throw new IllegalArgumentException("Not a valid block name: " 4100 + blockName); 4101 } 4102 return block; 4103 } 4104 } 4105 4106 4107 /** 4108 * A family of character subsets representing the character scripts 4109 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4110 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4111 * character is assigned to a single Unicode script, either a specific 4112 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4113 * one of the following three special values, 4114 * {@link Character.UnicodeScript#INHERITED Inherited}, 4115 * {@link Character.UnicodeScript#COMMON Common} or 4116 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4117 * 4118 * @since 1.7 4119 */ 4120 public static enum UnicodeScript { 4121 /** 4122 * Unicode script "Common". 4123 */ 4124 COMMON, 4125 4126 /** 4127 * Unicode script "Latin". 4128 */ 4129 LATIN, 4130 4131 /** 4132 * Unicode script "Greek". 4133 */ 4134 GREEK, 4135 4136 /** 4137 * Unicode script "Cyrillic". 4138 */ 4139 CYRILLIC, 4140 4141 /** 4142 * Unicode script "Armenian". 4143 */ 4144 ARMENIAN, 4145 4146 /** 4147 * Unicode script "Hebrew". 4148 */ 4149 HEBREW, 4150 4151 /** 4152 * Unicode script "Arabic". 4153 */ 4154 ARABIC, 4155 4156 /** 4157 * Unicode script "Syriac". 4158 */ 4159 SYRIAC, 4160 4161 /** 4162 * Unicode script "Thaana". 4163 */ 4164 THAANA, 4165 4166 /** 4167 * Unicode script "Devanagari". 4168 */ 4169 DEVANAGARI, 4170 4171 /** 4172 * Unicode script "Bengali". 
4173 */ 4174 BENGALI, 4175 4176 /** 4177 * Unicode script "Gurmukhi". 4178 */ 4179 GURMUKHI, 4180 4181 /** 4182 * Unicode script "Gujarati". 4183 */ 4184 GUJARATI, 4185 4186 /** 4187 * Unicode script "Oriya". 4188 */ 4189 ORIYA, 4190 4191 /** 4192 * Unicode script "Tamil". 4193 */ 4194 TAMIL, 4195 4196 /** 4197 * Unicode script "Telugu". 4198 */ 4199 TELUGU, 4200 4201 /** 4202 * Unicode script "Kannada". 4203 */ 4204 KANNADA, 4205 4206 /** 4207 * Unicode script "Malayalam". 4208 */ 4209 MALAYALAM, 4210 4211 /** 4212 * Unicode script "Sinhala". 4213 */ 4214 SINHALA, 4215 4216 /** 4217 * Unicode script "Thai". 4218 */ 4219 THAI, 4220 4221 /** 4222 * Unicode script "Lao". 4223 */ 4224 LAO, 4225 4226 /** 4227 * Unicode script "Tibetan". 4228 */ 4229 TIBETAN, 4230 4231 /** 4232 * Unicode script "Myanmar". 4233 */ 4234 MYANMAR, 4235 4236 /** 4237 * Unicode script "Georgian". 4238 */ 4239 GEORGIAN, 4240 4241 /** 4242 * Unicode script "Hangul". 4243 */ 4244 HANGUL, 4245 4246 /** 4247 * Unicode script "Ethiopic". 4248 */ 4249 ETHIOPIC, 4250 4251 /** 4252 * Unicode script "Cherokee". 4253 */ 4254 CHEROKEE, 4255 4256 /** 4257 * Unicode script "Canadian_Aboriginal". 4258 */ 4259 CANADIAN_ABORIGINAL, 4260 4261 /** 4262 * Unicode script "Ogham". 4263 */ 4264 OGHAM, 4265 4266 /** 4267 * Unicode script "Runic". 4268 */ 4269 RUNIC, 4270 4271 /** 4272 * Unicode script "Khmer". 4273 */ 4274 KHMER, 4275 4276 /** 4277 * Unicode script "Mongolian". 4278 */ 4279 MONGOLIAN, 4280 4281 /** 4282 * Unicode script "Hiragana". 4283 */ 4284 HIRAGANA, 4285 4286 /** 4287 * Unicode script "Katakana". 4288 */ 4289 KATAKANA, 4290 4291 /** 4292 * Unicode script "Bopomofo". 4293 */ 4294 BOPOMOFO, 4295 4296 /** 4297 * Unicode script "Han". 4298 */ 4299 HAN, 4300 4301 /** 4302 * Unicode script "Yi". 4303 */ 4304 YI, 4305 4306 /** 4307 * Unicode script "Old_Italic". 4308 */ 4309 OLD_ITALIC, 4310 4311 /** 4312 * Unicode script "Gothic". 4313 */ 4314 GOTHIC, 4315 4316 /** 4317 * Unicode script "Deseret". 
4318 */ 4319 DESERET, 4320 4321 /** 4322 * Unicode script "Inherited". 4323 */ 4324 INHERITED, 4325 4326 /** 4327 * Unicode script "Tagalog". 4328 */ 4329 TAGALOG, 4330 4331 /** 4332 * Unicode script "Hanunoo". 4333 */ 4334 HANUNOO, 4335 4336 /** 4337 * Unicode script "Buhid". 4338 */ 4339 BUHID, 4340 4341 /** 4342 * Unicode script "Tagbanwa". 4343 */ 4344 TAGBANWA, 4345 4346 /** 4347 * Unicode script "Limbu". 4348 */ 4349 LIMBU, 4350 4351 /** 4352 * Unicode script "Tai_Le". 4353 */ 4354 TAI_LE, 4355 4356 /** 4357 * Unicode script "Linear_B". 4358 */ 4359 LINEAR_B, 4360 4361 /** 4362 * Unicode script "Ugaritic". 4363 */ 4364 UGARITIC, 4365 4366 /** 4367 * Unicode script "Shavian". 4368 */ 4369 SHAVIAN, 4370 4371 /** 4372 * Unicode script "Osmanya". 4373 */ 4374 OSMANYA, 4375 4376 /** 4377 * Unicode script "Cypriot". 4378 */ 4379 CYPRIOT, 4380 4381 /** 4382 * Unicode script "Braille". 4383 */ 4384 BRAILLE, 4385 4386 /** 4387 * Unicode script "Buginese". 4388 */ 4389 BUGINESE, 4390 4391 /** 4392 * Unicode script "Coptic". 4393 */ 4394 COPTIC, 4395 4396 /** 4397 * Unicode script "New_Tai_Lue". 4398 */ 4399 NEW_TAI_LUE, 4400 4401 /** 4402 * Unicode script "Glagolitic". 4403 */ 4404 GLAGOLITIC, 4405 4406 /** 4407 * Unicode script "Tifinagh". 4408 */ 4409 TIFINAGH, 4410 4411 /** 4412 * Unicode script "Syloti_Nagri". 4413 */ 4414 SYLOTI_NAGRI, 4415 4416 /** 4417 * Unicode script "Old_Persian". 4418 */ 4419 OLD_PERSIAN, 4420 4421 /** 4422 * Unicode script "Kharoshthi". 4423 */ 4424 KHAROSHTHI, 4425 4426 /** 4427 * Unicode script "Balinese". 4428 */ 4429 BALINESE, 4430 4431 /** 4432 * Unicode script "Cuneiform". 4433 */ 4434 CUNEIFORM, 4435 4436 /** 4437 * Unicode script "Phoenician". 4438 */ 4439 PHOENICIAN, 4440 4441 /** 4442 * Unicode script "Phags_Pa". 4443 */ 4444 PHAGS_PA, 4445 4446 /** 4447 * Unicode script "Nko". 4448 */ 4449 NKO, 4450 4451 /** 4452 * Unicode script "Sundanese". 4453 */ 4454 SUNDANESE, 4455 4456 /** 4457 * Unicode script "Batak". 
4458 */ 4459 BATAK, 4460 4461 /** 4462 * Unicode script "Lepcha". 4463 */ 4464 LEPCHA, 4465 4466 /** 4467 * Unicode script "Ol_Chiki". 4468 */ 4469 OL_CHIKI, 4470 4471 /** 4472 * Unicode script "Vai". 4473 */ 4474 VAI, 4475 4476 /** 4477 * Unicode script "Saurashtra". 4478 */ 4479 SAURASHTRA, 4480 4481 /** 4482 * Unicode script "Kayah_Li". 4483 */ 4484 KAYAH_LI, 4485 4486 /** 4487 * Unicode script "Rejang". 4488 */ 4489 REJANG, 4490 4491 /** 4492 * Unicode script "Lycian". 4493 */ 4494 LYCIAN, 4495 4496 /** 4497 * Unicode script "Carian". 4498 */ 4499 CARIAN, 4500 4501 /** 4502 * Unicode script "Lydian". 4503 */ 4504 LYDIAN, 4505 4506 /** 4507 * Unicode script "Cham". 4508 */ 4509 CHAM, 4510 4511 /** 4512 * Unicode script "Tai_Tham". 4513 */ 4514 TAI_THAM, 4515 4516 /** 4517 * Unicode script "Tai_Viet". 4518 */ 4519 TAI_VIET, 4520 4521 /** 4522 * Unicode script "Avestan". 4523 */ 4524 AVESTAN, 4525 4526 /** 4527 * Unicode script "Egyptian_Hieroglyphs". 4528 */ 4529 EGYPTIAN_HIEROGLYPHS, 4530 4531 /** 4532 * Unicode script "Samaritan". 4533 */ 4534 SAMARITAN, 4535 4536 /** 4537 * Unicode script "Mandaic". 4538 */ 4539 MANDAIC, 4540 4541 /** 4542 * Unicode script "Lisu". 4543 */ 4544 LISU, 4545 4546 /** 4547 * Unicode script "Bamum". 4548 */ 4549 BAMUM, 4550 4551 /** 4552 * Unicode script "Javanese". 4553 */ 4554 JAVANESE, 4555 4556 /** 4557 * Unicode script "Meetei_Mayek". 4558 */ 4559 MEETEI_MAYEK, 4560 4561 /** 4562 * Unicode script "Imperial_Aramaic". 4563 */ 4564 IMPERIAL_ARAMAIC, 4565 4566 /** 4567 * Unicode script "Old_South_Arabian". 4568 */ 4569 OLD_SOUTH_ARABIAN, 4570 4571 /** 4572 * Unicode script "Inscriptional_Parthian". 4573 */ 4574 INSCRIPTIONAL_PARTHIAN, 4575 4576 /** 4577 * Unicode script "Inscriptional_Pahlavi". 4578 */ 4579 INSCRIPTIONAL_PAHLAVI, 4580 4581 /** 4582 * Unicode script "Old_Turkic". 4583 */ 4584 OLD_TURKIC, 4585 4586 /** 4587 * Unicode script "Brahmi". 4588 */ 4589 BRAHMI, 4590 4591 /** 4592 * Unicode script "Kaithi". 
4593 */ 4594 KAITHI, 4595 4596 /** 4597 * Unicode script "Meroitic_Hieroglyphs". 4598 * @since 1.8 4599 */ 4600 MEROITIC_HIEROGLYPHS, 4601 4602 /** 4603 * Unicode script "Meroitic_Cursive". 4604 * @since 1.8 4605 */ 4606 MEROITIC_CURSIVE, 4607 4608 /** 4609 * Unicode script "Sora_Sompeng". 4610 * @since 1.8 4611 */ 4612 SORA_SOMPENG, 4613 4614 /** 4615 * Unicode script "Chakma". 4616 * @since 1.8 4617 */ 4618 CHAKMA, 4619 4620 /** 4621 * Unicode script "Sharada". 4622 * @since 1.8 4623 */ 4624 SHARADA, 4625 4626 /** 4627 * Unicode script "Takri". 4628 * @since 1.8 4629 */ 4630 TAKRI, 4631 4632 /** 4633 * Unicode script "Miao". 4634 * @since 1.8 4635 */ 4636 MIAO, 4637 4638 /** 4639 * Unicode script "Caucasian_Albanian". 4640 * @since 9 4641 */ 4642 CAUCASIAN_ALBANIAN, 4643 4644 /** 4645 * Unicode script "Bassa_Vah". 4646 * @since 9 4647 */ 4648 BASSA_VAH, 4649 4650 /** 4651 * Unicode script "Duployan". 4652 * @since 9 4653 */ 4654 DUPLOYAN, 4655 4656 /** 4657 * Unicode script "Elbasan". 4658 * @since 9 4659 */ 4660 ELBASAN, 4661 4662 /** 4663 * Unicode script "Grantha". 4664 * @since 9 4665 */ 4666 GRANTHA, 4667 4668 /** 4669 * Unicode script "Pahawh_Hmong". 4670 * @since 9 4671 */ 4672 PAHAWH_HMONG, 4673 4674 /** 4675 * Unicode script "Khojki". 4676 * @since 9 4677 */ 4678 KHOJKI, 4679 4680 /** 4681 * Unicode script "Linear_A". 4682 * @since 9 4683 */ 4684 LINEAR_A, 4685 4686 /** 4687 * Unicode script "Mahajani". 4688 * @since 9 4689 */ 4690 MAHAJANI, 4691 4692 /** 4693 * Unicode script "Manichaean". 4694 * @since 9 4695 */ 4696 MANICHAEAN, 4697 4698 /** 4699 * Unicode script "Mende_Kikakui". 4700 * @since 9 4701 */ 4702 MENDE_KIKAKUI, 4703 4704 /** 4705 * Unicode script "Modi". 4706 * @since 9 4707 */ 4708 MODI, 4709 4710 /** 4711 * Unicode script "Mro". 4712 * @since 9 4713 */ 4714 MRO, 4715 4716 /** 4717 * Unicode script "Old_North_Arabian". 4718 * @since 9 4719 */ 4720 OLD_NORTH_ARABIAN, 4721 4722 /** 4723 * Unicode script "Nabataean". 
4724 * @since 9 4725 */ 4726 NABATAEAN, 4727 4728 /** 4729 * Unicode script "Palmyrene". 4730 * @since 9 4731 */ 4732 PALMYRENE, 4733 4734 /** 4735 * Unicode script "Pau_Cin_Hau". 4736 * @since 9 4737 */ 4738 PAU_CIN_HAU, 4739 4740 /** 4741 * Unicode script "Old_Permic". 4742 * @since 9 4743 */ 4744 OLD_PERMIC, 4745 4746 /** 4747 * Unicode script "Psalter_Pahlavi". 4748 * @since 9 4749 */ 4750 PSALTER_PAHLAVI, 4751 4752 /** 4753 * Unicode script "Siddham". 4754 * @since 9 4755 */ 4756 SIDDHAM, 4757 4758 /** 4759 * Unicode script "Khudawadi". 4760 * @since 9 4761 */ 4762 KHUDAWADI, 4763 4764 /** 4765 * Unicode script "Tirhuta". 4766 * @since 9 4767 */ 4768 TIRHUTA, 4769 4770 /** 4771 * Unicode script "Warang_Citi". 4772 * @since 9 4773 */ 4774 WARANG_CITI, 4775 4776 /** 4777 * Unicode script "Ahom". 4778 * @since 9 4779 */ 4780 AHOM, 4781 4782 /** 4783 * Unicode script "Anatolian_Hieroglyphs". 4784 * @since 9 4785 */ 4786 ANATOLIAN_HIEROGLYPHS, 4787 4788 /** 4789 * Unicode script "Hatran". 4790 * @since 9 4791 */ 4792 HATRAN, 4793 4794 /** 4795 * Unicode script "Multani". 4796 * @since 9 4797 */ 4798 MULTANI, 4799 4800 /** 4801 * Unicode script "Old_Hungarian". 4802 * @since 9 4803 */ 4804 OLD_HUNGARIAN, 4805 4806 /** 4807 * Unicode script "SignWriting". 4808 * @since 9 4809 */ 4810 SIGNWRITING, 4811 4812 /** 4813 * Unicode script "Adlam". 4814 * @since 11 4815 */ 4816 ADLAM, 4817 4818 /** 4819 * Unicode script "Bhaiksuki". 4820 * @since 11 4821 */ 4822 BHAIKSUKI, 4823 4824 /** 4825 * Unicode script "Marchen". 4826 * @since 11 4827 */ 4828 MARCHEN, 4829 4830 /** 4831 * Unicode script "Newa". 4832 * @since 11 4833 */ 4834 NEWA, 4835 4836 /** 4837 * Unicode script "Osage". 4838 * @since 11 4839 */ 4840 OSAGE, 4841 4842 /** 4843 * Unicode script "Tangut". 4844 * @since 11 4845 */ 4846 TANGUT, 4847 4848 /** 4849 * Unicode script "Masaram_Gondi". 4850 * @since 11 4851 */ 4852 MASARAM_GONDI, 4853 4854 /** 4855 * Unicode script "Nushu". 
4856 * @since 11 4857 */ 4858 NUSHU, 4859 4860 /** 4861 * Unicode script "Soyombo". 4862 * @since 11 4863 */ 4864 SOYOMBO, 4865 4866 /** 4867 * Unicode script "Zanabazar_Square". 4868 * @since 11 4869 */ 4870 ZANABAZAR_SQUARE, 4871 4872 /** 4873 * Unicode script "Hanifi_Rohingya". 4874 * @since 12 4875 */ 4876 HANIFI_ROHINGYA, 4877 4878 /** 4879 * Unicode script "Old_Sogdian". 4880 * @since 12 4881 */ 4882 OLD_SOGDIAN, 4883 4884 /** 4885 * Unicode script "Sogdian". 4886 * @since 12 4887 */ 4888 SOGDIAN, 4889 4890 /** 4891 * Unicode script "Dogra". 4892 * @since 12 4893 */ 4894 DOGRA, 4895 4896 /** 4897 * Unicode script "Gunjala_Gondi". 4898 * @since 12 4899 */ 4900 GUNJALA_GONDI, 4901 4902 /** 4903 * Unicode script "Makasar". 4904 * @since 12 4905 */ 4906 MAKASAR, 4907 4908 /** 4909 * Unicode script "Medefaidrin". 4910 * @since 12 4911 */ 4912 MEDEFAIDRIN, 4913 4914 /** 4915 * Unicode script "Elymaic". 4916 * @since 13 4917 */ 4918 ELYMAIC, 4919 4920 /** 4921 * Unicode script "Nandinagari". 4922 * @since 13 4923 */ 4924 NANDINAGARI, 4925 4926 /** 4927 * Unicode script "Nyiakeng_Puachue_Hmong". 4928 * @since 13 4929 */ 4930 NYIAKENG_PUACHUE_HMONG, 4931 4932 /** 4933 * Unicode script "Wancho". 4934 * @since 13 4935 */ 4936 WANCHO, 4937 4938 /** 4939 * Unicode script "Unknown". 
4940 */ 4941 UNKNOWN; 4942 4943 private static final int[] scriptStarts = { 4944 0x0000, // 0000..0040; COMMON 4945 0x0041, // 0041..005A; LATIN 4946 0x005B, // 005B..0060; COMMON 4947 0x0061, // 0061..007A; LATIN 4948 0x007B, // 007B..00A9; COMMON 4949 0x00AA, // 00AA ; LATIN 4950 0x00AB, // 00AB..00B9; COMMON 4951 0x00BA, // 00BA ; LATIN 4952 0x00BB, // 00BB..00BF; COMMON 4953 0x00C0, // 00C0..00D6; LATIN 4954 0x00D7, // 00D7 ; COMMON 4955 0x00D8, // 00D8..00F6; LATIN 4956 0x00F7, // 00F7 ; COMMON 4957 0x00F8, // 00F8..02B8; LATIN 4958 0x02B9, // 02B9..02DF; COMMON 4959 0x02E0, // 02E0..02E4; LATIN 4960 0x02E5, // 02E5..02E9; COMMON 4961 0x02EA, // 02EA..02EB; BOPOMOFO 4962 0x02EC, // 02EC..02FF; COMMON 4963 0x0300, // 0300..036F; INHERITED 4964 0x0370, // 0370..0373; GREEK 4965 0x0374, // 0374 ; COMMON 4966 0x0375, // 0375..0377; GREEK 4967 0x0378, // 0378..0379; UNKNOWN 4968 0x037A, // 037A..037D; GREEK 4969 0x037E, // 037E ; COMMON 4970 0x037F, // 037F ; GREEK 4971 0x0380, // 0380..0383; UNKNOWN 4972 0x0384, // 0384 ; GREEK 4973 0x0385, // 0385 ; COMMON 4974 0x0386, // 0386 ; GREEK 4975 0x0387, // 0387 ; COMMON 4976 0x0388, // 0388..038A; GREEK 4977 0x038B, // 038B ; UNKNOWN 4978 0x038C, // 038C ; GREEK 4979 0x038D, // 038D ; UNKNOWN 4980 0x038E, // 038E..03A1; GREEK 4981 0x03A2, // 03A2 ; UNKNOWN 4982 0x03A3, // 03A3..03E1; GREEK 4983 0x03E2, // 03E2..03EF; COPTIC 4984 0x03F0, // 03F0..03FF; GREEK 4985 0x0400, // 0400..0484; CYRILLIC 4986 0x0485, // 0485..0486; INHERITED 4987 0x0487, // 0487..052F; CYRILLIC 4988 0x0530, // 0530 ; UNKNOWN 4989 0x0531, // 0531..0556; ARMENIAN 4990 0x0557, // 0557..0558; UNKNOWN 4991 0x0559, // 0559..0588; ARMENIAN 4992 0x0589, // 0589 ; COMMON 4993 0x058A, // 058A ; ARMENIAN 4994 0x058B, // 058B..058C; UNKNOWN 4995 0x058D, // 058D..058F; ARMENIAN 4996 0x0590, // 0590 ; UNKNOWN 4997 0x0591, // 0591..05C7; HEBREW 4998 0x05C8, // 05C8..05CF; UNKNOWN 4999 0x05D0, // 05D0..05EA; HEBREW 5000 0x05EB, // 05EB..05EE; UNKNOWN 5001 
0x05EF, // 05EF..05F4; HEBREW 5002 0x05F5, // 05F5..05FF; UNKNOWN 5003 0x0600, // 0600..0604; ARABIC 5004 0x0605, // 0605 ; COMMON 5005 0x0606, // 0606..060B; ARABIC 5006 0x060C, // 060C ; COMMON 5007 0x060D, // 060D..061A; ARABIC 5008 0x061B, // 061B ; COMMON 5009 0x061C, // 061C ; ARABIC 5010 0x061D, // 061D ; UNKNOWN 5011 0x061E, // 061E ; ARABIC 5012 0x061F, // 061F ; COMMON 5013 0x0620, // 0620..063F; ARABIC 5014 0x0640, // 0640 ; COMMON 5015 0x0641, // 0641..064A; ARABIC 5016 0x064B, // 064B..0655; INHERITED 5017 0x0656, // 0656..066F; ARABIC 5018 0x0670, // 0670 ; INHERITED 5019 0x0671, // 0671..06DC; ARABIC 5020 0x06DD, // 06DD ; COMMON 5021 0x06DE, // 06DE..06FF; ARABIC 5022 0x0700, // 0700..070D; SYRIAC 5023 0x070E, // 070E ; UNKNOWN 5024 0x070F, // 070F..074A; SYRIAC 5025 0x074B, // 074B..074C; UNKNOWN 5026 0x074D, // 074D..074F; SYRIAC 5027 0x0750, // 0750..077F; ARABIC 5028 0x0780, // 0780..07B1; THAANA 5029 0x07B2, // 07B2..07BF; UNKNOWN 5030 0x07C0, // 07C0..07FA; NKO 5031 0x07FB, // 07FB..07FC; UNKNOWN 5032 0x07FD, // 07FD..07FF; NKO 5033 0x0800, // 0800..082D; SAMARITAN 5034 0x082E, // 082E..082F; UNKNOWN 5035 0x0830, // 0830..083E; SAMARITAN 5036 0x083F, // 083F ; UNKNOWN 5037 0x0840, // 0840..085B; MANDAIC 5038 0x085C, // 085C..085D; UNKNOWN 5039 0x085E, // 085E ; MANDAIC 5040 0x085F, // 085F ; UNKNOWN 5041 0x0860, // 0860..086A; SYRIAC 5042 0x086B, // 086B..089F; UNKNOWN 5043 0x08A0, // 08A0..08B4; ARABIC 5044 0x08B5, // 08B5 ; UNKNOWN 5045 0x08B6, // 08B6..08BD; ARABIC 5046 0x08BE, // 08BE..08D2; UNKNOWN 5047 0x08D3, // 08D3..08E1; ARABIC 5048 0x08E2, // 08E2 ; COMMON 5049 0x08E3, // 08E3..08FF; ARABIC 5050 0x0900, // 0900..0950; DEVANAGARI 5051 0x0951, // 0951..0954; INHERITED 5052 0x0955, // 0955..0963; DEVANAGARI 5053 0x0964, // 0964..0965; COMMON 5054 0x0966, // 0966..097F; DEVANAGARI 5055 0x0980, // 0980..0983; BENGALI 5056 0x0984, // 0984 ; UNKNOWN 5057 0x0985, // 0985..098C; BENGALI 5058 0x098D, // 098D..098E; UNKNOWN 5059 0x098F, // 
098F..0990; BENGALI 5060 0x0991, // 0991..0992; UNKNOWN 5061 0x0993, // 0993..09A8; BENGALI 5062 0x09A9, // 09A9 ; UNKNOWN 5063 0x09AA, // 09AA..09B0; BENGALI 5064 0x09B1, // 09B1 ; UNKNOWN 5065 0x09B2, // 09B2 ; BENGALI 5066 0x09B3, // 09B3..09B5; UNKNOWN 5067 0x09B6, // 09B6..09B9; BENGALI 5068 0x09BA, // 09BA..09BB; UNKNOWN 5069 0x09BC, // 09BC..09C4; BENGALI 5070 0x09C5, // 09C5..09C6; UNKNOWN 5071 0x09C7, // 09C7..09C8; BENGALI 5072 0x09C9, // 09C9..09CA; UNKNOWN 5073 0x09CB, // 09CB..09CE; BENGALI 5074 0x09CF, // 09CF..09D6; UNKNOWN 5075 0x09D7, // 09D7 ; BENGALI 5076 0x09D8, // 09D8..09DB; UNKNOWN 5077 0x09DC, // 09DC..09DD; BENGALI 5078 0x09DE, // 09DE ; UNKNOWN 5079 0x09DF, // 09DF..09E3; BENGALI 5080 0x09E4, // 09E4..09E5; UNKNOWN 5081 0x09E6, // 09E6..09FE; BENGALI 5082 0x09FF, // 09FF..0A00; UNKNOWN 5083 0x0A01, // 0A01..0A03; GURMUKHI 5084 0x0A04, // 0A04 ; UNKNOWN 5085 0x0A05, // 0A05..0A0A; GURMUKHI 5086 0x0A0B, // 0A0B..0A0E; UNKNOWN 5087 0x0A0F, // 0A0F..0A10; GURMUKHI 5088 0x0A11, // 0A11..0A12; UNKNOWN 5089 0x0A13, // 0A13..0A28; GURMUKHI 5090 0x0A29, // 0A29 ; UNKNOWN 5091 0x0A2A, // 0A2A..0A30; GURMUKHI 5092 0x0A31, // 0A31 ; UNKNOWN 5093 0x0A32, // 0A32..0A33; GURMUKHI 5094 0x0A34, // 0A34 ; UNKNOWN 5095 0x0A35, // 0A35..0A36; GURMUKHI 5096 0x0A37, // 0A37 ; UNKNOWN 5097 0x0A38, // 0A38..0A39; GURMUKHI 5098 0x0A3A, // 0A3A..0A3B; UNKNOWN 5099 0x0A3C, // 0A3C ; GURMUKHI 5100 0x0A3D, // 0A3D ; UNKNOWN 5101 0x0A3E, // 0A3E..0A42; GURMUKHI 5102 0x0A43, // 0A43..0A46; UNKNOWN 5103 0x0A47, // 0A47..0A48; GURMUKHI 5104 0x0A49, // 0A49..0A4A; UNKNOWN 5105 0x0A4B, // 0A4B..0A4D; GURMUKHI 5106 0x0A4E, // 0A4E..0A50; UNKNOWN 5107 0x0A51, // 0A51 ; GURMUKHI 5108 0x0A52, // 0A52..0A58; UNKNOWN 5109 0x0A59, // 0A59..0A5C; GURMUKHI 5110 0x0A5D, // 0A5D ; UNKNOWN 5111 0x0A5E, // 0A5E ; GURMUKHI 5112 0x0A5F, // 0A5F..0A65; UNKNOWN 5113 0x0A66, // 0A66..0A76; GURMUKHI 5114 0x0A77, // 0A77..0A80; UNKNOWN 5115 0x0A81, // 0A81..0A83; GUJARATI 5116 0x0A84, // 0A84 
; UNKNOWN 5117 0x0A85, // 0A85..0A8D; GUJARATI 5118 0x0A8E, // 0A8E ; UNKNOWN 5119 0x0A8F, // 0A8F..0A91; GUJARATI 5120 0x0A92, // 0A92 ; UNKNOWN 5121 0x0A93, // 0A93..0AA8; GUJARATI 5122 0x0AA9, // 0AA9 ; UNKNOWN 5123 0x0AAA, // 0AAA..0AB0; GUJARATI 5124 0x0AB1, // 0AB1 ; UNKNOWN 5125 0x0AB2, // 0AB2..0AB3; GUJARATI 5126 0x0AB4, // 0AB4 ; UNKNOWN 5127 0x0AB5, // 0AB5..0AB9; GUJARATI 5128 0x0ABA, // 0ABA..0ABB; UNKNOWN 5129 0x0ABC, // 0ABC..0AC5; GUJARATI 5130 0x0AC6, // 0AC6 ; UNKNOWN 5131 0x0AC7, // 0AC7..0AC9; GUJARATI 5132 0x0ACA, // 0ACA ; UNKNOWN 5133 0x0ACB, // 0ACB..0ACD; GUJARATI 5134 0x0ACE, // 0ACE..0ACF; UNKNOWN 5135 0x0AD0, // 0AD0 ; GUJARATI 5136 0x0AD1, // 0AD1..0ADF; UNKNOWN 5137 0x0AE0, // 0AE0..0AE3; GUJARATI 5138 0x0AE4, // 0AE4..0AE5; UNKNOWN 5139 0x0AE6, // 0AE6..0AF1; GUJARATI 5140 0x0AF2, // 0AF2..0AF8; UNKNOWN 5141 0x0AF9, // 0AF9..0AFF; GUJARATI 5142 0x0B00, // 0B00 ; UNKNOWN 5143 0x0B01, // 0B01..0B03; ORIYA 5144 0x0B04, // 0B04 ; UNKNOWN 5145 0x0B05, // 0B05..0B0C; ORIYA 5146 0x0B0D, // 0B0D..0B0E; UNKNOWN 5147 0x0B0F, // 0B0F..0B10; ORIYA 5148 0x0B11, // 0B11..0B12; UNKNOWN 5149 0x0B13, // 0B13..0B28; ORIYA 5150 0x0B29, // 0B29 ; UNKNOWN 5151 0x0B2A, // 0B2A..0B30; ORIYA 5152 0x0B31, // 0B31 ; UNKNOWN 5153 0x0B32, // 0B32..0B33; ORIYA 5154 0x0B34, // 0B34 ; UNKNOWN 5155 0x0B35, // 0B35..0B39; ORIYA 5156 0x0B3A, // 0B3A..0B3B; UNKNOWN 5157 0x0B3C, // 0B3C..0B44; ORIYA 5158 0x0B45, // 0B45..0B46; UNKNOWN 5159 0x0B47, // 0B47..0B48; ORIYA 5160 0x0B49, // 0B49..0B4A; UNKNOWN 5161 0x0B4B, // 0B4B..0B4D; ORIYA 5162 0x0B4E, // 0B4E..0B55; UNKNOWN 5163 0x0B56, // 0B56..0B57; ORIYA 5164 0x0B58, // 0B58..0B5B; UNKNOWN 5165 0x0B5C, // 0B5C..0B5D; ORIYA 5166 0x0B5E, // 0B5E ; UNKNOWN 5167 0x0B5F, // 0B5F..0B63; ORIYA 5168 0x0B64, // 0B64..0B65; UNKNOWN 5169 0x0B66, // 0B66..0B77; ORIYA 5170 0x0B78, // 0B78..0B81; UNKNOWN 5171 0x0B82, // 0B82..0B83; TAMIL 5172 0x0B84, // 0B84 ; UNKNOWN 5173 0x0B85, // 0B85..0B8A; TAMIL 5174 0x0B8B, // 0B8B..0B8D; 
UNKNOWN 5175 0x0B8E, // 0B8E..0B90; TAMIL 5176 0x0B91, // 0B91 ; UNKNOWN 5177 0x0B92, // 0B92..0B95; TAMIL 5178 0x0B96, // 0B96..0B98; UNKNOWN 5179 0x0B99, // 0B99..0B9A; TAMIL 5180 0x0B9B, // 0B9B ; UNKNOWN 5181 0x0B9C, // 0B9C ; TAMIL 5182 0x0B9D, // 0B9D ; UNKNOWN 5183 0x0B9E, // 0B9E..0B9F; TAMIL 5184 0x0BA0, // 0BA0..0BA2; UNKNOWN 5185 0x0BA3, // 0BA3..0BA4; TAMIL 5186 0x0BA5, // 0BA5..0BA7; UNKNOWN 5187 0x0BA8, // 0BA8..0BAA; TAMIL 5188 0x0BAB, // 0BAB..0BAD; UNKNOWN 5189 0x0BAE, // 0BAE..0BB9; TAMIL 5190 0x0BBA, // 0BBA..0BBD; UNKNOWN 5191 0x0BBE, // 0BBE..0BC2; TAMIL 5192 0x0BC3, // 0BC3..0BC5; UNKNOWN 5193 0x0BC6, // 0BC6..0BC8; TAMIL 5194 0x0BC9, // 0BC9 ; UNKNOWN 5195 0x0BCA, // 0BCA..0BCD; TAMIL 5196 0x0BCE, // 0BCE..0BCF; UNKNOWN 5197 0x0BD0, // 0BD0 ; TAMIL 5198 0x0BD1, // 0BD1..0BD6; UNKNOWN 5199 0x0BD7, // 0BD7 ; TAMIL 5200 0x0BD8, // 0BD8..0BE5; UNKNOWN 5201 0x0BE6, // 0BE6..0BFA; TAMIL 5202 0x0BFB, // 0BFB..0BFF; UNKNOWN 5203 0x0C00, // 0C00..0C0C; TELUGU 5204 0x0C0D, // 0C0D ; UNKNOWN 5205 0x0C0E, // 0C0E..0C10; TELUGU 5206 0x0C11, // 0C11 ; UNKNOWN 5207 0x0C12, // 0C12..0C28; TELUGU 5208 0x0C29, // 0C29 ; UNKNOWN 5209 0x0C2A, // 0C2A..0C39; TELUGU 5210 0x0C3A, // 0C3A..0C3C; UNKNOWN 5211 0x0C3D, // 0C3D..0C44; TELUGU 5212 0x0C45, // 0C45 ; UNKNOWN 5213 0x0C46, // 0C46..0C48; TELUGU 5214 0x0C49, // 0C49 ; UNKNOWN 5215 0x0C4A, // 0C4A..0C4D; TELUGU 5216 0x0C4E, // 0C4E..0C54; UNKNOWN 5217 0x0C55, // 0C55..0C56; TELUGU 5218 0x0C57, // 0C57 ; UNKNOWN 5219 0x0C58, // 0C58..0C5A; TELUGU 5220 0x0C5B, // 0C5B..0C5F; UNKNOWN 5221 0x0C60, // 0C60..0C63; TELUGU 5222 0x0C64, // 0C64..0C65; UNKNOWN 5223 0x0C66, // 0C66..0C6F; TELUGU 5224 0x0C70, // 0C70..0C76; UNKNOWN 5225 0x0C77, // 0C77..0C7F; TELUGU 5226 0x0C80, // 0C80..0C8C; KANNADA 5227 0x0C8D, // 0C8D ; UNKNOWN 5228 0x0C8E, // 0C8E..0C90; KANNADA 5229 0x0C91, // 0C91 ; UNKNOWN 5230 0x0C92, // 0C92..0CA8; KANNADA 5231 0x0CA9, // 0CA9 ; UNKNOWN 5232 0x0CAA, // 0CAA..0CB3; KANNADA 5233 0x0CB4, // 0CB4 ; 
UNKNOWN 5234 0x0CB5, // 0CB5..0CB9; KANNADA 5235 0x0CBA, // 0CBA..0CBB; UNKNOWN 5236 0x0CBC, // 0CBC..0CC4; KANNADA 5237 0x0CC5, // 0CC5 ; UNKNOWN 5238 0x0CC6, // 0CC6..0CC8; KANNADA 5239 0x0CC9, // 0CC9 ; UNKNOWN 5240 0x0CCA, // 0CCA..0CCD; KANNADA 5241 0x0CCE, // 0CCE..0CD4; UNKNOWN 5242 0x0CD5, // 0CD5..0CD6; KANNADA 5243 0x0CD7, // 0CD7..0CDD; UNKNOWN 5244 0x0CDE, // 0CDE ; KANNADA 5245 0x0CDF, // 0CDF ; UNKNOWN 5246 0x0CE0, // 0CE0..0CE3; KANNADA 5247 0x0CE4, // 0CE4..0CE5; UNKNOWN 5248 0x0CE6, // 0CE6..0CEF; KANNADA 5249 0x0CF0, // 0CF0 ; UNKNOWN 5250 0x0CF1, // 0CF1..0CF2; KANNADA 5251 0x0CF3, // 0CF3..0CFF; UNKNOWN 5252 0x0D00, // 0D00..0D03; MALAYALAM 5253 0x0D04, // 0D04 ; UNKNOWN 5254 0x0D05, // 0D05..0D0C; MALAYALAM 5255 0x0D0D, // 0D0D ; UNKNOWN 5256 0x0D0E, // 0D0E..0D10; MALAYALAM 5257 0x0D11, // 0D11 ; UNKNOWN 5258 0x0D12, // 0D12..0D44; MALAYALAM 5259 0x0D45, // 0D45 ; UNKNOWN 5260 0x0D46, // 0D46..0D48; MALAYALAM 5261 0x0D49, // 0D49 ; UNKNOWN 5262 0x0D4A, // 0D4A..0D4F; MALAYALAM 5263 0x0D50, // 0D50..0D53; UNKNOWN 5264 0x0D54, // 0D54..0D63; MALAYALAM 5265 0x0D64, // 0D64..0D65; UNKNOWN 5266 0x0D66, // 0D66..0D7F; MALAYALAM 5267 0x0D80, // 0D80..0D81; UNKNOWN 5268 0x0D82, // 0D82..0D83; SINHALA 5269 0x0D84, // 0D84 ; UNKNOWN 5270 0x0D85, // 0D85..0D96; SINHALA 5271 0x0D97, // 0D97..0D99; UNKNOWN 5272 0x0D9A, // 0D9A..0DB1; SINHALA 5273 0x0DB2, // 0DB2 ; UNKNOWN 5274 0x0DB3, // 0DB3..0DBB; SINHALA 5275 0x0DBC, // 0DBC ; UNKNOWN 5276 0x0DBD, // 0DBD ; SINHALA 5277 0x0DBE, // 0DBE..0DBF; UNKNOWN 5278 0x0DC0, // 0DC0..0DC6; SINHALA 5279 0x0DC7, // 0DC7..0DC9; UNKNOWN 5280 0x0DCA, // 0DCA ; SINHALA 5281 0x0DCB, // 0DCB..0DCE; UNKNOWN 5282 0x0DCF, // 0DCF..0DD4; SINHALA 5283 0x0DD5, // 0DD5 ; UNKNOWN 5284 0x0DD6, // 0DD6 ; SINHALA 5285 0x0DD7, // 0DD7 ; UNKNOWN 5286 0x0DD8, // 0DD8..0DDF; SINHALA 5287 0x0DE0, // 0DE0..0DE5; UNKNOWN 5288 0x0DE6, // 0DE6..0DEF; SINHALA 5289 0x0DF0, // 0DF0..0DF1; UNKNOWN 5290 0x0DF2, // 0DF2..0DF4; SINHALA 5291 0x0DF5, 
// 0DF5..0E00; UNKNOWN 5292 0x0E01, // 0E01..0E3A; THAI 5293 0x0E3B, // 0E3B..0E3E; UNKNOWN 5294 0x0E3F, // 0E3F ; COMMON 5295 0x0E40, // 0E40..0E5B; THAI 5296 0x0E5C, // 0E5C..0E80; UNKNOWN 5297 0x0E81, // 0E81..0E82; LAO 5298 0x0E83, // 0E83 ; UNKNOWN 5299 0x0E84, // 0E84 ; LAO 5300 0x0E85, // 0E85 ; UNKNOWN 5301 0x0E86, // 0E86..0E8A; LAO 5302 0x0E8B, // 0E8B ; UNKNOWN 5303 0x0E8C, // 0E8C..0EA3; LAO 5304 0x0EA4, // 0EA4 ; UNKNOWN 5305 0x0EA5, // 0EA5 ; LAO 5306 0x0EA6, // 0EA6 ; UNKNOWN 5307 0x0EA7, // 0EA7..0EBD; LAO 5308 0x0EBE, // 0EBE..0EBF; UNKNOWN 5309 0x0EC0, // 0EC0..0EC4; LAO 5310 0x0EC5, // 0EC5 ; UNKNOWN 5311 0x0EC6, // 0EC6 ; LAO 5312 0x0EC7, // 0EC7 ; UNKNOWN 5313 0x0EC8, // 0EC8..0ECD; LAO 5314 0x0ECE, // 0ECE..0ECF; UNKNOWN 5315 0x0ED0, // 0ED0..0ED9; LAO 5316 0x0EDA, // 0EDA..0EDB; UNKNOWN 5317 0x0EDC, // 0EDC..0EDF; LAO 5318 0x0EE0, // 0EE0..0EFF; UNKNOWN 5319 0x0F00, // 0F00..0F47; TIBETAN 5320 0x0F48, // 0F48 ; UNKNOWN 5321 0x0F49, // 0F49..0F6C; TIBETAN 5322 0x0F6D, // 0F6D..0F70; UNKNOWN 5323 0x0F71, // 0F71..0F97; TIBETAN 5324 0x0F98, // 0F98 ; UNKNOWN 5325 0x0F99, // 0F99..0FBC; TIBETAN 5326 0x0FBD, // 0FBD ; UNKNOWN 5327 0x0FBE, // 0FBE..0FCC; TIBETAN 5328 0x0FCD, // 0FCD ; UNKNOWN 5329 0x0FCE, // 0FCE..0FD4; TIBETAN 5330 0x0FD5, // 0FD5..0FD8; COMMON 5331 0x0FD9, // 0FD9..0FDA; TIBETAN 5332 0x0FDB, // 0FDB..0FFF; UNKNOWN 5333 0x1000, // 1000..109F; MYANMAR 5334 0x10A0, // 10A0..10C5; GEORGIAN 5335 0x10C6, // 10C6 ; UNKNOWN 5336 0x10C7, // 10C7 ; GEORGIAN 5337 0x10C8, // 10C8..10CC; UNKNOWN 5338 0x10CD, // 10CD ; GEORGIAN 5339 0x10CE, // 10CE..10CF; UNKNOWN 5340 0x10D0, // 10D0..10FA; GEORGIAN 5341 0x10FB, // 10FB ; COMMON 5342 0x10FC, // 10FC..10FF; GEORGIAN 5343 0x1100, // 1100..11FF; HANGUL 5344 0x1200, // 1200..1248; ETHIOPIC 5345 0x1249, // 1249 ; UNKNOWN 5346 0x124A, // 124A..124D; ETHIOPIC 5347 0x124E, // 124E..124F; UNKNOWN 5348 0x1250, // 1250..1256; ETHIOPIC 5349 0x1257, // 1257 ; UNKNOWN 5350 0x1258, // 1258 ; ETHIOPIC 5351 
0x1259, // 1259 ; UNKNOWN 5352 0x125A, // 125A..125D; ETHIOPIC 5353 0x125E, // 125E..125F; UNKNOWN 5354 0x1260, // 1260..1288; ETHIOPIC 5355 0x1289, // 1289 ; UNKNOWN 5356 0x128A, // 128A..128D; ETHIOPIC 5357 0x128E, // 128E..128F; UNKNOWN 5358 0x1290, // 1290..12B0; ETHIOPIC 5359 0x12B1, // 12B1 ; UNKNOWN 5360 0x12B2, // 12B2..12B5; ETHIOPIC 5361 0x12B6, // 12B6..12B7; UNKNOWN 5362 0x12B8, // 12B8..12BE; ETHIOPIC 5363 0x12BF, // 12BF ; UNKNOWN 5364 0x12C0, // 12C0 ; ETHIOPIC 5365 0x12C1, // 12C1 ; UNKNOWN 5366 0x12C2, // 12C2..12C5; ETHIOPIC 5367 0x12C6, // 12C6..12C7; UNKNOWN 5368 0x12C8, // 12C8..12D6; ETHIOPIC 5369 0x12D7, // 12D7 ; UNKNOWN 5370 0x12D8, // 12D8..1310; ETHIOPIC 5371 0x1311, // 1311 ; UNKNOWN 5372 0x1312, // 1312..1315; ETHIOPIC 5373 0x1316, // 1316..1317; UNKNOWN 5374 0x1318, // 1318..135A; ETHIOPIC 5375 0x135B, // 135B..135C; UNKNOWN 5376 0x135D, // 135D..137C; ETHIOPIC 5377 0x137D, // 137D..137F; UNKNOWN 5378 0x1380, // 1380..1399; ETHIOPIC 5379 0x139A, // 139A..139F; UNKNOWN 5380 0x13A0, // 13A0..13F5; CHEROKEE 5381 0x13F6, // 13F6..13F7; UNKNOWN 5382 0x13F8, // 13F8..13FD; CHEROKEE 5383 0x13FE, // 13FE..13FF; UNKNOWN 5384 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5385 0x1680, // 1680..169C; OGHAM 5386 0x169D, // 169D..169F; UNKNOWN 5387 0x16A0, // 16A0..16EA; RUNIC 5388 0x16EB, // 16EB..16ED; COMMON 5389 0x16EE, // 16EE..16F8; RUNIC 5390 0x16F9, // 16F9..16FF; UNKNOWN 5391 0x1700, // 1700..170C; TAGALOG 5392 0x170D, // 170D ; UNKNOWN 5393 0x170E, // 170E..1714; TAGALOG 5394 0x1715, // 1715..171F; UNKNOWN 5395 0x1720, // 1720..1734; HANUNOO 5396 0x1735, // 1735..1736; COMMON 5397 0x1737, // 1737..173F; UNKNOWN 5398 0x1740, // 1740..1753; BUHID 5399 0x1754, // 1754..175F; UNKNOWN 5400 0x1760, // 1760..176C; TAGBANWA 5401 0x176D, // 176D ; UNKNOWN 5402 0x176E, // 176E..1770; TAGBANWA 5403 0x1771, // 1771 ; UNKNOWN 5404 0x1772, // 1772..1773; TAGBANWA 5405 0x1774, // 1774..177F; UNKNOWN 5406 0x1780, // 1780..17DD; KHMER 5407 0x17DE, // 
17DE..17DF; UNKNOWN 5408 0x17E0, // 17E0..17E9; KHMER 5409 0x17EA, // 17EA..17EF; UNKNOWN 5410 0x17F0, // 17F0..17F9; KHMER 5411 0x17FA, // 17FA..17FF; UNKNOWN 5412 0x1800, // 1800..1801; MONGOLIAN 5413 0x1802, // 1802..1803; COMMON 5414 0x1804, // 1804 ; MONGOLIAN 5415 0x1805, // 1805 ; COMMON 5416 0x1806, // 1806..180E; MONGOLIAN 5417 0x180F, // 180F ; UNKNOWN 5418 0x1810, // 1810..1819; MONGOLIAN 5419 0x181A, // 181A..181F; UNKNOWN 5420 0x1820, // 1820..1878; MONGOLIAN 5421 0x1879, // 1879..187F; UNKNOWN 5422 0x1880, // 1880..18AA; MONGOLIAN 5423 0x18AB, // 18AB..18AF; UNKNOWN 5424 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5425 0x18F6, // 18F6..18FF; UNKNOWN 5426 0x1900, // 1900..191E; LIMBU 5427 0x191F, // 191F ; UNKNOWN 5428 0x1920, // 1920..192B; LIMBU 5429 0x192C, // 192C..192F; UNKNOWN 5430 0x1930, // 1930..193B; LIMBU 5431 0x193C, // 193C..193F; UNKNOWN 5432 0x1940, // 1940 ; LIMBU 5433 0x1941, // 1941..1943; UNKNOWN 5434 0x1944, // 1944..194F; LIMBU 5435 0x1950, // 1950..196D; TAI_LE 5436 0x196E, // 196E..196F; UNKNOWN 5437 0x1970, // 1970..1974; TAI_LE 5438 0x1975, // 1975..197F; UNKNOWN 5439 0x1980, // 1980..19AB; NEW_TAI_LUE 5440 0x19AC, // 19AC..19AF; UNKNOWN 5441 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5442 0x19CA, // 19CA..19CF; UNKNOWN 5443 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5444 0x19DB, // 19DB..19DD; UNKNOWN 5445 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5446 0x19E0, // 19E0..19FF; KHMER 5447 0x1A00, // 1A00..1A1B; BUGINESE 5448 0x1A1C, // 1A1C..1A1D; UNKNOWN 5449 0x1A1E, // 1A1E..1A1F; BUGINESE 5450 0x1A20, // 1A20..1A5E; TAI_THAM 5451 0x1A5F, // 1A5F ; UNKNOWN 5452 0x1A60, // 1A60..1A7C; TAI_THAM 5453 0x1A7D, // 1A7D..1A7E; UNKNOWN 5454 0x1A7F, // 1A7F..1A89; TAI_THAM 5455 0x1A8A, // 1A8A..1A8F; UNKNOWN 5456 0x1A90, // 1A90..1A99; TAI_THAM 5457 0x1A9A, // 1A9A..1A9F; UNKNOWN 5458 0x1AA0, // 1AA0..1AAD; TAI_THAM 5459 0x1AAE, // 1AAE..1AAF; UNKNOWN 5460 0x1AB0, // 1AB0..1ABE; INHERITED 5461 0x1ABF, // 1ABF..1AFF; UNKNOWN 5462 0x1B00, // 1B00..1B4B; BALINESE 
5463 0x1B4C, // 1B4C..1B4F; UNKNOWN 5464 0x1B50, // 1B50..1B7C; BALINESE 5465 0x1B7D, // 1B7D..1B7F; UNKNOWN 5466 0x1B80, // 1B80..1BBF; SUNDANESE 5467 0x1BC0, // 1BC0..1BF3; BATAK 5468 0x1BF4, // 1BF4..1BFB; UNKNOWN 5469 0x1BFC, // 1BFC..1BFF; BATAK 5470 0x1C00, // 1C00..1C37; LEPCHA 5471 0x1C38, // 1C38..1C3A; UNKNOWN 5472 0x1C3B, // 1C3B..1C49; LEPCHA 5473 0x1C4A, // 1C4A..1C4C; UNKNOWN 5474 0x1C4D, // 1C4D..1C4F; LEPCHA 5475 0x1C50, // 1C50..1C7F; OL_CHIKI 5476 0x1C80, // 1C80..1C88; CYRILLIC 5477 0x1C89, // 1C89..1C8F; UNKNOWN 5478 0x1C90, // 1C90..1CBA; GEORGIAN 5479 0x1CBB, // 1CBB..1CBC; UNKNOWN 5480 0x1CBD, // 1CBD..1CBF; GEORGIAN 5481 0x1CC0, // 1CC0..1CC7; SUNDANESE 5482 0x1CC8, // 1CC8..1CCF; UNKNOWN 5483 0x1CD0, // 1CD0..1CD2; INHERITED 5484 0x1CD3, // 1CD3 ; COMMON 5485 0x1CD4, // 1CD4..1CE0; INHERITED 5486 0x1CE1, // 1CE1 ; COMMON 5487 0x1CE2, // 1CE2..1CE8; INHERITED 5488 0x1CE9, // 1CE9..1CEC; COMMON 5489 0x1CED, // 1CED ; INHERITED 5490 0x1CEE, // 1CEE..1CF3; COMMON 5491 0x1CF4, // 1CF4 ; INHERITED 5492 0x1CF5, // 1CF5..1CF7; COMMON 5493 0x1CF8, // 1CF8..1CF9; INHERITED 5494 0x1CFA, // 1CFA ; COMMON 5495 0x1CFB, // 1CFB..1CFF; UNKNOWN 5496 0x1D00, // 1D00..1D25; LATIN 5497 0x1D26, // 1D26..1D2A; GREEK 5498 0x1D2B, // 1D2B ; CYRILLIC 5499 0x1D2C, // 1D2C..1D5C; LATIN 5500 0x1D5D, // 1D5D..1D61; GREEK 5501 0x1D62, // 1D62..1D65; LATIN 5502 0x1D66, // 1D66..1D6A; GREEK 5503 0x1D6B, // 1D6B..1D77; LATIN 5504 0x1D78, // 1D78 ; CYRILLIC 5505 0x1D79, // 1D79..1DBE; LATIN 5506 0x1DBF, // 1DBF ; GREEK 5507 0x1DC0, // 1DC0..1DF9; INHERITED 5508 0x1DFA, // 1DFA ; UNKNOWN 5509 0x1DFB, // 1DFB..1DFF; INHERITED 5510 0x1E00, // 1E00..1EFF; LATIN 5511 0x1F00, // 1F00..1F15; GREEK 5512 0x1F16, // 1F16..1F17; UNKNOWN 5513 0x1F18, // 1F18..1F1D; GREEK 5514 0x1F1E, // 1F1E..1F1F; UNKNOWN 5515 0x1F20, // 1F20..1F45; GREEK 5516 0x1F46, // 1F46..1F47; UNKNOWN 5517 0x1F48, // 1F48..1F4D; GREEK 5518 0x1F4E, // 1F4E..1F4F; UNKNOWN 5519 0x1F50, // 1F50..1F57; GREEK 5520 
0x1F58, // 1F58 ; UNKNOWN 5521 0x1F59, // 1F59 ; GREEK 5522 0x1F5A, // 1F5A ; UNKNOWN 5523 0x1F5B, // 1F5B ; GREEK 5524 0x1F5C, // 1F5C ; UNKNOWN 5525 0x1F5D, // 1F5D ; GREEK 5526 0x1F5E, // 1F5E ; UNKNOWN 5527 0x1F5F, // 1F5F..1F7D; GREEK 5528 0x1F7E, // 1F7E..1F7F; UNKNOWN 5529 0x1F80, // 1F80..1FB4; GREEK 5530 0x1FB5, // 1FB5 ; UNKNOWN 5531 0x1FB6, // 1FB6..1FC4; GREEK 5532 0x1FC5, // 1FC5 ; UNKNOWN 5533 0x1FC6, // 1FC6..1FD3; GREEK 5534 0x1FD4, // 1FD4..1FD5; UNKNOWN 5535 0x1FD6, // 1FD6..1FDB; GREEK 5536 0x1FDC, // 1FDC ; UNKNOWN 5537 0x1FDD, // 1FDD..1FEF; GREEK 5538 0x1FF0, // 1FF0..1FF1; UNKNOWN 5539 0x1FF2, // 1FF2..1FF4; GREEK 5540 0x1FF5, // 1FF5 ; UNKNOWN 5541 0x1FF6, // 1FF6..1FFE; GREEK 5542 0x1FFF, // 1FFF ; UNKNOWN 5543 0x2000, // 2000..200B; COMMON 5544 0x200C, // 200C..200D; INHERITED 5545 0x200E, // 200E..2064; COMMON 5546 0x2065, // 2065 ; UNKNOWN 5547 0x2066, // 2066..2070; COMMON 5548 0x2071, // 2071 ; LATIN 5549 0x2072, // 2072..2073; UNKNOWN 5550 0x2074, // 2074..207E; COMMON 5551 0x207F, // 207F ; LATIN 5552 0x2080, // 2080..208E; COMMON 5553 0x208F, // 208F ; UNKNOWN 5554 0x2090, // 2090..209C; LATIN 5555 0x209D, // 209D..209F; UNKNOWN 5556 0x20A0, // 20A0..20BF; COMMON 5557 0x20C0, // 20C0..20CF; UNKNOWN 5558 0x20D0, // 20D0..20F0; INHERITED 5559 0x20F1, // 20F1..20FF; UNKNOWN 5560 0x2100, // 2100..2125; COMMON 5561 0x2126, // 2126 ; GREEK 5562 0x2127, // 2127..2129; COMMON 5563 0x212A, // 212A..212B; LATIN 5564 0x212C, // 212C..2131; COMMON 5565 0x2132, // 2132 ; LATIN 5566 0x2133, // 2133..214D; COMMON 5567 0x214E, // 214E ; LATIN 5568 0x214F, // 214F..215F; COMMON 5569 0x2160, // 2160..2188; LATIN 5570 0x2189, // 2189..218B; COMMON 5571 0x218C, // 218C..218F; UNKNOWN 5572 0x2190, // 2190..2426; COMMON 5573 0x2427, // 2427..243F; UNKNOWN 5574 0x2440, // 2440..244A; COMMON 5575 0x244B, // 244B..245F; UNKNOWN 5576 0x2460, // 2460..27FF; COMMON 5577 0x2800, // 2800..28FF; BRAILLE 5578 0x2900, // 2900..2B73; COMMON 5579 0x2B74, // 
2B74..2B75; UNKNOWN 5580 0x2B76, // 2B76..2B95; COMMON 5581 0x2B96, // 2B96..2B97; UNKNOWN 5582 0x2B98, // 2B98..2BFF; COMMON 5583 0x2C00, // 2C00..2C2E; GLAGOLITIC 5584 0x2C2F, // 2C2F ; UNKNOWN 5585 0x2C30, // 2C30..2C5E; GLAGOLITIC 5586 0x2C5F, // 2C5F ; UNKNOWN 5587 0x2C60, // 2C60..2C7F; LATIN 5588 0x2C80, // 2C80..2CF3; COPTIC 5589 0x2CF4, // 2CF4..2CF8; UNKNOWN 5590 0x2CF9, // 2CF9..2CFF; COPTIC 5591 0x2D00, // 2D00..2D25; GEORGIAN 5592 0x2D26, // 2D26 ; UNKNOWN 5593 0x2D27, // 2D27 ; GEORGIAN 5594 0x2D28, // 2D28..2D2C; UNKNOWN 5595 0x2D2D, // 2D2D ; GEORGIAN 5596 0x2D2E, // 2D2E..2D2F; UNKNOWN 5597 0x2D30, // 2D30..2D67; TIFINAGH 5598 0x2D68, // 2D68..2D6E; UNKNOWN 5599 0x2D6F, // 2D6F..2D70; TIFINAGH 5600 0x2D71, // 2D71..2D7E; UNKNOWN 5601 0x2D7F, // 2D7F ; TIFINAGH 5602 0x2D80, // 2D80..2D96; ETHIOPIC 5603 0x2D97, // 2D97..2D9F; UNKNOWN 5604 0x2DA0, // 2DA0..2DA6; ETHIOPIC 5605 0x2DA7, // 2DA7 ; UNKNOWN 5606 0x2DA8, // 2DA8..2DAE; ETHIOPIC 5607 0x2DAF, // 2DAF ; UNKNOWN 5608 0x2DB0, // 2DB0..2DB6; ETHIOPIC 5609 0x2DB7, // 2DB7 ; UNKNOWN 5610 0x2DB8, // 2DB8..2DBE; ETHIOPIC 5611 0x2DBF, // 2DBF ; UNKNOWN 5612 0x2DC0, // 2DC0..2DC6; ETHIOPIC 5613 0x2DC7, // 2DC7 ; UNKNOWN 5614 0x2DC8, // 2DC8..2DCE; ETHIOPIC 5615 0x2DCF, // 2DCF ; UNKNOWN 5616 0x2DD0, // 2DD0..2DD6; ETHIOPIC 5617 0x2DD7, // 2DD7 ; UNKNOWN 5618 0x2DD8, // 2DD8..2DDE; ETHIOPIC 5619 0x2DDF, // 2DDF ; UNKNOWN 5620 0x2DE0, // 2DE0..2DFF; CYRILLIC 5621 0x2E00, // 2E00..2E4F; COMMON 5622 0x2E50, // 2E50..2E7F; UNKNOWN 5623 0x2E80, // 2E80..2E99; HAN 5624 0x2E9A, // 2E9A ; UNKNOWN 5625 0x2E9B, // 2E9B..2EF3; HAN 5626 0x2EF4, // 2EF4..2EFF; UNKNOWN 5627 0x2F00, // 2F00..2FD5; HAN 5628 0x2FD6, // 2FD6..2FEF; UNKNOWN 5629 0x2FF0, // 2FF0..2FFB; COMMON 5630 0x2FFC, // 2FFC..2FFF; UNKNOWN 5631 0x3000, // 3000..3004; COMMON 5632 0x3005, // 3005 ; HAN 5633 0x3006, // 3006 ; COMMON 5634 0x3007, // 3007 ; HAN 5635 0x3008, // 3008..3020; COMMON 5636 0x3021, // 3021..3029; HAN 5637 0x302A, // 302A..302D; 
INHERITED 5638 0x302E, // 302E..302F; HANGUL 5639 0x3030, // 3030..3037; COMMON 5640 0x3038, // 3038..303B; HAN 5641 0x303C, // 303C..303F; COMMON 5642 0x3040, // 3040 ; UNKNOWN 5643 0x3041, // 3041..3096; HIRAGANA 5644 0x3097, // 3097..3098; UNKNOWN 5645 0x3099, // 3099..309A; INHERITED 5646 0x309B, // 309B..309C; COMMON 5647 0x309D, // 309D..309F; HIRAGANA 5648 0x30A0, // 30A0 ; COMMON 5649 0x30A1, // 30A1..30FA; KATAKANA 5650 0x30FB, // 30FB..30FC; COMMON 5651 0x30FD, // 30FD..30FF; KATAKANA 5652 0x3100, // 3100..3104; UNKNOWN 5653 0x3105, // 3105..312F; BOPOMOFO 5654 0x3130, // 3130 ; UNKNOWN 5655 0x3131, // 3131..318E; HANGUL 5656 0x318F, // 318F ; UNKNOWN 5657 0x3190, // 3190..319F; COMMON 5658 0x31A0, // 31A0..31BA; BOPOMOFO 5659 0x31BB, // 31BB..31BF; UNKNOWN 5660 0x31C0, // 31C0..31E3; COMMON 5661 0x31E4, // 31E4..31EF; UNKNOWN 5662 0x31F0, // 31F0..31FF; KATAKANA 5663 0x3200, // 3200..321E; HANGUL 5664 0x321F, // 321F ; UNKNOWN 5665 0x3220, // 3220..325F; COMMON 5666 0x3260, // 3260..327E; HANGUL 5667 0x327F, // 327F..32CF; COMMON 5668 0x32D0, // 32D0..32FE; KATAKANA 5669 0x32FF, // 32FF ; COMMON 5670 0x3300, // 3300..3357; KATAKANA 5671 0x3358, // 3358..33FF; COMMON 5672 0x3400, // 3400..4DB5; HAN 5673 0x4DB6, // 4DB6..4DBF; UNKNOWN 5674 0x4DC0, // 4DC0..4DFF; COMMON 5675 0x4E00, // 4E00..9FEF; HAN 5676 0x9FF0, // 9FF0..9FFF; UNKNOWN 5677 0xA000, // A000..A48C; YI 5678 0xA48D, // A48D..A48F; UNKNOWN 5679 0xA490, // A490..A4C6; YI 5680 0xA4C7, // A4C7..A4CF; UNKNOWN 5681 0xA4D0, // A4D0..A4FF; LISU 5682 0xA500, // A500..A62B; VAI 5683 0xA62C, // A62C..A63F; UNKNOWN 5684 0xA640, // A640..A69F; CYRILLIC 5685 0xA6A0, // A6A0..A6F7; BAMUM 5686 0xA6F8, // A6F8..A6FF; UNKNOWN 5687 0xA700, // A700..A721; COMMON 5688 0xA722, // A722..A787; LATIN 5689 0xA788, // A788..A78A; COMMON 5690 0xA78B, // A78B..A7BF; LATIN 5691 0xA7C0, // A7C0..A7C1; UNKNOWN 5692 0xA7C2, // A7C2..A7C6; LATIN 5693 0xA7C7, // A7C7..A7F6; UNKNOWN 5694 0xA7F7, // A7F7..A7FF; LATIN 5695 0xA800, 
// A800..A82B; SYLOTI_NAGRI 5696 0xA82C, // A82C..A82F; UNKNOWN 5697 0xA830, // A830..A839; COMMON 5698 0xA83A, // A83A..A83F; UNKNOWN 5699 0xA840, // A840..A877; PHAGS_PA 5700 0xA878, // A878..A87F; UNKNOWN 5701 0xA880, // A880..A8C5; SAURASHTRA 5702 0xA8C6, // A8C6..A8CD; UNKNOWN 5703 0xA8CE, // A8CE..A8D9; SAURASHTRA 5704 0xA8DA, // A8DA..A8DF; UNKNOWN 5705 0xA8E0, // A8E0..A8FF; DEVANAGARI 5706 0xA900, // A900..A92D; KAYAH_LI 5707 0xA92E, // A92E ; COMMON 5708 0xA92F, // A92F ; KAYAH_LI 5709 0xA930, // A930..A953; REJANG 5710 0xA954, // A954..A95E; UNKNOWN 5711 0xA95F, // A95F ; REJANG 5712 0xA960, // A960..A97C; HANGUL 5713 0xA97D, // A97D..A97F; UNKNOWN 5714 0xA980, // A980..A9CD; JAVANESE 5715 0xA9CE, // A9CE ; UNKNOWN 5716 0xA9CF, // A9CF ; COMMON 5717 0xA9D0, // A9D0..A9D9; JAVANESE 5718 0xA9DA, // A9DA..A9DD; UNKNOWN 5719 0xA9DE, // A9DE..A9DF; JAVANESE 5720 0xA9E0, // A9E0..A9FE; MYANMAR 5721 0xA9FF, // A9FF ; UNKNOWN 5722 0xAA00, // AA00..AA36; CHAM 5723 0xAA37, // AA37..AA3F; UNKNOWN 5724 0xAA40, // AA40..AA4D; CHAM 5725 0xAA4E, // AA4E..AA4F; UNKNOWN 5726 0xAA50, // AA50..AA59; CHAM 5727 0xAA5A, // AA5A..AA5B; UNKNOWN 5728 0xAA5C, // AA5C..AA5F; CHAM 5729 0xAA60, // AA60..AA7F; MYANMAR 5730 0xAA80, // AA80..AAC2; TAI_VIET 5731 0xAAC3, // AAC3..AADA; UNKNOWN 5732 0xAADB, // AADB..AADF; TAI_VIET 5733 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5734 0xAAF7, // AAF7..AB00; UNKNOWN 5735 0xAB01, // AB01..AB06; ETHIOPIC 5736 0xAB07, // AB07..AB08; UNKNOWN 5737 0xAB09, // AB09..AB0E; ETHIOPIC 5738 0xAB0F, // AB0F..AB10; UNKNOWN 5739 0xAB11, // AB11..AB16; ETHIOPIC 5740 0xAB17, // AB17..AB1F; UNKNOWN 5741 0xAB20, // AB20..AB26; ETHIOPIC 5742 0xAB27, // AB27 ; UNKNOWN 5743 0xAB28, // AB28..AB2E; ETHIOPIC 5744 0xAB2F, // AB2F ; UNKNOWN 5745 0xAB30, // AB30..AB5A; LATIN 5746 0xAB5B, // AB5B ; COMMON 5747 0xAB5C, // AB5C..AB64; LATIN 5748 0xAB65, // AB65 ; GREEK 5749 0xAB66, // AB66..AB67; LATIN 5750 0xAB68, // AB68..AB6F; UNKNOWN 5751 0xAB70, // AB70..ABBF; CHEROKEE 5752 
0xABC0, // ABC0..ABED; MEETEI_MAYEK 5753 0xABEE, // ABEE..ABEF; UNKNOWN 5754 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5755 0xABFA, // ABFA..ABFF; UNKNOWN 5756 0xAC00, // AC00..D7A3; HANGUL 5757 0xD7A4, // D7A4..D7AF; UNKNOWN 5758 0xD7B0, // D7B0..D7C6; HANGUL 5759 0xD7C7, // D7C7..D7CA; UNKNOWN 5760 0xD7CB, // D7CB..D7FB; HANGUL 5761 0xD7FC, // D7FC..F8FF; UNKNOWN 5762 0xF900, // F900..FA6D; HAN 5763 0xFA6E, // FA6E..FA6F; UNKNOWN 5764 0xFA70, // FA70..FAD9; HAN 5765 0xFADA, // FADA..FAFF; UNKNOWN 5766 0xFB00, // FB00..FB06; LATIN 5767 0xFB07, // FB07..FB12; UNKNOWN 5768 0xFB13, // FB13..FB17; ARMENIAN 5769 0xFB18, // FB18..FB1C; UNKNOWN 5770 0xFB1D, // FB1D..FB36; HEBREW 5771 0xFB37, // FB37 ; UNKNOWN 5772 0xFB38, // FB38..FB3C; HEBREW 5773 0xFB3D, // FB3D ; UNKNOWN 5774 0xFB3E, // FB3E ; HEBREW 5775 0xFB3F, // FB3F ; UNKNOWN 5776 0xFB40, // FB40..FB41; HEBREW 5777 0xFB42, // FB42 ; UNKNOWN 5778 0xFB43, // FB43..FB44; HEBREW 5779 0xFB45, // FB45 ; UNKNOWN 5780 0xFB46, // FB46..FB4F; HEBREW 5781 0xFB50, // FB50..FBC1; ARABIC 5782 0xFBC2, // FBC2..FBD2; UNKNOWN 5783 0xFBD3, // FBD3..FD3D; ARABIC 5784 0xFD3E, // FD3E..FD3F; COMMON 5785 0xFD40, // FD40..FD4F; UNKNOWN 5786 0xFD50, // FD50..FD8F; ARABIC 5787 0xFD90, // FD90..FD91; UNKNOWN 5788 0xFD92, // FD92..FDC7; ARABIC 5789 0xFDC8, // FDC8..FDEF; UNKNOWN 5790 0xFDF0, // FDF0..FDFD; ARABIC 5791 0xFDFE, // FDFE..FDFF; UNKNOWN 5792 0xFE00, // FE00..FE0F; INHERITED 5793 0xFE10, // FE10..FE19; COMMON 5794 0xFE1A, // FE1A..FE1F; UNKNOWN 5795 0xFE20, // FE20..FE2D; INHERITED 5796 0xFE2E, // FE2E..FE2F; CYRILLIC 5797 0xFE30, // FE30..FE52; COMMON 5798 0xFE53, // FE53 ; UNKNOWN 5799 0xFE54, // FE54..FE66; COMMON 5800 0xFE67, // FE67 ; UNKNOWN 5801 0xFE68, // FE68..FE6B; COMMON 5802 0xFE6C, // FE6C..FE6F; UNKNOWN 5803 0xFE70, // FE70..FE74; ARABIC 5804 0xFE75, // FE75 ; UNKNOWN 5805 0xFE76, // FE76..FEFC; ARABIC 5806 0xFEFD, // FEFD..FEFE; UNKNOWN 5807 0xFEFF, // FEFF ; COMMON 5808 0xFF00, // FF00 ; UNKNOWN 5809 0xFF01, // 
FF01..FF20; COMMON 5810 0xFF21, // FF21..FF3A; LATIN 5811 0xFF3B, // FF3B..FF40; COMMON 5812 0xFF41, // FF41..FF5A; LATIN 5813 0xFF5B, // FF5B..FF65; COMMON 5814 0xFF66, // FF66..FF6F; KATAKANA 5815 0xFF70, // FF70 ; COMMON 5816 0xFF71, // FF71..FF9D; KATAKANA 5817 0xFF9E, // FF9E..FF9F; COMMON 5818 0xFFA0, // FFA0..FFBE; HANGUL 5819 0xFFBF, // FFBF..FFC1; UNKNOWN 5820 0xFFC2, // FFC2..FFC7; HANGUL 5821 0xFFC8, // FFC8..FFC9; UNKNOWN 5822 0xFFCA, // FFCA..FFCF; HANGUL 5823 0xFFD0, // FFD0..FFD1; UNKNOWN 5824 0xFFD2, // FFD2..FFD7; HANGUL 5825 0xFFD8, // FFD8..FFD9; UNKNOWN 5826 0xFFDA, // FFDA..FFDC; HANGUL 5827 0xFFDD, // FFDD..FFDF; UNKNOWN 5828 0xFFE0, // FFE0..FFE6; COMMON 5829 0xFFE7, // FFE7 ; UNKNOWN 5830 0xFFE8, // FFE8..FFEE; COMMON 5831 0xFFEF, // FFEF..FFF8; UNKNOWN 5832 0xFFF9, // FFF9..FFFD; COMMON 5833 0xFFFE, // FFFE..FFFF; UNKNOWN 5834 0x10000, // 10000..1000B; LINEAR_B 5835 0x1000C, // 1000C ; UNKNOWN 5836 0x1000D, // 1000D..10026; LINEAR_B 5837 0x10027, // 10027 ; UNKNOWN 5838 0x10028, // 10028..1003A; LINEAR_B 5839 0x1003B, // 1003B ; UNKNOWN 5840 0x1003C, // 1003C..1003D; LINEAR_B 5841 0x1003E, // 1003E ; UNKNOWN 5842 0x1003F, // 1003F..1004D; LINEAR_B 5843 0x1004E, // 1004E..1004F; UNKNOWN 5844 0x10050, // 10050..1005D; LINEAR_B 5845 0x1005E, // 1005E..1007F; UNKNOWN 5846 0x10080, // 10080..100FA; LINEAR_B 5847 0x100FB, // 100FB..100FF; UNKNOWN 5848 0x10100, // 10100..10102; COMMON 5849 0x10103, // 10103..10106; UNKNOWN 5850 0x10107, // 10107..10133; COMMON 5851 0x10134, // 10134..10136; UNKNOWN 5852 0x10137, // 10137..1013F; COMMON 5853 0x10140, // 10140..1018E; GREEK 5854 0x1018F, // 1018F ; UNKNOWN 5855 0x10190, // 10190..1019B; COMMON 5856 0x1019C, // 1019C..1019F; UNKNOWN 5857 0x101A0, // 101A0 ; GREEK 5858 0x101A1, // 101A1..101CF; UNKNOWN 5859 0x101D0, // 101D0..101FC; COMMON 5860 0x101FD, // 101FD ; INHERITED 5861 0x101FE, // 101FE..1027F; UNKNOWN 5862 0x10280, // 10280..1029C; LYCIAN 5863 0x1029D, // 1029D..1029F; UNKNOWN 5864 0x102A0, 
// 102A0..102D0; CARIAN 5865 0x102D1, // 102D1..102DF; UNKNOWN 5866 0x102E0, // 102E0 ; INHERITED 5867 0x102E1, // 102E1..102FB; COMMON 5868 0x102FC, // 102FC..102FF; UNKNOWN 5869 0x10300, // 10300..10323; OLD_ITALIC 5870 0x10324, // 10324..1032C; UNKNOWN 5871 0x1032D, // 1032D..1032F; OLD_ITALIC 5872 0x10330, // 10330..1034A; GOTHIC 5873 0x1034B, // 1034B..1034F; UNKNOWN 5874 0x10350, // 10350..1037A; OLD_PERMIC 5875 0x1037B, // 1037B..1037F; UNKNOWN 5876 0x10380, // 10380..1039D; UGARITIC 5877 0x1039E, // 1039E ; UNKNOWN 5878 0x1039F, // 1039F ; UGARITIC 5879 0x103A0, // 103A0..103C3; OLD_PERSIAN 5880 0x103C4, // 103C4..103C7; UNKNOWN 5881 0x103C8, // 103C8..103D5; OLD_PERSIAN 5882 0x103D6, // 103D6..103FF; UNKNOWN 5883 0x10400, // 10400..1044F; DESERET 5884 0x10450, // 10450..1047F; SHAVIAN 5885 0x10480, // 10480..1049D; OSMANYA 5886 0x1049E, // 1049E..1049F; UNKNOWN 5887 0x104A0, // 104A0..104A9; OSMANYA 5888 0x104AA, // 104AA..104AF; UNKNOWN 5889 0x104B0, // 104B0..104D3; OSAGE 5890 0x104D4, // 104D4..104D7; UNKNOWN 5891 0x104D8, // 104D8..104FB; OSAGE 5892 0x104FC, // 104FC..104FF; UNKNOWN 5893 0x10500, // 10500..10527; ELBASAN 5894 0x10528, // 10528..1052F; UNKNOWN 5895 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 5896 0x10564, // 10564..1056E; UNKNOWN 5897 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 5898 0x10570, // 10570..105FF; UNKNOWN 5899 0x10600, // 10600..10736; LINEAR_A 5900 0x10737, // 10737..1073F; UNKNOWN 5901 0x10740, // 10740..10755; LINEAR_A 5902 0x10756, // 10756..1075F; UNKNOWN 5903 0x10760, // 10760..10767; LINEAR_A 5904 0x10768, // 10768..107FF; UNKNOWN 5905 0x10800, // 10800..10805; CYPRIOT 5906 0x10806, // 10806..10807; UNKNOWN 5907 0x10808, // 10808 ; CYPRIOT 5908 0x10809, // 10809 ; UNKNOWN 5909 0x1080A, // 1080A..10835; CYPRIOT 5910 0x10836, // 10836 ; UNKNOWN 5911 0x10837, // 10837..10838; CYPRIOT 5912 0x10839, // 10839..1083B; UNKNOWN 5913 0x1083C, // 1083C ; CYPRIOT 5914 0x1083D, // 1083D..1083E; UNKNOWN 5915 0x1083F, // 1083F ; CYPRIOT 
5916 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 5917 0x10856, // 10856 ; UNKNOWN 5918 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 5919 0x10860, // 10860..1087F; PALMYRENE 5920 0x10880, // 10880..1089E; NABATAEAN 5921 0x1089F, // 1089F..108A6; UNKNOWN 5922 0x108A7, // 108A7..108AF; NABATAEAN 5923 0x108B0, // 108B0..108DF; UNKNOWN 5924 0x108E0, // 108E0..108F2; HATRAN 5925 0x108F3, // 108F3 ; UNKNOWN 5926 0x108F4, // 108F4..108F5; HATRAN 5927 0x108F6, // 108F6..108FA; UNKNOWN 5928 0x108FB, // 108FB..108FF; HATRAN 5929 0x10900, // 10900..1091B; PHOENICIAN 5930 0x1091C, // 1091C..1091E; UNKNOWN 5931 0x1091F, // 1091F ; PHOENICIAN 5932 0x10920, // 10920..10939; LYDIAN 5933 0x1093A, // 1093A..1093E; UNKNOWN 5934 0x1093F, // 1093F ; LYDIAN 5935 0x10940, // 10940..1097F; UNKNOWN 5936 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 5937 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 5938 0x109B8, // 109B8..109BB; UNKNOWN 5939 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 5940 0x109D0, // 109D0..109D1; UNKNOWN 5941 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 5942 0x10A00, // 10A00..10A03; KHAROSHTHI 5943 0x10A04, // 10A04 ; UNKNOWN 5944 0x10A05, // 10A05..10A06; KHAROSHTHI 5945 0x10A07, // 10A07..10A0B; UNKNOWN 5946 0x10A0C, // 10A0C..10A13; KHAROSHTHI 5947 0x10A14, // 10A14 ; UNKNOWN 5948 0x10A15, // 10A15..10A17; KHAROSHTHI 5949 0x10A18, // 10A18 ; UNKNOWN 5950 0x10A19, // 10A19..10A35; KHAROSHTHI 5951 0x10A36, // 10A36..10A37; UNKNOWN 5952 0x10A38, // 10A38..10A3A; KHAROSHTHI 5953 0x10A3B, // 10A3B..10A3E; UNKNOWN 5954 0x10A3F, // 10A3F..10A48; KHAROSHTHI 5955 0x10A49, // 10A49..10A4F; UNKNOWN 5956 0x10A50, // 10A50..10A58; KHAROSHTHI 5957 0x10A59, // 10A59..10A5F; UNKNOWN 5958 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 5959 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 5960 0x10AA0, // 10AA0..10ABF; UNKNOWN 5961 0x10AC0, // 10AC0..10AE6; MANICHAEAN 5962 0x10AE7, // 10AE7..10AEA; UNKNOWN 5963 0x10AEB, // 10AEB..10AF6; MANICHAEAN 5964 0x10AF7, // 10AF7..10AFF; UNKNOWN 5965 0x10B00, 
// 10B00..10B35; AVESTAN 5966 0x10B36, // 10B36..10B38; UNKNOWN 5967 0x10B39, // 10B39..10B3F; AVESTAN 5968 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 5969 0x10B56, // 10B56..10B57; UNKNOWN 5970 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 5971 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 5972 0x10B73, // 10B73..10B77; UNKNOWN 5973 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 5974 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 5975 0x10B92, // 10B92..10B98; UNKNOWN 5976 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 5977 0x10B9D, // 10B9D..10BA8; UNKNOWN 5978 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 5979 0x10BB0, // 10BB0..10BFF; UNKNOWN 5980 0x10C00, // 10C00..10C48; OLD_TURKIC 5981 0x10C49, // 10C49..10C7F; UNKNOWN 5982 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 5983 0x10CB3, // 10CB3..10CBF; UNKNOWN 5984 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 5985 0x10CF3, // 10CF3..10CF9; UNKNOWN 5986 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 5987 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 5988 0x10D28, // 10D28..10D2F; UNKNOWN 5989 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 5990 0x10D3A, // 10D3A..10E5F; UNKNOWN 5991 0x10E60, // 10E60..10E7E; ARABIC 5992 0x10E7F, // 10E7F..10EFF; UNKNOWN 5993 0x10F00, // 10F00..10F27; OLD_SOGDIAN 5994 0x10F28, // 10F28..10F2F; UNKNOWN 5995 0x10F30, // 10F30..10F59; SOGDIAN 5996 0x10F5A, // 10F5A..10FDF; UNKNOWN 5997 0x10FE0, // 10FE0..10FF6; ELYMAIC 5998 0x10FF7, // 10FF7..10FFF; UNKNOWN 5999 0x11000, // 11000..1104D; BRAHMI 6000 0x1104E, // 1104E..11051; UNKNOWN 6001 0x11052, // 11052..1106F; BRAHMI 6002 0x11070, // 11070..1107E; UNKNOWN 6003 0x1107F, // 1107F ; BRAHMI 6004 0x11080, // 11080..110C1; KAITHI 6005 0x110C2, // 110C2..110CC; UNKNOWN 6006 0x110CD, // 110CD ; KAITHI 6007 0x110CE, // 110CE..110CF; UNKNOWN 6008 0x110D0, // 110D0..110E8; SORA_SOMPENG 6009 0x110E9, // 110E9..110EF; UNKNOWN 6010 0x110F0, // 110F0..110F9; SORA_SOMPENG 6011 0x110FA, // 110FA..110FF; UNKNOWN 6012 0x11100, // 11100..11134; CHAKMA 6013 0x11135, // 11135 ; 
UNKNOWN 6014 0x11136, // 11136..11146; CHAKMA 6015 0x11147, // 11147..1114F; UNKNOWN 6016 0x11150, // 11150..11176; MAHAJANI 6017 0x11177, // 11177..1117F; UNKNOWN 6018 0x11180, // 11180..111CD; SHARADA 6019 0x111CE, // 111CE..111CF; UNKNOWN 6020 0x111D0, // 111D0..111DF; SHARADA 6021 0x111E0, // 111E0 ; UNKNOWN 6022 0x111E1, // 111E1..111F4; SINHALA 6023 0x111F5, // 111F5..111FF; UNKNOWN 6024 0x11200, // 11200..11211; KHOJKI 6025 0x11212, // 11212 ; UNKNOWN 6026 0x11213, // 11213..1123E; KHOJKI 6027 0x1123F, // 1123F..1127F; UNKNOWN 6028 0x11280, // 11280..11286; MULTANI 6029 0x11287, // 11287 ; UNKNOWN 6030 0x11288, // 11288 ; MULTANI 6031 0x11289, // 11289 ; UNKNOWN 6032 0x1128A, // 1128A..1128D; MULTANI 6033 0x1128E, // 1128E ; UNKNOWN 6034 0x1128F, // 1128F..1129D; MULTANI 6035 0x1129E, // 1129E ; UNKNOWN 6036 0x1129F, // 1129F..112A9; MULTANI 6037 0x112AA, // 112AA..112AF; UNKNOWN 6038 0x112B0, // 112B0..112EA; KHUDAWADI 6039 0x112EB, // 112EB..112EF; UNKNOWN 6040 0x112F0, // 112F0..112F9; KHUDAWADI 6041 0x112FA, // 112FA..112FF; UNKNOWN 6042 0x11300, // 11300..11303; GRANTHA 6043 0x11304, // 11304 ; UNKNOWN 6044 0x11305, // 11305..1130C; GRANTHA 6045 0x1130D, // 1130D..1130E; UNKNOWN 6046 0x1130F, // 1130F..11310; GRANTHA 6047 0x11311, // 11311..11312; UNKNOWN 6048 0x11313, // 11313..11328; GRANTHA 6049 0x11329, // 11329 ; UNKNOWN 6050 0x1132A, // 1132A..11330; GRANTHA 6051 0x11331, // 11331 ; UNKNOWN 6052 0x11332, // 11332..11333; GRANTHA 6053 0x11334, // 11334 ; UNKNOWN 6054 0x11335, // 11335..11339; GRANTHA 6055 0x1133A, // 1133A ; UNKNOWN 6056 0x1133B, // 1133B ; INHERITED 6057 0x1133C, // 1133C..11344; GRANTHA 6058 0x11345, // 11345..11346; UNKNOWN 6059 0x11347, // 11347..11348; GRANTHA 6060 0x11349, // 11349..1134A; UNKNOWN 6061 0x1134B, // 1134B..1134D; GRANTHA 6062 0x1134E, // 1134E..1134F; UNKNOWN 6063 0x11350, // 11350 ; GRANTHA 6064 0x11351, // 11351..11356; UNKNOWN 6065 0x11357, // 11357 ; GRANTHA 6066 0x11358, // 11358..1135C; UNKNOWN 6067 
0x1135D, // 1135D..11363; GRANTHA 6068 0x11364, // 11364..11365; UNKNOWN 6069 0x11366, // 11366..1136C; GRANTHA 6070 0x1136D, // 1136D..1136F; UNKNOWN 6071 0x11370, // 11370..11374; GRANTHA 6072 0x11375, // 11375..113FF; UNKNOWN 6073 0x11400, // 11400..11459; NEWA 6074 0x1145A, // 1145A ; UNKNOWN 6075 0x1145B, // 1145B ; NEWA 6076 0x1145C, // 1145C ; UNKNOWN 6077 0x1145D, // 1145D..1145F; NEWA 6078 0x11460, // 11460..1147F; UNKNOWN 6079 0x11480, // 11480..114C7; TIRHUTA 6080 0x114C8, // 114C8..114CF; UNKNOWN 6081 0x114D0, // 114D0..114D9; TIRHUTA 6082 0x114DA, // 114DA..1157F; UNKNOWN 6083 0x11580, // 11580..115B5; SIDDHAM 6084 0x115B6, // 115B6..115B7; UNKNOWN 6085 0x115B8, // 115B8..115DD; SIDDHAM 6086 0x115DE, // 115DE..115FF; UNKNOWN 6087 0x11600, // 11600..11644; MODI 6088 0x11645, // 11645..1164F; UNKNOWN 6089 0x11650, // 11650..11659; MODI 6090 0x1165A, // 1165A..1165F; UNKNOWN 6091 0x11660, // 11660..1166C; MONGOLIAN 6092 0x1166D, // 1166D..1167F; UNKNOWN 6093 0x11680, // 11680..116B8; TAKRI 6094 0x116B9, // 116B9..116BF; UNKNOWN 6095 0x116C0, // 116C0..116C9; TAKRI 6096 0x116CA, // 116CA..116FF; UNKNOWN 6097 0x11700, // 11700..1171A; AHOM 6098 0x1171B, // 1171B..1171C; UNKNOWN 6099 0x1171D, // 1171D..1172B; AHOM 6100 0x1172C, // 1172C..1172F; UNKNOWN 6101 0x11730, // 11730..1173F; AHOM 6102 0x11740, // 11740..117FF; UNKNOWN 6103 0x11800, // 11800..1183B; DOGRA 6104 0x1183C, // 1183C..1189F; UNKNOWN 6105 0x118A0, // 118A0..118F2; WARANG_CITI 6106 0x118F3, // 118F3..118FE; UNKNOWN 6107 0x118FF, // 118FF ; WARANG_CITI 6108 0x11900, // 11900..1199F; UNKNOWN 6109 0x119A0, // 119A0..119A7; NANDINAGARI 6110 0x119A8, // 119A8..119A9; UNKNOWN 6111 0x119AA, // 119AA..119D7; NANDINAGARI 6112 0x119D8, // 119D8..119D9; UNKNOWN 6113 0x119DA, // 119DA..119E4; NANDINAGARI 6114 0x119E5, // 119E5..119FF; UNKNOWN 6115 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6116 0x11A48, // 11A48..11A4F; UNKNOWN 6117 0x11A50, // 11A50..11AA2; SOYOMBO 6118 0x11AA3, // 11AA3..11ABF; UNKNOWN 
6119 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6120 0x11AF9, // 11AF9..11BFF; UNKNOWN 6121 0x11C00, // 11C00..11C08; BHAIKSUKI 6122 0x11C09, // 11C09 ; UNKNOWN 6123 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6124 0x11C37, // 11C37 ; UNKNOWN 6125 0x11C38, // 11C38..11C45; BHAIKSUKI 6126 0x11C46, // 11C46..11C4F; UNKNOWN 6127 0x11C50, // 11C50..11C6C; BHAIKSUKI 6128 0x11C6D, // 11C6D..11C6F; UNKNOWN 6129 0x11C70, // 11C70..11C8F; MARCHEN 6130 0x11C90, // 11C90..11C91; UNKNOWN 6131 0x11C92, // 11C92..11CA7; MARCHEN 6132 0x11CA8, // 11CA8 ; UNKNOWN 6133 0x11CA9, // 11CA9..11CB6; MARCHEN 6134 0x11CB7, // 11CB7..11CFF; UNKNOWN 6135 0x11D00, // 11D00..11D06; MASARAM_GONDI 6136 0x11D07, // 11D07 ; UNKNOWN 6137 0x11D08, // 11D08..11D09; MASARAM_GONDI 6138 0x11D0A, // 11D0A ; UNKNOWN 6139 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6140 0x11D37, // 11D37..11D39; UNKNOWN 6141 0x11D3A, // 11D3A ; MASARAM_GONDI 6142 0x11D3B, // 11D3B ; UNKNOWN 6143 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6144 0x11D3E, // 11D3E ; UNKNOWN 6145 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6146 0x11D48, // 11D48..11D4F; UNKNOWN 6147 0x11D50, // 11D50..11D59; MASARAM_GONDI 6148 0x11D5A, // 11D5A..11D5F; UNKNOWN 6149 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6150 0x11D66, // 11D66 ; UNKNOWN 6151 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6152 0x11D69, // 11D69 ; UNKNOWN 6153 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6154 0x11D8F, // 11D8F ; UNKNOWN 6155 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6156 0x11D92, // 11D92 ; UNKNOWN 6157 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6158 0x11D99, // 11D99..11D9F; UNKNOWN 6159 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6160 0x11DAA, // 11DAA..11EDF; UNKNOWN 6161 0x11EE0, // 11EE0..11EF8; MAKASAR 6162 0x11EF9, // 11EF9..11FBF; UNKNOWN 6163 0x11FC0, // 11FC0..11FF1; TAMIL 6164 0x11FF2, // 11FF2..11FFE; UNKNOWN 6165 0x11FFF, // 11FFF ; TAMIL 6166 0x12000, // 12000..12399; CUNEIFORM 6167 0x1239A, // 1239A..123FF; UNKNOWN 6168 0x12400, // 12400..1246E; CUNEIFORM 6169 0x1246F, // 1246F ; UNKNOWN 6170 
0x12470, // 12470..12474; CUNEIFORM 6171 0x12475, // 12475..1247F; UNKNOWN 6172 0x12480, // 12480..12543; CUNEIFORM 6173 0x12544, // 12544..12FFF; UNKNOWN 6174 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 6175 0x1342F, // 1342F ; UNKNOWN 6176 0x13430, // 13430..13438; EGYPTIAN_HIEROGLYPHS 6177 0x13439, // 13439..143FF; UNKNOWN 6178 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6179 0x14647, // 14647..167FF; UNKNOWN 6180 0x16800, // 16800..16A38; BAMUM 6181 0x16A39, // 16A39..16A3F; UNKNOWN 6182 0x16A40, // 16A40..16A5E; MRO 6183 0x16A5F, // 16A5F ; UNKNOWN 6184 0x16A60, // 16A60..16A69; MRO 6185 0x16A6A, // 16A6A..16A6D; UNKNOWN 6186 0x16A6E, // 16A6E..16A6F; MRO 6187 0x16A70, // 16A70..16ACF; UNKNOWN 6188 0x16AD0, // 16AD0..16AED; BASSA_VAH 6189 0x16AEE, // 16AEE..16AEF; UNKNOWN 6190 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6191 0x16AF6, // 16AF6..16AFF; UNKNOWN 6192 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6193 0x16B46, // 16B46..16B4F; UNKNOWN 6194 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6195 0x16B5A, // 16B5A ; UNKNOWN 6196 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6197 0x16B62, // 16B62 ; UNKNOWN 6198 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6199 0x16B78, // 16B78..16B7C; UNKNOWN 6200 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6201 0x16B90, // 16B90..16E3F; UNKNOWN 6202 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6203 0x16E9B, // 16E9B..16EFF; UNKNOWN 6204 0x16F00, // 16F00..16F4A; MIAO 6205 0x16F4B, // 16F4B..16F4E; UNKNOWN 6206 0x16F4F, // 16F4F..16F87; MIAO 6207 0x16F88, // 16F88..16F8E; UNKNOWN 6208 0x16F8F, // 16F8F..16F9F; MIAO 6209 0x16FA0, // 16FA0..16FDF; UNKNOWN 6210 0x16FE0, // 16FE0 ; TANGUT 6211 0x16FE1, // 16FE1 ; NUSHU 6212 0x16FE2, // 16FE2..16FE3; COMMON 6213 0x16FE4, // 16FE4..16FFF; UNKNOWN 6214 0x17000, // 17000..187F7; TANGUT 6215 0x187F8, // 187F8..187FF; UNKNOWN 6216 0x18800, // 18800..18AF2; TANGUT 6217 0x18AF3, // 18AF3..1AFFF; UNKNOWN 6218 0x1B000, // 1B000 ; KATAKANA 6219 0x1B001, // 1B001..1B11E; HIRAGANA 6220 0x1B11F, // 1B11F..1B14F; UNKNOWN 6221 
0x1B150, // 1B150..1B152; HIRAGANA 6222 0x1B153, // 1B153..1B163; UNKNOWN 6223 0x1B164, // 1B164..1B167; KATAKANA 6224 0x1B168, // 1B168..1B16F; UNKNOWN 6225 0x1B170, // 1B170..1B2FB; NUSHU 6226 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6227 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6228 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6229 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6230 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6231 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6232 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6233 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6234 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6235 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6236 0x1BCA0, // 1BCA0..1BCA3; COMMON 6237 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 6238 0x1D000, // 1D000..1D0F5; COMMON 6239 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6240 0x1D100, // 1D100..1D126; COMMON 6241 0x1D127, // 1D127..1D128; UNKNOWN 6242 0x1D129, // 1D129..1D166; COMMON 6243 0x1D167, // 1D167..1D169; INHERITED 6244 0x1D16A, // 1D16A..1D17A; COMMON 6245 0x1D17B, // 1D17B..1D182; INHERITED 6246 0x1D183, // 1D183..1D184; COMMON 6247 0x1D185, // 1D185..1D18B; INHERITED 6248 0x1D18C, // 1D18C..1D1A9; COMMON 6249 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6250 0x1D1AE, // 1D1AE..1D1E8; COMMON 6251 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN 6252 0x1D200, // 1D200..1D245; GREEK 6253 0x1D246, // 1D246..1D2DF; UNKNOWN 6254 0x1D2E0, // 1D2E0..1D2F3; COMMON 6255 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6256 0x1D300, // 1D300..1D356; COMMON 6257 0x1D357, // 1D357..1D35F; UNKNOWN 6258 0x1D360, // 1D360..1D378; COMMON 6259 0x1D379, // 1D379..1D3FF; UNKNOWN 6260 0x1D400, // 1D400..1D454; COMMON 6261 0x1D455, // 1D455 ; UNKNOWN 6262 0x1D456, // 1D456..1D49C; COMMON 6263 0x1D49D, // 1D49D ; UNKNOWN 6264 0x1D49E, // 1D49E..1D49F; COMMON 6265 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6266 0x1D4A2, // 1D4A2 ; COMMON 6267 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6268 0x1D4A5, // 1D4A5..1D4A6; COMMON 6269 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6270 0x1D4A9, // 1D4A9..1D4AC; COMMON 6271 0x1D4AD, // 1D4AD ; UNKNOWN 6272 0x1D4AE, // 1D4AE..1D4B9; COMMON 6273 
0x1D4BA, // 1D4BA ; UNKNOWN 6274 0x1D4BB, // 1D4BB ; COMMON 6275 0x1D4BC, // 1D4BC ; UNKNOWN 6276 0x1D4BD, // 1D4BD..1D4C3; COMMON 6277 0x1D4C4, // 1D4C4 ; UNKNOWN 6278 0x1D4C5, // 1D4C5..1D505; COMMON 6279 0x1D506, // 1D506 ; UNKNOWN 6280 0x1D507, // 1D507..1D50A; COMMON 6281 0x1D50B, // 1D50B..1D50C; UNKNOWN 6282 0x1D50D, // 1D50D..1D514; COMMON 6283 0x1D515, // 1D515 ; UNKNOWN 6284 0x1D516, // 1D516..1D51C; COMMON 6285 0x1D51D, // 1D51D ; UNKNOWN 6286 0x1D51E, // 1D51E..1D539; COMMON 6287 0x1D53A, // 1D53A ; UNKNOWN 6288 0x1D53B, // 1D53B..1D53E; COMMON 6289 0x1D53F, // 1D53F ; UNKNOWN 6290 0x1D540, // 1D540..1D544; COMMON 6291 0x1D545, // 1D545 ; UNKNOWN 6292 0x1D546, // 1D546 ; COMMON 6293 0x1D547, // 1D547..1D549; UNKNOWN 6294 0x1D54A, // 1D54A..1D550; COMMON 6295 0x1D551, // 1D551 ; UNKNOWN 6296 0x1D552, // 1D552..1D6A5; COMMON 6297 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6298 0x1D6A8, // 1D6A8..1D7CB; COMMON 6299 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6300 0x1D7CE, // 1D7CE..1D7FF; COMMON 6301 0x1D800, // 1D800..1DA8B; SIGNWRITING 6302 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6303 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6304 0x1DAA0, // 1DAA0 ; UNKNOWN 6305 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6306 0x1DAB0, // 1DAB0..1DFFF; UNKNOWN 6307 0x1E000, // 1E000..1E006; GLAGOLITIC 6308 0x1E007, // 1E007 ; UNKNOWN 6309 0x1E008, // 1E008..1E018; GLAGOLITIC 6310 0x1E019, // 1E019..1E01A; UNKNOWN 6311 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6312 0x1E022, // 1E022 ; UNKNOWN 6313 0x1E023, // 1E023..1E024; GLAGOLITIC 6314 0x1E025, // 1E025 ; UNKNOWN 6315 0x1E026, // 1E026..1E02A; GLAGOLITIC 6316 0x1E02B, // 1E02B..1E0FF; UNKNOWN 6317 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6318 0x1E12D, // 1E12D..1E12F; UNKNOWN 6319 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6320 0x1E13E, // 1E13E..1E13F; UNKNOWN 6321 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6322 0x1E14A, // 1E14A..1E14D; UNKNOWN 6323 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6324 0x1E150, // 1E150..1E2BF; UNKNOWN 
6325 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6326 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6327 0x1E2FF, // 1E2FF ; WANCHO 6328 0x1E300, // 1E300..1E7FF; UNKNOWN 6329 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6330 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6331 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6332 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6333 0x1E900, // 1E900..1E94B; ADLAM 6334 0x1E94C, // 1E94C..1E94F; UNKNOWN 6335 0x1E950, // 1E950..1E959; ADLAM 6336 0x1E95A, // 1E95A..1E95D; UNKNOWN 6337 0x1E95E, // 1E95E..1E95F; ADLAM 6338 0x1E960, // 1E960..1EC70; UNKNOWN 6339 0x1EC71, // 1EC71..1ECB4; COMMON 6340 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6341 0x1ED01, // 1ED01..1ED3D; COMMON 6342 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6343 0x1EE00, // 1EE00..1EE03; ARABIC 6344 0x1EE04, // 1EE04 ; UNKNOWN 6345 0x1EE05, // 1EE05..1EE1F; ARABIC 6346 0x1EE20, // 1EE20 ; UNKNOWN 6347 0x1EE21, // 1EE21..1EE22; ARABIC 6348 0x1EE23, // 1EE23 ; UNKNOWN 6349 0x1EE24, // 1EE24 ; ARABIC 6350 0x1EE25, // 1EE25..1EE26; UNKNOWN 6351 0x1EE27, // 1EE27 ; ARABIC 6352 0x1EE28, // 1EE28 ; UNKNOWN 6353 0x1EE29, // 1EE29..1EE32; ARABIC 6354 0x1EE33, // 1EE33 ; UNKNOWN 6355 0x1EE34, // 1EE34..1EE37; ARABIC 6356 0x1EE38, // 1EE38 ; UNKNOWN 6357 0x1EE39, // 1EE39 ; ARABIC 6358 0x1EE3A, // 1EE3A ; UNKNOWN 6359 0x1EE3B, // 1EE3B ; ARABIC 6360 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6361 0x1EE42, // 1EE42 ; ARABIC 6362 0x1EE43, // 1EE43..1EE46; UNKNOWN 6363 0x1EE47, // 1EE47 ; ARABIC 6364 0x1EE48, // 1EE48 ; UNKNOWN 6365 0x1EE49, // 1EE49 ; ARABIC 6366 0x1EE4A, // 1EE4A ; UNKNOWN 6367 0x1EE4B, // 1EE4B ; ARABIC 6368 0x1EE4C, // 1EE4C ; UNKNOWN 6369 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6370 0x1EE50, // 1EE50 ; UNKNOWN 6371 0x1EE51, // 1EE51..1EE52; ARABIC 6372 0x1EE53, // 1EE53 ; UNKNOWN 6373 0x1EE54, // 1EE54 ; ARABIC 6374 0x1EE55, // 1EE55..1EE56; UNKNOWN 6375 0x1EE57, // 1EE57 ; ARABIC 6376 0x1EE58, // 1EE58 ; UNKNOWN 6377 0x1EE59, // 1EE59 ; ARABIC 6378 0x1EE5A, // 1EE5A ; UNKNOWN 6379 0x1EE5B, // 1EE5B ; ARABIC 6380 0x1EE5C, // 1EE5C ; UNKNOWN 
6381 0x1EE5D, // 1EE5D ; ARABIC 6382 0x1EE5E, // 1EE5E ; UNKNOWN 6383 0x1EE5F, // 1EE5F ; ARABIC 6384 0x1EE60, // 1EE60 ; UNKNOWN 6385 0x1EE61, // 1EE61..1EE62; ARABIC 6386 0x1EE63, // 1EE63 ; UNKNOWN 6387 0x1EE64, // 1EE64 ; ARABIC 6388 0x1EE65, // 1EE65..1EE66; UNKNOWN 6389 0x1EE67, // 1EE67..1EE6A; ARABIC 6390 0x1EE6B, // 1EE6B ; UNKNOWN 6391 0x1EE6C, // 1EE6C..1EE72; ARABIC 6392 0x1EE73, // 1EE73 ; UNKNOWN 6393 0x1EE74, // 1EE74..1EE77; ARABIC 6394 0x1EE78, // 1EE78 ; UNKNOWN 6395 0x1EE79, // 1EE79..1EE7C; ARABIC 6396 0x1EE7D, // 1EE7D ; UNKNOWN 6397 0x1EE7E, // 1EE7E ; ARABIC 6398 0x1EE7F, // 1EE7F ; UNKNOWN 6399 0x1EE80, // 1EE80..1EE89; ARABIC 6400 0x1EE8A, // 1EE8A ; UNKNOWN 6401 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6402 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6403 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6404 0x1EEA4, // 1EEA4 ; UNKNOWN 6405 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6406 0x1EEAA, // 1EEAA ; UNKNOWN 6407 0x1EEAB, // 1EEAB..1EEBB; ARABIC 6408 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6409 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6410 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6411 0x1F000, // 1F000..1F02B; COMMON 6412 0x1F02C, // 1F02C..1F02F; UNKNOWN 6413 0x1F030, // 1F030..1F093; COMMON 6414 0x1F094, // 1F094..1F09F; UNKNOWN 6415 0x1F0A0, // 1F0A0..1F0AE; COMMON 6416 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6417 0x1F0B1, // 1F0B1..1F0BF; COMMON 6418 0x1F0C0, // 1F0C0 ; UNKNOWN 6419 0x1F0C1, // 1F0C1..1F0CF; COMMON 6420 0x1F0D0, // 1F0D0 ; UNKNOWN 6421 0x1F0D1, // 1F0D1..1F0F5; COMMON 6422 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6423 0x1F100, // 1F100..1F10C; COMMON 6424 0x1F10D, // 1F10D..1F10F; UNKNOWN 6425 0x1F110, // 1F110..1F16C; COMMON 6426 0x1F16D, // 1F16D..1F16F; UNKNOWN 6427 0x1F170, // 1F170..1F1AC; COMMON 6428 0x1F1AD, // 1F1AD..1F1E5; UNKNOWN 6429 0x1F1E6, // 1F1E6..1F1FF; COMMON 6430 0x1F200, // 1F200 ; HIRAGANA 6431 0x1F201, // 1F201..1F202; COMMON 6432 0x1F203, // 1F203..1F20F; UNKNOWN 6433 0x1F210, // 1F210..1F23B; COMMON 6434 0x1F23C, // 1F23C..1F23F; UNKNOWN 6435 0x1F240, // 
1F240..1F248; COMMON 6436 0x1F249, // 1F249..1F24F; UNKNOWN 6437 0x1F250, // 1F250..1F251; COMMON 6438 0x1F252, // 1F252..1F25F; UNKNOWN 6439 0x1F260, // 1F260..1F265; COMMON 6440 0x1F266, // 1F266..1F2FF; UNKNOWN 6441 0x1F300, // 1F300..1F6D5; COMMON 6442 0x1F6D6, // 1F6D6..1F6DF; UNKNOWN 6443 0x1F6E0, // 1F6E0..1F6EC; COMMON 6444 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6445 0x1F6F0, // 1F6F0..1F6FA; COMMON 6446 0x1F6FB, // 1F6FB..1F6FF; UNKNOWN 6447 0x1F700, // 1F700..1F773; COMMON 6448 0x1F774, // 1F774..1F77F; UNKNOWN 6449 0x1F780, // 1F780..1F7D8; COMMON 6450 0x1F7D9, // 1F7D9..1F7DF; UNKNOWN 6451 0x1F7E0, // 1F7E0..1F7EB; COMMON 6452 0x1F7EC, // 1F7EC..1F7FF; UNKNOWN 6453 0x1F800, // 1F800..1F80B; COMMON 6454 0x1F80C, // 1F80C..1F80F; UNKNOWN 6455 0x1F810, // 1F810..1F847; COMMON 6456 0x1F848, // 1F848..1F84F; UNKNOWN 6457 0x1F850, // 1F850..1F859; COMMON 6458 0x1F85A, // 1F85A..1F85F; UNKNOWN 6459 0x1F860, // 1F860..1F887; COMMON 6460 0x1F888, // 1F888..1F88F; UNKNOWN 6461 0x1F890, // 1F890..1F8AD; COMMON 6462 0x1F8AE, // 1F8AE..1F8FF; UNKNOWN 6463 0x1F900, // 1F900..1F90B; COMMON 6464 0x1F90C, // 1F90C ; UNKNOWN 6465 0x1F90D, // 1F90D..1F971; COMMON 6466 0x1F972, // 1F972 ; UNKNOWN 6467 0x1F973, // 1F973..1F976; COMMON 6468 0x1F977, // 1F977..1F979; UNKNOWN 6469 0x1F97A, // 1F97A..1F9A2; COMMON 6470 0x1F9A3, // 1F9A3..1F9A4; UNKNOWN 6471 0x1F9A5, // 1F9A5..1F9AA; COMMON 6472 0x1F9AB, // 1F9AB..1F9AD; UNKNOWN 6473 0x1F9AE, // 1F9AE..1F9CA; COMMON 6474 0x1F9CB, // 1F9CB..1F9CC; UNKNOWN 6475 0x1F9CD, // 1F9CD..1FA53; COMMON 6476 0x1FA54, // 1FA54..1FA5F; UNKNOWN 6477 0x1FA60, // 1FA60..1FA6D; COMMON 6478 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 6479 0x1FA70, // 1FA70..1FA73; COMMON 6480 0x1FA74, // 1FA74..1FA77; UNKNOWN 6481 0x1FA78, // 1FA78..1FA7A; COMMON 6482 0x1FA7B, // 1FA7B..1FA7F; UNKNOWN 6483 0x1FA80, // 1FA80..1FA82; COMMON 6484 0x1FA83, // 1FA83..1FA8F; UNKNOWN 6485 0x1FA90, // 1FA90..1FA95; COMMON 6486 0x1FA96, // 1FA96..1FFFF; UNKNOWN 6487 0x20000, // 
20000..2A6D6; HAN 6488 0x2A6D7, // 2A6D7..2A6FF; UNKNOWN 6489 0x2A700, // 2A700..2B734; HAN 6490 0x2B735, // 2B735..2B73F; UNKNOWN 6491 0x2B740, // 2B740..2B81D; HAN 6492 0x2B81E, // 2B81E..2B81F; UNKNOWN 6493 0x2B820, // 2B820..2CEA1; HAN 6494 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 6495 0x2CEB0, // 2CEB0..2EBE0; HAN 6496 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN 6497 0x2F800, // 2F800..2FA1D; HAN 6498 0x2FA1E, // 2FA1E..E0000; UNKNOWN 6499 0xE0001, // E0001 ; COMMON 6500 0xE0002, // E0002..E001F; UNKNOWN 6501 0xE0020, // E0020..E007F; COMMON 6502 0xE0080, // E0080..E00FF; UNKNOWN 6503 0xE0100, // E0100..E01EF; INHERITED 6504 0xE01F0, // E01F0..10FFFF; UNKNOWN 6505 }; 6506 6507 private static final UnicodeScript[] scripts = { 6508 COMMON, // 0000..0040 6509 LATIN, // 0041..005A 6510 COMMON, // 005B..0060 6511 LATIN, // 0061..007A 6512 COMMON, // 007B..00A9 6513 LATIN, // 00AA 6514 COMMON, // 00AB..00B9 6515 LATIN, // 00BA 6516 COMMON, // 00BB..00BF 6517 LATIN, // 00C0..00D6 6518 COMMON, // 00D7 6519 LATIN, // 00D8..00F6 6520 COMMON, // 00F7 6521 LATIN, // 00F8..02B8 6522 COMMON, // 02B9..02DF 6523 LATIN, // 02E0..02E4 6524 COMMON, // 02E5..02E9 6525 BOPOMOFO, // 02EA..02EB 6526 COMMON, // 02EC..02FF 6527 INHERITED, // 0300..036F 6528 GREEK, // 0370..0373 6529 COMMON, // 0374 6530 GREEK, // 0375..0377 6531 UNKNOWN, // 0378..0379 6532 GREEK, // 037A..037D 6533 COMMON, // 037E 6534 GREEK, // 037F 6535 UNKNOWN, // 0380..0383 6536 GREEK, // 0384 6537 COMMON, // 0385 6538 GREEK, // 0386 6539 COMMON, // 0387 6540 GREEK, // 0388..038A 6541 UNKNOWN, // 038B 6542 GREEK, // 038C 6543 UNKNOWN, // 038D 6544 GREEK, // 038E..03A1 6545 UNKNOWN, // 03A2 6546 GREEK, // 03A3..03E1 6547 COPTIC, // 03E2..03EF 6548 GREEK, // 03F0..03FF 6549 CYRILLIC, // 0400..0484 6550 INHERITED, // 0485..0486 6551 CYRILLIC, // 0487..052F 6552 UNKNOWN, // 0530 6553 ARMENIAN, // 0531..0556 6554 UNKNOWN, // 0557..0558 6555 ARMENIAN, // 0559..0588 6556 COMMON, // 0589 6557 ARMENIAN, // 058A 6558 UNKNOWN, // 058B..058C 
6559 ARMENIAN, // 058D..058F 6560 UNKNOWN, // 0590 6561 HEBREW, // 0591..05C7 6562 UNKNOWN, // 05C8..05CF 6563 HEBREW, // 05D0..05EA 6564 UNKNOWN, // 05EB..05EE 6565 HEBREW, // 05EF..05F4 6566 UNKNOWN, // 05F5..05FF 6567 ARABIC, // 0600..0604 6568 COMMON, // 0605 6569 ARABIC, // 0606..060B 6570 COMMON, // 060C 6571 ARABIC, // 060D..061A 6572 COMMON, // 061B 6573 ARABIC, // 061C 6574 UNKNOWN, // 061D 6575 ARABIC, // 061E 6576 COMMON, // 061F 6577 ARABIC, // 0620..063F 6578 COMMON, // 0640 6579 ARABIC, // 0641..064A 6580 INHERITED, // 064B..0655 6581 ARABIC, // 0656..066F 6582 INHERITED, // 0670 6583 ARABIC, // 0671..06DC 6584 COMMON, // 06DD 6585 ARABIC, // 06DE..06FF 6586 SYRIAC, // 0700..070D 6587 UNKNOWN, // 070E 6588 SYRIAC, // 070F..074A 6589 UNKNOWN, // 074B..074C 6590 SYRIAC, // 074D..074F 6591 ARABIC, // 0750..077F 6592 THAANA, // 0780..07B1 6593 UNKNOWN, // 07B2..07BF 6594 NKO, // 07C0..07FA 6595 UNKNOWN, // 07FB..07FC 6596 NKO, // 07FD..07FF 6597 SAMARITAN, // 0800..082D 6598 UNKNOWN, // 082E..082F 6599 SAMARITAN, // 0830..083E 6600 UNKNOWN, // 083F 6601 MANDAIC, // 0840..085B 6602 UNKNOWN, // 085C..085D 6603 MANDAIC, // 085E 6604 UNKNOWN, // 085F 6605 SYRIAC, // 0860..086A 6606 UNKNOWN, // 086B..089F 6607 ARABIC, // 08A0..08B4 6608 UNKNOWN, // 08B5 6609 ARABIC, // 08B6..08BD 6610 UNKNOWN, // 08BE..08D2 6611 ARABIC, // 08D3..08E1 6612 COMMON, // 08E2 6613 ARABIC, // 08E3..08FF 6614 DEVANAGARI, // 0900..0950 6615 INHERITED, // 0951..0954 6616 DEVANAGARI, // 0955..0963 6617 COMMON, // 0964..0965 6618 DEVANAGARI, // 0966..097F 6619 BENGALI, // 0980..0983 6620 UNKNOWN, // 0984 6621 BENGALI, // 0985..098C 6622 UNKNOWN, // 098D..098E 6623 BENGALI, // 098F..0990 6624 UNKNOWN, // 0991..0992 6625 BENGALI, // 0993..09A8 6626 UNKNOWN, // 09A9 6627 BENGALI, // 09AA..09B0 6628 UNKNOWN, // 09B1 6629 BENGALI, // 09B2 6630 UNKNOWN, // 09B3..09B5 6631 BENGALI, // 09B6..09B9 6632 UNKNOWN, // 09BA..09BB 6633 BENGALI, // 09BC..09C4 6634 UNKNOWN, // 09C5..09C6 6635 BENGALI, // 
09C7..09C8 6636 UNKNOWN, // 09C9..09CA 6637 BENGALI, // 09CB..09CE 6638 UNKNOWN, // 09CF..09D6 6639 BENGALI, // 09D7 6640 UNKNOWN, // 09D8..09DB 6641 BENGALI, // 09DC..09DD 6642 UNKNOWN, // 09DE 6643 BENGALI, // 09DF..09E3 6644 UNKNOWN, // 09E4..09E5 6645 BENGALI, // 09E6..09FE 6646 UNKNOWN, // 09FF..0A00 6647 GURMUKHI, // 0A01..0A03 6648 UNKNOWN, // 0A04 6649 GURMUKHI, // 0A05..0A0A 6650 UNKNOWN, // 0A0B..0A0E 6651 GURMUKHI, // 0A0F..0A10 6652 UNKNOWN, // 0A11..0A12 6653 GURMUKHI, // 0A13..0A28 6654 UNKNOWN, // 0A29 6655 GURMUKHI, // 0A2A..0A30 6656 UNKNOWN, // 0A31 6657 GURMUKHI, // 0A32..0A33 6658 UNKNOWN, // 0A34 6659 GURMUKHI, // 0A35..0A36 6660 UNKNOWN, // 0A37 6661 GURMUKHI, // 0A38..0A39 6662 UNKNOWN, // 0A3A..0A3B 6663 GURMUKHI, // 0A3C 6664 UNKNOWN, // 0A3D 6665 GURMUKHI, // 0A3E..0A42 6666 UNKNOWN, // 0A43..0A46 6667 GURMUKHI, // 0A47..0A48 6668 UNKNOWN, // 0A49..0A4A 6669 GURMUKHI, // 0A4B..0A4D 6670 UNKNOWN, // 0A4E..0A50 6671 GURMUKHI, // 0A51 6672 UNKNOWN, // 0A52..0A58 6673 GURMUKHI, // 0A59..0A5C 6674 UNKNOWN, // 0A5D 6675 GURMUKHI, // 0A5E 6676 UNKNOWN, // 0A5F..0A65 6677 GURMUKHI, // 0A66..0A76 6678 UNKNOWN, // 0A77..0A80 6679 GUJARATI, // 0A81..0A83 6680 UNKNOWN, // 0A84 6681 GUJARATI, // 0A85..0A8D 6682 UNKNOWN, // 0A8E 6683 GUJARATI, // 0A8F..0A91 6684 UNKNOWN, // 0A92 6685 GUJARATI, // 0A93..0AA8 6686 UNKNOWN, // 0AA9 6687 GUJARATI, // 0AAA..0AB0 6688 UNKNOWN, // 0AB1 6689 GUJARATI, // 0AB2..0AB3 6690 UNKNOWN, // 0AB4 6691 GUJARATI, // 0AB5..0AB9 6692 UNKNOWN, // 0ABA..0ABB 6693 GUJARATI, // 0ABC..0AC5 6694 UNKNOWN, // 0AC6 6695 GUJARATI, // 0AC7..0AC9 6696 UNKNOWN, // 0ACA 6697 GUJARATI, // 0ACB..0ACD 6698 UNKNOWN, // 0ACE..0ACF 6699 GUJARATI, // 0AD0 6700 UNKNOWN, // 0AD1..0ADF 6701 GUJARATI, // 0AE0..0AE3 6702 UNKNOWN, // 0AE4..0AE5 6703 GUJARATI, // 0AE6..0AF1 6704 UNKNOWN, // 0AF2..0AF8 6705 GUJARATI, // 0AF9..0AFF 6706 UNKNOWN, // 0B00 6707 ORIYA, // 0B01..0B03 6708 UNKNOWN, // 0B04 6709 ORIYA, // 0B05..0B0C 6710 UNKNOWN, // 0B0D..0B0E 
6711 ORIYA, // 0B0F..0B10 6712 UNKNOWN, // 0B11..0B12 6713 ORIYA, // 0B13..0B28 6714 UNKNOWN, // 0B29 6715 ORIYA, // 0B2A..0B30 6716 UNKNOWN, // 0B31 6717 ORIYA, // 0B32..0B33 6718 UNKNOWN, // 0B34 6719 ORIYA, // 0B35..0B39 6720 UNKNOWN, // 0B3A..0B3B 6721 ORIYA, // 0B3C..0B44 6722 UNKNOWN, // 0B45..0B46 6723 ORIYA, // 0B47..0B48 6724 UNKNOWN, // 0B49..0B4A 6725 ORIYA, // 0B4B..0B4D 6726 UNKNOWN, // 0B4E..0B55 6727 ORIYA, // 0B56..0B57 6728 UNKNOWN, // 0B58..0B5B 6729 ORIYA, // 0B5C..0B5D 6730 UNKNOWN, // 0B5E 6731 ORIYA, // 0B5F..0B63 6732 UNKNOWN, // 0B64..0B65 6733 ORIYA, // 0B66..0B77 6734 UNKNOWN, // 0B78..0B81 6735 TAMIL, // 0B82..0B83 6736 UNKNOWN, // 0B84 6737 TAMIL, // 0B85..0B8A 6738 UNKNOWN, // 0B8B..0B8D 6739 TAMIL, // 0B8E..0B90 6740 UNKNOWN, // 0B91 6741 TAMIL, // 0B92..0B95 6742 UNKNOWN, // 0B96..0B98 6743 TAMIL, // 0B99..0B9A 6744 UNKNOWN, // 0B9B 6745 TAMIL, // 0B9C 6746 UNKNOWN, // 0B9D 6747 TAMIL, // 0B9E..0B9F 6748 UNKNOWN, // 0BA0..0BA2 6749 TAMIL, // 0BA3..0BA4 6750 UNKNOWN, // 0BA5..0BA7 6751 TAMIL, // 0BA8..0BAA 6752 UNKNOWN, // 0BAB..0BAD 6753 TAMIL, // 0BAE..0BB9 6754 UNKNOWN, // 0BBA..0BBD 6755 TAMIL, // 0BBE..0BC2 6756 UNKNOWN, // 0BC3..0BC5 6757 TAMIL, // 0BC6..0BC8 6758 UNKNOWN, // 0BC9 6759 TAMIL, // 0BCA..0BCD 6760 UNKNOWN, // 0BCE..0BCF 6761 TAMIL, // 0BD0 6762 UNKNOWN, // 0BD1..0BD6 6763 TAMIL, // 0BD7 6764 UNKNOWN, // 0BD8..0BE5 6765 TAMIL, // 0BE6..0BFA 6766 UNKNOWN, // 0BFB..0BFF 6767 TELUGU, // 0C00..0C0C 6768 UNKNOWN, // 0C0D 6769 TELUGU, // 0C0E..0C10 6770 UNKNOWN, // 0C11 6771 TELUGU, // 0C12..0C28 6772 UNKNOWN, // 0C29 6773 TELUGU, // 0C2A..0C39 6774 UNKNOWN, // 0C3A..0C3C 6775 TELUGU, // 0C3D..0C44 6776 UNKNOWN, // 0C45 6777 TELUGU, // 0C46..0C48 6778 UNKNOWN, // 0C49 6779 TELUGU, // 0C4A..0C4D 6780 UNKNOWN, // 0C4E..0C54 6781 TELUGU, // 0C55..0C56 6782 UNKNOWN, // 0C57 6783 TELUGU, // 0C58..0C5A 6784 UNKNOWN, // 0C5B..0C5F 6785 TELUGU, // 0C60..0C63 6786 UNKNOWN, // 0C64..0C65 6787 TELUGU, // 0C66..0C6F 6788 UNKNOWN, // 
0C70..0C76 6789 TELUGU, // 0C77..0C7F 6790 KANNADA, // 0C80..0C8C 6791 UNKNOWN, // 0C8D 6792 KANNADA, // 0C8E..0C90 6793 UNKNOWN, // 0C91 6794 KANNADA, // 0C92..0CA8 6795 UNKNOWN, // 0CA9 6796 KANNADA, // 0CAA..0CB3 6797 UNKNOWN, // 0CB4 6798 KANNADA, // 0CB5..0CB9 6799 UNKNOWN, // 0CBA..0CBB 6800 KANNADA, // 0CBC..0CC4 6801 UNKNOWN, // 0CC5 6802 KANNADA, // 0CC6..0CC8 6803 UNKNOWN, // 0CC9 6804 KANNADA, // 0CCA..0CCD 6805 UNKNOWN, // 0CCE..0CD4 6806 KANNADA, // 0CD5..0CD6 6807 UNKNOWN, // 0CD7..0CDD 6808 KANNADA, // 0CDE 6809 UNKNOWN, // 0CDF 6810 KANNADA, // 0CE0..0CE3 6811 UNKNOWN, // 0CE4..0CE5 6812 KANNADA, // 0CE6..0CEF 6813 UNKNOWN, // 0CF0 6814 KANNADA, // 0CF1..0CF2 6815 UNKNOWN, // 0CF3..0CFF 6816 MALAYALAM, // 0D00..0D03 6817 UNKNOWN, // 0D04 6818 MALAYALAM, // 0D05..0D0C 6819 UNKNOWN, // 0D0D 6820 MALAYALAM, // 0D0E..0D10 6821 UNKNOWN, // 0D11 6822 MALAYALAM, // 0D12..0D44 6823 UNKNOWN, // 0D45 6824 MALAYALAM, // 0D46..0D48 6825 UNKNOWN, // 0D49 6826 MALAYALAM, // 0D4A..0D4F 6827 UNKNOWN, // 0D50..0D53 6828 MALAYALAM, // 0D54..0D63 6829 UNKNOWN, // 0D64..0D65 6830 MALAYALAM, // 0D66..0D7F 6831 UNKNOWN, // 0D80..0D81 6832 SINHALA, // 0D82..0D83 6833 UNKNOWN, // 0D84 6834 SINHALA, // 0D85..0D96 6835 UNKNOWN, // 0D97..0D99 6836 SINHALA, // 0D9A..0DB1 6837 UNKNOWN, // 0DB2 6838 SINHALA, // 0DB3..0DBB 6839 UNKNOWN, // 0DBC 6840 SINHALA, // 0DBD 6841 UNKNOWN, // 0DBE..0DBF 6842 SINHALA, // 0DC0..0DC6 6843 UNKNOWN, // 0DC7..0DC9 6844 SINHALA, // 0DCA 6845 UNKNOWN, // 0DCB..0DCE 6846 SINHALA, // 0DCF..0DD4 6847 UNKNOWN, // 0DD5 6848 SINHALA, // 0DD6 6849 UNKNOWN, // 0DD7 6850 SINHALA, // 0DD8..0DDF 6851 UNKNOWN, // 0DE0..0DE5 6852 SINHALA, // 0DE6..0DEF 6853 UNKNOWN, // 0DF0..0DF1 6854 SINHALA, // 0DF2..0DF4 6855 UNKNOWN, // 0DF5..0E00 6856 THAI, // 0E01..0E3A 6857 UNKNOWN, // 0E3B..0E3E 6858 COMMON, // 0E3F 6859 THAI, // 0E40..0E5B 6860 UNKNOWN, // 0E5C..0E80 6861 LAO, // 0E81..0E82 6862 UNKNOWN, // 0E83 6863 LAO, // 0E84 6864 UNKNOWN, // 0E85 6865 LAO, // 
0E86..0E8A 6866 UNKNOWN, // 0E8B 6867 LAO, // 0E8C..0EA3 6868 UNKNOWN, // 0EA4 6869 LAO, // 0EA5 6870 UNKNOWN, // 0EA6 6871 LAO, // 0EA7..0EBD 6872 UNKNOWN, // 0EBE..0EBF 6873 LAO, // 0EC0..0EC4 6874 UNKNOWN, // 0EC5 6875 LAO, // 0EC6 6876 UNKNOWN, // 0EC7 6877 LAO, // 0EC8..0ECD 6878 UNKNOWN, // 0ECE..0ECF 6879 LAO, // 0ED0..0ED9 6880 UNKNOWN, // 0EDA..0EDB 6881 LAO, // 0EDC..0EDF 6882 UNKNOWN, // 0EE0..0EFF 6883 TIBETAN, // 0F00..0F47 6884 UNKNOWN, // 0F48 6885 TIBETAN, // 0F49..0F6C 6886 UNKNOWN, // 0F6D..0F70 6887 TIBETAN, // 0F71..0F97 6888 UNKNOWN, // 0F98 6889 TIBETAN, // 0F99..0FBC 6890 UNKNOWN, // 0FBD 6891 TIBETAN, // 0FBE..0FCC 6892 UNKNOWN, // 0FCD 6893 TIBETAN, // 0FCE..0FD4 6894 COMMON, // 0FD5..0FD8 6895 TIBETAN, // 0FD9..0FDA 6896 UNKNOWN, // 0FDB..0FFF 6897 MYANMAR, // 1000..109F 6898 GEORGIAN, // 10A0..10C5 6899 UNKNOWN, // 10C6 6900 GEORGIAN, // 10C7 6901 UNKNOWN, // 10C8..10CC 6902 GEORGIAN, // 10CD 6903 UNKNOWN, // 10CE..10CF 6904 GEORGIAN, // 10D0..10FA 6905 COMMON, // 10FB 6906 GEORGIAN, // 10FC..10FF 6907 HANGUL, // 1100..11FF 6908 ETHIOPIC, // 1200..1248 6909 UNKNOWN, // 1249 6910 ETHIOPIC, // 124A..124D 6911 UNKNOWN, // 124E..124F 6912 ETHIOPIC, // 1250..1256 6913 UNKNOWN, // 1257 6914 ETHIOPIC, // 1258 6915 UNKNOWN, // 1259 6916 ETHIOPIC, // 125A..125D 6917 UNKNOWN, // 125E..125F 6918 ETHIOPIC, // 1260..1288 6919 UNKNOWN, // 1289 6920 ETHIOPIC, // 128A..128D 6921 UNKNOWN, // 128E..128F 6922 ETHIOPIC, // 1290..12B0 6923 UNKNOWN, // 12B1 6924 ETHIOPIC, // 12B2..12B5 6925 UNKNOWN, // 12B6..12B7 6926 ETHIOPIC, // 12B8..12BE 6927 UNKNOWN, // 12BF 6928 ETHIOPIC, // 12C0 6929 UNKNOWN, // 12C1 6930 ETHIOPIC, // 12C2..12C5 6931 UNKNOWN, // 12C6..12C7 6932 ETHIOPIC, // 12C8..12D6 6933 UNKNOWN, // 12D7 6934 ETHIOPIC, // 12D8..1310 6935 UNKNOWN, // 1311 6936 ETHIOPIC, // 1312..1315 6937 UNKNOWN, // 1316..1317 6938 ETHIOPIC, // 1318..135A 6939 UNKNOWN, // 135B..135C 6940 ETHIOPIC, // 135D..137C 6941 UNKNOWN, // 137D..137F 6942 ETHIOPIC, // 1380..1399 
6943 UNKNOWN, // 139A..139F 6944 CHEROKEE, // 13A0..13F5 6945 UNKNOWN, // 13F6..13F7 6946 CHEROKEE, // 13F8..13FD 6947 UNKNOWN, // 13FE..13FF 6948 CANADIAN_ABORIGINAL, // 1400..167F 6949 OGHAM, // 1680..169C 6950 UNKNOWN, // 169D..169F 6951 RUNIC, // 16A0..16EA 6952 COMMON, // 16EB..16ED 6953 RUNIC, // 16EE..16F8 6954 UNKNOWN, // 16F9..16FF 6955 TAGALOG, // 1700..170C 6956 UNKNOWN, // 170D 6957 TAGALOG, // 170E..1714 6958 UNKNOWN, // 1715..171F 6959 HANUNOO, // 1720..1734 6960 COMMON, // 1735..1736 6961 UNKNOWN, // 1737..173F 6962 BUHID, // 1740..1753 6963 UNKNOWN, // 1754..175F 6964 TAGBANWA, // 1760..176C 6965 UNKNOWN, // 176D 6966 TAGBANWA, // 176E..1770 6967 UNKNOWN, // 1771 6968 TAGBANWA, // 1772..1773 6969 UNKNOWN, // 1774..177F 6970 KHMER, // 1780..17DD 6971 UNKNOWN, // 17DE..17DF 6972 KHMER, // 17E0..17E9 6973 UNKNOWN, // 17EA..17EF 6974 KHMER, // 17F0..17F9 6975 UNKNOWN, // 17FA..17FF 6976 MONGOLIAN, // 1800..1801 6977 COMMON, // 1802..1803 6978 MONGOLIAN, // 1804 6979 COMMON, // 1805 6980 MONGOLIAN, // 1806..180E 6981 UNKNOWN, // 180F 6982 MONGOLIAN, // 1810..1819 6983 UNKNOWN, // 181A..181F 6984 MONGOLIAN, // 1820..1878 6985 UNKNOWN, // 1879..187F 6986 MONGOLIAN, // 1880..18AA 6987 UNKNOWN, // 18AB..18AF 6988 CANADIAN_ABORIGINAL, // 18B0..18F5 6989 UNKNOWN, // 18F6..18FF 6990 LIMBU, // 1900..191E 6991 UNKNOWN, // 191F 6992 LIMBU, // 1920..192B 6993 UNKNOWN, // 192C..192F 6994 LIMBU, // 1930..193B 6995 UNKNOWN, // 193C..193F 6996 LIMBU, // 1940 6997 UNKNOWN, // 1941..1943 6998 LIMBU, // 1944..194F 6999 TAI_LE, // 1950..196D 7000 UNKNOWN, // 196E..196F 7001 TAI_LE, // 1970..1974 7002 UNKNOWN, // 1975..197F 7003 NEW_TAI_LUE, // 1980..19AB 7004 UNKNOWN, // 19AC..19AF 7005 NEW_TAI_LUE, // 19B0..19C9 7006 UNKNOWN, // 19CA..19CF 7007 NEW_TAI_LUE, // 19D0..19DA 7008 UNKNOWN, // 19DB..19DD 7009 NEW_TAI_LUE, // 19DE..19DF 7010 KHMER, // 19E0..19FF 7011 BUGINESE, // 1A00..1A1B 7012 UNKNOWN, // 1A1C..1A1D 7013 BUGINESE, // 1A1E..1A1F 7014 TAI_THAM, // 1A20..1A5E 
7015 UNKNOWN, // 1A5F 7016 TAI_THAM, // 1A60..1A7C 7017 UNKNOWN, // 1A7D..1A7E 7018 TAI_THAM, // 1A7F..1A89 7019 UNKNOWN, // 1A8A..1A8F 7020 TAI_THAM, // 1A90..1A99 7021 UNKNOWN, // 1A9A..1A9F 7022 TAI_THAM, // 1AA0..1AAD 7023 UNKNOWN, // 1AAE..1AAF 7024 INHERITED, // 1AB0..1ABE 7025 UNKNOWN, // 1ABF..1AFF 7026 BALINESE, // 1B00..1B4B 7027 UNKNOWN, // 1B4C..1B4F 7028 BALINESE, // 1B50..1B7C 7029 UNKNOWN, // 1B7D..1B7F 7030 SUNDANESE, // 1B80..1BBF 7031 BATAK, // 1BC0..1BF3 7032 UNKNOWN, // 1BF4..1BFB 7033 BATAK, // 1BFC..1BFF 7034 LEPCHA, // 1C00..1C37 7035 UNKNOWN, // 1C38..1C3A 7036 LEPCHA, // 1C3B..1C49 7037 UNKNOWN, // 1C4A..1C4C 7038 LEPCHA, // 1C4D..1C4F 7039 OL_CHIKI, // 1C50..1C7F 7040 CYRILLIC, // 1C80..1C88 7041 UNKNOWN, // 1C89..1C8F 7042 GEORGIAN, // 1C90..1CBA 7043 UNKNOWN, // 1CBB..1CBC 7044 GEORGIAN, // 1CBD..1CBF 7045 SUNDANESE, // 1CC0..1CC7 7046 UNKNOWN, // 1CC8..1CCF 7047 INHERITED, // 1CD0..1CD2 7048 COMMON, // 1CD3 7049 INHERITED, // 1CD4..1CE0 7050 COMMON, // 1CE1 7051 INHERITED, // 1CE2..1CE8 7052 COMMON, // 1CE9..1CEC 7053 INHERITED, // 1CED 7054 COMMON, // 1CEE..1CF3 7055 INHERITED, // 1CF4 7056 COMMON, // 1CF5..1CF7 7057 INHERITED, // 1CF8..1CF9 7058 COMMON, // 1CFA 7059 UNKNOWN, // 1CFB..1CFF 7060 LATIN, // 1D00..1D25 7061 GREEK, // 1D26..1D2A 7062 CYRILLIC, // 1D2B 7063 LATIN, // 1D2C..1D5C 7064 GREEK, // 1D5D..1D61 7065 LATIN, // 1D62..1D65 7066 GREEK, // 1D66..1D6A 7067 LATIN, // 1D6B..1D77 7068 CYRILLIC, // 1D78 7069 LATIN, // 1D79..1DBE 7070 GREEK, // 1DBF 7071 INHERITED, // 1DC0..1DF9 7072 UNKNOWN, // 1DFA 7073 INHERITED, // 1DFB..1DFF 7074 LATIN, // 1E00..1EFF 7075 GREEK, // 1F00..1F15 7076 UNKNOWN, // 1F16..1F17 7077 GREEK, // 1F18..1F1D 7078 UNKNOWN, // 1F1E..1F1F 7079 GREEK, // 1F20..1F45 7080 UNKNOWN, // 1F46..1F47 7081 GREEK, // 1F48..1F4D 7082 UNKNOWN, // 1F4E..1F4F 7083 GREEK, // 1F50..1F57 7084 UNKNOWN, // 1F58 7085 GREEK, // 1F59 7086 UNKNOWN, // 1F5A 7087 GREEK, // 1F5B 7088 UNKNOWN, // 1F5C 7089 GREEK, // 1F5D 7090 
UNKNOWN, // 1F5E 7091 GREEK, // 1F5F..1F7D 7092 UNKNOWN, // 1F7E..1F7F 7093 GREEK, // 1F80..1FB4 7094 UNKNOWN, // 1FB5 7095 GREEK, // 1FB6..1FC4 7096 UNKNOWN, // 1FC5 7097 GREEK, // 1FC6..1FD3 7098 UNKNOWN, // 1FD4..1FD5 7099 GREEK, // 1FD6..1FDB 7100 UNKNOWN, // 1FDC 7101 GREEK, // 1FDD..1FEF 7102 UNKNOWN, // 1FF0..1FF1 7103 GREEK, // 1FF2..1FF4 7104 UNKNOWN, // 1FF5 7105 GREEK, // 1FF6..1FFE 7106 UNKNOWN, // 1FFF 7107 COMMON, // 2000..200B 7108 INHERITED, // 200C..200D 7109 COMMON, // 200E..2064 7110 UNKNOWN, // 2065 7111 COMMON, // 2066..2070 7112 LATIN, // 2071 7113 UNKNOWN, // 2072..2073 7114 COMMON, // 2074..207E 7115 LATIN, // 207F 7116 COMMON, // 2080..208E 7117 UNKNOWN, // 208F 7118 LATIN, // 2090..209C 7119 UNKNOWN, // 209D..209F 7120 COMMON, // 20A0..20BF 7121 UNKNOWN, // 20C0..20CF 7122 INHERITED, // 20D0..20F0 7123 UNKNOWN, // 20F1..20FF 7124 COMMON, // 2100..2125 7125 GREEK, // 2126 7126 COMMON, // 2127..2129 7127 LATIN, // 212A..212B 7128 COMMON, // 212C..2131 7129 LATIN, // 2132 7130 COMMON, // 2133..214D 7131 LATIN, // 214E 7132 COMMON, // 214F..215F 7133 LATIN, // 2160..2188 7134 COMMON, // 2189..218B 7135 UNKNOWN, // 218C..218F 7136 COMMON, // 2190..2426 7137 UNKNOWN, // 2427..243F 7138 COMMON, // 2440..244A 7139 UNKNOWN, // 244B..245F 7140 COMMON, // 2460..27FF 7141 BRAILLE, // 2800..28FF 7142 COMMON, // 2900..2B73 7143 UNKNOWN, // 2B74..2B75 7144 COMMON, // 2B76..2B95 7145 UNKNOWN, // 2B96..2B97 7146 COMMON, // 2B98..2BFF 7147 GLAGOLITIC, // 2C00..2C2E 7148 UNKNOWN, // 2C2F 7149 GLAGOLITIC, // 2C30..2C5E 7150 UNKNOWN, // 2C5F 7151 LATIN, // 2C60..2C7F 7152 COPTIC, // 2C80..2CF3 7153 UNKNOWN, // 2CF4..2CF8 7154 COPTIC, // 2CF9..2CFF 7155 GEORGIAN, // 2D00..2D25 7156 UNKNOWN, // 2D26 7157 GEORGIAN, // 2D27 7158 UNKNOWN, // 2D28..2D2C 7159 GEORGIAN, // 2D2D 7160 UNKNOWN, // 2D2E..2D2F 7161 TIFINAGH, // 2D30..2D67 7162 UNKNOWN, // 2D68..2D6E 7163 TIFINAGH, // 2D6F..2D70 7164 UNKNOWN, // 2D71..2D7E 7165 TIFINAGH, // 2D7F 7166 ETHIOPIC, // 2D80..2D96 
7167 UNKNOWN, // 2D97..2D9F 7168 ETHIOPIC, // 2DA0..2DA6 7169 UNKNOWN, // 2DA7 7170 ETHIOPIC, // 2DA8..2DAE 7171 UNKNOWN, // 2DAF 7172 ETHIOPIC, // 2DB0..2DB6 7173 UNKNOWN, // 2DB7 7174 ETHIOPIC, // 2DB8..2DBE 7175 UNKNOWN, // 2DBF 7176 ETHIOPIC, // 2DC0..2DC6 7177 UNKNOWN, // 2DC7 7178 ETHIOPIC, // 2DC8..2DCE 7179 UNKNOWN, // 2DCF 7180 ETHIOPIC, // 2DD0..2DD6 7181 UNKNOWN, // 2DD7 7182 ETHIOPIC, // 2DD8..2DDE 7183 UNKNOWN, // 2DDF 7184 CYRILLIC, // 2DE0..2DFF 7185 COMMON, // 2E00..2E4F 7186 UNKNOWN, // 2E50..2E7F 7187 HAN, // 2E80..2E99 7188 UNKNOWN, // 2E9A 7189 HAN, // 2E9B..2EF3 7190 UNKNOWN, // 2EF4..2EFF 7191 HAN, // 2F00..2FD5 7192 UNKNOWN, // 2FD6..2FEF 7193 COMMON, // 2FF0..2FFB 7194 UNKNOWN, // 2FFC..2FFF 7195 COMMON, // 3000..3004 7196 HAN, // 3005 7197 COMMON, // 3006 7198 HAN, // 3007 7199 COMMON, // 3008..3020 7200 HAN, // 3021..3029 7201 INHERITED, // 302A..302D 7202 HANGUL, // 302E..302F 7203 COMMON, // 3030..3037 7204 HAN, // 3038..303B 7205 COMMON, // 303C..303F 7206 UNKNOWN, // 3040 7207 HIRAGANA, // 3041..3096 7208 UNKNOWN, // 3097..3098 7209 INHERITED, // 3099..309A 7210 COMMON, // 309B..309C 7211 HIRAGANA, // 309D..309F 7212 COMMON, // 30A0 7213 KATAKANA, // 30A1..30FA 7214 COMMON, // 30FB..30FC 7215 KATAKANA, // 30FD..30FF 7216 UNKNOWN, // 3100..3104 7217 BOPOMOFO, // 3105..312F 7218 UNKNOWN, // 3130 7219 HANGUL, // 3131..318E 7220 UNKNOWN, // 318F 7221 COMMON, // 3190..319F 7222 BOPOMOFO, // 31A0..31BA 7223 UNKNOWN, // 31BB..31BF 7224 COMMON, // 31C0..31E3 7225 UNKNOWN, // 31E4..31EF 7226 KATAKANA, // 31F0..31FF 7227 HANGUL, // 3200..321E 7228 UNKNOWN, // 321F 7229 COMMON, // 3220..325F 7230 HANGUL, // 3260..327E 7231 COMMON, // 327F..32CF 7232 KATAKANA, // 32D0..32FE 7233 COMMON, // 32FF 7234 KATAKANA, // 3300..3357 7235 COMMON, // 3358..33FF 7236 HAN, // 3400..4DB5 7237 UNKNOWN, // 4DB6..4DBF 7238 COMMON, // 4DC0..4DFF 7239 HAN, // 4E00..9FEF 7240 UNKNOWN, // 9FF0..9FFF 7241 YI, // A000..A48C 7242 UNKNOWN, // A48D..A48F 7243 YI, // 
A490..A4C6 7244 UNKNOWN, // A4C7..A4CF 7245 LISU, // A4D0..A4FF 7246 VAI, // A500..A62B 7247 UNKNOWN, // A62C..A63F 7248 CYRILLIC, // A640..A69F 7249 BAMUM, // A6A0..A6F7 7250 UNKNOWN, // A6F8..A6FF 7251 COMMON, // A700..A721 7252 LATIN, // A722..A787 7253 COMMON, // A788..A78A 7254 LATIN, // A78B..A7BF 7255 UNKNOWN, // A7C0..A7C1 7256 LATIN, // A7C2..A7C6 7257 UNKNOWN, // A7C7..A7F6 7258 LATIN, // A7F7..A7FF 7259 SYLOTI_NAGRI, // A800..A82B 7260 UNKNOWN, // A82C..A82F 7261 COMMON, // A830..A839 7262 UNKNOWN, // A83A..A83F 7263 PHAGS_PA, // A840..A877 7264 UNKNOWN, // A878..A87F 7265 SAURASHTRA, // A880..A8C5 7266 UNKNOWN, // A8C6..A8CD 7267 SAURASHTRA, // A8CE..A8D9 7268 UNKNOWN, // A8DA..A8DF 7269 DEVANAGARI, // A8E0..A8FF 7270 KAYAH_LI, // A900..A92D 7271 COMMON, // A92E 7272 KAYAH_LI, // A92F 7273 REJANG, // A930..A953 7274 UNKNOWN, // A954..A95E 7275 REJANG, // A95F 7276 HANGUL, // A960..A97C 7277 UNKNOWN, // A97D..A97F 7278 JAVANESE, // A980..A9CD 7279 UNKNOWN, // A9CE 7280 COMMON, // A9CF 7281 JAVANESE, // A9D0..A9D9 7282 UNKNOWN, // A9DA..A9DD 7283 JAVANESE, // A9DE..A9DF 7284 MYANMAR, // A9E0..A9FE 7285 UNKNOWN, // A9FF 7286 CHAM, // AA00..AA36 7287 UNKNOWN, // AA37..AA3F 7288 CHAM, // AA40..AA4D 7289 UNKNOWN, // AA4E..AA4F 7290 CHAM, // AA50..AA59 7291 UNKNOWN, // AA5A..AA5B 7292 CHAM, // AA5C..AA5F 7293 MYANMAR, // AA60..AA7F 7294 TAI_VIET, // AA80..AAC2 7295 UNKNOWN, // AAC3..AADA 7296 TAI_VIET, // AADB..AADF 7297 MEETEI_MAYEK, // AAE0..AAF6 7298 UNKNOWN, // AAF7..AB00 7299 ETHIOPIC, // AB01..AB06 7300 UNKNOWN, // AB07..AB08 7301 ETHIOPIC, // AB09..AB0E 7302 UNKNOWN, // AB0F..AB10 7303 ETHIOPIC, // AB11..AB16 7304 UNKNOWN, // AB17..AB1F 7305 ETHIOPIC, // AB20..AB26 7306 UNKNOWN, // AB27 7307 ETHIOPIC, // AB28..AB2E 7308 UNKNOWN, // AB2F 7309 LATIN, // AB30..AB5A 7310 COMMON, // AB5B 7311 LATIN, // AB5C..AB64 7312 GREEK, // AB65 7313 LATIN, // AB66..AB67 7314 UNKNOWN, // AB68..AB6F 7315 CHEROKEE, // AB70..ABBF 7316 MEETEI_MAYEK, // ABC0..ABED 7317 
UNKNOWN, // ABEE..ABEF 7318 MEETEI_MAYEK, // ABF0..ABF9 7319 UNKNOWN, // ABFA..ABFF 7320 HANGUL, // AC00..D7A3 7321 UNKNOWN, // D7A4..D7AF 7322 HANGUL, // D7B0..D7C6 7323 UNKNOWN, // D7C7..D7CA 7324 HANGUL, // D7CB..D7FB 7325 UNKNOWN, // D7FC..F8FF 7326 HAN, // F900..FA6D 7327 UNKNOWN, // FA6E..FA6F 7328 HAN, // FA70..FAD9 7329 UNKNOWN, // FADA..FAFF 7330 LATIN, // FB00..FB06 7331 UNKNOWN, // FB07..FB12 7332 ARMENIAN, // FB13..FB17 7333 UNKNOWN, // FB18..FB1C 7334 HEBREW, // FB1D..FB36 7335 UNKNOWN, // FB37 7336 HEBREW, // FB38..FB3C 7337 UNKNOWN, // FB3D 7338 HEBREW, // FB3E 7339 UNKNOWN, // FB3F 7340 HEBREW, // FB40..FB41 7341 UNKNOWN, // FB42 7342 HEBREW, // FB43..FB44 7343 UNKNOWN, // FB45 7344 HEBREW, // FB46..FB4F 7345 ARABIC, // FB50..FBC1 7346 UNKNOWN, // FBC2..FBD2 7347 ARABIC, // FBD3..FD3D 7348 COMMON, // FD3E..FD3F 7349 UNKNOWN, // FD40..FD4F 7350 ARABIC, // FD50..FD8F 7351 UNKNOWN, // FD90..FD91 7352 ARABIC, // FD92..FDC7 7353 UNKNOWN, // FDC8..FDEF 7354 ARABIC, // FDF0..FDFD 7355 UNKNOWN, // FDFE..FDFF 7356 INHERITED, // FE00..FE0F 7357 COMMON, // FE10..FE19 7358 UNKNOWN, // FE1A..FE1F 7359 INHERITED, // FE20..FE2D 7360 CYRILLIC, // FE2E..FE2F 7361 COMMON, // FE30..FE52 7362 UNKNOWN, // FE53 7363 COMMON, // FE54..FE66 7364 UNKNOWN, // FE67 7365 COMMON, // FE68..FE6B 7366 UNKNOWN, // FE6C..FE6F 7367 ARABIC, // FE70..FE74 7368 UNKNOWN, // FE75 7369 ARABIC, // FE76..FEFC 7370 UNKNOWN, // FEFD..FEFE 7371 COMMON, // FEFF 7372 UNKNOWN, // FF00 7373 COMMON, // FF01..FF20 7374 LATIN, // FF21..FF3A 7375 COMMON, // FF3B..FF40 7376 LATIN, // FF41..FF5A 7377 COMMON, // FF5B..FF65 7378 KATAKANA, // FF66..FF6F 7379 COMMON, // FF70 7380 KATAKANA, // FF71..FF9D 7381 COMMON, // FF9E..FF9F 7382 HANGUL, // FFA0..FFBE 7383 UNKNOWN, // FFBF..FFC1 7384 HANGUL, // FFC2..FFC7 7385 UNKNOWN, // FFC8..FFC9 7386 HANGUL, // FFCA..FFCF 7387 UNKNOWN, // FFD0..FFD1 7388 HANGUL, // FFD2..FFD7 7389 UNKNOWN, // FFD8..FFD9 7390 HANGUL, // FFDA..FFDC 7391 UNKNOWN, // FFDD..FFDF 7392 
COMMON, // FFE0..FFE6 7393 UNKNOWN, // FFE7 7394 COMMON, // FFE8..FFEE 7395 UNKNOWN, // FFEF..FFF8 7396 COMMON, // FFF9..FFFD 7397 UNKNOWN, // FFFE..FFFF 7398 LINEAR_B, // 10000..1000B 7399 UNKNOWN, // 1000C 7400 LINEAR_B, // 1000D..10026 7401 UNKNOWN, // 10027 7402 LINEAR_B, // 10028..1003A 7403 UNKNOWN, // 1003B 7404 LINEAR_B, // 1003C..1003D 7405 UNKNOWN, // 1003E 7406 LINEAR_B, // 1003F..1004D 7407 UNKNOWN, // 1004E..1004F 7408 LINEAR_B, // 10050..1005D 7409 UNKNOWN, // 1005E..1007F 7410 LINEAR_B, // 10080..100FA 7411 UNKNOWN, // 100FB..100FF 7412 COMMON, // 10100..10102 7413 UNKNOWN, // 10103..10106 7414 COMMON, // 10107..10133 7415 UNKNOWN, // 10134..10136 7416 COMMON, // 10137..1013F 7417 GREEK, // 10140..1018E 7418 UNKNOWN, // 1018F 7419 COMMON, // 10190..1019B 7420 UNKNOWN, // 1019C..1019F 7421 GREEK, // 101A0 7422 UNKNOWN, // 101A1..101CF 7423 COMMON, // 101D0..101FC 7424 INHERITED, // 101FD 7425 UNKNOWN, // 101FE..1027F 7426 LYCIAN, // 10280..1029C 7427 UNKNOWN, // 1029D..1029F 7428 CARIAN, // 102A0..102D0 7429 UNKNOWN, // 102D1..102DF 7430 INHERITED, // 102E0 7431 COMMON, // 102E1..102FB 7432 UNKNOWN, // 102FC..102FF 7433 OLD_ITALIC, // 10300..10323 7434 UNKNOWN, // 10324..1032C 7435 OLD_ITALIC, // 1032D..1032F 7436 GOTHIC, // 10330..1034A 7437 UNKNOWN, // 1034B..1034F 7438 OLD_PERMIC, // 10350..1037A 7439 UNKNOWN, // 1037B..1037F 7440 UGARITIC, // 10380..1039D 7441 UNKNOWN, // 1039E 7442 UGARITIC, // 1039F 7443 OLD_PERSIAN, // 103A0..103C3 7444 UNKNOWN, // 103C4..103C7 7445 OLD_PERSIAN, // 103C8..103D5 7446 UNKNOWN, // 103D6..103FF 7447 DESERET, // 10400..1044F 7448 SHAVIAN, // 10450..1047F 7449 OSMANYA, // 10480..1049D 7450 UNKNOWN, // 1049E..1049F 7451 OSMANYA, // 104A0..104A9 7452 UNKNOWN, // 104AA..104AF 7453 OSAGE, // 104B0..104D3 7454 UNKNOWN, // 104D4..104D7 7455 OSAGE, // 104D8..104FB 7456 UNKNOWN, // 104FC..104FF 7457 ELBASAN, // 10500..10527 7458 UNKNOWN, // 10528..1052F 7459 CAUCASIAN_ALBANIAN, // 10530..10563 7460 UNKNOWN, // 10564..1056E 
7461 CAUCASIAN_ALBANIAN, // 1056F 7462 UNKNOWN, // 10570..105FF 7463 LINEAR_A, // 10600..10736 7464 UNKNOWN, // 10737..1073F 7465 LINEAR_A, // 10740..10755 7466 UNKNOWN, // 10756..1075F 7467 LINEAR_A, // 10760..10767 7468 UNKNOWN, // 10768..107FF 7469 CYPRIOT, // 10800..10805 7470 UNKNOWN, // 10806..10807 7471 CYPRIOT, // 10808 7472 UNKNOWN, // 10809 7473 CYPRIOT, // 1080A..10835 7474 UNKNOWN, // 10836 7475 CYPRIOT, // 10837..10838 7476 UNKNOWN, // 10839..1083B 7477 CYPRIOT, // 1083C 7478 UNKNOWN, // 1083D..1083E 7479 CYPRIOT, // 1083F 7480 IMPERIAL_ARAMAIC, // 10840..10855 7481 UNKNOWN, // 10856 7482 IMPERIAL_ARAMAIC, // 10857..1085F 7483 PALMYRENE, // 10860..1087F 7484 NABATAEAN, // 10880..1089E 7485 UNKNOWN, // 1089F..108A6 7486 NABATAEAN, // 108A7..108AF 7487 UNKNOWN, // 108B0..108DF 7488 HATRAN, // 108E0..108F2 7489 UNKNOWN, // 108F3 7490 HATRAN, // 108F4..108F5 7491 UNKNOWN, // 108F6..108FA 7492 HATRAN, // 108FB..108FF 7493 PHOENICIAN, // 10900..1091B 7494 UNKNOWN, // 1091C..1091E 7495 PHOENICIAN, // 1091F 7496 LYDIAN, // 10920..10939 7497 UNKNOWN, // 1093A..1093E 7498 LYDIAN, // 1093F 7499 UNKNOWN, // 10940..1097F 7500 MEROITIC_HIEROGLYPHS, // 10980..1099F 7501 MEROITIC_CURSIVE, // 109A0..109B7 7502 UNKNOWN, // 109B8..109BB 7503 MEROITIC_CURSIVE, // 109BC..109CF 7504 UNKNOWN, // 109D0..109D1 7505 MEROITIC_CURSIVE, // 109D2..109FF 7506 KHAROSHTHI, // 10A00..10A03 7507 UNKNOWN, // 10A04 7508 KHAROSHTHI, // 10A05..10A06 7509 UNKNOWN, // 10A07..10A0B 7510 KHAROSHTHI, // 10A0C..10A13 7511 UNKNOWN, // 10A14 7512 KHAROSHTHI, // 10A15..10A17 7513 UNKNOWN, // 10A18 7514 KHAROSHTHI, // 10A19..10A35 7515 UNKNOWN, // 10A36..10A37 7516 KHAROSHTHI, // 10A38..10A3A 7517 UNKNOWN, // 10A3B..10A3E 7518 KHAROSHTHI, // 10A3F..10A48 7519 UNKNOWN, // 10A49..10A4F 7520 KHAROSHTHI, // 10A50..10A58 7521 UNKNOWN, // 10A59..10A5F 7522 OLD_SOUTH_ARABIAN, // 10A60..10A7F 7523 OLD_NORTH_ARABIAN, // 10A80..10A9F 7524 UNKNOWN, // 10AA0..10ABF 7525 MANICHAEAN, // 10AC0..10AE6 7526 UNKNOWN, 
// 10AE7..10AEA 7527 MANICHAEAN, // 10AEB..10AF6 7528 UNKNOWN, // 10AF7..10AFF 7529 AVESTAN, // 10B00..10B35 7530 UNKNOWN, // 10B36..10B38 7531 AVESTAN, // 10B39..10B3F 7532 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 7533 UNKNOWN, // 10B56..10B57 7534 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 7535 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 7536 UNKNOWN, // 10B73..10B77 7537 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 7538 PSALTER_PAHLAVI, // 10B80..10B91 7539 UNKNOWN, // 10B92..10B98 7540 PSALTER_PAHLAVI, // 10B99..10B9C 7541 UNKNOWN, // 10B9D..10BA8 7542 PSALTER_PAHLAVI, // 10BA9..10BAF 7543 UNKNOWN, // 10BB0..10BFF 7544 OLD_TURKIC, // 10C00..10C48 7545 UNKNOWN, // 10C49..10C7F 7546 OLD_HUNGARIAN, // 10C80..10CB2 7547 UNKNOWN, // 10CB3..10CBF 7548 OLD_HUNGARIAN, // 10CC0..10CF2 7549 UNKNOWN, // 10CF3..10CF9 7550 OLD_HUNGARIAN, // 10CFA..10CFF 7551 HANIFI_ROHINGYA, // 10D00..10D27 7552 UNKNOWN, // 10D28..10D2F 7553 HANIFI_ROHINGYA, // 10D30..10D39 7554 UNKNOWN, // 10D3A..10E5F 7555 ARABIC, // 10E60..10E7E 7556 UNKNOWN, // 10E7F..10EFF 7557 OLD_SOGDIAN, // 10F00..10F27 7558 UNKNOWN, // 10F28..10F2F 7559 SOGDIAN, // 10F30..10F59 7560 UNKNOWN, // 10F5A..10FDF 7561 ELYMAIC, // 10FE0..10FF6 7562 UNKNOWN, // 10FF7..10FFF 7563 BRAHMI, // 11000..1104D 7564 UNKNOWN, // 1104E..11051 7565 BRAHMI, // 11052..1106F 7566 UNKNOWN, // 11070..1107E 7567 BRAHMI, // 1107F 7568 KAITHI, // 11080..110C1 7569 UNKNOWN, // 110C2..110CC 7570 KAITHI, // 110CD 7571 UNKNOWN, // 110CE..110CF 7572 SORA_SOMPENG, // 110D0..110E8 7573 UNKNOWN, // 110E9..110EF 7574 SORA_SOMPENG, // 110F0..110F9 7575 UNKNOWN, // 110FA..110FF 7576 CHAKMA, // 11100..11134 7577 UNKNOWN, // 11135 7578 CHAKMA, // 11136..11146 7579 UNKNOWN, // 11147..1114F 7580 MAHAJANI, // 11150..11176 7581 UNKNOWN, // 11177..1117F 7582 SHARADA, // 11180..111CD 7583 UNKNOWN, // 111CE..111CF 7584 SHARADA, // 111D0..111DF 7585 UNKNOWN, // 111E0 7586 SINHALA, // 111E1..111F4 7587 UNKNOWN, // 111F5..111FF 7588 KHOJKI, // 11200..11211 7589 UNKNOWN, // 11212 
7590 KHOJKI, // 11213..1123E 7591 UNKNOWN, // 1123F..1127F 7592 MULTANI, // 11280..11286 7593 UNKNOWN, // 11287 7594 MULTANI, // 11288 7595 UNKNOWN, // 11289 7596 MULTANI, // 1128A..1128D 7597 UNKNOWN, // 1128E 7598 MULTANI, // 1128F..1129D 7599 UNKNOWN, // 1129E 7600 MULTANI, // 1129F..112A9 7601 UNKNOWN, // 112AA..112AF 7602 KHUDAWADI, // 112B0..112EA 7603 UNKNOWN, // 112EB..112EF 7604 KHUDAWADI, // 112F0..112F9 7605 UNKNOWN, // 112FA..112FF 7606 GRANTHA, // 11300..11303 7607 UNKNOWN, // 11304 7608 GRANTHA, // 11305..1130C 7609 UNKNOWN, // 1130D..1130E 7610 GRANTHA, // 1130F..11310 7611 UNKNOWN, // 11311..11312 7612 GRANTHA, // 11313..11328 7613 UNKNOWN, // 11329 7614 GRANTHA, // 1132A..11330 7615 UNKNOWN, // 11331 7616 GRANTHA, // 11332..11333 7617 UNKNOWN, // 11334 7618 GRANTHA, // 11335..11339 7619 UNKNOWN, // 1133A 7620 INHERITED, // 1133B 7621 GRANTHA, // 1133C..11344 7622 UNKNOWN, // 11345..11346 7623 GRANTHA, // 11347..11348 7624 UNKNOWN, // 11349..1134A 7625 GRANTHA, // 1134B..1134D 7626 UNKNOWN, // 1134E..1134F 7627 GRANTHA, // 11350 7628 UNKNOWN, // 11351..11356 7629 GRANTHA, // 11357 7630 UNKNOWN, // 11358..1135C 7631 GRANTHA, // 1135D..11363 7632 UNKNOWN, // 11364..11365 7633 GRANTHA, // 11366..1136C 7634 UNKNOWN, // 1136D..1136F 7635 GRANTHA, // 11370..11374 7636 UNKNOWN, // 11375..113FF 7637 NEWA, // 11400..11459 7638 UNKNOWN, // 1145A 7639 NEWA, // 1145B 7640 UNKNOWN, // 1145C 7641 NEWA, // 1145D..1145F 7642 UNKNOWN, // 11460..1147F 7643 TIRHUTA, // 11480..114C7 7644 UNKNOWN, // 114C8..114CF 7645 TIRHUTA, // 114D0..114D9 7646 UNKNOWN, // 114DA..1157F 7647 SIDDHAM, // 11580..115B5 7648 UNKNOWN, // 115B6..115B7 7649 SIDDHAM, // 115B8..115DD 7650 UNKNOWN, // 115DE..115FF 7651 MODI, // 11600..11644 7652 UNKNOWN, // 11645..1164F 7653 MODI, // 11650..11659 7654 UNKNOWN, // 1165A..1165F 7655 MONGOLIAN, // 11660..1166C 7656 UNKNOWN, // 1166D..1167F 7657 TAKRI, // 11680..116B8 7658 UNKNOWN, // 116B9..116BF 7659 TAKRI, // 116C0..116C9 7660 UNKNOWN, // 
116CA..116FF 7661 AHOM, // 11700..1171A 7662 UNKNOWN, // 1171B..1171C 7663 AHOM, // 1171D..1172B 7664 UNKNOWN, // 1172C..1172F 7665 AHOM, // 11730..1173F 7666 UNKNOWN, // 11740..117FF 7667 DOGRA, // 11800..1183B 7668 UNKNOWN, // 1183C..1189F 7669 WARANG_CITI, // 118A0..118F2 7670 UNKNOWN, // 118F3..118FE 7671 WARANG_CITI, // 118FF 7672 UNKNOWN, // 11900..1199F 7673 NANDINAGARI, // 119A0..119A7 7674 UNKNOWN, // 119A8..119A9 7675 NANDINAGARI, // 119AA..119D7 7676 UNKNOWN, // 119D8..119D9 7677 NANDINAGARI, // 119DA..119E4 7678 UNKNOWN, // 119E5..119FF 7679 ZANABAZAR_SQUARE, // 11A00..11A47 7680 UNKNOWN, // 11A48..11A4F 7681 SOYOMBO, // 11A50..11AA2 7682 UNKNOWN, // 11AA3..11ABF 7683 PAU_CIN_HAU, // 11AC0..11AF8 7684 UNKNOWN, // 11AF9..11BFF 7685 BHAIKSUKI, // 11C00..11C08 7686 UNKNOWN, // 11C09 7687 BHAIKSUKI, // 11C0A..11C36 7688 UNKNOWN, // 11C37 7689 BHAIKSUKI, // 11C38..11C45 7690 UNKNOWN, // 11C46..11C4F 7691 BHAIKSUKI, // 11C50..11C6C 7692 UNKNOWN, // 11C6D..11C6F 7693 MARCHEN, // 11C70..11C8F 7694 UNKNOWN, // 11C90..11C91 7695 MARCHEN, // 11C92..11CA7 7696 UNKNOWN, // 11CA8 7697 MARCHEN, // 11CA9..11CB6 7698 UNKNOWN, // 11CB7..11CFF 7699 MASARAM_GONDI, // 11D00..11D06 7700 UNKNOWN, // 11D07 7701 MASARAM_GONDI, // 11D08..11D09 7702 UNKNOWN, // 11D0A 7703 MASARAM_GONDI, // 11D0B..11D36 7704 UNKNOWN, // 11D37..11D39 7705 MASARAM_GONDI, // 11D3A 7706 UNKNOWN, // 11D3B 7707 MASARAM_GONDI, // 11D3C..11D3D 7708 UNKNOWN, // 11D3E 7709 MASARAM_GONDI, // 11D3F..11D47 7710 UNKNOWN, // 11D48..11D4F 7711 MASARAM_GONDI, // 11D50..11D59 7712 UNKNOWN, // 11D5A..11D5F 7713 GUNJALA_GONDI, // 11D60..11D65 7714 UNKNOWN, // 11D66 7715 GUNJALA_GONDI, // 11D67..11D68 7716 UNKNOWN, // 11D69 7717 GUNJALA_GONDI, // 11D6A..11D8E 7718 UNKNOWN, // 11D8F 7719 GUNJALA_GONDI, // 11D90..11D91 7720 UNKNOWN, // 11D92 7721 GUNJALA_GONDI, // 11D93..11D98 7722 UNKNOWN, // 11D99..11D9F 7723 GUNJALA_GONDI, // 11DA0..11DA9 7724 UNKNOWN, // 11DAA..11EDF 7725 MAKASAR, // 11EE0..11EF8 7726 UNKNOWN, // 
11EF9..11FBF 7727 TAMIL, // 11FC0..11FF1 7728 UNKNOWN, // 11FF2..11FFE 7729 TAMIL, // 11FFF 7730 CUNEIFORM, // 12000..12399 7731 UNKNOWN, // 1239A..123FF 7732 CUNEIFORM, // 12400..1246E 7733 UNKNOWN, // 1246F 7734 CUNEIFORM, // 12470..12474 7735 UNKNOWN, // 12475..1247F 7736 CUNEIFORM, // 12480..12543 7737 UNKNOWN, // 12544..12FFF 7738 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 7739 UNKNOWN, // 1342F 7740 EGYPTIAN_HIEROGLYPHS, // 13430..13438 7741 UNKNOWN, // 13439..143FF 7742 ANATOLIAN_HIEROGLYPHS, // 14400..14646 7743 UNKNOWN, // 14647..167FF 7744 BAMUM, // 16800..16A38 7745 UNKNOWN, // 16A39..16A3F 7746 MRO, // 16A40..16A5E 7747 UNKNOWN, // 16A5F 7748 MRO, // 16A60..16A69 7749 UNKNOWN, // 16A6A..16A6D 7750 MRO, // 16A6E..16A6F 7751 UNKNOWN, // 16A70..16ACF 7752 BASSA_VAH, // 16AD0..16AED 7753 UNKNOWN, // 16AEE..16AEF 7754 BASSA_VAH, // 16AF0..16AF5 7755 UNKNOWN, // 16AF6..16AFF 7756 PAHAWH_HMONG, // 16B00..16B45 7757 UNKNOWN, // 16B46..16B4F 7758 PAHAWH_HMONG, // 16B50..16B59 7759 UNKNOWN, // 16B5A 7760 PAHAWH_HMONG, // 16B5B..16B61 7761 UNKNOWN, // 16B62 7762 PAHAWH_HMONG, // 16B63..16B77 7763 UNKNOWN, // 16B78..16B7C 7764 PAHAWH_HMONG, // 16B7D..16B8F 7765 UNKNOWN, // 16B90..16E3F 7766 MEDEFAIDRIN, // 16E40..16E9A 7767 UNKNOWN, // 16E9B..16EFF 7768 MIAO, // 16F00..16F4A 7769 UNKNOWN, // 16F4B..16F4E 7770 MIAO, // 16F4F..16F87 7771 UNKNOWN, // 16F88..16F8E 7772 MIAO, // 16F8F..16F9F 7773 UNKNOWN, // 16FA0..16FDF 7774 TANGUT, // 16FE0 7775 NUSHU, // 16FE1 7776 COMMON, // 16FE2..16FE3 7777 UNKNOWN, // 16FE4..16FFF 7778 TANGUT, // 17000..187F7 7779 UNKNOWN, // 187F8..187FF 7780 TANGUT, // 18800..18AF2 7781 UNKNOWN, // 18AF3..1AFFF 7782 KATAKANA, // 1B000 7783 HIRAGANA, // 1B001..1B11E 7784 UNKNOWN, // 1B11F..1B14F 7785 HIRAGANA, // 1B150..1B152 7786 UNKNOWN, // 1B153..1B163 7787 KATAKANA, // 1B164..1B167 7788 UNKNOWN, // 1B168..1B16F 7789 NUSHU, // 1B170..1B2FB 7790 UNKNOWN, // 1B2FC..1BBFF 7791 DUPLOYAN, // 1BC00..1BC6A 7792 UNKNOWN, // 1BC6B..1BC6F 7793 DUPLOYAN, // 
1BC70..1BC7C 7794 UNKNOWN, // 1BC7D..1BC7F 7795 DUPLOYAN, // 1BC80..1BC88 7796 UNKNOWN, // 1BC89..1BC8F 7797 DUPLOYAN, // 1BC90..1BC99 7798 UNKNOWN, // 1BC9A..1BC9B 7799 DUPLOYAN, // 1BC9C..1BC9F 7800 COMMON, // 1BCA0..1BCA3 7801 UNKNOWN, // 1BCA4..1CFFF 7802 COMMON, // 1D000..1D0F5 7803 UNKNOWN, // 1D0F6..1D0FF 7804 COMMON, // 1D100..1D126 7805 UNKNOWN, // 1D127..1D128 7806 COMMON, // 1D129..1D166 7807 INHERITED, // 1D167..1D169 7808 COMMON, // 1D16A..1D17A 7809 INHERITED, // 1D17B..1D182 7810 COMMON, // 1D183..1D184 7811 INHERITED, // 1D185..1D18B 7812 COMMON, // 1D18C..1D1A9 7813 INHERITED, // 1D1AA..1D1AD 7814 COMMON, // 1D1AE..1D1E8 7815 UNKNOWN, // 1D1E9..1D1FF 7816 GREEK, // 1D200..1D245 7817 UNKNOWN, // 1D246..1D2DF 7818 COMMON, // 1D2E0..1D2F3 7819 UNKNOWN, // 1D2F4..1D2FF 7820 COMMON, // 1D300..1D356 7821 UNKNOWN, // 1D357..1D35F 7822 COMMON, // 1D360..1D378 7823 UNKNOWN, // 1D379..1D3FF 7824 COMMON, // 1D400..1D454 7825 UNKNOWN, // 1D455 7826 COMMON, // 1D456..1D49C 7827 UNKNOWN, // 1D49D 7828 COMMON, // 1D49E..1D49F 7829 UNKNOWN, // 1D4A0..1D4A1 7830 COMMON, // 1D4A2 7831 UNKNOWN, // 1D4A3..1D4A4 7832 COMMON, // 1D4A5..1D4A6 7833 UNKNOWN, // 1D4A7..1D4A8 7834 COMMON, // 1D4A9..1D4AC 7835 UNKNOWN, // 1D4AD 7836 COMMON, // 1D4AE..1D4B9 7837 UNKNOWN, // 1D4BA 7838 COMMON, // 1D4BB 7839 UNKNOWN, // 1D4BC 7840 COMMON, // 1D4BD..1D4C3 7841 UNKNOWN, // 1D4C4 7842 COMMON, // 1D4C5..1D505 7843 UNKNOWN, // 1D506 7844 COMMON, // 1D507..1D50A 7845 UNKNOWN, // 1D50B..1D50C 7846 COMMON, // 1D50D..1D514 7847 UNKNOWN, // 1D515 7848 COMMON, // 1D516..1D51C 7849 UNKNOWN, // 1D51D 7850 COMMON, // 1D51E..1D539 7851 UNKNOWN, // 1D53A 7852 COMMON, // 1D53B..1D53E 7853 UNKNOWN, // 1D53F 7854 COMMON, // 1D540..1D544 7855 UNKNOWN, // 1D545 7856 COMMON, // 1D546 7857 UNKNOWN, // 1D547..1D549 7858 COMMON, // 1D54A..1D550 7859 UNKNOWN, // 1D551 7860 COMMON, // 1D552..1D6A5 7861 UNKNOWN, // 1D6A6..1D6A7 7862 COMMON, // 1D6A8..1D7CB 7863 UNKNOWN, // 1D7CC..1D7CD 7864 COMMON, // 
1D7CE..1D7FF 7865 SIGNWRITING, // 1D800..1DA8B 7866 UNKNOWN, // 1DA8C..1DA9A 7867 SIGNWRITING, // 1DA9B..1DA9F 7868 UNKNOWN, // 1DAA0 7869 SIGNWRITING, // 1DAA1..1DAAF 7870 UNKNOWN, // 1DAB0..1DFFF 7871 GLAGOLITIC, // 1E000..1E006 7872 UNKNOWN, // 1E007 7873 GLAGOLITIC, // 1E008..1E018 7874 UNKNOWN, // 1E019..1E01A 7875 GLAGOLITIC, // 1E01B..1E021 7876 UNKNOWN, // 1E022 7877 GLAGOLITIC, // 1E023..1E024 7878 UNKNOWN, // 1E025 7879 GLAGOLITIC, // 1E026..1E02A 7880 UNKNOWN, // 1E02B..1E0FF 7881 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 7882 UNKNOWN, // 1E12D..1E12F 7883 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 7884 UNKNOWN, // 1E13E..1E13F 7885 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 7886 UNKNOWN, // 1E14A..1E14D 7887 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 7888 UNKNOWN, // 1E150..1E2BF 7889 WANCHO, // 1E2C0..1E2F9 7890 UNKNOWN, // 1E2FA..1E2FE 7891 WANCHO, // 1E2FF 7892 UNKNOWN, // 1E300..1E7FF 7893 MENDE_KIKAKUI, // 1E800..1E8C4 7894 UNKNOWN, // 1E8C5..1E8C6 7895 MENDE_KIKAKUI, // 1E8C7..1E8D6 7896 UNKNOWN, // 1E8D7..1E8FF 7897 ADLAM, // 1E900..1E94B 7898 UNKNOWN, // 1E94C..1E94F 7899 ADLAM, // 1E950..1E959 7900 UNKNOWN, // 1E95A..1E95D 7901 ADLAM, // 1E95E..1E95F 7902 UNKNOWN, // 1E960..1EC70 7903 COMMON, // 1EC71..1ECB4 7904 UNKNOWN, // 1ECB5..1ED00 7905 COMMON, // 1ED01..1ED3D 7906 UNKNOWN, // 1ED3E..1EDFF 7907 ARABIC, // 1EE00..1EE03 7908 UNKNOWN, // 1EE04 7909 ARABIC, // 1EE05..1EE1F 7910 UNKNOWN, // 1EE20 7911 ARABIC, // 1EE21..1EE22 7912 UNKNOWN, // 1EE23 7913 ARABIC, // 1EE24 7914 UNKNOWN, // 1EE25..1EE26 7915 ARABIC, // 1EE27 7916 UNKNOWN, // 1EE28 7917 ARABIC, // 1EE29..1EE32 7918 UNKNOWN, // 1EE33 7919 ARABIC, // 1EE34..1EE37 7920 UNKNOWN, // 1EE38 7921 ARABIC, // 1EE39 7922 UNKNOWN, // 1EE3A 7923 ARABIC, // 1EE3B 7924 UNKNOWN, // 1EE3C..1EE41 7925 ARABIC, // 1EE42 7926 UNKNOWN, // 1EE43..1EE46 7927 ARABIC, // 1EE47 7928 UNKNOWN, // 1EE48 7929 ARABIC, // 1EE49 7930 UNKNOWN, // 1EE4A 7931 ARABIC, // 1EE4B 7932 UNKNOWN, // 1EE4C 7933 ARABIC, // 1EE4D..1EE4F 
7934 UNKNOWN, // 1EE50 7935 ARABIC, // 1EE51..1EE52 7936 UNKNOWN, // 1EE53 7937 ARABIC, // 1EE54 7938 UNKNOWN, // 1EE55..1EE56 7939 ARABIC, // 1EE57 7940 UNKNOWN, // 1EE58 7941 ARABIC, // 1EE59 7942 UNKNOWN, // 1EE5A 7943 ARABIC, // 1EE5B 7944 UNKNOWN, // 1EE5C 7945 ARABIC, // 1EE5D 7946 UNKNOWN, // 1EE5E 7947 ARABIC, // 1EE5F 7948 UNKNOWN, // 1EE60 7949 ARABIC, // 1EE61..1EE62 7950 UNKNOWN, // 1EE63 7951 ARABIC, // 1EE64 7952 UNKNOWN, // 1EE65..1EE66 7953 ARABIC, // 1EE67..1EE6A 7954 UNKNOWN, // 1EE6B 7955 ARABIC, // 1EE6C..1EE72 7956 UNKNOWN, // 1EE73 7957 ARABIC, // 1EE74..1EE77 7958 UNKNOWN, // 1EE78 7959 ARABIC, // 1EE79..1EE7C 7960 UNKNOWN, // 1EE7D 7961 ARABIC, // 1EE7E 7962 UNKNOWN, // 1EE7F 7963 ARABIC, // 1EE80..1EE89 7964 UNKNOWN, // 1EE8A 7965 ARABIC, // 1EE8B..1EE9B 7966 UNKNOWN, // 1EE9C..1EEA0 7967 ARABIC, // 1EEA1..1EEA3 7968 UNKNOWN, // 1EEA4 7969 ARABIC, // 1EEA5..1EEA9 7970 UNKNOWN, // 1EEAA 7971 ARABIC, // 1EEAB..1EEBB 7972 UNKNOWN, // 1EEBC..1EEEF 7973 ARABIC, // 1EEF0..1EEF1 7974 UNKNOWN, // 1EEF2..1EFFF 7975 COMMON, // 1F000..1F02B 7976 UNKNOWN, // 1F02C..1F02F 7977 COMMON, // 1F030..1F093 7978 UNKNOWN, // 1F094..1F09F 7979 COMMON, // 1F0A0..1F0AE 7980 UNKNOWN, // 1F0AF..1F0B0 7981 COMMON, // 1F0B1..1F0BF 7982 UNKNOWN, // 1F0C0 7983 COMMON, // 1F0C1..1F0CF 7984 UNKNOWN, // 1F0D0 7985 COMMON, // 1F0D1..1F0F5 7986 UNKNOWN, // 1F0F6..1F0FF 7987 COMMON, // 1F100..1F10C 7988 UNKNOWN, // 1F10D..1F10F 7989 COMMON, // 1F110..1F16C 7990 UNKNOWN, // 1F16D..1F16F 7991 COMMON, // 1F170..1F1AC 7992 UNKNOWN, // 1F1AD..1F1E5 7993 COMMON, // 1F1E6..1F1FF 7994 HIRAGANA, // 1F200 7995 COMMON, // 1F201..1F202 7996 UNKNOWN, // 1F203..1F20F 7997 COMMON, // 1F210..1F23B 7998 UNKNOWN, // 1F23C..1F23F 7999 COMMON, // 1F240..1F248 8000 UNKNOWN, // 1F249..1F24F 8001 COMMON, // 1F250..1F251 8002 UNKNOWN, // 1F252..1F25F 8003 COMMON, // 1F260..1F265 8004 UNKNOWN, // 1F266..1F2FF 8005 COMMON, // 1F300..1F6D5 8006 UNKNOWN, // 1F6D6..1F6DF 8007 COMMON, // 1F6E0..1F6EC 8008 
UNKNOWN, // 1F6ED..1F6EF 8009 COMMON, // 1F6F0..1F6FA 8010 UNKNOWN, // 1F6FB..1F6FF 8011 COMMON, // 1F700..1F773 8012 UNKNOWN, // 1F774..1F77F 8013 COMMON, // 1F780..1F7D8 8014 UNKNOWN, // 1F7D9..1F7DF 8015 COMMON, // 1F7E0..1F7EB 8016 UNKNOWN, // 1F7EC..1F7FF 8017 COMMON, // 1F800..1F80B 8018 UNKNOWN, // 1F80C..1F80F 8019 COMMON, // 1F810..1F847 8020 UNKNOWN, // 1F848..1F84F 8021 COMMON, // 1F850..1F859 8022 UNKNOWN, // 1F85A..1F85F 8023 COMMON, // 1F860..1F887 8024 UNKNOWN, // 1F888..1F88F 8025 COMMON, // 1F890..1F8AD 8026 UNKNOWN, // 1F8AE..1F8FF 8027 COMMON, // 1F900..1F90B 8028 UNKNOWN, // 1F90C 8029 COMMON, // 1F90D..1F971 8030 UNKNOWN, // 1F972 8031 COMMON, // 1F973..1F976 8032 UNKNOWN, // 1F977..1F979 8033 COMMON, // 1F97A..1F9A2 8034 UNKNOWN, // 1F9A3..1F9A4 8035 COMMON, // 1F9A5..1F9AA 8036 UNKNOWN, // 1F9AB..1F9AD 8037 COMMON, // 1F9AE..1F9CA 8038 UNKNOWN, // 1F9CB..1F9CC 8039 COMMON, // 1F9CD..1FA53 8040 UNKNOWN, // 1FA54..1FA5F 8041 COMMON, // 1FA60..1FA6D 8042 UNKNOWN, // 1FA6E..1FA6F 8043 COMMON, // 1FA70..1FA73 8044 UNKNOWN, // 1FA74..1FA77 8045 COMMON, // 1FA78..1FA7A 8046 UNKNOWN, // 1FA7B..1FA7F 8047 COMMON, // 1FA80..1FA82 8048 UNKNOWN, // 1FA83..1FA8F 8049 COMMON, // 1FA90..1FA95 8050 UNKNOWN, // 1FA96..1FFFF 8051 HAN, // 20000..2A6D6 8052 UNKNOWN, // 2A6D7..2A6FF 8053 HAN, // 2A700..2B734 8054 UNKNOWN, // 2B735..2B73F 8055 HAN, // 2B740..2B81D 8056 UNKNOWN, // 2B81E..2B81F 8057 HAN, // 2B820..2CEA1 8058 UNKNOWN, // 2CEA2..2CEAF 8059 HAN, // 2CEB0..2EBE0 8060 UNKNOWN, // 2EBE1..2F7FF 8061 HAN, // 2F800..2FA1D 8062 UNKNOWN, // 2FA1E..E0000 8063 COMMON, // E0001 8064 UNKNOWN, // E0002..E001F 8065 COMMON, // E0020..E007F 8066 UNKNOWN, // E0080..E00FF 8067 INHERITED, // E0100..E01EF 8068 UNKNOWN, // E01F0..10FFFF 8069 }; 8070 8071 private static HashMap<String, Character.UnicodeScript> aliases; 8072 static { 8073 aliases = new HashMap<>((int)(153 / 0.75f + 1.0f)); 8074 aliases.put("ADLM", ADLAM); 8075 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 8076 
aliases.put("AHOM", AHOM); 8077 aliases.put("ARAB", ARABIC); 8078 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8079 aliases.put("ARMN", ARMENIAN); 8080 aliases.put("AVST", AVESTAN); 8081 aliases.put("BALI", BALINESE); 8082 aliases.put("BAMU", BAMUM); 8083 aliases.put("BASS", BASSA_VAH); 8084 aliases.put("BATK", BATAK); 8085 aliases.put("BENG", BENGALI); 8086 aliases.put("BHKS", BHAIKSUKI); 8087 aliases.put("BOPO", BOPOMOFO); 8088 aliases.put("BRAH", BRAHMI); 8089 aliases.put("BRAI", BRAILLE); 8090 aliases.put("BUGI", BUGINESE); 8091 aliases.put("BUHD", BUHID); 8092 aliases.put("CAKM", CHAKMA); 8093 aliases.put("CANS", CANADIAN_ABORIGINAL); 8094 aliases.put("CARI", CARIAN); 8095 aliases.put("CHAM", CHAM); 8096 aliases.put("CHER", CHEROKEE); 8097 aliases.put("COPT", COPTIC); 8098 aliases.put("CPRT", CYPRIOT); 8099 aliases.put("CYRL", CYRILLIC); 8100 aliases.put("DEVA", DEVANAGARI); 8101 aliases.put("DOGR", DOGRA); 8102 aliases.put("DSRT", DESERET); 8103 aliases.put("DUPL", DUPLOYAN); 8104 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 8105 aliases.put("ELBA", ELBASAN); 8106 aliases.put("ELYM", ELYMAIC); 8107 aliases.put("ETHI", ETHIOPIC); 8108 aliases.put("GEOR", GEORGIAN); 8109 aliases.put("GLAG", GLAGOLITIC); 8110 aliases.put("GONM", MASARAM_GONDI); 8111 aliases.put("GOTH", GOTHIC); 8112 aliases.put("GONG", GUNJALA_GONDI); 8113 aliases.put("GRAN", GRANTHA); 8114 aliases.put("GREK", GREEK); 8115 aliases.put("GUJR", GUJARATI); 8116 aliases.put("GURU", GURMUKHI); 8117 aliases.put("HANG", HANGUL); 8118 aliases.put("HANI", HAN); 8119 aliases.put("HANO", HANUNOO); 8120 aliases.put("HATR", HATRAN); 8121 aliases.put("HEBR", HEBREW); 8122 aliases.put("HIRA", HIRAGANA); 8123 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 8124 aliases.put("HMNG", PAHAWH_HMONG); 8125 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 8126 // it appears we don't have the KATAKANA_OR_HIRAGANA 8127 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 8128 aliases.put("HUNG", OLD_HUNGARIAN); 8129 aliases.put("ITAL", 
OLD_ITALIC); 8130 aliases.put("JAVA", JAVANESE); 8131 aliases.put("KALI", KAYAH_LI); 8132 aliases.put("KANA", KATAKANA); 8133 aliases.put("KHAR", KHAROSHTHI); 8134 aliases.put("KHMR", KHMER); 8135 aliases.put("KHOJ", KHOJKI); 8136 aliases.put("KNDA", KANNADA); 8137 aliases.put("KTHI", KAITHI); 8138 aliases.put("LANA", TAI_THAM); 8139 aliases.put("LAOO", LAO); 8140 aliases.put("LATN", LATIN); 8141 aliases.put("LEPC", LEPCHA); 8142 aliases.put("LIMB", LIMBU); 8143 aliases.put("LINA", LINEAR_A); 8144 aliases.put("LINB", LINEAR_B); 8145 aliases.put("LISU", LISU); 8146 aliases.put("LYCI", LYCIAN); 8147 aliases.put("LYDI", LYDIAN); 8148 aliases.put("MAHJ", MAHAJANI); 8149 aliases.put("MAKA", MAKASAR); 8150 aliases.put("MARC", MARCHEN); 8151 aliases.put("MAND", MANDAIC); 8152 aliases.put("MANI", MANICHAEAN); 8153 aliases.put("MEDF", MEDEFAIDRIN); 8154 aliases.put("MEND", MENDE_KIKAKUI); 8155 aliases.put("MERC", MEROITIC_CURSIVE); 8156 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 8157 aliases.put("MLYM", MALAYALAM); 8158 aliases.put("MODI", MODI); 8159 aliases.put("MONG", MONGOLIAN); 8160 aliases.put("MROO", MRO); 8161 aliases.put("MTEI", MEETEI_MAYEK); 8162 aliases.put("MULT", MULTANI); 8163 aliases.put("MYMR", MYANMAR); 8164 aliases.put("NAND", NANDINAGARI); 8165 aliases.put("NARB", OLD_NORTH_ARABIAN); 8166 aliases.put("NBAT", NABATAEAN); 8167 aliases.put("NEWA", NEWA); 8168 aliases.put("NKOO", NKO); 8169 aliases.put("NSHU", NUSHU); 8170 aliases.put("OGAM", OGHAM); 8171 aliases.put("OLCK", OL_CHIKI); 8172 aliases.put("ORKH", OLD_TURKIC); 8173 aliases.put("ORYA", ORIYA); 8174 aliases.put("OSGE", OSAGE); 8175 aliases.put("OSMA", OSMANYA); 8176 aliases.put("PALM", PALMYRENE); 8177 aliases.put("PAUC", PAU_CIN_HAU); 8178 aliases.put("PERM", OLD_PERMIC); 8179 aliases.put("PHAG", PHAGS_PA); 8180 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8181 aliases.put("PHLP", PSALTER_PAHLAVI); 8182 aliases.put("PHNX", PHOENICIAN); 8183 aliases.put("PLRD", MIAO); 8184 aliases.put("PRTI", 
INSCRIPTIONAL_PARTHIAN); 8185 aliases.put("RJNG", REJANG); 8186 aliases.put("ROHG", HANIFI_ROHINGYA); 8187 aliases.put("RUNR", RUNIC); 8188 aliases.put("SAMR", SAMARITAN); 8189 aliases.put("SARB", OLD_SOUTH_ARABIAN); 8190 aliases.put("SAUR", SAURASHTRA); 8191 aliases.put("SGNW", SIGNWRITING); 8192 aliases.put("SHAW", SHAVIAN); 8193 aliases.put("SHRD", SHARADA); 8194 aliases.put("SIDD", SIDDHAM); 8195 aliases.put("SIND", KHUDAWADI); 8196 aliases.put("SINH", SINHALA); 8197 aliases.put("SOGD", SOGDIAN); 8198 aliases.put("SOGO", OLD_SOGDIAN); 8199 aliases.put("SORA", SORA_SOMPENG); 8200 aliases.put("SOYO", SOYOMBO); 8201 aliases.put("SUND", SUNDANESE); 8202 aliases.put("SYLO", SYLOTI_NAGRI); 8203 aliases.put("SYRC", SYRIAC); 8204 aliases.put("TAGB", TAGBANWA); 8205 aliases.put("TAKR", TAKRI); 8206 aliases.put("TALE", TAI_LE); 8207 aliases.put("TALU", NEW_TAI_LUE); 8208 aliases.put("TAML", TAMIL); 8209 aliases.put("TANG", TANGUT); 8210 aliases.put("TAVT", TAI_VIET); 8211 aliases.put("TELU", TELUGU); 8212 aliases.put("TFNG", TIFINAGH); 8213 aliases.put("TGLG", TAGALOG); 8214 aliases.put("THAA", THAANA); 8215 aliases.put("THAI", THAI); 8216 aliases.put("TIBT", TIBETAN); 8217 aliases.put("TIRH", TIRHUTA); 8218 aliases.put("UGAR", UGARITIC); 8219 aliases.put("VAII", VAI); 8220 aliases.put("WARA", WARANG_CITI); 8221 aliases.put("WCHO", WANCHO); 8222 aliases.put("XPEO", OLD_PERSIAN); 8223 aliases.put("XSUX", CUNEIFORM); 8224 aliases.put("YIII", YI); 8225 aliases.put("ZANB", ZANABAZAR_SQUARE); 8226 aliases.put("ZINH", INHERITED); 8227 aliases.put("ZYYY", COMMON); 8228 aliases.put("ZZZZ", UNKNOWN); 8229 } 8230 8231 /** 8232 * Returns the enum constant representing the Unicode script of which 8233 * the given character (Unicode code point) is assigned to. 8234 * 8235 * @param codePoint the character (Unicode code point) in question. 8236 * @return The {@code UnicodeScript} constant representing the 8237 * Unicode script of which this character is assigned to. 
8238 * 8239 * @throws IllegalArgumentException if the specified 8240 * {@code codePoint} is an invalid Unicode code point. 8241 * @see Character#isValidCodePoint(int) 8242 * 8243 */ 8244 public static UnicodeScript of(int codePoint) { 8245 if (!isValidCodePoint(codePoint)) 8246 throw new IllegalArgumentException( 8247 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8248 int type = getType(codePoint); 8249 // leave SURROGATE and PRIVATE_USE for table lookup 8250 if (type == UNASSIGNED) 8251 return UNKNOWN; 8252 int index = Arrays.binarySearch(scriptStarts, codePoint); 8253 if (index < 0) 8254 index = -index - 2; 8255 return scripts[index]; 8256 } 8257 8258 /** 8259 * Returns the UnicodeScript constant with the given Unicode script 8260 * name or the script name alias. Script names and their aliases are 8261 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 8262 * and {@code PropertyValueAliases<version>.txt} define script names 8263 * and the script name aliases for a particular version of the 8264 * standard. The {@link Character} class specifies the version of 8265 * the standard that it supports. 8266 * <p> 8267 * Character case is ignored for all of the valid script names. 8268 * The en_US locale's case mapping rules are used to provide 8269 * case-insensitive string comparisons for script name validation. 8270 * 8271 * @param scriptName A {@code UnicodeScript} name. 
8272 * @return The {@code UnicodeScript} constant identified 8273 * by {@code scriptName} 8274 * @throws IllegalArgumentException if {@code scriptName} is an 8275 * invalid name 8276 * @throws NullPointerException if {@code scriptName} is null 8277 */ 8278 public static final UnicodeScript forName(String scriptName) { 8279 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8280 //.replace(' ', '_')); 8281 UnicodeScript sc = aliases.get(scriptName); 8282 if (sc != null) 8283 return sc; 8284 return valueOf(scriptName); 8285 } 8286 } 8287 8288 /** 8289 * The value of the {@code Character}. 8290 * 8291 * @serial 8292 */ 8293 private final char value; 8294 8295 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8296 @java.io.Serial 8297 private static final long serialVersionUID = 3786198910865385080L; 8298 8299 /** 8300 * Constructs a newly allocated {@code Character} object that 8301 * represents the specified {@code char} value. 8302 * 8303 * @param value the value to be represented by the 8304 * {@code Character} object. 8305 * 8306 * @deprecated 8307 * It is rarely appropriate to use this constructor. The static factory 8308 * {@link #valueOf(char)} is generally a better choice, as it is 8309 * likely to yield significantly better space and time performance. 
8310 */ 8311 @Deprecated(since="9") 8312 public Character(char value) { 8313 this.value = value; 8314 } 8315 8316 private static class CharacterCache { 8317 private CharacterCache(){} 8318 8319 static final Character[] cache; 8320 static Character[] archivedCache; 8321 8322 static { 8323 int size = 127 + 1; 8324 8325 // Load and use the archived cache if it exists 8326 VM.initializeFromArchive(CharacterCache.class); 8327 if (archivedCache == null || archivedCache.length != size) { 8328 Character[] c = new Character[size]; 8329 for (int i = 0; i < size; i++) { 8330 c[i] = new Character((char) i); 8331 } 8332 archivedCache = c; 8333 } 8334 cache = archivedCache; 8335 } 8336 } 8337 8338 /** 8339 * Returns a {@code Character} instance representing the specified 8340 * {@code char} value. 8341 * If a new {@code Character} instance is not required, this method 8342 * should generally be used in preference to the constructor 8343 * {@link #Character(char)}, as this method is likely to yield 8344 * significantly better space and time performance by caching 8345 * frequently requested values. 8346 * 8347 * This method will always cache values in the range {@code 8348 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 8349 * cache other values outside of this range. 8350 * 8351 * @param c a char value. 8352 * @return a {@code Character} instance representing {@code c}. 8353 * @since 1.5 8354 */ 8355 @HotSpotIntrinsicCandidate 8356 public static Character valueOf(char c) { 8357 if (c <= 127) { // must cache 8358 return CharacterCache.cache[(int)c]; 8359 } 8360 return new Character(c); 8361 } 8362 8363 /** 8364 * Returns the value of this {@code Character} object. 8365 * @return the primitive {@code char} value represented by 8366 * this object. 
8367 */ 8368 @HotSpotIntrinsicCandidate 8369 public char charValue() { 8370 return value; 8371 } 8372 8373 /** 8374 * Returns a hash code for this {@code Character}; equal to the result 8375 * of invoking {@code charValue()}. 8376 * 8377 * @return a hash code value for this {@code Character} 8378 */ 8379 @Override 8380 public int hashCode() { 8381 return Character.hashCode(value); 8382 } 8383 8384 /** 8385 * Returns a hash code for a {@code char} value; compatible with 8386 * {@code Character.hashCode()}. 8387 * 8388 * @since 1.8 8389 * 8390 * @param value The {@code char} for which to return a hash code. 8391 * @return a hash code value for a {@code char} value. 8392 */ 8393 public static int hashCode(char value) { 8394 return (int)value; 8395 } 8396 8397 /** 8398 * Compares this object against the specified object. 8399 * The result is {@code true} if and only if the argument is not 8400 * {@code null} and is a {@code Character} object that 8401 * represents the same {@code char} value as this object. 8402 * 8403 * @param obj the object to compare with. 8404 * @return {@code true} if the objects are the same; 8405 * {@code false} otherwise. 8406 */ 8407 public boolean equals(Object obj) { 8408 if (obj instanceof Character) { 8409 return value == ((Character)obj).charValue(); 8410 } 8411 return false; 8412 } 8413 8414 /** 8415 * Returns a {@code String} object representing this 8416 * {@code Character}'s value. The result is a string of 8417 * length 1 whose sole component is the primitive 8418 * {@code char} value represented by this 8419 * {@code Character} object. 8420 * 8421 * @return a string representation of this object. 8422 */ 8423 public String toString() { 8424 char buf[] = {value}; 8425 return String.valueOf(buf); 8426 } 8427 8428 /** 8429 * Returns a {@code String} object representing the 8430 * specified {@code char}. The result is a string of length 8431 * 1 consisting solely of the specified {@code char}. 
8432 * 8433 * @apiNote This method cannot handle <a 8434 * href="#supplementary"> supplementary characters</a>. To support 8435 * all Unicode characters, including supplementary characters, use 8436 * the {@link #toString(int)} method. 8437 * 8438 * @param c the {@code char} to be converted 8439 * @return the string representation of the specified {@code char} 8440 * @since 1.4 8441 */ 8442 public static String toString(char c) { 8443 return String.valueOf(c); 8444 } 8445 8446 /** 8447 * Returns a {@code String} object representing the 8448 * specified character (Unicode code point). The result is a string of 8449 * length 1 or 2, consisting solely of the specified {@code codePoint}. 8450 * 8451 * @param codePoint the {@code codePoint} to be converted 8452 * @return the string representation of the specified {@code codePoint} 8453 * @throws IllegalArgumentException if the specified 8454 * {@code codePoint} is not a {@linkplain #isValidCodePoint 8455 * valid Unicode code point}. 8456 * @since 11 8457 */ 8458 public static String toString(int codePoint) { 8459 return String.valueOfCodePoint(codePoint); 8460 } 8461 8462 /** 8463 * Determines whether the specified code point is a valid 8464 * <a href="http://www.unicode.org/glossary/#code_point"> 8465 * Unicode code point value</a>. 8466 * 8467 * @param codePoint the Unicode code point to be tested 8468 * @return {@code true} if the specified code point value is between 8469 * {@link #MIN_CODE_POINT} and 8470 * {@link #MAX_CODE_POINT} inclusive; 8471 * {@code false} otherwise. 8472 * @since 1.5 8473 */ 8474 public static boolean isValidCodePoint(int codePoint) { 8475 // Optimized form of: 8476 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 8477 int plane = codePoint >>> 16; 8478 return plane < ((MAX_CODE_POINT + 1) >>> 16); 8479 } 8480 8481 /** 8482 * Determines whether the specified character (Unicode code point) 8483 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 
8484 * Such code points can be represented using a single {@code char}. 8485 * 8486 * @param codePoint the character (Unicode code point) to be tested 8487 * @return {@code true} if the specified code point is between 8488 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 8489 * {@code false} otherwise. 8490 * @since 1.7 8491 */ 8492 public static boolean isBmpCodePoint(int codePoint) { 8493 return codePoint >>> 16 == 0; 8494 // Optimized form of: 8495 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 8496 // We consistently use logical shift (>>>) to facilitate 8497 // additional runtime optimizations. 8498 } 8499 8500 /** 8501 * Determines whether the specified character (Unicode code point) 8502 * is in the <a href="#supplementary">supplementary character</a> range. 8503 * 8504 * @param codePoint the character (Unicode code point) to be tested 8505 * @return {@code true} if the specified code point is between 8506 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 8507 * {@link #MAX_CODE_POINT} inclusive; 8508 * {@code false} otherwise. 8509 * @since 1.5 8510 */ 8511 public static boolean isSupplementaryCodePoint(int codePoint) { 8512 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 8513 && codePoint < MAX_CODE_POINT + 1; 8514 } 8515 8516 /** 8517 * Determines if the given {@code char} value is a 8518 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8519 * Unicode high-surrogate code unit</a> 8520 * (also known as <i>leading-surrogate code unit</i>). 8521 * 8522 * <p>Such values do not represent characters by themselves, 8523 * but are used in the representation of 8524 * <a href="#supplementary">supplementary characters</a> 8525 * in the UTF-16 encoding. 8526 * 8527 * @param ch the {@code char} value to be tested. 8528 * @return {@code true} if the {@code char} value is between 8529 * {@link #MIN_HIGH_SURROGATE} and 8530 * {@link #MAX_HIGH_SURROGATE} inclusive; 8531 * {@code false} otherwise. 
8532 * @see Character#isLowSurrogate(char) 8533 * @see Character.UnicodeBlock#of(int) 8534 * @since 1.5 8535 */ 8536 public static boolean isHighSurrogate(char ch) { 8537 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 8538 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 8539 } 8540 8541 /** 8542 * Determines if the given {@code char} value is a 8543 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8544 * Unicode low-surrogate code unit</a> 8545 * (also known as <i>trailing-surrogate code unit</i>). 8546 * 8547 * <p>Such values do not represent characters by themselves, 8548 * but are used in the representation of 8549 * <a href="#supplementary">supplementary characters</a> 8550 * in the UTF-16 encoding. 8551 * 8552 * @param ch the {@code char} value to be tested. 8553 * @return {@code true} if the {@code char} value is between 8554 * {@link #MIN_LOW_SURROGATE} and 8555 * {@link #MAX_LOW_SURROGATE} inclusive; 8556 * {@code false} otherwise. 8557 * @see Character#isHighSurrogate(char) 8558 * @since 1.5 8559 */ 8560 public static boolean isLowSurrogate(char ch) { 8561 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 8562 } 8563 8564 /** 8565 * Determines if the given {@code char} value is a Unicode 8566 * <i>surrogate code unit</i>. 8567 * 8568 * <p>Such values do not represent characters by themselves, 8569 * but are used in the representation of 8570 * <a href="#supplementary">supplementary characters</a> 8571 * in the UTF-16 encoding. 8572 * 8573 * <p>A char value is a surrogate code unit if and only if it is either 8574 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 8575 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 8576 * 8577 * @param ch the {@code char} value to be tested. 8578 * @return {@code true} if the {@code char} value is between 8579 * {@link #MIN_SURROGATE} and 8580 * {@link #MAX_SURROGATE} inclusive; 8581 * {@code false} otherwise. 
8582 * @since 1.7 8583 */ 8584 public static boolean isSurrogate(char ch) { 8585 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 8586 } 8587 8588 /** 8589 * Determines whether the specified pair of {@code char} 8590 * values is a valid 8591 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8592 * Unicode surrogate pair</a>. 8593 8594 * <p>This method is equivalent to the expression: 8595 * <blockquote><pre>{@code 8596 * isHighSurrogate(high) && isLowSurrogate(low) 8597 * }</pre></blockquote> 8598 * 8599 * @param high the high-surrogate code value to be tested 8600 * @param low the low-surrogate code value to be tested 8601 * @return {@code true} if the specified high and 8602 * low-surrogate code values represent a valid surrogate pair; 8603 * {@code false} otherwise. 8604 * @since 1.5 8605 */ 8606 public static boolean isSurrogatePair(char high, char low) { 8607 return isHighSurrogate(high) && isLowSurrogate(low); 8608 } 8609 8610 /** 8611 * Determines the number of {@code char} values needed to 8612 * represent the specified character (Unicode code point). If the 8613 * specified character is equal to or greater than 0x10000, then 8614 * the method returns 2. Otherwise, the method returns 1. 8615 * 8616 * <p>This method doesn't validate the specified character to be a 8617 * valid Unicode code point. The caller must validate the 8618 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 8619 * if necessary. 8620 * 8621 * @param codePoint the character (Unicode code point) to be tested. 8622 * @return 2 if the character is a valid supplementary character; 1 otherwise. 8623 * @see Character#isSupplementaryCodePoint(int) 8624 * @since 1.5 8625 */ 8626 public static int charCount(int codePoint) { 8627 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 8628 } 8629 8630 /** 8631 * Converts the specified surrogate pair to its supplementary code 8632 * point value. 
This method does not validate the specified 8633 * surrogate pair. The caller must validate it using {@link 8634 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 8635 * 8636 * @param high the high-surrogate code unit 8637 * @param low the low-surrogate code unit 8638 * @return the supplementary code point composed from the 8639 * specified surrogate pair. 8640 * @since 1.5 8641 */ 8642 public static int toCodePoint(char high, char low) { 8643 // Optimized form of: 8644 // return ((high - MIN_HIGH_SURROGATE) << 10) 8645 // + (low - MIN_LOW_SURROGATE) 8646 // + MIN_SUPPLEMENTARY_CODE_POINT; 8647 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 8648 - (MIN_HIGH_SURROGATE << 10) 8649 - MIN_LOW_SURROGATE); 8650 } 8651 8652 /** 8653 * Returns the code point at the given index of the 8654 * {@code CharSequence}. If the {@code char} value at 8655 * the given index in the {@code CharSequence} is in the 8656 * high-surrogate range, the following index is less than the 8657 * length of the {@code CharSequence}, and the 8658 * {@code char} value at the following index is in the 8659 * low-surrogate range, then the supplementary code point 8660 * corresponding to this surrogate pair is returned. Otherwise, 8661 * the {@code char} value at the given index is returned. 8662 * 8663 * @param seq a sequence of {@code char} values (Unicode code 8664 * units) 8665 * @param index the index to the {@code char} values (Unicode 8666 * code units) in {@code seq} to be converted 8667 * @return the Unicode code point at the given index 8668 * @throws NullPointerException if {@code seq} is null. 8669 * @throws IndexOutOfBoundsException if the value 8670 * {@code index} is negative or not less than 8671 * {@link CharSequence#length() seq.length()}. 
8672 * @since 1.5 8673 */ 8674 public static int codePointAt(CharSequence seq, int index) { 8675 char c1 = seq.charAt(index); 8676 if (isHighSurrogate(c1) && ++index < seq.length()) { 8677 char c2 = seq.charAt(index); 8678 if (isLowSurrogate(c2)) { 8679 return toCodePoint(c1, c2); 8680 } 8681 } 8682 return c1; 8683 } 8684 8685 /** 8686 * Returns the code point at the given index of the 8687 * {@code char} array. If the {@code char} value at 8688 * the given index in the {@code char} array is in the 8689 * high-surrogate range, the following index is less than the 8690 * length of the {@code char} array, and the 8691 * {@code char} value at the following index is in the 8692 * low-surrogate range, then the supplementary code point 8693 * corresponding to this surrogate pair is returned. Otherwise, 8694 * the {@code char} value at the given index is returned. 8695 * 8696 * @param a the {@code char} array 8697 * @param index the index to the {@code char} values (Unicode 8698 * code units) in the {@code char} array to be converted 8699 * @return the Unicode code point at the given index 8700 * @throws NullPointerException if {@code a} is null. 8701 * @throws IndexOutOfBoundsException if the value 8702 * {@code index} is negative or not less than 8703 * the length of the {@code char} array. 8704 * @since 1.5 8705 */ 8706 public static int codePointAt(char[] a, int index) { 8707 return codePointAtImpl(a, index, a.length); 8708 } 8709 8710 /** 8711 * Returns the code point at the given index of the 8712 * {@code char} array, where only array elements with 8713 * {@code index} less than {@code limit} can be used. If 8714 * the {@code char} value at the given index in the 8715 * {@code char} array is in the high-surrogate range, the 8716 * following index is less than the {@code limit}, and the 8717 * {@code char} value at the following index is in the 8718 * low-surrogate range, then the supplementary code point 8719 * corresponding to this surrogate pair is returned. 
Otherwise, 8720 * the {@code char} value at the given index is returned. 8721 * 8722 * @param a the {@code char} array 8723 * @param index the index to the {@code char} values (Unicode 8724 * code units) in the {@code char} array to be converted 8725 * @param limit the index after the last array element that 8726 * can be used in the {@code char} array 8727 * @return the Unicode code point at the given index 8728 * @throws NullPointerException if {@code a} is null. 8729 * @throws IndexOutOfBoundsException if the {@code index} 8730 * argument is negative or not less than the {@code limit} 8731 * argument, or if the {@code limit} argument is negative or 8732 * greater than the length of the {@code char} array. 8733 * @since 1.5 8734 */ 8735 public static int codePointAt(char[] a, int index, int limit) { 8736 if (index >= limit || limit < 0 || limit > a.length) { 8737 throw new IndexOutOfBoundsException(); 8738 } 8739 return codePointAtImpl(a, index, limit); 8740 } 8741 8742 // throws ArrayIndexOutOfBoundsException if index out of bounds 8743 static int codePointAtImpl(char[] a, int index, int limit) { 8744 char c1 = a[index]; 8745 if (isHighSurrogate(c1) && ++index < limit) { 8746 char c2 = a[index]; 8747 if (isLowSurrogate(c2)) { 8748 return toCodePoint(c1, c2); 8749 } 8750 } 8751 return c1; 8752 } 8753 8754 /** 8755 * Returns the code point preceding the given index of the 8756 * {@code CharSequence}. If the {@code char} value at 8757 * {@code (index - 1)} in the {@code CharSequence} is in 8758 * the low-surrogate range, {@code (index - 2)} is not 8759 * negative, and the {@code char} value at {@code (index - 2)} 8760 * in the {@code CharSequence} is in the 8761 * high-surrogate range, then the supplementary code point 8762 * corresponding to this surrogate pair is returned. Otherwise, 8763 * the {@code char} value at {@code (index - 1)} is 8764 * returned. 
8765 * 8766 * @param seq the {@code CharSequence} instance 8767 * @param index the index following the code point that should be returned 8768 * @return the Unicode code point value before the given index. 8769 * @throws NullPointerException if {@code seq} is null. 8770 * @throws IndexOutOfBoundsException if the {@code index} 8771 * argument is less than 1 or greater than {@link 8772 * CharSequence#length() seq.length()}. 8773 * @since 1.5 8774 */ 8775 public static int codePointBefore(CharSequence seq, int index) { 8776 char c2 = seq.charAt(--index); 8777 if (isLowSurrogate(c2) && index > 0) { 8778 char c1 = seq.charAt(--index); 8779 if (isHighSurrogate(c1)) { 8780 return toCodePoint(c1, c2); 8781 } 8782 } 8783 return c2; 8784 } 8785 8786 /** 8787 * Returns the code point preceding the given index of the 8788 * {@code char} array. If the {@code char} value at 8789 * {@code (index - 1)} in the {@code char} array is in 8790 * the low-surrogate range, {@code (index - 2)} is not 8791 * negative, and the {@code char} value at {@code (index - 2)} 8792 * in the {@code char} array is in the 8793 * high-surrogate range, then the supplementary code point 8794 * corresponding to this surrogate pair is returned. Otherwise, 8795 * the {@code char} value at {@code (index - 1)} is 8796 * returned. 8797 * 8798 * @param a the {@code char} array 8799 * @param index the index following the code point that should be returned 8800 * @return the Unicode code point value before the given index. 8801 * @throws NullPointerException if {@code a} is null. 
8802 * @throws IndexOutOfBoundsException if the {@code index} 8803 * argument is less than 1 or greater than the length of the 8804 * {@code char} array 8805 * @since 1.5 8806 */ 8807 public static int codePointBefore(char[] a, int index) { 8808 return codePointBeforeImpl(a, index, 0); 8809 } 8810 8811 /** 8812 * Returns the code point preceding the given index of the 8813 * {@code char} array, where only array elements with 8814 * {@code index} greater than or equal to {@code start} 8815 * can be used. If the {@code char} value at {@code (index - 1)} 8816 * in the {@code char} array is in the 8817 * low-surrogate range, {@code (index - 2)} is not less than 8818 * {@code start}, and the {@code char} value at 8819 * {@code (index - 2)} in the {@code char} array is in 8820 * the high-surrogate range, then the supplementary code point 8821 * corresponding to this surrogate pair is returned. Otherwise, 8822 * the {@code char} value at {@code (index - 1)} is 8823 * returned. 8824 * 8825 * @param a the {@code char} array 8826 * @param index the index following the code point that should be returned 8827 * @param start the index of the first array element in the 8828 * {@code char} array 8829 * @return the Unicode code point value before the given index. 8830 * @throws NullPointerException if {@code a} is null. 8831 * @throws IndexOutOfBoundsException if the {@code index} 8832 * argument is not greater than the {@code start} argument or 8833 * is greater than the length of the {@code char} array, or 8834 * if the {@code start} argument is negative or not less than 8835 * the length of the {@code char} array. 
8836 * @since 1.5 8837 */ 8838 public static int codePointBefore(char[] a, int index, int start) { 8839 if (index <= start || start < 0 || start >= a.length) { 8840 throw new IndexOutOfBoundsException(); 8841 } 8842 return codePointBeforeImpl(a, index, start); 8843 } 8844 8845 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 8846 static int codePointBeforeImpl(char[] a, int index, int start) { 8847 char c2 = a[--index]; 8848 if (isLowSurrogate(c2) && index > start) { 8849 char c1 = a[--index]; 8850 if (isHighSurrogate(c1)) { 8851 return toCodePoint(c1, c2); 8852 } 8853 } 8854 return c2; 8855 } 8856 8857 /** 8858 * Returns the leading surrogate (a 8859 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8860 * high surrogate code unit</a>) of the 8861 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8862 * surrogate pair</a> 8863 * representing the specified supplementary character (Unicode 8864 * code point) in the UTF-16 encoding. If the specified character 8865 * is not a 8866 * <a href="Character.html#supplementary">supplementary character</a>, 8867 * an unspecified {@code char} is returned. 8868 * 8869 * <p>If 8870 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 8871 * is {@code true}, then 8872 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 8873 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 8874 * are also always {@code true}. 
8875 * 8876 * @param codePoint a supplementary character (Unicode code point) 8877 * @return the leading surrogate code unit used to represent the 8878 * character in the UTF-16 encoding 8879 * @since 1.7 8880 */ 8881 public static char highSurrogate(int codePoint) { 8882 return (char) ((codePoint >>> 10) 8883 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 8884 } 8885 8886 /** 8887 * Returns the trailing surrogate (a 8888 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8889 * low surrogate code unit</a>) of the 8890 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8891 * surrogate pair</a> 8892 * representing the specified supplementary character (Unicode 8893 * code point) in the UTF-16 encoding. If the specified character 8894 * is not a 8895 * <a href="Character.html#supplementary">supplementary character</a>, 8896 * an unspecified {@code char} is returned. 8897 * 8898 * <p>If 8899 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 8900 * is {@code true}, then 8901 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 8902 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 8903 * are also always {@code true}. 8904 * 8905 * @param codePoint a supplementary character (Unicode code point) 8906 * @return the trailing surrogate code unit used to represent the 8907 * character in the UTF-16 encoding 8908 * @since 1.7 8909 */ 8910 public static char lowSurrogate(int codePoint) { 8911 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 8912 } 8913 8914 /** 8915 * Converts the specified character (Unicode code point) to its 8916 * UTF-16 representation. If the specified code point is a BMP 8917 * (Basic Multilingual Plane or Plane 0) value, the same value is 8918 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 8919 * specified code point is a supplementary character, its 8920 * surrogate values are stored in {@code dst[dstIndex]} 8921 * (high-surrogate) and {@code dst[dstIndex+1]} 8922 * (low-surrogate), and 2 is returned. 8923 * 8924 * @param codePoint the character (Unicode code point) to be converted. 8925 * @param dst an array of {@code char} in which the 8926 * {@code codePoint}'s UTF-16 value is stored. 8927 * @param dstIndex the start index into the {@code dst} 8928 * array where the converted value is stored. 8929 * @return 1 if the code point is a BMP code point, 2 if the 8930 * code point is a supplementary code point. 8931 * @throws IllegalArgumentException if the specified 8932 * {@code codePoint} is not a valid Unicode code point. 8933 * @throws NullPointerException if the specified {@code dst} is null. 8934 * @throws IndexOutOfBoundsException if {@code dstIndex} 8935 * is negative or not less than {@code dst.length}, or if 8936 * {@code dst} at {@code dstIndex} doesn't have enough 8937 * array element(s) to store the resulting {@code char} 8938 * value(s). (If {@code dstIndex} is equal to 8939 * {@code dst.length-1} and the specified 8940 * {@code codePoint} is a supplementary character, the 8941 * high-surrogate value is not stored in 8942 * {@code dst[dstIndex]}.) 8943 * @since 1.5 8944 */ 8945 public static int toChars(int codePoint, char[] dst, int dstIndex) { 8946 if (isBmpCodePoint(codePoint)) { 8947 dst[dstIndex] = (char) codePoint; 8948 return 1; 8949 } else if (isValidCodePoint(codePoint)) { 8950 toSurrogates(codePoint, dst, dstIndex); 8951 return 2; 8952 } else { 8953 throw new IllegalArgumentException( 8954 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8955 } 8956 } 8957 8958 /** 8959 * Converts the specified character (Unicode code point) to its 8960 * UTF-16 representation stored in a {@code char} array. 
If 8961 * the specified code point is a BMP (Basic Multilingual Plane or 8962 * Plane 0) value, the resulting {@code char} array has 8963 * the same value as {@code codePoint}. If the specified code 8964 * point is a supplementary code point, the resulting 8965 * {@code char} array has the corresponding surrogate pair. 8966 * 8967 * @param codePoint a Unicode code point 8968 * @return a {@code char} array having 8969 * {@code codePoint}'s UTF-16 representation. 8970 * @throws IllegalArgumentException if the specified 8971 * {@code codePoint} is not a valid Unicode code point. 8972 * @since 1.5 8973 */ 8974 public static char[] toChars(int codePoint) { 8975 if (isBmpCodePoint(codePoint)) { 8976 return new char[] { (char) codePoint }; 8977 } else if (isValidCodePoint(codePoint)) { 8978 char[] result = new char[2]; 8979 toSurrogates(codePoint, result, 0); 8980 return result; 8981 } else { 8982 throw new IllegalArgumentException( 8983 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8984 } 8985 } 8986 8987 static void toSurrogates(int codePoint, char[] dst, int index) { 8988 // We write elements "backwards" to guarantee all-or-nothing 8989 dst[index+1] = lowSurrogate(codePoint); 8990 dst[index] = highSurrogate(codePoint); 8991 } 8992 8993 /** 8994 * Returns the number of Unicode code points in the text range of 8995 * the specified char sequence. The text range begins at the 8996 * specified {@code beginIndex} and extends to the 8997 * {@code char} at index {@code endIndex - 1}. Thus the 8998 * length (in {@code char}s) of the text range is 8999 * {@code endIndex-beginIndex}. Unpaired surrogates within 9000 * the text range count as one code point each. 9001 * 9002 * @param seq the char sequence 9003 * @param beginIndex the index to the first {@code char} of 9004 * the text range. 9005 * @param endIndex the index after the last {@code char} of 9006 * the text range. 
9007 * @return the number of Unicode code points in the specified text 9008 * range 9009 * @throws NullPointerException if {@code seq} is null. 9010 * @throws IndexOutOfBoundsException if the 9011 * {@code beginIndex} is negative, or {@code endIndex} 9012 * is larger than the length of the given sequence, or 9013 * {@code beginIndex} is larger than {@code endIndex}. 9014 * @since 1.5 9015 */ 9016 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9017 int length = seq.length(); 9018 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 9019 throw new IndexOutOfBoundsException(); 9020 } 9021 int n = endIndex - beginIndex; 9022 for (int i = beginIndex; i < endIndex; ) { 9023 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9024 isLowSurrogate(seq.charAt(i))) { 9025 n--; 9026 i++; 9027 } 9028 } 9029 return n; 9030 } 9031 9032 /** 9033 * Returns the number of Unicode code points in a subarray of the 9034 * {@code char} array argument. The {@code offset} 9035 * argument is the index of the first {@code char} of the 9036 * subarray and the {@code count} argument specifies the 9037 * length of the subarray in {@code char}s. Unpaired 9038 * surrogates within the subarray count as one code point each. 9039 * 9040 * @param a the {@code char} array 9041 * @param offset the index of the first {@code char} in the 9042 * given {@code char} array 9043 * @param count the length of the subarray in {@code char}s 9044 * @return the number of Unicode code points in the specified subarray 9045 * @throws NullPointerException if {@code a} is null. 9046 * @throws IndexOutOfBoundsException if {@code offset} or 9047 * {@code count} is negative, or if {@code offset + 9048 * count} is larger than the length of the given array. 
9049 * @since 1.5 9050 */ 9051 public static int codePointCount(char[] a, int offset, int count) { 9052 if (count > a.length - offset || offset < 0 || count < 0) { 9053 throw new IndexOutOfBoundsException(); 9054 } 9055 return codePointCountImpl(a, offset, count); 9056 } 9057 9058 static int codePointCountImpl(char[] a, int offset, int count) { 9059 int endIndex = offset + count; 9060 int n = count; 9061 for (int i = offset; i < endIndex; ) { 9062 if (isHighSurrogate(a[i++]) && i < endIndex && 9063 isLowSurrogate(a[i])) { 9064 n--; 9065 i++; 9066 } 9067 } 9068 return n; 9069 } 9070 9071 /** 9072 * Returns the index within the given char sequence that is offset 9073 * from the given {@code index} by {@code codePointOffset} 9074 * code points. Unpaired surrogates within the text range given by 9075 * {@code index} and {@code codePointOffset} count as 9076 * one code point each. 9077 * 9078 * @param seq the char sequence 9079 * @param index the index to be offset 9080 * @param codePointOffset the offset in code points 9081 * @return the index within the char sequence 9082 * @throws NullPointerException if {@code seq} is null. 9083 * @throws IndexOutOfBoundsException if {@code index} 9084 * is negative or larger then the length of the char sequence, 9085 * or if {@code codePointOffset} is positive and the 9086 * subsequence starting with {@code index} has fewer than 9087 * {@code codePointOffset} code points, or if 9088 * {@code codePointOffset} is negative and the subsequence 9089 * before {@code index} has fewer than the absolute value 9090 * of {@code codePointOffset} code points. 
9091 * @since 1.5 9092 */ 9093 public static int offsetByCodePoints(CharSequence seq, int index, 9094 int codePointOffset) { 9095 int length = seq.length(); 9096 if (index < 0 || index > length) { 9097 throw new IndexOutOfBoundsException(); 9098 } 9099 9100 int x = index; 9101 if (codePointOffset >= 0) { 9102 int i; 9103 for (i = 0; x < length && i < codePointOffset; i++) { 9104 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9105 isLowSurrogate(seq.charAt(x))) { 9106 x++; 9107 } 9108 } 9109 if (i < codePointOffset) { 9110 throw new IndexOutOfBoundsException(); 9111 } 9112 } else { 9113 int i; 9114 for (i = codePointOffset; x > 0 && i < 0; i++) { 9115 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9116 isHighSurrogate(seq.charAt(x-1))) { 9117 x--; 9118 } 9119 } 9120 if (i < 0) { 9121 throw new IndexOutOfBoundsException(); 9122 } 9123 } 9124 return x; 9125 } 9126 9127 /** 9128 * Returns the index within the given {@code char} subarray 9129 * that is offset from the given {@code index} by 9130 * {@code codePointOffset} code points. The 9131 * {@code start} and {@code count} arguments specify a 9132 * subarray of the {@code char} array. Unpaired surrogates 9133 * within the text range given by {@code index} and 9134 * {@code codePointOffset} count as one code point each. 9135 * 9136 * @param a the {@code char} array 9137 * @param start the index of the first {@code char} of the 9138 * subarray 9139 * @param count the length of the subarray in {@code char}s 9140 * @param index the index to be offset 9141 * @param codePointOffset the offset in code points 9142 * @return the index within the subarray 9143 * @throws NullPointerException if {@code a} is null. 
9144 * @throws IndexOutOfBoundsException 9145 * if {@code start} or {@code count} is negative, 9146 * or if {@code start + count} is larger than the length of 9147 * the given array, 9148 * or if {@code index} is less than {@code start} or 9149 * larger then {@code start + count}, 9150 * or if {@code codePointOffset} is positive and the text range 9151 * starting with {@code index} and ending with {@code start + count - 1} 9152 * has fewer than {@code codePointOffset} code 9153 * points, 9154 * or if {@code codePointOffset} is negative and the text range 9155 * starting with {@code start} and ending with {@code index - 1} 9156 * has fewer than the absolute value of 9157 * {@code codePointOffset} code points. 9158 * @since 1.5 9159 */ 9160 public static int offsetByCodePoints(char[] a, int start, int count, 9161 int index, int codePointOffset) { 9162 if (count > a.length-start || start < 0 || count < 0 9163 || index < start || index > start+count) { 9164 throw new IndexOutOfBoundsException(); 9165 } 9166 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9167 } 9168 9169 static int offsetByCodePointsImpl(char[]a, int start, int count, 9170 int index, int codePointOffset) { 9171 int x = index; 9172 if (codePointOffset >= 0) { 9173 int limit = start + count; 9174 int i; 9175 for (i = 0; x < limit && i < codePointOffset; i++) { 9176 if (isHighSurrogate(a[x++]) && x < limit && 9177 isLowSurrogate(a[x])) { 9178 x++; 9179 } 9180 } 9181 if (i < codePointOffset) { 9182 throw new IndexOutOfBoundsException(); 9183 } 9184 } else { 9185 int i; 9186 for (i = codePointOffset; x > start && i < 0; i++) { 9187 if (isLowSurrogate(a[--x]) && x > start && 9188 isHighSurrogate(a[x-1])) { 9189 x--; 9190 } 9191 } 9192 if (i < 0) { 9193 throw new IndexOutOfBoundsException(); 9194 } 9195 } 9196 return x; 9197 } 9198 9199 /** 9200 * Determines if the specified character is a lowercase character. 
9201 * <p> 9202 * A character is lowercase if its general category type, provided 9203 * by {@code Character.getType(ch)}, is 9204 * {@code LOWERCASE_LETTER}, or it has contributory property 9205 * Other_Lowercase as defined by the Unicode Standard. 9206 * <p> 9207 * The following are examples of lowercase characters: 9208 * <blockquote><pre> 9209 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9210 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9211 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9212 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9213 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9214 * </pre></blockquote> 9215 * <p> Many other Unicode characters are lowercase too. 9216 * 9217 * <p><b>Note:</b> This method cannot handle <a 9218 * href="#supplementary"> supplementary characters</a>. To support 9219 * all Unicode characters, including supplementary characters, use 9220 * the {@link #isLowerCase(int)} method. 9221 * 9222 * @param ch the character to be tested. 9223 * @return {@code true} if the character is lowercase; 9224 * {@code false} otherwise. 9225 * @see Character#isLowerCase(char) 9226 * @see Character#isTitleCase(char) 9227 * @see Character#toLowerCase(char) 9228 * @see Character#getType(char) 9229 */ 9230 public static boolean isLowerCase(char ch) { 9231 return isLowerCase((int)ch); 9232 } 9233 9234 /** 9235 * Determines if the specified character (Unicode code point) is a 9236 * lowercase character. 9237 * <p> 9238 * A character is lowercase if its general category type, provided 9239 * by {@link Character#getType getType(codePoint)}, is 9240 * {@code LOWERCASE_LETTER}, or it has contributory property 9241 * Other_Lowercase as defined by the Unicode Standard. 
9242 * <p> 9243 * The following are examples of lowercase characters: 9244 * <blockquote><pre> 9245 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9246 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9247 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9248 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9249 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9250 * </pre></blockquote> 9251 * <p> Many other Unicode characters are lowercase too. 9252 * 9253 * @param codePoint the character (Unicode code point) to be tested. 9254 * @return {@code true} if the character is lowercase; 9255 * {@code false} otherwise. 9256 * @see Character#isLowerCase(int) 9257 * @see Character#isTitleCase(int) 9258 * @see Character#toLowerCase(int) 9259 * @see Character#getType(int) 9260 * @since 1.5 9261 */ 9262 public static boolean isLowerCase(int codePoint) { 9263 return CharacterData.of(codePoint).isLowerCase(codePoint) || 9264 CharacterData.of(codePoint).isOtherLowercase(codePoint); 9265 } 9266 9267 /** 9268 * Determines if the specified character is an uppercase character. 9269 * <p> 9270 * A character is uppercase if its general category type, provided by 9271 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9272 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9273 * <p> 9274 * The following are examples of uppercase characters: 9275 * <blockquote><pre> 9276 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9277 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9278 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9279 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9280 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9281 * </pre></blockquote> 9282 * <p> Many other Unicode characters are uppercase too. 
9283 * 9284 * <p><b>Note:</b> This method cannot handle <a 9285 * href="#supplementary"> supplementary characters</a>. To support 9286 * all Unicode characters, including supplementary characters, use 9287 * the {@link #isUpperCase(int)} method. 9288 * 9289 * @param ch the character to be tested. 9290 * @return {@code true} if the character is uppercase; 9291 * {@code false} otherwise. 9292 * @see Character#isLowerCase(char) 9293 * @see Character#isTitleCase(char) 9294 * @see Character#toUpperCase(char) 9295 * @see Character#getType(char) 9296 * @since 1.0 9297 */ 9298 public static boolean isUpperCase(char ch) { 9299 return isUpperCase((int)ch); 9300 } 9301 9302 /** 9303 * Determines if the specified character (Unicode code point) is an uppercase character. 9304 * <p> 9305 * A character is uppercase if its general category type, provided by 9306 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9307 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9308 * <p> 9309 * The following are examples of uppercase characters: 9310 * <blockquote><pre> 9311 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9312 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9313 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9314 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9315 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9316 * </pre></blockquote> 9317 * <p> Many other Unicode characters are uppercase too. 9318 * 9319 * @param codePoint the character (Unicode code point) to be tested. 9320 * @return {@code true} if the character is uppercase; 9321 * {@code false} otherwise. 
9322 * @see Character#isLowerCase(int) 9323 * @see Character#isTitleCase(int) 9324 * @see Character#toUpperCase(int) 9325 * @see Character#getType(int) 9326 * @since 1.5 9327 */ 9328 public static boolean isUpperCase(int codePoint) { 9329 return CharacterData.of(codePoint).isUpperCase(codePoint) || 9330 CharacterData.of(codePoint).isOtherUppercase(codePoint); 9331 } 9332 9333 /** 9334 * Determines if the specified character is a titlecase character. 9335 * <p> 9336 * A character is a titlecase character if its general 9337 * category type, provided by {@code Character.getType(ch)}, 9338 * is {@code TITLECASE_LETTER}. 9339 * <p> 9340 * Some characters look like pairs of Latin letters. For example, there 9341 * is an uppercase letter that looks like "LJ" and has a corresponding 9342 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9343 * is the appropriate form to use when rendering a word in lowercase 9344 * with initial capitals, as for a book title. 9345 * <p> 9346 * These are some of the Unicode characters for which this method returns 9347 * {@code true}: 9348 * <ul> 9349 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9350 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9351 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9352 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9353 * </ul> 9354 * <p> Many other Unicode characters are titlecase too. 9355 * 9356 * <p><b>Note:</b> This method cannot handle <a 9357 * href="#supplementary"> supplementary characters</a>. To support 9358 * all Unicode characters, including supplementary characters, use 9359 * the {@link #isTitleCase(int)} method. 9360 * 9361 * @param ch the character to be tested. 9362 * @return {@code true} if the character is titlecase; 9363 * {@code false} otherwise. 
9364 * @see Character#isLowerCase(char) 9365 * @see Character#isUpperCase(char) 9366 * @see Character#toTitleCase(char) 9367 * @see Character#getType(char) 9368 * @since 1.0.2 9369 */ 9370 public static boolean isTitleCase(char ch) { 9371 return isTitleCase((int)ch); 9372 } 9373 9374 /** 9375 * Determines if the specified character (Unicode code point) is a titlecase character. 9376 * <p> 9377 * A character is a titlecase character if its general 9378 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9379 * is {@code TITLECASE_LETTER}. 9380 * <p> 9381 * Some characters look like pairs of Latin letters. For example, there 9382 * is an uppercase letter that looks like "LJ" and has a corresponding 9383 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9384 * is the appropriate form to use when rendering a word in lowercase 9385 * with initial capitals, as for a book title. 9386 * <p> 9387 * These are some of the Unicode characters for which this method returns 9388 * {@code true}: 9389 * <ul> 9390 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9391 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9392 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9393 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9394 * </ul> 9395 * <p> Many other Unicode characters are titlecase too. 9396 * 9397 * @param codePoint the character (Unicode code point) to be tested. 9398 * @return {@code true} if the character is titlecase; 9399 * {@code false} otherwise. 9400 * @see Character#isLowerCase(int) 9401 * @see Character#isUpperCase(int) 9402 * @see Character#toTitleCase(int) 9403 * @see Character#getType(int) 9404 * @since 1.5 9405 */ 9406 public static boolean isTitleCase(int codePoint) { 9407 return getType(codePoint) == Character.TITLECASE_LETTER; 9408 } 9409 9410 /** 9411 * Determines if the specified character is a digit. 
9412 * <p> 9413 * A character is a digit if its general category type, provided 9414 * by {@code Character.getType(ch)}, is 9415 * {@code DECIMAL_DIGIT_NUMBER}. 9416 * <p> 9417 * Some Unicode character ranges that contain digits: 9418 * <ul> 9419 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9420 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9421 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9422 * Arabic-Indic digits 9423 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9424 * Extended Arabic-Indic digits 9425 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9426 * Devanagari digits 9427 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9428 * Fullwidth digits 9429 * </ul> 9430 * 9431 * Many other character ranges contain digits as well. 9432 * 9433 * <p><b>Note:</b> This method cannot handle <a 9434 * href="#supplementary"> supplementary characters</a>. To support 9435 * all Unicode characters, including supplementary characters, use 9436 * the {@link #isDigit(int)} method. 9437 * 9438 * @param ch the character to be tested. 9439 * @return {@code true} if the character is a digit; 9440 * {@code false} otherwise. 9441 * @see Character#digit(char, int) 9442 * @see Character#forDigit(int, int) 9443 * @see Character#getType(char) 9444 */ 9445 public static boolean isDigit(char ch) { 9446 return isDigit((int)ch); 9447 } 9448 9449 /** 9450 * Determines if the specified character (Unicode code point) is a digit. 9451 * <p> 9452 * A character is a digit if its general category type, provided 9453 * by {@link Character#getType(int) getType(codePoint)}, is 9454 * {@code DECIMAL_DIGIT_NUMBER}. 
9455 * <p> 9456 * Some Unicode character ranges that contain digits: 9457 * <ul> 9458 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 9459 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 9460 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 9461 * Arabic-Indic digits 9462 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 9463 * Extended Arabic-Indic digits 9464 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 9465 * Devanagari digits 9466 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 9467 * Fullwidth digits 9468 * </ul> 9469 * 9470 * Many other character ranges contain digits as well. 9471 * 9472 * @param codePoint the character (Unicode code point) to be tested. 9473 * @return {@code true} if the character is a digit; 9474 * {@code false} otherwise. 9475 * @see Character#forDigit(int, int) 9476 * @see Character#getType(int) 9477 * @since 1.5 9478 */ 9479 public static boolean isDigit(int codePoint) { 9480 return CharacterData.of(codePoint).isDigit(codePoint); 9481 } 9482 9483 /** 9484 * Determines if a character is defined in Unicode. 9485 * <p> 9486 * A character is defined if at least one of the following is true: 9487 * <ul> 9488 * <li>It has an entry in the UnicodeData file. 9489 * <li>It has a value in a range defined by the UnicodeData file. 9490 * </ul> 9491 * 9492 * <p><b>Note:</b> This method cannot handle <a 9493 * href="#supplementary"> supplementary characters</a>. To support 9494 * all Unicode characters, including supplementary characters, use 9495 * the {@link #isDefined(int)} method. 9496 * 9497 * @param ch the character to be tested 9498 * @return {@code true} if the character has a defined meaning 9499 * in Unicode; {@code false} otherwise. 
9500 * @see Character#isDigit(char) 9501 * @see Character#isLetter(char) 9502 * @see Character#isLetterOrDigit(char) 9503 * @see Character#isLowerCase(char) 9504 * @see Character#isTitleCase(char) 9505 * @see Character#isUpperCase(char) 9506 * @since 1.0.2 9507 */ 9508 public static boolean isDefined(char ch) { 9509 return isDefined((int)ch); 9510 } 9511 9512 /** 9513 * Determines if a character (Unicode code point) is defined in Unicode. 9514 * <p> 9515 * A character is defined if at least one of the following is true: 9516 * <ul> 9517 * <li>It has an entry in the UnicodeData file. 9518 * <li>It has a value in a range defined by the UnicodeData file. 9519 * </ul> 9520 * 9521 * @param codePoint the character (Unicode code point) to be tested. 9522 * @return {@code true} if the character has a defined meaning 9523 * in Unicode; {@code false} otherwise. 9524 * @see Character#isDigit(int) 9525 * @see Character#isLetter(int) 9526 * @see Character#isLetterOrDigit(int) 9527 * @see Character#isLowerCase(int) 9528 * @see Character#isTitleCase(int) 9529 * @see Character#isUpperCase(int) 9530 * @since 1.5 9531 */ 9532 public static boolean isDefined(int codePoint) { 9533 return getType(codePoint) != Character.UNASSIGNED; 9534 } 9535 9536 /** 9537 * Determines if the specified character is a letter. 9538 * <p> 9539 * A character is considered to be a letter if its general 9540 * category type, provided by {@code Character.getType(ch)}, 9541 * is any of the following: 9542 * <ul> 9543 * <li> {@code UPPERCASE_LETTER} 9544 * <li> {@code LOWERCASE_LETTER} 9545 * <li> {@code TITLECASE_LETTER} 9546 * <li> {@code MODIFIER_LETTER} 9547 * <li> {@code OTHER_LETTER} 9548 * </ul> 9549 * 9550 * Not all letters have case. Many characters are 9551 * letters but are neither uppercase nor lowercase nor titlecase. 9552 * 9553 * <p><b>Note:</b> This method cannot handle <a 9554 * href="#supplementary"> supplementary characters</a>. 
To support 9555 * all Unicode characters, including supplementary characters, use 9556 * the {@link #isLetter(int)} method. 9557 * 9558 * @param ch the character to be tested. 9559 * @return {@code true} if the character is a letter; 9560 * {@code false} otherwise. 9561 * @see Character#isDigit(char) 9562 * @see Character#isJavaIdentifierStart(char) 9563 * @see Character#isJavaLetter(char) 9564 * @see Character#isJavaLetterOrDigit(char) 9565 * @see Character#isLetterOrDigit(char) 9566 * @see Character#isLowerCase(char) 9567 * @see Character#isTitleCase(char) 9568 * @see Character#isUnicodeIdentifierStart(char) 9569 * @see Character#isUpperCase(char) 9570 */ 9571 public static boolean isLetter(char ch) { 9572 return isLetter((int)ch); 9573 } 9574 9575 /** 9576 * Determines if the specified character (Unicode code point) is a letter. 9577 * <p> 9578 * A character is considered to be a letter if its general 9579 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9580 * is any of the following: 9581 * <ul> 9582 * <li> {@code UPPERCASE_LETTER} 9583 * <li> {@code LOWERCASE_LETTER} 9584 * <li> {@code TITLECASE_LETTER} 9585 * <li> {@code MODIFIER_LETTER} 9586 * <li> {@code OTHER_LETTER} 9587 * </ul> 9588 * 9589 * Not all letters have case. Many characters are 9590 * letters but are neither uppercase nor lowercase nor titlecase. 9591 * 9592 * @param codePoint the character (Unicode code point) to be tested. 9593 * @return {@code true} if the character is a letter; 9594 * {@code false} otherwise. 
9595 * @see Character#isDigit(int) 9596 * @see Character#isJavaIdentifierStart(int) 9597 * @see Character#isLetterOrDigit(int) 9598 * @see Character#isLowerCase(int) 9599 * @see Character#isTitleCase(int) 9600 * @see Character#isUnicodeIdentifierStart(int) 9601 * @see Character#isUpperCase(int) 9602 * @since 1.5 9603 */ 9604 public static boolean isLetter(int codePoint) { 9605 return ((((1 << Character.UPPERCASE_LETTER) | 9606 (1 << Character.LOWERCASE_LETTER) | 9607 (1 << Character.TITLECASE_LETTER) | 9608 (1 << Character.MODIFIER_LETTER) | 9609 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 9610 != 0; 9611 } 9612 9613 /** 9614 * Determines if the specified character is a letter or digit. 9615 * <p> 9616 * A character is considered to be a letter or digit if either 9617 * {@code Character.isLetter(char ch)} or 9618 * {@code Character.isDigit(char ch)} returns 9619 * {@code true} for the character. 9620 * 9621 * <p><b>Note:</b> This method cannot handle <a 9622 * href="#supplementary"> supplementary characters</a>. To support 9623 * all Unicode characters, including supplementary characters, use 9624 * the {@link #isLetterOrDigit(int)} method. 9625 * 9626 * @param ch the character to be tested. 9627 * @return {@code true} if the character is a letter or digit; 9628 * {@code false} otherwise. 9629 * @see Character#isDigit(char) 9630 * @see Character#isJavaIdentifierPart(char) 9631 * @see Character#isJavaLetter(char) 9632 * @see Character#isJavaLetterOrDigit(char) 9633 * @see Character#isLetter(char) 9634 * @see Character#isUnicodeIdentifierPart(char) 9635 * @since 1.0.2 9636 */ 9637 public static boolean isLetterOrDigit(char ch) { 9638 return isLetterOrDigit((int)ch); 9639 } 9640 9641 /** 9642 * Determines if the specified character (Unicode code point) is a letter or digit. 
9643 * <p> 9644 * A character is considered to be a letter or digit if either 9645 * {@link #isLetter(int) isLetter(codePoint)} or 9646 * {@link #isDigit(int) isDigit(codePoint)} returns 9647 * {@code true} for the character. 9648 * 9649 * @param codePoint the character (Unicode code point) to be tested. 9650 * @return {@code true} if the character is a letter or digit; 9651 * {@code false} otherwise. 9652 * @see Character#isDigit(int) 9653 * @see Character#isJavaIdentifierPart(int) 9654 * @see Character#isLetter(int) 9655 * @see Character#isUnicodeIdentifierPart(int) 9656 * @since 1.5 9657 */ 9658 public static boolean isLetterOrDigit(int codePoint) { 9659 return ((((1 << Character.UPPERCASE_LETTER) | 9660 (1 << Character.LOWERCASE_LETTER) | 9661 (1 << Character.TITLECASE_LETTER) | 9662 (1 << Character.MODIFIER_LETTER) | 9663 (1 << Character.OTHER_LETTER) | 9664 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 9665 != 0; 9666 } 9667 9668 /** 9669 * Determines if the specified character is permissible as the first 9670 * character in a Java identifier. 9671 * <p> 9672 * A character may start a Java identifier if and only if 9673 * one of the following conditions is true: 9674 * <ul> 9675 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9676 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 9677 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 9678 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 9679 * </ul> 9680 * 9681 * @param ch the character to be tested. 9682 * @return {@code true} if the character may start a Java 9683 * identifier; {@code false} otherwise. 
9684 * @see Character#isJavaLetterOrDigit(char) 9685 * @see Character#isJavaIdentifierStart(char) 9686 * @see Character#isJavaIdentifierPart(char) 9687 * @see Character#isLetter(char) 9688 * @see Character#isLetterOrDigit(char) 9689 * @see Character#isUnicodeIdentifierStart(char) 9690 * @since 1.0.2 9691 * @deprecated Replaced by isJavaIdentifierStart(char). 9692 */ 9693 @Deprecated(since="1.1") 9694 public static boolean isJavaLetter(char ch) { 9695 return isJavaIdentifierStart(ch); 9696 } 9697 9698 /** 9699 * Determines if the specified character may be part of a Java 9700 * identifier as other than the first character. 9701 * <p> 9702 * A character may be part of a Java identifier if and only if one 9703 * of the following conditions is true: 9704 * <ul> 9705 * <li> it is a letter 9706 * <li> it is a currency symbol (such as {@code '$'}) 9707 * <li> it is a connecting punctuation character (such as {@code '_'}) 9708 * <li> it is a digit 9709 * <li> it is a numeric letter (such as a Roman numeral character) 9710 * <li> it is a combining mark 9711 * <li> it is a non-spacing mark 9712 * <li> {@code isIdentifierIgnorable} returns 9713 * {@code true} for the character. 9714 * </ul> 9715 * 9716 * @param ch the character to be tested. 9717 * @return {@code true} if the character may be part of a 9718 * Java identifier; {@code false} otherwise. 9719 * @see Character#isJavaLetter(char) 9720 * @see Character#isJavaIdentifierStart(char) 9721 * @see Character#isJavaIdentifierPart(char) 9722 * @see Character#isLetter(char) 9723 * @see Character#isLetterOrDigit(char) 9724 * @see Character#isUnicodeIdentifierPart(char) 9725 * @see Character#isIdentifierIgnorable(char) 9726 * @since 1.0.2 9727 * @deprecated Replaced by isJavaIdentifierPart(char). 9728 */ 9729 @Deprecated(since="1.1") 9730 public static boolean isJavaLetterOrDigit(char ch) { 9731 return isJavaIdentifierPart(ch); 9732 } 9733 9734 /** 9735 * Determines if the specified character (Unicode code point) is alphabetic. 
9736 * <p> 9737 * A character is considered to be alphabetic if its general category type, 9738 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 9739 * the following: 9740 * <ul> 9741 * <li> {@code UPPERCASE_LETTER} 9742 * <li> {@code LOWERCASE_LETTER} 9743 * <li> {@code TITLECASE_LETTER} 9744 * <li> {@code MODIFIER_LETTER} 9745 * <li> {@code OTHER_LETTER} 9746 * <li> {@code LETTER_NUMBER} 9747 * </ul> 9748 * or it has contributory property Other_Alphabetic as defined by the 9749 * Unicode Standard. 9750 * 9751 * @param codePoint the character (Unicode code point) to be tested. 9752 * @return {@code true} if the character is a Unicode alphabet 9753 * character, {@code false} otherwise. 9754 * @since 1.7 9755 */ 9756 public static boolean isAlphabetic(int codePoint) { 9757 return (((((1 << Character.UPPERCASE_LETTER) | 9758 (1 << Character.LOWERCASE_LETTER) | 9759 (1 << Character.TITLECASE_LETTER) | 9760 (1 << Character.MODIFIER_LETTER) | 9761 (1 << Character.OTHER_LETTER) | 9762 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 9763 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 9764 } 9765 9766 /** 9767 * Determines if the specified character (Unicode code point) is a CJKV 9768 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 9769 * the Unicode Standard. 9770 * 9771 * @param codePoint the character (Unicode code point) to be tested. 9772 * @return {@code true} if the character is a Unicode ideograph 9773 * character, {@code false} otherwise. 9774 * @since 1.7 9775 */ 9776 public static boolean isIdeographic(int codePoint) { 9777 return CharacterData.of(codePoint).isIdeographic(codePoint); 9778 } 9779 9780 /** 9781 * Determines if the specified character is 9782 * permissible as the first character in a Java identifier. 
9783 * <p> 9784 * A character may start a Java identifier if and only if 9785 * one of the following conditions is true: 9786 * <ul> 9787 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9788 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 9789 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 9790 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 9791 * </ul> 9792 * 9793 * <p><b>Note:</b> This method cannot handle <a 9794 * href="#supplementary"> supplementary characters</a>. To support 9795 * all Unicode characters, including supplementary characters, use 9796 * the {@link #isJavaIdentifierStart(int)} method. 9797 * 9798 * @param ch the character to be tested. 9799 * @return {@code true} if the character may start a Java identifier; 9800 * {@code false} otherwise. 9801 * @see Character#isJavaIdentifierPart(char) 9802 * @see Character#isLetter(char) 9803 * @see Character#isUnicodeIdentifierStart(char) 9804 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9805 * @since 1.1 9806 */ 9807 public static boolean isJavaIdentifierStart(char ch) { 9808 return isJavaIdentifierStart((int)ch); 9809 } 9810 9811 /** 9812 * Determines if the character (Unicode code point) is 9813 * permissible as the first character in a Java identifier. 9814 * <p> 9815 * A character may start a Java identifier if and only if 9816 * one of the following conditions is true: 9817 * <ul> 9818 * <li> {@link #isLetter(int) isLetter(codePoint)} 9819 * returns {@code true} 9820 * <li> {@link #getType(int) getType(codePoint)} 9821 * returns {@code LETTER_NUMBER} 9822 * <li> the referenced character is a currency symbol (such as {@code '$'}) 9823 * <li> the referenced character is a connecting punctuation character 9824 * (such as {@code '_'}). 9825 * </ul> 9826 * 9827 * @param codePoint the character (Unicode code point) to be tested. 
9828 * @return {@code true} if the character may start a Java identifier; 9829 * {@code false} otherwise. 9830 * @see Character#isJavaIdentifierPart(int) 9831 * @see Character#isLetter(int) 9832 * @see Character#isUnicodeIdentifierStart(int) 9833 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9834 * @since 1.5 9835 */ 9836 public static boolean isJavaIdentifierStart(int codePoint) { 9837 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 9838 } 9839 9840 /** 9841 * Determines if the specified character may be part of a Java 9842 * identifier as other than the first character. 9843 * <p> 9844 * A character may be part of a Java identifier if any of the following 9845 * conditions are true: 9846 * <ul> 9847 * <li> it is a letter 9848 * <li> it is a currency symbol (such as {@code '$'}) 9849 * <li> it is a connecting punctuation character (such as {@code '_'}) 9850 * <li> it is a digit 9851 * <li> it is a numeric letter (such as a Roman numeral character) 9852 * <li> it is a combining mark 9853 * <li> it is a non-spacing mark 9854 * <li> {@code isIdentifierIgnorable} returns 9855 * {@code true} for the character 9856 * </ul> 9857 * 9858 * <p><b>Note:</b> This method cannot handle <a 9859 * href="#supplementary"> supplementary characters</a>. To support 9860 * all Unicode characters, including supplementary characters, use 9861 * the {@link #isJavaIdentifierPart(int)} method. 9862 * 9863 * @param ch the character to be tested. 9864 * @return {@code true} if the character may be part of a 9865 * Java identifier; {@code false} otherwise. 
9866 * @see Character#isIdentifierIgnorable(char) 9867 * @see Character#isJavaIdentifierStart(char) 9868 * @see Character#isLetterOrDigit(char) 9869 * @see Character#isUnicodeIdentifierPart(char) 9870 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9871 * @since 1.1 9872 */ 9873 public static boolean isJavaIdentifierPart(char ch) { 9874 return isJavaIdentifierPart((int)ch); 9875 } 9876 9877 /** 9878 * Determines if the character (Unicode code point) may be part of a Java 9879 * identifier as other than the first character. 9880 * <p> 9881 * A character may be part of a Java identifier if any of the following 9882 * conditions are true: 9883 * <ul> 9884 * <li> it is a letter 9885 * <li> it is a currency symbol (such as {@code '$'}) 9886 * <li> it is a connecting punctuation character (such as {@code '_'}) 9887 * <li> it is a digit 9888 * <li> it is a numeric letter (such as a Roman numeral character) 9889 * <li> it is a combining mark 9890 * <li> it is a non-spacing mark 9891 * <li> {@link #isIdentifierIgnorable(int) 9892 * isIdentifierIgnorable(codePoint)} returns {@code true} for 9893 * the code point 9894 * </ul> 9895 * 9896 * @param codePoint the character (Unicode code point) to be tested. 9897 * @return {@code true} if the character may be part of a 9898 * Java identifier; {@code false} otherwise. 9899 * @see Character#isIdentifierIgnorable(int) 9900 * @see Character#isJavaIdentifierStart(int) 9901 * @see Character#isLetterOrDigit(int) 9902 * @see Character#isUnicodeIdentifierPart(int) 9903 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9904 * @since 1.5 9905 */ 9906 public static boolean isJavaIdentifierPart(int codePoint) { 9907 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 9908 } 9909 9910 /** 9911 * Determines if the specified character is permissible as the 9912 * first character in a Unicode identifier. 
9913 * <p> 9914 * A character may start a Unicode identifier if and only if 9915 * one of the following conditions is true: 9916 * <ul> 9917 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9918 * <li> {@link #getType(char) getType(ch)} returns 9919 * {@code LETTER_NUMBER}. 9920 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 9921 * {@code Other_ID_Start}</a> character. 9922 * </ul> 9923 * <p> 9924 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 9925 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 9926 * with the following profile of UAX31: 9927 * <pre> 9928 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 9929 * </pre> 9930 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 9931 * compatibility. 9932 * 9933 * <p><b>Note:</b> This method cannot handle <a 9934 * href="#supplementary"> supplementary characters</a>. To support 9935 * all Unicode characters, including supplementary characters, use 9936 * the {@link #isUnicodeIdentifierStart(int)} method. 9937 * 9938 * @param ch the character to be tested. 9939 * @return {@code true} if the character may start a Unicode 9940 * identifier; {@code false} otherwise. 9941 * @see Character#isJavaIdentifierStart(char) 9942 * @see Character#isLetter(char) 9943 * @see Character#isUnicodeIdentifierPart(char) 9944 * @since 1.1 9945 */ 9946 public static boolean isUnicodeIdentifierStart(char ch) { 9947 return isUnicodeIdentifierStart((int)ch); 9948 } 9949 9950 /** 9951 * Determines if the specified character (Unicode code point) is permissible as the 9952 * first character in a Unicode identifier. 9953 * <p> 9954 * A character may start a Unicode identifier if and only if 9955 * one of the following conditions is true: 9956 * <ul> 9957 * <li> {@link #isLetter(int) isLetter(codePoint)} 9958 * returns {@code true} 9959 * <li> {@link #getType(int) getType(codePoint)} 9960 * returns {@code LETTER_NUMBER}. 
9961 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 9962 * {@code Other_ID_Start}</a> character. 9963 * </ul> 9964 * <p> 9965 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 9966 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 9967 * with the following profile of UAX31: 9968 * <pre> 9969 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 9970 * </pre> 9971 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 9972 * compatibility. 9973 * 9974 * @param codePoint the character (Unicode code point) to be tested. 9975 * @return {@code true} if the character may start a Unicode 9976 * identifier; {@code false} otherwise. 9977 * @see Character#isJavaIdentifierStart(int) 9978 * @see Character#isLetter(int) 9979 * @see Character#isUnicodeIdentifierPart(int) 9980 * @since 1.5 9981 */ 9982 public static boolean isUnicodeIdentifierStart(int codePoint) { 9983 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 9984 } 9985 9986 /** 9987 * Determines if the specified character may be part of a Unicode 9988 * identifier as other than the first character. 9989 * <p> 9990 * A character may be part of a Unicode identifier if and only if 9991 * one of the following statements is true: 9992 * <ul> 9993 * <li> it is a letter 9994 * <li> it is a connecting punctuation character (such as {@code '_'}) 9995 * <li> it is a digit 9996 * <li> it is a numeric letter (such as a Roman numeral character) 9997 * <li> it is a combining mark 9998 * <li> it is a non-spacing mark 9999 * <li> {@code isIdentifierIgnorable} returns 10000 * {@code true} for this character. 10001 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10002 * {@code Other_ID_Start}</a> character. 10003 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10004 * {@code Other_ID_Continue}</a> character. 
10005 * </ul> 10006 * <p> 10007 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10008 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10009 * with the following profile of UAX31: 10010 * <pre> 10011 * Continue := Start + ID_Continue + ignorable 10012 * Medial := empty 10013 * ignorable := isIdentifierIgnorable(char) returns true for the character 10014 * </pre> 10015 * {@code ignorable} is added to {@code Continue} for backward 10016 * compatibility. 10017 * 10018 * <p><b>Note:</b> This method cannot handle <a 10019 * href="#supplementary"> supplementary characters</a>. To support 10020 * all Unicode characters, including supplementary characters, use 10021 * the {@link #isUnicodeIdentifierPart(int)} method. 10022 * 10023 * @param ch the character to be tested. 10024 * @return {@code true} if the character may be part of a 10025 * Unicode identifier; {@code false} otherwise. 10026 * @see Character#isIdentifierIgnorable(char) 10027 * @see Character#isJavaIdentifierPart(char) 10028 * @see Character#isLetterOrDigit(char) 10029 * @see Character#isUnicodeIdentifierStart(char) 10030 * @since 1.1 10031 */ 10032 public static boolean isUnicodeIdentifierPart(char ch) { 10033 return isUnicodeIdentifierPart((int)ch); 10034 } 10035 10036 /** 10037 * Determines if the specified character (Unicode code point) may be part of a Unicode 10038 * identifier as other than the first character. 10039 * <p> 10040 * A character may be part of a Unicode identifier if and only if 10041 * one of the following statements is true: 10042 * <ul> 10043 * <li> it is a letter 10044 * <li> it is a connecting punctuation character (such as {@code '_'}) 10045 * <li> it is a digit 10046 * <li> it is a numeric letter (such as a Roman numeral character) 10047 * <li> it is a combining mark 10048 * <li> it is a non-spacing mark 10049 * <li> {@code isIdentifierIgnorable} returns 10050 * {@code true} for this character. 
10051 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10052 * {@code Other_ID_Start}</a> character. 10053 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10054 * {@code Other_ID_Continue}</a> character. 10055 * </ul> 10056 * <p> 10057 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10058 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10059 * with the following profile of UAX31: 10060 * <pre> 10061 * Continue := Start + ID_Continue + ignorable 10062 * Medial := empty 10063 * ignorable := isIdentifierIgnorable(int) returns true for the character 10064 * </pre> 10065 * {@code ignorable} is added to {@code Continue} for backward 10066 * compatibility. 10067 * 10068 * @param codePoint the character (Unicode code point) to be tested. 10069 * @return {@code true} if the character may be part of a 10070 * Unicode identifier; {@code false} otherwise. 10071 * @see Character#isIdentifierIgnorable(int) 10072 * @see Character#isJavaIdentifierPart(int) 10073 * @see Character#isLetterOrDigit(int) 10074 * @see Character#isUnicodeIdentifierStart(int) 10075 * @since 1.5 10076 */ 10077 public static boolean isUnicodeIdentifierPart(int codePoint) { 10078 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10079 } 10080 10081 /** 10082 * Determines if the specified character should be regarded as 10083 * an ignorable character in a Java identifier or a Unicode identifier. 
10084 * <p> 10085 * The following Unicode characters are ignorable in a Java identifier 10086 * or a Unicode identifier: 10087 * <ul> 10088 * <li>ISO control characters that are not whitespace 10089 * <ul> 10090 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10091 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10092 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10093 * </ul> 10094 * 10095 * <li>all characters that have the {@code FORMAT} general 10096 * category value 10097 * </ul> 10098 * 10099 * <p><b>Note:</b> This method cannot handle <a 10100 * href="#supplementary"> supplementary characters</a>. To support 10101 * all Unicode characters, including supplementary characters, use 10102 * the {@link #isIdentifierIgnorable(int)} method. 10103 * 10104 * @param ch the character to be tested. 10105 * @return {@code true} if the character is an ignorable control 10106 * character that may be part of a Java or Unicode identifier; 10107 * {@code false} otherwise. 10108 * @see Character#isJavaIdentifierPart(char) 10109 * @see Character#isUnicodeIdentifierPart(char) 10110 * @since 1.1 10111 */ 10112 public static boolean isIdentifierIgnorable(char ch) { 10113 return isIdentifierIgnorable((int)ch); 10114 } 10115 10116 /** 10117 * Determines if the specified character (Unicode code point) should be regarded as 10118 * an ignorable character in a Java identifier or a Unicode identifier. 
10119 * <p> 10120 * The following Unicode characters are ignorable in a Java identifier 10121 * or a Unicode identifier: 10122 * <ul> 10123 * <li>ISO control characters that are not whitespace 10124 * <ul> 10125 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10126 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10127 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10128 * </ul> 10129 * 10130 * <li>all characters that have the {@code FORMAT} general 10131 * category value 10132 * </ul> 10133 * 10134 * @param codePoint the character (Unicode code point) to be tested. 10135 * @return {@code true} if the character is an ignorable control 10136 * character that may be part of a Java or Unicode identifier; 10137 * {@code false} otherwise. 10138 * @see Character#isJavaIdentifierPart(int) 10139 * @see Character#isUnicodeIdentifierPart(int) 10140 * @since 1.5 10141 */ 10142 public static boolean isIdentifierIgnorable(int codePoint) { 10143 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10144 } 10145 10146 /** 10147 * Converts the character argument to lowercase using case 10148 * mapping information from the UnicodeData file. 10149 * <p> 10150 * Note that 10151 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10152 * does not always return {@code true} for some ranges of 10153 * characters, particularly those that are symbols or ideographs. 10154 * 10155 * <p>In general, {@link String#toLowerCase()} should be used to map 10156 * characters to lowercase. {@code String} case mapping methods 10157 * have several benefits over {@code Character} case mapping methods. 10158 * {@code String} case mapping methods can perform locale-sensitive 10159 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10160 * the {@code Character} case mapping methods cannot. 10161 * 10162 * <p><b>Note:</b> This method cannot handle <a 10163 * href="#supplementary"> supplementary characters</a>. 
To support 10164 * all Unicode characters, including supplementary characters, use 10165 * the {@link #toLowerCase(int)} method. 10166 * 10167 * @param ch the character to be converted. 10168 * @return the lowercase equivalent of the character, if any; 10169 * otherwise, the character itself. 10170 * @see Character#isLowerCase(char) 10171 * @see String#toLowerCase() 10172 */ 10173 public static char toLowerCase(char ch) { 10174 return (char)toLowerCase((int)ch); 10175 } 10176 10177 /** 10178 * Converts the character (Unicode code point) argument to 10179 * lowercase using case mapping information from the UnicodeData 10180 * file. 10181 * 10182 * <p> Note that 10183 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10184 * does not always return {@code true} for some ranges of 10185 * characters, particularly those that are symbols or ideographs. 10186 * 10187 * <p>In general, {@link String#toLowerCase()} should be used to map 10188 * characters to lowercase. {@code String} case mapping methods 10189 * have several benefits over {@code Character} case mapping methods. 10190 * {@code String} case mapping methods can perform locale-sensitive 10191 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10192 * the {@code Character} case mapping methods cannot. 10193 * 10194 * @param codePoint the character (Unicode code point) to be converted. 10195 * @return the lowercase equivalent of the character (Unicode code 10196 * point), if any; otherwise, the character itself. 10197 * @see Character#isLowerCase(int) 10198 * @see String#toLowerCase() 10199 * 10200 * @since 1.5 10201 */ 10202 public static int toLowerCase(int codePoint) { 10203 return CharacterData.of(codePoint).toLowerCase(codePoint); 10204 } 10205 10206 /** 10207 * Converts the character argument to uppercase using case mapping 10208 * information from the UnicodeData file. 
10209 * <p> 10210 * Note that 10211 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10212 * does not always return {@code true} for some ranges of 10213 * characters, particularly those that are symbols or ideographs. 10214 * 10215 * <p>In general, {@link String#toUpperCase()} should be used to map 10216 * characters to uppercase. {@code String} case mapping methods 10217 * have several benefits over {@code Character} case mapping methods. 10218 * {@code String} case mapping methods can perform locale-sensitive 10219 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10220 * the {@code Character} case mapping methods cannot. 10221 * 10222 * <p><b>Note:</b> This method cannot handle <a 10223 * href="#supplementary"> supplementary characters</a>. To support 10224 * all Unicode characters, including supplementary characters, use 10225 * the {@link #toUpperCase(int)} method. 10226 * 10227 * @param ch the character to be converted. 10228 * @return the uppercase equivalent of the character, if any; 10229 * otherwise, the character itself. 10230 * @see Character#isUpperCase(char) 10231 * @see String#toUpperCase() 10232 */ 10233 public static char toUpperCase(char ch) { 10234 return (char)toUpperCase((int)ch); 10235 } 10236 10237 /** 10238 * Converts the character (Unicode code point) argument to 10239 * uppercase using case mapping information from the UnicodeData 10240 * file. 10241 * 10242 * <p>Note that 10243 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 10244 * does not always return {@code true} for some ranges of 10245 * characters, particularly those that are symbols or ideographs. 10246 * 10247 * <p>In general, {@link String#toUpperCase()} should be used to map 10248 * characters to uppercase. {@code String} case mapping methods 10249 * have several benefits over {@code Character} case mapping methods. 
10250 * {@code String} case mapping methods can perform locale-sensitive 10251 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10252 * the {@code Character} case mapping methods cannot. 10253 * 10254 * @param codePoint the character (Unicode code point) to be converted. 10255 * @return the uppercase equivalent of the character, if any; 10256 * otherwise, the character itself. 10257 * @see Character#isUpperCase(int) 10258 * @see String#toUpperCase() 10259 * 10260 * @since 1.5 10261 */ 10262 public static int toUpperCase(int codePoint) { 10263 return CharacterData.of(codePoint).toUpperCase(codePoint); 10264 } 10265 10266 /** 10267 * Converts the character argument to titlecase using case mapping 10268 * information from the UnicodeData file. If a character has no 10269 * explicit titlecase mapping and is not itself a titlecase char 10270 * according to UnicodeData, then the uppercase mapping is 10271 * returned as an equivalent titlecase mapping. If the 10272 * {@code char} argument is already a titlecase 10273 * {@code char}, the same {@code char} value will be 10274 * returned. 10275 * <p> 10276 * Note that 10277 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 10278 * does not always return {@code true} for some ranges of 10279 * characters. 10280 * 10281 * <p><b>Note:</b> This method cannot handle <a 10282 * href="#supplementary"> supplementary characters</a>. To support 10283 * all Unicode characters, including supplementary characters, use 10284 * the {@link #toTitleCase(int)} method. 10285 * 10286 * @param ch the character to be converted. 10287 * @return the titlecase equivalent of the character, if any; 10288 * otherwise, the character itself. 
10289 * @see Character#isTitleCase(char) 10290 * @see Character#toLowerCase(char) 10291 * @see Character#toUpperCase(char) 10292 * @since 1.0.2 10293 */ 10294 public static char toTitleCase(char ch) { 10295 return (char)toTitleCase((int)ch); 10296 } 10297 10298 /** 10299 * Converts the character (Unicode code point) argument to titlecase using case mapping 10300 * information from the UnicodeData file. If a character has no 10301 * explicit titlecase mapping and is not itself a titlecase char 10302 * according to UnicodeData, then the uppercase mapping is 10303 * returned as an equivalent titlecase mapping. If the 10304 * character argument is already a titlecase 10305 * character, the same character value will be 10306 * returned. 10307 * 10308 * <p>Note that 10309 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 10310 * does not always return {@code true} for some ranges of 10311 * characters. 10312 * 10313 * @param codePoint the character (Unicode code point) to be converted. 10314 * @return the titlecase equivalent of the character, if any; 10315 * otherwise, the character itself. 10316 * @see Character#isTitleCase(int) 10317 * @see Character#toLowerCase(int) 10318 * @see Character#toUpperCase(int) 10319 * @since 1.5 10320 */ 10321 public static int toTitleCase(int codePoint) { 10322 return CharacterData.of(codePoint).toTitleCase(codePoint); 10323 } 10324 10325 /** 10326 * Returns the numeric value of the character {@code ch} in the 10327 * specified radix. 10328 * <p> 10329 * If the radix is not in the range {@code MIN_RADIX} ≤ 10330 * {@code radix} ≤ {@code MAX_RADIX} or if the 10331 * value of {@code ch} is not a valid digit in the specified 10332 * radix, {@code -1} is returned. 
A character is a valid digit 10333 * if at least one of the following is true: 10334 * <ul> 10335 * <li>The method {@code isDigit} is {@code true} of the character 10336 * and the Unicode decimal digit value of the character (or its 10337 * single-character decomposition) is less than the specified radix. 10338 * In this case the decimal digit value is returned. 10339 * <li>The character is one of the uppercase Latin letters 10340 * {@code 'A'} through {@code 'Z'} and its code is less than 10341 * {@code radix + 'A' - 10}. 10342 * In this case, {@code ch - 'A' + 10} 10343 * is returned. 10344 * <li>The character is one of the lowercase Latin letters 10345 * {@code 'a'} through {@code 'z'} and its code is less than 10346 * {@code radix + 'a' - 10}. 10347 * In this case, {@code ch - 'a' + 10} 10348 * is returned. 10349 * <li>The character is one of the fullwidth uppercase Latin letters A 10350 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10351 * and its code is less than 10352 * {@code radix + '\u005CuFF21' - 10}. 10353 * In this case, {@code ch - '\u005CuFF21' + 10} 10354 * is returned. 10355 * <li>The character is one of the fullwidth lowercase Latin letters a 10356 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10357 * and its code is less than 10358 * {@code radix + '\u005CuFF41' - 10}. 10359 * In this case, {@code ch - '\u005CuFF41' + 10} 10360 * is returned. 10361 * </ul> 10362 * 10363 * <p><b>Note:</b> This method cannot handle <a 10364 * href="#supplementary"> supplementary characters</a>. To support 10365 * all Unicode characters, including supplementary characters, use 10366 * the {@link #digit(int, int)} method. 10367 * 10368 * @param ch the character to be converted. 10369 * @param radix the radix. 10370 * @return the numeric value represented by the character in the 10371 * specified radix. 
10372 * @see Character#forDigit(int, int) 10373 * @see Character#isDigit(char) 10374 */ 10375 public static int digit(char ch, int radix) { 10376 return digit((int)ch, radix); 10377 } 10378 10379 /** 10380 * Returns the numeric value of the specified character (Unicode 10381 * code point) in the specified radix. 10382 * 10383 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 10384 * {@code radix} ≤ {@code MAX_RADIX} or if the 10385 * character is not a valid digit in the specified 10386 * radix, {@code -1} is returned. A character is a valid digit 10387 * if at least one of the following is true: 10388 * <ul> 10389 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 10390 * and the Unicode decimal digit value of the character (or its 10391 * single-character decomposition) is less than the specified radix. 10392 * In this case the decimal digit value is returned. 10393 * <li>The character is one of the uppercase Latin letters 10394 * {@code 'A'} through {@code 'Z'} and its code is less than 10395 * {@code radix + 'A' - 10}. 10396 * In this case, {@code codePoint - 'A' + 10} 10397 * is returned. 10398 * <li>The character is one of the lowercase Latin letters 10399 * {@code 'a'} through {@code 'z'} and its code is less than 10400 * {@code radix + 'a' - 10}. 10401 * In this case, {@code codePoint - 'a' + 10} 10402 * is returned. 10403 * <li>The character is one of the fullwidth uppercase Latin letters A 10404 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10405 * and its code is less than 10406 * {@code radix + '\u005CuFF21' - 10}. 10407 * In this case, 10408 * {@code codePoint - '\u005CuFF21' + 10} 10409 * is returned. 10410 * <li>The character is one of the fullwidth lowercase Latin letters a 10411 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10412 * and its code is less than 10413 * {@code radix + '\u005CuFF41'- 10}. 
10414 * In this case, 10415 * {@code codePoint - '\u005CuFF41' + 10} 10416 * is returned. 10417 * </ul> 10418 * 10419 * @param codePoint the character (Unicode code point) to be converted. 10420 * @param radix the radix. 10421 * @return the numeric value represented by the character in the 10422 * specified radix. 10423 * @see Character#forDigit(int, int) 10424 * @see Character#isDigit(int) 10425 * @since 1.5 10426 */ 10427 public static int digit(int codePoint, int radix) { 10428 return CharacterData.of(codePoint).digit(codePoint, radix); 10429 } 10430 10431 /** 10432 * Returns the {@code int} value that the specified Unicode 10433 * character represents. For example, the character 10434 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 10435 * an int with a value of 50. 10436 * <p> 10437 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10438 * {@code '\u005Cu005A'}), lowercase 10439 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10440 * full width variant ({@code '\u005CuFF21'} through 10441 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10442 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10443 * through 35. This is independent of the Unicode specification, 10444 * which does not assign numeric values to these {@code char} 10445 * values. 10446 * <p> 10447 * If the character does not have a numeric value, then -1 is returned. 10448 * If the character has a numeric value that cannot be represented as a 10449 * nonnegative integer (for example, a fractional value), then -2 10450 * is returned. 10451 * 10452 * <p><b>Note:</b> This method cannot handle <a 10453 * href="#supplementary"> supplementary characters</a>. To support 10454 * all Unicode characters, including supplementary characters, use 10455 * the {@link #getNumericValue(int)} method. 10456 * 10457 * @param ch the character to be converted. 
10458 * @return the numeric value of the character, as a nonnegative {@code int} 10459 * value; -2 if the character has a numeric value but the value 10460 * can not be represented as a nonnegative {@code int} value; 10461 * -1 if the character has no numeric value. 10462 * @see Character#forDigit(int, int) 10463 * @see Character#isDigit(char) 10464 * @since 1.1 10465 */ 10466 public static int getNumericValue(char ch) { 10467 return getNumericValue((int)ch); 10468 } 10469 10470 /** 10471 * Returns the {@code int} value that the specified 10472 * character (Unicode code point) represents. For example, the character 10473 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 10474 * an {@code int} with a value of 50. 10475 * <p> 10476 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10477 * {@code '\u005Cu005A'}), lowercase 10478 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10479 * full width variant ({@code '\u005CuFF21'} through 10480 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10481 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10482 * through 35. This is independent of the Unicode specification, 10483 * which does not assign numeric values to these {@code char} 10484 * values. 10485 * <p> 10486 * If the character does not have a numeric value, then -1 is returned. 10487 * If the character has a numeric value that cannot be represented as a 10488 * nonnegative integer (for example, a fractional value), then -2 10489 * is returned. 10490 * 10491 * @param codePoint the character (Unicode code point) to be converted. 10492 * @return the numeric value of the character, as a nonnegative {@code int} 10493 * value; -2 if the character has a numeric value but the value 10494 * can not be represented as a nonnegative {@code int} value; 10495 * -1 if the character has no numeric value. 
10496 * @see Character#forDigit(int, int) 10497 * @see Character#isDigit(int) 10498 * @since 1.5 10499 */ 10500 public static int getNumericValue(int codePoint) { 10501 return CharacterData.of(codePoint).getNumericValue(codePoint); 10502 } 10503 10504 /** 10505 * Determines if the specified character is ISO-LATIN-1 white space. 10506 * This method returns {@code true} for the following five 10507 * characters only: 10508 * <table class="striped"> 10509 * <caption style="display:none">truechars</caption> 10510 * <thead> 10511 * <tr><th scope="col">Character 10512 * <th scope="col">Code 10513 * <th scope="col">Name 10514 * </thead> 10515 * <tbody> 10516 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 10517 * <td>{@code HORIZONTAL TABULATION}</td></tr> 10518 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 10519 * <td>{@code NEW LINE}</td></tr> 10520 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 10521 * <td>{@code FORM FEED}</td></tr> 10522 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 10523 * <td>{@code CARRIAGE RETURN}</td></tr> 10524 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 10525 * <td>{@code SPACE}</td></tr> 10526 * </tbody> 10527 * </table> 10528 * 10529 * @param ch the character to be tested. 10530 * @return {@code true} if the character is ISO-LATIN-1 white 10531 * space; {@code false} otherwise. 10532 * @see Character#isSpaceChar(char) 10533 * @see Character#isWhitespace(char) 10534 * @deprecated Replaced by isWhitespace(char). 10535 */ 10536 @Deprecated(since="1.1") 10537 public static boolean isSpace(char ch) { 10538 return (ch <= 0x0020) && 10539 (((((1L << 0x0009) | 10540 (1L << 0x000A) | 10541 (1L << 0x000C) | 10542 (1L << 0x000D) | 10543 (1L << 0x0020)) >> ch) & 1L) != 0); 10544 } 10545 10546 10547 /** 10548 * Determines if the specified character is a Unicode space character. 
10549 * A character is considered to be a space character if and only if 10550 * it is specified to be a space character by the Unicode Standard. This 10551 * method returns true if the character's general category type is any of 10552 * the following: 10553 * <ul> 10554 * <li> {@code SPACE_SEPARATOR} 10555 * <li> {@code LINE_SEPARATOR} 10556 * <li> {@code PARAGRAPH_SEPARATOR} 10557 * </ul> 10558 * 10559 * <p><b>Note:</b> This method cannot handle <a 10560 * href="#supplementary"> supplementary characters</a>. To support 10561 * all Unicode characters, including supplementary characters, use 10562 * the {@link #isSpaceChar(int)} method. 10563 * 10564 * @param ch the character to be tested. 10565 * @return {@code true} if the character is a space character; 10566 * {@code false} otherwise. 10567 * @see Character#isWhitespace(char) 10568 * @since 1.1 10569 */ 10570 public static boolean isSpaceChar(char ch) { 10571 return isSpaceChar((int)ch); 10572 } 10573 10574 /** 10575 * Determines if the specified character (Unicode code point) is a 10576 * Unicode space character. A character is considered to be a 10577 * space character if and only if it is specified to be a space 10578 * character by the Unicode Standard. This method returns true if 10579 * the character's general category type is any of the following: 10580 * 10581 * <ul> 10582 * <li> {@link #SPACE_SEPARATOR} 10583 * <li> {@link #LINE_SEPARATOR} 10584 * <li> {@link #PARAGRAPH_SEPARATOR} 10585 * </ul> 10586 * 10587 * @param codePoint the character (Unicode code point) to be tested. 10588 * @return {@code true} if the character is a space character; 10589 * {@code false} otherwise. 
10590 * @see Character#isWhitespace(int) 10591 * @since 1.5 10592 */ 10593 public static boolean isSpaceChar(int codePoint) { 10594 return ((((1 << Character.SPACE_SEPARATOR) | 10595 (1 << Character.LINE_SEPARATOR) | 10596 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 10597 != 0; 10598 } 10599 10600 /** 10601 * Determines if the specified character is white space according to Java. 10602 * A character is a Java whitespace character if and only if it satisfies 10603 * one of the following criteria: 10604 * <ul> 10605 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 10606 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 10607 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 10608 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 10609 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 10610 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 10611 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 10612 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 10613 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 10614 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 10615 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 10616 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 10617 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 10618 * </ul> 10619 * 10620 * <p><b>Note:</b> This method cannot handle <a 10621 * href="#supplementary"> supplementary characters</a>. To support 10622 * all Unicode characters, including supplementary characters, use 10623 * the {@link #isWhitespace(int)} method. 10624 * 10625 * @param ch the character to be tested. 10626 * @return {@code true} if the character is a Java whitespace 10627 * character; {@code false} otherwise. 
10628 * @see Character#isSpaceChar(char) 10629 * @since 1.1 10630 */ 10631 public static boolean isWhitespace(char ch) { 10632 return isWhitespace((int)ch); 10633 } 10634 10635 /** 10636 * Determines if the specified character (Unicode code point) is 10637 * white space according to Java. A character is a Java 10638 * whitespace character if and only if it satisfies one of the 10639 * following criteria: 10640 * <ul> 10641 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 10642 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 10643 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 10644 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 10645 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 10646 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 10647 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 10648 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 10649 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 10650 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 10651 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 10652 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 10653 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 10654 * </ul> 10655 * 10656 * @param codePoint the character (Unicode code point) to be tested. 10657 * @return {@code true} if the character is a Java whitespace 10658 * character; {@code false} otherwise. 10659 * @see Character#isSpaceChar(int) 10660 * @since 1.5 10661 */ 10662 public static boolean isWhitespace(int codePoint) { 10663 return CharacterData.of(codePoint).isWhitespace(codePoint); 10664 } 10665 10666 /** 10667 * Determines if the specified character is an ISO control 10668 * character. A character is considered to be an ISO control 10669 * character if its code is in the range {@code '\u005Cu0000'} 10670 * through {@code '\u005Cu001F'} or in the range 10671 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
10672 * 10673 * <p><b>Note:</b> This method cannot handle <a 10674 * href="#supplementary"> supplementary characters</a>. To support 10675 * all Unicode characters, including supplementary characters, use 10676 * the {@link #isISOControl(int)} method. 10677 * 10678 * @param ch the character to be tested. 10679 * @return {@code true} if the character is an ISO control character; 10680 * {@code false} otherwise. 10681 * 10682 * @see Character#isSpaceChar(char) 10683 * @see Character#isWhitespace(char) 10684 * @since 1.1 10685 */ 10686 public static boolean isISOControl(char ch) { 10687 return isISOControl((int)ch); 10688 } 10689 10690 /** 10691 * Determines if the referenced character (Unicode code point) is an ISO control 10692 * character. A character is considered to be an ISO control 10693 * character if its code is in the range {@code '\u005Cu0000'} 10694 * through {@code '\u005Cu001F'} or in the range 10695 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 10696 * 10697 * @param codePoint the character (Unicode code point) to be tested. 10698 * @return {@code true} if the character is an ISO control character; 10699 * {@code false} otherwise. 10700 * @see Character#isSpaceChar(int) 10701 * @see Character#isWhitespace(int) 10702 * @since 1.5 10703 */ 10704 public static boolean isISOControl(int codePoint) { 10705 // Optimized form of: 10706 // (codePoint >= 0x00 && codePoint <= 0x1F) || 10707 // (codePoint >= 0x7F && codePoint <= 0x9F); 10708 return codePoint <= 0x9F && 10709 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 10710 } 10711 10712 /** 10713 * Returns a value indicating a character's general category. 10714 * 10715 * <p><b>Note:</b> This method cannot handle <a 10716 * href="#supplementary"> supplementary characters</a>. To support 10717 * all Unicode characters, including supplementary characters, use 10718 * the {@link #getType(int)} method. 10719 * 10720 * @param ch the character to be tested. 
10721 * @return a value of type {@code int} representing the 10722 * character's general category. 10723 * @see Character#COMBINING_SPACING_MARK 10724 * @see Character#CONNECTOR_PUNCTUATION 10725 * @see Character#CONTROL 10726 * @see Character#CURRENCY_SYMBOL 10727 * @see Character#DASH_PUNCTUATION 10728 * @see Character#DECIMAL_DIGIT_NUMBER 10729 * @see Character#ENCLOSING_MARK 10730 * @see Character#END_PUNCTUATION 10731 * @see Character#FINAL_QUOTE_PUNCTUATION 10732 * @see Character#FORMAT 10733 * @see Character#INITIAL_QUOTE_PUNCTUATION 10734 * @see Character#LETTER_NUMBER 10735 * @see Character#LINE_SEPARATOR 10736 * @see Character#LOWERCASE_LETTER 10737 * @see Character#MATH_SYMBOL 10738 * @see Character#MODIFIER_LETTER 10739 * @see Character#MODIFIER_SYMBOL 10740 * @see Character#NON_SPACING_MARK 10741 * @see Character#OTHER_LETTER 10742 * @see Character#OTHER_NUMBER 10743 * @see Character#OTHER_PUNCTUATION 10744 * @see Character#OTHER_SYMBOL 10745 * @see Character#PARAGRAPH_SEPARATOR 10746 * @see Character#PRIVATE_USE 10747 * @see Character#SPACE_SEPARATOR 10748 * @see Character#START_PUNCTUATION 10749 * @see Character#SURROGATE 10750 * @see Character#TITLECASE_LETTER 10751 * @see Character#UNASSIGNED 10752 * @see Character#UPPERCASE_LETTER 10753 * @since 1.1 10754 */ 10755 public static int getType(char ch) { 10756 return getType((int)ch); 10757 } 10758 10759 /** 10760 * Returns a value indicating a character's general category. 10761 * 10762 * @param codePoint the character (Unicode code point) to be tested. 10763 * @return a value of type {@code int} representing the 10764 * character's general category. 
10765 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 10766 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 10767 * @see Character#CONTROL CONTROL 10768 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 10769 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 10770 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 10771 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 10772 * @see Character#END_PUNCTUATION END_PUNCTUATION 10773 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 10774 * @see Character#FORMAT FORMAT 10775 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 10776 * @see Character#LETTER_NUMBER LETTER_NUMBER 10777 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 10778 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 10779 * @see Character#MATH_SYMBOL MATH_SYMBOL 10780 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 10781 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 10782 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 10783 * @see Character#OTHER_LETTER OTHER_LETTER 10784 * @see Character#OTHER_NUMBER OTHER_NUMBER 10785 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 10786 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 10787 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 10788 * @see Character#PRIVATE_USE PRIVATE_USE 10789 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 10790 * @see Character#START_PUNCTUATION START_PUNCTUATION 10791 * @see Character#SURROGATE SURROGATE 10792 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 10793 * @see Character#UNASSIGNED UNASSIGNED 10794 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 10795 * @since 1.5 10796 */ 10797 public static int getType(int codePoint) { 10798 return CharacterData.of(codePoint).getType(codePoint); 10799 } 10800 10801 /** 10802 * Determines the character representation for a specific digit in 10803 * the specified radix. 
If the value of {@code radix} is not a 10804 * valid radix, or the value of {@code digit} is not a valid 10805 * digit in the specified radix, the null character 10806 * ({@code '\u005Cu0000'}) is returned. 10807 * <p> 10808 * The {@code radix} argument is valid if it is greater than or 10809 * equal to {@code MIN_RADIX} and less than or equal to 10810 * {@code MAX_RADIX}. The {@code digit} argument is valid if 10811 * {@code 0 <= digit < radix}. 10812 * <p> 10813 * If the digit is less than 10, then 10814 * {@code '0' + digit} is returned. Otherwise, the value 10815 * {@code 'a' + digit - 10} is returned. 10816 * 10817 * @param digit the number to convert to a character. 10818 * @param radix the radix. 10819 * @return the {@code char} representation of the specified digit 10820 * in the specified radix. 10821 * @see Character#MIN_RADIX 10822 * @see Character#MAX_RADIX 10823 * @see Character#digit(char, int) 10824 */ 10825 public static char forDigit(int digit, int radix) { 10826 if ((digit >= radix) || (digit < 0)) { 10827 return '\0'; 10828 } 10829 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 10830 return '\0'; 10831 } 10832 if (digit < 10) { 10833 return (char)('0' + digit); 10834 } 10835 return (char)('a' - 10 + digit); 10836 } 10837 10838 /** 10839 * Returns the Unicode directionality property for the given 10840 * character. Character directionality is used to calculate the 10841 * visual ordering of text. The directionality value of undefined 10842 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 10843 * 10844 * <p><b>Note:</b> This method cannot handle <a 10845 * href="#supplementary"> supplementary characters</a>. To support 10846 * all Unicode characters, including supplementary characters, use 10847 * the {@link #getDirectionality(int)} method. 10848 * 10849 * @param ch {@code char} for which the directionality property 10850 * is requested. 10851 * @return the directionality property of the {@code char} value. 
10852 * 10853 * @see Character#DIRECTIONALITY_UNDEFINED 10854 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 10855 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 10856 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 10857 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 10858 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 10859 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 10860 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 10861 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 10862 * @see Character#DIRECTIONALITY_NONSPACING_MARK 10863 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 10864 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 10865 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 10866 * @see Character#DIRECTIONALITY_WHITESPACE 10867 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 10868 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 10869 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 10870 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 10871 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 10872 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 10873 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 10874 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 10875 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 10876 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 10877 * @since 1.4 10878 */ 10879 public static byte getDirectionality(char ch) { 10880 return getDirectionality((int)ch); 10881 } 10882 10883 /** 10884 * Returns the Unicode directionality property for the given 10885 * character (Unicode code point). Character directionality is 10886 * used to calculate the visual ordering of text. The 10887 * directionality value of undefined character is {@link 10888 * #DIRECTIONALITY_UNDEFINED}. 10889 * 10890 * @param codePoint the character (Unicode code point) for which 10891 * the directionality property is requested. 
10892 * @return the directionality property of the character. 10893 * 10894 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 10895 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 10896 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 10897 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 10898 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 10899 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 10900 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 10901 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 10902 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 10903 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 10904 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 10905 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 10906 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 10907 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 10908 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 10909 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 10910 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 10911 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 10912 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 10913 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 10914 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 10915 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 10916 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 10917 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 10918 * @since 1.5 10919 */ 10920 public static byte getDirectionality(int codePoint) { 10921 return CharacterData.of(codePoint).getDirectionality(codePoint); 10922 } 10923 10924 /** 10925 * Determines whether the character is mirrored according to the 10926 * Unicode specification. Mirrored characters should have their 10927 * glyphs horizontally mirrored when displayed in text that is 10928 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 10929 * PARENTHESIS is semantically defined to be an <i>opening 10930 * parenthesis</i>. This will appear as a "(" in text that is 10931 * left-to-right but as a ")" in text that is right-to-left. 10932 * 10933 * <p><b>Note:</b> This method cannot handle <a 10934 * href="#supplementary"> supplementary characters</a>. To support 10935 * all Unicode characters, including supplementary characters, use 10936 * the {@link #isMirrored(int)} method. 10937 * 10938 * @param ch {@code char} for which the mirrored property is requested 10939 * @return {@code true} if the char is mirrored, {@code false} 10940 * if the {@code char} is not mirrored or is not defined. 10941 * @since 1.4 10942 */ 10943 public static boolean isMirrored(char ch) { 10944 return isMirrored((int)ch); 10945 } 10946 10947 /** 10948 * Determines whether the specified character (Unicode code point) 10949 * is mirrored according to the Unicode specification. Mirrored 10950 * characters should have their glyphs horizontally mirrored when 10951 * displayed in text that is right-to-left. For example, 10952 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 10953 * defined to be an <i>opening parenthesis</i>. 
This will appear 10954 * as a "(" in text that is left-to-right but as a ")" in text 10955 * that is right-to-left. 10956 * 10957 * @param codePoint the character (Unicode code point) to be tested. 10958 * @return {@code true} if the character is mirrored, {@code false} 10959 * if the character is not mirrored or is not defined. 10960 * @since 1.5 10961 */ 10962 public static boolean isMirrored(int codePoint) { 10963 return CharacterData.of(codePoint).isMirrored(codePoint); 10964 } 10965 10966 /** 10967 * Compares two {@code Character} objects numerically. 10968 * 10969 * @param anotherCharacter the {@code Character} to be compared. 10970 10971 * @return the value {@code 0} if the argument {@code Character} 10972 * is equal to this {@code Character}; a value less than 10973 * {@code 0} if this {@code Character} is numerically less 10974 * than the {@code Character} argument; and a value greater than 10975 * {@code 0} if this {@code Character} is numerically greater 10976 * than the {@code Character} argument (unsigned comparison). 10977 * Note that this is strictly a numerical comparison; it is not 10978 * locale-dependent. 10979 * @since 1.2 10980 */ 10981 public int compareTo(Character anotherCharacter) { 10982 return compare(this.value, anotherCharacter.value); 10983 } 10984 10985 /** 10986 * Compares two {@code char} values numerically. 
10987 * The value returned is identical to what would be returned by: 10988 * <pre> 10989 * Character.valueOf(x).compareTo(Character.valueOf(y)) 10990 * </pre> 10991 * 10992 * @param x the first {@code char} to compare 10993 * @param y the second {@code char} to compare 10994 * @return the value {@code 0} if {@code x == y}; 10995 * a value less than {@code 0} if {@code x < y}; and 10996 * a value greater than {@code 0} if {@code x > y} 10997 * @since 1.7 10998 */ 10999 public static int compare(char x, char y) { 11000 return x - y; 11001 } 11002 11003 /** 11004 * Converts the character (Unicode code point) argument to uppercase using 11005 * information from the UnicodeData file. 11006 * 11007 * @param codePoint the character (Unicode code point) to be converted. 11008 * @return either the uppercase equivalent of the character, if 11009 * any, or an error flag ({@code Character.ERROR}) 11010 * that indicates that a 1:M {@code char} mapping exists. 11011 * @see Character#isLowerCase(char) 11012 * @see Character#isUpperCase(char) 11013 * @see Character#toLowerCase(char) 11014 * @see Character#toTitleCase(char) 11015 * @since 1.4 11016 */ 11017 static int toUpperCaseEx(int codePoint) { 11018 assert isValidCodePoint(codePoint); 11019 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11020 } 11021 11022 /** 11023 * Converts the character (Unicode code point) argument to uppercase using case 11024 * mapping information from the SpecialCasing file in the Unicode 11025 * specification. If a character has no explicit uppercase 11026 * mapping, then the {@code char} itself is returned in the 11027 * {@code char[]}. 11028 * 11029 * @param codePoint the character (Unicode code point) to be converted. 11030 * @return a {@code char[]} with the uppercased character. 11031 * @since 1.4 11032 */ 11033 static char[] toUpperCaseCharArray(int codePoint) { 11034 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
11035 assert isBmpCodePoint(codePoint); 11036 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11037 } 11038 11039 /** 11040 * The number of bits used to represent a {@code char} value in unsigned 11041 * binary form, constant {@code 16}. 11042 * 11043 * @since 1.5 11044 */ 11045 public static final int SIZE = 16; 11046 11047 /** 11048 * The number of bytes used to represent a {@code char} value in unsigned 11049 * binary form. 11050 * 11051 * @since 1.8 11052 */ 11053 public static final int BYTES = SIZE / Byte.SIZE; 11054 11055 /** 11056 * Returns the value obtained by reversing the order of the bytes in the 11057 * specified {@code char} value. 11058 * 11059 * @param ch The {@code char} of which to reverse the byte order. 11060 * @return the value obtained by reversing (or, equivalently, swapping) 11061 * the bytes in the specified {@code char} value. 11062 * @since 1.5 11063 */ 11064 @HotSpotIntrinsicCandidate 11065 public static char reverseBytes(char ch) { 11066 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11067 } 11068 11069 /** 11070 * Returns the Unicode name of the specified character 11071 * {@code codePoint}, or null if the code point is 11072 * {@link #UNASSIGNED unassigned}. 11073 * <p> 11074 * Note: if the specified character is not assigned a name by 11075 * the <i>UnicodeData</i> file (part of the Unicode Character 11076 * Database maintained by the Unicode Consortium), the returned 11077 * name is the same as the result of expression. 11078 * 11079 * <blockquote>{@code 11080 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11081 * + " " 11082 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11083 * 11084 * }</blockquote> 11085 * 11086 * @param codePoint the character (Unicode code point) 11087 * 11088 * @return the Unicode name of the specified character, or null if 11089 * the code point is unassigned. 
11090 * 11091 * @throws IllegalArgumentException if the specified 11092 * {@code codePoint} is not a valid Unicode 11093 * code point. 11094 * 11095 * @since 1.7 11096 */ 11097 public static String getName(int codePoint) { 11098 if (!isValidCodePoint(codePoint)) { 11099 throw new IllegalArgumentException( 11100 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11101 } 11102 String name = CharacterName.getInstance().getName(codePoint); 11103 if (name != null) 11104 return name; 11105 if (getType(codePoint) == UNASSIGNED) 11106 return null; 11107 UnicodeBlock block = UnicodeBlock.of(codePoint); 11108 if (block != null) 11109 return block.toString().replace('_', ' ') + " " 11110 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11111 // should never come here 11112 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11113 } 11114 11115 /** 11116 * Returns the code point value of the Unicode character specified by 11117 * the given Unicode character name. 11118 * <p> 11119 * Note: if a character is not assigned a name by the <i>UnicodeData</i> 11120 * file (part of the Unicode Character Database maintained by the Unicode 11121 * Consortium), its name is defined as the result of expression 11122 * 11123 * <blockquote>{@code 11124 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11125 * + " " 11126 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11127 * 11128 * }</blockquote> 11129 * <p> 11130 * The {@code name} matching is case insensitive, with any leading and 11131 * trailing whitespace character removed. 11132 * 11133 * @param name the Unicode character name 11134 * 11135 * @return the code point value of the character specified by its name. 11136 * 11137 * @throws IllegalArgumentException if the specified {@code name} 11138 * is not a valid Unicode character name. 
11139 * @throws NullPointerException if {@code name} is {@code null} 11140 * 11141 * @since 9 11142 */ 11143 public static int codePointOf(String name) { 11144 name = name.trim().toUpperCase(Locale.ROOT); 11145 int cp = CharacterName.getInstance().getCodePoint(name); 11146 if (cp != -1) 11147 return cp; 11148 try { 11149 int off = name.lastIndexOf(' '); 11150 if (off != -1) { 11151 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11152 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11153 return cp; 11154 } 11155 } catch (Exception x) {} 11156 throw new IllegalArgumentException("Unrecognized character name :" + name); 11157 } 11158 }