1 /* 2 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 import jdk.internal.HotSpotIntrinsicCandidate; 34 35 /** 36 * The {@code Character} class wraps a value of the primitive 37 * type {@code char} in an object. An object of type 38 * {@code Character} contains a single field whose type is 39 * {@code char}. 40 * <p> 41 * In addition, this class provides several methods for determining 42 * a character's category (lowercase letter, digit, etc.) and for converting 43 * characters from uppercase to lowercase and vice versa. 44 * <p> 45 * Character information is based on the Unicode Standard, version 8.0.0. 
46 * <p> 47 * The methods and data of class {@code Character} are defined by 48 * the information in the <i>UnicodeData</i> file that is part of the 49 * Unicode Character Database maintained by the Unicode 50 * Consortium. This file specifies various properties including name 51 * and general category for every defined Unicode code point or 52 * character range. 53 * <p> 54 * The file and its description are available from the Unicode Consortium at: 55 * <ul> 56 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 57 * </ul> 58 * 59 * <h3><a id="unicode">Unicode Character Representations</a></h3> 60 * 61 * <p>The {@code char} data type (and therefore the value that a 62 * {@code Character} object encapsulates) is based on the 63 * original Unicode specification, which defined characters as 64 * fixed-width 16-bit entities. The Unicode Standard has since been 65 * changed to allow for characters whose representation requires more 66 * than 16 bits. The range of legal <em>code point</em>s is now 67 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 68 * (Refer to the <a 69 * href="http://www.unicode.org/reports/tr27/#notation"><i> 70 * definition</i></a> of the U+<i>n</i> notation in the Unicode 71 * Standard.) 72 * 73 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 74 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 75 * <a id="supplementary">Characters</a> whose code points are greater 76 * than U+FFFF are called <em>supplementary character</em>s. The Java 77 * platform uses the UTF-16 representation in {@code char} arrays and 78 * in the {@code String} and {@code StringBuffer} classes. In 79 * this representation, supplementary characters are represented as a pair 80 * of {@code char} values, the first from the <em>high-surrogates</em> 81 * range (&#92;uD800-&#92;uDBFF), the second from the 82 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF). 
83 * 84 * <p>A {@code char} value, therefore, represents Basic 85 * Multilingual Plane (BMP) code points, including the surrogate 86 * code points, or code units of the UTF-16 encoding. An 87 * {@code int} value represents all Unicode code points, 88 * including supplementary code points. The lower (least significant) 89 * 21 bits of {@code int} are used to represent Unicode code 90 * points and the upper (most significant) 11 bits must be zero. 91 * Unless otherwise specified, the behavior with respect to 92 * supplementary characters and surrogate {@code char} values is 93 * as follows: 94 * 95 * <ul> 96 * <li>The methods that only accept a {@code char} value cannot support 97 * supplementary characters. They treat {@code char} values from the 98 * surrogate ranges as undefined characters. For example, 99 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 100 * this specific value if followed by any low-surrogate value in a string 101 * would represent a letter. 102 * 103 * <li>The methods that accept an {@code int} value support all 104 * Unicode characters, including supplementary characters. For 105 * example, {@code Character.isLetter(0x2F81A)} returns 106 * {@code true} because the code point value represents a letter 107 * (a CJK ideograph). 108 * </ul> 109 * 110 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 111 * used for character values in the range between U+0000 and U+10FFFF, 112 * and <em>Unicode code unit</em> is used for 16-bit 113 * {@code char} values that are code units of the <em>UTF-16</em> 114 * encoding. For more information on Unicode terminology, refer to the 115 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
116 * 117 * @author Lee Boynton 118 * @author Guy Steele 119 * @author Akira Tanaka 120 * @author Martin Buchholz 121 * @author Ulf Zibis 122 * @since 1.0 123 */ 124 public final 125 class Character implements java.io.Serializable, Comparable<Character> { 126 /** 127 * The minimum radix available for conversion to and from strings. 128 * The constant value of this field is the smallest value permitted 129 * for the radix argument in radix-conversion methods such as the 130 * {@code digit} method, the {@code forDigit} method, and the 131 * {@code toString} method of class {@code Integer}. 132 * 133 * @see Character#digit(char, int) 134 * @see Character#forDigit(int, int) 135 * @see Integer#toString(int, int) 136 * @see Integer#valueOf(String) 137 */ 138 public static final int MIN_RADIX = 2; 139 140 /** 141 * The maximum radix available for conversion to and from strings. 142 * The constant value of this field is the largest value permitted 143 * for the radix argument in radix-conversion methods such as the 144 * {@code digit} method, the {@code forDigit} method, and the 145 * {@code toString} method of class {@code Integer}. 146 * 147 * @see Character#digit(char, int) 148 * @see Character#forDigit(int, int) 149 * @see Integer#toString(int, int) 150 * @see Integer#valueOf(String) 151 */ 152 public static final int MAX_RADIX = 36; 153 154 /** 155 * The constant value of this field is the smallest value of type 156 * {@code char}, {@code '\u005Cu0000'}. 157 * 158 * @since 1.0.2 159 */ 160 public static final char MIN_VALUE = '\u0000'; 161 162 /** 163 * The constant value of this field is the largest value of type 164 * {@code char}, {@code '\u005CuFFFF'}. 165 * 166 * @since 1.0.2 167 */ 168 public static final char MAX_VALUE = '\uFFFF'; 169 170 /** 171 * The {@code Class} instance representing the primitive type 172 * {@code char}. 
173 * 174 * @since 1.1 175 */ 176 @SuppressWarnings("unchecked") 177 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 178 179 /* 180 * Normative general types 181 */ 182 183 /* 184 * General character types 185 */ 186 187 /** 188 * General category "Cn" in the Unicode specification. 189 * @since 1.1 190 */ 191 public static final byte UNASSIGNED = 0; 192 193 /** 194 * General category "Lu" in the Unicode specification. 195 * @since 1.1 196 */ 197 public static final byte UPPERCASE_LETTER = 1; 198 199 /** 200 * General category "Ll" in the Unicode specification. 201 * @since 1.1 202 */ 203 public static final byte LOWERCASE_LETTER = 2; 204 205 /** 206 * General category "Lt" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte MODIFIER_LETTER = 4; 216 217 /** 218 * General category "Lo" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte OTHER_LETTER = 5; 222 223 /** 224 * General category "Mn" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte NON_SPACING_MARK = 6; 228 229 /** 230 * General category "Me" in the Unicode specification. 231 * @since 1.1 232 */ 233 public static final byte ENCLOSING_MARK = 7; 234 235 /** 236 * General category "Mc" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte COMBINING_SPACING_MARK = 8; 240 241 /** 242 * General category "Nd" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte DECIMAL_DIGIT_NUMBER = 9; 246 247 /** 248 * General category "Nl" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte LETTER_NUMBER = 10; 252 253 /** 254 * General category "No" in the Unicode specification. 
255 * @since 1.1 256 */ 257 public static final byte OTHER_NUMBER = 11; 258 259 /** 260 * General category "Zs" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte SPACE_SEPARATOR = 12; 264 265 /** 266 * General category "Zl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LINE_SEPARATOR = 13; 270 271 /** 272 * General category "Zp" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte PARAGRAPH_SEPARATOR = 14; 276 277 /** 278 * General category "Cc" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte CONTROL = 15; 282 283 /** 284 * General category "Cf" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte FORMAT = 16; 288 289 /** 290 * General category "Co" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PRIVATE_USE = 18; 294 295 /** 296 * General category "Cs" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte SURROGATE = 19; 300 301 /** 302 * General category "Pd" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte DASH_PUNCTUATION = 20; 306 307 /** 308 * General category "Ps" in the Unicode specification. 309 * @since 1.1 310 */ 311 public static final byte START_PUNCTUATION = 21; 312 313 /** 314 * General category "Pe" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte END_PUNCTUATION = 22; 318 319 /** 320 * General category "Pc" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte CONNECTOR_PUNCTUATION = 23; 324 325 /** 326 * General category "Po" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte OTHER_PUNCTUATION = 24; 330 331 /** 332 * General category "Sm" in the Unicode specification. 
333 * @since 1.1 334 */ 335 public static final byte MATH_SYMBOL = 25; 336 337 /** 338 * General category "Sc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CURRENCY_SYMBOL = 26; 342 343 /** 344 * General category "Sk" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte MODIFIER_SYMBOL = 27; 348 349 /** 350 * General category "So" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte OTHER_SYMBOL = 28; 354 355 /** 356 * General category "Pi" in the Unicode specification. 357 * @since 1.4 358 */ 359 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 360 361 /** 362 * General category "Pf" in the Unicode specification. 363 * @since 1.4 364 */ 365 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 366 367 /** 368 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 369 */ 370 static final int ERROR = 0xFFFFFFFF; 371 372 373 /** 374 * Undefined bidirectional character type. Undefined {@code char} 375 * values have undefined directionality in the Unicode specification. 376 * @since 1.4 377 */ 378 public static final byte DIRECTIONALITY_UNDEFINED = -1; 379 380 /** 381 * Strong bidirectional character type "L" in the Unicode specification. 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 385 386 /** 387 * Strong bidirectional character type "R" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 391 392 /** 393 * Strong bidirectional character type "AL" in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 397 398 /** 399 * Weak bidirectional character type "EN" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 403 404 /** 405 * Weak bidirectional character type "ES" in the Unicode specification. 
406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 409 410 /** 411 * Weak bidirectional character type "ET" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 415 416 /** 417 * Weak bidirectional character type "AN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 421 422 /** 423 * Weak bidirectional character type "CS" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 427 428 /** 429 * Weak bidirectional character type "NSM" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 433 434 /** 435 * Weak bidirectional character type "BN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 439 440 /** 441 * Neutral bidirectional character type "B" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 445 446 /** 447 * Neutral bidirectional character type "S" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 451 452 /** 453 * Neutral bidirectional character type "WS" in the Unicode specification. 454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_WHITESPACE = 12; 457 458 /** 459 * Neutral bidirectional character type "ON" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 463 464 /** 465 * Strong bidirectional character type "LRE" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 469 470 /** 471 * Strong bidirectional character type "LRO" in the Unicode specification. 
472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 475 476 /** 477 * Strong bidirectional character type "RLE" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 481 482 /** 483 * Strong bidirectional character type "RLO" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 487 488 /** 489 * Weak bidirectional character type "PDF" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 493 494 /** 495 * Weak bidirectional character type "LRI" in the Unicode specification. 496 * @since 9 497 */ 498 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 499 500 /** 501 * Weak bidirectional character type "RLI" in the Unicode specification. 502 * @since 9 503 */ 504 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 505 506 /** 507 * Weak bidirectional character type "FSI" in the Unicode specification. 508 * @since 9 509 */ 510 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 511 512 /** 513 * Weak bidirectional character type "PDI" in the Unicode specification. 514 * @since 9 515 */ 516 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 517 518 /** 519 * The minimum value of a 520 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 521 * Unicode high-surrogate code unit</a> 522 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 523 * A high-surrogate is also known as a <i>leading-surrogate</i>. 524 * 525 * @since 1.5 526 */ 527 public static final char MIN_HIGH_SURROGATE = '\uD800'; 528 529 /** 530 * The maximum value of a 531 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 532 * Unicode high-surrogate code unit</a> 533 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
534 * A high-surrogate is also known as a <i>leading-surrogate</i>. 535 * 536 * @since 1.5 537 */ 538 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 539 540 /** 541 * The minimum value of a 542 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 543 * Unicode low-surrogate code unit</a> 544 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 545 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 546 * 547 * @since 1.5 548 */ 549 public static final char MIN_LOW_SURROGATE = '\uDC00'; 550 551 /** 552 * The maximum value of a 553 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 554 * Unicode low-surrogate code unit</a> 555 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 556 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 557 * 558 * @since 1.5 559 */ 560 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 561 562 /** 563 * The minimum value of a Unicode surrogate code unit in the 564 * UTF-16 encoding, constant {@code '\u005CuD800'}. 565 * 566 * @since 1.5 567 */ 568 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 569 570 /** 571 * The maximum value of a Unicode surrogate code unit in the 572 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 573 * 574 * @since 1.5 575 */ 576 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 577 578 /** 579 * The minimum value of a 580 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 581 * Unicode supplementary code point</a>, constant {@code U+10000}. 582 * 583 * @since 1.5 584 */ 585 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 586 587 /** 588 * The minimum value of a 589 * <a href="http://www.unicode.org/glossary/#code_point"> 590 * Unicode code point</a>, constant {@code U+0000}. 
*
     * @since 1.5
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+10FFFF}.
     *
     * @since 1.5
     */
    public static final int MAX_CODE_POINT = 0X10FFFF;


    /**
     * Instances of this class represent particular subsets of the Unicode
     * character set.  The only family of subsets defined in the
     * {@code Character} class is {@link Character.UnicodeBlock}.
     * Other portions of the Java API may define other subsets for their
     * own purposes.
     *
     * <p>A {@code Subset} has identity semantics: {@link #equals(Object)}
     * and {@link #hashCode()} are {@code final} and delegate to reference
     * identity, so two subsets are equal only if they are the same object.
     *
     * @since 1.2
     */
    public static class Subset {

        /**
         * The name of this subset. Assigned exactly once, in the
         * constructor, and returned unchanged by {@link #toString()}.
         */
        private final String name;

        /**
         * Constructs a new {@code Subset} instance.
         *
         * @param  name  The name of this subset
         * @throws NullPointerException if name is {@code null}
         */
        protected Subset(String name) {
            if (name == null) {
                // The message is the parameter name, identifying the offending argument.
                throw new NullPointerException("name");
            }
            this.name = name;
        }

        /**
         * Compares two {@code Subset} objects for equality.
         * This method returns {@code true} if and only if
         * {@code this} and the argument refer to the same
         * object; since this method is {@code final}, this
         * guarantee holds for all subclasses.
         *
         * @param  obj  the object to compare with this subset
         * @return {@code true} if {@code obj} is this very object;
         *         {@code false} otherwise
         */
        @Override
        public final boolean equals(Object obj) {
            return this == obj;
        }

        /**
         * Returns the standard hash code as defined by the
         * {@link Object#hashCode} method.  This method
         * is {@code final} in order to ensure that the
         * {@code equals} and {@code hashCode} methods will
         * be consistent in all subclasses.
         *
         * @return the hash code produced by {@link Object#hashCode}
         */
        @Override
        public final int hashCode() {
            return super.hashCode();
        }

        /**
         * Returns the name of this subset.
         *
         * @return the name passed to the constructor
         */
        @Override
        public final String toString() {
            return name;
        }
    }

    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
    // for the latest specification of Unicode Blocks.
664 665 /** 666 * A family of character subsets representing the character blocks in the 667 * Unicode specification. Character blocks generally define characters 668 * used for a specific script or purpose. A character is contained by 669 * at most one Unicode block. 670 * 671 * @since 1.2 672 */ 673 public static final class UnicodeBlock extends Subset { 674 /** 675 * 510 - the expected number of entities 676 * 0.75 - the default load factor of HashMap 677 */ 678 private static Map<String, UnicodeBlock> map = 679 new HashMap<>((int)(510 / 0.75f + 1.0f)); 680 681 /** 682 * Creates a UnicodeBlock with the given identifier name. 683 * This name must be the same as the block identifier. 684 */ 685 private UnicodeBlock(String idName) { 686 super(idName); 687 map.put(idName, this); 688 } 689 690 /** 691 * Creates a UnicodeBlock with the given identifier name and 692 * alias name. 693 */ 694 private UnicodeBlock(String idName, String alias) { 695 this(idName); 696 map.put(alias, this); 697 } 698 699 /** 700 * Creates a UnicodeBlock with the given identifier name and 701 * alias names. 702 */ 703 private UnicodeBlock(String idName, String... aliases) { 704 this(idName); 705 for (String alias : aliases) 706 map.put(alias, this); 707 } 708 709 /** 710 * Constant for the "Basic Latin" Unicode character block. 711 * @since 1.2 712 */ 713 public static final UnicodeBlock BASIC_LATIN = 714 new UnicodeBlock("BASIC_LATIN", 715 "BASIC LATIN", 716 "BASICLATIN"); 717 718 /** 719 * Constant for the "Latin-1 Supplement" Unicode character block. 720 * @since 1.2 721 */ 722 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 723 new UnicodeBlock("LATIN_1_SUPPLEMENT", 724 "LATIN-1 SUPPLEMENT", 725 "LATIN-1SUPPLEMENT"); 726 727 /** 728 * Constant for the "Latin Extended-A" Unicode character block. 
729 * @since 1.2 730 */ 731 public static final UnicodeBlock LATIN_EXTENDED_A = 732 new UnicodeBlock("LATIN_EXTENDED_A", 733 "LATIN EXTENDED-A", 734 "LATINEXTENDED-A"); 735 736 /** 737 * Constant for the "Latin Extended-B" Unicode character block. 738 * @since 1.2 739 */ 740 public static final UnicodeBlock LATIN_EXTENDED_B = 741 new UnicodeBlock("LATIN_EXTENDED_B", 742 "LATIN EXTENDED-B", 743 "LATINEXTENDED-B"); 744 745 /** 746 * Constant for the "IPA Extensions" Unicode character block. 747 * @since 1.2 748 */ 749 public static final UnicodeBlock IPA_EXTENSIONS = 750 new UnicodeBlock("IPA_EXTENSIONS", 751 "IPA EXTENSIONS", 752 "IPAEXTENSIONS"); 753 754 /** 755 * Constant for the "Spacing Modifier Letters" Unicode character block. 756 * @since 1.2 757 */ 758 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 759 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 760 "SPACING MODIFIER LETTERS", 761 "SPACINGMODIFIERLETTERS"); 762 763 /** 764 * Constant for the "Combining Diacritical Marks" Unicode character block. 765 * @since 1.2 766 */ 767 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 768 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 769 "COMBINING DIACRITICAL MARKS", 770 "COMBININGDIACRITICALMARKS"); 771 772 /** 773 * Constant for the "Greek and Coptic" Unicode character block. 774 * <p> 775 * This block was previously known as the "Greek" block. 776 * 777 * @since 1.2 778 */ 779 public static final UnicodeBlock GREEK = 780 new UnicodeBlock("GREEK", 781 "GREEK AND COPTIC", 782 "GREEKANDCOPTIC"); 783 784 /** 785 * Constant for the "Cyrillic" Unicode character block. 786 * @since 1.2 787 */ 788 public static final UnicodeBlock CYRILLIC = 789 new UnicodeBlock("CYRILLIC"); 790 791 /** 792 * Constant for the "Armenian" Unicode character block. 793 * @since 1.2 794 */ 795 public static final UnicodeBlock ARMENIAN = 796 new UnicodeBlock("ARMENIAN"); 797 798 /** 799 * Constant for the "Hebrew" Unicode character block. 
800 * @since 1.2 801 */ 802 public static final UnicodeBlock HEBREW = 803 new UnicodeBlock("HEBREW"); 804 805 /** 806 * Constant for the "Arabic" Unicode character block. 807 * @since 1.2 808 */ 809 public static final UnicodeBlock ARABIC = 810 new UnicodeBlock("ARABIC"); 811 812 /** 813 * Constant for the "Devanagari" Unicode character block. 814 * @since 1.2 815 */ 816 public static final UnicodeBlock DEVANAGARI = 817 new UnicodeBlock("DEVANAGARI"); 818 819 /** 820 * Constant for the "Bengali" Unicode character block. 821 * @since 1.2 822 */ 823 public static final UnicodeBlock BENGALI = 824 new UnicodeBlock("BENGALI"); 825 826 /** 827 * Constant for the "Gurmukhi" Unicode character block. 828 * @since 1.2 829 */ 830 public static final UnicodeBlock GURMUKHI = 831 new UnicodeBlock("GURMUKHI"); 832 833 /** 834 * Constant for the "Gujarati" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock GUJARATI = 838 new UnicodeBlock("GUJARATI"); 839 840 /** 841 * Constant for the "Oriya" Unicode character block. 842 * @since 1.2 843 */ 844 public static final UnicodeBlock ORIYA = 845 new UnicodeBlock("ORIYA"); 846 847 /** 848 * Constant for the "Tamil" Unicode character block. 849 * @since 1.2 850 */ 851 public static final UnicodeBlock TAMIL = 852 new UnicodeBlock("TAMIL"); 853 854 /** 855 * Constant for the "Telugu" Unicode character block. 856 * @since 1.2 857 */ 858 public static final UnicodeBlock TELUGU = 859 new UnicodeBlock("TELUGU"); 860 861 /** 862 * Constant for the "Kannada" Unicode character block. 863 * @since 1.2 864 */ 865 public static final UnicodeBlock KANNADA = 866 new UnicodeBlock("KANNADA"); 867 868 /** 869 * Constant for the "Malayalam" Unicode character block. 870 * @since 1.2 871 */ 872 public static final UnicodeBlock MALAYALAM = 873 new UnicodeBlock("MALAYALAM"); 874 875 /** 876 * Constant for the "Thai" Unicode character block. 
877 * @since 1.2 878 */ 879 public static final UnicodeBlock THAI = 880 new UnicodeBlock("THAI"); 881 882 /** 883 * Constant for the "Lao" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock LAO = 887 new UnicodeBlock("LAO"); 888 889 /** 890 * Constant for the "Tibetan" Unicode character block. 891 * @since 1.2 892 */ 893 public static final UnicodeBlock TIBETAN = 894 new UnicodeBlock("TIBETAN"); 895 896 /** 897 * Constant for the "Georgian" Unicode character block. 898 * @since 1.2 899 */ 900 public static final UnicodeBlock GEORGIAN = 901 new UnicodeBlock("GEORGIAN"); 902 903 /** 904 * Constant for the "Hangul Jamo" Unicode character block. 905 * @since 1.2 906 */ 907 public static final UnicodeBlock HANGUL_JAMO = 908 new UnicodeBlock("HANGUL_JAMO", 909 "HANGUL JAMO", 910 "HANGULJAMO"); 911 912 /** 913 * Constant for the "Latin Extended Additional" Unicode character block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 917 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 918 "LATIN EXTENDED ADDITIONAL", 919 "LATINEXTENDEDADDITIONAL"); 920 921 /** 922 * Constant for the "Greek Extended" Unicode character block. 923 * @since 1.2 924 */ 925 public static final UnicodeBlock GREEK_EXTENDED = 926 new UnicodeBlock("GREEK_EXTENDED", 927 "GREEK EXTENDED", 928 "GREEKEXTENDED"); 929 930 /** 931 * Constant for the "General Punctuation" Unicode character block. 932 * @since 1.2 933 */ 934 public static final UnicodeBlock GENERAL_PUNCTUATION = 935 new UnicodeBlock("GENERAL_PUNCTUATION", 936 "GENERAL PUNCTUATION", 937 "GENERALPUNCTUATION"); 938 939 /** 940 * Constant for the "Superscripts and Subscripts" Unicode character 941 * block. 
942 * @since 1.2 943 */ 944 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 945 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 946 "SUPERSCRIPTS AND SUBSCRIPTS", 947 "SUPERSCRIPTSANDSUBSCRIPTS"); 948 949 /** 950 * Constant for the "Currency Symbols" Unicode character block. 951 * @since 1.2 952 */ 953 public static final UnicodeBlock CURRENCY_SYMBOLS = 954 new UnicodeBlock("CURRENCY_SYMBOLS", 955 "CURRENCY SYMBOLS", 956 "CURRENCYSYMBOLS"); 957 958 /** 959 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 960 * character block. 961 * <p> 962 * This block was previously known as "Combining Marks for Symbols". 963 * @since 1.2 964 */ 965 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 966 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 967 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 968 "COMBININGDIACRITICALMARKSFORSYMBOLS", 969 "COMBINING MARKS FOR SYMBOLS", 970 "COMBININGMARKSFORSYMBOLS"); 971 972 /** 973 * Constant for the "Letterlike Symbols" Unicode character block. 974 * @since 1.2 975 */ 976 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 977 new UnicodeBlock("LETTERLIKE_SYMBOLS", 978 "LETTERLIKE SYMBOLS", 979 "LETTERLIKESYMBOLS"); 980 981 /** 982 * Constant for the "Number Forms" Unicode character block. 983 * @since 1.2 984 */ 985 public static final UnicodeBlock NUMBER_FORMS = 986 new UnicodeBlock("NUMBER_FORMS", 987 "NUMBER FORMS", 988 "NUMBERFORMS"); 989 990 /** 991 * Constant for the "Arrows" Unicode character block. 992 * @since 1.2 993 */ 994 public static final UnicodeBlock ARROWS = 995 new UnicodeBlock("ARROWS"); 996 997 /** 998 * Constant for the "Mathematical Operators" Unicode character block. 999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1002 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1003 "MATHEMATICAL OPERATORS", 1004 "MATHEMATICALOPERATORS"); 1005 1006 /** 1007 * Constant for the "Miscellaneous Technical" Unicode character block. 
1008 * @since 1.2 1009 */ 1010 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1011 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1012 "MISCELLANEOUS TECHNICAL", 1013 "MISCELLANEOUSTECHNICAL"); 1014 1015 /** 1016 * Constant for the "Control Pictures" Unicode character block. 1017 * @since 1.2 1018 */ 1019 public static final UnicodeBlock CONTROL_PICTURES = 1020 new UnicodeBlock("CONTROL_PICTURES", 1021 "CONTROL PICTURES", 1022 "CONTROLPICTURES"); 1023 1024 /** 1025 * Constant for the "Optical Character Recognition" Unicode character block. 1026 * @since 1.2 1027 */ 1028 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1029 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1030 "OPTICAL CHARACTER RECOGNITION", 1031 "OPTICALCHARACTERRECOGNITION"); 1032 1033 /** 1034 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1035 * @since 1.2 1036 */ 1037 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1038 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1039 "ENCLOSED ALPHANUMERICS", 1040 "ENCLOSEDALPHANUMERICS"); 1041 1042 /** 1043 * Constant for the "Box Drawing" Unicode character block. 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock BOX_DRAWING = 1047 new UnicodeBlock("BOX_DRAWING", 1048 "BOX DRAWING", 1049 "BOXDRAWING"); 1050 1051 /** 1052 * Constant for the "Block Elements" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock BLOCK_ELEMENTS = 1056 new UnicodeBlock("BLOCK_ELEMENTS", 1057 "BLOCK ELEMENTS", 1058 "BLOCKELEMENTS"); 1059 1060 /** 1061 * Constant for the "Geometric Shapes" Unicode character block. 1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock GEOMETRIC_SHAPES = 1065 new UnicodeBlock("GEOMETRIC_SHAPES", 1066 "GEOMETRIC SHAPES", 1067 "GEOMETRICSHAPES"); 1068 1069 /** 1070 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1071 * @since 1.2 1072 */ 1073 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1074 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1075 "MISCELLANEOUS SYMBOLS", 1076 "MISCELLANEOUSSYMBOLS"); 1077 1078 /** 1079 * Constant for the "Dingbats" Unicode character block. 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock DINGBATS = 1083 new UnicodeBlock("DINGBATS"); 1084 1085 /** 1086 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1090 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1091 "CJK SYMBOLS AND PUNCTUATION", 1092 "CJKSYMBOLSANDPUNCTUATION"); 1093 1094 /** 1095 * Constant for the "Hiragana" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock HIRAGANA = 1099 new UnicodeBlock("HIRAGANA"); 1100 1101 /** 1102 * Constant for the "Katakana" Unicode character block. 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock KATAKANA = 1106 new UnicodeBlock("KATAKANA"); 1107 1108 /** 1109 * Constant for the "Bopomofo" Unicode character block. 1110 * @since 1.2 1111 */ 1112 public static final UnicodeBlock BOPOMOFO = 1113 new UnicodeBlock("BOPOMOFO"); 1114 1115 /** 1116 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1117 * @since 1.2 1118 */ 1119 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1120 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1121 "HANGUL COMPATIBILITY JAMO", 1122 "HANGULCOMPATIBILITYJAMO"); 1123 1124 /** 1125 * Constant for the "Kanbun" Unicode character block. 1126 * @since 1.2 1127 */ 1128 public static final UnicodeBlock KANBUN = 1129 new UnicodeBlock("KANBUN"); 1130 1131 /** 1132 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1136 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1137 "ENCLOSED CJK LETTERS AND MONTHS", 1138 "ENCLOSEDCJKLETTERSANDMONTHS"); 1139 1140 /** 1141 * Constant for the "CJK Compatibility" Unicode character block. 1142 * @since 1.2 1143 */ 1144 public static final UnicodeBlock CJK_COMPATIBILITY = 1145 new UnicodeBlock("CJK_COMPATIBILITY", 1146 "CJK COMPATIBILITY", 1147 "CJKCOMPATIBILITY"); 1148 1149 /** 1150 * Constant for the "CJK Unified Ideographs" Unicode character block. 1151 * @since 1.2 1152 */ 1153 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1154 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1155 "CJK UNIFIED IDEOGRAPHS", 1156 "CJKUNIFIEDIDEOGRAPHS"); 1157 1158 /** 1159 * Constant for the "Hangul Syllables" Unicode character block. 1160 * @since 1.2 1161 */ 1162 public static final UnicodeBlock HANGUL_SYLLABLES = 1163 new UnicodeBlock("HANGUL_SYLLABLES", 1164 "HANGUL SYLLABLES", 1165 "HANGULSYLLABLES"); 1166 1167 /** 1168 * Constant for the "Private Use Area" Unicode character block. 1169 * @since 1.2 1170 */ 1171 public static final UnicodeBlock PRIVATE_USE_AREA = 1172 new UnicodeBlock("PRIVATE_USE_AREA", 1173 "PRIVATE USE AREA", 1174 "PRIVATEUSEAREA"); 1175 1176 /** 1177 * Constant for the "CJK Compatibility Ideographs" Unicode character 1178 * block. 1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1182 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1183 "CJK COMPATIBILITY IDEOGRAPHS", 1184 "CJKCOMPATIBILITYIDEOGRAPHS"); 1185 1186 /** 1187 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1191 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1192 "ALPHABETIC PRESENTATION FORMS", 1193 "ALPHABETICPRESENTATIONFORMS"); 1194 1195 /** 1196 * Constant for the "Arabic Presentation Forms-A" Unicode character 1197 * block. 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1201 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1202 "ARABIC PRESENTATION FORMS-A", 1203 "ARABICPRESENTATIONFORMS-A"); 1204 1205 /** 1206 * Constant for the "Combining Half Marks" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock COMBINING_HALF_MARKS = 1210 new UnicodeBlock("COMBINING_HALF_MARKS", 1211 "COMBINING HALF MARKS", 1212 "COMBININGHALFMARKS"); 1213 1214 /** 1215 * Constant for the "CJK Compatibility Forms" Unicode character block. 1216 * @since 1.2 1217 */ 1218 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1219 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1220 "CJK COMPATIBILITY FORMS", 1221 "CJKCOMPATIBILITYFORMS"); 1222 1223 /** 1224 * Constant for the "Small Form Variants" Unicode character block. 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1228 new UnicodeBlock("SMALL_FORM_VARIANTS", 1229 "SMALL FORM VARIANTS", 1230 "SMALLFORMVARIANTS"); 1231 1232 /** 1233 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1234 * @since 1.2 1235 */ 1236 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1237 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1238 "ARABIC PRESENTATION FORMS-B", 1239 "ARABICPRESENTATIONFORMS-B"); 1240 1241 /** 1242 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1243 * block. 
1244 * @since 1.2 1245 */ 1246 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1247 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1248 "HALFWIDTH AND FULLWIDTH FORMS", 1249 "HALFWIDTHANDFULLWIDTHFORMS"); 1250 1251 /** 1252 * Constant for the "Specials" Unicode character block. 1253 * @since 1.2 1254 */ 1255 public static final UnicodeBlock SPECIALS = 1256 new UnicodeBlock("SPECIALS"); 1257 1258 /** 1259 * @deprecated 1260 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1261 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1262 * These constants match the block definitions of the Unicode Standard. 1263 * The {@link #of(char)} and {@link #of(int)} methods return the 1264 * standard constants. 1265 */ 1266 @Deprecated(since="1.5") 1267 public static final UnicodeBlock SURROGATES_AREA = 1268 new UnicodeBlock("SURROGATES_AREA"); 1269 1270 /** 1271 * Constant for the "Syriac" Unicode character block. 1272 * @since 1.4 1273 */ 1274 public static final UnicodeBlock SYRIAC = 1275 new UnicodeBlock("SYRIAC"); 1276 1277 /** 1278 * Constant for the "Thaana" Unicode character block. 1279 * @since 1.4 1280 */ 1281 public static final UnicodeBlock THAANA = 1282 new UnicodeBlock("THAANA"); 1283 1284 /** 1285 * Constant for the "Sinhala" Unicode character block. 1286 * @since 1.4 1287 */ 1288 public static final UnicodeBlock SINHALA = 1289 new UnicodeBlock("SINHALA"); 1290 1291 /** 1292 * Constant for the "Myanmar" Unicode character block. 1293 * @since 1.4 1294 */ 1295 public static final UnicodeBlock MYANMAR = 1296 new UnicodeBlock("MYANMAR"); 1297 1298 /** 1299 * Constant for the "Ethiopic" Unicode character block. 1300 * @since 1.4 1301 */ 1302 public static final UnicodeBlock ETHIOPIC = 1303 new UnicodeBlock("ETHIOPIC"); 1304 1305 /** 1306 * Constant for the "Cherokee" Unicode character block. 
1307 * @since 1.4 1308 */ 1309 public static final UnicodeBlock CHEROKEE = 1310 new UnicodeBlock("CHEROKEE"); 1311 1312 /** 1313 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1314 * @since 1.4 1315 */ 1316 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1317 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1318 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1319 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1320 1321 /** 1322 * Constant for the "Ogham" Unicode character block. 1323 * @since 1.4 1324 */ 1325 public static final UnicodeBlock OGHAM = 1326 new UnicodeBlock("OGHAM"); 1327 1328 /** 1329 * Constant for the "Runic" Unicode character block. 1330 * @since 1.4 1331 */ 1332 public static final UnicodeBlock RUNIC = 1333 new UnicodeBlock("RUNIC"); 1334 1335 /** 1336 * Constant for the "Khmer" Unicode character block. 1337 * @since 1.4 1338 */ 1339 public static final UnicodeBlock KHMER = 1340 new UnicodeBlock("KHMER"); 1341 1342 /** 1343 * Constant for the "Mongolian" Unicode character block. 1344 * @since 1.4 1345 */ 1346 public static final UnicodeBlock MONGOLIAN = 1347 new UnicodeBlock("MONGOLIAN"); 1348 1349 /** 1350 * Constant for the "Braille Patterns" Unicode character block. 1351 * @since 1.4 1352 */ 1353 public static final UnicodeBlock BRAILLE_PATTERNS = 1354 new UnicodeBlock("BRAILLE_PATTERNS", 1355 "BRAILLE PATTERNS", 1356 "BRAILLEPATTERNS"); 1357 1358 /** 1359 * Constant for the "CJK Radicals Supplement" Unicode character block. 1360 * @since 1.4 1361 */ 1362 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1363 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1364 "CJK RADICALS SUPPLEMENT", 1365 "CJKRADICALSSUPPLEMENT"); 1366 1367 /** 1368 * Constant for the "Kangxi Radicals" Unicode character block. 
1369 * @since 1.4 1370 */ 1371 public static final UnicodeBlock KANGXI_RADICALS = 1372 new UnicodeBlock("KANGXI_RADICALS", 1373 "KANGXI RADICALS", 1374 "KANGXIRADICALS"); 1375 1376 /** 1377 * Constant for the "Ideographic Description Characters" Unicode character block. 1378 * @since 1.4 1379 */ 1380 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1381 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1382 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1383 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1384 1385 /** 1386 * Constant for the "Bopomofo Extended" Unicode character block. 1387 * @since 1.4 1388 */ 1389 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1390 new UnicodeBlock("BOPOMOFO_EXTENDED", 1391 "BOPOMOFO EXTENDED", 1392 "BOPOMOFOEXTENDED"); 1393 1394 /** 1395 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1396 * @since 1.4 1397 */ 1398 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1399 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1400 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1401 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1402 1403 /** 1404 * Constant for the "Yi Syllables" Unicode character block. 1405 * @since 1.4 1406 */ 1407 public static final UnicodeBlock YI_SYLLABLES = 1408 new UnicodeBlock("YI_SYLLABLES", 1409 "YI SYLLABLES", 1410 "YISYLLABLES"); 1411 1412 /** 1413 * Constant for the "Yi Radicals" Unicode character block. 1414 * @since 1.4 1415 */ 1416 public static final UnicodeBlock YI_RADICALS = 1417 new UnicodeBlock("YI_RADICALS", 1418 "YI RADICALS", 1419 "YIRADICALS"); 1420 1421 /** 1422 * Constant for the "Cyrillic Supplementary" Unicode character block. 
1423 * @since 1.5 1424 */ 1425 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1426 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1427 "CYRILLIC SUPPLEMENTARY", 1428 "CYRILLICSUPPLEMENTARY", 1429 "CYRILLIC SUPPLEMENT", 1430 "CYRILLICSUPPLEMENT"); 1431 1432 /** 1433 * Constant for the "Tagalog" Unicode character block. 1434 * @since 1.5 1435 */ 1436 public static final UnicodeBlock TAGALOG = 1437 new UnicodeBlock("TAGALOG"); 1438 1439 /** 1440 * Constant for the "Hanunoo" Unicode character block. 1441 * @since 1.5 1442 */ 1443 public static final UnicodeBlock HANUNOO = 1444 new UnicodeBlock("HANUNOO"); 1445 1446 /** 1447 * Constant for the "Buhid" Unicode character block. 1448 * @since 1.5 1449 */ 1450 public static final UnicodeBlock BUHID = 1451 new UnicodeBlock("BUHID"); 1452 1453 /** 1454 * Constant for the "Tagbanwa" Unicode character block. 1455 * @since 1.5 1456 */ 1457 public static final UnicodeBlock TAGBANWA = 1458 new UnicodeBlock("TAGBANWA"); 1459 1460 /** 1461 * Constant for the "Limbu" Unicode character block. 1462 * @since 1.5 1463 */ 1464 public static final UnicodeBlock LIMBU = 1465 new UnicodeBlock("LIMBU"); 1466 1467 /** 1468 * Constant for the "Tai Le" Unicode character block. 1469 * @since 1.5 1470 */ 1471 public static final UnicodeBlock TAI_LE = 1472 new UnicodeBlock("TAI_LE", 1473 "TAI LE", 1474 "TAILE"); 1475 1476 /** 1477 * Constant for the "Khmer Symbols" Unicode character block. 1478 * @since 1.5 1479 */ 1480 public static final UnicodeBlock KHMER_SYMBOLS = 1481 new UnicodeBlock("KHMER_SYMBOLS", 1482 "KHMER SYMBOLS", 1483 "KHMERSYMBOLS"); 1484 1485 /** 1486 * Constant for the "Phonetic Extensions" Unicode character block. 1487 * @since 1.5 1488 */ 1489 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1490 new UnicodeBlock("PHONETIC_EXTENSIONS", 1491 "PHONETIC EXTENSIONS", 1492 "PHONETICEXTENSIONS"); 1493 1494 /** 1495 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1496 * @since 1.5 1497 */ 1498 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1499 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1500 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1501 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1502 1503 /** 1504 * Constant for the "Supplemental Arrows-A" Unicode character block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1508 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1509 "SUPPLEMENTAL ARROWS-A", 1510 "SUPPLEMENTALARROWS-A"); 1511 1512 /** 1513 * Constant for the "Supplemental Arrows-B" Unicode character block. 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1517 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1518 "SUPPLEMENTAL ARROWS-B", 1519 "SUPPLEMENTALARROWS-B"); 1520 1521 /** 1522 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1523 * character block. 1524 * @since 1.5 1525 */ 1526 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1527 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1528 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1529 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1530 1531 /** 1532 * Constant for the "Supplemental Mathematical Operators" Unicode 1533 * character block. 1534 * @since 1.5 1535 */ 1536 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1537 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1538 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1539 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1540 1541 /** 1542 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1543 * block. 1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1547 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1548 "MISCELLANEOUS SYMBOLS AND ARROWS", 1549 "MISCELLANEOUSSYMBOLSANDARROWS"); 1550 1551 /** 1552 * Constant for the "Katakana Phonetic Extensions" Unicode character 1553 * block. 
1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1557 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1558 "KATAKANA PHONETIC EXTENSIONS", 1559 "KATAKANAPHONETICEXTENSIONS"); 1560 1561 /** 1562 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1563 * @since 1.5 1564 */ 1565 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1566 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1567 "YIJING HEXAGRAM SYMBOLS", 1568 "YIJINGHEXAGRAMSYMBOLS"); 1569 1570 /** 1571 * Constant for the "Variation Selectors" Unicode character block. 1572 * @since 1.5 1573 */ 1574 public static final UnicodeBlock VARIATION_SELECTORS = 1575 new UnicodeBlock("VARIATION_SELECTORS", 1576 "VARIATION SELECTORS", 1577 "VARIATIONSELECTORS"); 1578 1579 /** 1580 * Constant for the "Linear B Syllabary" Unicode character block. 1581 * @since 1.5 1582 */ 1583 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1584 new UnicodeBlock("LINEAR_B_SYLLABARY", 1585 "LINEAR B SYLLABARY", 1586 "LINEARBSYLLABARY"); 1587 1588 /** 1589 * Constant for the "Linear B Ideograms" Unicode character block. 1590 * @since 1.5 1591 */ 1592 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1593 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1594 "LINEAR B IDEOGRAMS", 1595 "LINEARBIDEOGRAMS"); 1596 1597 /** 1598 * Constant for the "Aegean Numbers" Unicode character block. 1599 * @since 1.5 1600 */ 1601 public static final UnicodeBlock AEGEAN_NUMBERS = 1602 new UnicodeBlock("AEGEAN_NUMBERS", 1603 "AEGEAN NUMBERS", 1604 "AEGEANNUMBERS"); 1605 1606 /** 1607 * Constant for the "Old Italic" Unicode character block. 1608 * @since 1.5 1609 */ 1610 public static final UnicodeBlock OLD_ITALIC = 1611 new UnicodeBlock("OLD_ITALIC", 1612 "OLD ITALIC", 1613 "OLDITALIC"); 1614 1615 /** 1616 * Constant for the "Gothic" Unicode character block. 
1617 * @since 1.5 1618 */ 1619 public static final UnicodeBlock GOTHIC = 1620 new UnicodeBlock("GOTHIC"); 1621 1622 /** 1623 * Constant for the "Ugaritic" Unicode character block. 1624 * @since 1.5 1625 */ 1626 public static final UnicodeBlock UGARITIC = 1627 new UnicodeBlock("UGARITIC"); 1628 1629 /** 1630 * Constant for the "Deseret" Unicode character block. 1631 * @since 1.5 1632 */ 1633 public static final UnicodeBlock DESERET = 1634 new UnicodeBlock("DESERET"); 1635 1636 /** 1637 * Constant for the "Shavian" Unicode character block. 1638 * @since 1.5 1639 */ 1640 public static final UnicodeBlock SHAVIAN = 1641 new UnicodeBlock("SHAVIAN"); 1642 1643 /** 1644 * Constant for the "Osmanya" Unicode character block. 1645 * @since 1.5 1646 */ 1647 public static final UnicodeBlock OSMANYA = 1648 new UnicodeBlock("OSMANYA"); 1649 1650 /** 1651 * Constant for the "Cypriot Syllabary" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1655 new UnicodeBlock("CYPRIOT_SYLLABARY", 1656 "CYPRIOT SYLLABARY", 1657 "CYPRIOTSYLLABARY"); 1658 1659 /** 1660 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1664 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1665 "BYZANTINE MUSICAL SYMBOLS", 1666 "BYZANTINEMUSICALSYMBOLS"); 1667 1668 /** 1669 * Constant for the "Musical Symbols" Unicode character block. 1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock MUSICAL_SYMBOLS = 1673 new UnicodeBlock("MUSICAL_SYMBOLS", 1674 "MUSICAL SYMBOLS", 1675 "MUSICALSYMBOLS"); 1676 1677 /** 1678 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1682 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1683 "TAI XUAN JING SYMBOLS", 1684 "TAIXUANJINGSYMBOLS"); 1685 1686 /** 1687 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1688 * character block. 1689 * @since 1.5 1690 */ 1691 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1692 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1693 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1694 "MATHEMATICALALPHANUMERICSYMBOLS"); 1695 1696 /** 1697 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1698 * character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1702 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1703 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1704 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1705 1706 /** 1707 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1708 * @since 1.5 1709 */ 1710 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1711 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1712 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1713 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1714 1715 /** 1716 * Constant for the "Tags" Unicode character block. 1717 * @since 1.5 1718 */ 1719 public static final UnicodeBlock TAGS = 1720 new UnicodeBlock("TAGS"); 1721 1722 /** 1723 * Constant for the "Variation Selectors Supplement" Unicode character 1724 * block. 1725 * @since 1.5 1726 */ 1727 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1728 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1729 "VARIATION SELECTORS SUPPLEMENT", 1730 "VARIATIONSELECTORSSUPPLEMENT"); 1731 1732 /** 1733 * Constant for the "Supplementary Private Use Area-A" Unicode character 1734 * block. 
1735 * @since 1.5 1736 */ 1737 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1738 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1739 "SUPPLEMENTARY PRIVATE USE AREA-A", 1740 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1741 1742 /** 1743 * Constant for the "Supplementary Private Use Area-B" Unicode character 1744 * block. 1745 * @since 1.5 1746 */ 1747 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1748 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1749 "SUPPLEMENTARY PRIVATE USE AREA-B", 1750 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1751 1752 /** 1753 * Constant for the "High Surrogates" Unicode character block. 1754 * This block represents codepoint values in the high surrogate 1755 * range: U+D800 through U+DB7F 1756 * 1757 * @since 1.5 1758 */ 1759 public static final UnicodeBlock HIGH_SURROGATES = 1760 new UnicodeBlock("HIGH_SURROGATES", 1761 "HIGH SURROGATES", 1762 "HIGHSURROGATES"); 1763 1764 /** 1765 * Constant for the "High Private Use Surrogates" Unicode character 1766 * block. 1767 * This block represents codepoint values in the private use high 1768 * surrogate range: U+DB80 through U+DBFF 1769 * 1770 * @since 1.5 1771 */ 1772 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1773 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1774 "HIGH PRIVATE USE SURROGATES", 1775 "HIGHPRIVATEUSESURROGATES"); 1776 1777 /** 1778 * Constant for the "Low Surrogates" Unicode character block. 1779 * This block represents codepoint values in the low surrogate 1780 * range: U+DC00 through U+DFFF 1781 * 1782 * @since 1.5 1783 */ 1784 public static final UnicodeBlock LOW_SURROGATES = 1785 new UnicodeBlock("LOW_SURROGATES", 1786 "LOW SURROGATES", 1787 "LOWSURROGATES"); 1788 1789 /** 1790 * Constant for the "Arabic Supplement" Unicode character block. 
1791 * @since 1.7 1792 */ 1793 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1794 new UnicodeBlock("ARABIC_SUPPLEMENT", 1795 "ARABIC SUPPLEMENT", 1796 "ARABICSUPPLEMENT"); 1797 1798 /** 1799 * Constant for the "NKo" Unicode character block. 1800 * @since 1.7 1801 */ 1802 public static final UnicodeBlock NKO = 1803 new UnicodeBlock("NKO"); 1804 1805 /** 1806 * Constant for the "Samaritan" Unicode character block. 1807 * @since 1.7 1808 */ 1809 public static final UnicodeBlock SAMARITAN = 1810 new UnicodeBlock("SAMARITAN"); 1811 1812 /** 1813 * Constant for the "Mandaic" Unicode character block. 1814 * @since 1.7 1815 */ 1816 public static final UnicodeBlock MANDAIC = 1817 new UnicodeBlock("MANDAIC"); 1818 1819 /** 1820 * Constant for the "Ethiopic Supplement" Unicode character block. 1821 * @since 1.7 1822 */ 1823 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1824 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1825 "ETHIOPIC SUPPLEMENT", 1826 "ETHIOPICSUPPLEMENT"); 1827 1828 /** 1829 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1830 * Unicode character block. 1831 * @since 1.7 1832 */ 1833 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1834 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1835 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1836 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1837 1838 /** 1839 * Constant for the "New Tai Lue" Unicode character block. 1840 * @since 1.7 1841 */ 1842 public static final UnicodeBlock NEW_TAI_LUE = 1843 new UnicodeBlock("NEW_TAI_LUE", 1844 "NEW TAI LUE", 1845 "NEWTAILUE"); 1846 1847 /** 1848 * Constant for the "Buginese" Unicode character block. 1849 * @since 1.7 1850 */ 1851 public static final UnicodeBlock BUGINESE = 1852 new UnicodeBlock("BUGINESE"); 1853 1854 /** 1855 * Constant for the "Tai Tham" Unicode character block. 
1856 * @since 1.7 1857 */ 1858 public static final UnicodeBlock TAI_THAM = 1859 new UnicodeBlock("TAI_THAM", 1860 "TAI THAM", 1861 "TAITHAM"); 1862 1863 /** 1864 * Constant for the "Balinese" Unicode character block. 1865 * @since 1.7 1866 */ 1867 public static final UnicodeBlock BALINESE = 1868 new UnicodeBlock("BALINESE"); 1869 1870 /** 1871 * Constant for the "Sundanese" Unicode character block. 1872 * @since 1.7 1873 */ 1874 public static final UnicodeBlock SUNDANESE = 1875 new UnicodeBlock("SUNDANESE"); 1876 1877 /** 1878 * Constant for the "Batak" Unicode character block. 1879 * @since 1.7 1880 */ 1881 public static final UnicodeBlock BATAK = 1882 new UnicodeBlock("BATAK"); 1883 1884 /** 1885 * Constant for the "Lepcha" Unicode character block. 1886 * @since 1.7 1887 */ 1888 public static final UnicodeBlock LEPCHA = 1889 new UnicodeBlock("LEPCHA"); 1890 1891 /** 1892 * Constant for the "Ol Chiki" Unicode character block. 1893 * @since 1.7 1894 */ 1895 public static final UnicodeBlock OL_CHIKI = 1896 new UnicodeBlock("OL_CHIKI", 1897 "OL CHIKI", 1898 "OLCHIKI"); 1899 1900 /** 1901 * Constant for the "Vedic Extensions" Unicode character block. 1902 * @since 1.7 1903 */ 1904 public static final UnicodeBlock VEDIC_EXTENSIONS = 1905 new UnicodeBlock("VEDIC_EXTENSIONS", 1906 "VEDIC EXTENSIONS", 1907 "VEDICEXTENSIONS"); 1908 1909 /** 1910 * Constant for the "Phonetic Extensions Supplement" Unicode character 1911 * block. 1912 * @since 1.7 1913 */ 1914 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1915 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1916 "PHONETIC EXTENSIONS SUPPLEMENT", 1917 "PHONETICEXTENSIONSSUPPLEMENT"); 1918 1919 /** 1920 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1921 * character block. 
1922 * @since 1.7 1923 */ 1924 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1925 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1926 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1927 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1928 1929 /** 1930 * Constant for the "Glagolitic" Unicode character block. 1931 * @since 1.7 1932 */ 1933 public static final UnicodeBlock GLAGOLITIC = 1934 new UnicodeBlock("GLAGOLITIC"); 1935 1936 /** 1937 * Constant for the "Latin Extended-C" Unicode character block. 1938 * @since 1.7 1939 */ 1940 public static final UnicodeBlock LATIN_EXTENDED_C = 1941 new UnicodeBlock("LATIN_EXTENDED_C", 1942 "LATIN EXTENDED-C", 1943 "LATINEXTENDED-C"); 1944 1945 /** 1946 * Constant for the "Coptic" Unicode character block. 1947 * @since 1.7 1948 */ 1949 public static final UnicodeBlock COPTIC = 1950 new UnicodeBlock("COPTIC"); 1951 1952 /** 1953 * Constant for the "Georgian Supplement" Unicode character block. 1954 * @since 1.7 1955 */ 1956 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1957 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1958 "GEORGIAN SUPPLEMENT", 1959 "GEORGIANSUPPLEMENT"); 1960 1961 /** 1962 * Constant for the "Tifinagh" Unicode character block. 1963 * @since 1.7 1964 */ 1965 public static final UnicodeBlock TIFINAGH = 1966 new UnicodeBlock("TIFINAGH"); 1967 1968 /** 1969 * Constant for the "Ethiopic Extended" Unicode character block. 1970 * @since 1.7 1971 */ 1972 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1973 new UnicodeBlock("ETHIOPIC_EXTENDED", 1974 "ETHIOPIC EXTENDED", 1975 "ETHIOPICEXTENDED"); 1976 1977 /** 1978 * Constant for the "Cyrillic Extended-A" Unicode character block. 1979 * @since 1.7 1980 */ 1981 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1982 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1983 "CYRILLIC EXTENDED-A", 1984 "CYRILLICEXTENDED-A"); 1985 1986 /** 1987 * Constant for the "Supplemental Punctuation" Unicode character block. 
1988 * @since 1.7 1989 */ 1990 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1991 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1992 "SUPPLEMENTAL PUNCTUATION", 1993 "SUPPLEMENTALPUNCTUATION"); 1994 1995 /** 1996 * Constant for the "CJK Strokes" Unicode character block. 1997 * @since 1.7 1998 */ 1999 public static final UnicodeBlock CJK_STROKES = 2000 new UnicodeBlock("CJK_STROKES", 2001 "CJK STROKES", 2002 "CJKSTROKES"); 2003 2004 /** 2005 * Constant for the "Lisu" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock LISU = 2009 new UnicodeBlock("LISU"); 2010 2011 /** 2012 * Constant for the "Vai" Unicode character block. 2013 * @since 1.7 2014 */ 2015 public static final UnicodeBlock VAI = 2016 new UnicodeBlock("VAI"); 2017 2018 /** 2019 * Constant for the "Cyrillic Extended-B" Unicode character block. 2020 * @since 1.7 2021 */ 2022 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2023 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2024 "CYRILLIC EXTENDED-B", 2025 "CYRILLICEXTENDED-B"); 2026 2027 /** 2028 * Constant for the "Bamum" Unicode character block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock BAMUM = 2032 new UnicodeBlock("BAMUM"); 2033 2034 /** 2035 * Constant for the "Modifier Tone Letters" Unicode character block. 2036 * @since 1.7 2037 */ 2038 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2039 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2040 "MODIFIER TONE LETTERS", 2041 "MODIFIERTONELETTERS"); 2042 2043 /** 2044 * Constant for the "Latin Extended-D" Unicode character block. 2045 * @since 1.7 2046 */ 2047 public static final UnicodeBlock LATIN_EXTENDED_D = 2048 new UnicodeBlock("LATIN_EXTENDED_D", 2049 "LATIN EXTENDED-D", 2050 "LATINEXTENDED-D"); 2051 2052 /** 2053 * Constant for the "Syloti Nagri" Unicode character block. 
2054 * @since 1.7 2055 */ 2056 public static final UnicodeBlock SYLOTI_NAGRI = 2057 new UnicodeBlock("SYLOTI_NAGRI", 2058 "SYLOTI NAGRI", 2059 "SYLOTINAGRI"); 2060 2061 /** 2062 * Constant for the "Common Indic Number Forms" Unicode character block. 2063 * @since 1.7 2064 */ 2065 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2066 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2067 "COMMON INDIC NUMBER FORMS", 2068 "COMMONINDICNUMBERFORMS"); 2069 2070 /** 2071 * Constant for the "Phags-pa" Unicode character block. 2072 * @since 1.7 2073 */ 2074 public static final UnicodeBlock PHAGS_PA = 2075 new UnicodeBlock("PHAGS_PA", 2076 "PHAGS-PA"); 2077 2078 /** 2079 * Constant for the "Saurashtra" Unicode character block. 2080 * @since 1.7 2081 */ 2082 public static final UnicodeBlock SAURASHTRA = 2083 new UnicodeBlock("SAURASHTRA"); 2084 2085 /** 2086 * Constant for the "Devanagari Extended" Unicode character block. 2087 * @since 1.7 2088 */ 2089 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2090 new UnicodeBlock("DEVANAGARI_EXTENDED", 2091 "DEVANAGARI EXTENDED", 2092 "DEVANAGARIEXTENDED"); 2093 2094 /** 2095 * Constant for the "Kayah Li" Unicode character block. 2096 * @since 1.7 2097 */ 2098 public static final UnicodeBlock KAYAH_LI = 2099 new UnicodeBlock("KAYAH_LI", 2100 "KAYAH LI", 2101 "KAYAHLI"); 2102 2103 /** 2104 * Constant for the "Rejang" Unicode character block. 2105 * @since 1.7 2106 */ 2107 public static final UnicodeBlock REJANG = 2108 new UnicodeBlock("REJANG"); 2109 2110 /** 2111 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2112 * @since 1.7 2113 */ 2114 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2115 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2116 "HANGUL JAMO EXTENDED-A", 2117 "HANGULJAMOEXTENDED-A"); 2118 2119 /** 2120 * Constant for the "Javanese" Unicode character block. 
2121 * @since 1.7 2122 */ 2123 public static final UnicodeBlock JAVANESE = 2124 new UnicodeBlock("JAVANESE"); 2125 2126 /** 2127 * Constant for the "Cham" Unicode character block. 2128 * @since 1.7 2129 */ 2130 public static final UnicodeBlock CHAM = 2131 new UnicodeBlock("CHAM"); 2132 2133 /** 2134 * Constant for the "Myanmar Extended-A" Unicode character block. 2135 * @since 1.7 2136 */ 2137 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2138 new UnicodeBlock("MYANMAR_EXTENDED_A", 2139 "MYANMAR EXTENDED-A", 2140 "MYANMAREXTENDED-A"); 2141 2142 /** 2143 * Constant for the "Tai Viet" Unicode character block. 2144 * @since 1.7 2145 */ 2146 public static final UnicodeBlock TAI_VIET = 2147 new UnicodeBlock("TAI_VIET", 2148 "TAI VIET", 2149 "TAIVIET"); 2150 2151 /** 2152 * Constant for the "Ethiopic Extended-A" Unicode character block. 2153 * @since 1.7 2154 */ 2155 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2156 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2157 "ETHIOPIC EXTENDED-A", 2158 "ETHIOPICEXTENDED-A"); 2159 2160 /** 2161 * Constant for the "Meetei Mayek" Unicode character block. 2162 * @since 1.7 2163 */ 2164 public static final UnicodeBlock MEETEI_MAYEK = 2165 new UnicodeBlock("MEETEI_MAYEK", 2166 "MEETEI MAYEK", 2167 "MEETEIMAYEK"); 2168 2169 /** 2170 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2171 * @since 1.7 2172 */ 2173 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2174 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2175 "HANGUL JAMO EXTENDED-B", 2176 "HANGULJAMOEXTENDED-B"); 2177 2178 /** 2179 * Constant for the "Vertical Forms" Unicode character block. 2180 * @since 1.7 2181 */ 2182 public static final UnicodeBlock VERTICAL_FORMS = 2183 new UnicodeBlock("VERTICAL_FORMS", 2184 "VERTICAL FORMS", 2185 "VERTICALFORMS"); 2186 2187 /** 2188 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2189 * @since 1.7 2190 */ 2191 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2192 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2193 "ANCIENT GREEK NUMBERS", 2194 "ANCIENTGREEKNUMBERS"); 2195 2196 /** 2197 * Constant for the "Ancient Symbols" Unicode character block. 2198 * @since 1.7 2199 */ 2200 public static final UnicodeBlock ANCIENT_SYMBOLS = 2201 new UnicodeBlock("ANCIENT_SYMBOLS", 2202 "ANCIENT SYMBOLS", 2203 "ANCIENTSYMBOLS"); 2204 2205 /** 2206 * Constant for the "Phaistos Disc" Unicode character block. 2207 * @since 1.7 2208 */ 2209 public static final UnicodeBlock PHAISTOS_DISC = 2210 new UnicodeBlock("PHAISTOS_DISC", 2211 "PHAISTOS DISC", 2212 "PHAISTOSDISC"); 2213 2214 /** 2215 * Constant for the "Lycian" Unicode character block. 2216 * @since 1.7 2217 */ 2218 public static final UnicodeBlock LYCIAN = 2219 new UnicodeBlock("LYCIAN"); 2220 2221 /** 2222 * Constant for the "Carian" Unicode character block. 2223 * @since 1.7 2224 */ 2225 public static final UnicodeBlock CARIAN = 2226 new UnicodeBlock("CARIAN"); 2227 2228 /** 2229 * Constant for the "Old Persian" Unicode character block. 2230 * @since 1.7 2231 */ 2232 public static final UnicodeBlock OLD_PERSIAN = 2233 new UnicodeBlock("OLD_PERSIAN", 2234 "OLD PERSIAN", 2235 "OLDPERSIAN"); 2236 2237 /** 2238 * Constant for the "Imperial Aramaic" Unicode character block. 2239 * @since 1.7 2240 */ 2241 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2242 new UnicodeBlock("IMPERIAL_ARAMAIC", 2243 "IMPERIAL ARAMAIC", 2244 "IMPERIALARAMAIC"); 2245 2246 /** 2247 * Constant for the "Phoenician" Unicode character block. 2248 * @since 1.7 2249 */ 2250 public static final UnicodeBlock PHOENICIAN = 2251 new UnicodeBlock("PHOENICIAN"); 2252 2253 /** 2254 * Constant for the "Lydian" Unicode character block. 2255 * @since 1.7 2256 */ 2257 public static final UnicodeBlock LYDIAN = 2258 new UnicodeBlock("LYDIAN"); 2259 2260 /** 2261 * Constant for the "Kharoshthi" Unicode character block. 
2262 * @since 1.7 2263 */ 2264 public static final UnicodeBlock KHAROSHTHI = 2265 new UnicodeBlock("KHAROSHTHI"); 2266 2267 /** 2268 * Constant for the "Old South Arabian" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2272 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2273 "OLD SOUTH ARABIAN", 2274 "OLDSOUTHARABIAN"); 2275 2276 /** 2277 * Constant for the "Avestan" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock AVESTAN = 2281 new UnicodeBlock("AVESTAN"); 2282 2283 /** 2284 * Constant for the "Inscriptional Parthian" Unicode character block. 2285 * @since 1.7 2286 */ 2287 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2288 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2289 "INSCRIPTIONAL PARTHIAN", 2290 "INSCRIPTIONALPARTHIAN"); 2291 2292 /** 2293 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2294 * @since 1.7 2295 */ 2296 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2297 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2298 "INSCRIPTIONAL PAHLAVI", 2299 "INSCRIPTIONALPAHLAVI"); 2300 2301 /** 2302 * Constant for the "Old Turkic" Unicode character block. 2303 * @since 1.7 2304 */ 2305 public static final UnicodeBlock OLD_TURKIC = 2306 new UnicodeBlock("OLD_TURKIC", 2307 "OLD TURKIC", 2308 "OLDTURKIC"); 2309 2310 /** 2311 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2312 * @since 1.7 2313 */ 2314 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2315 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2316 "RUMI NUMERAL SYMBOLS", 2317 "RUMINUMERALSYMBOLS"); 2318 2319 /** 2320 * Constant for the "Brahmi" Unicode character block. 2321 * @since 1.7 2322 */ 2323 public static final UnicodeBlock BRAHMI = 2324 new UnicodeBlock("BRAHMI"); 2325 2326 /** 2327 * Constant for the "Kaithi" Unicode character block. 
2328 * @since 1.7 2329 */ 2330 public static final UnicodeBlock KAITHI = 2331 new UnicodeBlock("KAITHI"); 2332 2333 /** 2334 * Constant for the "Cuneiform" Unicode character block. 2335 * @since 1.7 2336 */ 2337 public static final UnicodeBlock CUNEIFORM = 2338 new UnicodeBlock("CUNEIFORM"); 2339 2340 /** 2341 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2342 * character block. 2343 * @since 1.7 2344 */ 2345 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2346 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2347 "CUNEIFORM NUMBERS AND PUNCTUATION", 2348 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2349 2350 /** 2351 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2352 * @since 1.7 2353 */ 2354 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2355 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2356 "EGYPTIAN HIEROGLYPHS", 2357 "EGYPTIANHIEROGLYPHS"); 2358 2359 /** 2360 * Constant for the "Bamum Supplement" Unicode character block. 2361 * @since 1.7 2362 */ 2363 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2364 new UnicodeBlock("BAMUM_SUPPLEMENT", 2365 "BAMUM SUPPLEMENT", 2366 "BAMUMSUPPLEMENT"); 2367 2368 /** 2369 * Constant for the "Kana Supplement" Unicode character block. 2370 * @since 1.7 2371 */ 2372 public static final UnicodeBlock KANA_SUPPLEMENT = 2373 new UnicodeBlock("KANA_SUPPLEMENT", 2374 "KANA SUPPLEMENT", 2375 "KANASUPPLEMENT"); 2376 2377 /** 2378 * Constant for the "Ancient Greek Musical Notation" Unicode character 2379 * block. 2380 * @since 1.7 2381 */ 2382 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2383 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2384 "ANCIENT GREEK MUSICAL NOTATION", 2385 "ANCIENTGREEKMUSICALNOTATION"); 2386 2387 /** 2388 * Constant for the "Counting Rod Numerals" Unicode character block. 
2389 * @since 1.7 2390 */ 2391 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2392 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2393 "COUNTING ROD NUMERALS", 2394 "COUNTINGRODNUMERALS"); 2395 2396 /** 2397 * Constant for the "Mahjong Tiles" Unicode character block. 2398 * @since 1.7 2399 */ 2400 public static final UnicodeBlock MAHJONG_TILES = 2401 new UnicodeBlock("MAHJONG_TILES", 2402 "MAHJONG TILES", 2403 "MAHJONGTILES"); 2404 2405 /** 2406 * Constant for the "Domino Tiles" Unicode character block. 2407 * @since 1.7 2408 */ 2409 public static final UnicodeBlock DOMINO_TILES = 2410 new UnicodeBlock("DOMINO_TILES", 2411 "DOMINO TILES", 2412 "DOMINOTILES"); 2413 2414 /** 2415 * Constant for the "Playing Cards" Unicode character block. 2416 * @since 1.7 2417 */ 2418 public static final UnicodeBlock PLAYING_CARDS = 2419 new UnicodeBlock("PLAYING_CARDS", 2420 "PLAYING CARDS", 2421 "PLAYINGCARDS"); 2422 2423 /** 2424 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2425 * block. 2426 * @since 1.7 2427 */ 2428 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2429 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2430 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2431 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2432 2433 /** 2434 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2435 * block. 2436 * @since 1.7 2437 */ 2438 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2439 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2440 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2441 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2442 2443 /** 2444 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2445 * character block. 
2446 * @since 1.7 2447 */ 2448 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2449 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2450 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2451 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2452 2453 /** 2454 * Constant for the "Emoticons" Unicode character block. 2455 * @since 1.7 2456 */ 2457 public static final UnicodeBlock EMOTICONS = 2458 new UnicodeBlock("EMOTICONS"); 2459 2460 /** 2461 * Constant for the "Transport And Map Symbols" Unicode character block. 2462 * @since 1.7 2463 */ 2464 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2465 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2466 "TRANSPORT AND MAP SYMBOLS", 2467 "TRANSPORTANDMAPSYMBOLS"); 2468 2469 /** 2470 * Constant for the "Alchemical Symbols" Unicode character block. 2471 * @since 1.7 2472 */ 2473 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2474 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2475 "ALCHEMICAL SYMBOLS", 2476 "ALCHEMICALSYMBOLS"); 2477 2478 /** 2479 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2480 * character block. 2481 * @since 1.7 2482 */ 2483 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2484 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2485 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2486 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2487 2488 /** 2489 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2490 * character block. 2491 * @since 1.7 2492 */ 2493 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2494 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2495 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2496 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2497 2498 /** 2499 * Constant for the "Arabic Extended-A" Unicode character block. 
2500 * @since 1.8 2501 */ 2502 public static final UnicodeBlock ARABIC_EXTENDED_A = 2503 new UnicodeBlock("ARABIC_EXTENDED_A", 2504 "ARABIC EXTENDED-A", 2505 "ARABICEXTENDED-A"); 2506 2507 /** 2508 * Constant for the "Sundanese Supplement" Unicode character block. 2509 * @since 1.8 2510 */ 2511 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2512 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2513 "SUNDANESE SUPPLEMENT", 2514 "SUNDANESESUPPLEMENT"); 2515 2516 /** 2517 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2518 * @since 1.8 2519 */ 2520 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2521 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2522 "MEETEI MAYEK EXTENSIONS", 2523 "MEETEIMAYEKEXTENSIONS"); 2524 2525 /** 2526 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2527 * @since 1.8 2528 */ 2529 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2530 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2531 "MEROITIC HIEROGLYPHS", 2532 "MEROITICHIEROGLYPHS"); 2533 2534 /** 2535 * Constant for the "Meroitic Cursive" Unicode character block. 2536 * @since 1.8 2537 */ 2538 public static final UnicodeBlock MEROITIC_CURSIVE = 2539 new UnicodeBlock("MEROITIC_CURSIVE", 2540 "MEROITIC CURSIVE", 2541 "MEROITICCURSIVE"); 2542 2543 /** 2544 * Constant for the "Sora Sompeng" Unicode character block. 2545 * @since 1.8 2546 */ 2547 public static final UnicodeBlock SORA_SOMPENG = 2548 new UnicodeBlock("SORA_SOMPENG", 2549 "SORA SOMPENG", 2550 "SORASOMPENG"); 2551 2552 /** 2553 * Constant for the "Chakma" Unicode character block. 2554 * @since 1.8 2555 */ 2556 public static final UnicodeBlock CHAKMA = 2557 new UnicodeBlock("CHAKMA"); 2558 2559 /** 2560 * Constant for the "Sharada" Unicode character block. 2561 * @since 1.8 2562 */ 2563 public static final UnicodeBlock SHARADA = 2564 new UnicodeBlock("SHARADA"); 2565 2566 /** 2567 * Constant for the "Takri" Unicode character block. 
2568 * @since 1.8 2569 */ 2570 public static final UnicodeBlock TAKRI = 2571 new UnicodeBlock("TAKRI"); 2572 2573 /** 2574 * Constant for the "Miao" Unicode character block. 2575 * @since 1.8 2576 */ 2577 public static final UnicodeBlock MIAO = 2578 new UnicodeBlock("MIAO"); 2579 2580 /** 2581 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2582 * character block. 2583 * @since 1.8 2584 */ 2585 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2586 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2587 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2588 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2589 2590 /** 2591 * Constant for the "Combining Diacritical Marks Extended" Unicode 2592 * character block. 2593 * @since 9 2594 */ 2595 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2596 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2597 "COMBINING DIACRITICAL MARKS EXTENDED", 2598 "COMBININGDIACRITICALMARKSEXTENDED"); 2599 2600 /** 2601 * Constant for the "Myanmar Extended-B" Unicode character block. 2602 * @since 9 2603 */ 2604 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2605 new UnicodeBlock("MYANMAR_EXTENDED_B", 2606 "MYANMAR EXTENDED-B", 2607 "MYANMAREXTENDED-B"); 2608 2609 /** 2610 * Constant for the "Latin Extended-E" Unicode character block. 2611 * @since 9 2612 */ 2613 public static final UnicodeBlock LATIN_EXTENDED_E = 2614 new UnicodeBlock("LATIN_EXTENDED_E", 2615 "LATIN EXTENDED-E", 2616 "LATINEXTENDED-E"); 2617 2618 /** 2619 * Constant for the "Coptic Epact Numbers" Unicode character block. 2620 * @since 9 2621 */ 2622 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2623 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2624 "COPTIC EPACT NUMBERS", 2625 "COPTICEPACTNUMBERS"); 2626 2627 /** 2628 * Constant for the "Old Permic" Unicode character block. 
2629 * @since 9 2630 */ 2631 public static final UnicodeBlock OLD_PERMIC = 2632 new UnicodeBlock("OLD_PERMIC", 2633 "OLD PERMIC", 2634 "OLDPERMIC"); 2635 2636 /** 2637 * Constant for the "Elbasan" Unicode character block. 2638 * @since 9 2639 */ 2640 public static final UnicodeBlock ELBASAN = 2641 new UnicodeBlock("ELBASAN"); 2642 2643 /** 2644 * Constant for the "Caucasian Albanian" Unicode character block. 2645 * @since 9 2646 */ 2647 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2648 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2649 "CAUCASIAN ALBANIAN", 2650 "CAUCASIANALBANIAN"); 2651 2652 /** 2653 * Constant for the "Linear A" Unicode character block. 2654 * @since 9 2655 */ 2656 public static final UnicodeBlock LINEAR_A = 2657 new UnicodeBlock("LINEAR_A", 2658 "LINEAR A", 2659 "LINEARA"); 2660 2661 /** 2662 * Constant for the "Palmyrene" Unicode character block. 2663 * @since 9 2664 */ 2665 public static final UnicodeBlock PALMYRENE = 2666 new UnicodeBlock("PALMYRENE"); 2667 2668 /** 2669 * Constant for the "Nabataean" Unicode character block. 2670 * @since 9 2671 */ 2672 public static final UnicodeBlock NABATAEAN = 2673 new UnicodeBlock("NABATAEAN"); 2674 2675 /** 2676 * Constant for the "Old North Arabian" Unicode character block. 2677 * @since 9 2678 */ 2679 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2680 new UnicodeBlock("OLD_NORTH_ARABIAN", 2681 "OLD NORTH ARABIAN", 2682 "OLDNORTHARABIAN"); 2683 2684 /** 2685 * Constant for the "Manichaean" Unicode character block. 2686 * @since 9 2687 */ 2688 public static final UnicodeBlock MANICHAEAN = 2689 new UnicodeBlock("MANICHAEAN"); 2690 2691 /** 2692 * Constant for the "Psalter Pahlavi" Unicode character block. 2693 * @since 9 2694 */ 2695 public static final UnicodeBlock PSALTER_PAHLAVI = 2696 new UnicodeBlock("PSALTER_PAHLAVI", 2697 "PSALTER PAHLAVI", 2698 "PSALTERPAHLAVI"); 2699 2700 /** 2701 * Constant for the "Mahajani" Unicode character block. 
2702 * @since 9 2703 */ 2704 public static final UnicodeBlock MAHAJANI = 2705 new UnicodeBlock("MAHAJANI"); 2706 2707 /** 2708 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2709 * @since 9 2710 */ 2711 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2712 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2713 "SINHALA ARCHAIC NUMBERS", 2714 "SINHALAARCHAICNUMBERS"); 2715 2716 /** 2717 * Constant for the "Khojki" Unicode character block. 2718 * @since 9 2719 */ 2720 public static final UnicodeBlock KHOJKI = 2721 new UnicodeBlock("KHOJKI"); 2722 2723 /** 2724 * Constant for the "Khudawadi" Unicode character block. 2725 * @since 9 2726 */ 2727 public static final UnicodeBlock KHUDAWADI = 2728 new UnicodeBlock("KHUDAWADI"); 2729 2730 /** 2731 * Constant for the "Grantha" Unicode character block. 2732 * @since 9 2733 */ 2734 public static final UnicodeBlock GRANTHA = 2735 new UnicodeBlock("GRANTHA"); 2736 2737 /** 2738 * Constant for the "Tirhuta" Unicode character block. 2739 * @since 9 2740 */ 2741 public static final UnicodeBlock TIRHUTA = 2742 new UnicodeBlock("TIRHUTA"); 2743 2744 /** 2745 * Constant for the "Siddham" Unicode character block. 2746 * @since 9 2747 */ 2748 public static final UnicodeBlock SIDDHAM = 2749 new UnicodeBlock("SIDDHAM"); 2750 2751 /** 2752 * Constant for the "Modi" Unicode character block. 2753 * @since 9 2754 */ 2755 public static final UnicodeBlock MODI = 2756 new UnicodeBlock("MODI"); 2757 2758 /** 2759 * Constant for the "Warang Citi" Unicode character block. 2760 * @since 9 2761 */ 2762 public static final UnicodeBlock WARANG_CITI = 2763 new UnicodeBlock("WARANG_CITI", 2764 "WARANG CITI", 2765 "WARANGCITI"); 2766 2767 /** 2768 * Constant for the "Pau Cin Hau" Unicode character block. 2769 * @since 9 2770 */ 2771 public static final UnicodeBlock PAU_CIN_HAU = 2772 new UnicodeBlock("PAU_CIN_HAU", 2773 "PAU CIN HAU", 2774 "PAUCINHAU"); 2775 2776 /** 2777 * Constant for the "Mro" Unicode character block. 
2778 * @since 9 2779 */ 2780 public static final UnicodeBlock MRO = 2781 new UnicodeBlock("MRO"); 2782 2783 /** 2784 * Constant for the "Bassa Vah" Unicode character block. 2785 * @since 9 2786 */ 2787 public static final UnicodeBlock BASSA_VAH = 2788 new UnicodeBlock("BASSA_VAH", 2789 "BASSA VAH", 2790 "BASSAVAH"); 2791 2792 /** 2793 * Constant for the "Pahawh Hmong" Unicode character block. 2794 * @since 9 2795 */ 2796 public static final UnicodeBlock PAHAWH_HMONG = 2797 new UnicodeBlock("PAHAWH_HMONG", 2798 "PAHAWH HMONG", 2799 "PAHAWHHMONG"); 2800 2801 /** 2802 * Constant for the "Duployan" Unicode character block. 2803 * @since 9 2804 */ 2805 public static final UnicodeBlock DUPLOYAN = 2806 new UnicodeBlock("DUPLOYAN"); 2807 2808 /** 2809 * Constant for the "Shorthand Format Controls" Unicode character block. 2810 * @since 9 2811 */ 2812 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2813 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2814 "SHORTHAND FORMAT CONTROLS", 2815 "SHORTHANDFORMATCONTROLS"); 2816 2817 /** 2818 * Constant for the "Mende Kikakui" Unicode character block. 2819 * @since 9 2820 */ 2821 public static final UnicodeBlock MENDE_KIKAKUI = 2822 new UnicodeBlock("MENDE_KIKAKUI", 2823 "MENDE KIKAKUI", 2824 "MENDEKIKAKUI"); 2825 2826 /** 2827 * Constant for the "Ornamental Dingbats" Unicode character block. 2828 * @since 9 2829 */ 2830 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2831 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2832 "ORNAMENTAL DINGBATS", 2833 "ORNAMENTALDINGBATS"); 2834 2835 /** 2836 * Constant for the "Geometric Shapes Extended" Unicode character block. 2837 * @since 9 2838 */ 2839 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2840 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2841 "GEOMETRIC SHAPES EXTENDED", 2842 "GEOMETRICSHAPESEXTENDED"); 2843 2844 /** 2845 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2846 * @since 9 2847 */ 2848 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2849 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2850 "SUPPLEMENTAL ARROWS-C", 2851 "SUPPLEMENTALARROWS-C"); 2852 2853 /** 2854 * Constant for the "Cherokee Supplement" Unicode character block. 2855 * @since 9 2856 */ 2857 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2858 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2859 "CHEROKEE SUPPLEMENT", 2860 "CHEROKEESUPPLEMENT"); 2861 2862 /** 2863 * Constant for the "Hatran" Unicode character block. 2864 * @since 9 2865 */ 2866 public static final UnicodeBlock HATRAN = 2867 new UnicodeBlock("HATRAN"); 2868 2869 /** 2870 * Constant for the "Old Hungarian" Unicode character block. 2871 * @since 9 2872 */ 2873 public static final UnicodeBlock OLD_HUNGARIAN = 2874 new UnicodeBlock("OLD_HUNGARIAN", 2875 "OLD HUNGARIAN", 2876 "OLDHUNGARIAN"); 2877 2878 /** 2879 * Constant for the "Multani" Unicode character block. 2880 * @since 9 2881 */ 2882 public static final UnicodeBlock MULTANI = 2883 new UnicodeBlock("MULTANI"); 2884 2885 /** 2886 * Constant for the "Ahom" Unicode character block. 2887 * @since 9 2888 */ 2889 public static final UnicodeBlock AHOM = 2890 new UnicodeBlock("AHOM"); 2891 2892 /** 2893 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2894 * @since 9 2895 */ 2896 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2897 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2898 "EARLY DYNASTIC CUNEIFORM", 2899 "EARLYDYNASTICCUNEIFORM"); 2900 2901 /** 2902 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2903 * @since 9 2904 */ 2905 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2906 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2907 "ANATOLIAN HIEROGLYPHS", 2908 "ANATOLIANHIEROGLYPHS"); 2909 2910 /** 2911 * Constant for the "Sutton SignWriting" Unicode character block. 
2912 * @since 9 2913 */ 2914 public static final UnicodeBlock SUTTON_SIGNWRITING = 2915 new UnicodeBlock("SUTTON_SIGNWRITING", 2916 "SUTTON SIGNWRITING", 2917 "SUTTONSIGNWRITING"); 2918 2919 /** 2920 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2921 * character block. 2922 * @since 9 2923 */ 2924 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2925 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2926 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2927 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 2928 2929 /** 2930 * Constant for the "CJK Unified Ideographs Extension E" Unicode 2931 * character block. 2932 * @since 9 2933 */ 2934 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2935 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2936 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 2937 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 2938 2939 2940 private static final int blockStarts[] = { 2941 0x0000, // 0000..007F; Basic Latin 2942 0x0080, // 0080..00FF; Latin-1 Supplement 2943 0x0100, // 0100..017F; Latin Extended-A 2944 0x0180, // 0180..024F; Latin Extended-B 2945 0x0250, // 0250..02AF; IPA Extensions 2946 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2947 0x0300, // 0300..036F; Combining Diacritical Marks 2948 0x0370, // 0370..03FF; Greek and Coptic 2949 0x0400, // 0400..04FF; Cyrillic 2950 0x0500, // 0500..052F; Cyrillic Supplement 2951 0x0530, // 0530..058F; Armenian 2952 0x0590, // 0590..05FF; Hebrew 2953 0x0600, // 0600..06FF; Arabic 2954 0x0700, // 0700..074F; Syriac 2955 0x0750, // 0750..077F; Arabic Supplement 2956 0x0780, // 0780..07BF; Thaana 2957 0x07C0, // 07C0..07FF; NKo 2958 0x0800, // 0800..083F; Samaritan 2959 0x0840, // 0840..085F; Mandaic 2960 0x0860, // unassigned 2961 0x08A0, // 08A0..08FF; Arabic Extended-A 2962 0x0900, // 0900..097F; Devanagari 2963 0x0980, // 0980..09FF; Bengali 2964 0x0A00, // 0A00..0A7F; Gurmukhi 2965 0x0A80, // 0A80..0AFF; Gujarati 2966 0x0B00, // 0B00..0B7F; Oriya 2967 
0x0B80, // 0B80..0BFF; Tamil 2968 0x0C00, // 0C00..0C7F; Telugu 2969 0x0C80, // 0C80..0CFF; Kannada 2970 0x0D00, // 0D00..0D7F; Malayalam 2971 0x0D80, // 0D80..0DFF; Sinhala 2972 0x0E00, // 0E00..0E7F; Thai 2973 0x0E80, // 0E80..0EFF; Lao 2974 0x0F00, // 0F00..0FFF; Tibetan 2975 0x1000, // 1000..109F; Myanmar 2976 0x10A0, // 10A0..10FF; Georgian 2977 0x1100, // 1100..11FF; Hangul Jamo 2978 0x1200, // 1200..137F; Ethiopic 2979 0x1380, // 1380..139F; Ethiopic Supplement 2980 0x13A0, // 13A0..13FF; Cherokee 2981 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2982 0x1680, // 1680..169F; Ogham 2983 0x16A0, // 16A0..16FF; Runic 2984 0x1700, // 1700..171F; Tagalog 2985 0x1720, // 1720..173F; Hanunoo 2986 0x1740, // 1740..175F; Buhid 2987 0x1760, // 1760..177F; Tagbanwa 2988 0x1780, // 1780..17FF; Khmer 2989 0x1800, // 1800..18AF; Mongolian 2990 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2991 0x1900, // 1900..194F; Limbu 2992 0x1950, // 1950..197F; Tai Le 2993 0x1980, // 1980..19DF; New Tai Lue 2994 0x19E0, // 19E0..19FF; Khmer Symbols 2995 0x1A00, // 1A00..1A1F; Buginese 2996 0x1A20, // 1A20..1AAF; Tai Tham 2997 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 2998 0x1B00, // 1B00..1B7F; Balinese 2999 0x1B80, // 1B80..1BBF; Sundanese 3000 0x1BC0, // 1BC0..1BFF; Batak 3001 0x1C00, // 1C00..1C4F; Lepcha 3002 0x1C50, // 1C50..1C7F; Ol Chiki 3003 0x1C80, // unassigned 3004 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3005 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3006 0x1D00, // 1D00..1D7F; Phonetic Extensions 3007 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3008 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3009 0x1E00, // 1E00..1EFF; Latin Extended Additional 3010 0x1F00, // 1F00..1FFF; Greek Extended 3011 0x2000, // 2000..206F; General Punctuation 3012 0x2070, // 2070..209F; Superscripts and Subscripts 3013 0x20A0, // 20A0..20CF; Currency Symbols 3014 0x20D0, // 20D0..20FF; Combining Diacritical 
Marks for Symbols 3015 0x2100, // 2100..214F; Letterlike Symbols 3016 0x2150, // 2150..218F; Number Forms 3017 0x2190, // 2190..21FF; Arrows 3018 0x2200, // 2200..22FF; Mathematical Operators 3019 0x2300, // 2300..23FF; Miscellaneous Technical 3020 0x2400, // 2400..243F; Control Pictures 3021 0x2440, // 2440..245F; Optical Character Recognition 3022 0x2460, // 2460..24FF; Enclosed Alphanumerics 3023 0x2500, // 2500..257F; Box Drawing 3024 0x2580, // 2580..259F; Block Elements 3025 0x25A0, // 25A0..25FF; Geometric Shapes 3026 0x2600, // 2600..26FF; Miscellaneous Symbols 3027 0x2700, // 2700..27BF; Dingbats 3028 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3029 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3030 0x2800, // 2800..28FF; Braille Patterns 3031 0x2900, // 2900..297F; Supplemental Arrows-B 3032 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3033 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3034 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3035 0x2C00, // 2C00..2C5F; Glagolitic 3036 0x2C60, // 2C60..2C7F; Latin Extended-C 3037 0x2C80, // 2C80..2CFF; Coptic 3038 0x2D00, // 2D00..2D2F; Georgian Supplement 3039 0x2D30, // 2D30..2D7F; Tifinagh 3040 0x2D80, // 2D80..2DDF; Ethiopic Extended 3041 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3042 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3043 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3044 0x2F00, // 2F00..2FDF; Kangxi Radicals 3045 0x2FE0, // unassigned 3046 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3047 0x3000, // 3000..303F; CJK Symbols and Punctuation 3048 0x3040, // 3040..309F; Hiragana 3049 0x30A0, // 30A0..30FF; Katakana 3050 0x3100, // 3100..312F; Bopomofo 3051 0x3130, // 3130..318F; Hangul Compatibility Jamo 3052 0x3190, // 3190..319F; Kanbun 3053 0x31A0, // 31A0..31BF; Bopomofo Extended 3054 0x31C0, // 31C0..31EF; CJK Strokes 3055 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3056 0x3200, // 3200..32FF; Enclosed CJK Letters 
and Months 3057 0x3300, // 3300..33FF; CJK Compatibility 3058 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3059 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3060 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3061 0xA000, // A000..A48F; Yi Syllables 3062 0xA490, // A490..A4CF; Yi Radicals 3063 0xA4D0, // A4D0..A4FF; Lisu 3064 0xA500, // A500..A63F; Vai 3065 0xA640, // A640..A69F; Cyrillic Extended-B 3066 0xA6A0, // A6A0..A6FF; Bamum 3067 0xA700, // A700..A71F; Modifier Tone Letters 3068 0xA720, // A720..A7FF; Latin Extended-D 3069 0xA800, // A800..A82F; Syloti Nagri 3070 0xA830, // A830..A83F; Common Indic Number Forms 3071 0xA840, // A840..A87F; Phags-pa 3072 0xA880, // A880..A8DF; Saurashtra 3073 0xA8E0, // A8E0..A8FF; Devanagari Extended 3074 0xA900, // A900..A92F; Kayah Li 3075 0xA930, // A930..A95F; Rejang 3076 0xA960, // A960..A97F; Hangul Jamo Extended-A 3077 0xA980, // A980..A9DF; Javanese 3078 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3079 0xAA00, // AA00..AA5F; Cham 3080 0xAA60, // AA60..AA7F; Myanmar Extended-A 3081 0xAA80, // AA80..AADF; Tai Viet 3082 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3083 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3084 0xAB30, // AB30..AB6F; Latin Extended-E 3085 0xAB70, // AB70..ABBF; Cherokee Supplement 3086 0xABC0, // ABC0..ABFF; Meetei Mayek 3087 0xAC00, // AC00..D7AF; Hangul Syllables 3088 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3089 0xD800, // D800..DB7F; High Surrogates 3090 0xDB80, // DB80..DBFF; High Private Use Surrogates 3091 0xDC00, // DC00..DFFF; Low Surrogates 3092 0xE000, // E000..F8FF; Private Use Area 3093 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3094 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3095 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3096 0xFE00, // FE00..FE0F; Variation Selectors 3097 0xFE10, // FE10..FE1F; Vertical Forms 3098 0xFE20, // FE20..FE2F; Combining Half Marks 3099 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3100 0xFE50, // FE50..FE6F; 
Small Form Variants 3101 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3102 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3103 0xFFF0, // FFF0..FFFF; Specials 3104 0x10000, // 10000..1007F; Linear B Syllabary 3105 0x10080, // 10080..100FF; Linear B Ideograms 3106 0x10100, // 10100..1013F; Aegean Numbers 3107 0x10140, // 10140..1018F; Ancient Greek Numbers 3108 0x10190, // 10190..101CF; Ancient Symbols 3109 0x101D0, // 101D0..101FF; Phaistos Disc 3110 0x10200, // unassigned 3111 0x10280, // 10280..1029F; Lycian 3112 0x102A0, // 102A0..102DF; Carian 3113 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3114 0x10300, // 10300..1032F; Old Italic 3115 0x10330, // 10330..1034F; Gothic 3116 0x10350, // 10350..1037F; Old Permic 3117 0x10380, // 10380..1039F; Ugaritic 3118 0x103A0, // 103A0..103DF; Old Persian 3119 0x103E0, // unassigned 3120 0x10400, // 10400..1044F; Deseret 3121 0x10450, // 10450..1047F; Shavian 3122 0x10480, // 10480..104AF; Osmanya 3123 0x104B0, // unassigned 3124 0x10500, // 10500..1052F; Elbasan 3125 0x10530, // 10530..1056F; Caucasian Albanian 3126 0x10570, // unassigned 3127 0x10600, // 10600..1077F; Linear A 3128 0x10780, // unassigned 3129 0x10800, // 10800..1083F; Cypriot Syllabary 3130 0x10840, // 10840..1085F; Imperial Aramaic 3131 0x10860, // 10860..1087F; Palmyrene 3132 0x10880, // 10880..108AF; Nabataean 3133 0x108B0, // unassigned 3134 0x108E0, // 108E0..108FF; Hatran 3135 0x10900, // 10900..1091F; Phoenician 3136 0x10920, // 10920..1093F; Lydian 3137 0x10940, // unassigned 3138 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3139 0x109A0, // 109A0..109FF; Meroitic Cursive 3140 0x10A00, // 10A00..10A5F; Kharoshthi 3141 0x10A60, // 10A60..10A7F; Old South Arabian 3142 0x10A80, // 10A80..10A9F; Old North Arabian 3143 0x10AA0, // unassigned 3144 0x10AC0, // 10AC0..10AFF; Manichaean 3145 0x10B00, // 10B00..10B3F; Avestan 3146 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3147 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3148 
0x10B80, // 10B80..10BAF; Psalter Pahlavi 3149 0x10BB0, // unassigned 3150 0x10C00, // 10C00..10C4F; Old Turkic 3151 0x10C50, // unassigned 3152 0x10C80, // 10C80..10CFF; Old Hungarian 3153 0x10D00, // unassigned 3154 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3155 0x10E80, // unassigned 3156 0x11000, // 11000..1107F; Brahmi 3157 0x11080, // 11080..110CF; Kaithi 3158 0x110D0, // 110D0..110FF; Sora Sompeng 3159 0x11100, // 11100..1114F; Chakma 3160 0x11150, // 11150..1117F; Mahajani 3161 0x11180, // 11180..111DF; Sharada 3162 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3163 0x11200, // 11200..1124F; Khojki 3164 0x11250, // unassigned 3165 0x11280, // 11280..112AF; Multani 3166 0x112B0, // 112B0..112FF; Khudawadi 3167 0x11300, // 11300..1137F; Grantha 3168 0x11380, // unassigned 3169 0x11480, // 11480..114DF; Tirhuta 3170 0x114E0, // unassigned 3171 0x11580, // 11580..115FF; Siddham 3172 0x11600, // 11600..1165F; Modi 3173 0x11660, // unassigned 3174 0x11680, // 11680..116CF; Takri 3175 0x116D0, // unassigned 3176 0x11700, // 11700..1173F; Ahom 3177 0x11740, // unassigned 3178 0x118A0, // 118A0..118FF; Warang Citi 3179 0x11900, // unassigned 3180 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3181 0x11B00, // unassigned 3182 0x12000, // 12000..123FF; Cuneiform 3183 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3184 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3185 0x12550, // unassigned 3186 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3187 0x13430, // unassigned 3188 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3189 0x14680, // unassigned 3190 0x16800, // 16800..16A3F; Bamum Supplement 3191 0x16A40, // 16A40..16A6F; Mro 3192 0x16A70, // unassigned 3193 0x16AD0, // 16AD0..16AFF; Bassa Vah 3194 0x16B00, // 16B00..16B8F; Pahawh Hmong 3195 0x16B90, // unassigned 3196 0x16F00, // 16F00..16F9F; Miao 3197 0x16FA0, // unassigned 3198 0x1B000, // 1B000..1B0FF; Kana Supplement 3199 0x1B100, // unassigned 3200 0x1BC00, // 1BC00..1BC9F; Duployan 3201 
0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3202 0x1BCB0, // unassigned 3203 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3204 0x1D100, // 1D100..1D1FF; Musical Symbols 3205 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3206 0x1D250, // unassigned 3207 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3208 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3209 0x1D380, // unassigned 3210 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3211 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3212 0x1DAB0, // unassigned 3213 0x1E800, // 1E800..1E8DF; Mende Kikakui 3214 0x1E8E0, // unassigned 3215 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3216 0x1EF00, // unassigned 3217 0x1F000, // 1F000..1F02F; Mahjong Tiles 3218 0x1F030, // 1F030..1F09F; Domino Tiles 3219 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3220 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3221 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3222 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3223 0x1F600, // 1F600..1F64F; Emoticons 3224 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3225 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3226 0x1F700, // 1F700..1F77F; Alchemical Symbols 3227 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3228 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C 3229 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3230 0x1FA00, // unassigned 3231 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3232 0x2A6E0, // unassigned 3233 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3234 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3235 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 3236 0x2CEB0, // unassigned 3237 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 3238 0x2FA20, // unassigned 3239 0xE0000, // E0000..E007F; Tags 3240 0xE0080, // unassigned 3241 0xE0100, // E0100..E01EF; Variation Selectors 
Supplement 3242 0xE01F0, // unassigned 3243 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 3244 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 3245 }; 3246 3247 private static final UnicodeBlock[] blocks = { 3248 BASIC_LATIN, 3249 LATIN_1_SUPPLEMENT, 3250 LATIN_EXTENDED_A, 3251 LATIN_EXTENDED_B, 3252 IPA_EXTENSIONS, 3253 SPACING_MODIFIER_LETTERS, 3254 COMBINING_DIACRITICAL_MARKS, 3255 GREEK, 3256 CYRILLIC, 3257 CYRILLIC_SUPPLEMENTARY, 3258 ARMENIAN, 3259 HEBREW, 3260 ARABIC, 3261 SYRIAC, 3262 ARABIC_SUPPLEMENT, 3263 THAANA, 3264 NKO, 3265 SAMARITAN, 3266 MANDAIC, 3267 null, 3268 ARABIC_EXTENDED_A, 3269 DEVANAGARI, 3270 BENGALI, 3271 GURMUKHI, 3272 GUJARATI, 3273 ORIYA, 3274 TAMIL, 3275 TELUGU, 3276 KANNADA, 3277 MALAYALAM, 3278 SINHALA, 3279 THAI, 3280 LAO, 3281 TIBETAN, 3282 MYANMAR, 3283 GEORGIAN, 3284 HANGUL_JAMO, 3285 ETHIOPIC, 3286 ETHIOPIC_SUPPLEMENT, 3287 CHEROKEE, 3288 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 3289 OGHAM, 3290 RUNIC, 3291 TAGALOG, 3292 HANUNOO, 3293 BUHID, 3294 TAGBANWA, 3295 KHMER, 3296 MONGOLIAN, 3297 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 3298 LIMBU, 3299 TAI_LE, 3300 NEW_TAI_LUE, 3301 KHMER_SYMBOLS, 3302 BUGINESE, 3303 TAI_THAM, 3304 COMBINING_DIACRITICAL_MARKS_EXTENDED, 3305 BALINESE, 3306 SUNDANESE, 3307 BATAK, 3308 LEPCHA, 3309 OL_CHIKI, 3310 null, 3311 SUNDANESE_SUPPLEMENT, 3312 VEDIC_EXTENSIONS, 3313 PHONETIC_EXTENSIONS, 3314 PHONETIC_EXTENSIONS_SUPPLEMENT, 3315 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 3316 LATIN_EXTENDED_ADDITIONAL, 3317 GREEK_EXTENDED, 3318 GENERAL_PUNCTUATION, 3319 SUPERSCRIPTS_AND_SUBSCRIPTS, 3320 CURRENCY_SYMBOLS, 3321 COMBINING_MARKS_FOR_SYMBOLS, 3322 LETTERLIKE_SYMBOLS, 3323 NUMBER_FORMS, 3324 ARROWS, 3325 MATHEMATICAL_OPERATORS, 3326 MISCELLANEOUS_TECHNICAL, 3327 CONTROL_PICTURES, 3328 OPTICAL_CHARACTER_RECOGNITION, 3329 ENCLOSED_ALPHANUMERICS, 3330 BOX_DRAWING, 3331 BLOCK_ELEMENTS, 3332 GEOMETRIC_SHAPES, 3333 MISCELLANEOUS_SYMBOLS, 3334 DINGBATS, 3335 
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 3336 SUPPLEMENTAL_ARROWS_A, 3337 BRAILLE_PATTERNS, 3338 SUPPLEMENTAL_ARROWS_B, 3339 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 3340 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 3341 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 3342 GLAGOLITIC, 3343 LATIN_EXTENDED_C, 3344 COPTIC, 3345 GEORGIAN_SUPPLEMENT, 3346 TIFINAGH, 3347 ETHIOPIC_EXTENDED, 3348 CYRILLIC_EXTENDED_A, 3349 SUPPLEMENTAL_PUNCTUATION, 3350 CJK_RADICALS_SUPPLEMENT, 3351 KANGXI_RADICALS, 3352 null, 3353 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 3354 CJK_SYMBOLS_AND_PUNCTUATION, 3355 HIRAGANA, 3356 KATAKANA, 3357 BOPOMOFO, 3358 HANGUL_COMPATIBILITY_JAMO, 3359 KANBUN, 3360 BOPOMOFO_EXTENDED, 3361 CJK_STROKES, 3362 KATAKANA_PHONETIC_EXTENSIONS, 3363 ENCLOSED_CJK_LETTERS_AND_MONTHS, 3364 CJK_COMPATIBILITY, 3365 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 3366 YIJING_HEXAGRAM_SYMBOLS, 3367 CJK_UNIFIED_IDEOGRAPHS, 3368 YI_SYLLABLES, 3369 YI_RADICALS, 3370 LISU, 3371 VAI, 3372 CYRILLIC_EXTENDED_B, 3373 BAMUM, 3374 MODIFIER_TONE_LETTERS, 3375 LATIN_EXTENDED_D, 3376 SYLOTI_NAGRI, 3377 COMMON_INDIC_NUMBER_FORMS, 3378 PHAGS_PA, 3379 SAURASHTRA, 3380 DEVANAGARI_EXTENDED, 3381 KAYAH_LI, 3382 REJANG, 3383 HANGUL_JAMO_EXTENDED_A, 3384 JAVANESE, 3385 MYANMAR_EXTENDED_B, 3386 CHAM, 3387 MYANMAR_EXTENDED_A, 3388 TAI_VIET, 3389 MEETEI_MAYEK_EXTENSIONS, 3390 ETHIOPIC_EXTENDED_A, 3391 LATIN_EXTENDED_E, 3392 CHEROKEE_SUPPLEMENT, 3393 MEETEI_MAYEK, 3394 HANGUL_SYLLABLES, 3395 HANGUL_JAMO_EXTENDED_B, 3396 HIGH_SURROGATES, 3397 HIGH_PRIVATE_USE_SURROGATES, 3398 LOW_SURROGATES, 3399 PRIVATE_USE_AREA, 3400 CJK_COMPATIBILITY_IDEOGRAPHS, 3401 ALPHABETIC_PRESENTATION_FORMS, 3402 ARABIC_PRESENTATION_FORMS_A, 3403 VARIATION_SELECTORS, 3404 VERTICAL_FORMS, 3405 COMBINING_HALF_MARKS, 3406 CJK_COMPATIBILITY_FORMS, 3407 SMALL_FORM_VARIANTS, 3408 ARABIC_PRESENTATION_FORMS_B, 3409 HALFWIDTH_AND_FULLWIDTH_FORMS, 3410 SPECIALS, 3411 LINEAR_B_SYLLABARY, 3412 LINEAR_B_IDEOGRAMS, 3413 AEGEAN_NUMBERS, 3414 ANCIENT_GREEK_NUMBERS, 3415 
ANCIENT_SYMBOLS, 3416 PHAISTOS_DISC, 3417 null, 3418 LYCIAN, 3419 CARIAN, 3420 COPTIC_EPACT_NUMBERS, 3421 OLD_ITALIC, 3422 GOTHIC, 3423 OLD_PERMIC, 3424 UGARITIC, 3425 OLD_PERSIAN, 3426 null, 3427 DESERET, 3428 SHAVIAN, 3429 OSMANYA, 3430 null, 3431 ELBASAN, 3432 CAUCASIAN_ALBANIAN, 3433 null, 3434 LINEAR_A, 3435 null, 3436 CYPRIOT_SYLLABARY, 3437 IMPERIAL_ARAMAIC, 3438 PALMYRENE, 3439 NABATAEAN, 3440 null, 3441 HATRAN, 3442 PHOENICIAN, 3443 LYDIAN, 3444 null, 3445 MEROITIC_HIEROGLYPHS, 3446 MEROITIC_CURSIVE, 3447 KHAROSHTHI, 3448 OLD_SOUTH_ARABIAN, 3449 OLD_NORTH_ARABIAN, 3450 null, 3451 MANICHAEAN, 3452 AVESTAN, 3453 INSCRIPTIONAL_PARTHIAN, 3454 INSCRIPTIONAL_PAHLAVI, 3455 PSALTER_PAHLAVI, 3456 null, 3457 OLD_TURKIC, 3458 null, 3459 OLD_HUNGARIAN, 3460 null, 3461 RUMI_NUMERAL_SYMBOLS, 3462 null, 3463 BRAHMI, 3464 KAITHI, 3465 SORA_SOMPENG, 3466 CHAKMA, 3467 MAHAJANI, 3468 SHARADA, 3469 SINHALA_ARCHAIC_NUMBERS, 3470 KHOJKI, 3471 null, 3472 MULTANI, 3473 KHUDAWADI, 3474 GRANTHA, 3475 null, 3476 TIRHUTA, 3477 null, 3478 SIDDHAM, 3479 MODI, 3480 null, 3481 TAKRI, 3482 null, 3483 AHOM, 3484 null, 3485 WARANG_CITI, 3486 null, 3487 PAU_CIN_HAU, 3488 null, 3489 CUNEIFORM, 3490 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3491 EARLY_DYNASTIC_CUNEIFORM, 3492 null, 3493 EGYPTIAN_HIEROGLYPHS, 3494 null, 3495 ANATOLIAN_HIEROGLYPHS, 3496 null, 3497 BAMUM_SUPPLEMENT, 3498 MRO, 3499 null, 3500 BASSA_VAH, 3501 PAHAWH_HMONG, 3502 null, 3503 MIAO, 3504 null, 3505 KANA_SUPPLEMENT, 3506 null, 3507 DUPLOYAN, 3508 SHORTHAND_FORMAT_CONTROLS, 3509 null, 3510 BYZANTINE_MUSICAL_SYMBOLS, 3511 MUSICAL_SYMBOLS, 3512 ANCIENT_GREEK_MUSICAL_NOTATION, 3513 null, 3514 TAI_XUAN_JING_SYMBOLS, 3515 COUNTING_ROD_NUMERALS, 3516 null, 3517 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3518 SUTTON_SIGNWRITING, 3519 null, 3520 MENDE_KIKAKUI, 3521 null, 3522 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3523 null, 3524 MAHJONG_TILES, 3525 DOMINO_TILES, 3526 PLAYING_CARDS, 3527 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3528 
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3529 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3530 EMOTICONS, 3531 ORNAMENTAL_DINGBATS, 3532 TRANSPORT_AND_MAP_SYMBOLS, 3533 ALCHEMICAL_SYMBOLS, 3534 GEOMETRIC_SHAPES_EXTENDED, 3535 SUPPLEMENTAL_ARROWS_C, 3536 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 3537 null, 3538 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3539 null, 3540 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3541 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3542 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 3543 null, 3544 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3545 null, 3546 TAGS, 3547 null, 3548 VARIATION_SELECTORS_SUPPLEMENT, 3549 null, 3550 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3551 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3552 }; 3553 3554 3555 /** 3556 * Returns the object representing the Unicode block containing the 3557 * given character, or {@code null} if the character is not a 3558 * member of a defined block. 3559 * 3560 * <p><b>Note:</b> This method cannot handle 3561 * <a href="Character.html#supplementary"> supplementary 3562 * characters</a>. To support all Unicode characters, including 3563 * supplementary characters, use the {@link #of(int)} method. 3564 * 3565 * @param c The character in question 3566 * @return The {@code UnicodeBlock} instance representing the 3567 * Unicode block of which this character is a member, or 3568 * {@code null} if the character is not a member of any 3569 * Unicode block 3570 */ 3571 public static UnicodeBlock of(char c) { 3572 return of((int)c); 3573 } 3574 3575 /** 3576 * Returns the object representing the Unicode block 3577 * containing the given character (Unicode code point), or 3578 * {@code null} if the character is not a member of a 3579 * defined block. 3580 * 3581 * @param codePoint the character (Unicode code point) in question. 
3582 * @return The {@code UnicodeBlock} instance representing the 3583 * Unicode block of which this character is a member, or 3584 * {@code null} if the character is not a member of any 3585 * Unicode block 3586 * @exception IllegalArgumentException if the specified 3587 * {@code codePoint} is an invalid Unicode code point. 3588 * @see Character#isValidCodePoint(int) 3589 * @since 1.5 3590 */ 3591 public static UnicodeBlock of(int codePoint) { 3592 if (!isValidCodePoint(codePoint)) { 3593 throw new IllegalArgumentException(); 3594 } 3595 3596 int top, bottom, current; 3597 bottom = 0; 3598 top = blockStarts.length; 3599 current = top/2; 3600 3601 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3602 while (top - bottom > 1) { 3603 if (codePoint >= blockStarts[current]) { 3604 bottom = current; 3605 } else { 3606 top = current; 3607 } 3608 current = (top + bottom) / 2; 3609 } 3610 return blocks[current]; 3611 } 3612 3613 /** 3614 * Returns the UnicodeBlock with the given name. Block 3615 * names are determined by The Unicode Standard. The file 3616 * {@code Blocks-<version>.txt} defines blocks for a particular 3617 * version of the standard. The {@link Character} class specifies 3618 * the version of the standard that it supports. 3619 * <p> 3620 * This method accepts block names in the following forms: 3621 * <ol> 3622 * <li> Canonical block names as defined by the Unicode Standard. 3623 * For example, the standard defines a "Basic Latin" block. Therefore, this 3624 * method accepts "Basic Latin" as a valid block name. The documentation of 3625 * each UnicodeBlock provides the canonical name. 3626 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3627 * is a valid block name for the "Basic Latin" block. 3628 * <li>The text representation of each constant UnicodeBlock identifier. 3629 * For example, this method will return the {@link #BASIC_LATIN} block if 3630 * provided with the "BASIC_LATIN" name. 
This form replaces all spaces and 3631 * hyphens in the canonical name with underscores. 3632 * </ol> 3633 * Finally, character case is ignored for all of the valid block name forms. 3634 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3635 * The en_US locale's case mapping rules are used to provide case-insensitive 3636 * string comparisons for block name validation. 3637 * <p> 3638 * If the Unicode Standard changes block names, both the previous and 3639 * current names will be accepted. 3640 * 3641 * @param blockName A {@code UnicodeBlock} name. 3642 * @return The {@code UnicodeBlock} instance identified 3643 * by {@code blockName} 3644 * @throws IllegalArgumentException if {@code blockName} is an 3645 * invalid name 3646 * @throws NullPointerException if {@code blockName} is null 3647 * @since 1.5 3648 */ 3649 public static final UnicodeBlock forName(String blockName) { 3650 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3651 if (block == null) { 3652 throw new IllegalArgumentException(); 3653 } 3654 return block; 3655 } 3656 } 3657 3658 3659 /** 3660 * A family of character subsets representing the character scripts 3661 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3662 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3663 * character is assigned to a single Unicode script, either a specific 3664 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3665 * one of the following three special values, 3666 * {@link Character.UnicodeScript#INHERITED Inherited}, 3667 * {@link Character.UnicodeScript#COMMON Common} or 3668 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3669 * 3670 * @since 1.7 3671 */ 3672 public static enum UnicodeScript { 3673 /** 3674 * Unicode script "Common". 3675 */ 3676 COMMON, 3677 3678 /** 3679 * Unicode script "Latin". 3680 */ 3681 LATIN, 3682 3683 /** 3684 * Unicode script "Greek". 
3685 */ 3686 GREEK, 3687 3688 /** 3689 * Unicode script "Cyrillic". 3690 */ 3691 CYRILLIC, 3692 3693 /** 3694 * Unicode script "Armenian". 3695 */ 3696 ARMENIAN, 3697 3698 /** 3699 * Unicode script "Hebrew". 3700 */ 3701 HEBREW, 3702 3703 /** 3704 * Unicode script "Arabic". 3705 */ 3706 ARABIC, 3707 3708 /** 3709 * Unicode script "Syriac". 3710 */ 3711 SYRIAC, 3712 3713 /** 3714 * Unicode script "Thaana". 3715 */ 3716 THAANA, 3717 3718 /** 3719 * Unicode script "Devanagari". 3720 */ 3721 DEVANAGARI, 3722 3723 /** 3724 * Unicode script "Bengali". 3725 */ 3726 BENGALI, 3727 3728 /** 3729 * Unicode script "Gurmukhi". 3730 */ 3731 GURMUKHI, 3732 3733 /** 3734 * Unicode script "Gujarati". 3735 */ 3736 GUJARATI, 3737 3738 /** 3739 * Unicode script "Oriya". 3740 */ 3741 ORIYA, 3742 3743 /** 3744 * Unicode script "Tamil". 3745 */ 3746 TAMIL, 3747 3748 /** 3749 * Unicode script "Telugu". 3750 */ 3751 TELUGU, 3752 3753 /** 3754 * Unicode script "Kannada". 3755 */ 3756 KANNADA, 3757 3758 /** 3759 * Unicode script "Malayalam". 3760 */ 3761 MALAYALAM, 3762 3763 /** 3764 * Unicode script "Sinhala". 3765 */ 3766 SINHALA, 3767 3768 /** 3769 * Unicode script "Thai". 3770 */ 3771 THAI, 3772 3773 /** 3774 * Unicode script "Lao". 3775 */ 3776 LAO, 3777 3778 /** 3779 * Unicode script "Tibetan". 3780 */ 3781 TIBETAN, 3782 3783 /** 3784 * Unicode script "Myanmar". 3785 */ 3786 MYANMAR, 3787 3788 /** 3789 * Unicode script "Georgian". 3790 */ 3791 GEORGIAN, 3792 3793 /** 3794 * Unicode script "Hangul". 3795 */ 3796 HANGUL, 3797 3798 /** 3799 * Unicode script "Ethiopic". 3800 */ 3801 ETHIOPIC, 3802 3803 /** 3804 * Unicode script "Cherokee". 3805 */ 3806 CHEROKEE, 3807 3808 /** 3809 * Unicode script "Canadian_Aboriginal". 3810 */ 3811 CANADIAN_ABORIGINAL, 3812 3813 /** 3814 * Unicode script "Ogham". 3815 */ 3816 OGHAM, 3817 3818 /** 3819 * Unicode script "Runic". 3820 */ 3821 RUNIC, 3822 3823 /** 3824 * Unicode script "Khmer". 
3825 */ 3826 KHMER, 3827 3828 /** 3829 * Unicode script "Mongolian". 3830 */ 3831 MONGOLIAN, 3832 3833 /** 3834 * Unicode script "Hiragana". 3835 */ 3836 HIRAGANA, 3837 3838 /** 3839 * Unicode script "Katakana". 3840 */ 3841 KATAKANA, 3842 3843 /** 3844 * Unicode script "Bopomofo". 3845 */ 3846 BOPOMOFO, 3847 3848 /** 3849 * Unicode script "Han". 3850 */ 3851 HAN, 3852 3853 /** 3854 * Unicode script "Yi". 3855 */ 3856 YI, 3857 3858 /** 3859 * Unicode script "Old_Italic". 3860 */ 3861 OLD_ITALIC, 3862 3863 /** 3864 * Unicode script "Gothic". 3865 */ 3866 GOTHIC, 3867 3868 /** 3869 * Unicode script "Deseret". 3870 */ 3871 DESERET, 3872 3873 /** 3874 * Unicode script "Inherited". 3875 */ 3876 INHERITED, 3877 3878 /** 3879 * Unicode script "Tagalog". 3880 */ 3881 TAGALOG, 3882 3883 /** 3884 * Unicode script "Hanunoo". 3885 */ 3886 HANUNOO, 3887 3888 /** 3889 * Unicode script "Buhid". 3890 */ 3891 BUHID, 3892 3893 /** 3894 * Unicode script "Tagbanwa". 3895 */ 3896 TAGBANWA, 3897 3898 /** 3899 * Unicode script "Limbu". 3900 */ 3901 LIMBU, 3902 3903 /** 3904 * Unicode script "Tai_Le". 3905 */ 3906 TAI_LE, 3907 3908 /** 3909 * Unicode script "Linear_B". 3910 */ 3911 LINEAR_B, 3912 3913 /** 3914 * Unicode script "Ugaritic". 3915 */ 3916 UGARITIC, 3917 3918 /** 3919 * Unicode script "Shavian". 3920 */ 3921 SHAVIAN, 3922 3923 /** 3924 * Unicode script "Osmanya". 3925 */ 3926 OSMANYA, 3927 3928 /** 3929 * Unicode script "Cypriot". 3930 */ 3931 CYPRIOT, 3932 3933 /** 3934 * Unicode script "Braille". 3935 */ 3936 BRAILLE, 3937 3938 /** 3939 * Unicode script "Buginese". 3940 */ 3941 BUGINESE, 3942 3943 /** 3944 * Unicode script "Coptic". 3945 */ 3946 COPTIC, 3947 3948 /** 3949 * Unicode script "New_Tai_Lue". 3950 */ 3951 NEW_TAI_LUE, 3952 3953 /** 3954 * Unicode script "Glagolitic". 3955 */ 3956 GLAGOLITIC, 3957 3958 /** 3959 * Unicode script "Tifinagh". 3960 */ 3961 TIFINAGH, 3962 3963 /** 3964 * Unicode script "Syloti_Nagri". 
3965 */ 3966 SYLOTI_NAGRI, 3967 3968 /** 3969 * Unicode script "Old_Persian". 3970 */ 3971 OLD_PERSIAN, 3972 3973 /** 3974 * Unicode script "Kharoshthi". 3975 */ 3976 KHAROSHTHI, 3977 3978 /** 3979 * Unicode script "Balinese". 3980 */ 3981 BALINESE, 3982 3983 /** 3984 * Unicode script "Cuneiform". 3985 */ 3986 CUNEIFORM, 3987 3988 /** 3989 * Unicode script "Phoenician". 3990 */ 3991 PHOENICIAN, 3992 3993 /** 3994 * Unicode script "Phags_Pa". 3995 */ 3996 PHAGS_PA, 3997 3998 /** 3999 * Unicode script "Nko". 4000 */ 4001 NKO, 4002 4003 /** 4004 * Unicode script "Sundanese". 4005 */ 4006 SUNDANESE, 4007 4008 /** 4009 * Unicode script "Batak". 4010 */ 4011 BATAK, 4012 4013 /** 4014 * Unicode script "Lepcha". 4015 */ 4016 LEPCHA, 4017 4018 /** 4019 * Unicode script "Ol_Chiki". 4020 */ 4021 OL_CHIKI, 4022 4023 /** 4024 * Unicode script "Vai". 4025 */ 4026 VAI, 4027 4028 /** 4029 * Unicode script "Saurashtra". 4030 */ 4031 SAURASHTRA, 4032 4033 /** 4034 * Unicode script "Kayah_Li". 4035 */ 4036 KAYAH_LI, 4037 4038 /** 4039 * Unicode script "Rejang". 4040 */ 4041 REJANG, 4042 4043 /** 4044 * Unicode script "Lycian". 4045 */ 4046 LYCIAN, 4047 4048 /** 4049 * Unicode script "Carian". 4050 */ 4051 CARIAN, 4052 4053 /** 4054 * Unicode script "Lydian". 4055 */ 4056 LYDIAN, 4057 4058 /** 4059 * Unicode script "Cham". 4060 */ 4061 CHAM, 4062 4063 /** 4064 * Unicode script "Tai_Tham". 4065 */ 4066 TAI_THAM, 4067 4068 /** 4069 * Unicode script "Tai_Viet". 4070 */ 4071 TAI_VIET, 4072 4073 /** 4074 * Unicode script "Avestan". 4075 */ 4076 AVESTAN, 4077 4078 /** 4079 * Unicode script "Egyptian_Hieroglyphs". 4080 */ 4081 EGYPTIAN_HIEROGLYPHS, 4082 4083 /** 4084 * Unicode script "Samaritan". 4085 */ 4086 SAMARITAN, 4087 4088 /** 4089 * Unicode script "Mandaic". 4090 */ 4091 MANDAIC, 4092 4093 /** 4094 * Unicode script "Lisu". 4095 */ 4096 LISU, 4097 4098 /** 4099 * Unicode script "Bamum". 4100 */ 4101 BAMUM, 4102 4103 /** 4104 * Unicode script "Javanese". 
4105 */ 4106 JAVANESE, 4107 4108 /** 4109 * Unicode script "Meetei_Mayek". 4110 */ 4111 MEETEI_MAYEK, 4112 4113 /** 4114 * Unicode script "Imperial_Aramaic". 4115 */ 4116 IMPERIAL_ARAMAIC, 4117 4118 /** 4119 * Unicode script "Old_South_Arabian". 4120 */ 4121 OLD_SOUTH_ARABIAN, 4122 4123 /** 4124 * Unicode script "Inscriptional_Parthian". 4125 */ 4126 INSCRIPTIONAL_PARTHIAN, 4127 4128 /** 4129 * Unicode script "Inscriptional_Pahlavi". 4130 */ 4131 INSCRIPTIONAL_PAHLAVI, 4132 4133 /** 4134 * Unicode script "Old_Turkic". 4135 */ 4136 OLD_TURKIC, 4137 4138 /** 4139 * Unicode script "Brahmi". 4140 */ 4141 BRAHMI, 4142 4143 /** 4144 * Unicode script "Kaithi". 4145 */ 4146 KAITHI, 4147 4148 /** 4149 * Unicode script "Meroitic Hieroglyphs". 4150 * @since 1.8 4151 */ 4152 MEROITIC_HIEROGLYPHS, 4153 4154 /** 4155 * Unicode script "Meroitic Cursive". 4156 * @since 1.8 4157 */ 4158 MEROITIC_CURSIVE, 4159 4160 /** 4161 * Unicode script "Sora Sompeng". 4162 * @since 1.8 4163 */ 4164 SORA_SOMPENG, 4165 4166 /** 4167 * Unicode script "Chakma". 4168 * @since 1.8 4169 */ 4170 CHAKMA, 4171 4172 /** 4173 * Unicode script "Sharada". 4174 * @since 1.8 4175 */ 4176 SHARADA, 4177 4178 /** 4179 * Unicode script "Takri". 4180 * @since 1.8 4181 */ 4182 TAKRI, 4183 4184 /** 4185 * Unicode script "Miao". 4186 * @since 1.8 4187 */ 4188 MIAO, 4189 4190 /** 4191 * Unicode script "Caucasian Albanian". 4192 * @since 9 4193 */ 4194 CAUCASIAN_ALBANIAN, 4195 4196 /** 4197 * Unicode script "Bassa Vah". 4198 * @since 9 4199 */ 4200 BASSA_VAH, 4201 4202 /** 4203 * Unicode script "Duployan". 4204 * @since 9 4205 */ 4206 DUPLOYAN, 4207 4208 /** 4209 * Unicode script "Elbasan". 4210 * @since 9 4211 */ 4212 ELBASAN, 4213 4214 /** 4215 * Unicode script "Grantha". 4216 * @since 9 4217 */ 4218 GRANTHA, 4219 4220 /** 4221 * Unicode script "Pahawh Hmong". 4222 * @since 9 4223 */ 4224 PAHAWH_HMONG, 4225 4226 /** 4227 * Unicode script "Khojki". 
4228 * @since 9 4229 */ 4230 KHOJKI, 4231 4232 /** 4233 * Unicode script "Linear A". 4234 * @since 9 4235 */ 4236 LINEAR_A, 4237 4238 /** 4239 * Unicode script "Mahajani". 4240 * @since 9 4241 */ 4242 MAHAJANI, 4243 4244 /** 4245 * Unicode script "Manichaean". 4246 * @since 9 4247 */ 4248 MANICHAEAN, 4249 4250 /** 4251 * Unicode script "Mende Kikakui". 4252 * @since 9 4253 */ 4254 MENDE_KIKAKUI, 4255 4256 /** 4257 * Unicode script "Modi". 4258 * @since 9 4259 */ 4260 MODI, 4261 4262 /** 4263 * Unicode script "Mro". 4264 * @since 9 4265 */ 4266 MRO, 4267 4268 /** 4269 * Unicode script "Old North Arabian". 4270 * @since 9 4271 */ 4272 OLD_NORTH_ARABIAN, 4273 4274 /** 4275 * Unicode script "Nabataean". 4276 * @since 9 4277 */ 4278 NABATAEAN, 4279 4280 /** 4281 * Unicode script "Palmyrene". 4282 * @since 9 4283 */ 4284 PALMYRENE, 4285 4286 /** 4287 * Unicode script "Pau Cin Hau". 4288 * @since 9 4289 */ 4290 PAU_CIN_HAU, 4291 4292 /** 4293 * Unicode script "Old Permic". 4294 * @since 9 4295 */ 4296 OLD_PERMIC, 4297 4298 /** 4299 * Unicode script "Psalter Pahlavi". 4300 * @since 9 4301 */ 4302 PSALTER_PAHLAVI, 4303 4304 /** 4305 * Unicode script "Siddham". 4306 * @since 9 4307 */ 4308 SIDDHAM, 4309 4310 /** 4311 * Unicode script "Khudawadi". 4312 * @since 9 4313 */ 4314 KHUDAWADI, 4315 4316 /** 4317 * Unicode script "Tirhuta". 4318 * @since 9 4319 */ 4320 TIRHUTA, 4321 4322 /** 4323 * Unicode script "Warang Citi". 4324 * @since 9 4325 */ 4326 WARANG_CITI, 4327 4328 /** 4329 * Unicode script "Ahom". 4330 * @since 9 4331 */ 4332 AHOM, 4333 4334 /** 4335 * Unicode script "Anatolian Hieroglyphs". 4336 * @since 9 4337 */ 4338 ANATOLIAN_HIEROGLYPHS, 4339 4340 /** 4341 * Unicode script "Hatran". 4342 * @since 9 4343 */ 4344 HATRAN, 4345 4346 /** 4347 * Unicode script "Multani". 4348 * @since 9 4349 */ 4350 MULTANI, 4351 4352 /** 4353 * Unicode script "Old Hungarian". 4354 * @since 9 4355 */ 4356 OLD_HUNGARIAN, 4357 4358 /** 4359 * Unicode script "SignWriting". 
4360 * @since 9 4361 */ 4362 SIGNWRITING, 4363 4364 /** 4365 * Unicode script "Unknown". 4366 */ 4367 UNKNOWN; 4368 4369 private static final int[] scriptStarts = { 4370 0x0000, // 0000..0040; COMMON 4371 0x0041, // 0041..005A; LATIN 4372 0x005B, // 005B..0060; COMMON 4373 0x0061, // 0061..007A; LATIN 4374 0x007B, // 007B..00A9; COMMON 4375 0x00AA, // 00AA ; LATIN 4376 0x00AB, // 00AB..00B9; COMMON 4377 0x00BA, // 00BA ; LATIN 4378 0x00BB, // 00BB..00BF; COMMON 4379 0x00C0, // 00C0..00D6; LATIN 4380 0x00D7, // 00D7 ; COMMON 4381 0x00D8, // 00D8..00F6; LATIN 4382 0x00F7, // 00F7 ; COMMON 4383 0x00F8, // 00F8..02B8; LATIN 4384 0x02B9, // 02B9..02DF; COMMON 4385 0x02E0, // 02E0..02E4; LATIN 4386 0x02E5, // 02E5..02E9; COMMON 4387 0x02EA, // 02EA..02EB; BOPOMOFO 4388 0x02EC, // 02EC..02FF; COMMON 4389 0x0300, // 0300..036F; INHERITED 4390 0x0370, // 0370..0373; GREEK 4391 0x0374, // 0374 ; COMMON 4392 0x0375, // 0375..0377; GREEK 4393 0x0378, // 0378..0379; UNKNOWN 4394 0x037A, // 037A..037D; GREEK 4395 0x037E, // 037E ; COMMON 4396 0x037F, // 037F ; GREEK 4397 0x0380, // 0380..0383; UNKNOWN 4398 0x0384, // 0384 ; GREEK 4399 0x0385, // 0385 ; COMMON 4400 0x0386, // 0386 ; GREEK 4401 0x0387, // 0387 ; COMMON 4402 0x0388, // 0388..038A; GREEK 4403 0x038B, // 038B ; UNKNOWN 4404 0x038C, // 038C ; GREEK 4405 0x038D, // 038D ; UNKNOWN 4406 0x038E, // 038E..03A1; GREEK 4407 0x03A2, // 03A2 ; UNKNOWN 4408 0x03A3, // 03A3..03E1; GREEK 4409 0x03E2, // 03E2..03EF; COPTIC 4410 0x03F0, // 03F0..03FF; GREEK 4411 0x0400, // 0400..0484; CYRILLIC 4412 0x0485, // 0485..0486; INHERITED 4413 0x0487, // 0487..052F; CYRILLIC 4414 0x0530, // 0530 ; UNKNOWN 4415 0x0531, // 0531..0556; ARMENIAN 4416 0x0557, // 0557..0558; UNKNOWN 4417 0x0559, // 0559..055F; ARMENIAN 4418 0x0560, // 0560 ; UNKNOWN 4419 0x0561, // 0561..0587; ARMENIAN 4420 0x0588, // 0588 ; UNKNOWN 4421 0x0589, // 0589 ; COMMON 4422 0x058A, // 058A ; ARMENIAN 4423 0x058B, // 058B..058C; UNKNOWN 4424 0x058D, // 058D..058F; 
ARMENIAN 4425 0x0590, // 0590 ; UNKNOWN 4426 0x0591, // 0591..05C7; HEBREW 4427 0x05C8, // 05C8..05CF; UNKNOWN 4428 0x05D0, // 05D0..05EA; HEBREW 4429 0x05EB, // 05EB..05EF; UNKNOWN 4430 0x05F0, // 05F0..05F4; HEBREW 4431 0x05F5, // 05F5..05FF; UNKNOWN 4432 0x0600, // 0600..0604; ARABIC 4433 0x0605, // 0605 ; COMMON 4434 0x0606, // 0606..060B; ARABIC 4435 0x060C, // 060C ; COMMON 4436 0x060D, // 060D..061A; ARABIC 4437 0x061B, // 061B..061C; COMMON 4438 0x061D, // 061D ; UNKNOWN 4439 0x061E, // 061E ; ARABIC 4440 0x061F, // 061F ; COMMON 4441 0x0620, // 0620..063F; ARABIC 4442 0x0640, // 0640 ; COMMON 4443 0x0641, // 0641..064A; ARABIC 4444 0x064B, // 064B..0655; INHERITED 4445 0x0656, // 0656..066F; ARABIC 4446 0x0670, // 0670 ; INHERITED 4447 0x0671, // 0671..06DC; ARABIC 4448 0x06DD, // 06DD ; COMMON 4449 0x06DE, // 06DE..06FF; ARABIC 4450 0x0700, // 0700..070D; SYRIAC 4451 0x070E, // 070E ; UNKNOWN 4452 0x070F, // 070F..074A; SYRIAC 4453 0x074B, // 074B..074C; UNKNOWN 4454 0x074D, // 074D..074F; SYRIAC 4455 0x0750, // 0750..077F; ARABIC 4456 0x0780, // 0780..07B1; THAANA 4457 0x07B2, // 07B2..07BF; UNKNOWN 4458 0x07C0, // 07C0..07FA; NKO 4459 0x07FB, // 07FB..07FF; UNKNOWN 4460 0x0800, // 0800..082D; SAMARITAN 4461 0x082E, // 082E..082F; UNKNOWN 4462 0x0830, // 0830..083E; SAMARITAN 4463 0x083F, // 083F ; UNKNOWN 4464 0x0840, // 0840..085B; MANDAIC 4465 0x085C, // 085C..085D; UNKNOWN 4466 0x085E, // 085E ; MANDAIC 4467 0x085F, // 085F..089F; UNKNOWN 4468 0x08A0, // 08A0..08B4; ARABIC 4469 0x08B5, // 08B5..08E2; UNKNOWN 4470 0x08E3, // 08E3..08FF; ARABIC 4471 0x0900, // 0900..0950; DEVANAGARI 4472 0x0951, // 0951..0952; INHERITED 4473 0x0953, // 0953..0963; DEVANAGARI 4474 0x0964, // 0964..0965; COMMON 4475 0x0966, // 0966..097F; DEVANAGARI 4476 0x0980, // 0980..0983; BENGALI 4477 0x0984, // 0984 ; UNKNOWN 4478 0x0985, // 0985..098C; BENGALI 4479 0x098D, // 098D..098E; UNKNOWN 4480 0x098F, // 098F..0990; BENGALI 4481 0x0991, // 0991..0992; UNKNOWN 4482 0x0993, 
// 0993..09A8; BENGALI 4483 0x09A9, // 09A9 ; UNKNOWN 4484 0x09AA, // 09AA..09B0; BENGALI 4485 0x09B1, // 09B1 ; UNKNOWN 4486 0x09B2, // 09B2 ; BENGALI 4487 0x09B3, // 09B3..09B5; UNKNOWN 4488 0x09B6, // 09B6..09B9; BENGALI 4489 0x09BA, // 09BA..09BB; UNKNOWN 4490 0x09BC, // 09BC..09C4; BENGALI 4491 0x09C5, // 09C5..09C6; UNKNOWN 4492 0x09C7, // 09C7..09C8; BENGALI 4493 0x09C9, // 09C9..09CA; UNKNOWN 4494 0x09CB, // 09CB..09CE; BENGALI 4495 0x09CF, // 09CF..09D6; UNKNOWN 4496 0x09D7, // 09D7 ; BENGALI 4497 0x09D8, // 09D8..09DB; UNKNOWN 4498 0x09DC, // 09DC..09DD; BENGALI 4499 0x09DE, // 09DE ; UNKNOWN 4500 0x09DF, // 09DF..09E3; BENGALI 4501 0x09E4, // 09E4..09E5; UNKNOWN 4502 0x09E6, // 09E6..09FB; BENGALI 4503 0x09FC, // 09FC..0A00; UNKNOWN 4504 0x0A01, // 0A01..0A03; GURMUKHI 4505 0x0A04, // 0A04 ; UNKNOWN 4506 0x0A05, // 0A05..0A0A; GURMUKHI 4507 0x0A0B, // 0A0B..0A0E; UNKNOWN 4508 0x0A0F, // 0A0F..0A10; GURMUKHI 4509 0x0A11, // 0A11..0A12; UNKNOWN 4510 0x0A13, // 0A13..0A28; GURMUKHI 4511 0x0A29, // 0A29 ; UNKNOWN 4512 0x0A2A, // 0A2A..0A30; GURMUKHI 4513 0x0A31, // 0A31 ; UNKNOWN 4514 0x0A32, // 0A32..0A33; GURMUKHI 4515 0x0A34, // 0A34 ; UNKNOWN 4516 0x0A35, // 0A35..0A36; GURMUKHI 4517 0x0A37, // 0A37 ; UNKNOWN 4518 0x0A38, // 0A38..0A39; GURMUKHI 4519 0x0A3A, // 0A3A..0A3B; UNKNOWN 4520 0x0A3C, // 0A3C ; GURMUKHI 4521 0x0A3D, // 0A3D ; UNKNOWN 4522 0x0A3E, // 0A3E..0A42; GURMUKHI 4523 0x0A43, // 0A43..0A46; UNKNOWN 4524 0x0A47, // 0A47..0A48; GURMUKHI 4525 0x0A49, // 0A49..0A4A; UNKNOWN 4526 0x0A4B, // 0A4B..0A4D; GURMUKHI 4527 0x0A4E, // 0A4E..0A50; UNKNOWN 4528 0x0A51, // 0A51 ; GURMUKHI 4529 0x0A52, // 0A52..0A58; UNKNOWN 4530 0x0A59, // 0A59..0A5C; GURMUKHI 4531 0x0A5D, // 0A5D ; UNKNOWN 4532 0x0A5E, // 0A5E ; GURMUKHI 4533 0x0A5F, // 0A5F..0A65; UNKNOWN 4534 0x0A66, // 0A66..0A75; GURMUKHI 4535 0x0A76, // 0A76..0A80; UNKNOWN 4536 0x0A81, // 0A81..0A83; GUJARATI 4537 0x0A84, // 0A84 ; UNKNOWN 4538 0x0A85, // 0A85..0A8D; GUJARATI 4539 0x0A8E, // 0A8E ; 
UNKNOWN 4540 0x0A8F, // 0A8F..0A91; GUJARATI 4541 0x0A92, // 0A92 ; UNKNOWN 4542 0x0A93, // 0A93..0AA8; GUJARATI 4543 0x0AA9, // 0AA9 ; UNKNOWN 4544 0x0AAA, // 0AAA..0AB0; GUJARATI 4545 0x0AB1, // 0AB1 ; UNKNOWN 4546 0x0AB2, // 0AB2..0AB3; GUJARATI 4547 0x0AB4, // 0AB4 ; UNKNOWN 4548 0x0AB5, // 0AB5..0AB9; GUJARATI 4549 0x0ABA, // 0ABA..0ABB; UNKNOWN 4550 0x0ABC, // 0ABC..0AC5; GUJARATI 4551 0x0AC6, // 0AC6 ; UNKNOWN 4552 0x0AC7, // 0AC7..0AC9; GUJARATI 4553 0x0ACA, // 0ACA ; UNKNOWN 4554 0x0ACB, // 0ACB..0ACD; GUJARATI 4555 0x0ACE, // 0ACE..0ACF; UNKNOWN 4556 0x0AD0, // 0AD0 ; GUJARATI 4557 0x0AD1, // 0AD1..0ADF; UNKNOWN 4558 0x0AE0, // 0AE0..0AE3; GUJARATI 4559 0x0AE4, // 0AE4..0AE5; UNKNOWN 4560 0x0AE6, // 0AE6..0AF1; GUJARATI 4561 0x0AF2, // 0AF2..0AF8; UNKNOWN 4562 0x0AF9, // 0AF9 ; GUJARATI 4563 0x0AFA, // 0AFA..0B00; UNKNOWN 4564 0x0B01, // 0B01..0B03; ORIYA 4565 0x0B04, // 0B04 ; UNKNOWN 4566 0x0B05, // 0B05..0B0C; ORIYA 4567 0x0B0D, // 0B0D..0B0E; UNKNOWN 4568 0x0B0F, // 0B0F..0B10; ORIYA 4569 0x0B11, // 0B11..0B12; UNKNOWN 4570 0x0B13, // 0B13..0B28; ORIYA 4571 0x0B29, // 0B29 ; UNKNOWN 4572 0x0B2A, // 0B2A..0B30; ORIYA 4573 0x0B31, // 0B31 ; UNKNOWN 4574 0x0B32, // 0B32..0B33; ORIYA 4575 0x0B34, // 0B34 ; UNKNOWN 4576 0x0B35, // 0B35..0B39; ORIYA 4577 0x0B3A, // 0B3A..0B3B; UNKNOWN 4578 0x0B3C, // 0B3C..0B44; ORIYA 4579 0x0B45, // 0B45..0B46; UNKNOWN 4580 0x0B47, // 0B47..0B48; ORIYA 4581 0x0B49, // 0B49..0B4A; UNKNOWN 4582 0x0B4B, // 0B4B..0B4D; ORIYA 4583 0x0B4E, // 0B4E..0B55; UNKNOWN 4584 0x0B56, // 0B56..0B57; ORIYA 4585 0x0B58, // 0B58..0B5B; UNKNOWN 4586 0x0B5C, // 0B5C..0B5D; ORIYA 4587 0x0B5E, // 0B5E ; UNKNOWN 4588 0x0B5F, // 0B5F..0B63; ORIYA 4589 0x0B64, // 0B64..0B65; UNKNOWN 4590 0x0B66, // 0B66..0B77; ORIYA 4591 0x0B78, // 0B78..0B81; UNKNOWN 4592 0x0B82, // 0B82..0B83; TAMIL 4593 0x0B84, // 0B84 ; UNKNOWN 4594 0x0B85, // 0B85..0B8A; TAMIL 4595 0x0B8B, // 0B8B..0B8D; UNKNOWN 4596 0x0B8E, // 0B8E..0B90; TAMIL 4597 0x0B91, // 0B91 ; UNKNOWN 
4598 0x0B92, // 0B92..0B95; TAMIL 4599 0x0B96, // 0B96..0B98; UNKNOWN 4600 0x0B99, // 0B99..0B9A; TAMIL 4601 0x0B9B, // 0B9B ; UNKNOWN 4602 0x0B9C, // 0B9C ; TAMIL 4603 0x0B9D, // 0B9D ; UNKNOWN 4604 0x0B9E, // 0B9E..0B9F; TAMIL 4605 0x0BA0, // 0BA0..0BA2; UNKNOWN 4606 0x0BA3, // 0BA3..0BA4; TAMIL 4607 0x0BA5, // 0BA5..0BA7; UNKNOWN 4608 0x0BA8, // 0BA8..0BAA; TAMIL 4609 0x0BAB, // 0BAB..0BAD; UNKNOWN 4610 0x0BAE, // 0BAE..0BB9; TAMIL 4611 0x0BBA, // 0BBA..0BBD; UNKNOWN 4612 0x0BBE, // 0BBE..0BC2; TAMIL 4613 0x0BC3, // 0BC3..0BC5; UNKNOWN 4614 0x0BC6, // 0BC6..0BC8; TAMIL 4615 0x0BC9, // 0BC9 ; UNKNOWN 4616 0x0BCA, // 0BCA..0BCD; TAMIL 4617 0x0BCE, // 0BCE..0BCF; UNKNOWN 4618 0x0BD0, // 0BD0 ; TAMIL 4619 0x0BD1, // 0BD1..0BD6; UNKNOWN 4620 0x0BD7, // 0BD7 ; TAMIL 4621 0x0BD8, // 0BD8..0BE5; UNKNOWN 4622 0x0BE6, // 0BE6..0BFA; TAMIL 4623 0x0BFB, // 0BFB..0BFF; UNKNOWN 4624 0x0C00, // 0C00..0C03; TELUGU 4625 0x0C04, // 0C04 ; UNKNOWN 4626 0x0C05, // 0C05..0C0C; TELUGU 4627 0x0C0D, // 0C0D ; UNKNOWN 4628 0x0C0E, // 0C0E..0C10; TELUGU 4629 0x0C11, // 0C11 ; UNKNOWN 4630 0x0C12, // 0C12..0C28; TELUGU 4631 0x0C29, // 0C29 ; UNKNOWN 4632 0x0C2A, // 0C2A..0C39; TELUGU 4633 0x0C3A, // 0C3A..0C3C; UNKNOWN 4634 0x0C3D, // 0C3D..0C44; TELUGU 4635 0x0C45, // 0C45 ; UNKNOWN 4636 0x0C46, // 0C46..0C48; TELUGU 4637 0x0C49, // 0C49 ; UNKNOWN 4638 0x0C4A, // 0C4A..0C4D; TELUGU 4639 0x0C4E, // 0C4E..0C54; UNKNOWN 4640 0x0C55, // 0C55..0C56; TELUGU 4641 0x0C57, // 0C57 ; UNKNOWN 4642 0x0C58, // 0C58..0C5A; TELUGU 4643 0x0C5B, // 0C5B..0C5F; UNKNOWN 4644 0x0C60, // 0C60..0C63; TELUGU 4645 0x0C64, // 0C64..0C65; UNKNOWN 4646 0x0C66, // 0C66..0C6F; TELUGU 4647 0x0C70, // 0C70..0C77; UNKNOWN 4648 0x0C78, // 0C78..0C7F; TELUGU 4649 0x0C80, // 0C80 ; UNKNOWN 4650 0x0C81, // 0C81..0C83; KANNADA 4651 0x0C84, // 0C84 ; UNKNOWN 4652 0x0C85, // 0C85..0C8C; KANNADA 4653 0x0C8D, // 0C8D ; UNKNOWN 4654 0x0C8E, // 0C8E..0C90; KANNADA 4655 0x0C91, // 0C91 ; UNKNOWN 4656 0x0C92, // 0C92..0CA8; KANNADA 
4657 0x0CA9, // 0CA9 ; UNKNOWN 4658 0x0CAA, // 0CAA..0CB3; KANNADA 4659 0x0CB4, // 0CB4 ; UNKNOWN 4660 0x0CB5, // 0CB5..0CB9; KANNADA 4661 0x0CBA, // 0CBA..0CBB; UNKNOWN 4662 0x0CBC, // 0CBC..0CC4; KANNADA 4663 0x0CC5, // 0CC5 ; UNKNOWN 4664 0x0CC6, // 0CC6..0CC8; KANNADA 4665 0x0CC9, // 0CC9 ; UNKNOWN 4666 0x0CCA, // 0CCA..0CCD; KANNADA 4667 0x0CCE, // 0CCE..0CD4; UNKNOWN 4668 0x0CD5, // 0CD5..0CD6; KANNADA 4669 0x0CD7, // 0CD7..0CDD; UNKNOWN 4670 0x0CDE, // 0CDE ; KANNADA 4671 0x0CDF, // 0CDF ; UNKNOWN 4672 0x0CE0, // 0CE0..0CE3; KANNADA 4673 0x0CE4, // 0CE4..0CE5; UNKNOWN 4674 0x0CE6, // 0CE6..0CEF; KANNADA 4675 0x0CF0, // 0CF0 ; UNKNOWN 4676 0x0CF1, // 0CF1..0CF2; KANNADA 4677 0x0CF3, // 0CF3..0D00; UNKNOWN 4678 0x0D01, // 0D01..0D03; MALAYALAM 4679 0x0D04, // 0D04 ; UNKNOWN 4680 0x0D05, // 0D05..0D0C; MALAYALAM 4681 0x0D0D, // 0D0D ; UNKNOWN 4682 0x0D0E, // 0D0E..0D10; MALAYALAM 4683 0x0D11, // 0D11 ; UNKNOWN 4684 0x0D12, // 0D12..0D3A; MALAYALAM 4685 0x0D3B, // 0D3B..0D3C; UNKNOWN 4686 0x0D3D, // 0D3D..0D44; MALAYALAM 4687 0x0D45, // 0D45 ; UNKNOWN 4688 0x0D46, // 0D46..0D48; MALAYALAM 4689 0x0D49, // 0D49 ; UNKNOWN 4690 0x0D4A, // 0D4A..0D4E; MALAYALAM 4691 0x0D4F, // 0D4F..0D56; UNKNOWN 4692 0x0D57, // 0D57 ; MALAYALAM 4693 0x0D58, // 0D58..0D5E; UNKNOWN 4694 0x0D5F, // 0D5F..0D63; MALAYALAM 4695 0x0D64, // 0D64..0D65; UNKNOWN 4696 0x0D66, // 0D66..0D75; MALAYALAM 4697 0x0D76, // 0D76..0D78; UNKNOWN 4698 0x0D79, // 0D79..0D7F; MALAYALAM 4699 0x0D80, // 0D80..0D81; UNKNOWN 4700 0x0D82, // 0D82..0D83; SINHALA 4701 0x0D84, // 0D84 ; UNKNOWN 4702 0x0D85, // 0D85..0D96; SINHALA 4703 0x0D97, // 0D97..0D99; UNKNOWN 4704 0x0D9A, // 0D9A..0DB1; SINHALA 4705 0x0DB2, // 0DB2 ; UNKNOWN 4706 0x0DB3, // 0DB3..0DBB; SINHALA 4707 0x0DBC, // 0DBC ; UNKNOWN 4708 0x0DBD, // 0DBD ; SINHALA 4709 0x0DBE, // 0DBE..0DBF; UNKNOWN 4710 0x0DC0, // 0DC0..0DC6; SINHALA 4711 0x0DC7, // 0DC7..0DC9; UNKNOWN 4712 0x0DCA, // 0DCA ; SINHALA 4713 0x0DCB, // 0DCB..0DCE; UNKNOWN 4714 0x0DCF, // 
0DCF..0DD4; SINHALA 4715 0x0DD5, // 0DD5 ; UNKNOWN 4716 0x0DD6, // 0DD6 ; SINHALA 4717 0x0DD7, // 0DD7 ; UNKNOWN 4718 0x0DD8, // 0DD8..0DDF; SINHALA 4719 0x0DE0, // 0DE0..0DE5; UNKNOWN 4720 0x0DE6, // 0DE6..0DEF; SINHALA 4721 0x0DF0, // 0DF0..0DF1; UNKNOWN 4722 0x0DF2, // 0DF2..0DF4; SINHALA 4723 0x0DF5, // 0DF5..0E00; UNKNOWN 4724 0x0E01, // 0E01..0E3A; THAI 4725 0x0E3B, // 0E3B..0E3E; UNKNOWN 4726 0x0E3F, // 0E3F ; COMMON 4727 0x0E40, // 0E40..0E5B; THAI 4728 0x0E5C, // 0E5C..0E80; UNKNOWN 4729 0x0E81, // 0E81..0E82; LAO 4730 0x0E83, // 0E83 ; UNKNOWN 4731 0x0E84, // 0E84 ; LAO 4732 0x0E85, // 0E85..0E86; UNKNOWN 4733 0x0E87, // 0E87..0E88; LAO 4734 0x0E89, // 0E89 ; UNKNOWN 4735 0x0E8A, // 0E8A ; LAO 4736 0x0E8B, // 0E8B..0E8C; UNKNOWN 4737 0x0E8D, // 0E8D ; LAO 4738 0x0E8E, // 0E8E..0E93; UNKNOWN 4739 0x0E94, // 0E94..0E97; LAO 4740 0x0E98, // 0E98 ; UNKNOWN 4741 0x0E99, // 0E99..0E9F; LAO 4742 0x0EA0, // 0EA0 ; UNKNOWN 4743 0x0EA1, // 0EA1..0EA3; LAO 4744 0x0EA4, // 0EA4 ; UNKNOWN 4745 0x0EA5, // 0EA5 ; LAO 4746 0x0EA6, // 0EA6 ; UNKNOWN 4747 0x0EA7, // 0EA7 ; LAO 4748 0x0EA8, // 0EA8..0EA9; UNKNOWN 4749 0x0EAA, // 0EAA..0EAB; LAO 4750 0x0EAC, // 0EAC ; UNKNOWN 4751 0x0EAD, // 0EAD..0EB9; LAO 4752 0x0EBA, // 0EBA ; UNKNOWN 4753 0x0EBB, // 0EBB..0EBD; LAO 4754 0x0EBE, // 0EBE..0EBF; UNKNOWN 4755 0x0EC0, // 0EC0..0EC4; LAO 4756 0x0EC5, // 0EC5 ; UNKNOWN 4757 0x0EC6, // 0EC6 ; LAO 4758 0x0EC7, // 0EC7 ; UNKNOWN 4759 0x0EC8, // 0EC8..0ECD; LAO 4760 0x0ECE, // 0ECE..0ECF; UNKNOWN 4761 0x0ED0, // 0ED0..0ED9; LAO 4762 0x0EDA, // 0EDA..0EDB; UNKNOWN 4763 0x0EDC, // 0EDC..0EDF; LAO 4764 0x0EE0, // 0EE0..0EFF; UNKNOWN 4765 0x0F00, // 0F00..0F47; TIBETAN 4766 0x0F48, // 0F48 ; UNKNOWN 4767 0x0F49, // 0F49..0F6C; TIBETAN 4768 0x0F6D, // 0F6D..0F70; UNKNOWN 4769 0x0F71, // 0F71..0F97; TIBETAN 4770 0x0F98, // 0F98 ; UNKNOWN 4771 0x0F99, // 0F99..0FBC; TIBETAN 4772 0x0FBD, // 0FBD ; UNKNOWN 4773 0x0FBE, // 0FBE..0FCC; TIBETAN 4774 0x0FCD, // 0FCD ; UNKNOWN 4775 0x0FCE, // 
0FCE..0FD4; TIBETAN 4776 0x0FD5, // 0FD5..0FD8; COMMON 4777 0x0FD9, // 0FD9..0FDA; TIBETAN 4778 0x0FDB, // 0FDB..0FFF; UNKNOWN 4779 0x1000, // 1000..109F; MYANMAR 4780 0x10A0, // 10A0..10C5; GEORGIAN 4781 0x10C6, // 10C6 ; UNKNOWN 4782 0x10C7, // 10C7 ; GEORGIAN 4783 0x10C8, // 10C8..10CC; UNKNOWN 4784 0x10CD, // 10CD ; GEORGIAN 4785 0x10CE, // 10CE..10CF; UNKNOWN 4786 0x10D0, // 10D0..10FA; GEORGIAN 4787 0x10FB, // 10FB ; COMMON 4788 0x10FC, // 10FC..10FF; GEORGIAN 4789 0x1100, // 1100..11FF; HANGUL 4790 0x1200, // 1200..1248; ETHIOPIC 4791 0x1249, // 1249 ; UNKNOWN 4792 0x124A, // 124A..124D; ETHIOPIC 4793 0x124E, // 124E..124F; UNKNOWN 4794 0x1250, // 1250..1256; ETHIOPIC 4795 0x1257, // 1257 ; UNKNOWN 4796 0x1258, // 1258 ; ETHIOPIC 4797 0x1259, // 1259 ; UNKNOWN 4798 0x125A, // 125A..125D; ETHIOPIC 4799 0x125E, // 125E..125F; UNKNOWN 4800 0x1260, // 1260..1288; ETHIOPIC 4801 0x1289, // 1289 ; UNKNOWN 4802 0x128A, // 128A..128D; ETHIOPIC 4803 0x128E, // 128E..128F; UNKNOWN 4804 0x1290, // 1290..12B0; ETHIOPIC 4805 0x12B1, // 12B1 ; UNKNOWN 4806 0x12B2, // 12B2..12B5; ETHIOPIC 4807 0x12B6, // 12B6..12B7; UNKNOWN 4808 0x12B8, // 12B8..12BE; ETHIOPIC 4809 0x12BF, // 12BF ; UNKNOWN 4810 0x12C0, // 12C0 ; ETHIOPIC 4811 0x12C1, // 12C1 ; UNKNOWN 4812 0x12C2, // 12C2..12C5; ETHIOPIC 4813 0x12C6, // 12C6..12C7; UNKNOWN 4814 0x12C8, // 12C8..12D6; ETHIOPIC 4815 0x12D7, // 12D7 ; UNKNOWN 4816 0x12D8, // 12D8..1310; ETHIOPIC 4817 0x1311, // 1311 ; UNKNOWN 4818 0x1312, // 1312..1315; ETHIOPIC 4819 0x1316, // 1316..1317; UNKNOWN 4820 0x1318, // 1318..135A; ETHIOPIC 4821 0x135B, // 135B..135C; UNKNOWN 4822 0x135D, // 135D..137C; ETHIOPIC 4823 0x137D, // 137D..137F; UNKNOWN 4824 0x1380, // 1380..1399; ETHIOPIC 4825 0x139A, // 139A..139F; UNKNOWN 4826 0x13A0, // 13A0..13F5; CHEROKEE 4827 0x13F6, // 13F6..13F7; UNKNOWN 4828 0x13F8, // 13F8..13FD; CHEROKEE 4829 0x13FE, // 13FE..13FF; UNKNOWN 4830 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 4831 0x1680, // 1680..169C; OGHAM 4832 
0x169D, // 169D..169F; UNKNOWN 4833 0x16A0, // 16A0..16EA; RUNIC 4834 0x16EB, // 16EB..16ED; COMMON 4835 0x16EE, // 16EE..16F8; RUNIC 4836 0x16F9, // 16F9..16FF; UNKNOWN 4837 0x1700, // 1700..170C; TAGALOG 4838 0x170D, // 170D ; UNKNOWN 4839 0x170E, // 170E..1714; TAGALOG 4840 0x1715, // 1715..171F; UNKNOWN 4841 0x1720, // 1720..1734; HANUNOO 4842 0x1735, // 1735..1736; COMMON 4843 0x1737, // 1737..173F; UNKNOWN 4844 0x1740, // 1740..1753; BUHID 4845 0x1754, // 1754..175F; UNKNOWN 4846 0x1760, // 1760..176C; TAGBANWA 4847 0x176D, // 176D ; UNKNOWN 4848 0x176E, // 176E..1770; TAGBANWA 4849 0x1771, // 1771 ; UNKNOWN 4850 0x1772, // 1772..1773; TAGBANWA 4851 0x1774, // 1774..177F; UNKNOWN 4852 0x1780, // 1780..17DD; KHMER 4853 0x17DE, // 17DE..17DF; UNKNOWN 4854 0x17E0, // 17E0..17E9; KHMER 4855 0x17EA, // 17EA..17EF; UNKNOWN 4856 0x17F0, // 17F0..17F9; KHMER 4857 0x17FA, // 17FA..17FF; UNKNOWN 4858 0x1800, // 1800..1801; MONGOLIAN 4859 0x1802, // 1802..1803; COMMON 4860 0x1804, // 1804 ; MONGOLIAN 4861 0x1805, // 1805 ; COMMON 4862 0x1806, // 1806..180E; MONGOLIAN 4863 0x180F, // 180F ; UNKNOWN 4864 0x1810, // 1810..1819; MONGOLIAN 4865 0x181A, // 181A..181F; UNKNOWN 4866 0x1820, // 1820..1877; MONGOLIAN 4867 0x1878, // 1878..187F; UNKNOWN 4868 0x1880, // 1880..18AA; MONGOLIAN 4869 0x18AB, // 18AB..18AF; UNKNOWN 4870 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 4871 0x18F6, // 18F6..18FF; UNKNOWN 4872 0x1900, // 1900..191E; LIMBU 4873 0x191F, // 191F ; UNKNOWN 4874 0x1920, // 1920..192B; LIMBU 4875 0x192C, // 192C..192F; UNKNOWN 4876 0x1930, // 1930..193B; LIMBU 4877 0x193C, // 193C..193F; UNKNOWN 4878 0x1940, // 1940 ; LIMBU 4879 0x1941, // 1941..1943; UNKNOWN 4880 0x1944, // 1944..194F; LIMBU 4881 0x1950, // 1950..196D; TAI_LE 4882 0x196E, // 196E..196F; UNKNOWN 4883 0x1970, // 1970..1974; TAI_LE 4884 0x1975, // 1975..197F; UNKNOWN 4885 0x1980, // 1980..19AB; NEW_TAI_LUE 4886 0x19AC, // 19AC..19AF; UNKNOWN 4887 0x19B0, // 19B0..19C9; NEW_TAI_LUE 4888 0x19CA, // 
19CA..19CF; UNKNOWN 4889 0x19D0, // 19D0..19DA; NEW_TAI_LUE 4890 0x19DB, // 19DB..19DD; UNKNOWN 4891 0x19DE, // 19DE..19DF; NEW_TAI_LUE 4892 0x19E0, // 19E0..19FF; KHMER 4893 0x1A00, // 1A00..1A1B; BUGINESE 4894 0x1A1C, // 1A1C..1A1D; UNKNOWN 4895 0x1A1E, // 1A1E..1A1F; BUGINESE 4896 0x1A20, // 1A20..1A5E; TAI_THAM 4897 0x1A5F, // 1A5F ; UNKNOWN 4898 0x1A60, // 1A60..1A7C; TAI_THAM 4899 0x1A7D, // 1A7D..1A7E; UNKNOWN 4900 0x1A7F, // 1A7F..1A89; TAI_THAM 4901 0x1A8A, // 1A8A..1A8F; UNKNOWN 4902 0x1A90, // 1A90..1A99; TAI_THAM 4903 0x1A9A, // 1A9A..1A9F; UNKNOWN 4904 0x1AA0, // 1AA0..1AAD; TAI_THAM 4905 0x1AAE, // 1AAE..1AAF; UNKNOWN 4906 0x1AB0, // 1AB0..1ABE; INHERITED 4907 0x1ABF, // 1ABF..1AFF; UNKNOWN 4908 0x1B00, // 1B00..1B4B; BALINESE 4909 0x1B4C, // 1B4C..1B4F; UNKNOWN 4910 0x1B50, // 1B50..1B7C; BALINESE 4911 0x1B7D, // 1B7D..1B7F; UNKNOWN 4912 0x1B80, // 1B80..1BBF; SUNDANESE 4913 0x1BC0, // 1BC0..1BF3; BATAK 4914 0x1BF4, // 1BF4..1BFB; UNKNOWN 4915 0x1BFC, // 1BFC..1BFF; BATAK 4916 0x1C00, // 1C00..1C37; LEPCHA 4917 0x1C38, // 1C38..1C3A; UNKNOWN 4918 0x1C3B, // 1C3B..1C49; LEPCHA 4919 0x1C4A, // 1C4A..1C4C; UNKNOWN 4920 0x1C4D, // 1C4D..1C4F; LEPCHA 4921 0x1C50, // 1C50..1C7F; OL_CHIKI 4922 0x1C80, // 1C80..1CBF; UNKNOWN 4923 0x1CC0, // 1CC0..1CC7; SUNDANESE 4924 0x1CC8, // 1CC8..1CCF; UNKNOWN 4925 0x1CD0, // 1CD0..1CD2; INHERITED 4926 0x1CD3, // 1CD3 ; COMMON 4927 0x1CD4, // 1CD4..1CE0; INHERITED 4928 0x1CE1, // 1CE1 ; COMMON 4929 0x1CE2, // 1CE2..1CE8; INHERITED 4930 0x1CE9, // 1CE9..1CEC; COMMON 4931 0x1CED, // 1CED ; INHERITED 4932 0x1CEE, // 1CEE..1CF3; COMMON 4933 0x1CF4, // 1CF4 ; INHERITED 4934 0x1CF5, // 1CF5..1CF6; COMMON 4935 0x1CF7, // 1CF7 ; UNKNOWN 4936 0x1CF8, // 1CF8..1CF9; INHERITED 4937 0x1CFA, // 1CFA..1CFF; UNKNOWN 4938 0x1D00, // 1D00..1D25; LATIN 4939 0x1D26, // 1D26..1D2A; GREEK 4940 0x1D2B, // 1D2B ; CYRILLIC 4941 0x1D2C, // 1D2C..1D5C; LATIN 4942 0x1D5D, // 1D5D..1D61; GREEK 4943 0x1D62, // 1D62..1D65; LATIN 4944 0x1D66, // 
1D66..1D6A; GREEK 4945 0x1D6B, // 1D6B..1D77; LATIN 4946 0x1D78, // 1D78 ; CYRILLIC 4947 0x1D79, // 1D79..1DBE; LATIN 4948 0x1DBF, // 1DBF ; GREEK 4949 0x1DC0, // 1DC0..1DF5; INHERITED 4950 0x1DF6, // 1DF6..1DFB; UNKNOWN 4951 0x1DFC, // 1DFC..1DFF; INHERITED 4952 0x1E00, // 1E00..1EFF; LATIN 4953 0x1F00, // 1F00..1F15; GREEK 4954 0x1F16, // 1F16..1F17; UNKNOWN 4955 0x1F18, // 1F18..1F1D; GREEK 4956 0x1F1E, // 1F1E..1F1F; UNKNOWN 4957 0x1F20, // 1F20..1F45; GREEK 4958 0x1F46, // 1F46..1F47; UNKNOWN 4959 0x1F48, // 1F48..1F4D; GREEK 4960 0x1F4E, // 1F4E..1F4F; UNKNOWN 4961 0x1F50, // 1F50..1F57; GREEK 4962 0x1F58, // 1F58 ; UNKNOWN 4963 0x1F59, // 1F59 ; GREEK 4964 0x1F5A, // 1F5A ; UNKNOWN 4965 0x1F5B, // 1F5B ; GREEK 4966 0x1F5C, // 1F5C ; UNKNOWN 4967 0x1F5D, // 1F5D ; GREEK 4968 0x1F5E, // 1F5E ; UNKNOWN 4969 0x1F5F, // 1F5F..1F7D; GREEK 4970 0x1F7E, // 1F7E..1F7F; UNKNOWN 4971 0x1F80, // 1F80..1FB4; GREEK 4972 0x1FB5, // 1FB5 ; UNKNOWN 4973 0x1FB6, // 1FB6..1FC4; GREEK 4974 0x1FC5, // 1FC5 ; UNKNOWN 4975 0x1FC6, // 1FC6..1FD3; GREEK 4976 0x1FD4, // 1FD4..1FD5; UNKNOWN 4977 0x1FD6, // 1FD6..1FDB; GREEK 4978 0x1FDC, // 1FDC ; UNKNOWN 4979 0x1FDD, // 1FDD..1FEF; GREEK 4980 0x1FF0, // 1FF0..1FF1; UNKNOWN 4981 0x1FF2, // 1FF2..1FF4; GREEK 4982 0x1FF5, // 1FF5 ; UNKNOWN 4983 0x1FF6, // 1FF6..1FFE; GREEK 4984 0x1FFF, // 1FFF ; UNKNOWN 4985 0x2000, // 2000..200B; COMMON 4986 0x200C, // 200C..200D; INHERITED 4987 0x200E, // 200E..2064; COMMON 4988 0x2065, // 2065 ; UNKNOWN 4989 0x2066, // 2066..2070; COMMON 4990 0x2071, // 2071 ; LATIN 4991 0x2072, // 2072..2073; UNKNOWN 4992 0x2074, // 2074..207E; COMMON 4993 0x207F, // 207F ; LATIN 4994 0x2080, // 2080..208E; COMMON 4995 0x208F, // 208F ; UNKNOWN 4996 0x2090, // 2090..209C; LATIN 4997 0x209D, // 209D..209F; UNKNOWN 4998 0x20A0, // 20A0..20BE; COMMON 4999 0x20BF, // 20BF..20CF; UNKNOWN 5000 0x20D0, // 20D0..20F0; INHERITED 5001 0x20F1, // 20F1..20FF; UNKNOWN 5002 0x2100, // 2100..2125; COMMON 5003 0x2126, // 2126 ; 
GREEK 5004 0x2127, // 2127..2129; COMMON 5005 0x212A, // 212A..212B; LATIN 5006 0x212C, // 212C..2131; COMMON 5007 0x2132, // 2132 ; LATIN 5008 0x2133, // 2133..214D; COMMON 5009 0x214E, // 214E ; LATIN 5010 0x214F, // 214F..215F; COMMON 5011 0x2160, // 2160..2188; LATIN 5012 0x2189, // 2189..218B; COMMON 5013 0x218C, // 218C..218F; UNKNOWN 5014 0x2190, // 2190..23FA; COMMON 5015 0x23FB, // 23FB..23FF; UNKNOWN 5016 0x2400, // 2400..2426; COMMON 5017 0x2427, // 2427..243F; UNKNOWN 5018 0x2440, // 2440..244A; COMMON 5019 0x244B, // 244B..245F; UNKNOWN 5020 0x2460, // 2460..27FF; COMMON 5021 0x2800, // 2800..28FF; BRAILLE 5022 0x2900, // 2900..2B73; COMMON 5023 0x2B74, // 2B74..2B75; UNKNOWN 5024 0x2B76, // 2B76..2B95; COMMON 5025 0x2B96, // 2B96..2B97; UNKNOWN 5026 0x2B98, // 2B98..2BB9; COMMON 5027 0x2BBA, // 2BBA..2BBC; UNKNOWN 5028 0x2BBD, // 2BBD..2BC8; COMMON 5029 0x2BC9, // 2BC9 ; UNKNOWN 5030 0x2BCA, // 2BCA..2BD1; COMMON 5031 0x2BD2, // 2BD2..2BEB; UNKNOWN 5032 0x2BEC, // 2BEC..2BEF; COMMON 5033 0x2BF0, // 2BF0..2BFF; UNKNOWN 5034 0x2C00, // 2C00..2C2E; GLAGOLITIC 5035 0x2C2F, // 2C2F ; UNKNOWN 5036 0x2C30, // 2C30..2C5E; GLAGOLITIC 5037 0x2C5F, // 2C5F ; UNKNOWN 5038 0x2C60, // 2C60..2C7F; LATIN 5039 0x2C80, // 2C80..2CF3; COPTIC 5040 0x2CF4, // 2CF4..2CF8; UNKNOWN 5041 0x2CF9, // 2CF9..2CFF; COPTIC 5042 0x2D00, // 2D00..2D25; GEORGIAN 5043 0x2D26, // 2D26 ; UNKNOWN 5044 0x2D27, // 2D27 ; GEORGIAN 5045 0x2D28, // 2D28..2D2C; UNKNOWN 5046 0x2D2D, // 2D2D ; GEORGIAN 5047 0x2D2E, // 2D2E..2D2F; UNKNOWN 5048 0x2D30, // 2D30..2D67; TIFINAGH 5049 0x2D68, // 2D68..2D6E; UNKNOWN 5050 0x2D6F, // 2D6F..2D70; TIFINAGH 5051 0x2D71, // 2D71..2D7E; UNKNOWN 5052 0x2D7F, // 2D7F ; TIFINAGH 5053 0x2D80, // 2D80..2D96; ETHIOPIC 5054 0x2D97, // 2D97..2D9F; UNKNOWN 5055 0x2DA0, // 2DA0..2DA6; ETHIOPIC 5056 0x2DA7, // 2DA7 ; UNKNOWN 5057 0x2DA8, // 2DA8..2DAE; ETHIOPIC 5058 0x2DAF, // 2DAF ; UNKNOWN 5059 0x2DB0, // 2DB0..2DB6; ETHIOPIC 5060 0x2DB7, // 2DB7 ; UNKNOWN 5061 0x2DB8, 
// 2DB8..2DBE; ETHIOPIC 5062 0x2DBF, // 2DBF ; UNKNOWN 5063 0x2DC0, // 2DC0..2DC6; ETHIOPIC 5064 0x2DC7, // 2DC7 ; UNKNOWN 5065 0x2DC8, // 2DC8..2DCE; ETHIOPIC 5066 0x2DCF, // 2DCF ; UNKNOWN 5067 0x2DD0, // 2DD0..2DD6; ETHIOPIC 5068 0x2DD7, // 2DD7 ; UNKNOWN 5069 0x2DD8, // 2DD8..2DDE; ETHIOPIC 5070 0x2DDF, // 2DDF ; UNKNOWN 5071 0x2DE0, // 2DE0..2DFF; CYRILLIC 5072 0x2E00, // 2E00..2E42; COMMON 5073 0x2E43, // 2E43..2E7F; UNKNOWN 5074 0x2E80, // 2E80..2E99; HAN 5075 0x2E9A, // 2E9A ; UNKNOWN 5076 0x2E9B, // 2E9B..2EF3; HAN 5077 0x2EF4, // 2EF4..2EFF; UNKNOWN 5078 0x2F00, // 2F00..2FD5; HAN 5079 0x2FD6, // 2FD6..2FEF; UNKNOWN 5080 0x2FF0, // 2FF0..2FFB; COMMON 5081 0x2FFC, // 2FFC..2FFF; UNKNOWN 5082 0x3000, // 3000..3004; COMMON 5083 0x3005, // 3005 ; HAN 5084 0x3006, // 3006 ; COMMON 5085 0x3007, // 3007 ; HAN 5086 0x3008, // 3008..3020; COMMON 5087 0x3021, // 3021..3029; HAN 5088 0x302A, // 302A..302D; INHERITED 5089 0x302E, // 302E..302F; HANGUL 5090 0x3030, // 3030..3037; COMMON 5091 0x3038, // 3038..303B; HAN 5092 0x303C, // 303C..303F; COMMON 5093 0x3040, // 3040 ; UNKNOWN 5094 0x3041, // 3041..3096; HIRAGANA 5095 0x3097, // 3097..3098; UNKNOWN 5096 0x3099, // 3099..309A; INHERITED 5097 0x309B, // 309B..309C; COMMON 5098 0x309D, // 309D..309F; HIRAGANA 5099 0x30A0, // 30A0 ; COMMON 5100 0x30A1, // 30A1..30FA; KATAKANA 5101 0x30FB, // 30FB..30FC; COMMON 5102 0x30FD, // 30FD..30FF; KATAKANA 5103 0x3100, // 3100..3104; UNKNOWN 5104 0x3105, // 3105..312D; BOPOMOFO 5105 0x312E, // 312E..3130; UNKNOWN 5106 0x3131, // 3131..318E; HANGUL 5107 0x318F, // 318F ; UNKNOWN 5108 0x3190, // 3190..319F; COMMON 5109 0x31A0, // 31A0..31BA; BOPOMOFO 5110 0x31BB, // 31BB..31BF; UNKNOWN 5111 0x31C0, // 31C0..31E3; COMMON 5112 0x31E4, // 31E4..31EF; UNKNOWN 5113 0x31F0, // 31F0..31FF; KATAKANA 5114 0x3200, // 3200..321E; HANGUL 5115 0x321F, // 321F ; UNKNOWN 5116 0x3220, // 3220..325F; COMMON 5117 0x3260, // 3260..327E; HANGUL 5118 0x327F, // 327F..32CF; COMMON 5119 0x32D0, // 
32D0..32FE; KATAKANA 5120 0x32FF, // 32FF ; UNKNOWN 5121 0x3300, // 3300..3357; KATAKANA 5122 0x3358, // 3358..33FF; COMMON 5123 0x3400, // 3400..4DB5; HAN 5124 0x4DB6, // 4DB6..4DBF; UNKNOWN 5125 0x4DC0, // 4DC0..4DFF; COMMON 5126 0x4E00, // 4E00..9FD5; HAN 5127 0x9FD6, // 9FD6..9FFF; UNKNOWN 5128 0xA000, // A000..A48C; YI 5129 0xA48D, // A48D..A48F; UNKNOWN 5130 0xA490, // A490..A4C6; YI 5131 0xA4C7, // A4C7..A4CF; UNKNOWN 5132 0xA4D0, // A4D0..A4FF; LISU 5133 0xA500, // A500..A62B; VAI 5134 0xA62C, // A62C..A63F; UNKNOWN 5135 0xA640, // A640..A69F; CYRILLIC 5136 0xA6A0, // A6A0..A6F7; BAMUM 5137 0xA6F8, // A6F8..A6FF; UNKNOWN 5138 0xA700, // A700..A721; COMMON 5139 0xA722, // A722..A787; LATIN 5140 0xA788, // A788..A78A; COMMON 5141 0xA78B, // A78B..A7AD; LATIN 5142 0xA7AE, // A7AE..A7AF; UNKNOWN 5143 0xA7B0, // A7B0..A7B7; LATIN 5144 0xA7B8, // A7B8..A7F6; UNKNOWN 5145 0xA7F7, // A7F7..A7FF; LATIN 5146 0xA800, // A800..A82B; SYLOTI_NAGRI 5147 0xA82C, // A82C..A82F; UNKNOWN 5148 0xA830, // A830..A839; COMMON 5149 0xA83A, // A83A..A83F; UNKNOWN 5150 0xA840, // A840..A877; PHAGS_PA 5151 0xA878, // A878..A87F; UNKNOWN 5152 0xA880, // A880..A8C4; SAURASHTRA 5153 0xA8C5, // A8C5..A8CD; UNKNOWN 5154 0xA8CE, // A8CE..A8D9; SAURASHTRA 5155 0xA8DA, // A8DA..A8DF; UNKNOWN 5156 0xA8E0, // A8E0..A8FD; DEVANAGARI 5157 0xA8FE, // A8FE..A8FF; UNKNOWN 5158 0xA900, // A900..A92D; KAYAH_LI 5159 0xA92E, // A92E ; COMMON 5160 0xA92F, // A92F ; KAYAH_LI 5161 0xA930, // A930..A953; REJANG 5162 0xA954, // A954..A95E; UNKNOWN 5163 0xA95F, // A95F ; REJANG 5164 0xA960, // A960..A97C; HANGUL 5165 0xA97D, // A97D..A97F; UNKNOWN 5166 0xA980, // A980..A9CD; JAVANESE 5167 0xA9CE, // A9CE ; UNKNOWN 5168 0xA9CF, // A9CF ; COMMON 5169 0xA9D0, // A9D0..A9D9; JAVANESE 5170 0xA9DA, // A9DA..A9DD; UNKNOWN 5171 0xA9DE, // A9DE..A9DF; JAVANESE 5172 0xA9E0, // A9E0..A9FE; MYANMAR 5173 0xA9FF, // A9FF ; UNKNOWN 5174 0xAA00, // AA00..AA36; CHAM 5175 0xAA37, // AA37..AA3F; UNKNOWN 5176 0xAA40, // 
AA40..AA4D; CHAM 5177 0xAA4E, // AA4E..AA4F; UNKNOWN 5178 0xAA50, // AA50..AA59; CHAM 5179 0xAA5A, // AA5A..AA5B; UNKNOWN 5180 0xAA5C, // AA5C..AA5F; CHAM 5181 0xAA60, // AA60..AA7F; MYANMAR 5182 0xAA80, // AA80..AAC2; TAI_VIET 5183 0xAAC3, // AAC3..AADA; UNKNOWN 5184 0xAADB, // AADB..AADF; TAI_VIET 5185 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5186 0xAAF7, // AAF7..AB00; UNKNOWN 5187 0xAB01, // AB01..AB06; ETHIOPIC 5188 0xAB07, // AB07..AB08; UNKNOWN 5189 0xAB09, // AB09..AB0E; ETHIOPIC 5190 0xAB0F, // AB0F..AB10; UNKNOWN 5191 0xAB11, // AB11..AB16; ETHIOPIC 5192 0xAB17, // AB17..AB1F; UNKNOWN 5193 0xAB20, // AB20..AB26; ETHIOPIC 5194 0xAB27, // AB27 ; UNKNOWN 5195 0xAB28, // AB28..AB2E; ETHIOPIC 5196 0xAB2F, // AB2F ; UNKNOWN 5197 0xAB30, // AB30..AB5A; LATIN 5198 0xAB5B, // AB5B ; COMMON 5199 0xAB5C, // AB5C..AB64; LATIN 5200 0xAB65, // AB65 ; GREEK 5201 0xAB66, // AB66..AB6F; UNKNOWN 5202 0xAB70, // AB70..ABBF; CHEROKEE 5203 0xABC0, // ABC0..ABED; MEETEI_MAYEK 5204 0xABEE, // ABEE..ABEF; UNKNOWN 5205 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5206 0xABFA, // ABFA..ABFF; UNKNOWN 5207 0xAC00, // AC00..D7A3; HANGUL 5208 0xD7A4, // D7A4..D7AF; UNKNOWN 5209 0xD7B0, // D7B0..D7C6; HANGUL 5210 0xD7C7, // D7C7..D7CA; UNKNOWN 5211 0xD7CB, // D7CB..D7FB; HANGUL 5212 0xD7FC, // D7FC..F8FF; UNKNOWN 5213 0xF900, // F900..FA6D; HAN 5214 0xFA6E, // FA6E..FA6F; UNKNOWN 5215 0xFA70, // FA70..FAD9; HAN 5216 0xFADA, // FADA..FAFF; UNKNOWN 5217 0xFB00, // FB00..FB06; LATIN 5218 0xFB07, // FB07..FB12; UNKNOWN 5219 0xFB13, // FB13..FB17; ARMENIAN 5220 0xFB18, // FB18..FB1C; UNKNOWN 5221 0xFB1D, // FB1D..FB36; HEBREW 5222 0xFB37, // FB37 ; UNKNOWN 5223 0xFB38, // FB38..FB3C; HEBREW 5224 0xFB3D, // FB3D ; UNKNOWN 5225 0xFB3E, // FB3E ; HEBREW 5226 0xFB3F, // FB3F ; UNKNOWN 5227 0xFB40, // FB40..FB41; HEBREW 5228 0xFB42, // FB42 ; UNKNOWN 5229 0xFB43, // FB43..FB44; HEBREW 5230 0xFB45, // FB45 ; UNKNOWN 5231 0xFB46, // FB46..FB4F; HEBREW 5232 0xFB50, // FB50..FBC1; ARABIC 5233 0xFBC2, // 
FBC2..FBD2; UNKNOWN 5234 0xFBD3, // FBD3..FD3D; ARABIC 5235 0xFD3E, // FD3E..FD3F; COMMON 5236 0xFD40, // FD40..FD4F; UNKNOWN 5237 0xFD50, // FD50..FD8F; ARABIC 5238 0xFD90, // FD90..FD91; UNKNOWN 5239 0xFD92, // FD92..FDC7; ARABIC 5240 0xFDC8, // FDC8..FDEF; UNKNOWN 5241 0xFDF0, // FDF0..FDFD; ARABIC 5242 0xFDFE, // FDFE..FDFF; UNKNOWN 5243 0xFE00, // FE00..FE0F; INHERITED 5244 0xFE10, // FE10..FE19; COMMON 5245 0xFE1A, // FE1A..FE1F; UNKNOWN 5246 0xFE20, // FE20..FE2D; INHERITED 5247 0xFE2E, // FE2E..FE2F; CYRILLIC 5248 0xFE30, // FE30..FE52; COMMON 5249 0xFE53, // FE53 ; UNKNOWN 5250 0xFE54, // FE54..FE66; COMMON 5251 0xFE67, // FE67 ; UNKNOWN 5252 0xFE68, // FE68..FE6B; COMMON 5253 0xFE6C, // FE6C..FE6F; UNKNOWN 5254 0xFE70, // FE70..FE74; ARABIC 5255 0xFE75, // FE75 ; UNKNOWN 5256 0xFE76, // FE76..FEFC; ARABIC 5257 0xFEFD, // FEFD..FEFE; UNKNOWN 5258 0xFEFF, // FEFF ; COMMON 5259 0xFF00, // FF00 ; UNKNOWN 5260 0xFF01, // FF01..FF20; COMMON 5261 0xFF21, // FF21..FF3A; LATIN 5262 0xFF3B, // FF3B..FF40; COMMON 5263 0xFF41, // FF41..FF5A; LATIN 5264 0xFF5B, // FF5B..FF65; COMMON 5265 0xFF66, // FF66..FF6F; KATAKANA 5266 0xFF70, // FF70 ; COMMON 5267 0xFF71, // FF71..FF9D; KATAKANA 5268 0xFF9E, // FF9E..FF9F; COMMON 5269 0xFFA0, // FFA0..FFBE; HANGUL 5270 0xFFBF, // FFBF..FFC1; UNKNOWN 5271 0xFFC2, // FFC2..FFC7; HANGUL 5272 0xFFC8, // FFC8..FFC9; UNKNOWN 5273 0xFFCA, // FFCA..FFCF; HANGUL 5274 0xFFD0, // FFD0..FFD1; UNKNOWN 5275 0xFFD2, // FFD2..FFD7; HANGUL 5276 0xFFD8, // FFD8..FFD9; UNKNOWN 5277 0xFFDA, // FFDA..FFDC; HANGUL 5278 0xFFDD, // FFDD..FFDF; UNKNOWN 5279 0xFFE0, // FFE0..FFE6; COMMON 5280 0xFFE7, // FFE7 ; UNKNOWN 5281 0xFFE8, // FFE8..FFEE; COMMON 5282 0xFFEF, // FFEF..FFF8; UNKNOWN 5283 0xFFF9, // FFF9..FFFD; COMMON 5284 0xFFFE, // FFFE..FFFF; UNKNOWN 5285 0x10000, // 10000..1000B; LINEAR_B 5286 0x1000C, // 1000C ; UNKNOWN 5287 0x1000D, // 1000D..10026; LINEAR_B 5288 0x10027, // 10027 ; UNKNOWN 5289 0x10028, // 10028..1003A; LINEAR_B 5290 0x1003B, 
// 1003B ; UNKNOWN 5291 0x1003C, // 1003C..1003D; LINEAR_B 5292 0x1003E, // 1003E ; UNKNOWN 5293 0x1003F, // 1003F..1004D; LINEAR_B 5294 0x1004E, // 1004E..1004F; UNKNOWN 5295 0x10050, // 10050..1005D; LINEAR_B 5296 0x1005E, // 1005E..1007F; UNKNOWN 5297 0x10080, // 10080..100FA; LINEAR_B 5298 0x100FB, // 100FB..100FF; UNKNOWN 5299 0x10100, // 10100..10102; COMMON 5300 0x10103, // 10103..10106; UNKNOWN 5301 0x10107, // 10107..10133; COMMON 5302 0x10134, // 10134..10136; UNKNOWN 5303 0x10137, // 10137..1013F; COMMON 5304 0x10140, // 10140..1018C; GREEK 5305 0x1018D, // 1018D..1018F; UNKNOWN 5306 0x10190, // 10190..1019B; COMMON 5307 0x1019C, // 1019C..1019F; UNKNOWN 5308 0x101A0, // 101A0 ; GREEK 5309 0x101A1, // 101A1..101CF; UNKNOWN 5310 0x101D0, // 101D0..101FC; COMMON 5311 0x101FD, // 101FD ; INHERITED 5312 0x101FE, // 101FE..1027F; UNKNOWN 5313 0x10280, // 10280..1029C; LYCIAN 5314 0x1029D, // 1029D..1029F; UNKNOWN 5315 0x102A0, // 102A0..102D0; CARIAN 5316 0x102D1, // 102D1..102DF; UNKNOWN 5317 0x102E0, // 102E0 ; INHERITED 5318 0x102E1, // 102E1..102FB; COMMON 5319 0x102FC, // 102FC..102FF; UNKNOWN 5320 0x10300, // 10300..10323; OLD_ITALIC 5321 0x10324, // 10324..1032F; UNKNOWN 5322 0x10330, // 10330..1034A; GOTHIC 5323 0x1034B, // 1034B..1034F; UNKNOWN 5324 0x10350, // 10350..1037A; OLD_PERMIC 5325 0x1037B, // 1037B..1037F; UNKNOWN 5326 0x10380, // 10380..1039D; UGARITIC 5327 0x1039E, // 1039E ; UNKNOWN 5328 0x1039F, // 1039F ; UGARITIC 5329 0x103A0, // 103A0..103C3; OLD_PERSIAN 5330 0x103C4, // 103C4..103C7; UNKNOWN 5331 0x103C8, // 103C8..103D5; OLD_PERSIAN 5332 0x103D6, // 103D6..103FF; UNKNOWN 5333 0x10400, // 10400..1044F; DESERET 5334 0x10450, // 10450..1047F; SHAVIAN 5335 0x10480, // 10480..1049D; OSMANYA 5336 0x1049E, // 1049E..1049F; UNKNOWN 5337 0x104A0, // 104A0..104A9; OSMANYA 5338 0x104AA, // 104AA..104FF; UNKNOWN 5339 0x10500, // 10500..10527; ELBASAN 5340 0x10528, // 10528..1052F; UNKNOWN 5341 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 5342 
0x10564, // 10564..1056E; UNKNOWN 5343 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 5344 0x10570, // 10570..105FF; UNKNOWN 5345 0x10600, // 10600..10736; LINEAR_A 5346 0x10737, // 10737..1073F; UNKNOWN 5347 0x10740, // 10740..10755; LINEAR_A 5348 0x10756, // 10756..1075F; UNKNOWN 5349 0x10760, // 10760..10767; LINEAR_A 5350 0x10768, // 10768..107FF; UNKNOWN 5351 0x10800, // 10800..10805; CYPRIOT 5352 0x10806, // 10806..10807; UNKNOWN 5353 0x10808, // 10808 ; CYPRIOT 5354 0x10809, // 10809 ; UNKNOWN 5355 0x1080A, // 1080A..10835; CYPRIOT 5356 0x10836, // 10836 ; UNKNOWN 5357 0x10837, // 10837..10838; CYPRIOT 5358 0x10839, // 10839..1083B; UNKNOWN 5359 0x1083C, // 1083C ; CYPRIOT 5360 0x1083D, // 1083D..1083E; UNKNOWN 5361 0x1083F, // 1083F ; CYPRIOT 5362 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 5363 0x10856, // 10856 ; UNKNOWN 5364 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 5365 0x10860, // 10860..1087F; PALMYRENE 5366 0x10880, // 10880..1089E; NABATAEAN 5367 0x1089F, // 1089F..108A6; UNKNOWN 5368 0x108A7, // 108A7..108AF; NABATAEAN 5369 0x108B0, // 108B0..108DF; UNKNOWN 5370 0x108E0, // 108E0..108F2; HATRAN 5371 0x108F3, // 108F3 ; UNKNOWN 5372 0x108F4, // 108F4..108F5; HATRAN 5373 0x108F6, // 108F6..108FA; UNKNOWN 5374 0x108FB, // 108FB..108FF; HATRAN 5375 0x10900, // 10900..1091B; PHOENICIAN 5376 0x1091C, // 1091C..1091E; UNKNOWN 5377 0x1091F, // 1091F ; PHOENICIAN 5378 0x10920, // 10920..10939; LYDIAN 5379 0x1093A, // 1093A..1093E; UNKNOWN 5380 0x1093F, // 1093F ; LYDIAN 5381 0x10940, // 10940..1097F; UNKNOWN 5382 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 5383 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 5384 0x109B8, // 109B8..109BB; UNKNOWN 5385 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 5386 0x109D0, // 109D0..109D1; UNKNOWN 5387 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 5388 0x10A00, // 10A00..10A03; KHAROSHTHI 5389 0x10A04, // 10A04 ; UNKNOWN 5390 0x10A05, // 10A05..10A06; KHAROSHTHI 5391 0x10A07, // 10A07..10A0B; UNKNOWN 5392 0x10A0C, // 10A0C..10A13; 
KHAROSHTHI 5393 0x10A14, // 10A14 ; UNKNOWN 5394 0x10A15, // 10A15..10A17; KHAROSHTHI 5395 0x10A18, // 10A18 ; UNKNOWN 5396 0x10A19, // 10A19..10A33; KHAROSHTHI 5397 0x10A34, // 10A34..10A37; UNKNOWN 5398 0x10A38, // 10A38..10A3A; KHAROSHTHI 5399 0x10A3B, // 10A3B..10A3E; UNKNOWN 5400 0x10A3F, // 10A3F..10A47; KHAROSHTHI 5401 0x10A48, // 10A48..10A4F; UNKNOWN 5402 0x10A50, // 10A50..10A58; KHAROSHTHI 5403 0x10A59, // 10A59..10A5F; UNKNOWN 5404 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 5405 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 5406 0x10AA0, // 10AA0..10ABF; UNKNOWN 5407 0x10AC0, // 10AC0..10AE6; MANICHAEAN 5408 0x10AE7, // 10AE7..10AEA; UNKNOWN 5409 0x10AEB, // 10AEB..10AF6; MANICHAEAN 5410 0x10AF7, // 10AF7..10AFF; UNKNOWN 5411 0x10B00, // 10B00..10B35; AVESTAN 5412 0x10B36, // 10B36..10B38; UNKNOWN 5413 0x10B39, // 10B39..10B3F; AVESTAN 5414 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 5415 0x10B56, // 10B56..10B57; UNKNOWN 5416 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 5417 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 5418 0x10B73, // 10B73..10B77; UNKNOWN 5419 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 5420 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 5421 0x10B92, // 10B92..10B98; UNKNOWN 5422 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 5423 0x10B9D, // 10B9D..10BA8; UNKNOWN 5424 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 5425 0x10BB0, // 10BB0..10BFF; UNKNOWN 5426 0x10C00, // 10C00..10C48; OLD_TURKIC 5427 0x10C49, // 10C49..10C7F; UNKNOWN 5428 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 5429 0x10CB3, // 10CB3..10CBF; UNKNOWN 5430 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 5431 0x10CF3, // 10CF3..10CF9; UNKNOWN 5432 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 5433 0x10D00, // 10D00..10E5F; UNKNOWN 5434 0x10E60, // 10E60..10E7E; ARABIC 5435 0x10E7F, // 10E7F..10FFF; UNKNOWN 5436 0x11000, // 11000..1104D; BRAHMI 5437 0x1104E, // 1104E..11051; UNKNOWN 5438 0x11052, // 11052..1106F; BRAHMI 5439 0x11070, // 11070..1107E; UNKNOWN 5440 0x1107F, // 1107F ; 
BRAHMI 5441 0x11080, // 11080..110C1; KAITHI 5442 0x110C2, // 110C2..110CF; UNKNOWN 5443 0x110D0, // 110D0..110E8; SORA_SOMPENG 5444 0x110E9, // 110E9..110EF; UNKNOWN 5445 0x110F0, // 110F0..110F9; SORA_SOMPENG 5446 0x110FA, // 110FA..110FF; UNKNOWN 5447 0x11100, // 11100..11134; CHAKMA 5448 0x11135, // 11135 ; UNKNOWN 5449 0x11136, // 11136..11143; CHAKMA 5450 0x11144, // 11144..1114F; UNKNOWN 5451 0x11150, // 11150..11176; MAHAJANI 5452 0x11177, // 11177..1117F; UNKNOWN 5453 0x11180, // 11180..111CD; SHARADA 5454 0x111CE, // 111CE..111CF; UNKNOWN 5455 0x111D0, // 111D0..111DF; SHARADA 5456 0x111E0, // 111E0 ; UNKNOWN 5457 0x111E1, // 111E1..111F4; SINHALA 5458 0x111F5, // 111F5..111FF; UNKNOWN 5459 0x11200, // 11200..11211; KHOJKI 5460 0x11212, // 11212 ; UNKNOWN 5461 0x11213, // 11213..1123D; KHOJKI 5462 0x1123E, // 1123E..1127F; UNKNOWN 5463 0x11280, // 11280..11286; MULTANI 5464 0x11287, // 11287 ; UNKNOWN 5465 0x11288, // 11288 ; MULTANI 5466 0x11289, // 11289 ; UNKNOWN 5467 0x1128A, // 1128A..1128D; MULTANI 5468 0x1128E, // 1128E ; UNKNOWN 5469 0x1128F, // 1128F..1129D; MULTANI 5470 0x1129E, // 1129E ; UNKNOWN 5471 0x1129F, // 1129F..112A9; MULTANI 5472 0x112AA, // 112AA..112AF; UNKNOWN 5473 0x112B0, // 112B0..112EA; KHUDAWADI 5474 0x112EB, // 112EB..112EF; UNKNOWN 5475 0x112F0, // 112F0..112F9; KHUDAWADI 5476 0x112FA, // 112FA..112FF; UNKNOWN 5477 0x11300, // 11300..11303; GRANTHA 5478 0x11304, // 11304 ; UNKNOWN 5479 0x11305, // 11305..1130C; GRANTHA 5480 0x1130D, // 1130D..1130E; UNKNOWN 5481 0x1130F, // 1130F..11310; GRANTHA 5482 0x11311, // 11311..11312; UNKNOWN 5483 0x11313, // 11313..11328; GRANTHA 5484 0x11329, // 11329 ; UNKNOWN 5485 0x1132A, // 1132A..11330; GRANTHA 5486 0x11331, // 11331 ; UNKNOWN 5487 0x11332, // 11332..11333; GRANTHA 5488 0x11334, // 11334 ; UNKNOWN 5489 0x11335, // 11335..11339; GRANTHA 5490 0x1133A, // 1133A..1133B; UNKNOWN 5491 0x1133C, // 1133C..11344; GRANTHA 5492 0x11345, // 11345..11346; UNKNOWN 5493 0x11347, // 
11347..11348; GRANTHA 5494 0x11349, // 11349..1134A; UNKNOWN 5495 0x1134B, // 1134B..1134D; GRANTHA 5496 0x1134E, // 1134E..1134F; UNKNOWN 5497 0x11350, // 11350 ; GRANTHA 5498 0x11351, // 11351..11356; UNKNOWN 5499 0x11357, // 11357 ; GRANTHA 5500 0x11358, // 11358..1135C; UNKNOWN 5501 0x1135D, // 1135D..11363; GRANTHA 5502 0x11364, // 11364..11365; UNKNOWN 5503 0x11366, // 11366..1136C; GRANTHA 5504 0x1136D, // 1136D..1136F; UNKNOWN 5505 0x11370, // 11370..11374; GRANTHA 5506 0x11375, // 11375..1147F; UNKNOWN 5507 0x11480, // 11480..114C7; TIRHUTA 5508 0x114C8, // 114C8..114CF; UNKNOWN 5509 0x114D0, // 114D0..114D9; TIRHUTA 5510 0x114DA, // 114DA..1157F; UNKNOWN 5511 0x11580, // 11580..115B5; SIDDHAM 5512 0x115B6, // 115B6..115B7; UNKNOWN 5513 0x115B8, // 115B8..115DD; SIDDHAM 5514 0x115DE, // 115DE..115FF; UNKNOWN 5515 0x11600, // 11600..11644; MODI 5516 0x11645, // 11645..1164F; UNKNOWN 5517 0x11650, // 11650..11659; MODI 5518 0x1165A, // 1165A..1167F; UNKNOWN 5519 0x11680, // 11680..116B7; TAKRI 5520 0x116B8, // 116B8..116BF; UNKNOWN 5521 0x116C0, // 116C0..116C9; TAKRI 5522 0x116CA, // 116CA..116FF; UNKNOWN 5523 0x11700, // 11700..11719; AHOM 5524 0x1171A, // 1171A..1171C; UNKNOWN 5525 0x1171D, // 1171D..1172B; AHOM 5526 0x1172C, // 1172C..1172F; UNKNOWN 5527 0x11730, // 11730..1173F; AHOM 5528 0x11740, // 11740..1189F; UNKNOWN 5529 0x118A0, // 118A0..118F2; WARANG_CITI 5530 0x118F3, // 118F3..118FE; UNKNOWN 5531 0x118FF, // 118FF ; WARANG_CITI 5532 0x11900, // 11900..11ABF; UNKNOWN 5533 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 5534 0x11AF9, // 11AF9..11FFF; UNKNOWN 5535 0x12000, // 12000..12399; CUNEIFORM 5536 0x1239A, // 1239A..123FF; UNKNOWN 5537 0x12400, // 12400..1246E; CUNEIFORM 5538 0x1246F, // 1246F ; UNKNOWN 5539 0x12470, // 12470..12474; CUNEIFORM 5540 0x12475, // 12475..1247F; UNKNOWN 5541 0x12480, // 12480..12543; CUNEIFORM 5542 0x12544, // 12544..12FFF; UNKNOWN 5543 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 5544 0x1342F, // 1342F..143FF; 
UNKNOWN 5545 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 5546 0x14647, // 14647..167FF; UNKNOWN 5547 0x16800, // 16800..16A38; BAMUM 5548 0x16A39, // 16A39..16A3F; UNKNOWN 5549 0x16A40, // 16A40..16A5E; MRO 5550 0x16A5F, // 16A5F ; UNKNOWN 5551 0x16A60, // 16A60..16A69; MRO 5552 0x16A6A, // 16A6A..16A6D; UNKNOWN 5553 0x16A6E, // 16A6E..16A6F; MRO 5554 0x16A70, // 16A70..16ACF; UNKNOWN 5555 0x16AD0, // 16AD0..16AED; BASSA_VAH 5556 0x16AEE, // 16AEE..16AEF; UNKNOWN 5557 0x16AF0, // 16AF0..16AF5; BASSA_VAH 5558 0x16AF6, // 16AF6..16AFF; UNKNOWN 5559 0x16B00, // 16B00..16B45; PAHAWH_HMONG 5560 0x16B46, // 16B46..16B4F; UNKNOWN 5561 0x16B50, // 16B50..16B59; PAHAWH_HMONG 5562 0x16B5A, // 16B5A ; UNKNOWN 5563 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 5564 0x16B62, // 16B62 ; UNKNOWN 5565 0x16B63, // 16B63..16B77; PAHAWH_HMONG 5566 0x16B78, // 16B78..16B7C; UNKNOWN 5567 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 5568 0x16B90, // 16B90..16EFF; UNKNOWN 5569 0x16F00, // 16F00..16F44; MIAO 5570 0x16F45, // 16F45..16F4F; UNKNOWN 5571 0x16F50, // 16F50..16F7E; MIAO 5572 0x16F7F, // 16F7F..16F8E; UNKNOWN 5573 0x16F8F, // 16F8F..16F9F; MIAO 5574 0x16FA0, // 16FA0..1AFFF; UNKNOWN 5575 0x1B000, // 1B000 ; KATAKANA 5576 0x1B001, // 1B001 ; HIRAGANA 5577 0x1B002, // 1B002..1BBFF; UNKNOWN 5578 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 5579 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 5580 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 5581 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 5582 0x1BC80, // 1BC80..1BC88; DUPLOYAN 5583 0x1BC89, // 1BC89..1BC8F; UNKNOWN 5584 0x1BC90, // 1BC90..1BC99; DUPLOYAN 5585 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 5586 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 5587 0x1BCA0, // 1BCA0..1BCA3; COMMON 5588 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 5589 0x1D000, // 1D000..1D0F5; COMMON 5590 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 5591 0x1D100, // 1D100..1D126; COMMON 5592 0x1D127, // 1D127..1D128; UNKNOWN 5593 0x1D129, // 1D129..1D166; COMMON 5594 0x1D167, // 1D167..1D169; INHERITED 5595 0x1D16A, // 1D16A..1D17A; COMMON 5596 
0x1D17B, // 1D17B..1D182; INHERITED 5597 0x1D183, // 1D183..1D184; COMMON 5598 0x1D185, // 1D185..1D18B; INHERITED 5599 0x1D18C, // 1D18C..1D1A9; COMMON 5600 0x1D1AA, // 1D1AA..1D1AD; INHERITED 5601 0x1D1AE, // 1D1AE..1D1E8; COMMON 5602 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN 5603 0x1D200, // 1D200..1D245; GREEK 5604 0x1D246, // 1D246..1D2FF; UNKNOWN 5605 0x1D300, // 1D300..1D356; COMMON 5606 0x1D357, // 1D357..1D35F; UNKNOWN 5607 0x1D360, // 1D360..1D371; COMMON 5608 0x1D372, // 1D372..1D3FF; UNKNOWN 5609 0x1D400, // 1D400..1D454; COMMON 5610 0x1D455, // 1D455 ; UNKNOWN 5611 0x1D456, // 1D456..1D49C; COMMON 5612 0x1D49D, // 1D49D ; UNKNOWN 5613 0x1D49E, // 1D49E..1D49F; COMMON 5614 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 5615 0x1D4A2, // 1D4A2 ; COMMON 5616 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 5617 0x1D4A5, // 1D4A5..1D4A6; COMMON 5618 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 5619 0x1D4A9, // 1D4A9..1D4AC; COMMON 5620 0x1D4AD, // 1D4AD ; UNKNOWN 5621 0x1D4AE, // 1D4AE..1D4B9; COMMON 5622 0x1D4BA, // 1D4BA ; UNKNOWN 5623 0x1D4BB, // 1D4BB ; COMMON 5624 0x1D4BC, // 1D4BC ; UNKNOWN 5625 0x1D4BD, // 1D4BD..1D4C3; COMMON 5626 0x1D4C4, // 1D4C4 ; UNKNOWN 5627 0x1D4C5, // 1D4C5..1D505; COMMON 5628 0x1D506, // 1D506 ; UNKNOWN 5629 0x1D507, // 1D507..1D50A; COMMON 5630 0x1D50B, // 1D50B..1D50C; UNKNOWN 5631 0x1D50D, // 1D50D..1D514; COMMON 5632 0x1D515, // 1D515 ; UNKNOWN 5633 0x1D516, // 1D516..1D51C; COMMON 5634 0x1D51D, // 1D51D ; UNKNOWN 5635 0x1D51E, // 1D51E..1D539; COMMON 5636 0x1D53A, // 1D53A ; UNKNOWN 5637 0x1D53B, // 1D53B..1D53E; COMMON 5638 0x1D53F, // 1D53F ; UNKNOWN 5639 0x1D540, // 1D540..1D544; COMMON 5640 0x1D545, // 1D545 ; UNKNOWN 5641 0x1D546, // 1D546 ; COMMON 5642 0x1D547, // 1D547..1D549; UNKNOWN 5643 0x1D54A, // 1D54A..1D550; COMMON 5644 0x1D551, // 1D551 ; UNKNOWN 5645 0x1D552, // 1D552..1D6A5; COMMON 5646 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 5647 0x1D6A8, // 1D6A8..1D7CB; COMMON 5648 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 5649 0x1D7CE, // 1D7CE..1D7FF; COMMON 5650 0x1D800, // 
1D800..1DA8B; SIGNWRITING 5651 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 5652 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 5653 0x1DAA0, // 1DAA0 ; UNKNOWN 5654 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 5655 0x1DAB0, // 1DAB0..1E7FF; UNKNOWN 5656 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 5657 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 5658 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 5659 0x1E8D7, // 1E8D7..1EDFF; UNKNOWN 5660 0x1EE00, // 1EE00..1EE03; ARABIC 5661 0x1EE04, // 1EE04 ; UNKNOWN 5662 0x1EE05, // 1EE05..1EE1F; ARABIC 5663 0x1EE20, // 1EE20 ; UNKNOWN 5664 0x1EE21, // 1EE21..1EE22; ARABIC 5665 0x1EE23, // 1EE23 ; UNKNOWN 5666 0x1EE24, // 1EE24 ; ARABIC 5667 0x1EE25, // 1EE25..1EE26; UNKNOWN 5668 0x1EE27, // 1EE27 ; ARABIC 5669 0x1EE28, // 1EE28 ; UNKNOWN 5670 0x1EE29, // 1EE29..1EE32; ARABIC 5671 0x1EE33, // 1EE33 ; UNKNOWN 5672 0x1EE34, // 1EE34..1EE37; ARABIC 5673 0x1EE38, // 1EE38 ; UNKNOWN 5674 0x1EE39, // 1EE39 ; ARABIC 5675 0x1EE3A, // 1EE3A ; UNKNOWN 5676 0x1EE3B, // 1EE3B ; ARABIC 5677 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 5678 0x1EE42, // 1EE42 ; ARABIC 5679 0x1EE43, // 1EE43..1EE46; UNKNOWN 5680 0x1EE47, // 1EE47 ; ARABIC 5681 0x1EE48, // 1EE48 ; UNKNOWN 5682 0x1EE49, // 1EE49 ; ARABIC 5683 0x1EE4A, // 1EE4A ; UNKNOWN 5684 0x1EE4B, // 1EE4B ; ARABIC 5685 0x1EE4C, // 1EE4C ; UNKNOWN 5686 0x1EE4D, // 1EE4D..1EE4F; ARABIC 5687 0x1EE50, // 1EE50 ; UNKNOWN 5688 0x1EE51, // 1EE51..1EE52; ARABIC 5689 0x1EE53, // 1EE53 ; UNKNOWN 5690 0x1EE54, // 1EE54 ; ARABIC 5691 0x1EE55, // 1EE55..1EE56; UNKNOWN 5692 0x1EE57, // 1EE57 ; ARABIC 5693 0x1EE58, // 1EE58 ; UNKNOWN 5694 0x1EE59, // 1EE59 ; ARABIC 5695 0x1EE5A, // 1EE5A ; UNKNOWN 5696 0x1EE5B, // 1EE5B ; ARABIC 5697 0x1EE5C, // 1EE5C ; UNKNOWN 5698 0x1EE5D, // 1EE5D ; ARABIC 5699 0x1EE5E, // 1EE5E ; UNKNOWN 5700 0x1EE5F, // 1EE5F ; ARABIC 5701 0x1EE60, // 1EE60 ; UNKNOWN 5702 0x1EE61, // 1EE61..1EE62; ARABIC 5703 0x1EE63, // 1EE63 ; UNKNOWN 5704 0x1EE64, // 1EE64 ; ARABIC 5705 0x1EE65, // 1EE65..1EE66; UNKNOWN 5706 0x1EE67, // 1EE67..1EE6A; 
ARABIC 5707 0x1EE6B, // 1EE6B ; UNKNOWN 5708 0x1EE6C, // 1EE6C..1EE72; ARABIC 5709 0x1EE73, // 1EE73 ; UNKNOWN 5710 0x1EE74, // 1EE74..1EE77; ARABIC 5711 0x1EE78, // 1EE78 ; UNKNOWN 5712 0x1EE79, // 1EE79..1EE7C; ARABIC 5713 0x1EE7D, // 1EE7D ; UNKNOWN 5714 0x1EE7E, // 1EE7E ; ARABIC 5715 0x1EE7F, // 1EE7F ; UNKNOWN 5716 0x1EE80, // 1EE80..1EE89; ARABIC 5717 0x1EE8A, // 1EE8A ; UNKNOWN 5718 0x1EE8B, // 1EE8B..1EE9B; ARABIC 5719 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 5720 0x1EEA1, // 1EEA1..1EEA3; ARABIC 5721 0x1EEA4, // 1EEA4 ; UNKNOWN 5722 0x1EEA5, // 1EEA5..1EEA9; ARABIC 5723 0x1EEAA, // 1EEAA ; UNKNOWN 5724 0x1EEAB, // 1EEAB..1EEBB; ARABIC 5725 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 5726 0x1EEF0, // 1EEF0..1EEF1; ARABIC 5727 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 5728 0x1F000, // 1F000..1F02B; COMMON 5729 0x1F02C, // 1F02C..1F02F; UNKNOWN 5730 0x1F030, // 1F030..1F093; COMMON 5731 0x1F094, // 1F094..1F09F; UNKNOWN 5732 0x1F0A0, // 1F0A0..1F0AE; COMMON 5733 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 5734 0x1F0B1, // 1F0B1..1F0BF; COMMON 5735 0x1F0C0, // 1F0C0 ; UNKNOWN 5736 0x1F0C1, // 1F0C1..1F0CF; COMMON 5737 0x1F0D0, // 1F0D0 ; UNKNOWN 5738 0x1F0D1, // 1F0D1..1F0F5; COMMON 5739 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 5740 0x1F100, // 1F100..1F10C; COMMON 5741 0x1F10D, // 1F10D..1F10F; UNKNOWN 5742 0x1F110, // 1F110..1F12E; COMMON 5743 0x1F12F, // 1F12F ; UNKNOWN 5744 0x1F130, // 1F130..1F16B; COMMON 5745 0x1F16C, // 1F16C..1F16F; UNKNOWN 5746 0x1F170, // 1F170..1F19A; COMMON 5747 0x1F19B, // 1F19B..1F1E5; UNKNOWN 5748 0x1F1E6, // 1F1E6..1F1FF; COMMON 5749 0x1F200, // 1F200 ; HIRAGANA 5750 0x1F201, // 1F201..1F202; COMMON 5751 0x1F203, // 1F203..1F20F; UNKNOWN 5752 0x1F210, // 1F210..1F23A; COMMON 5753 0x1F23B, // 1F23B..1F23F; UNKNOWN 5754 0x1F240, // 1F240..1F248; COMMON 5755 0x1F249, // 1F249..1F24F; UNKNOWN 5756 0x1F250, // 1F250..1F251; COMMON 5757 0x1F252, // 1F252..1F2FF; UNKNOWN 5758 0x1F300, // 1F300..1F579; COMMON 5759 0x1F57A, // 1F57A ; UNKNOWN 5760 0x1F57B, // 1F57B..1F5A3; 
COMMON 5761 0x1F5A4, // 1F5A4 ; UNKNOWN 5762 0x1F5A5, // 1F5A5..1F6D0; COMMON 5763 0x1F6D1, // 1F6D1..1F6DF; UNKNOWN 5764 0x1F6E0, // 1F6E0..1F6EC; COMMON 5765 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 5766 0x1F6F0, // 1F6F0..1F6F3; COMMON 5767 0x1F6F4, // 1F6F4..1F6FF; UNKNOWN 5768 0x1F700, // 1F700..1F773; COMMON 5769 0x1F774, // 1F774..1F77F; UNKNOWN 5770 0x1F780, // 1F780..1F7D4; COMMON 5771 0x1F7D5, // 1F7D5..1F7FF; UNKNOWN 5772 0x1F800, // 1F800..1F80B; COMMON 5773 0x1F80C, // 1F80C..1F80F; UNKNOWN 5774 0x1F810, // 1F810..1F847; COMMON 5775 0x1F848, // 1F848..1F84F; UNKNOWN 5776 0x1F850, // 1F850..1F859; COMMON 5777 0x1F85A, // 1F85A..1F85F; UNKNOWN 5778 0x1F860, // 1F860..1F887; COMMON 5779 0x1F888, // 1F888..1F88F; UNKNOWN 5780 0x1F890, // 1F890..1F8AD; COMMON 5781 0x1F8AE, // 1F8AE..1F90F; UNKNOWN 5782 0x1F910, // 1F910..1F918; COMMON 5783 0x1F919, // 1F919..1F97F; UNKNOWN 5784 0x1F980, // 1F980..1F984; COMMON 5785 0x1F985, // 1F985..1F9BF; UNKNOWN 5786 0x1F9C0, // 1F9C0 ; COMMON 5787 0x1F9C1, // 1F9C1..1FFFF; UNKNOWN 5788 0x20000, // 20000..2A6D6; HAN 5789 0x2A6D7, // 2A6D7..2A6FF; UNKNOWN 5790 0x2A700, // 2A700..2B734; HAN 5791 0x2B735, // 2B735..2B73F; UNKNOWN 5792 0x2B740, // 2B740..2B81D; HAN 5793 0x2B81E, // 2B81E..2B81F; UNKNOWN 5794 0x2B820, // 2B820..2CEA1; HAN 5795 0x2CEA2, // 2CEA2..2F7FF; UNKNOWN 5796 0x2F800, // 2F800..2FA1D; HAN 5797 0x2FA1E, // 2FA1E..E0000; UNKNOWN 5798 0xE0001, // E0001 ; COMMON 5799 0xE0002, // E0002..E001F; UNKNOWN 5800 0xE0020, // E0020..E007F; COMMON 5801 0xE0080, // E0080..E00FF; UNKNOWN 5802 0xE0100, // E0100..E01EF; INHERITED 5803 0xE01F0 // E01F0..10FFFF; UNKNOWN 5804 }; 5805 5806 private static final UnicodeScript[] scripts = { 5807 COMMON, // 0000..0040 5808 LATIN, // 0041..005A 5809 COMMON, // 005B..0060 5810 LATIN, // 0061..007A 5811 COMMON, // 007B..00A9 5812 LATIN, // 00AA 5813 COMMON, // 00AB..00B9 5814 LATIN, // 00BA 5815 COMMON, // 00BB..00BF 5816 LATIN, // 00C0..00D6 5817 COMMON, // 00D7 5818 LATIN, // 00D8..00F6 
5819 COMMON, // 00F7 5820 LATIN, // 00F8..02B8 5821 COMMON, // 02B9..02DF 5822 LATIN, // 02E0..02E4 5823 COMMON, // 02E5..02E9 5824 BOPOMOFO, // 02EA..02EB 5825 COMMON, // 02EC..02FF 5826 INHERITED, // 0300..036F 5827 GREEK, // 0370..0373 5828 COMMON, // 0374 5829 GREEK, // 0375..0377 5830 UNKNOWN, // 0378..0379 5831 GREEK, // 037A..037D 5832 COMMON, // 037E 5833 GREEK, // 037F 5834 UNKNOWN, // 0380..0383 5835 GREEK, // 0384 5836 COMMON, // 0385 5837 GREEK, // 0386 5838 COMMON, // 0387 5839 GREEK, // 0388..038A 5840 UNKNOWN, // 038B 5841 GREEK, // 038C 5842 UNKNOWN, // 038D 5843 GREEK, // 038E..03A1 5844 UNKNOWN, // 03A2 5845 GREEK, // 03A3..03E1 5846 COPTIC, // 03E2..03EF 5847 GREEK, // 03F0..03FF 5848 CYRILLIC, // 0400..0484 5849 INHERITED, // 0485..0486 5850 CYRILLIC, // 0487..052F 5851 UNKNOWN, // 0530 5852 ARMENIAN, // 0531..0556 5853 UNKNOWN, // 0557..0558 5854 ARMENIAN, // 0559..055F 5855 UNKNOWN, // 0560 5856 ARMENIAN, // 0561..0587 5857 UNKNOWN, // 0588 5858 COMMON, // 0589 5859 ARMENIAN, // 058A 5860 UNKNOWN, // 058B..058C 5861 ARMENIAN, // 058D..058F 5862 UNKNOWN, // 0590 5863 HEBREW, // 0591..05C7 5864 UNKNOWN, // 05C8..05CF 5865 HEBREW, // 05D0..05EA 5866 UNKNOWN, // 05EB..05EF 5867 HEBREW, // 05F0..05F4 5868 UNKNOWN, // 05F5..05FF 5869 ARABIC, // 0600..0604 5870 COMMON, // 0605 5871 ARABIC, // 0606..060B 5872 COMMON, // 060C 5873 ARABIC, // 060D..061A 5874 COMMON, // 061B..061C 5875 UNKNOWN, // 061D 5876 ARABIC, // 061E 5877 COMMON, // 061F 5878 ARABIC, // 0620..063F 5879 COMMON, // 0640 5880 ARABIC, // 0641..064A 5881 INHERITED, // 064B..0655 5882 ARABIC, // 0656..066F 5883 INHERITED, // 0670 5884 ARABIC, // 0671..06DC 5885 COMMON, // 06DD 5886 ARABIC, // 06DE..06FF 5887 SYRIAC, // 0700..070D 5888 UNKNOWN, // 070E 5889 SYRIAC, // 070F..074A 5890 UNKNOWN, // 074B..074C 5891 SYRIAC, // 074D..074F 5892 ARABIC, // 0750..077F 5893 THAANA, // 0780..07B1 5894 UNKNOWN, // 07B2..07BF 5895 NKO, // 07C0..07FA 5896 UNKNOWN, // 07FB..07FF 5897 SAMARITAN, // 
0800..082D 5898 UNKNOWN, // 082E..082F 5899 SAMARITAN, // 0830..083E 5900 UNKNOWN, // 083F 5901 MANDAIC, // 0840..085B 5902 UNKNOWN, // 085C..085D 5903 MANDAIC, // 085E 5904 UNKNOWN, // 085F..089F 5905 ARABIC, // 08A0..08B4 5906 UNKNOWN, // 08B5..08E2 5907 ARABIC, // 08E3..08FF 5908 DEVANAGARI, // 0900..0950 5909 INHERITED, // 0951..0952 5910 DEVANAGARI, // 0953..0963 5911 COMMON, // 0964..0965 5912 DEVANAGARI, // 0966..097F 5913 BENGALI, // 0980..0983 5914 UNKNOWN, // 0984 5915 BENGALI, // 0985..098C 5916 UNKNOWN, // 098D..098E 5917 BENGALI, // 098F..0990 5918 UNKNOWN, // 0991..0992 5919 BENGALI, // 0993..09A8 5920 UNKNOWN, // 09A9 5921 BENGALI, // 09AA..09B0 5922 UNKNOWN, // 09B1 5923 BENGALI, // 09B2 5924 UNKNOWN, // 09B3..09B5 5925 BENGALI, // 09B6..09B9 5926 UNKNOWN, // 09BA..09BB 5927 BENGALI, // 09BC..09C4 5928 UNKNOWN, // 09C5..09C6 5929 BENGALI, // 09C7..09C8 5930 UNKNOWN, // 09C9..09CA 5931 BENGALI, // 09CB..09CE 5932 UNKNOWN, // 09CF..09D6 5933 BENGALI, // 09D7 5934 UNKNOWN, // 09D8..09DB 5935 BENGALI, // 09DC..09DD 5936 UNKNOWN, // 09DE 5937 BENGALI, // 09DF..09E3 5938 UNKNOWN, // 09E4..09E5 5939 BENGALI, // 09E6..09FB 5940 UNKNOWN, // 09FC..0A00 5941 GURMUKHI, // 0A01..0A03 5942 UNKNOWN, // 0A04 5943 GURMUKHI, // 0A05..0A0A 5944 UNKNOWN, // 0A0B..0A0E 5945 GURMUKHI, // 0A0F..0A10 5946 UNKNOWN, // 0A11..0A12 5947 GURMUKHI, // 0A13..0A28 5948 UNKNOWN, // 0A29 5949 GURMUKHI, // 0A2A..0A30 5950 UNKNOWN, // 0A31 5951 GURMUKHI, // 0A32..0A33 5952 UNKNOWN, // 0A34 5953 GURMUKHI, // 0A35..0A36 5954 UNKNOWN, // 0A37 5955 GURMUKHI, // 0A38..0A39 5956 UNKNOWN, // 0A3A..0A3B 5957 GURMUKHI, // 0A3C 5958 UNKNOWN, // 0A3D 5959 GURMUKHI, // 0A3E..0A42 5960 UNKNOWN, // 0A43..0A46 5961 GURMUKHI, // 0A47..0A48 5962 UNKNOWN, // 0A49..0A4A 5963 GURMUKHI, // 0A4B..0A4D 5964 UNKNOWN, // 0A4E..0A50 5965 GURMUKHI, // 0A51 5966 UNKNOWN, // 0A52..0A58 5967 GURMUKHI, // 0A59..0A5C 5968 UNKNOWN, // 0A5D 5969 GURMUKHI, // 0A5E 5970 UNKNOWN, // 0A5F..0A65 5971 GURMUKHI, // 
0A66..0A75 5972 UNKNOWN, // 0A76..0A80 5973 GUJARATI, // 0A81..0A83 5974 UNKNOWN, // 0A84 5975 GUJARATI, // 0A85..0A8D 5976 UNKNOWN, // 0A8E 5977 GUJARATI, // 0A8F..0A91 5978 UNKNOWN, // 0A92 5979 GUJARATI, // 0A93..0AA8 5980 UNKNOWN, // 0AA9 5981 GUJARATI, // 0AAA..0AB0 5982 UNKNOWN, // 0AB1 5983 GUJARATI, // 0AB2..0AB3 5984 UNKNOWN, // 0AB4 5985 GUJARATI, // 0AB5..0AB9 5986 UNKNOWN, // 0ABA..0ABB 5987 GUJARATI, // 0ABC..0AC5 5988 UNKNOWN, // 0AC6 5989 GUJARATI, // 0AC7..0AC9 5990 UNKNOWN, // 0ACA 5991 GUJARATI, // 0ACB..0ACD 5992 UNKNOWN, // 0ACE..0ACF 5993 GUJARATI, // 0AD0 5994 UNKNOWN, // 0AD1..0ADF 5995 GUJARATI, // 0AE0..0AE3 5996 UNKNOWN, // 0AE4..0AE5 5997 GUJARATI, // 0AE6..0AF1 5998 UNKNOWN, // 0AF2..0AF8 5999 GUJARATI, // 0AF9 6000 UNKNOWN, // 0AFA..0B00 6001 ORIYA, // 0B01..0B03 6002 UNKNOWN, // 0B04 6003 ORIYA, // 0B05..0B0C 6004 UNKNOWN, // 0B0D..0B0E 6005 ORIYA, // 0B0F..0B10 6006 UNKNOWN, // 0B11..0B12 6007 ORIYA, // 0B13..0B28 6008 UNKNOWN, // 0B29 6009 ORIYA, // 0B2A..0B30 6010 UNKNOWN, // 0B31 6011 ORIYA, // 0B32..0B33 6012 UNKNOWN, // 0B34 6013 ORIYA, // 0B35..0B39 6014 UNKNOWN, // 0B3A..0B3B 6015 ORIYA, // 0B3C..0B44 6016 UNKNOWN, // 0B45..0B46 6017 ORIYA, // 0B47..0B48 6018 UNKNOWN, // 0B49..0B4A 6019 ORIYA, // 0B4B..0B4D 6020 UNKNOWN, // 0B4E..0B55 6021 ORIYA, // 0B56..0B57 6022 UNKNOWN, // 0B58..0B5B 6023 ORIYA, // 0B5C..0B5D 6024 UNKNOWN, // 0B5E 6025 ORIYA, // 0B5F..0B63 6026 UNKNOWN, // 0B64..0B65 6027 ORIYA, // 0B66..0B77 6028 UNKNOWN, // 0B78..0B81 6029 TAMIL, // 0B82..0B83 6030 UNKNOWN, // 0B84 6031 TAMIL, // 0B85..0B8A 6032 UNKNOWN, // 0B8B..0B8D 6033 TAMIL, // 0B8E..0B90 6034 UNKNOWN, // 0B91 6035 TAMIL, // 0B92..0B95 6036 UNKNOWN, // 0B96..0B98 6037 TAMIL, // 0B99..0B9A 6038 UNKNOWN, // 0B9B 6039 TAMIL, // 0B9C 6040 UNKNOWN, // 0B9D 6041 TAMIL, // 0B9E..0B9F 6042 UNKNOWN, // 0BA0..0BA2 6043 TAMIL, // 0BA3..0BA4 6044 UNKNOWN, // 0BA5..0BA7 6045 TAMIL, // 0BA8..0BAA 6046 UNKNOWN, // 0BAB..0BAD 6047 TAMIL, // 0BAE..0BB9 6048 UNKNOWN, 
// 0BBA..0BBD 6049 TAMIL, // 0BBE..0BC2 6050 UNKNOWN, // 0BC3..0BC5 6051 TAMIL, // 0BC6..0BC8 6052 UNKNOWN, // 0BC9 6053 TAMIL, // 0BCA..0BCD 6054 UNKNOWN, // 0BCE..0BCF 6055 TAMIL, // 0BD0 6056 UNKNOWN, // 0BD1..0BD6 6057 TAMIL, // 0BD7 6058 UNKNOWN, // 0BD8..0BE5 6059 TAMIL, // 0BE6..0BFA 6060 UNKNOWN, // 0BFB..0BFF 6061 TELUGU, // 0C00..0C03 6062 UNKNOWN, // 0C04 6063 TELUGU, // 0C05..0C0C 6064 UNKNOWN, // 0C0D 6065 TELUGU, // 0C0E..0C10 6066 UNKNOWN, // 0C11 6067 TELUGU, // 0C12..0C28 6068 UNKNOWN, // 0C29 6069 TELUGU, // 0C2A..0C39 6070 UNKNOWN, // 0C3A..0C3C 6071 TELUGU, // 0C3D..0C44 6072 UNKNOWN, // 0C45 6073 TELUGU, // 0C46..0C48 6074 UNKNOWN, // 0C49 6075 TELUGU, // 0C4A..0C4D 6076 UNKNOWN, // 0C4E..0C54 6077 TELUGU, // 0C55..0C56 6078 UNKNOWN, // 0C57 6079 TELUGU, // 0C58..0C5A 6080 UNKNOWN, // 0C5B..0C5F 6081 TELUGU, // 0C60..0C63 6082 UNKNOWN, // 0C64..0C65 6083 TELUGU, // 0C66..0C6F 6084 UNKNOWN, // 0C70..0C77 6085 TELUGU, // 0C78..0C7F 6086 UNKNOWN, // 0C80 6087 KANNADA, // 0C81..0C83 6088 UNKNOWN, // 0C84 6089 KANNADA, // 0C85..0C8C 6090 UNKNOWN, // 0C8D 6091 KANNADA, // 0C8E..0C90 6092 UNKNOWN, // 0C91 6093 KANNADA, // 0C92..0CA8 6094 UNKNOWN, // 0CA9 6095 KANNADA, // 0CAA..0CB3 6096 UNKNOWN, // 0CB4 6097 KANNADA, // 0CB5..0CB9 6098 UNKNOWN, // 0CBA..0CBB 6099 KANNADA, // 0CBC..0CC4 6100 UNKNOWN, // 0CC5 6101 KANNADA, // 0CC6..0CC8 6102 UNKNOWN, // 0CC9 6103 KANNADA, // 0CCA..0CCD 6104 UNKNOWN, // 0CCE..0CD4 6105 KANNADA, // 0CD5..0CD6 6106 UNKNOWN, // 0CD7..0CDD 6107 KANNADA, // 0CDE 6108 UNKNOWN, // 0CDF 6109 KANNADA, // 0CE0..0CE3 6110 UNKNOWN, // 0CE4..0CE5 6111 KANNADA, // 0CE6..0CEF 6112 UNKNOWN, // 0CF0 6113 KANNADA, // 0CF1..0CF2 6114 UNKNOWN, // 0CF3..0D00 6115 MALAYALAM, // 0D01..0D03 6116 UNKNOWN, // 0D04 6117 MALAYALAM, // 0D05..0D0C 6118 UNKNOWN, // 0D0D 6119 MALAYALAM, // 0D0E..0D10 6120 UNKNOWN, // 0D11 6121 MALAYALAM, // 0D12..0D3A 6122 UNKNOWN, // 0D3B..0D3C 6123 MALAYALAM, // 0D3D..0D44 6124 UNKNOWN, // 0D45 6125 MALAYALAM, // 
0D46..0D48 6126 UNKNOWN, // 0D49 6127 MALAYALAM, // 0D4A..0D4E 6128 UNKNOWN, // 0D4F..0D56 6129 MALAYALAM, // 0D57 6130 UNKNOWN, // 0D58..0D5E 6131 MALAYALAM, // 0D5F..0D63 6132 UNKNOWN, // 0D64..0D65 6133 MALAYALAM, // 0D66..0D75 6134 UNKNOWN, // 0D76..0D78 6135 MALAYALAM, // 0D79..0D7F 6136 UNKNOWN, // 0D80..0D81 6137 SINHALA, // 0D82..0D83 6138 UNKNOWN, // 0D84 6139 SINHALA, // 0D85..0D96 6140 UNKNOWN, // 0D97..0D99 6141 SINHALA, // 0D9A..0DB1 6142 UNKNOWN, // 0DB2 6143 SINHALA, // 0DB3..0DBB 6144 UNKNOWN, // 0DBC 6145 SINHALA, // 0DBD 6146 UNKNOWN, // 0DBE..0DBF 6147 SINHALA, // 0DC0..0DC6 6148 UNKNOWN, // 0DC7..0DC9 6149 SINHALA, // 0DCA 6150 UNKNOWN, // 0DCB..0DCE 6151 SINHALA, // 0DCF..0DD4 6152 UNKNOWN, // 0DD5 6153 SINHALA, // 0DD6 6154 UNKNOWN, // 0DD7 6155 SINHALA, // 0DD8..0DDF 6156 UNKNOWN, // 0DE0..0DE5 6157 SINHALA, // 0DE6..0DEF 6158 UNKNOWN, // 0DF0..0DF1 6159 SINHALA, // 0DF2..0DF4 6160 UNKNOWN, // 0DF5..0E00 6161 THAI, // 0E01..0E3A 6162 UNKNOWN, // 0E3B..0E3E 6163 COMMON, // 0E3F 6164 THAI, // 0E40..0E5B 6165 UNKNOWN, // 0E5C..0E80 6166 LAO, // 0E81..0E82 6167 UNKNOWN, // 0E83 6168 LAO, // 0E84 6169 UNKNOWN, // 0E85..0E86 6170 LAO, // 0E87..0E88 6171 UNKNOWN, // 0E89 6172 LAO, // 0E8A 6173 UNKNOWN, // 0E8B..0E8C 6174 LAO, // 0E8D 6175 UNKNOWN, // 0E8E..0E93 6176 LAO, // 0E94..0E97 6177 UNKNOWN, // 0E98 6178 LAO, // 0E99..0E9F 6179 UNKNOWN, // 0EA0 6180 LAO, // 0EA1..0EA3 6181 UNKNOWN, // 0EA4 6182 LAO, // 0EA5 6183 UNKNOWN, // 0EA6 6184 LAO, // 0EA7 6185 UNKNOWN, // 0EA8..0EA9 6186 LAO, // 0EAA..0EAB 6187 UNKNOWN, // 0EAC 6188 LAO, // 0EAD..0EB9 6189 UNKNOWN, // 0EBA 6190 LAO, // 0EBB..0EBD 6191 UNKNOWN, // 0EBE..0EBF 6192 LAO, // 0EC0..0EC4 6193 UNKNOWN, // 0EC5 6194 LAO, // 0EC6 6195 UNKNOWN, // 0EC7 6196 LAO, // 0EC8..0ECD 6197 UNKNOWN, // 0ECE..0ECF 6198 LAO, // 0ED0..0ED9 6199 UNKNOWN, // 0EDA..0EDB 6200 LAO, // 0EDC..0EDF 6201 UNKNOWN, // 0EE0..0EFF 6202 TIBETAN, // 0F00..0F47 6203 UNKNOWN, // 0F48 6204 TIBETAN, // 0F49..0F6C 6205 UNKNOWN, 
// 0F6D..0F70 6206 TIBETAN, // 0F71..0F97 6207 UNKNOWN, // 0F98 6208 TIBETAN, // 0F99..0FBC 6209 UNKNOWN, // 0FBD 6210 TIBETAN, // 0FBE..0FCC 6211 UNKNOWN, // 0FCD 6212 TIBETAN, // 0FCE..0FD4 6213 COMMON, // 0FD5..0FD8 6214 TIBETAN, // 0FD9..0FDA 6215 UNKNOWN, // 0FDB..FFF 6216 MYANMAR, // 1000..109F 6217 GEORGIAN, // 10A0..10C5 6218 UNKNOWN, // 10C6 6219 GEORGIAN, // 10C7 6220 UNKNOWN, // 10C8..10CC 6221 GEORGIAN, // 10CD 6222 UNKNOWN, // 10CE..10CF 6223 GEORGIAN, // 10D0..10FA 6224 COMMON, // 10FB 6225 GEORGIAN, // 10FC..10FF 6226 HANGUL, // 1100..11FF 6227 ETHIOPIC, // 1200..1248 6228 UNKNOWN, // 1249 6229 ETHIOPIC, // 124A..124D 6230 UNKNOWN, // 124E..124F 6231 ETHIOPIC, // 1250..1256 6232 UNKNOWN, // 1257 6233 ETHIOPIC, // 1258 6234 UNKNOWN, // 1259 6235 ETHIOPIC, // 125A..125D 6236 UNKNOWN, // 125E..125F 6237 ETHIOPIC, // 1260..1288 6238 UNKNOWN, // 1289 6239 ETHIOPIC, // 128A..128D 6240 UNKNOWN, // 128E..128F 6241 ETHIOPIC, // 1290..12B0 6242 UNKNOWN, // 12B1 6243 ETHIOPIC, // 12B2..12B5 6244 UNKNOWN, // 12B6..12B7 6245 ETHIOPIC, // 12B8..12BE 6246 UNKNOWN, // 12BF 6247 ETHIOPIC, // 12C0 6248 UNKNOWN, // 12C1 6249 ETHIOPIC, // 12C2..12C5 6250 UNKNOWN, // 12C6..12C7 6251 ETHIOPIC, // 12C8..12D6 6252 UNKNOWN, // 12D7 6253 ETHIOPIC, // 12D8..1310 6254 UNKNOWN, // 1311 6255 ETHIOPIC, // 1312..1315 6256 UNKNOWN, // 1316..1317 6257 ETHIOPIC, // 1318..135A 6258 UNKNOWN, // 135B..135C 6259 ETHIOPIC, // 135D..137C 6260 UNKNOWN, // 137D..137F 6261 ETHIOPIC, // 1380..1399 6262 UNKNOWN, // 139A..139F 6263 CHEROKEE, // 13A0..13F5 6264 UNKNOWN, // 13F6..13F7 6265 CHEROKEE, // 13F8..13FD 6266 UNKNOWN, // 13FE..13FF 6267 CANADIAN_ABORIGINAL, // 1400..167F 6268 OGHAM, // 1680..169C 6269 UNKNOWN, // 169D..169F 6270 RUNIC, // 16A0..16EA 6271 COMMON, // 16EB..16ED 6272 RUNIC, // 16EE..16F8 6273 UNKNOWN, // 16F9..16FF 6274 TAGALOG, // 1700..170C 6275 UNKNOWN, // 170D 6276 TAGALOG, // 170E..1714 6277 UNKNOWN, // 1715..171F 6278 HANUNOO, // 1720..1734 6279 COMMON, // 1735..1736 
6280 UNKNOWN, // 1737..173F 6281 BUHID, // 1740..1753 6282 UNKNOWN, // 1754..175F 6283 TAGBANWA, // 1760..176C 6284 UNKNOWN, // 176D 6285 TAGBANWA, // 176E..1770 6286 UNKNOWN, // 1771 6287 TAGBANWA, // 1772..1773 6288 UNKNOWN, // 1774..177F 6289 KHMER, // 1780..17DD 6290 UNKNOWN, // 17DE..17DF 6291 KHMER, // 17E0..17E9 6292 UNKNOWN, // 17EA..17EF 6293 KHMER, // 17F0..17F9 6294 UNKNOWN, // 17FA..17FF 6295 MONGOLIAN, // 1800..1801 6296 COMMON, // 1802..1803 6297 MONGOLIAN, // 1804 6298 COMMON, // 1805 6299 MONGOLIAN, // 1806..180E 6300 UNKNOWN, // 180F 6301 MONGOLIAN, // 1810..1819 6302 UNKNOWN, // 181A..181F 6303 MONGOLIAN, // 1820..1877 6304 UNKNOWN, // 1878..187F 6305 MONGOLIAN, // 1880..18AA 6306 UNKNOWN, // 18AB..18AF 6307 CANADIAN_ABORIGINAL, // 18B0..18F5 6308 UNKNOWN, // 18F6..18FF 6309 LIMBU, // 1900..191E 6310 UNKNOWN, // 191F 6311 LIMBU, // 1920..192B 6312 UNKNOWN, // 192C..192F 6313 LIMBU, // 1930..193B 6314 UNKNOWN, // 193C..193F 6315 LIMBU, // 1940 6316 UNKNOWN, // 1941..1943 6317 LIMBU, // 1944..194F 6318 TAI_LE, // 1950..196D 6319 UNKNOWN, // 196E..196F 6320 TAI_LE, // 1970..1974 6321 UNKNOWN, // 1975..197F 6322 NEW_TAI_LUE, // 1980..19AB 6323 UNKNOWN, // 19AC..19AF 6324 NEW_TAI_LUE, // 19B0..19C9 6325 UNKNOWN, // 19CA..19CF 6326 NEW_TAI_LUE, // 19D0..19DA 6327 UNKNOWN, // 19DB..19DD 6328 NEW_TAI_LUE, // 19DE..19DF 6329 KHMER, // 19E0..19FF 6330 BUGINESE, // 1A00..1A1B 6331 UNKNOWN, // 1A1C..1A1D 6332 BUGINESE, // 1A1E..1A1F 6333 TAI_THAM, // 1A20..1A5E 6334 UNKNOWN, // 1A5F 6335 TAI_THAM, // 1A60..1A7C 6336 UNKNOWN, // 1A7D..1A7E 6337 TAI_THAM, // 1A7F..1A89 6338 UNKNOWN, // 1A8A..1A8F 6339 TAI_THAM, // 1A90..1A99 6340 UNKNOWN, // 1A9A..1A9F 6341 TAI_THAM, // 1AA0..1AAD 6342 UNKNOWN, // 1AAE..1AAF 6343 INHERITED, // 1AB0..1ABE 6344 UNKNOWN, // 1ABF..1AFF 6345 BALINESE, // 1B00..1B4B 6346 UNKNOWN, // 1B4C..1B4F 6347 BALINESE, // 1B50..1B7C 6348 UNKNOWN, // 1B7D..1B7F 6349 SUNDANESE, // 1B80..1BBF 6350 BATAK, // 1BC0..1BF3 6351 UNKNOWN, // 1BF4..1BFB 
6352 BATAK, // 1BFC..1BFF 6353 LEPCHA, // 1C00..1C37 6354 UNKNOWN, // 1C38..1C3A 6355 LEPCHA, // 1C3B..1C49 6356 UNKNOWN, // 1C4A..1C4C 6357 LEPCHA, // 1C4D..1C4F 6358 OL_CHIKI, // 1C50..1C7F 6359 UNKNOWN, // 1C80..1CBF 6360 SUNDANESE, // 1CC0..1CC7 6361 UNKNOWN, // 1CC8..1CCF 6362 INHERITED, // 1CD0..1CD2 6363 COMMON, // 1CD3 6364 INHERITED, // 1CD4..1CE0 6365 COMMON, // 1CE1 6366 INHERITED, // 1CE2..1CE8 6367 COMMON, // 1CE9..1CEC 6368 INHERITED, // 1CED 6369 COMMON, // 1CEE..1CF3 6370 INHERITED, // 1CF4 6371 COMMON, // 1CF5..1CF6 6372 UNKNOWN, // 1CF7 6373 INHERITED, // 1CF8..1CF9 6374 UNKNOWN, // 1CFA..1CFF 6375 LATIN, // 1D00..1D25 6376 GREEK, // 1D26..1D2A 6377 CYRILLIC, // 1D2B 6378 LATIN, // 1D2C..1D5C 6379 GREEK, // 1D5D..1D61 6380 LATIN, // 1D62..1D65 6381 GREEK, // 1D66..1D6A 6382 LATIN, // 1D6B..1D77 6383 CYRILLIC, // 1D78 6384 LATIN, // 1D79..1DBE 6385 GREEK, // 1DBF 6386 INHERITED, // 1DC0..1DF5 6387 UNKNOWN, // 1DF6..1DFB 6388 INHERITED, // 1DFC..1DFF 6389 LATIN, // 1E00..1EFF 6390 GREEK, // 1F00..1F15 6391 UNKNOWN, // 1F16..1F17 6392 GREEK, // 1F18..1F1D 6393 UNKNOWN, // 1F1E..1F1F 6394 GREEK, // 1F20..1F45 6395 UNKNOWN, // 1F46..1F47 6396 GREEK, // 1F48..1F4D 6397 UNKNOWN, // 1F4E..1F4F 6398 GREEK, // 1F50..1F57 6399 UNKNOWN, // 1F58 6400 GREEK, // 1F59 6401 UNKNOWN, // 1F5A 6402 GREEK, // 1F5B 6403 UNKNOWN, // 1F5C 6404 GREEK, // 1F5D 6405 UNKNOWN, // 1F5E 6406 GREEK, // 1F5F..1F7D 6407 UNKNOWN, // 1F7E..1F7F 6408 GREEK, // 1F80..1FB4 6409 UNKNOWN, // 1FB5 6410 GREEK, // 1FB6..1FC4 6411 UNKNOWN, // 1FC5 6412 GREEK, // 1FC6..1FD3 6413 UNKNOWN, // 1FD4..1FD5 6414 GREEK, // 1FD6..1FDB 6415 UNKNOWN, // 1FDC 6416 GREEK, // 1FDD..1FEF 6417 UNKNOWN, // 1FF0..1FF1 6418 GREEK, // 1FF2..1FF4 6419 UNKNOWN, // 1FF5 6420 GREEK, // 1FF6..1FFE 6421 UNKNOWN, // 1FFF 6422 COMMON, // 2000..200B 6423 INHERITED, // 200C..200D 6424 COMMON, // 200E..2064 6425 UNKNOWN, // 2065 6426 COMMON, // 2066..2070 6427 LATIN, // 2071 6428 UNKNOWN, // 2072..2073 6429 COMMON, // 
2074..207E 6430 LATIN, // 207F 6431 COMMON, // 2080..208E 6432 UNKNOWN, // 208F 6433 LATIN, // 2090..209C 6434 UNKNOWN, // 209D..209F 6435 COMMON, // 20A0..20BE 6436 UNKNOWN, // 20BF..20CF 6437 INHERITED, // 20D0..20F0 6438 UNKNOWN, // 20F1..20FF 6439 COMMON, // 2100..2125 6440 GREEK, // 2126 6441 COMMON, // 2127..2129 6442 LATIN, // 212A..212B 6443 COMMON, // 212C..2131 6444 LATIN, // 2132 6445 COMMON, // 2133..214D 6446 LATIN, // 214E 6447 COMMON, // 214F..215F 6448 LATIN, // 2160..2188 6449 COMMON, // 2189..218B 6450 UNKNOWN, // 218C..218F 6451 COMMON, // 2190..23FA 6452 UNKNOWN, // 23FB..23FF 6453 COMMON, // 2400..2426 6454 UNKNOWN, // 2427..243F 6455 COMMON, // 2440..244A 6456 UNKNOWN, // 244B..245F 6457 COMMON, // 2460..27FF 6458 BRAILLE, // 2800..28FF 6459 COMMON, // 2900..2B73 6460 UNKNOWN, // 2B74..2B75 6461 COMMON, // 2B76..2B95 6462 UNKNOWN, // 2B96..2B97 6463 COMMON, // 2B98..2BB9 6464 UNKNOWN, // 2BBA..2BBC 6465 COMMON, // 2BBD..2BC8 6466 UNKNOWN, // 2BC9 6467 COMMON, // 2BCA..2BD1 6468 UNKNOWN, // 2BD2..2BEB 6469 COMMON, // 2BEC..2BEF 6470 UNKNOWN, // 2BF0..2BFF 6471 GLAGOLITIC, // 2C00..2C2E 6472 UNKNOWN, // 2C2F 6473 GLAGOLITIC, // 2C30..2C5E 6474 UNKNOWN, // 2C5F 6475 LATIN, // 2C60..2C7F 6476 COPTIC, // 2C80..2CF3 6477 UNKNOWN, // 2CF4..2CF8 6478 COPTIC, // 2CF9..2CFF 6479 GEORGIAN, // 2D00..2D25 6480 UNKNOWN, // 2D26 6481 GEORGIAN, // 2D27 6482 UNKNOWN, // 2D28..2D2C 6483 GEORGIAN, // 2D2D 6484 UNKNOWN, // 2D2E..2D2F 6485 TIFINAGH, // 2D30..2D67 6486 UNKNOWN, // 2D68..2D6E 6487 TIFINAGH, // 2D6F..2D70 6488 UNKNOWN, // 2D71..2D7E 6489 TIFINAGH, // 2D7F 6490 ETHIOPIC, // 2D80..2D96 6491 UNKNOWN, // 2D97..2D9F 6492 ETHIOPIC, // 2DA0..2DA6 6493 UNKNOWN, // 2DA7 6494 ETHIOPIC, // 2DA8..2DAE 6495 UNKNOWN, // 2DAF 6496 ETHIOPIC, // 2DB0..2DB6 6497 UNKNOWN, // 2DB7 6498 ETHIOPIC, // 2DB8..2DBE 6499 UNKNOWN, // 2DBF 6500 ETHIOPIC, // 2DC0..2DC6 6501 UNKNOWN, // 2DC7 6502 ETHIOPIC, // 2DC8..2DCE 6503 UNKNOWN, // 2DCF 6504 ETHIOPIC, // 2DD0..2DD6 6505 
UNKNOWN, // 2DD7 6506 ETHIOPIC, // 2DD8..2DDE 6507 UNKNOWN, // 2DDF 6508 CYRILLIC, // 2DE0..2DFF 6509 COMMON, // 2E00..2E42 6510 UNKNOWN, // 2E43..2E7F 6511 HAN, // 2E80..2E99 6512 UNKNOWN, // 2E9A 6513 HAN, // 2E9B..2EF3 6514 UNKNOWN, // 2EF4..2EFF 6515 HAN, // 2F00..2FD5 6516 UNKNOWN, // 2FD6..2FEF 6517 COMMON, // 2FF0..2FFB 6518 UNKNOWN, // 2FFC..2FFF 6519 COMMON, // 3000..3004 6520 HAN, // 3005 6521 COMMON, // 3006 6522 HAN, // 3007 6523 COMMON, // 3008..3020 6524 HAN, // 3021..3029 6525 INHERITED, // 302A..302D 6526 HANGUL, // 302E..302F 6527 COMMON, // 3030..3037 6528 HAN, // 3038..303B 6529 COMMON, // 303C..303F 6530 UNKNOWN, // 3040 6531 HIRAGANA, // 3041..3096 6532 UNKNOWN, // 3097..3098 6533 INHERITED, // 3099..309A 6534 COMMON, // 309B..309C 6535 HIRAGANA, // 309D..309F 6536 COMMON, // 30A0 6537 KATAKANA, // 30A1..30FA 6538 COMMON, // 30FB..30FC 6539 KATAKANA, // 30FD..30FF 6540 UNKNOWN, // 3100..3104 6541 BOPOMOFO, // 3105..312D 6542 UNKNOWN, // 312E..3130 6543 HANGUL, // 3131..318E 6544 UNKNOWN, // 318F 6545 COMMON, // 3190..319F 6546 BOPOMOFO, // 31A0..31BA 6547 UNKNOWN, // 31BB..31BF 6548 COMMON, // 31C0..31E3 6549 UNKNOWN, // 31E4..31EF 6550 KATAKANA, // 31F0..31FF 6551 HANGUL, // 3200..321E 6552 UNKNOWN, // 321F 6553 COMMON, // 3220..325F 6554 HANGUL, // 3260..327E 6555 COMMON, // 327F..32CF 6556 KATAKANA, // 32D0..32FE 6557 UNKNOWN, // 32FF 6558 KATAKANA, // 3300..3357 6559 COMMON, // 3358..33FF 6560 HAN, // 3400..4DB5 6561 UNKNOWN, // 4DB6..4DBF 6562 COMMON, // 4DC0..4DFF 6563 HAN, // 4E00..9FD5 6564 UNKNOWN, // 9FD6..9FFF 6565 YI, // A000..A48C 6566 UNKNOWN, // A48D..A48F 6567 YI, // A490..A4C6 6568 UNKNOWN, // A4C7..A4CF 6569 LISU, // A4D0..A4FF 6570 VAI, // A500..A62B 6571 UNKNOWN, // A62C..A63F 6572 CYRILLIC, // A640..A69F 6573 BAMUM, // A6A0..A6F7 6574 UNKNOWN, // A6F8..A6FF 6575 COMMON, // A700..A721 6576 LATIN, // A722..A787 6577 COMMON, // A788..A78A 6578 LATIN, // A78B..A7AD 6579 UNKNOWN, // A7AE..A7AF 6580 LATIN, // A7B0..A7B7 6581 
UNKNOWN, // A7B8..A7F6 6582 LATIN, // A7F7..A7FF 6583 SYLOTI_NAGRI, // A800..A82B 6584 UNKNOWN, // A82C..A82F 6585 COMMON, // A830..A839 6586 UNKNOWN, // A83A..A83F 6587 PHAGS_PA, // A840..A877 6588 UNKNOWN, // A878..A87F 6589 SAURASHTRA, // A880..A8C4 6590 UNKNOWN, // A8C5..A8CD 6591 SAURASHTRA, // A8CE..A8D9 6592 UNKNOWN, // A8DA..A8DF 6593 DEVANAGARI, // A8E0..A8FD 6594 UNKNOWN, // A8FE..A8FF 6595 KAYAH_LI, // A900..A92D 6596 COMMON, // A92E 6597 KAYAH_LI, // A92F 6598 REJANG, // A930..A953 6599 UNKNOWN, // A954..A95E 6600 REJANG, // A95F 6601 HANGUL, // A960..A97C 6602 UNKNOWN, // A97D..A97F 6603 JAVANESE, // A980..A9CD 6604 UNKNOWN, // A9CE 6605 COMMON, // A9CF 6606 JAVANESE, // A9D0..A9D9 6607 UNKNOWN, // A9DA..A9DD 6608 JAVANESE, // A9DE..A9DF 6609 MYANMAR, // A9E0..A9FE 6610 UNKNOWN, // A9FF 6611 CHAM, // AA00..AA36 6612 UNKNOWN, // AA37..AA3F 6613 CHAM, // AA40..AA4D 6614 UNKNOWN, // AA4E..AA4F 6615 CHAM, // AA50..AA59 6616 UNKNOWN, // AA5A..AA5B 6617 CHAM, // AA5C..AA5F 6618 MYANMAR, // AA60..AA7F 6619 TAI_VIET, // AA80..AAC2 6620 UNKNOWN, // AAC3..AADA 6621 TAI_VIET, // AADB..AADF 6622 MEETEI_MAYEK, // AAE0..AAF6 6623 UNKNOWN, // AAF7..AB00 6624 ETHIOPIC, // AB01..AB06 6625 UNKNOWN, // AB07..AB08 6626 ETHIOPIC, // AB09..AB0E 6627 UNKNOWN, // AB0F..AB10 6628 ETHIOPIC, // AB11..AB16 6629 UNKNOWN, // AB17..AB1F 6630 ETHIOPIC, // AB20..AB26 6631 UNKNOWN, // AB27 6632 ETHIOPIC, // AB28..AB2E 6633 UNKNOWN, // AB2F 6634 LATIN, // AB30..AB5A 6635 COMMON, // AB5B 6636 LATIN, // AB5C..AB64 6637 GREEK, // AB65 6638 UNKNOWN, // AB66..AB6F 6639 CHEROKEE, // AB70..ABBF 6640 MEETEI_MAYEK, // ABC0..ABED 6641 UNKNOWN, // ABEE..ABEF 6642 MEETEI_MAYEK, // ABF0..ABF9 6643 UNKNOWN, // ABFA..ABFF 6644 HANGUL, // AC00..D7A3 6645 UNKNOWN, // D7A4..D7AF 6646 HANGUL, // D7B0..D7C6 6647 UNKNOWN, // D7C7..D7CA 6648 HANGUL, // D7CB..D7FB 6649 UNKNOWN, // D7FC..F8FF 6650 HAN, // F900..FA6D 6651 UNKNOWN, // FA6E..FA6F 6652 HAN, // FA70..FAD9 6653 UNKNOWN, // FADA..FAFF 6654 LATIN, // 
FB00..FB06 6655 UNKNOWN, // FB07..FB12 6656 ARMENIAN, // FB13..FB17 6657 UNKNOWN, // FB18..FB1C 6658 HEBREW, // FB1D..FB36 6659 UNKNOWN, // FB37 6660 HEBREW, // FB38..FB3C 6661 UNKNOWN, // FB3D 6662 HEBREW, // FB3E 6663 UNKNOWN, // FB3F 6664 HEBREW, // FB40..FB41 6665 UNKNOWN, // FB42 6666 HEBREW, // FB43..FB44 6667 UNKNOWN, // FB45 6668 HEBREW, // FB46..FB4F 6669 ARABIC, // FB50..FBC1 6670 UNKNOWN, // FBC2..FBD2 6671 ARABIC, // FBD3..FD3D 6672 COMMON, // FD3E..FD3F 6673 UNKNOWN, // FD40..FD4F 6674 ARABIC, // FD50..FD8F 6675 UNKNOWN, // FD90..FD91 6676 ARABIC, // FD92..FDC7 6677 UNKNOWN, // FDC8..FDEF 6678 ARABIC, // FDF0..FDFD 6679 UNKNOWN, // FDFE..FDFF 6680 INHERITED, // FE00..FE0F 6681 COMMON, // FE10..FE19 6682 UNKNOWN, // FE1A..FE1F 6683 INHERITED, // FE20..FE2D 6684 CYRILLIC, // FE2E..FE2F 6685 COMMON, // FE30..FE52 6686 UNKNOWN, // FE53 6687 COMMON, // FE54..FE66 6688 UNKNOWN, // FE67 6689 COMMON, // FE68..FE6B 6690 UNKNOWN, // FE6C..FE6F 6691 ARABIC, // FE70..FE74 6692 UNKNOWN, // FE75 6693 ARABIC, // FE76..FEFC 6694 UNKNOWN, // FEFD..FEFE 6695 COMMON, // FEFF 6696 UNKNOWN, // FF00 6697 COMMON, // FF01..FF20 6698 LATIN, // FF21..FF3A 6699 COMMON, // FF3B..FF40 6700 LATIN, // FF41..FF5A 6701 COMMON, // FF5B..FF65 6702 KATAKANA, // FF66..FF6F 6703 COMMON, // FF70 6704 KATAKANA, // FF71..FF9D 6705 COMMON, // FF9E..FF9F 6706 HANGUL, // FFA0..FFBE 6707 UNKNOWN, // FFBF..FFC1 6708 HANGUL, // FFC2..FFC7 6709 UNKNOWN, // FFC8..FFC9 6710 HANGUL, // FFCA..FFCF 6711 UNKNOWN, // FFD0..FFD1 6712 HANGUL, // FFD2..FFD7 6713 UNKNOWN, // FFD8..FFD9 6714 HANGUL, // FFDA..FFDC 6715 UNKNOWN, // FFDD..FFDF 6716 COMMON, // FFE0..FFE6 6717 UNKNOWN, // FFE7 6718 COMMON, // FFE8..FFEE 6719 UNKNOWN, // FFEF..FFF8 6720 COMMON, // FFF9..FFFD 6721 UNKNOWN, // FFFE..FFFF 6722 LINEAR_B, // 10000..1000B 6723 UNKNOWN, // 1000C 6724 LINEAR_B, // 1000D..10026 6725 UNKNOWN, // 10027 6726 LINEAR_B, // 10028..1003A 6727 UNKNOWN, // 1003B 6728 LINEAR_B, // 1003C..1003D 6729 UNKNOWN, // 1003E 
6730 LINEAR_B, // 1003F..1004D 6731 UNKNOWN, // 1004E..1004F 6732 LINEAR_B, // 10050..1005D 6733 UNKNOWN, // 1005E..1007F 6734 LINEAR_B, // 10080..100FA 6735 UNKNOWN, // 100FB..100FF 6736 COMMON, // 10100..10102 6737 UNKNOWN, // 10103..10106 6738 COMMON, // 10107..10133 6739 UNKNOWN, // 10134..10136 6740 COMMON, // 10137..1013F 6741 GREEK, // 10140..1018C 6742 UNKNOWN, // 1018D..1018F 6743 COMMON, // 10190..1019B 6744 UNKNOWN, // 1019C..1019F 6745 GREEK, // 101A0 6746 UNKNOWN, // 101A1..101CF 6747 COMMON, // 101D0..101FC 6748 INHERITED, // 101FD 6749 UNKNOWN, // 101FE..1027F 6750 LYCIAN, // 10280..1029C 6751 UNKNOWN, // 1029D..1029F 6752 CARIAN, // 102A0..102D0 6753 UNKNOWN, // 102D1..102DF 6754 INHERITED, // 102E0 6755 COMMON, // 102E1..102FB 6756 UNKNOWN, // 102FC..102FF 6757 OLD_ITALIC, // 10300..10323 6758 UNKNOWN, // 10324..1032F 6759 GOTHIC, // 10330..1034A 6760 UNKNOWN, // 1034B..1034F 6761 OLD_PERMIC, // 10350..1037A 6762 UNKNOWN, // 1037B..1037F 6763 UGARITIC, // 10380..1039D 6764 UNKNOWN, // 1039E 6765 UGARITIC, // 1039F 6766 OLD_PERSIAN, // 103A0..103C3 6767 UNKNOWN, // 103C4..103C7 6768 OLD_PERSIAN, // 103C8..103D5 6769 UNKNOWN, // 103D6..103FF 6770 DESERET, // 10400..1044F 6771 SHAVIAN, // 10450..1047F 6772 OSMANYA, // 10480..1049D 6773 UNKNOWN, // 1049E..1049F 6774 OSMANYA, // 104A0..104A9 6775 UNKNOWN, // 104AA..104FF 6776 ELBASAN, // 10500..10527 6777 UNKNOWN, // 10528..1052F 6778 CAUCASIAN_ALBANIAN, // 10530..10563 6779 UNKNOWN, // 10564..1056E 6780 CAUCASIAN_ALBANIAN, // 1056F 6781 UNKNOWN, // 10570..105FF 6782 LINEAR_A, // 10600..10736 6783 UNKNOWN, // 10737..1073F 6784 LINEAR_A, // 10740..10755 6785 UNKNOWN, // 10756..1075F 6786 LINEAR_A, // 10760..10767 6787 UNKNOWN, // 10768..107FF 6788 CYPRIOT, // 10800..10805 6789 UNKNOWN, // 10806..10807 6790 CYPRIOT, // 10808 6791 UNKNOWN, // 10809 6792 CYPRIOT, // 1080A..10835 6793 UNKNOWN, // 10836 6794 CYPRIOT, // 10837..10838 6795 UNKNOWN, // 10839..1083B 6796 CYPRIOT, // 1083C 6797 UNKNOWN, // 
1083D..1083E 6798 CYPRIOT, // 1083F 6799 IMPERIAL_ARAMAIC, // 10840..10855 6800 UNKNOWN, // 10856 6801 IMPERIAL_ARAMAIC, // 10857..1085F 6802 PALMYRENE, // 10860..1087F 6803 NABATAEAN, // 10880..1089E 6804 UNKNOWN, // 1089F..108A6 6805 NABATAEAN, // 108A7..108AF 6806 UNKNOWN, // 108B0..108DF 6807 HATRAN, // 108E0..108F2 6808 UNKNOWN, // 108F3 6809 HATRAN, // 108F4..108F5 6810 UNKNOWN, // 108F6..108FA 6811 HATRAN, // 108FB..108FF 6812 PHOENICIAN, // 10900..1091B 6813 UNKNOWN, // 1091C..1091E 6814 PHOENICIAN, // 1091F 6815 LYDIAN, // 10920..10939 6816 UNKNOWN, // 1093A..1093E 6817 LYDIAN, // 1093F 6818 UNKNOWN, // 10940..1097F 6819 MEROITIC_HIEROGLYPHS, // 10980..1099F 6820 MEROITIC_CURSIVE, // 109A0..109B7 6821 UNKNOWN, // 109B8..109BB 6822 MEROITIC_CURSIVE, // 109BC..109CF 6823 UNKNOWN, // 109D0..109D1 6824 MEROITIC_CURSIVE, // 109D2..109FF 6825 KHAROSHTHI, // 10A00..10A03 6826 UNKNOWN, // 10A04 6827 KHAROSHTHI, // 10A05..10A06 6828 UNKNOWN, // 10A07..10A0B 6829 KHAROSHTHI, // 10A0C..10A13 6830 UNKNOWN, // 10A14 6831 KHAROSHTHI, // 10A15..10A17 6832 UNKNOWN, // 10A18 6833 KHAROSHTHI, // 10A19..10A33 6834 UNKNOWN, // 10A34..10A37 6835 KHAROSHTHI, // 10A38..10A3A 6836 UNKNOWN, // 10A3B..10A3E 6837 KHAROSHTHI, // 10A3F..10A47 6838 UNKNOWN, // 10A48..10A4F 6839 KHAROSHTHI, // 10A50..10A58 6840 UNKNOWN, // 10A59..10A5F 6841 OLD_SOUTH_ARABIAN, // 10A60..10A7F 6842 OLD_NORTH_ARABIAN, // 10A80..10A9F 6843 UNKNOWN, // 10AA0..10ABF 6844 MANICHAEAN, // 10AC0..10AE6 6845 UNKNOWN, // 10AE7..10AEA 6846 MANICHAEAN, // 10AEB..10AF6 6847 UNKNOWN, // 10AF7..10AFF 6848 AVESTAN, // 10B00..10B35 6849 UNKNOWN, // 10B36..10B38 6850 AVESTAN, // 10B39..10B3F 6851 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 6852 UNKNOWN, // 10B56..10B57 6853 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 6854 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 6855 UNKNOWN, // 10B73..10B77 6856 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 6857 PSALTER_PAHLAVI, // 10B80..10B91 6858 UNKNOWN, // 10B92..10B98 6859 PSALTER_PAHLAVI, // 
10B99..10B9C 6860 UNKNOWN, // 10B9D..10BA8 6861 PSALTER_PAHLAVI, // 10BA9..10BAF 6862 UNKNOWN, // 10BB0..10BFF 6863 OLD_TURKIC, // 10C00..10C48 6864 UNKNOWN, // 10C49..10C7F 6865 OLD_HUNGARIAN, // 10C80..10CB2 6866 UNKNOWN, // 10CB3..10CBF 6867 OLD_HUNGARIAN, // 10CC0..10CF2 6868 UNKNOWN, // 10CF3..10CF9 6869 OLD_HUNGARIAN, // 10CFA..10CFF 6870 UNKNOWN, // 10D00..10E5F 6871 ARABIC, // 10E60..10E7E 6872 UNKNOWN, // 10E7F..10FFF 6873 BRAHMI, // 11000..1104D 6874 UNKNOWN, // 1104E..11051 6875 BRAHMI, // 11052..1106F 6876 UNKNOWN, // 11070..1107E 6877 BRAHMI, // 1107F 6878 KAITHI, // 11080..110C1 6879 UNKNOWN, // 110C2..110CF 6880 SORA_SOMPENG, // 110D0..110E8 6881 UNKNOWN, // 110E9..110EF 6882 SORA_SOMPENG, // 110F0..110F9 6883 UNKNOWN, // 110FA..110FF 6884 CHAKMA, // 11100..11134 6885 UNKNOWN, // 11135 6886 CHAKMA, // 11136..11143 6887 UNKNOWN, // 11144..1114F 6888 MAHAJANI, // 11150..11176 6889 UNKNOWN, // 11177..1117F 6890 SHARADA, // 11180..111CD 6891 UNKNOWN, // 111CE..111CF 6892 SHARADA, // 111D0..111DF 6893 UNKNOWN, // 111E0 6894 SINHALA, // 111E1..111F4 6895 UNKNOWN, // 111F5..111FF 6896 KHOJKI, // 11200..11211 6897 UNKNOWN, // 11212 6898 KHOJKI, // 11213..1123D 6899 UNKNOWN, // 1123E..1127F 6900 MULTANI, // 11280..11286 6901 UNKNOWN, // 11287 6902 MULTANI, // 11288 6903 UNKNOWN, // 11289 6904 MULTANI, // 1128A..1128D 6905 UNKNOWN, // 1128E 6906 MULTANI, // 1128F..1129D 6907 UNKNOWN, // 1129E 6908 MULTANI, // 1129F..112A9 6909 UNKNOWN, // 112AA..112AF 6910 KHUDAWADI, // 112B0..112EA 6911 UNKNOWN, // 112EB..112EF 6912 KHUDAWADI, // 112F0..112F9 6913 UNKNOWN, // 112FA..112FF 6914 GRANTHA, // 11300..11303 6915 UNKNOWN, // 11304 6916 GRANTHA, // 11305..1130C 6917 UNKNOWN, // 1130D..1130E 6918 GRANTHA, // 1130F..11310 6919 UNKNOWN, // 11311..11312 6920 GRANTHA, // 11313..11328 6921 UNKNOWN, // 11329 6922 GRANTHA, // 1132A..11330 6923 UNKNOWN, // 11331 6924 GRANTHA, // 11332..11333 6925 UNKNOWN, // 11334 6926 GRANTHA, // 11335..11339 6927 UNKNOWN, // 1133A..1133B 
6928 GRANTHA, // 1133C..11344 6929 UNKNOWN, // 11345..11346 6930 GRANTHA, // 11347..11348 6931 UNKNOWN, // 11349..1134A 6932 GRANTHA, // 1134B..1134D 6933 UNKNOWN, // 1134E..1134F 6934 GRANTHA, // 11350 6935 UNKNOWN, // 11351..11356 6936 GRANTHA, // 11357 6937 UNKNOWN, // 11358..1135C 6938 GRANTHA, // 1135D..11363 6939 UNKNOWN, // 11364..11365 6940 GRANTHA, // 11366..1136C 6941 UNKNOWN, // 1136D..1136F 6942 GRANTHA, // 11370..11374 6943 UNKNOWN, // 11375..1147F 6944 TIRHUTA, // 11480..114C7 6945 UNKNOWN, // 114C8..114CF 6946 TIRHUTA, // 114D0..114D9 6947 UNKNOWN, // 114DA..1157F 6948 SIDDHAM, // 11580..115B5 6949 UNKNOWN, // 115B6..115B7 6950 SIDDHAM, // 115B8..115DD 6951 UNKNOWN, // 115DE..115FF 6952 MODI, // 11600..11644 6953 UNKNOWN, // 11645..1164F 6954 MODI, // 11650..11659 6955 UNKNOWN, // 1165A..1167F 6956 TAKRI, // 11680..116B7 6957 UNKNOWN, // 116B8..116BF 6958 TAKRI, // 116C0..116C9 6959 UNKNOWN, // 116CA..116FF 6960 AHOM, // 11700..11719 6961 UNKNOWN, // 1171A..1171C 6962 AHOM, // 1171D..1172B 6963 UNKNOWN, // 1172C..1172F 6964 AHOM, // 11730..1173F 6965 UNKNOWN, // 11740..1189F 6966 WARANG_CITI, // 118A0..118F2 6967 UNKNOWN, // 118F3..118FE 6968 WARANG_CITI, // 118FF 6969 UNKNOWN, // 11900..11ABF 6970 PAU_CIN_HAU, // 11AC0..11AF8 6971 UNKNOWN, // 11AF9..11FFF 6972 CUNEIFORM, // 12000..12399 6973 UNKNOWN, // 1239A..123FF 6974 CUNEIFORM, // 12400..1246E 6975 UNKNOWN, // 1246F 6976 CUNEIFORM, // 12470..12474 6977 UNKNOWN, // 12475..1247F 6978 CUNEIFORM, // 12480..12543 6979 UNKNOWN, // 12544..12FFF 6980 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 6981 UNKNOWN, // 1342F..143FF 6982 ANATOLIAN_HIEROGLYPHS, // 14400..14646 6983 UNKNOWN, // 14647..167FF 6984 BAMUM, // 16800..16A38 6985 UNKNOWN, // 16A39..16A3F 6986 MRO, // 16A40..16A5E 6987 UNKNOWN, // 16A5F 6988 MRO, // 16A60..16A69 6989 UNKNOWN, // 16A6A..16A6D 6990 MRO, // 16A6E..16A6F 6991 UNKNOWN, // 16A70..16ACF 6992 BASSA_VAH, // 16AD0..16AED 6993 UNKNOWN, // 16AEE..16AEF 6994 BASSA_VAH, // 16AF0..16AF5 6995 
UNKNOWN, // 16AF6..16AFF 6996 PAHAWH_HMONG, // 16B00..16B45 6997 UNKNOWN, // 16B46..16B4F 6998 PAHAWH_HMONG, // 16B50..16B59 6999 UNKNOWN, // 16B5A 7000 PAHAWH_HMONG, // 16B5B..16B61 7001 UNKNOWN, // 16B62 7002 PAHAWH_HMONG, // 16B63..16B77 7003 UNKNOWN, // 16B78..16B7C 7004 PAHAWH_HMONG, // 16B7D..16B8F 7005 UNKNOWN, // 16B90..16EFF 7006 MIAO, // 16F00..16F44 7007 UNKNOWN, // 16F45..16F4F 7008 MIAO, // 16F50..16F7E 7009 UNKNOWN, // 16F7F..16F8E 7010 MIAO, // 16F8F..16F9F 7011 UNKNOWN, // 16FA0..1AFFF 7012 KATAKANA, // 1B000 7013 HIRAGANA, // 1B001 7014 UNKNOWN, // 1B002..1BBFF 7015 DUPLOYAN, // 1BC00..1BC6A 7016 UNKNOWN, // 1BC6B..1BC6F 7017 DUPLOYAN, // 1BC70..1BC7C 7018 UNKNOWN, // 1BC7D..1BC7F 7019 DUPLOYAN, // 1BC80..1BC88 7020 UNKNOWN, // 1BC89..1BC8F 7021 DUPLOYAN, // 1BC90..1BC99 7022 UNKNOWN, // 1BC9A..1BC9B 7023 DUPLOYAN, // 1BC9C..1BC9F 7024 COMMON, // 1BCA0..1BCA3 7025 UNKNOWN, // 1BCA4..1CFFF 7026 COMMON, // 1D000..1D0F5 7027 UNKNOWN, // 1D0F6..1D0FF 7028 COMMON, // 1D100..1D126 7029 UNKNOWN, // 1D127..1D128 7030 COMMON, // 1D129..1D166 7031 INHERITED, // 1D167..1D169 7032 COMMON, // 1D16A..1D17A 7033 INHERITED, // 1D17B..1D182 7034 COMMON, // 1D183..1D184 7035 INHERITED, // 1D185..1D18B 7036 COMMON, // 1D18C..1D1A9 7037 INHERITED, // 1D1AA..1D1AD 7038 COMMON, // 1D1AE..1D1E8 7039 UNKNOWN, // 1D1E9..1D1FF 7040 GREEK, // 1D200..1D245 7041 UNKNOWN, // 1D246..1D2FF 7042 COMMON, // 1D300..1D356 7043 UNKNOWN, // 1D357..1D35F 7044 COMMON, // 1D360..1D371 7045 UNKNOWN, // 1D372..1D3FF 7046 COMMON, // 1D400..1D454 7047 UNKNOWN, // 1D455 7048 COMMON, // 1D456..1D49C 7049 UNKNOWN, // 1D49D 7050 COMMON, // 1D49E..1D49F 7051 UNKNOWN, // 1D4A0..1D4A1 7052 COMMON, // 1D4A2 7053 UNKNOWN, // 1D4A3..1D4A4 7054 COMMON, // 1D4A5..1D4A6 7055 UNKNOWN, // 1D4A7..1D4A8 7056 COMMON, // 1D4A9..1D4AC 7057 UNKNOWN, // 1D4AD 7058 COMMON, // 1D4AE..1D4B9 7059 UNKNOWN, // 1D4BA 7060 COMMON, // 1D4BB 7061 UNKNOWN, // 1D4BC 7062 COMMON, // 1D4BD..1D4C3 7063 UNKNOWN, // 1D4C4 7064 
COMMON, // 1D4C5..1D505 7065 UNKNOWN, // 1D506 7066 COMMON, // 1D507..1D50A 7067 UNKNOWN, // 1D50B..1D50C 7068 COMMON, // 1D50D..1D514 7069 UNKNOWN, // 1D515 7070 COMMON, // 1D516..1D51C 7071 UNKNOWN, // 1D51D 7072 COMMON, // 1D51E..1D539 7073 UNKNOWN, // 1D53A 7074 COMMON, // 1D53B..1D53E 7075 UNKNOWN, // 1D53F 7076 COMMON, // 1D540..1D544 7077 UNKNOWN, // 1D545 7078 COMMON, // 1D546 7079 UNKNOWN, // 1D547..1D549 7080 COMMON, // 1D54A..1D550 7081 UNKNOWN, // 1D551 7082 COMMON, // 1D552..1D6A5 7083 UNKNOWN, // 1D6A6..1D6A7 7084 COMMON, // 1D6A8..1D7CB 7085 UNKNOWN, // 1D7CC..1D7CD 7086 COMMON, // 1D7CE..1D7FF 7087 SIGNWRITING, // 1D800..1DA8B 7088 UNKNOWN, // 1DA8C..1DA9A 7089 SIGNWRITING, // 1DA9B..1DA9F 7090 UNKNOWN, // 1DAA0 7091 SIGNWRITING, // 1DAA1..1DAAF 7092 UNKNOWN, // 1DAB0..1E7FF 7093 MENDE_KIKAKUI, // 1E800..1E8C4 7094 UNKNOWN, // 1E8C5..1E8C6 7095 MENDE_KIKAKUI, // 1E8C7..1E8D6 7096 UNKNOWN, // 1E8D7..1EDFF 7097 ARABIC, // 1EE00..1EE03 7098 UNKNOWN, // 1EE04 7099 ARABIC, // 1EE05..1EE1F 7100 UNKNOWN, // 1EE20 7101 ARABIC, // 1EE21..1EE22 7102 UNKNOWN, // 1EE23 7103 ARABIC, // 1EE24 7104 UNKNOWN, // 1EE25..1EE26 7105 ARABIC, // 1EE27 7106 UNKNOWN, // 1EE28 7107 ARABIC, // 1EE29..1EE32 7108 UNKNOWN, // 1EE33 7109 ARABIC, // 1EE34..1EE37 7110 UNKNOWN, // 1EE38 7111 ARABIC, // 1EE39 7112 UNKNOWN, // 1EE3A 7113 ARABIC, // 1EE3B 7114 UNKNOWN, // 1EE3C..1EE41 7115 ARABIC, // 1EE42 7116 UNKNOWN, // 1EE43..1EE46 7117 ARABIC, // 1EE47 7118 UNKNOWN, // 1EE48 7119 ARABIC, // 1EE49 7120 UNKNOWN, // 1EE4A 7121 ARABIC, // 1EE4B 7122 UNKNOWN, // 1EE4C 7123 ARABIC, // 1EE4D..1EE4F 7124 UNKNOWN, // 1EE50 7125 ARABIC, // 1EE51..1EE52 7126 UNKNOWN, // 1EE53 7127 ARABIC, // 1EE54 7128 UNKNOWN, // 1EE55..1EE56 7129 ARABIC, // 1EE57 7130 UNKNOWN, // 1EE58 7131 ARABIC, // 1EE59 7132 UNKNOWN, // 1EE5A 7133 ARABIC, // 1EE5B 7134 UNKNOWN, // 1EE5C 7135 ARABIC, // 1EE5D 7136 UNKNOWN, // 1EE5E 7137 ARABIC, // 1EE5F 7138 UNKNOWN, // 1EE60 7139 ARABIC, // 1EE61..1EE62 7140 UNKNOWN, 
// 1EE63 7141 ARABIC, // 1EE64 7142 UNKNOWN, // 1EE65..1EE66 7143 ARABIC, // 1EE67..1EE6A 7144 UNKNOWN, // 1EE6B 7145 ARABIC, // 1EE6C..1EE72 7146 UNKNOWN, // 1EE73 7147 ARABIC, // 1EE74..1EE77 7148 UNKNOWN, // 1EE78 7149 ARABIC, // 1EE79..1EE7C 7150 UNKNOWN, // 1EE7D 7151 ARABIC, // 1EE7E 7152 UNKNOWN, // 1EE7F 7153 ARABIC, // 1EE80..1EE89 7154 UNKNOWN, // 1EE8A 7155 ARABIC, // 1EE8B..1EE9B 7156 UNKNOWN, // 1EE9C..1EEA0 7157 ARABIC, // 1EEA1..1EEA3 7158 UNKNOWN, // 1EEA4 7159 ARABIC, // 1EEA5..1EEA9 7160 UNKNOWN, // 1EEAA 7161 ARABIC, // 1EEAB..1EEBB 7162 UNKNOWN, // 1EEBC..1EEEF 7163 ARABIC, // 1EEF0..1EEF1 7164 UNKNOWN, // 1EEF2..1EFFF 7165 COMMON, // 1F000..1F02B 7166 UNKNOWN, // 1F02C..1F02F 7167 COMMON, // 1F030..1F093 7168 UNKNOWN, // 1F094..1F09F 7169 COMMON, // 1F0A0..1F0AE 7170 UNKNOWN, // 1F0AF..1F0B0 7171 COMMON, // 1F0B1..1F0BF 7172 UNKNOWN, // 1F0C0 7173 COMMON, // 1F0C1..1F0CF 7174 UNKNOWN, // 1F0D0 7175 COMMON, // 1F0D1..1F0F5 7176 UNKNOWN, // 1F0F6..1F0FF 7177 COMMON, // 1F100..1F10C 7178 UNKNOWN, // 1F10D..1F10F 7179 COMMON, // 1F110..1F12E 7180 UNKNOWN, // 1F12F 7181 COMMON, // 1F130..1F16B 7182 UNKNOWN, // 1F16C..1F16F 7183 COMMON, // 1F170..1F19A 7184 UNKNOWN, // 1F19B..1F1E5 7185 COMMON, // 1F1E6..1F1FF 7186 HIRAGANA, // 1F200 7187 COMMON, // 1F201..1F202 7188 UNKNOWN, // 1F203..1F20F 7189 COMMON, // 1F210..1F23A 7190 UNKNOWN, // 1F23B..1F23F 7191 COMMON, // 1F240..1F248 7192 UNKNOWN, // 1F249..1F24F 7193 COMMON, // 1F250..1F251 7194 UNKNOWN, // 1F252..1F2FF 7195 COMMON, // 1F300..1F579 7196 UNKNOWN, // 1F57A 7197 COMMON, // 1F57B..1F5A3 7198 UNKNOWN, // 1F5A4 7199 COMMON, // 1F5A5..1F6D0 7200 UNKNOWN, // 1F6D1..1F6DF 7201 COMMON, // 1F6E0..1F6EC 7202 UNKNOWN, // 1F6ED..1F6EF 7203 COMMON, // 1F6F0..1F6F3 7204 UNKNOWN, // 1F6F4..1F6FF 7205 COMMON, // 1F700..1F773 7206 UNKNOWN, // 1F774..1F77F 7207 COMMON, // 1F780..1F7D4 7208 UNKNOWN, // 1F7D5..1F7FF 7209 COMMON, // 1F800..1F80B 7210 UNKNOWN, // 1F80C..1F80F 7211 COMMON, // 1F810..1F847 7212 
UNKNOWN, // 1F848..1F84F 7213 COMMON, // 1F850..1F859 7214 UNKNOWN, // 1F85A..1F85F 7215 COMMON, // 1F860..1F887 7216 UNKNOWN, // 1F888..1F88F 7217 COMMON, // 1F890..1F8AD 7218 UNKNOWN, // 1F8AE..1F90F 7219 COMMON, // 1F910..1F918 7220 UNKNOWN, // 1F919..1F97F 7221 COMMON, // 1F980..1F984 7222 UNKNOWN, // 1F985..1F9BF 7223 COMMON, // 1F9C0 7224 UNKNOWN, // 1F9C1..1FFFF 7225 HAN, // 20000..2A6D6 7226 UNKNOWN, // 2A6D7..2A6FF 7227 HAN, // 2A700..2B734 7228 UNKNOWN, // 2B735..2B73F 7229 HAN, // 2B740..2B81D 7230 UNKNOWN, // 2B81E..2B81F 7231 HAN, // 2B820..2CEA1 7232 UNKNOWN, // 2CEA2..2F7FF 7233 HAN, // 2F800..2FA1D 7234 UNKNOWN, // 2FA1E..E0000 7235 COMMON, // E0001 7236 UNKNOWN, // E0002..E001F 7237 COMMON, // E0020..E007F 7238 UNKNOWN, // E0080..E00FF 7239 INHERITED, // E0100..E01EF 7240 UNKNOWN // E01F0..10FFFF 7241 }; 7242 7243 private static HashMap<String, Character.UnicodeScript> aliases; 7244 static { 7245 aliases = new HashMap<>(134); 7246 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 7247 aliases.put("AHOM", AHOM); 7248 aliases.put("ARAB", ARABIC); 7249 aliases.put("ARMI", IMPERIAL_ARAMAIC); 7250 aliases.put("ARMN", ARMENIAN); 7251 aliases.put("AVST", AVESTAN); 7252 aliases.put("BALI", BALINESE); 7253 aliases.put("BAMU", BAMUM); 7254 aliases.put("BASS", BASSA_VAH); 7255 aliases.put("BATK", BATAK); 7256 aliases.put("BENG", BENGALI); 7257 aliases.put("BOPO", BOPOMOFO); 7258 aliases.put("BRAH", BRAHMI); 7259 aliases.put("BRAI", BRAILLE); 7260 aliases.put("BUGI", BUGINESE); 7261 aliases.put("BUHD", BUHID); 7262 aliases.put("CAKM", CHAKMA); 7263 aliases.put("CANS", CANADIAN_ABORIGINAL); 7264 aliases.put("CARI", CARIAN); 7265 aliases.put("CHAM", CHAM); 7266 aliases.put("CHER", CHEROKEE); 7267 aliases.put("COPT", COPTIC); 7268 aliases.put("CPRT", CYPRIOT); 7269 aliases.put("CYRL", CYRILLIC); 7270 aliases.put("DEVA", DEVANAGARI); 7271 aliases.put("DSRT", DESERET); 7272 aliases.put("DUPL", DUPLOYAN); 7273 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 7274 
aliases.put("ELBA", ELBASAN); 7275 aliases.put("ETHI", ETHIOPIC); 7276 aliases.put("GEOR", GEORGIAN); 7277 aliases.put("GLAG", GLAGOLITIC); 7278 aliases.put("GOTH", GOTHIC); 7279 aliases.put("GRAN", GRANTHA); 7280 aliases.put("GREK", GREEK); 7281 aliases.put("GUJR", GUJARATI); 7282 aliases.put("GURU", GURMUKHI); 7283 aliases.put("HANG", HANGUL); 7284 aliases.put("HANI", HAN); 7285 aliases.put("HANO", HANUNOO); 7286 aliases.put("HATR", HATRAN); 7287 aliases.put("HEBR", HEBREW); 7288 aliases.put("HIRA", HIRAGANA); 7289 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 7290 aliases.put("HMNG", PAHAWH_HMONG); 7291 // it appears we don't have the KATAKANA_OR_HIRAGANA 7292 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 7293 aliases.put("HUNG", OLD_HUNGARIAN); 7294 aliases.put("ITAL", OLD_ITALIC); 7295 aliases.put("JAVA", JAVANESE); 7296 aliases.put("KALI", KAYAH_LI); 7297 aliases.put("KANA", KATAKANA); 7298 aliases.put("KHAR", KHAROSHTHI); 7299 aliases.put("KHMR", KHMER); 7300 aliases.put("KHOJ", KHOJKI); 7301 aliases.put("KNDA", KANNADA); 7302 aliases.put("KTHI", KAITHI); 7303 aliases.put("LANA", TAI_THAM); 7304 aliases.put("LAOO", LAO); 7305 aliases.put("LATN", LATIN); 7306 aliases.put("LEPC", LEPCHA); 7307 aliases.put("LIMB", LIMBU); 7308 aliases.put("LINA", LINEAR_A); 7309 aliases.put("LINB", LINEAR_B); 7310 aliases.put("LISU", LISU); 7311 aliases.put("LYCI", LYCIAN); 7312 aliases.put("LYDI", LYDIAN); 7313 aliases.put("MAHJ", MAHAJANI); 7314 aliases.put("MAND", MANDAIC); 7315 aliases.put("MANI", MANICHAEAN); 7316 aliases.put("MEND", MENDE_KIKAKUI); 7317 aliases.put("MERC", MEROITIC_CURSIVE); 7318 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 7319 aliases.put("MLYM", MALAYALAM); 7320 aliases.put("MODI", MODI); 7321 aliases.put("MONG", MONGOLIAN); 7322 aliases.put("MROO", MRO); 7323 aliases.put("MTEI", MEETEI_MAYEK); 7324 aliases.put("MULT", MULTANI); 7325 aliases.put("MYMR", MYANMAR); 7326 aliases.put("NARB", OLD_NORTH_ARABIAN); 7327 aliases.put("NBAT", NABATAEAN); 7328 
aliases.put("NKOO", NKO); 7329 aliases.put("OGAM", OGHAM); 7330 aliases.put("OLCK", OL_CHIKI); 7331 aliases.put("ORKH", OLD_TURKIC); 7332 aliases.put("ORYA", ORIYA); 7333 aliases.put("OSMA", OSMANYA); 7334 aliases.put("PALM", PALMYRENE); 7335 aliases.put("PAUC", PAU_CIN_HAU); 7336 aliases.put("PERM", OLD_PERMIC); 7337 aliases.put("PHAG", PHAGS_PA); 7338 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 7339 aliases.put("PHLP", PSALTER_PAHLAVI); 7340 aliases.put("PHNX", PHOENICIAN); 7341 aliases.put("PLRD", MIAO); 7342 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 7343 aliases.put("RJNG", REJANG); 7344 aliases.put("RUNR", RUNIC); 7345 aliases.put("SAMR", SAMARITAN); 7346 aliases.put("SARB", OLD_SOUTH_ARABIAN); 7347 aliases.put("SAUR", SAURASHTRA); 7348 aliases.put("SGNW", SIGNWRITING); 7349 aliases.put("SHAW", SHAVIAN); 7350 aliases.put("SHRD", SHARADA); 7351 aliases.put("SIDD", SIDDHAM); 7352 aliases.put("SIND", KHUDAWADI); 7353 aliases.put("SINH", SINHALA); 7354 aliases.put("SORA", SORA_SOMPENG); 7355 aliases.put("SUND", SUNDANESE); 7356 aliases.put("SYLO", SYLOTI_NAGRI); 7357 aliases.put("SYRC", SYRIAC); 7358 aliases.put("TAGB", TAGBANWA); 7359 aliases.put("TAKR", TAKRI); 7360 aliases.put("TALE", TAI_LE); 7361 aliases.put("TALU", NEW_TAI_LUE); 7362 aliases.put("TAML", TAMIL); 7363 aliases.put("TAVT", TAI_VIET); 7364 aliases.put("TELU", TELUGU); 7365 aliases.put("TFNG", TIFINAGH); 7366 aliases.put("TGLG", TAGALOG); 7367 aliases.put("THAA", THAANA); 7368 aliases.put("THAI", THAI); 7369 aliases.put("TIBT", TIBETAN); 7370 aliases.put("TIRH", TIRHUTA); 7371 aliases.put("UGAR", UGARITIC); 7372 aliases.put("VAII", VAI); 7373 aliases.put("WARA", WARANG_CITI); 7374 aliases.put("XPEO", OLD_PERSIAN); 7375 aliases.put("XSUX", CUNEIFORM); 7376 aliases.put("YIII", YI); 7377 aliases.put("ZINH", INHERITED); 7378 aliases.put("ZYYY", COMMON); 7379 aliases.put("ZZZZ", UNKNOWN); 7380 } 7381 7382 /** 7383 * Returns the enum constant representing the Unicode script of which 7384 * the given 
character (Unicode code point) is assigned to. 7385 * 7386 * @param codePoint the character (Unicode code point) in question. 7387 * @return The {@code UnicodeScript} constant representing the 7388 * Unicode script of which this character is assigned to. 7389 * 7390 * @exception IllegalArgumentException if the specified 7391 * {@code codePoint} is an invalid Unicode code point. 7392 * @see Character#isValidCodePoint(int) 7393 * 7394 */ 7395 public static UnicodeScript of(int codePoint) { 7396 if (!isValidCodePoint(codePoint)) 7397 throw new IllegalArgumentException(); 7398 int type = getType(codePoint); 7399 // leave SURROGATE and PRIVATE_USE for table lookup 7400 if (type == UNASSIGNED) 7401 return UNKNOWN; 7402 int index = Arrays.binarySearch(scriptStarts, codePoint); 7403 if (index < 0) 7404 index = -index - 2; 7405 return scripts[index]; 7406 } 7407 7408 /** 7409 * Returns the UnicodeScript constant with the given Unicode script 7410 * name or the script name alias. Script names and their aliases are 7411 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 7412 * and {@code PropertyValueAliases<version>.txt} define script names 7413 * and the script name aliases for a particular version of the 7414 * standard. The {@link Character} class specifies the version of 7415 * the standard that it supports. 7416 * <p> 7417 * Character case is ignored for all of the valid script names. 7418 * The en_US locale's case mapping rules are used to provide 7419 * case-insensitive string comparisons for script name validation. 7420 * 7421 * @param scriptName A {@code UnicodeScript} name. 
7422 * @return The {@code UnicodeScript} constant identified 7423 * by {@code scriptName} 7424 * @throws IllegalArgumentException if {@code scriptName} is an 7425 * invalid name 7426 * @throws NullPointerException if {@code scriptName} is null 7427 */ 7428 public static final UnicodeScript forName(String scriptName) { 7429 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 7430 //.replace(' ', '_')); 7431 UnicodeScript sc = aliases.get(scriptName); 7432 if (sc != null) 7433 return sc; 7434 return valueOf(scriptName); 7435 } 7436 } 7437 7438 /** 7439 * The value of the {@code Character}. 7440 * 7441 * @serial 7442 */ 7443 private final char value; 7444 7445 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 7446 private static final long serialVersionUID = 3786198910865385080L; 7447 7448 /** 7449 * Constructs a newly allocated {@code Character} object that 7450 * represents the specified {@code char} value. 7451 * 7452 * @param value the value to be represented by the 7453 * {@code Character} object. 7454 * 7455 * @deprecated 7456 * It is rarely appropriate to use this constructor. The static factory 7457 * {@link #valueOf(char)} is generally a better choice, as it is 7458 * likely to yield significantly better space and time performance. 7459 */ 7460 @Deprecated(since="9") 7461 public Character(char value) { 7462 this.value = value; 7463 } 7464 7465 private static class CharacterCache { 7466 private CharacterCache(){} 7467 7468 static final Character cache[] = new Character[127 + 1]; 7469 7470 static { 7471 for (int i = 0; i < cache.length; i++) 7472 cache[i] = new Character((char)i); 7473 } 7474 } 7475 7476 /** 7477 * Returns a {@code Character} instance representing the specified 7478 * {@code char} value. 
7479 * If a new {@code Character} instance is not required, this method 7480 * should generally be used in preference to the constructor 7481 * {@link #Character(char)}, as this method is likely to yield 7482 * significantly better space and time performance by caching 7483 * frequently requested values. 7484 * 7485 * This method will always cache values in the range {@code 7486 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 7487 * cache other values outside of this range. 7488 * 7489 * @param c a char value. 7490 * @return a {@code Character} instance representing {@code c}. 7491 * @since 1.5 7492 */ 7493 @HotSpotIntrinsicCandidate 7494 public static Character valueOf(char c) { 7495 if (c <= 127) { // must cache 7496 return CharacterCache.cache[(int)c]; 7497 } 7498 return new Character(c); 7499 } 7500 7501 /** 7502 * Returns the value of this {@code Character} object. 7503 * @return the primitive {@code char} value represented by 7504 * this object. 7505 */ 7506 @HotSpotIntrinsicCandidate 7507 public char charValue() { 7508 return value; 7509 } 7510 7511 /** 7512 * Returns a hash code for this {@code Character}; equal to the result 7513 * of invoking {@code charValue()}. 7514 * 7515 * @return a hash code value for this {@code Character} 7516 */ 7517 @Override 7518 public int hashCode() { 7519 return Character.hashCode(value); 7520 } 7521 7522 /** 7523 * Returns a hash code for a {@code char} value; compatible with 7524 * {@code Character.hashCode()}. 7525 * 7526 * @since 1.8 7527 * 7528 * @param value The {@code char} for which to return a hash code. 7529 * @return a hash code value for a {@code char} value. 7530 */ 7531 public static int hashCode(char value) { 7532 return (int)value; 7533 } 7534 7535 /** 7536 * Compares this object against the specified object. 7537 * The result is {@code true} if and only if the argument is not 7538 * {@code null} and is a {@code Character} object that 7539 * represents the same {@code char} value as this object. 
7540 * 7541 * @param obj the object to compare with. 7542 * @return {@code true} if the objects are the same; 7543 * {@code false} otherwise. 7544 */ 7545 public boolean equals(Object obj) { 7546 if (obj instanceof Character) { 7547 return value == ((Character)obj).charValue(); 7548 } 7549 return false; 7550 } 7551 7552 /** 7553 * Returns a {@code String} object representing this 7554 * {@code Character}'s value. The result is a string of 7555 * length 1 whose sole component is the primitive 7556 * {@code char} value represented by this 7557 * {@code Character} object. 7558 * 7559 * @return a string representation of this object. 7560 */ 7561 public String toString() { 7562 char buf[] = {value}; 7563 return String.valueOf(buf); 7564 } 7565 7566 /** 7567 * Returns a {@code String} object representing the 7568 * specified {@code char}. The result is a string of length 7569 * 1 consisting solely of the specified {@code char}. 7570 * 7571 * @param c the {@code char} to be converted 7572 * @return the string representation of the specified {@code char} 7573 * @since 1.4 7574 */ 7575 public static String toString(char c) { 7576 return String.valueOf(c); 7577 } 7578 7579 /** 7580 * Determines whether the specified code point is a valid 7581 * <a href="http://www.unicode.org/glossary/#code_point"> 7582 * Unicode code point value</a>. 7583 * 7584 * @param codePoint the Unicode code point to be tested 7585 * @return {@code true} if the specified code point value is between 7586 * {@link #MIN_CODE_POINT} and 7587 * {@link #MAX_CODE_POINT} inclusive; 7588 * {@code false} otherwise. 
7589 * @since 1.5 7590 */ 7591 public static boolean isValidCodePoint(int codePoint) { 7592 // Optimized form of: 7593 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 7594 int plane = codePoint >>> 16; 7595 return plane < ((MAX_CODE_POINT + 1) >>> 16); 7596 } 7597 7598 /** 7599 * Determines whether the specified character (Unicode code point) 7600 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 7601 * Such code points can be represented using a single {@code char}. 7602 * 7603 * @param codePoint the character (Unicode code point) to be tested 7604 * @return {@code true} if the specified code point is between 7605 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 7606 * {@code false} otherwise. 7607 * @since 1.7 7608 */ 7609 public static boolean isBmpCodePoint(int codePoint) { 7610 return codePoint >>> 16 == 0; 7611 // Optimized form of: 7612 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 7613 // We consistently use logical shift (>>>) to facilitate 7614 // additional runtime optimizations. 7615 } 7616 7617 /** 7618 * Determines whether the specified character (Unicode code point) 7619 * is in the <a href="#supplementary">supplementary character</a> range. 7620 * 7621 * @param codePoint the character (Unicode code point) to be tested 7622 * @return {@code true} if the specified code point is between 7623 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 7624 * {@link #MAX_CODE_POINT} inclusive; 7625 * {@code false} otherwise. 7626 * @since 1.5 7627 */ 7628 public static boolean isSupplementaryCodePoint(int codePoint) { 7629 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 7630 && codePoint < MAX_CODE_POINT + 1; 7631 } 7632 7633 /** 7634 * Determines if the given {@code char} value is a 7635 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 7636 * Unicode high-surrogate code unit</a> 7637 * (also known as <i>leading-surrogate code unit</i>). 
7638 * 7639 * <p>Such values do not represent characters by themselves, 7640 * but are used in the representation of 7641 * <a href="#supplementary">supplementary characters</a> 7642 * in the UTF-16 encoding. 7643 * 7644 * @param ch the {@code char} value to be tested. 7645 * @return {@code true} if the {@code char} value is between 7646 * {@link #MIN_HIGH_SURROGATE} and 7647 * {@link #MAX_HIGH_SURROGATE} inclusive; 7648 * {@code false} otherwise. 7649 * @see Character#isLowSurrogate(char) 7650 * @see Character.UnicodeBlock#of(int) 7651 * @since 1.5 7652 */ 7653 public static boolean isHighSurrogate(char ch) { 7654 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 7655 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 7656 } 7657 7658 /** 7659 * Determines if the given {@code char} value is a 7660 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 7661 * Unicode low-surrogate code unit</a> 7662 * (also known as <i>trailing-surrogate code unit</i>). 7663 * 7664 * <p>Such values do not represent characters by themselves, 7665 * but are used in the representation of 7666 * <a href="#supplementary">supplementary characters</a> 7667 * in the UTF-16 encoding. 7668 * 7669 * @param ch the {@code char} value to be tested. 7670 * @return {@code true} if the {@code char} value is between 7671 * {@link #MIN_LOW_SURROGATE} and 7672 * {@link #MAX_LOW_SURROGATE} inclusive; 7673 * {@code false} otherwise. 7674 * @see Character#isHighSurrogate(char) 7675 * @since 1.5 7676 */ 7677 public static boolean isLowSurrogate(char ch) { 7678 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 7679 } 7680 7681 /** 7682 * Determines if the given {@code char} value is a Unicode 7683 * <i>surrogate code unit</i>. 7684 * 7685 * <p>Such values do not represent characters by themselves, 7686 * but are used in the representation of 7687 * <a href="#supplementary">supplementary characters</a> 7688 * in the UTF-16 encoding. 
7689 * 7690 * <p>A char value is a surrogate code unit if and only if it is either 7691 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 7692 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 7693 * 7694 * @param ch the {@code char} value to be tested. 7695 * @return {@code true} if the {@code char} value is between 7696 * {@link #MIN_SURROGATE} and 7697 * {@link #MAX_SURROGATE} inclusive; 7698 * {@code false} otherwise. 7699 * @since 1.7 7700 */ 7701 public static boolean isSurrogate(char ch) { 7702 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 7703 } 7704 7705 /** 7706 * Determines whether the specified pair of {@code char} 7707 * values is a valid 7708 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 7709 * Unicode surrogate pair</a>. 7710 7711 * <p>This method is equivalent to the expression: 7712 * <blockquote><pre>{@code 7713 * isHighSurrogate(high) && isLowSurrogate(low) 7714 * }</pre></blockquote> 7715 * 7716 * @param high the high-surrogate code value to be tested 7717 * @param low the low-surrogate code value to be tested 7718 * @return {@code true} if the specified high and 7719 * low-surrogate code values represent a valid surrogate pair; 7720 * {@code false} otherwise. 7721 * @since 1.5 7722 */ 7723 public static boolean isSurrogatePair(char high, char low) { 7724 return isHighSurrogate(high) && isLowSurrogate(low); 7725 } 7726 7727 /** 7728 * Determines the number of {@code char} values needed to 7729 * represent the specified character (Unicode code point). If the 7730 * specified character is equal to or greater than 0x10000, then 7731 * the method returns 2. Otherwise, the method returns 1. 7732 * 7733 * <p>This method doesn't validate the specified character to be a 7734 * valid Unicode code point. The caller must validate the 7735 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 7736 * if necessary. 
7737 * 7738 * @param codePoint the character (Unicode code point) to be tested. 7739 * @return 2 if the character is a valid supplementary character; 1 otherwise. 7740 * @see Character#isSupplementaryCodePoint(int) 7741 * @since 1.5 7742 */ 7743 public static int charCount(int codePoint) { 7744 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 7745 } 7746 7747 /** 7748 * Converts the specified surrogate pair to its supplementary code 7749 * point value. This method does not validate the specified 7750 * surrogate pair. The caller must validate it using {@link 7751 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 7752 * 7753 * @param high the high-surrogate code unit 7754 * @param low the low-surrogate code unit 7755 * @return the supplementary code point composed from the 7756 * specified surrogate pair. 7757 * @since 1.5 7758 */ 7759 public static int toCodePoint(char high, char low) { 7760 // Optimized form of: 7761 // return ((high - MIN_HIGH_SURROGATE) << 10) 7762 // + (low - MIN_LOW_SURROGATE) 7763 // + MIN_SUPPLEMENTARY_CODE_POINT; 7764 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 7765 - (MIN_HIGH_SURROGATE << 10) 7766 - MIN_LOW_SURROGATE); 7767 } 7768 7769 /** 7770 * Returns the code point at the given index of the 7771 * {@code CharSequence}. If the {@code char} value at 7772 * the given index in the {@code CharSequence} is in the 7773 * high-surrogate range, the following index is less than the 7774 * length of the {@code CharSequence}, and the 7775 * {@code char} value at the following index is in the 7776 * low-surrogate range, then the supplementary code point 7777 * corresponding to this surrogate pair is returned. Otherwise, 7778 * the {@code char} value at the given index is returned. 
7779 * 7780 * @param seq a sequence of {@code char} values (Unicode code 7781 * units) 7782 * @param index the index to the {@code char} values (Unicode 7783 * code units) in {@code seq} to be converted 7784 * @return the Unicode code point at the given index 7785 * @exception NullPointerException if {@code seq} is null. 7786 * @exception IndexOutOfBoundsException if the value 7787 * {@code index} is negative or not less than 7788 * {@link CharSequence#length() seq.length()}. 7789 * @since 1.5 7790 */ 7791 public static int codePointAt(CharSequence seq, int index) { 7792 char c1 = seq.charAt(index); 7793 if (isHighSurrogate(c1) && ++index < seq.length()) { 7794 char c2 = seq.charAt(index); 7795 if (isLowSurrogate(c2)) { 7796 return toCodePoint(c1, c2); 7797 } 7798 } 7799 return c1; 7800 } 7801 7802 /** 7803 * Returns the code point at the given index of the 7804 * {@code char} array. If the {@code char} value at 7805 * the given index in the {@code char} array is in the 7806 * high-surrogate range, the following index is less than the 7807 * length of the {@code char} array, and the 7808 * {@code char} value at the following index is in the 7809 * low-surrogate range, then the supplementary code point 7810 * corresponding to this surrogate pair is returned. Otherwise, 7811 * the {@code char} value at the given index is returned. 7812 * 7813 * @param a the {@code char} array 7814 * @param index the index to the {@code char} values (Unicode 7815 * code units) in the {@code char} array to be converted 7816 * @return the Unicode code point at the given index 7817 * @exception NullPointerException if {@code a} is null. 7818 * @exception IndexOutOfBoundsException if the value 7819 * {@code index} is negative or not less than 7820 * the length of the {@code char} array. 
7821 * @since 1.5 7822 */ 7823 public static int codePointAt(char[] a, int index) { 7824 return codePointAtImpl(a, index, a.length); 7825 } 7826 7827 /** 7828 * Returns the code point at the given index of the 7829 * {@code char} array, where only array elements with 7830 * {@code index} less than {@code limit} can be used. If 7831 * the {@code char} value at the given index in the 7832 * {@code char} array is in the high-surrogate range, the 7833 * following index is less than the {@code limit}, and the 7834 * {@code char} value at the following index is in the 7835 * low-surrogate range, then the supplementary code point 7836 * corresponding to this surrogate pair is returned. Otherwise, 7837 * the {@code char} value at the given index is returned. 7838 * 7839 * @param a the {@code char} array 7840 * @param index the index to the {@code char} values (Unicode 7841 * code units) in the {@code char} array to be converted 7842 * @param limit the index after the last array element that 7843 * can be used in the {@code char} array 7844 * @return the Unicode code point at the given index 7845 * @exception NullPointerException if {@code a} is null. 7846 * @exception IndexOutOfBoundsException if the {@code index} 7847 * argument is negative or not less than the {@code limit} 7848 * argument, or if the {@code limit} argument is negative or 7849 * greater than the length of the {@code char} array. 
7850 * @since 1.5 7851 */ 7852 public static int codePointAt(char[] a, int index, int limit) { 7853 if (index >= limit || limit < 0 || limit > a.length) { 7854 throw new IndexOutOfBoundsException(); 7855 } 7856 return codePointAtImpl(a, index, limit); 7857 } 7858 7859 // throws ArrayIndexOutOfBoundsException if index out of bounds 7860 static int codePointAtImpl(char[] a, int index, int limit) { 7861 char c1 = a[index]; 7862 if (isHighSurrogate(c1) && ++index < limit) { 7863 char c2 = a[index]; 7864 if (isLowSurrogate(c2)) { 7865 return toCodePoint(c1, c2); 7866 } 7867 } 7868 return c1; 7869 } 7870 7871 /** 7872 * Returns the code point preceding the given index of the 7873 * {@code CharSequence}. If the {@code char} value at 7874 * {@code (index - 1)} in the {@code CharSequence} is in 7875 * the low-surrogate range, {@code (index - 2)} is not 7876 * negative, and the {@code char} value at {@code (index - 2)} 7877 * in the {@code CharSequence} is in the 7878 * high-surrogate range, then the supplementary code point 7879 * corresponding to this surrogate pair is returned. Otherwise, 7880 * the {@code char} value at {@code (index - 1)} is 7881 * returned. 7882 * 7883 * @param seq the {@code CharSequence} instance 7884 * @param index the index following the code point that should be returned 7885 * @return the Unicode code point value before the given index. 7886 * @exception NullPointerException if {@code seq} is null. 7887 * @exception IndexOutOfBoundsException if the {@code index} 7888 * argument is less than 1 or greater than {@link 7889 * CharSequence#length() seq.length()}. 
7890 * @since 1.5 7891 */ 7892 public static int codePointBefore(CharSequence seq, int index) { 7893 char c2 = seq.charAt(--index); 7894 if (isLowSurrogate(c2) && index > 0) { 7895 char c1 = seq.charAt(--index); 7896 if (isHighSurrogate(c1)) { 7897 return toCodePoint(c1, c2); 7898 } 7899 } 7900 return c2; 7901 } 7902 7903 /** 7904 * Returns the code point preceding the given index of the 7905 * {@code char} array. If the {@code char} value at 7906 * {@code (index - 1)} in the {@code char} array is in 7907 * the low-surrogate range, {@code (index - 2)} is not 7908 * negative, and the {@code char} value at {@code (index - 2)} 7909 * in the {@code char} array is in the 7910 * high-surrogate range, then the supplementary code point 7911 * corresponding to this surrogate pair is returned. Otherwise, 7912 * the {@code char} value at {@code (index - 1)} is 7913 * returned. 7914 * 7915 * @param a the {@code char} array 7916 * @param index the index following the code point that should be returned 7917 * @return the Unicode code point value before the given index. 7918 * @exception NullPointerException if {@code a} is null. 7919 * @exception IndexOutOfBoundsException if the {@code index} 7920 * argument is less than 1 or greater than the length of the 7921 * {@code char} array 7922 * @since 1.5 7923 */ 7924 public static int codePointBefore(char[] a, int index) { 7925 return codePointBeforeImpl(a, index, 0); 7926 } 7927 7928 /** 7929 * Returns the code point preceding the given index of the 7930 * {@code char} array, where only array elements with 7931 * {@code index} greater than or equal to {@code start} 7932 * can be used. 
If the {@code char} value at {@code (index - 1)} 7933 * in the {@code char} array is in the 7934 * low-surrogate range, {@code (index - 2)} is not less than 7935 * {@code start}, and the {@code char} value at 7936 * {@code (index - 2)} in the {@code char} array is in 7937 * the high-surrogate range, then the supplementary code point 7938 * corresponding to this surrogate pair is returned. Otherwise, 7939 * the {@code char} value at {@code (index - 1)} is 7940 * returned. 7941 * 7942 * @param a the {@code char} array 7943 * @param index the index following the code point that should be returned 7944 * @param start the index of the first array element in the 7945 * {@code char} array 7946 * @return the Unicode code point value before the given index. 7947 * @exception NullPointerException if {@code a} is null. 7948 * @exception IndexOutOfBoundsException if the {@code index} 7949 * argument is not greater than the {@code start} argument or 7950 * is greater than the length of the {@code char} array, or 7951 * if the {@code start} argument is negative or not less than 7952 * the length of the {@code char} array. 
7953 * @since 1.5 7954 */ 7955 public static int codePointBefore(char[] a, int index, int start) { 7956 if (index <= start || start < 0 || start >= a.length) { 7957 throw new IndexOutOfBoundsException(); 7958 } 7959 return codePointBeforeImpl(a, index, start); 7960 } 7961 7962 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 7963 static int codePointBeforeImpl(char[] a, int index, int start) { 7964 char c2 = a[--index]; 7965 if (isLowSurrogate(c2) && index > start) { 7966 char c1 = a[--index]; 7967 if (isHighSurrogate(c1)) { 7968 return toCodePoint(c1, c2); 7969 } 7970 } 7971 return c2; 7972 } 7973 7974 /** 7975 * Returns the leading surrogate (a 7976 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 7977 * high surrogate code unit</a>) of the 7978 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 7979 * surrogate pair</a> 7980 * representing the specified supplementary character (Unicode 7981 * code point) in the UTF-16 encoding. If the specified character 7982 * is not a 7983 * <a href="Character.html#supplementary">supplementary character</a>, 7984 * an unspecified {@code char} is returned. 7985 * 7986 * <p>If 7987 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 7988 * is {@code true}, then 7989 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 7990 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 7991 * are also always {@code true}. 
7992 * 7993 * @param codePoint a supplementary character (Unicode code point) 7994 * @return the leading surrogate code unit used to represent the 7995 * character in the UTF-16 encoding 7996 * @since 1.7 7997 */ 7998 public static char highSurrogate(int codePoint) { 7999 return (char) ((codePoint >>> 10) 8000 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 8001 } 8002 8003 /** 8004 * Returns the trailing surrogate (a 8005 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8006 * low surrogate code unit</a>) of the 8007 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8008 * surrogate pair</a> 8009 * representing the specified supplementary character (Unicode 8010 * code point) in the UTF-16 encoding. If the specified character 8011 * is not a 8012 * <a href="Character.html#supplementary">supplementary character</a>, 8013 * an unspecified {@code char} is returned. 8014 * 8015 * <p>If 8016 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 8017 * is {@code true}, then 8018 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 8019 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 8020 * are also always {@code true}. 8021 * 8022 * @param codePoint a supplementary character (Unicode code point) 8023 * @return the trailing surrogate code unit used to represent the 8024 * character in the UTF-16 encoding 8025 * @since 1.7 8026 */ 8027 public static char lowSurrogate(int codePoint) { 8028 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 8029 } 8030 8031 /** 8032 * Converts the specified character (Unicode code point) to its 8033 * UTF-16 representation. If the specified code point is a BMP 8034 * (Basic Multilingual Plane or Plane 0) value, the same value is 8035 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 8036 * specified code point is a supplementary character, its 8037 * surrogate values are stored in {@code dst[dstIndex]} 8038 * (high-surrogate) and {@code dst[dstIndex+1]} 8039 * (low-surrogate), and 2 is returned. 8040 * 8041 * @param codePoint the character (Unicode code point) to be converted. 8042 * @param dst an array of {@code char} in which the 8043 * {@code codePoint}'s UTF-16 value is stored. 8044 * @param dstIndex the start index into the {@code dst} 8045 * array where the converted value is stored. 8046 * @return 1 if the code point is a BMP code point, 2 if the 8047 * code point is a supplementary code point. 8048 * @exception IllegalArgumentException if the specified 8049 * {@code codePoint} is not a valid Unicode code point. 8050 * @exception NullPointerException if the specified {@code dst} is null. 8051 * @exception IndexOutOfBoundsException if {@code dstIndex} 8052 * is negative or not less than {@code dst.length}, or if 8053 * {@code dst} at {@code dstIndex} doesn't have enough 8054 * array element(s) to store the resulting {@code char} 8055 * value(s). (If {@code dstIndex} is equal to 8056 * {@code dst.length-1} and the specified 8057 * {@code codePoint} is a supplementary character, the 8058 * high-surrogate value is not stored in 8059 * {@code dst[dstIndex]}.) 8060 * @since 1.5 8061 */ 8062 public static int toChars(int codePoint, char[] dst, int dstIndex) { 8063 if (isBmpCodePoint(codePoint)) { 8064 dst[dstIndex] = (char) codePoint; 8065 return 1; 8066 } else if (isValidCodePoint(codePoint)) { 8067 toSurrogates(codePoint, dst, dstIndex); 8068 return 2; 8069 } else { 8070 throw new IllegalArgumentException(); 8071 } 8072 } 8073 8074 /** 8075 * Converts the specified character (Unicode code point) to its 8076 * UTF-16 representation stored in a {@code char} array. 
If 8077 * the specified code point is a BMP (Basic Multilingual Plane or 8078 * Plane 0) value, the resulting {@code char} array has 8079 * the same value as {@code codePoint}. If the specified code 8080 * point is a supplementary code point, the resulting 8081 * {@code char} array has the corresponding surrogate pair. 8082 * 8083 * @param codePoint a Unicode code point 8084 * @return a {@code char} array having 8085 * {@code codePoint}'s UTF-16 representation. 8086 * @exception IllegalArgumentException if the specified 8087 * {@code codePoint} is not a valid Unicode code point. 8088 * @since 1.5 8089 */ 8090 public static char[] toChars(int codePoint) { 8091 if (isBmpCodePoint(codePoint)) { 8092 return new char[] { (char) codePoint }; 8093 } else if (isValidCodePoint(codePoint)) { 8094 char[] result = new char[2]; 8095 toSurrogates(codePoint, result, 0); 8096 return result; 8097 } else { 8098 throw new IllegalArgumentException(); 8099 } 8100 } 8101 8102 static void toSurrogates(int codePoint, char[] dst, int index) { 8103 // We write elements "backwards" to guarantee all-or-nothing 8104 dst[index+1] = lowSurrogate(codePoint); 8105 dst[index] = highSurrogate(codePoint); 8106 } 8107 8108 /** 8109 * Returns the number of Unicode code points in the text range of 8110 * the specified char sequence. The text range begins at the 8111 * specified {@code beginIndex} and extends to the 8112 * {@code char} at index {@code endIndex - 1}. Thus the 8113 * length (in {@code char}s) of the text range is 8114 * {@code endIndex-beginIndex}. Unpaired surrogates within 8115 * the text range count as one code point each. 8116 * 8117 * @param seq the char sequence 8118 * @param beginIndex the index to the first {@code char} of 8119 * the text range. 8120 * @param endIndex the index after the last {@code char} of 8121 * the text range. 8122 * @return the number of Unicode code points in the specified text 8123 * range 8124 * @exception NullPointerException if {@code seq} is null. 
8125 * @exception IndexOutOfBoundsException if the 8126 * {@code beginIndex} is negative, or {@code endIndex} 8127 * is larger than the length of the given sequence, or 8128 * {@code beginIndex} is larger than {@code endIndex}. 8129 * @since 1.5 8130 */ 8131 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 8132 int length = seq.length(); 8133 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 8134 throw new IndexOutOfBoundsException(); 8135 } 8136 int n = endIndex - beginIndex; 8137 for (int i = beginIndex; i < endIndex; ) { 8138 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 8139 isLowSurrogate(seq.charAt(i))) { 8140 n--; 8141 i++; 8142 } 8143 } 8144 return n; 8145 } 8146 8147 /** 8148 * Returns the number of Unicode code points in a subarray of the 8149 * {@code char} array argument. The {@code offset} 8150 * argument is the index of the first {@code char} of the 8151 * subarray and the {@code count} argument specifies the 8152 * length of the subarray in {@code char}s. Unpaired 8153 * surrogates within the subarray count as one code point each. 8154 * 8155 * @param a the {@code char} array 8156 * @param offset the index of the first {@code char} in the 8157 * given {@code char} array 8158 * @param count the length of the subarray in {@code char}s 8159 * @return the number of Unicode code points in the specified subarray 8160 * @exception NullPointerException if {@code a} is null. 8161 * @exception IndexOutOfBoundsException if {@code offset} or 8162 * {@code count} is negative, or if {@code offset + 8163 * count} is larger than the length of the given array. 
8164 * @since 1.5 8165 */ 8166 public static int codePointCount(char[] a, int offset, int count) { 8167 if (count > a.length - offset || offset < 0 || count < 0) { 8168 throw new IndexOutOfBoundsException(); 8169 } 8170 return codePointCountImpl(a, offset, count); 8171 } 8172 8173 static int codePointCountImpl(char[] a, int offset, int count) { 8174 int endIndex = offset + count; 8175 int n = count; 8176 for (int i = offset; i < endIndex; ) { 8177 if (isHighSurrogate(a[i++]) && i < endIndex && 8178 isLowSurrogate(a[i])) { 8179 n--; 8180 i++; 8181 } 8182 } 8183 return n; 8184 } 8185 8186 /** 8187 * Returns the index within the given char sequence that is offset 8188 * from the given {@code index} by {@code codePointOffset} 8189 * code points. Unpaired surrogates within the text range given by 8190 * {@code index} and {@code codePointOffset} count as 8191 * one code point each. 8192 * 8193 * @param seq the char sequence 8194 * @param index the index to be offset 8195 * @param codePointOffset the offset in code points 8196 * @return the index within the char sequence 8197 * @exception NullPointerException if {@code seq} is null. 8198 * @exception IndexOutOfBoundsException if {@code index} 8199 * is negative or larger then the length of the char sequence, 8200 * or if {@code codePointOffset} is positive and the 8201 * subsequence starting with {@code index} has fewer than 8202 * {@code codePointOffset} code points, or if 8203 * {@code codePointOffset} is negative and the subsequence 8204 * before {@code index} has fewer than the absolute value 8205 * of {@code codePointOffset} code points. 
8206 * @since 1.5 8207 */ 8208 public static int offsetByCodePoints(CharSequence seq, int index, 8209 int codePointOffset) { 8210 int length = seq.length(); 8211 if (index < 0 || index > length) { 8212 throw new IndexOutOfBoundsException(); 8213 } 8214 8215 int x = index; 8216 if (codePointOffset >= 0) { 8217 int i; 8218 for (i = 0; x < length && i < codePointOffset; i++) { 8219 if (isHighSurrogate(seq.charAt(x++)) && x < length && 8220 isLowSurrogate(seq.charAt(x))) { 8221 x++; 8222 } 8223 } 8224 if (i < codePointOffset) { 8225 throw new IndexOutOfBoundsException(); 8226 } 8227 } else { 8228 int i; 8229 for (i = codePointOffset; x > 0 && i < 0; i++) { 8230 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 8231 isHighSurrogate(seq.charAt(x-1))) { 8232 x--; 8233 } 8234 } 8235 if (i < 0) { 8236 throw new IndexOutOfBoundsException(); 8237 } 8238 } 8239 return x; 8240 } 8241 8242 /** 8243 * Returns the index within the given {@code char} subarray 8244 * that is offset from the given {@code index} by 8245 * {@code codePointOffset} code points. The 8246 * {@code start} and {@code count} arguments specify a 8247 * subarray of the {@code char} array. Unpaired surrogates 8248 * within the text range given by {@code index} and 8249 * {@code codePointOffset} count as one code point each. 8250 * 8251 * @param a the {@code char} array 8252 * @param start the index of the first {@code char} of the 8253 * subarray 8254 * @param count the length of the subarray in {@code char}s 8255 * @param index the index to be offset 8256 * @param codePointOffset the offset in code points 8257 * @return the index within the subarray 8258 * @exception NullPointerException if {@code a} is null. 
8259 * @exception IndexOutOfBoundsException 8260 * if {@code start} or {@code count} is negative, 8261 * or if {@code start + count} is larger than the length of 8262 * the given array, 8263 * or if {@code index} is less than {@code start} or 8264 * larger then {@code start + count}, 8265 * or if {@code codePointOffset} is positive and the text range 8266 * starting with {@code index} and ending with {@code start + count - 1} 8267 * has fewer than {@code codePointOffset} code 8268 * points, 8269 * or if {@code codePointOffset} is negative and the text range 8270 * starting with {@code start} and ending with {@code index - 1} 8271 * has fewer than the absolute value of 8272 * {@code codePointOffset} code points. 8273 * @since 1.5 8274 */ 8275 public static int offsetByCodePoints(char[] a, int start, int count, 8276 int index, int codePointOffset) { 8277 if (count > a.length-start || start < 0 || count < 0 8278 || index < start || index > start+count) { 8279 throw new IndexOutOfBoundsException(); 8280 } 8281 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 8282 } 8283 8284 static int offsetByCodePointsImpl(char[]a, int start, int count, 8285 int index, int codePointOffset) { 8286 int x = index; 8287 if (codePointOffset >= 0) { 8288 int limit = start + count; 8289 int i; 8290 for (i = 0; x < limit && i < codePointOffset; i++) { 8291 if (isHighSurrogate(a[x++]) && x < limit && 8292 isLowSurrogate(a[x])) { 8293 x++; 8294 } 8295 } 8296 if (i < codePointOffset) { 8297 throw new IndexOutOfBoundsException(); 8298 } 8299 } else { 8300 int i; 8301 for (i = codePointOffset; x > start && i < 0; i++) { 8302 if (isLowSurrogate(a[--x]) && x > start && 8303 isHighSurrogate(a[x-1])) { 8304 x--; 8305 } 8306 } 8307 if (i < 0) { 8308 throw new IndexOutOfBoundsException(); 8309 } 8310 } 8311 return x; 8312 } 8313 8314 /** 8315 * Determines if the specified character is a lowercase character. 
8316 * <p> 8317 * A character is lowercase if its general category type, provided 8318 * by {@code Character.getType(ch)}, is 8319 * {@code LOWERCASE_LETTER}, or it has contributory property 8320 * Other_Lowercase as defined by the Unicode Standard. 8321 * <p> 8322 * The following are examples of lowercase characters: 8323 * <blockquote><pre> 8324 * a b c d e f g h i j k l m n o p q r s t u v w x y z 8325 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 8326 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 8327 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 8328 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 8329 * </pre></blockquote> 8330 * <p> Many other Unicode characters are lowercase too. 8331 * 8332 * <p><b>Note:</b> This method cannot handle <a 8333 * href="#supplementary"> supplementary characters</a>. To support 8334 * all Unicode characters, including supplementary characters, use 8335 * the {@link #isLowerCase(int)} method. 8336 * 8337 * @param ch the character to be tested. 8338 * @return {@code true} if the character is lowercase; 8339 * {@code false} otherwise. 8340 * @see Character#isLowerCase(char) 8341 * @see Character#isTitleCase(char) 8342 * @see Character#toLowerCase(char) 8343 * @see Character#getType(char) 8344 */ 8345 public static boolean isLowerCase(char ch) { 8346 return isLowerCase((int)ch); 8347 } 8348 8349 /** 8350 * Determines if the specified character (Unicode code point) is a 8351 * lowercase character. 8352 * <p> 8353 * A character is lowercase if its general category type, provided 8354 * by {@link Character#getType getType(codePoint)}, is 8355 * {@code LOWERCASE_LETTER}, or it has contributory property 8356 * Other_Lowercase as defined by the Unicode Standard. 
8357 * <p> 8358 * The following are examples of lowercase characters: 8359 * <blockquote><pre> 8360 * a b c d e f g h i j k l m n o p q r s t u v w x y z 8361 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 8362 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 8363 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 8364 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 8365 * </pre></blockquote> 8366 * <p> Many other Unicode characters are lowercase too. 8367 * 8368 * @param codePoint the character (Unicode code point) to be tested. 8369 * @return {@code true} if the character is lowercase; 8370 * {@code false} otherwise. 8371 * @see Character#isLowerCase(int) 8372 * @see Character#isTitleCase(int) 8373 * @see Character#toLowerCase(int) 8374 * @see Character#getType(int) 8375 * @since 1.5 8376 */ 8377 public static boolean isLowerCase(int codePoint) { 8378 return getType(codePoint) == Character.LOWERCASE_LETTER || 8379 CharacterData.of(codePoint).isOtherLowercase(codePoint); 8380 } 8381 8382 /** 8383 * Determines if the specified character is an uppercase character. 8384 * <p> 8385 * A character is uppercase if its general category type, provided by 8386 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 8387 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 8388 * <p> 8389 * The following are examples of uppercase characters: 8390 * <blockquote><pre> 8391 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 8392 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 8393 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 8394 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 8395 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 8396 * </pre></blockquote> 8397 * <p> Many other Unicode characters are uppercase too. 
8398 * 8399 * <p><b>Note:</b> This method cannot handle <a 8400 * href="#supplementary"> supplementary characters</a>. To support 8401 * all Unicode characters, including supplementary characters, use 8402 * the {@link #isUpperCase(int)} method. 8403 * 8404 * @param ch the character to be tested. 8405 * @return {@code true} if the character is uppercase; 8406 * {@code false} otherwise. 8407 * @see Character#isLowerCase(char) 8408 * @see Character#isTitleCase(char) 8409 * @see Character#toUpperCase(char) 8410 * @see Character#getType(char) 8411 * @since 1.0 8412 */ 8413 public static boolean isUpperCase(char ch) { 8414 return isUpperCase((int)ch); 8415 } 8416 8417 /** 8418 * Determines if the specified character (Unicode code point) is an uppercase character. 8419 * <p> 8420 * A character is uppercase if its general category type, provided by 8421 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 8422 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 8423 * <p> 8424 * The following are examples of uppercase characters: 8425 * <blockquote><pre> 8426 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 8427 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 8428 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 8429 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 8430 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 8431 * </pre></blockquote> 8432 * <p> Many other Unicode characters are uppercase too. 8433 * 8434 * @param codePoint the character (Unicode code point) to be tested. 8435 * @return {@code true} if the character is uppercase; 8436 * {@code false} otherwise. 
8437 * @see Character#isLowerCase(int) 8438 * @see Character#isTitleCase(int) 8439 * @see Character#toUpperCase(int) 8440 * @see Character#getType(int) 8441 * @since 1.5 8442 */ 8443 public static boolean isUpperCase(int codePoint) { 8444 return getType(codePoint) == Character.UPPERCASE_LETTER || 8445 CharacterData.of(codePoint).isOtherUppercase(codePoint); 8446 } 8447 8448 /** 8449 * Determines if the specified character is a titlecase character. 8450 * <p> 8451 * A character is a titlecase character if its general 8452 * category type, provided by {@code Character.getType(ch)}, 8453 * is {@code TITLECASE_LETTER}. 8454 * <p> 8455 * Some characters look like pairs of Latin letters. For example, there 8456 * is an uppercase letter that looks like "LJ" and has a corresponding 8457 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 8458 * is the appropriate form to use when rendering a word in lowercase 8459 * with initial capitals, as for a book title. 8460 * <p> 8461 * These are some of the Unicode characters for which this method returns 8462 * {@code true}: 8463 * <ul> 8464 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 8465 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 8466 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 8467 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 8468 * </ul> 8469 * <p> Many other Unicode characters are titlecase too. 8470 * 8471 * <p><b>Note:</b> This method cannot handle <a 8472 * href="#supplementary"> supplementary characters</a>. To support 8473 * all Unicode characters, including supplementary characters, use 8474 * the {@link #isTitleCase(int)} method. 8475 * 8476 * @param ch the character to be tested. 8477 * @return {@code true} if the character is titlecase; 8478 * {@code false} otherwise. 
8479 * @see Character#isLowerCase(char) 8480 * @see Character#isUpperCase(char) 8481 * @see Character#toTitleCase(char) 8482 * @see Character#getType(char) 8483 * @since 1.0.2 8484 */ 8485 public static boolean isTitleCase(char ch) { 8486 return isTitleCase((int)ch); 8487 } 8488 8489 /** 8490 * Determines if the specified character (Unicode code point) is a titlecase character. 8491 * <p> 8492 * A character is a titlecase character if its general 8493 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 8494 * is {@code TITLECASE_LETTER}. 8495 * <p> 8496 * Some characters look like pairs of Latin letters. For example, there 8497 * is an uppercase letter that looks like "LJ" and has a corresponding 8498 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 8499 * is the appropriate form to use when rendering a word in lowercase 8500 * with initial capitals, as for a book title. 8501 * <p> 8502 * These are some of the Unicode characters for which this method returns 8503 * {@code true}: 8504 * <ul> 8505 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 8506 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 8507 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 8508 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 8509 * </ul> 8510 * <p> Many other Unicode characters are titlecase too. 8511 * 8512 * @param codePoint the character (Unicode code point) to be tested. 8513 * @return {@code true} if the character is titlecase; 8514 * {@code false} otherwise. 8515 * @see Character#isLowerCase(int) 8516 * @see Character#isUpperCase(int) 8517 * @see Character#toTitleCase(int) 8518 * @see Character#getType(int) 8519 * @since 1.5 8520 */ 8521 public static boolean isTitleCase(int codePoint) { 8522 return getType(codePoint) == Character.TITLECASE_LETTER; 8523 } 8524 8525 /** 8526 * Determines if the specified character is a digit. 
8527 * <p> 8528 * A character is a digit if its general category type, provided 8529 * by {@code Character.getType(ch)}, is 8530 * {@code DECIMAL_DIGIT_NUMBER}. 8531 * <p> 8532 * Some Unicode character ranges that contain digits: 8533 * <ul> 8534 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 8535 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 8536 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 8537 * Arabic-Indic digits 8538 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 8539 * Extended Arabic-Indic digits 8540 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 8541 * Devanagari digits 8542 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 8543 * Fullwidth digits 8544 * </ul> 8545 * 8546 * Many other character ranges contain digits as well. 8547 * 8548 * <p><b>Note:</b> This method cannot handle <a 8549 * href="#supplementary"> supplementary characters</a>. To support 8550 * all Unicode characters, including supplementary characters, use 8551 * the {@link #isDigit(int)} method. 8552 * 8553 * @param ch the character to be tested. 8554 * @return {@code true} if the character is a digit; 8555 * {@code false} otherwise. 8556 * @see Character#digit(char, int) 8557 * @see Character#forDigit(int, int) 8558 * @see Character#getType(char) 8559 */ 8560 public static boolean isDigit(char ch) { 8561 return isDigit((int)ch); 8562 } 8563 8564 /** 8565 * Determines if the specified character (Unicode code point) is a digit. 8566 * <p> 8567 * A character is a digit if its general category type, provided 8568 * by {@link Character#getType(int) getType(codePoint)}, is 8569 * {@code DECIMAL_DIGIT_NUMBER}. 
8570 * <p> 8571 * Some Unicode character ranges that contain digits: 8572 * <ul> 8573 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 8574 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 8575 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 8576 * Arabic-Indic digits 8577 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 8578 * Extended Arabic-Indic digits 8579 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 8580 * Devanagari digits 8581 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 8582 * Fullwidth digits 8583 * </ul> 8584 * 8585 * Many other character ranges contain digits as well. 8586 * 8587 * @param codePoint the character (Unicode code point) to be tested. 8588 * @return {@code true} if the character is a digit; 8589 * {@code false} otherwise. 8590 * @see Character#forDigit(int, int) 8591 * @see Character#getType(int) 8592 * @since 1.5 8593 */ 8594 public static boolean isDigit(int codePoint) { 8595 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 8596 } 8597 8598 /** 8599 * Determines if a character is defined in Unicode. 8600 * <p> 8601 * A character is defined if at least one of the following is true: 8602 * <ul> 8603 * <li>It has an entry in the UnicodeData file. 8604 * <li>It has a value in a range defined by the UnicodeData file. 8605 * </ul> 8606 * 8607 * <p><b>Note:</b> This method cannot handle <a 8608 * href="#supplementary"> supplementary characters</a>. To support 8609 * all Unicode characters, including supplementary characters, use 8610 * the {@link #isDefined(int)} method. 8611 * 8612 * @param ch the character to be tested 8613 * @return {@code true} if the character has a defined meaning 8614 * in Unicode; {@code false} otherwise. 
8615 * @see Character#isDigit(char) 8616 * @see Character#isLetter(char) 8617 * @see Character#isLetterOrDigit(char) 8618 * @see Character#isLowerCase(char) 8619 * @see Character#isTitleCase(char) 8620 * @see Character#isUpperCase(char) 8621 * @since 1.0.2 8622 */ 8623 public static boolean isDefined(char ch) { 8624 return isDefined((int)ch); 8625 } 8626 8627 /** 8628 * Determines if a character (Unicode code point) is defined in Unicode. 8629 * <p> 8630 * A character is defined if at least one of the following is true: 8631 * <ul> 8632 * <li>It has an entry in the UnicodeData file. 8633 * <li>It has a value in a range defined by the UnicodeData file. 8634 * </ul> 8635 * 8636 * @param codePoint the character (Unicode code point) to be tested. 8637 * @return {@code true} if the character has a defined meaning 8638 * in Unicode; {@code false} otherwise. 8639 * @see Character#isDigit(int) 8640 * @see Character#isLetter(int) 8641 * @see Character#isLetterOrDigit(int) 8642 * @see Character#isLowerCase(int) 8643 * @see Character#isTitleCase(int) 8644 * @see Character#isUpperCase(int) 8645 * @since 1.5 8646 */ 8647 public static boolean isDefined(int codePoint) { 8648 return getType(codePoint) != Character.UNASSIGNED; 8649 } 8650 8651 /** 8652 * Determines if the specified character is a letter. 8653 * <p> 8654 * A character is considered to be a letter if its general 8655 * category type, provided by {@code Character.getType(ch)}, 8656 * is any of the following: 8657 * <ul> 8658 * <li> {@code UPPERCASE_LETTER} 8659 * <li> {@code LOWERCASE_LETTER} 8660 * <li> {@code TITLECASE_LETTER} 8661 * <li> {@code MODIFIER_LETTER} 8662 * <li> {@code OTHER_LETTER} 8663 * </ul> 8664 * 8665 * Not all letters have case. Many characters are 8666 * letters but are neither uppercase nor lowercase nor titlecase. 8667 * 8668 * <p><b>Note:</b> This method cannot handle <a 8669 * href="#supplementary"> supplementary characters</a>. 
To support 8670 * all Unicode characters, including supplementary characters, use 8671 * the {@link #isLetter(int)} method. 8672 * 8673 * @param ch the character to be tested. 8674 * @return {@code true} if the character is a letter; 8675 * {@code false} otherwise. 8676 * @see Character#isDigit(char) 8677 * @see Character#isJavaIdentifierStart(char) 8678 * @see Character#isJavaLetter(char) 8679 * @see Character#isJavaLetterOrDigit(char) 8680 * @see Character#isLetterOrDigit(char) 8681 * @see Character#isLowerCase(char) 8682 * @see Character#isTitleCase(char) 8683 * @see Character#isUnicodeIdentifierStart(char) 8684 * @see Character#isUpperCase(char) 8685 */ 8686 public static boolean isLetter(char ch) { 8687 return isLetter((int)ch); 8688 } 8689 8690 /** 8691 * Determines if the specified character (Unicode code point) is a letter. 8692 * <p> 8693 * A character is considered to be a letter if its general 8694 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 8695 * is any of the following: 8696 * <ul> 8697 * <li> {@code UPPERCASE_LETTER} 8698 * <li> {@code LOWERCASE_LETTER} 8699 * <li> {@code TITLECASE_LETTER} 8700 * <li> {@code MODIFIER_LETTER} 8701 * <li> {@code OTHER_LETTER} 8702 * </ul> 8703 * 8704 * Not all letters have case. Many characters are 8705 * letters but are neither uppercase nor lowercase nor titlecase. 8706 * 8707 * @param codePoint the character (Unicode code point) to be tested. 8708 * @return {@code true} if the character is a letter; 8709 * {@code false} otherwise. 
8710 * @see Character#isDigit(int) 8711 * @see Character#isJavaIdentifierStart(int) 8712 * @see Character#isLetterOrDigit(int) 8713 * @see Character#isLowerCase(int) 8714 * @see Character#isTitleCase(int) 8715 * @see Character#isUnicodeIdentifierStart(int) 8716 * @see Character#isUpperCase(int) 8717 * @since 1.5 8718 */ 8719 public static boolean isLetter(int codePoint) { 8720 return ((((1 << Character.UPPERCASE_LETTER) | 8721 (1 << Character.LOWERCASE_LETTER) | 8722 (1 << Character.TITLECASE_LETTER) | 8723 (1 << Character.MODIFIER_LETTER) | 8724 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 8725 != 0; 8726 } 8727 8728 /** 8729 * Determines if the specified character is a letter or digit. 8730 * <p> 8731 * A character is considered to be a letter or digit if either 8732 * {@code Character.isLetter(char ch)} or 8733 * {@code Character.isDigit(char ch)} returns 8734 * {@code true} for the character. 8735 * 8736 * <p><b>Note:</b> This method cannot handle <a 8737 * href="#supplementary"> supplementary characters</a>. To support 8738 * all Unicode characters, including supplementary characters, use 8739 * the {@link #isLetterOrDigit(int)} method. 8740 * 8741 * @param ch the character to be tested. 8742 * @return {@code true} if the character is a letter or digit; 8743 * {@code false} otherwise. 8744 * @see Character#isDigit(char) 8745 * @see Character#isJavaIdentifierPart(char) 8746 * @see Character#isJavaLetter(char) 8747 * @see Character#isJavaLetterOrDigit(char) 8748 * @see Character#isLetter(char) 8749 * @see Character#isUnicodeIdentifierPart(char) 8750 * @since 1.0.2 8751 */ 8752 public static boolean isLetterOrDigit(char ch) { 8753 return isLetterOrDigit((int)ch); 8754 } 8755 8756 /** 8757 * Determines if the specified character (Unicode code point) is a letter or digit. 
8758 * <p> 8759 * A character is considered to be a letter or digit if either 8760 * {@link #isLetter(int) isLetter(codePoint)} or 8761 * {@link #isDigit(int) isDigit(codePoint)} returns 8762 * {@code true} for the character. 8763 * 8764 * @param codePoint the character (Unicode code point) to be tested. 8765 * @return {@code true} if the character is a letter or digit; 8766 * {@code false} otherwise. 8767 * @see Character#isDigit(int) 8768 * @see Character#isJavaIdentifierPart(int) 8769 * @see Character#isLetter(int) 8770 * @see Character#isUnicodeIdentifierPart(int) 8771 * @since 1.5 8772 */ 8773 public static boolean isLetterOrDigit(int codePoint) { 8774 return ((((1 << Character.UPPERCASE_LETTER) | 8775 (1 << Character.LOWERCASE_LETTER) | 8776 (1 << Character.TITLECASE_LETTER) | 8777 (1 << Character.MODIFIER_LETTER) | 8778 (1 << Character.OTHER_LETTER) | 8779 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 8780 != 0; 8781 } 8782 8783 /** 8784 * Determines if the specified character is permissible as the first 8785 * character in a Java identifier. 8786 * <p> 8787 * A character may start a Java identifier if and only if 8788 * one of the following is true: 8789 * <ul> 8790 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 8791 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 8792 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 8793 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 8794 * </ul> 8795 * 8796 * @param ch the character to be tested. 8797 * @return {@code true} if the character may start a Java 8798 * identifier; {@code false} otherwise. 
8799 * @see Character#isJavaLetterOrDigit(char) 8800 * @see Character#isJavaIdentifierStart(char) 8801 * @see Character#isJavaIdentifierPart(char) 8802 * @see Character#isLetter(char) 8803 * @see Character#isLetterOrDigit(char) 8804 * @see Character#isUnicodeIdentifierStart(char) 8805 * @since 1.0.2 8806 * @deprecated Replaced by isJavaIdentifierStart(char). 8807 */ 8808 @Deprecated(since="1.1") 8809 public static boolean isJavaLetter(char ch) { 8810 return isJavaIdentifierStart(ch); 8811 } 8812 8813 /** 8814 * Determines if the specified character may be part of a Java 8815 * identifier as other than the first character. 8816 * <p> 8817 * A character may be part of a Java identifier if and only if any 8818 * of the following are true: 8819 * <ul> 8820 * <li> it is a letter 8821 * <li> it is a currency symbol (such as {@code '$'}) 8822 * <li> it is a connecting punctuation character (such as {@code '_'}) 8823 * <li> it is a digit 8824 * <li> it is a numeric letter (such as a Roman numeral character) 8825 * <li> it is a combining mark 8826 * <li> it is a non-spacing mark 8827 * <li> {@code isIdentifierIgnorable} returns 8828 * {@code true} for the character. 8829 * </ul> 8830 * 8831 * @param ch the character to be tested. 8832 * @return {@code true} if the character may be part of a 8833 * Java identifier; {@code false} otherwise. 8834 * @see Character#isJavaLetter(char) 8835 * @see Character#isJavaIdentifierStart(char) 8836 * @see Character#isJavaIdentifierPart(char) 8837 * @see Character#isLetter(char) 8838 * @see Character#isLetterOrDigit(char) 8839 * @see Character#isUnicodeIdentifierPart(char) 8840 * @see Character#isIdentifierIgnorable(char) 8841 * @since 1.0.2 8842 * @deprecated Replaced by isJavaIdentifierPart(char). 8843 */ 8844 @Deprecated(since="1.1") 8845 public static boolean isJavaLetterOrDigit(char ch) { 8846 return isJavaIdentifierPart(ch); 8847 } 8848 8849 /** 8850 * Determines if the specified character (Unicode code point) is an alphabet. 
8851 * <p> 8852 * A character is considered to be alphabetic if its general category type, 8853 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 8854 * the following: 8855 * <ul> 8856 * <li> <code>UPPERCASE_LETTER</code> 8857 * <li> <code>LOWERCASE_LETTER</code> 8858 * <li> <code>TITLECASE_LETTER</code> 8859 * <li> <code>MODIFIER_LETTER</code> 8860 * <li> <code>OTHER_LETTER</code> 8861 * <li> <code>LETTER_NUMBER</code> 8862 * </ul> 8863 * or it has contributory property Other_Alphabetic as defined by the 8864 * Unicode Standard. 8865 * 8866 * @param codePoint the character (Unicode code point) to be tested. 8867 * @return <code>true</code> if the character is a Unicode alphabet 8868 * character, <code>false</code> otherwise. 8869 * @since 1.7 8870 */ 8871 public static boolean isAlphabetic(int codePoint) { 8872 return (((((1 << Character.UPPERCASE_LETTER) | 8873 (1 << Character.LOWERCASE_LETTER) | 8874 (1 << Character.TITLECASE_LETTER) | 8875 (1 << Character.MODIFIER_LETTER) | 8876 (1 << Character.OTHER_LETTER) | 8877 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 8878 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 8879 } 8880 8881 /** 8882 * Determines if the specified character (Unicode code point) is a CJKV 8883 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 8884 * the Unicode Standard. 8885 * 8886 * @param codePoint the character (Unicode code point) to be tested. 8887 * @return <code>true</code> if the character is a Unicode ideograph 8888 * character, <code>false</code> otherwise. 8889 * @since 1.7 8890 */ 8891 public static boolean isIdeographic(int codePoint) { 8892 return CharacterData.of(codePoint).isIdeographic(codePoint); 8893 } 8894 8895 /** 8896 * Determines if the specified character is 8897 * permissible as the first character in a Java identifier. 
8898 * <p> 8899 * A character may start a Java identifier if and only if 8900 * one of the following conditions is true: 8901 * <ul> 8902 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 8903 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 8904 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 8905 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 8906 * </ul> 8907 * 8908 * <p><b>Note:</b> This method cannot handle <a 8909 * href="#supplementary"> supplementary characters</a>. To support 8910 * all Unicode characters, including supplementary characters, use 8911 * the {@link #isJavaIdentifierStart(int)} method. 8912 * 8913 * @param ch the character to be tested. 8914 * @return {@code true} if the character may start a Java identifier; 8915 * {@code false} otherwise. 8916 * @see Character#isJavaIdentifierPart(char) 8917 * @see Character#isLetter(char) 8918 * @see Character#isUnicodeIdentifierStart(char) 8919 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8920 * @since 1.1 8921 */ 8922 public static boolean isJavaIdentifierStart(char ch) { 8923 return isJavaIdentifierStart((int)ch); 8924 } 8925 8926 /** 8927 * Determines if the character (Unicode code point) is 8928 * permissible as the first character in a Java identifier. 8929 * <p> 8930 * A character may start a Java identifier if and only if 8931 * one of the following conditions is true: 8932 * <ul> 8933 * <li> {@link #isLetter(int) isLetter(codePoint)} 8934 * returns {@code true} 8935 * <li> {@link #getType(int) getType(codePoint)} 8936 * returns {@code LETTER_NUMBER} 8937 * <li> the referenced character is a currency symbol (such as {@code '$'}) 8938 * <li> the referenced character is a connecting punctuation character 8939 * (such as {@code '_'}). 8940 * </ul> 8941 * 8942 * @param codePoint the character (Unicode code point) to be tested. 
8943 * @return {@code true} if the character may start a Java identifier; 8944 * {@code false} otherwise. 8945 * @see Character#isJavaIdentifierPart(int) 8946 * @see Character#isLetter(int) 8947 * @see Character#isUnicodeIdentifierStart(int) 8948 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8949 * @since 1.5 8950 */ 8951 public static boolean isJavaIdentifierStart(int codePoint) { 8952 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 8953 } 8954 8955 /** 8956 * Determines if the specified character may be part of a Java 8957 * identifier as other than the first character. 8958 * <p> 8959 * A character may be part of a Java identifier if any of the following 8960 * are true: 8961 * <ul> 8962 * <li> it is a letter 8963 * <li> it is a currency symbol (such as {@code '$'}) 8964 * <li> it is a connecting punctuation character (such as {@code '_'}) 8965 * <li> it is a digit 8966 * <li> it is a numeric letter (such as a Roman numeral character) 8967 * <li> it is a combining mark 8968 * <li> it is a non-spacing mark 8969 * <li> {@code isIdentifierIgnorable} returns 8970 * {@code true} for the character 8971 * </ul> 8972 * 8973 * <p><b>Note:</b> This method cannot handle <a 8974 * href="#supplementary"> supplementary characters</a>. To support 8975 * all Unicode characters, including supplementary characters, use 8976 * the {@link #isJavaIdentifierPart(int)} method. 8977 * 8978 * @param ch the character to be tested. 8979 * @return {@code true} if the character may be part of a 8980 * Java identifier; {@code false} otherwise. 
8981 * @see Character#isIdentifierIgnorable(char) 8982 * @see Character#isJavaIdentifierStart(char) 8983 * @see Character#isLetterOrDigit(char) 8984 * @see Character#isUnicodeIdentifierPart(char) 8985 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8986 * @since 1.1 8987 */ 8988 public static boolean isJavaIdentifierPart(char ch) { 8989 return isJavaIdentifierPart((int)ch); 8990 } 8991 8992 /** 8993 * Determines if the character (Unicode code point) may be part of a Java 8994 * identifier as other than the first character. 8995 * <p> 8996 * A character may be part of a Java identifier if any of the following 8997 * are true: 8998 * <ul> 8999 * <li> it is a letter 9000 * <li> it is a currency symbol (such as {@code '$'}) 9001 * <li> it is a connecting punctuation character (such as {@code '_'}) 9002 * <li> it is a digit 9003 * <li> it is a numeric letter (such as a Roman numeral character) 9004 * <li> it is a combining mark 9005 * <li> it is a non-spacing mark 9006 * <li> {@link #isIdentifierIgnorable(int) 9007 * isIdentifierIgnorable(codePoint)} returns {@code true} for 9008 * the character 9009 * </ul> 9010 * 9011 * @param codePoint the character (Unicode code point) to be tested. 9012 * @return {@code true} if the character may be part of a 9013 * Java identifier; {@code false} otherwise. 9014 * @see Character#isIdentifierIgnorable(int) 9015 * @see Character#isJavaIdentifierStart(int) 9016 * @see Character#isLetterOrDigit(int) 9017 * @see Character#isUnicodeIdentifierPart(int) 9018 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 9019 * @since 1.5 9020 */ 9021 public static boolean isJavaIdentifierPart(int codePoint) { 9022 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 9023 } 9024 9025 /** 9026 * Determines if the specified character is permissible as the 9027 * first character in a Unicode identifier. 
9028 * <p> 9029 * A character may start a Unicode identifier if and only if 9030 * one of the following conditions is true: 9031 * <ul> 9032 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 9033 * <li> {@link #getType(char) getType(ch)} returns 9034 * {@code LETTER_NUMBER}. 9035 * </ul> 9036 * 9037 * <p><b>Note:</b> This method cannot handle <a 9038 * href="#supplementary"> supplementary characters</a>. To support 9039 * all Unicode characters, including supplementary characters, use 9040 * the {@link #isUnicodeIdentifierStart(int)} method. 9041 * 9042 * @param ch the character to be tested. 9043 * @return {@code true} if the character may start a Unicode 9044 * identifier; {@code false} otherwise. 9045 * @see Character#isJavaIdentifierStart(char) 9046 * @see Character#isLetter(char) 9047 * @see Character#isUnicodeIdentifierPart(char) 9048 * @since 1.1 9049 */ 9050 public static boolean isUnicodeIdentifierStart(char ch) { 9051 return isUnicodeIdentifierStart((int)ch); 9052 } 9053 9054 /** 9055 * Determines if the specified character (Unicode code point) is permissible as the 9056 * first character in a Unicode identifier. 9057 * <p> 9058 * A character may start a Unicode identifier if and only if 9059 * one of the following conditions is true: 9060 * <ul> 9061 * <li> {@link #isLetter(int) isLetter(codePoint)} 9062 * returns {@code true} 9063 * <li> {@link #getType(int) getType(codePoint)} 9064 * returns {@code LETTER_NUMBER}. 9065 * </ul> 9066 * @param codePoint the character (Unicode code point) to be tested. 9067 * @return {@code true} if the character may start a Unicode 9068 * identifier; {@code false} otherwise. 
9069 * @see Character#isJavaIdentifierStart(int) 9070 * @see Character#isLetter(int) 9071 * @see Character#isUnicodeIdentifierPart(int) 9072 * @since 1.5 9073 */ 9074 public static boolean isUnicodeIdentifierStart(int codePoint) { 9075 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 9076 } 9077 9078 /** 9079 * Determines if the specified character may be part of a Unicode 9080 * identifier as other than the first character. 9081 * <p> 9082 * A character may be part of a Unicode identifier if and only if 9083 * one of the following statements is true: 9084 * <ul> 9085 * <li> it is a letter 9086 * <li> it is a connecting punctuation character (such as {@code '_'}) 9087 * <li> it is a digit 9088 * <li> it is a numeric letter (such as a Roman numeral character) 9089 * <li> it is a combining mark 9090 * <li> it is a non-spacing mark 9091 * <li> {@code isIdentifierIgnorable} returns 9092 * {@code true} for this character. 9093 * </ul> 9094 * 9095 * <p><b>Note:</b> This method cannot handle <a 9096 * href="#supplementary"> supplementary characters</a>. To support 9097 * all Unicode characters, including supplementary characters, use 9098 * the {@link #isUnicodeIdentifierPart(int)} method. 9099 * 9100 * @param ch the character to be tested. 9101 * @return {@code true} if the character may be part of a 9102 * Unicode identifier; {@code false} otherwise. 9103 * @see Character#isIdentifierIgnorable(char) 9104 * @see Character#isJavaIdentifierPart(char) 9105 * @see Character#isLetterOrDigit(char) 9106 * @see Character#isUnicodeIdentifierStart(char) 9107 * @since 1.1 9108 */ 9109 public static boolean isUnicodeIdentifierPart(char ch) { 9110 return isUnicodeIdentifierPart((int)ch); 9111 } 9112 9113 /** 9114 * Determines if the specified character (Unicode code point) may be part of a Unicode 9115 * identifier as other than the first character. 
9116 * <p> 9117 * A character may be part of a Unicode identifier if and only if 9118 * one of the following statements is true: 9119 * <ul> 9120 * <li> it is a letter 9121 * <li> it is a connecting punctuation character (such as {@code '_'}) 9122 * <li> it is a digit 9123 * <li> it is a numeric letter (such as a Roman numeral character) 9124 * <li> it is a combining mark 9125 * <li> it is a non-spacing mark 9126 * <li> {@code isIdentifierIgnorable} returns 9127 * {@code true} for this character. 9128 * </ul> 9129 * @param codePoint the character (Unicode code point) to be tested. 9130 * @return {@code true} if the character may be part of a 9131 * Unicode identifier; {@code false} otherwise. 9132 * @see Character#isIdentifierIgnorable(int) 9133 * @see Character#isJavaIdentifierPart(int) 9134 * @see Character#isLetterOrDigit(int) 9135 * @see Character#isUnicodeIdentifierStart(int) 9136 * @since 1.5 9137 */ 9138 public static boolean isUnicodeIdentifierPart(int codePoint) { 9139 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 9140 } 9141 9142 /** 9143 * Determines if the specified character should be regarded as 9144 * an ignorable character in a Java identifier or a Unicode identifier. 9145 * <p> 9146 * The following Unicode characters are ignorable in a Java identifier 9147 * or a Unicode identifier: 9148 * <ul> 9149 * <li>ISO control characters that are not whitespace 9150 * <ul> 9151 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 9152 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 9153 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 9154 * </ul> 9155 * 9156 * <li>all characters that have the {@code FORMAT} general 9157 * category value 9158 * </ul> 9159 * 9160 * <p><b>Note:</b> This method cannot handle <a 9161 * href="#supplementary"> supplementary characters</a>. To support 9162 * all Unicode characters, including supplementary characters, use 9163 * the {@link #isIdentifierIgnorable(int)} method. 
9164 * 9165 * @param ch the character to be tested. 9166 * @return {@code true} if the character is an ignorable control 9167 * character that may be part of a Java or Unicode identifier; 9168 * {@code false} otherwise. 9169 * @see Character#isJavaIdentifierPart(char) 9170 * @see Character#isUnicodeIdentifierPart(char) 9171 * @since 1.1 9172 */ 9173 public static boolean isIdentifierIgnorable(char ch) { 9174 return isIdentifierIgnorable((int)ch); 9175 } 9176 9177 /** 9178 * Determines if the specified character (Unicode code point) should be regarded as 9179 * an ignorable character in a Java identifier or a Unicode identifier. 9180 * <p> 9181 * The following Unicode characters are ignorable in a Java identifier 9182 * or a Unicode identifier: 9183 * <ul> 9184 * <li>ISO control characters that are not whitespace 9185 * <ul> 9186 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 9187 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 9188 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 9189 * </ul> 9190 * 9191 * <li>all characters that have the {@code FORMAT} general 9192 * category value 9193 * </ul> 9194 * 9195 * @param codePoint the character (Unicode code point) to be tested. 9196 * @return {@code true} if the character is an ignorable control 9197 * character that may be part of a Java or Unicode identifier; 9198 * {@code false} otherwise. 9199 * @see Character#isJavaIdentifierPart(int) 9200 * @see Character#isUnicodeIdentifierPart(int) 9201 * @since 1.5 9202 */ 9203 public static boolean isIdentifierIgnorable(int codePoint) { 9204 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 9205 } 9206 9207 /** 9208 * Converts the character argument to lowercase using case 9209 * mapping information from the UnicodeData file. 
9210 * <p> 9211 * Note that 9212 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 9213 * does not always return {@code true} for some ranges of 9214 * characters, particularly those that are symbols or ideographs. 9215 * 9216 * <p>In general, {@link String#toLowerCase()} should be used to map 9217 * characters to lowercase. {@code String} case mapping methods 9218 * have several benefits over {@code Character} case mapping methods. 9219 * {@code String} case mapping methods can perform locale-sensitive 9220 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9221 * the {@code Character} case mapping methods cannot. 9222 * 9223 * <p><b>Note:</b> This method cannot handle <a 9224 * href="#supplementary"> supplementary characters</a>. To support 9225 * all Unicode characters, including supplementary characters, use 9226 * the {@link #toLowerCase(int)} method. 9227 * 9228 * @param ch the character to be converted. 9229 * @return the lowercase equivalent of the character, if any; 9230 * otherwise, the character itself. 9231 * @see Character#isLowerCase(char) 9232 * @see String#toLowerCase() 9233 */ 9234 public static char toLowerCase(char ch) { 9235 return (char)toLowerCase((int)ch); 9236 } 9237 9238 /** 9239 * Converts the character (Unicode code point) argument to 9240 * lowercase using case mapping information from the UnicodeData 9241 * file. 9242 * 9243 * <p> Note that 9244 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 9245 * does not always return {@code true} for some ranges of 9246 * characters, particularly those that are symbols or ideographs. 9247 * 9248 * <p>In general, {@link String#toLowerCase()} should be used to map 9249 * characters to lowercase. {@code String} case mapping methods 9250 * have several benefits over {@code Character} case mapping methods. 
9251 * {@code String} case mapping methods can perform locale-sensitive 9252 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9253 * the {@code Character} case mapping methods cannot. 9254 * 9255 * @param codePoint the character (Unicode code point) to be converted. 9256 * @return the lowercase equivalent of the character (Unicode code 9257 * point), if any; otherwise, the character itself. 9258 * @see Character#isLowerCase(int) 9259 * @see String#toLowerCase() 9260 * 9261 * @since 1.5 9262 */ 9263 public static int toLowerCase(int codePoint) { 9264 return CharacterData.of(codePoint).toLowerCase(codePoint); 9265 } 9266 9267 /** 9268 * Converts the character argument to uppercase using case mapping 9269 * information from the UnicodeData file. 9270 * <p> 9271 * Note that 9272 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 9273 * does not always return {@code true} for some ranges of 9274 * characters, particularly those that are symbols or ideographs. 9275 * 9276 * <p>In general, {@link String#toUpperCase()} should be used to map 9277 * characters to uppercase. {@code String} case mapping methods 9278 * have several benefits over {@code Character} case mapping methods. 9279 * {@code String} case mapping methods can perform locale-sensitive 9280 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9281 * the {@code Character} case mapping methods cannot. 9282 * 9283 * <p><b>Note:</b> This method cannot handle <a 9284 * href="#supplementary"> supplementary characters</a>. To support 9285 * all Unicode characters, including supplementary characters, use 9286 * the {@link #toUpperCase(int)} method. 9287 * 9288 * @param ch the character to be converted. 9289 * @return the uppercase equivalent of the character, if any; 9290 * otherwise, the character itself. 
9291 * @see Character#isUpperCase(char) 9292 * @see String#toUpperCase() 9293 */ 9294 public static char toUpperCase(char ch) { 9295 return (char)toUpperCase((int)ch); 9296 } 9297 9298 /** 9299 * Converts the character (Unicode code point) argument to 9300 * uppercase using case mapping information from the UnicodeData 9301 * file. 9302 * 9303 * <p>Note that 9304 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 9305 * does not always return {@code true} for some ranges of 9306 * characters, particularly those that are symbols or ideographs. 9307 * 9308 * <p>In general, {@link String#toUpperCase()} should be used to map 9309 * characters to uppercase. {@code String} case mapping methods 9310 * have several benefits over {@code Character} case mapping methods. 9311 * {@code String} case mapping methods can perform locale-sensitive 9312 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9313 * the {@code Character} case mapping methods cannot. 9314 * 9315 * @param codePoint the character (Unicode code point) to be converted. 9316 * @return the uppercase equivalent of the character, if any; 9317 * otherwise, the character itself. 9318 * @see Character#isUpperCase(int) 9319 * @see String#toUpperCase() 9320 * 9321 * @since 1.5 9322 */ 9323 public static int toUpperCase(int codePoint) { 9324 return CharacterData.of(codePoint).toUpperCase(codePoint); 9325 } 9326 9327 /** 9328 * Converts the character argument to titlecase using case mapping 9329 * information from the UnicodeData file. If a character has no 9330 * explicit titlecase mapping and is not itself a titlecase char 9331 * according to UnicodeData, then the uppercase mapping is 9332 * returned as an equivalent titlecase mapping. If the 9333 * {@code char} argument is already a titlecase 9334 * {@code char}, the same {@code char} value will be 9335 * returned. 
9336 * <p> 9337 * Note that 9338 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 9339 * does not always return {@code true} for some ranges of 9340 * characters. 9341 * 9342 * <p><b>Note:</b> This method cannot handle <a 9343 * href="#supplementary"> supplementary characters</a>. To support 9344 * all Unicode characters, including supplementary characters, use 9345 * the {@link #toTitleCase(int)} method. 9346 * 9347 * @param ch the character to be converted. 9348 * @return the titlecase equivalent of the character, if any; 9349 * otherwise, the character itself. 9350 * @see Character#isTitleCase(char) 9351 * @see Character#toLowerCase(char) 9352 * @see Character#toUpperCase(char) 9353 * @since 1.0.2 9354 */ 9355 public static char toTitleCase(char ch) { 9356 return (char)toTitleCase((int)ch); 9357 } 9358 9359 /** 9360 * Converts the character (Unicode code point) argument to titlecase using case mapping 9361 * information from the UnicodeData file. If a character has no 9362 * explicit titlecase mapping and is not itself a titlecase char 9363 * according to UnicodeData, then the uppercase mapping is 9364 * returned as an equivalent titlecase mapping. If the 9365 * character argument is already a titlecase 9366 * character, the same character value will be 9367 * returned. 9368 * 9369 * <p>Note that 9370 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 9371 * does not always return {@code true} for some ranges of 9372 * characters. 9373 * 9374 * @param codePoint the character (Unicode code point) to be converted. 9375 * @return the titlecase equivalent of the character, if any; 9376 * otherwise, the character itself. 
9377 * @see Character#isTitleCase(int) 9378 * @see Character#toLowerCase(int) 9379 * @see Character#toUpperCase(int) 9380 * @since 1.5 9381 */ 9382 public static int toTitleCase(int codePoint) { 9383 return CharacterData.of(codePoint).toTitleCase(codePoint); 9384 } 9385 9386 /** 9387 * Returns the numeric value of the character {@code ch} in the 9388 * specified radix. 9389 * <p> 9390 * If the radix is not in the range {@code MIN_RADIX} ≤ 9391 * {@code radix} ≤ {@code MAX_RADIX} or if the 9392 * value of {@code ch} is not a valid digit in the specified 9393 * radix, {@code -1} is returned. A character is a valid digit 9394 * if at least one of the following is true: 9395 * <ul> 9396 * <li>The method {@code isDigit} is {@code true} of the character 9397 * and the Unicode decimal digit value of the character (or its 9398 * single-character decomposition) is less than the specified radix. 9399 * In this case the decimal digit value is returned. 9400 * <li>The character is one of the uppercase Latin letters 9401 * {@code 'A'} through {@code 'Z'} and its code is less than 9402 * {@code radix + 'A' - 10}. 9403 * In this case, {@code ch - 'A' + 10} 9404 * is returned. 9405 * <li>The character is one of the lowercase Latin letters 9406 * {@code 'a'} through {@code 'z'} and its code is less than 9407 * {@code radix + 'a' - 10}. 9408 * In this case, {@code ch - 'a' + 10} 9409 * is returned. 9410 * <li>The character is one of the fullwidth uppercase Latin letters A 9411 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 9412 * and its code is less than 9413 * {@code radix + '\u005CuFF21' - 10}. 9414 * In this case, {@code ch - '\u005CuFF21' + 10} 9415 * is returned. 9416 * <li>The character is one of the fullwidth lowercase Latin letters a 9417 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 9418 * and its code is less than 9419 * {@code radix + '\u005CuFF41' - 10}. 9420 * In this case, {@code ch - '\u005CuFF41' + 10} 9421 * is returned. 
9422 * </ul> 9423 * 9424 * <p><b>Note:</b> This method cannot handle <a 9425 * href="#supplementary"> supplementary characters</a>. To support 9426 * all Unicode characters, including supplementary characters, use 9427 * the {@link #digit(int, int)} method. 9428 * 9429 * @param ch the character to be converted. 9430 * @param radix the radix. 9431 * @return the numeric value represented by the character in the 9432 * specified radix. 9433 * @see Character#forDigit(int, int) 9434 * @see Character#isDigit(char) 9435 */ 9436 public static int digit(char ch, int radix) { 9437 return digit((int)ch, radix); 9438 } 9439 9440 /** 9441 * Returns the numeric value of the specified character (Unicode 9442 * code point) in the specified radix. 9443 * 9444 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 9445 * {@code radix} ≤ {@code MAX_RADIX} or if the 9446 * character is not a valid digit in the specified 9447 * radix, {@code -1} is returned. A character is a valid digit 9448 * if at least one of the following is true: 9449 * <ul> 9450 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 9451 * and the Unicode decimal digit value of the character (or its 9452 * single-character decomposition) is less than the specified radix. 9453 * In this case the decimal digit value is returned. 9454 * <li>The character is one of the uppercase Latin letters 9455 * {@code 'A'} through {@code 'Z'} and its code is less than 9456 * {@code radix + 'A' - 10}. 9457 * In this case, {@code codePoint - 'A' + 10} 9458 * is returned. 9459 * <li>The character is one of the lowercase Latin letters 9460 * {@code 'a'} through {@code 'z'} and its code is less than 9461 * {@code radix + 'a' - 10}. 9462 * In this case, {@code codePoint - 'a' + 10} 9463 * is returned. 
9464 * <li>The character is one of the fullwidth uppercase Latin letters A 9465 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 9466 * and its code is less than 9467 * {@code radix + '\u005CuFF21' - 10}. 9468 * In this case, 9469 * {@code codePoint - '\u005CuFF21' + 10} 9470 * is returned. 9471 * <li>The character is one of the fullwidth lowercase Latin letters a 9472 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 9473 * and its code is less than 9474 * {@code radix + '\u005CuFF41'- 10}. 9475 * In this case, 9476 * {@code codePoint - '\u005CuFF41' + 10} 9477 * is returned. 9478 * </ul> 9479 * 9480 * @param codePoint the character (Unicode code point) to be converted. 9481 * @param radix the radix. 9482 * @return the numeric value represented by the character in the 9483 * specified radix. 9484 * @see Character#forDigit(int, int) 9485 * @see Character#isDigit(int) 9486 * @since 1.5 9487 */ 9488 public static int digit(int codePoint, int radix) { 9489 return CharacterData.of(codePoint).digit(codePoint, radix); 9490 } 9491 9492 /** 9493 * Returns the {@code int} value that the specified Unicode 9494 * character represents. For example, the character 9495 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 9496 * an int with a value of 50. 9497 * <p> 9498 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 9499 * {@code '\u005Cu005A'}), lowercase 9500 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 9501 * full width variant ({@code '\u005CuFF21'} through 9502 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 9503 * {@code '\u005CuFF5A'}) forms have numeric values from 10 9504 * through 35. This is independent of the Unicode specification, 9505 * which does not assign numeric values to these {@code char} 9506 * values. 9507 * <p> 9508 * If the character does not have a numeric value, then -1 is returned. 
9509 * If the character has a numeric value that cannot be represented as a 9510 * nonnegative integer (for example, a fractional value), then -2 9511 * is returned. 9512 * 9513 * <p><b>Note:</b> This method cannot handle <a 9514 * href="#supplementary"> supplementary characters</a>. To support 9515 * all Unicode characters, including supplementary characters, use 9516 * the {@link #getNumericValue(int)} method. 9517 * 9518 * @param ch the character to be converted. 9519 * @return the numeric value of the character, as a nonnegative {@code int} 9520 * value; -2 if the character has a numeric value but the value 9521 * can not be represented as a nonnegative {@code int} value; 9522 * -1 if the character has no numeric value. 9523 * @see Character#forDigit(int, int) 9524 * @see Character#isDigit(char) 9525 * @since 1.1 9526 */ 9527 public static int getNumericValue(char ch) { 9528 return getNumericValue((int)ch); 9529 } 9530 9531 /** 9532 * Returns the {@code int} value that the specified 9533 * character (Unicode code point) represents. For example, the character 9534 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 9535 * an {@code int} with a value of 50. 9536 * <p> 9537 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 9538 * {@code '\u005Cu005A'}), lowercase 9539 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 9540 * full width variant ({@code '\u005CuFF21'} through 9541 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 9542 * {@code '\u005CuFF5A'}) forms have numeric values from 10 9543 * through 35. This is independent of the Unicode specification, 9544 * which does not assign numeric values to these {@code char} 9545 * values. 9546 * <p> 9547 * If the character does not have a numeric value, then -1 is returned. 9548 * If the character has a numeric value that cannot be represented as a 9549 * nonnegative integer (for example, a fractional value), then -2 9550 * is returned. 
9551 * 9552 * @param codePoint the character (Unicode code point) to be converted. 9553 * @return the numeric value of the character, as a nonnegative {@code int} 9554 * value; -2 if the character has a numeric value but the value 9555 * can not be represented as a nonnegative {@code int} value; 9556 * -1 if the character has no numeric value. 9557 * @see Character#forDigit(int, int) 9558 * @see Character#isDigit(int) 9559 * @since 1.5 9560 */ 9561 public static int getNumericValue(int codePoint) { 9562 return CharacterData.of(codePoint).getNumericValue(codePoint); 9563 } 9564 9565 /** 9566 * Determines if the specified character is ISO-LATIN-1 white space. 9567 * This method returns {@code true} for the following five 9568 * characters only: 9569 * <table summary="truechars"> 9570 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 9571 * <td>{@code HORIZONTAL TABULATION}</td></tr> 9572 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 9573 * <td>{@code NEW LINE}</td></tr> 9574 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 9575 * <td>{@code FORM FEED}</td></tr> 9576 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 9577 * <td>{@code CARRIAGE RETURN}</td></tr> 9578 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 9579 * <td>{@code SPACE}</td></tr> 9580 * </table> 9581 * 9582 * @param ch the character to be tested. 9583 * @return {@code true} if the character is ISO-LATIN-1 white 9584 * space; {@code false} otherwise. 9585 * @see Character#isSpaceChar(char) 9586 * @see Character#isWhitespace(char) 9587 * @deprecated Replaced by isWhitespace(char). 9588 */ 9589 @Deprecated(since="1.1") 9590 public static boolean isSpace(char ch) { 9591 return (ch <= 0x0020) && 9592 (((((1L << 0x0009) | 9593 (1L << 0x000A) | 9594 (1L << 0x000C) | 9595 (1L << 0x000D) | 9596 (1L << 0x0020)) >> ch) & 1L) != 0); 9597 } 9598 9599 9600 /** 9601 * Determines if the specified character is a Unicode space character. 
9602 * A character is considered to be a space character if and only if 9603 * it is specified to be a space character by the Unicode Standard. This 9604 * method returns true if the character's general category type is any of 9605 * the following: 9606 * <ul> 9607 * <li> {@code SPACE_SEPARATOR} 9608 * <li> {@code LINE_SEPARATOR} 9609 * <li> {@code PARAGRAPH_SEPARATOR} 9610 * </ul> 9611 * 9612 * <p><b>Note:</b> This method cannot handle <a 9613 * href="#supplementary"> supplementary characters</a>. To support 9614 * all Unicode characters, including supplementary characters, use 9615 * the {@link #isSpaceChar(int)} method. 9616 * 9617 * @param ch the character to be tested. 9618 * @return {@code true} if the character is a space character; 9619 * {@code false} otherwise. 9620 * @see Character#isWhitespace(char) 9621 * @since 1.1 9622 */ 9623 public static boolean isSpaceChar(char ch) { 9624 return isSpaceChar((int)ch); 9625 } 9626 9627 /** 9628 * Determines if the specified character (Unicode code point) is a 9629 * Unicode space character. A character is considered to be a 9630 * space character if and only if it is specified to be a space 9631 * character by the Unicode Standard. This method returns true if 9632 * the character's general category type is any of the following: 9633 * 9634 * <ul> 9635 * <li> {@link #SPACE_SEPARATOR} 9636 * <li> {@link #LINE_SEPARATOR} 9637 * <li> {@link #PARAGRAPH_SEPARATOR} 9638 * </ul> 9639 * 9640 * @param codePoint the character (Unicode code point) to be tested. 9641 * @return {@code true} if the character is a space character; 9642 * {@code false} otherwise. 
9643 * @see Character#isWhitespace(int) 9644 * @since 1.5 9645 */ 9646 public static boolean isSpaceChar(int codePoint) { 9647 return ((((1 << Character.SPACE_SEPARATOR) | 9648 (1 << Character.LINE_SEPARATOR) | 9649 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 9650 != 0; 9651 } 9652 9653 /** 9654 * Determines if the specified character is white space according to Java. 9655 * A character is a Java whitespace character if and only if it satisfies 9656 * one of the following criteria: 9657 * <ul> 9658 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 9659 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 9660 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 9661 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 9662 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 9663 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 9664 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 9665 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 9666 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 9667 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 9668 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 9669 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 9670 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 9671 * </ul> 9672 * 9673 * <p><b>Note:</b> This method cannot handle <a 9674 * href="#supplementary"> supplementary characters</a>. To support 9675 * all Unicode characters, including supplementary characters, use 9676 * the {@link #isWhitespace(int)} method. 9677 * 9678 * @param ch the character to be tested. 9679 * @return {@code true} if the character is a Java whitespace 9680 * character; {@code false} otherwise. 
9681 * @see Character#isSpaceChar(char) 9682 * @since 1.1 9683 */ 9684 public static boolean isWhitespace(char ch) { 9685 return isWhitespace((int)ch); 9686 } 9687 9688 /** 9689 * Determines if the specified character (Unicode code point) is 9690 * white space according to Java. A character is a Java 9691 * whitespace character if and only if it satisfies one of the 9692 * following criteria: 9693 * <ul> 9694 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 9695 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 9696 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 9697 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 9698 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 9699 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 9700 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 9701 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 9702 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 9703 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 9704 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 9705 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 9706 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 9707 * </ul> 9708 * 9709 * @param codePoint the character (Unicode code point) to be tested. 9710 * @return {@code true} if the character is a Java whitespace 9711 * character; {@code false} otherwise. 9712 * @see Character#isSpaceChar(int) 9713 * @since 1.5 9714 */ 9715 public static boolean isWhitespace(int codePoint) { 9716 return CharacterData.of(codePoint).isWhitespace(codePoint); 9717 } 9718 9719 /** 9720 * Determines if the specified character is an ISO control 9721 * character. A character is considered to be an ISO control 9722 * character if its code is in the range {@code '\u005Cu0000'} 9723 * through {@code '\u005Cu001F'} or in the range 9724 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
9725 * 9726 * <p><b>Note:</b> This method cannot handle <a 9727 * href="#supplementary"> supplementary characters</a>. To support 9728 * all Unicode characters, including supplementary characters, use 9729 * the {@link #isISOControl(int)} method. 9730 * 9731 * @param ch the character to be tested. 9732 * @return {@code true} if the character is an ISO control character; 9733 * {@code false} otherwise. 9734 * 9735 * @see Character#isSpaceChar(char) 9736 * @see Character#isWhitespace(char) 9737 * @since 1.1 9738 */ 9739 public static boolean isISOControl(char ch) { 9740 return isISOControl((int)ch); 9741 } 9742 9743 /** 9744 * Determines if the referenced character (Unicode code point) is an ISO control 9745 * character. A character is considered to be an ISO control 9746 * character if its code is in the range {@code '\u005Cu0000'} 9747 * through {@code '\u005Cu001F'} or in the range 9748 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 9749 * 9750 * @param codePoint the character (Unicode code point) to be tested. 9751 * @return {@code true} if the character is an ISO control character; 9752 * {@code false} otherwise. 9753 * @see Character#isSpaceChar(int) 9754 * @see Character#isWhitespace(int) 9755 * @since 1.5 9756 */ 9757 public static boolean isISOControl(int codePoint) { 9758 // Optimized form of: 9759 // (codePoint >= 0x00 && codePoint <= 0x1F) || 9760 // (codePoint >= 0x7F && codePoint <= 0x9F); 9761 return codePoint <= 0x9F && 9762 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 9763 } 9764 9765 /** 9766 * Returns a value indicating a character's general category. 9767 * 9768 * <p><b>Note:</b> This method cannot handle <a 9769 * href="#supplementary"> supplementary characters</a>. To support 9770 * all Unicode characters, including supplementary characters, use 9771 * the {@link #getType(int)} method. 9772 * 9773 * @param ch the character to be tested. 9774 * @return a value of type {@code int} representing the 9775 * character's general category. 
9776 * @see Character#COMBINING_SPACING_MARK 9777 * @see Character#CONNECTOR_PUNCTUATION 9778 * @see Character#CONTROL 9779 * @see Character#CURRENCY_SYMBOL 9780 * @see Character#DASH_PUNCTUATION 9781 * @see Character#DECIMAL_DIGIT_NUMBER 9782 * @see Character#ENCLOSING_MARK 9783 * @see Character#END_PUNCTUATION 9784 * @see Character#FINAL_QUOTE_PUNCTUATION 9785 * @see Character#FORMAT 9786 * @see Character#INITIAL_QUOTE_PUNCTUATION 9787 * @see Character#LETTER_NUMBER 9788 * @see Character#LINE_SEPARATOR 9789 * @see Character#LOWERCASE_LETTER 9790 * @see Character#MATH_SYMBOL 9791 * @see Character#MODIFIER_LETTER 9792 * @see Character#MODIFIER_SYMBOL 9793 * @see Character#NON_SPACING_MARK 9794 * @see Character#OTHER_LETTER 9795 * @see Character#OTHER_NUMBER 9796 * @see Character#OTHER_PUNCTUATION 9797 * @see Character#OTHER_SYMBOL 9798 * @see Character#PARAGRAPH_SEPARATOR 9799 * @see Character#PRIVATE_USE 9800 * @see Character#SPACE_SEPARATOR 9801 * @see Character#START_PUNCTUATION 9802 * @see Character#SURROGATE 9803 * @see Character#TITLECASE_LETTER 9804 * @see Character#UNASSIGNED 9805 * @see Character#UPPERCASE_LETTER 9806 * @since 1.1 9807 */ 9808 public static int getType(char ch) { 9809 return getType((int)ch); 9810 } 9811 9812 /** 9813 * Returns a value indicating a character's general category. 9814 * 9815 * @param codePoint the character (Unicode code point) to be tested. 9816 * @return a value of type {@code int} representing the 9817 * character's general category. 
9818 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 9819 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 9820 * @see Character#CONTROL CONTROL 9821 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 9822 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 9823 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 9824 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 9825 * @see Character#END_PUNCTUATION END_PUNCTUATION 9826 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 9827 * @see Character#FORMAT FORMAT 9828 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 9829 * @see Character#LETTER_NUMBER LETTER_NUMBER 9830 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 9831 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 9832 * @see Character#MATH_SYMBOL MATH_SYMBOL 9833 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 9834 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 9835 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 9836 * @see Character#OTHER_LETTER OTHER_LETTER 9837 * @see Character#OTHER_NUMBER OTHER_NUMBER 9838 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 9839 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 9840 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 9841 * @see Character#PRIVATE_USE PRIVATE_USE 9842 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 9843 * @see Character#START_PUNCTUATION START_PUNCTUATION 9844 * @see Character#SURROGATE SURROGATE 9845 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 9846 * @see Character#UNASSIGNED UNASSIGNED 9847 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 9848 * @since 1.5 9849 */ 9850 public static int getType(int codePoint) { 9851 return CharacterData.of(codePoint).getType(codePoint); 9852 } 9853 9854 /** 9855 * Determines the character representation for a specific digit in 9856 * the specified radix. 
If the value of {@code radix} is not a 9857 * valid radix, or the value of {@code digit} is not a valid 9858 * digit in the specified radix, the null character 9859 * ({@code '\u005Cu0000'}) is returned. 9860 * <p> 9861 * The {@code radix} argument is valid if it is greater than or 9862 * equal to {@code MIN_RADIX} and less than or equal to 9863 * {@code MAX_RADIX}. The {@code digit} argument is valid if 9864 * {@code 0 <= digit < radix}. 9865 * <p> 9866 * If the digit is less than 10, then 9867 * {@code '0' + digit} is returned. Otherwise, the value 9868 * {@code 'a' + digit - 10} is returned. 9869 * 9870 * @param digit the number to convert to a character. 9871 * @param radix the radix. 9872 * @return the {@code char} representation of the specified digit 9873 * in the specified radix. 9874 * @see Character#MIN_RADIX 9875 * @see Character#MAX_RADIX 9876 * @see Character#digit(char, int) 9877 */ 9878 public static char forDigit(int digit, int radix) { 9879 if ((digit >= radix) || (digit < 0)) { 9880 return '\0'; 9881 } 9882 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 9883 return '\0'; 9884 } 9885 if (digit < 10) { 9886 return (char)('0' + digit); 9887 } 9888 return (char)('a' - 10 + digit); 9889 } 9890 9891 /** 9892 * Returns the Unicode directionality property for the given 9893 * character. Character directionality is used to calculate the 9894 * visual ordering of text. The directionality value of undefined 9895 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 9896 * 9897 * <p><b>Note:</b> This method cannot handle <a 9898 * href="#supplementary"> supplementary characters</a>. To support 9899 * all Unicode characters, including supplementary characters, use 9900 * the {@link #getDirectionality(int)} method. 9901 * 9902 * @param ch {@code char} for which the directionality property 9903 * is requested. 9904 * @return the directionality property of the {@code char} value. 
9905 * 9906 * @see Character#DIRECTIONALITY_UNDEFINED 9907 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 9908 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 9909 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 9910 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 9911 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 9912 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 9913 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 9914 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 9915 * @see Character#DIRECTIONALITY_NONSPACING_MARK 9916 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 9917 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 9918 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 9919 * @see Character#DIRECTIONALITY_WHITESPACE 9920 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 9921 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 9922 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 9923 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 9924 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 9925 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 9926 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 9927 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 9928 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 9929 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 9930 * @since 1.4 9931 */ 9932 public static byte getDirectionality(char ch) { 9933 return getDirectionality((int)ch); 9934 } 9935 9936 /** 9937 * Returns the Unicode directionality property for the given 9938 * character (Unicode code point). Character directionality is 9939 * used to calculate the visual ordering of text. The 9940 * directionality value of undefined character is {@link 9941 * #DIRECTIONALITY_UNDEFINED}. 9942 * 9943 * @param codePoint the character (Unicode code point) for which 9944 * the directionality property is requested. 9945 * @return the directionality property of the character. 
9946 * 9947 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 9948 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 9949 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 9950 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 9951 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 9952 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 9953 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 9954 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 9955 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 9956 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 9957 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 9958 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 9959 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 9960 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 9961 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 9962 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 9963 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 9964 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 9965 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 9966 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 9967 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 9968 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 
9969 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 9970 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 9971 * @since 1.5 9972 */ 9973 public static byte getDirectionality(int codePoint) { 9974 return CharacterData.of(codePoint).getDirectionality(codePoint); 9975 } 9976 9977 /** 9978 * Determines whether the character is mirrored according to the 9979 * Unicode specification. Mirrored characters should have their 9980 * glyphs horizontally mirrored when displayed in text that is 9981 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 9982 * PARENTHESIS is semantically defined to be an <i>opening 9983 * parenthesis</i>. This will appear as a "(" in text that is 9984 * left-to-right but as a ")" in text that is right-to-left. 9985 * 9986 * <p><b>Note:</b> This method cannot handle <a 9987 * href="#supplementary"> supplementary characters</a>. To support 9988 * all Unicode characters, including supplementary characters, use 9989 * the {@link #isMirrored(int)} method. 9990 * 9991 * @param ch {@code char} for which the mirrored property is requested 9992 * @return {@code true} if the char is mirrored, {@code false} 9993 * if the {@code char} is not mirrored or is not defined. 9994 * @since 1.4 9995 */ 9996 public static boolean isMirrored(char ch) { 9997 return isMirrored((int)ch); 9998 } 9999 10000 /** 10001 * Determines whether the specified character (Unicode code point) 10002 * is mirrored according to the Unicode specification. Mirrored 10003 * characters should have their glyphs horizontally mirrored when 10004 * displayed in text that is right-to-left. For example, 10005 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 10006 * defined to be an <i>opening parenthesis</i>. This will appear 10007 * as a "(" in text that is left-to-right but as a ")" in text 10008 * that is right-to-left. 
10009 * 10010 * @param codePoint the character (Unicode code point) to be tested. 10011 * @return {@code true} if the character is mirrored, {@code false} 10012 * if the character is not mirrored or is not defined. 10013 * @since 1.5 10014 */ 10015 public static boolean isMirrored(int codePoint) { 10016 return CharacterData.of(codePoint).isMirrored(codePoint); 10017 } 10018 10019 /** 10020 * Compares two {@code Character} objects numerically. 10021 * 10022 * @param anotherCharacter the {@code Character} to be compared. 10023 10024 * @return the value {@code 0} if the argument {@code Character} 10025 * is equal to this {@code Character}; a value less than 10026 * {@code 0} if this {@code Character} is numerically less 10027 * than the {@code Character} argument; and a value greater than 10028 * {@code 0} if this {@code Character} is numerically greater 10029 * than the {@code Character} argument (unsigned comparison). 10030 * Note that this is strictly a numerical comparison; it is not 10031 * locale-dependent. 10032 * @since 1.2 10033 */ 10034 public int compareTo(Character anotherCharacter) { 10035 return compare(this.value, anotherCharacter.value); 10036 } 10037 10038 /** 10039 * Compares two {@code char} values numerically. 10040 * The value returned is identical to what would be returned by: 10041 * <pre> 10042 * Character.valueOf(x).compareTo(Character.valueOf(y)) 10043 * </pre> 10044 * 10045 * @param x the first {@code char} to compare 10046 * @param y the second {@code char} to compare 10047 * @return the value {@code 0} if {@code x == y}; 10048 * a value less than {@code 0} if {@code x < y}; and 10049 * a value greater than {@code 0} if {@code x > y} 10050 * @since 1.7 10051 */ 10052 public static int compare(char x, char y) { 10053 return x - y; 10054 } 10055 10056 /** 10057 * Converts the character (Unicode code point) argument to uppercase using 10058 * information from the UnicodeData file. 
10059 * 10060 * @param codePoint the character (Unicode code point) to be converted. 10061 * @return either the uppercase equivalent of the character, if 10062 * any, or an error flag ({@code Character.ERROR}) 10063 * that indicates that a 1:M {@code char} mapping exists. 10064 * @see Character#isLowerCase(char) 10065 * @see Character#isUpperCase(char) 10066 * @see Character#toLowerCase(char) 10067 * @see Character#toTitleCase(char) 10068 * @since 1.4 10069 */ 10070 static int toUpperCaseEx(int codePoint) { 10071 assert isValidCodePoint(codePoint); 10072 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 10073 } 10074 10075 /** 10076 * Converts the character (Unicode code point) argument to uppercase using case 10077 * mapping information from the SpecialCasing file in the Unicode 10078 * specification. If a character has no explicit uppercase 10079 * mapping, then the {@code char} itself is returned in the 10080 * {@code char[]}. 10081 * 10082 * @param codePoint the character (Unicode code point) to be converted. 10083 * @return a {@code char[]} with the uppercased character. 10084 * @since 1.4 10085 */ 10086 static char[] toUpperCaseCharArray(int codePoint) { 10087 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 10088 assert isBmpCodePoint(codePoint); 10089 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 10090 } 10091 10092 /** 10093 * The number of bits used to represent a {@code char} value in unsigned 10094 * binary form, constant {@code 16}. 10095 * 10096 * @since 1.5 10097 */ 10098 public static final int SIZE = 16; 10099 10100 /** 10101 * The number of bytes used to represent a {@code char} value in unsigned 10102 * binary form. 10103 * 10104 * @since 1.8 10105 */ 10106 public static final int BYTES = SIZE / Byte.SIZE; 10107 10108 /** 10109 * Returns the value obtained by reversing the order of the bytes in the 10110 * specified {@code char} value. 
10111 * 10112 * @param ch The {@code char} of which to reverse the byte order. 10113 * @return the value obtained by reversing (or, equivalently, swapping) 10114 * the bytes in the specified {@code char} value. 10115 * @since 1.5 10116 */ 10117 @HotSpotIntrinsicCandidate 10118 public static char reverseBytes(char ch) { 10119 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 10120 } 10121 10122 /** 10123 * Returns the Unicode name of the specified character 10124 * {@code codePoint}, or null if the code point is 10125 * {@link #UNASSIGNED unassigned}. 10126 * <p> 10127 * Note: if the specified character is not assigned a name by 10128 * the <i>UnicodeData</i> file (part of the Unicode Character 10129 * Database maintained by the Unicode Consortium), the returned 10130 * name is the same as the result of expression. 10131 * 10132 * <blockquote>{@code 10133 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 10134 * + " " 10135 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10136 * 10137 * }</blockquote> 10138 * 10139 * @param codePoint the character (Unicode code point) 10140 * 10141 * @return the Unicode name of the specified character, or null if 10142 * the code point is unassigned. 10143 * 10144 * @exception IllegalArgumentException if the specified 10145 * {@code codePoint} is not a valid Unicode 10146 * code point. 
10147 * 10148 * @since 1.7 10149 */ 10150 public static String getName(int codePoint) { 10151 if (!isValidCodePoint(codePoint)) { 10152 throw new IllegalArgumentException(); 10153 } 10154 String name = CharacterName.getInstance().getName(codePoint); 10155 if (name != null) 10156 return name; 10157 if (getType(codePoint) == UNASSIGNED) 10158 return null; 10159 UnicodeBlock block = UnicodeBlock.of(codePoint); 10160 if (block != null) 10161 return block.toString().replace('_', ' ') + " " 10162 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10163 // should never come here 10164 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10165 } 10166 10167 /** 10168 * Returns the code point value of the Unicode character specified by 10169 * the given Unicode character name. 10170 * <p> 10171 * Note: if a character is not assigned a name by the <i>UnicodeData</i> 10172 * file (part of the Unicode Character Database maintained by the Unicode 10173 * Consortium), its name is defined as the result of expression 10174 * 10175 * <blockquote>{@code 10176 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 10177 * + " " 10178 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 10179 * 10180 * }</blockquote> 10181 * <p> 10182 * The {@code name} matching is case insensitive, with any leading and 10183 * trailing whitespace character removed. 10184 * 10185 * @param name the Unicode character name 10186 * 10187 * @return the code point value of the character specified by its name. 10188 * 10189 * @throws IllegalArgumentException if the specified {@code name} 10190 * is not a valid Unicode character name. 
10191 * @throws NullPointerException if {@code name} is {@code null} 10192 * 10193 * @since 9 10194 */ 10195 public static int codePointOf(String name) { 10196 name = name.trim().toUpperCase(Locale.ROOT); 10197 int cp = CharacterName.getInstance().getCodePoint(name); 10198 if (cp != -1) 10199 return cp; 10200 try { 10201 int off = name.lastIndexOf(' '); 10202 if (off != -1) { 10203 cp = Integer.parseInt(name, off + 1, name.length(), 16); 10204 if (isValidCodePoint(cp) && name.equals(getName(cp))) 10205 return cp; 10206 } 10207 } catch (Exception x) {} 10208 throw new IllegalArgumentException("Unrecognized character name :" + name); 10209 } 10210 }