1 /* 2 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 import jdk.internal.HotSpotIntrinsicCandidate; 34 35 /** 36 * The {@code Character} class wraps a value of the primitive 37 * type {@code char} in an object. An object of type 38 * {@code Character} contains a single field whose type is 39 * {@code char}. 40 * <p> 41 * In addition, this class provides several methods for determining 42 * a character's category (lowercase letter, digit, etc.) and for converting 43 * characters from uppercase to lowercase and vice versa. 44 * <p> 45 * Character information is based on the Unicode Standard, version 7.0.0. 
46 * <p> 47 * The methods and data of class {@code Character} are defined by 48 * the information in the <i>UnicodeData</i> file that is part of the 49 * Unicode Character Database maintained by the Unicode 50 * Consortium. This file specifies various properties including name 51 * and general category for every defined Unicode code point or 52 * character range. 53 * <p> 54 * The file and its description are available from the Unicode Consortium at: 55 * <ul> 56 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 57 * </ul> 58 * 59 * <h3><a name="unicode">Unicode Character Representations</a></h3> 60 * 61 * <p>The {@code char} data type (and therefore the value that a 62 * {@code Character} object encapsulates) are based on the 63 * original Unicode specification, which defined characters as 64 * fixed-width 16-bit entities. The Unicode Standard has since been 65 * changed to allow for characters whose representation requires more 66 * than 16 bits. The range of legal <em>code point</em>s is now 67 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 68 * (Refer to the <a 69 * href="http://www.unicode.org/reports/tr27/#notation"><i> 70 * definition</i></a> of the U+<i>n</i> notation in the Unicode 71 * Standard.) 72 * 73 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 74 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 75 * <a name="supplementary">Characters</a> whose code points are greater 76 * than U+FFFF are called <em>supplementary character</em>s. The Java 77 * platform uses the UTF-16 representation in {@code char} arrays and 78 * in the {@code String} and {@code StringBuffer} classes. In 79 * this representation, supplementary characters are represented as a pair 80 * of {@code char} values, the first from the <em>high-surrogates</em> 81 * range, (\uD800-\uDBFF), the second from the 82 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
83 * 84 * <p>A {@code char} value, therefore, represents Basic 85 * Multilingual Plane (BMP) code points, including the surrogate 86 * code points, or code units of the UTF-16 encoding. An 87 * {@code int} value represents all Unicode code points, 88 * including supplementary code points. The lower (least significant) 89 * 21 bits of {@code int} are used to represent Unicode code 90 * points and the upper (most significant) 11 bits must be zero. 91 * Unless otherwise specified, the behavior with respect to 92 * supplementary characters and surrogate {@code char} values is 93 * as follows: 94 * 95 * <ul> 96 * <li>The methods that only accept a {@code char} value cannot support 97 * supplementary characters. They treat {@code char} values from the 98 * surrogate ranges as undefined characters. For example, 99 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 100 * this specific value if followed by any low-surrogate value in a string 101 * would represent a letter. 102 * 103 * <li>The methods that accept an {@code int} value support all 104 * Unicode characters, including supplementary characters. For 105 * example, {@code Character.isLetter(0x2F81A)} returns 106 * {@code true} because the code point value represents a letter 107 * (a CJK ideograph). 108 * </ul> 109 * 110 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 111 * used for character values in the range between U+0000 and U+10FFFF, 112 * and <em>Unicode code unit</em> is used for 16-bit 113 * {@code char} values that are code units of the <em>UTF-16</em> 114 * encoding. For more information on Unicode terminology, refer to the 115 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
116 * 117 * @author Lee Boynton 118 * @author Guy Steele 119 * @author Akira Tanaka 120 * @author Martin Buchholz 121 * @author Ulf Zibis 122 * @since 1.0 123 */ 124 public final 125 class Character implements java.io.Serializable, Comparable<Character> { 126 /** 127 * The minimum radix available for conversion to and from strings. 128 * The constant value of this field is the smallest value permitted 129 * for the radix argument in radix-conversion methods such as the 130 * {@code digit} method, the {@code forDigit} method, and the 131 * {@code toString} method of class {@code Integer}. 132 * 133 * @see Character#digit(char, int) 134 * @see Character#forDigit(int, int) 135 * @see Integer#toString(int, int) 136 * @see Integer#valueOf(String) 137 */ 138 public static final int MIN_RADIX = 2; 139 140 /** 141 * The maximum radix available for conversion to and from strings. 142 * The constant value of this field is the largest value permitted 143 * for the radix argument in radix-conversion methods such as the 144 * {@code digit} method, the {@code forDigit} method, and the 145 * {@code toString} method of class {@code Integer}. 146 * 147 * @see Character#digit(char, int) 148 * @see Character#forDigit(int, int) 149 * @see Integer#toString(int, int) 150 * @see Integer#valueOf(String) 151 */ 152 public static final int MAX_RADIX = 36; 153 154 /** 155 * The constant value of this field is the smallest value of type 156 * {@code char}, {@code '\u005Cu0000'}. 157 * 158 * @since 1.0.2 159 */ 160 public static final char MIN_VALUE = '\u0000'; 161 162 /** 163 * The constant value of this field is the largest value of type 164 * {@code char}, {@code '\u005CuFFFF'}. 165 * 166 * @since 1.0.2 167 */ 168 public static final char MAX_VALUE = '\uFFFF'; 169 170 /** 171 * The {@code Class} instance representing the primitive type 172 * {@code char}. 
173 * 174 * @since 1.1 175 */ 176 @SuppressWarnings("unchecked") 177 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 178 179 /* 180 * Normative general types 181 */ 182 183 /* 184 * General character types 185 */ 186 187 /** 188 * General category "Cn" in the Unicode specification. 189 * @since 1.1 190 */ 191 public static final byte UNASSIGNED = 0; 192 193 /** 194 * General category "Lu" in the Unicode specification. 195 * @since 1.1 196 */ 197 public static final byte UPPERCASE_LETTER = 1; 198 199 /** 200 * General category "Ll" in the Unicode specification. 201 * @since 1.1 202 */ 203 public static final byte LOWERCASE_LETTER = 2; 204 205 /** 206 * General category "Lt" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte MODIFIER_LETTER = 4; 216 217 /** 218 * General category "Lo" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte OTHER_LETTER = 5; 222 223 /** 224 * General category "Mn" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte NON_SPACING_MARK = 6; 228 229 /** 230 * General category "Me" in the Unicode specification. 231 * @since 1.1 232 */ 233 public static final byte ENCLOSING_MARK = 7; 234 235 /** 236 * General category "Mc" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte COMBINING_SPACING_MARK = 8; 240 241 /** 242 * General category "Nd" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte DECIMAL_DIGIT_NUMBER = 9; 246 247 /** 248 * General category "Nl" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte LETTER_NUMBER = 10; 252 253 /** 254 * General category "No" in the Unicode specification. 
255 * @since 1.1 256 */ 257 public static final byte OTHER_NUMBER = 11; 258 259 /** 260 * General category "Zs" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte SPACE_SEPARATOR = 12; 264 265 /** 266 * General category "Zl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LINE_SEPARATOR = 13; 270 271 /** 272 * General category "Zp" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte PARAGRAPH_SEPARATOR = 14; 276 277 /** 278 * General category "Cc" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte CONTROL = 15; 282 283 /** 284 * General category "Cf" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte FORMAT = 16; 288 289 /** 290 * General category "Co" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PRIVATE_USE = 18; 294 295 /** 296 * General category "Cs" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte SURROGATE = 19; 300 301 /** 302 * General category "Pd" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte DASH_PUNCTUATION = 20; 306 307 /** 308 * General category "Ps" in the Unicode specification. 309 * @since 1.1 310 */ 311 public static final byte START_PUNCTUATION = 21; 312 313 /** 314 * General category "Pe" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte END_PUNCTUATION = 22; 318 319 /** 320 * General category "Pc" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte CONNECTOR_PUNCTUATION = 23; 324 325 /** 326 * General category "Po" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte OTHER_PUNCTUATION = 24; 330 331 /** 332 * General category "Sm" in the Unicode specification. 
333 * @since 1.1 334 */ 335 public static final byte MATH_SYMBOL = 25; 336 337 /** 338 * General category "Sc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CURRENCY_SYMBOL = 26; 342 343 /** 344 * General category "Sk" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte MODIFIER_SYMBOL = 27; 348 349 /** 350 * General category "So" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte OTHER_SYMBOL = 28; 354 355 /** 356 * General category "Pi" in the Unicode specification. 357 * @since 1.4 358 */ 359 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 360 361 /** 362 * General category "Pf" in the Unicode specification. 363 * @since 1.4 364 */ 365 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 366 367 /** 368 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 369 */ 370 static final int ERROR = 0xFFFFFFFF; 371 372 373 /** 374 * Undefined bidirectional character type. Undefined {@code char} 375 * values have undefined directionality in the Unicode specification. 376 * @since 1.4 377 */ 378 public static final byte DIRECTIONALITY_UNDEFINED = -1; 379 380 /** 381 * Strong bidirectional character type "L" in the Unicode specification. 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 385 386 /** 387 * Strong bidirectional character type "R" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 391 392 /** 393 * Strong bidirectional character type "AL" in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 397 398 /** 399 * Weak bidirectional character type "EN" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 403 404 /** 405 * Weak bidirectional character type "ES" in the Unicode specification. 
406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 409 410 /** 411 * Weak bidirectional character type "ET" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 415 416 /** 417 * Weak bidirectional character type "AN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 421 422 /** 423 * Weak bidirectional character type "CS" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 427 428 /** 429 * Weak bidirectional character type "NSM" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 433 434 /** 435 * Weak bidirectional character type "BN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 439 440 /** 441 * Neutral bidirectional character type "B" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 445 446 /** 447 * Neutral bidirectional character type "S" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 451 452 /** 453 * Neutral bidirectional character type "WS" in the Unicode specification. 454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_WHITESPACE = 12; 457 458 /** 459 * Neutral bidirectional character type "ON" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 463 464 /** 465 * Strong bidirectional character type "LRE" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 469 470 /** 471 * Strong bidirectional character type "LRO" in the Unicode specification. 
472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 475 476 /** 477 * Strong bidirectional character type "RLE" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 481 482 /** 483 * Strong bidirectional character type "RLO" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 487 488 /** 489 * Weak bidirectional character type "PDF" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 493 494 /** 495 * Weak bidirectional character type "LRI" in the Unicode specification. 496 * @since 1.9 497 */ 498 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 499 500 /** 501 * Weak bidirectional character type "RLI" in the Unicode specification. 502 * @since 1.9 503 */ 504 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 505 506 /** 507 * Weak bidirectional character type "FSI" in the Unicode specification. 508 * @since 1.9 509 */ 510 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 511 512 /** 513 * Weak bidirectional character type "PDI" in the Unicode specification. 514 * @since 1.9 515 */ 516 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 517 518 /** 519 * The minimum value of a 520 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 521 * Unicode high-surrogate code unit</a> 522 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 523 * A high-surrogate is also known as a <i>leading-surrogate</i>. 524 * 525 * @since 1.5 526 */ 527 public static final char MIN_HIGH_SURROGATE = '\uD800'; 528 529 /** 530 * The maximum value of a 531 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 532 * Unicode high-surrogate code unit</a> 533 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
534 * A high-surrogate is also known as a <i>leading-surrogate</i>. 535 * 536 * @since 1.5 537 */ 538 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 539 540 /** 541 * The minimum value of a 542 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 543 * Unicode low-surrogate code unit</a> 544 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 545 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 546 * 547 * @since 1.5 548 */ 549 public static final char MIN_LOW_SURROGATE = '\uDC00'; 550 551 /** 552 * The maximum value of a 553 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 554 * Unicode low-surrogate code unit</a> 555 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 556 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 557 * 558 * @since 1.5 559 */ 560 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 561 562 /** 563 * The minimum value of a Unicode surrogate code unit in the 564 * UTF-16 encoding, constant {@code '\u005CuD800'}. 565 * 566 * @since 1.5 567 */ 568 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 569 570 /** 571 * The maximum value of a Unicode surrogate code unit in the 572 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 573 * 574 * @since 1.5 575 */ 576 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 577 578 /** 579 * The minimum value of a 580 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 581 * Unicode supplementary code point</a>, constant {@code U+10000}. 582 * 583 * @since 1.5 584 */ 585 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 586 587 /** 588 * The minimum value of a 589 * <a href="http://www.unicode.org/glossary/#code_point"> 590 * Unicode code point</a>, constant {@code U+0000}. 
*
     * @since 1.5
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+10FFFF}.
     *
     * @since 1.5
     */
    public static final int MAX_CODE_POINT = 0X10FFFF;


    /**
     * Instances of this class represent particular subsets of the Unicode
     * character set.  The only family of subsets defined in the
     * {@code Character} class is {@link Character.UnicodeBlock}.
     * Other portions of the Java API may define other subsets for their
     * own purposes.
     *
     * <p>Two subsets are equal only when they are the very same object;
     * {@code equals} and {@code hashCode} are {@code final} so that
     * subclasses cannot weaken this identity-based contract.
     *
     * @since 1.2
     */
    public static class Subset  {

        /** The subset's name; set once by the constructor and never {@code null}. */
        private final String name;

        /**
         * Constructs a new {@code Subset} instance.
         *
         * @param  name  The name of this subset
         * @throws NullPointerException if name is {@code null}
         */
        protected Subset(String name) {
            if (name == null) {
                throw new NullPointerException("name");
            }
            this.name = name;
        }

        /**
         * Compares two {@code Subset} objects for equality.
         * This method returns {@code true} if and only if
         * {@code this} and the argument refer to the same
         * object; since this method is {@code final}, this
         * guarantee holds for all subclasses.
         *
         * @param  obj  the object to compare with this subset
         * @return {@code true} if {@code obj} is this very object;
         *         {@code false} otherwise (including for {@code null})
         */
        @Override
        public final boolean equals(Object obj) {
            return (this == obj);
        }

        /**
         * Returns the standard hash code as defined by the
         * {@link Object#hashCode} method.  This method
         * is {@code final} in order to ensure that the
         * {@code equals} and {@code hashCode} methods will
         * be consistent in all subclasses.
         *
         * @return the hash code produced by {@link Object#hashCode}
         */
        @Override
        public final int hashCode() {
            return super.hashCode();
        }

        /**
         * Returns the name of this subset.
         *
         * @return the name that was passed to the constructor
         */
        @Override
        public final String toString() {
            return name;
        }
    }

    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
    // for the latest specification of Unicode Blocks.
664 665 /** 666 * A family of character subsets representing the character blocks in the 667 * Unicode specification. Character blocks generally define characters 668 * used for a specific script or purpose. A character is contained by 669 * at most one Unicode block. 670 * 671 * @since 1.2 672 */ 673 public static final class UnicodeBlock extends Subset { 674 /** 675 * 510 - the expected number of entities 676 * 0.75 - the default load factor of HashMap 677 */ 678 private static Map<String, UnicodeBlock> map = 679 new HashMap<>((int)(510 / 0.75f + 1.0f)); 680 681 /** 682 * Creates a UnicodeBlock with the given identifier name. 683 * This name must be the same as the block identifier. 684 */ 685 private UnicodeBlock(String idName) { 686 super(idName); 687 map.put(idName, this); 688 } 689 690 /** 691 * Creates a UnicodeBlock with the given identifier name and 692 * alias name. 693 */ 694 private UnicodeBlock(String idName, String alias) { 695 this(idName); 696 map.put(alias, this); 697 } 698 699 /** 700 * Creates a UnicodeBlock with the given identifier name and 701 * alias names. 702 */ 703 private UnicodeBlock(String idName, String... aliases) { 704 this(idName); 705 for (String alias : aliases) 706 map.put(alias, this); 707 } 708 709 /** 710 * Constant for the "Basic Latin" Unicode character block. 711 * @since 1.2 712 */ 713 public static final UnicodeBlock BASIC_LATIN = 714 new UnicodeBlock("BASIC_LATIN", 715 "BASIC LATIN", 716 "BASICLATIN"); 717 718 /** 719 * Constant for the "Latin-1 Supplement" Unicode character block. 720 * @since 1.2 721 */ 722 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 723 new UnicodeBlock("LATIN_1_SUPPLEMENT", 724 "LATIN-1 SUPPLEMENT", 725 "LATIN-1SUPPLEMENT"); 726 727 /** 728 * Constant for the "Latin Extended-A" Unicode character block. 
729 * @since 1.2 730 */ 731 public static final UnicodeBlock LATIN_EXTENDED_A = 732 new UnicodeBlock("LATIN_EXTENDED_A", 733 "LATIN EXTENDED-A", 734 "LATINEXTENDED-A"); 735 736 /** 737 * Constant for the "Latin Extended-B" Unicode character block. 738 * @since 1.2 739 */ 740 public static final UnicodeBlock LATIN_EXTENDED_B = 741 new UnicodeBlock("LATIN_EXTENDED_B", 742 "LATIN EXTENDED-B", 743 "LATINEXTENDED-B"); 744 745 /** 746 * Constant for the "IPA Extensions" Unicode character block. 747 * @since 1.2 748 */ 749 public static final UnicodeBlock IPA_EXTENSIONS = 750 new UnicodeBlock("IPA_EXTENSIONS", 751 "IPA EXTENSIONS", 752 "IPAEXTENSIONS"); 753 754 /** 755 * Constant for the "Spacing Modifier Letters" Unicode character block. 756 * @since 1.2 757 */ 758 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 759 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 760 "SPACING MODIFIER LETTERS", 761 "SPACINGMODIFIERLETTERS"); 762 763 /** 764 * Constant for the "Combining Diacritical Marks" Unicode character block. 765 * @since 1.2 766 */ 767 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 768 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 769 "COMBINING DIACRITICAL MARKS", 770 "COMBININGDIACRITICALMARKS"); 771 772 /** 773 * Constant for the "Greek and Coptic" Unicode character block. 774 * <p> 775 * This block was previously known as the "Greek" block. 776 * 777 * @since 1.2 778 */ 779 public static final UnicodeBlock GREEK = 780 new UnicodeBlock("GREEK", 781 "GREEK AND COPTIC", 782 "GREEKANDCOPTIC"); 783 784 /** 785 * Constant for the "Cyrillic" Unicode character block. 786 * @since 1.2 787 */ 788 public static final UnicodeBlock CYRILLIC = 789 new UnicodeBlock("CYRILLIC"); 790 791 /** 792 * Constant for the "Armenian" Unicode character block. 793 * @since 1.2 794 */ 795 public static final UnicodeBlock ARMENIAN = 796 new UnicodeBlock("ARMENIAN"); 797 798 /** 799 * Constant for the "Hebrew" Unicode character block. 
800 * @since 1.2 801 */ 802 public static final UnicodeBlock HEBREW = 803 new UnicodeBlock("HEBREW"); 804 805 /** 806 * Constant for the "Arabic" Unicode character block. 807 * @since 1.2 808 */ 809 public static final UnicodeBlock ARABIC = 810 new UnicodeBlock("ARABIC"); 811 812 /** 813 * Constant for the "Devanagari" Unicode character block. 814 * @since 1.2 815 */ 816 public static final UnicodeBlock DEVANAGARI = 817 new UnicodeBlock("DEVANAGARI"); 818 819 /** 820 * Constant for the "Bengali" Unicode character block. 821 * @since 1.2 822 */ 823 public static final UnicodeBlock BENGALI = 824 new UnicodeBlock("BENGALI"); 825 826 /** 827 * Constant for the "Gurmukhi" Unicode character block. 828 * @since 1.2 829 */ 830 public static final UnicodeBlock GURMUKHI = 831 new UnicodeBlock("GURMUKHI"); 832 833 /** 834 * Constant for the "Gujarati" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock GUJARATI = 838 new UnicodeBlock("GUJARATI"); 839 840 /** 841 * Constant for the "Oriya" Unicode character block. 842 * @since 1.2 843 */ 844 public static final UnicodeBlock ORIYA = 845 new UnicodeBlock("ORIYA"); 846 847 /** 848 * Constant for the "Tamil" Unicode character block. 849 * @since 1.2 850 */ 851 public static final UnicodeBlock TAMIL = 852 new UnicodeBlock("TAMIL"); 853 854 /** 855 * Constant for the "Telugu" Unicode character block. 856 * @since 1.2 857 */ 858 public static final UnicodeBlock TELUGU = 859 new UnicodeBlock("TELUGU"); 860 861 /** 862 * Constant for the "Kannada" Unicode character block. 863 * @since 1.2 864 */ 865 public static final UnicodeBlock KANNADA = 866 new UnicodeBlock("KANNADA"); 867 868 /** 869 * Constant for the "Malayalam" Unicode character block. 870 * @since 1.2 871 */ 872 public static final UnicodeBlock MALAYALAM = 873 new UnicodeBlock("MALAYALAM"); 874 875 /** 876 * Constant for the "Thai" Unicode character block. 
877 * @since 1.2 878 */ 879 public static final UnicodeBlock THAI = 880 new UnicodeBlock("THAI"); 881 882 /** 883 * Constant for the "Lao" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock LAO = 887 new UnicodeBlock("LAO"); 888 889 /** 890 * Constant for the "Tibetan" Unicode character block. 891 * @since 1.2 892 */ 893 public static final UnicodeBlock TIBETAN = 894 new UnicodeBlock("TIBETAN"); 895 896 /** 897 * Constant for the "Georgian" Unicode character block. 898 * @since 1.2 899 */ 900 public static final UnicodeBlock GEORGIAN = 901 new UnicodeBlock("GEORGIAN"); 902 903 /** 904 * Constant for the "Hangul Jamo" Unicode character block. 905 * @since 1.2 906 */ 907 public static final UnicodeBlock HANGUL_JAMO = 908 new UnicodeBlock("HANGUL_JAMO", 909 "HANGUL JAMO", 910 "HANGULJAMO"); 911 912 /** 913 * Constant for the "Latin Extended Additional" Unicode character block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 917 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 918 "LATIN EXTENDED ADDITIONAL", 919 "LATINEXTENDEDADDITIONAL"); 920 921 /** 922 * Constant for the "Greek Extended" Unicode character block. 923 * @since 1.2 924 */ 925 public static final UnicodeBlock GREEK_EXTENDED = 926 new UnicodeBlock("GREEK_EXTENDED", 927 "GREEK EXTENDED", 928 "GREEKEXTENDED"); 929 930 /** 931 * Constant for the "General Punctuation" Unicode character block. 932 * @since 1.2 933 */ 934 public static final UnicodeBlock GENERAL_PUNCTUATION = 935 new UnicodeBlock("GENERAL_PUNCTUATION", 936 "GENERAL PUNCTUATION", 937 "GENERALPUNCTUATION"); 938 939 /** 940 * Constant for the "Superscripts and Subscripts" Unicode character 941 * block. 
942 * @since 1.2 943 */ 944 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 945 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 946 "SUPERSCRIPTS AND SUBSCRIPTS", 947 "SUPERSCRIPTSANDSUBSCRIPTS"); 948 949 /** 950 * Constant for the "Currency Symbols" Unicode character block. 951 * @since 1.2 952 */ 953 public static final UnicodeBlock CURRENCY_SYMBOLS = 954 new UnicodeBlock("CURRENCY_SYMBOLS", 955 "CURRENCY SYMBOLS", 956 "CURRENCYSYMBOLS"); 957 958 /** 959 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 960 * character block. 961 * <p> 962 * This block was previously known as "Combining Marks for Symbols". 963 * @since 1.2 964 */ 965 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 966 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 967 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 968 "COMBININGDIACRITICALMARKSFORSYMBOLS", 969 "COMBINING MARKS FOR SYMBOLS", 970 "COMBININGMARKSFORSYMBOLS"); 971 972 /** 973 * Constant for the "Letterlike Symbols" Unicode character block. 974 * @since 1.2 975 */ 976 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 977 new UnicodeBlock("LETTERLIKE_SYMBOLS", 978 "LETTERLIKE SYMBOLS", 979 "LETTERLIKESYMBOLS"); 980 981 /** 982 * Constant for the "Number Forms" Unicode character block. 983 * @since 1.2 984 */ 985 public static final UnicodeBlock NUMBER_FORMS = 986 new UnicodeBlock("NUMBER_FORMS", 987 "NUMBER FORMS", 988 "NUMBERFORMS"); 989 990 /** 991 * Constant for the "Arrows" Unicode character block. 992 * @since 1.2 993 */ 994 public static final UnicodeBlock ARROWS = 995 new UnicodeBlock("ARROWS"); 996 997 /** 998 * Constant for the "Mathematical Operators" Unicode character block. 999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1002 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1003 "MATHEMATICAL OPERATORS", 1004 "MATHEMATICALOPERATORS"); 1005 1006 /** 1007 * Constant for the "Miscellaneous Technical" Unicode character block. 
1008 * @since 1.2 1009 */ 1010 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1011 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1012 "MISCELLANEOUS TECHNICAL", 1013 "MISCELLANEOUSTECHNICAL"); 1014 1015 /** 1016 * Constant for the "Control Pictures" Unicode character block. 1017 * @since 1.2 1018 */ 1019 public static final UnicodeBlock CONTROL_PICTURES = 1020 new UnicodeBlock("CONTROL_PICTURES", 1021 "CONTROL PICTURES", 1022 "CONTROLPICTURES"); 1023 1024 /** 1025 * Constant for the "Optical Character Recognition" Unicode character block. 1026 * @since 1.2 1027 */ 1028 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1029 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1030 "OPTICAL CHARACTER RECOGNITION", 1031 "OPTICALCHARACTERRECOGNITION"); 1032 1033 /** 1034 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1035 * @since 1.2 1036 */ 1037 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1038 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1039 "ENCLOSED ALPHANUMERICS", 1040 "ENCLOSEDALPHANUMERICS"); 1041 1042 /** 1043 * Constant for the "Box Drawing" Unicode character block. 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock BOX_DRAWING = 1047 new UnicodeBlock("BOX_DRAWING", 1048 "BOX DRAWING", 1049 "BOXDRAWING"); 1050 1051 /** 1052 * Constant for the "Block Elements" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock BLOCK_ELEMENTS = 1056 new UnicodeBlock("BLOCK_ELEMENTS", 1057 "BLOCK ELEMENTS", 1058 "BLOCKELEMENTS"); 1059 1060 /** 1061 * Constant for the "Geometric Shapes" Unicode character block. 1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock GEOMETRIC_SHAPES = 1065 new UnicodeBlock("GEOMETRIC_SHAPES", 1066 "GEOMETRIC SHAPES", 1067 "GEOMETRICSHAPES"); 1068 1069 /** 1070 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1071 * @since 1.2 1072 */ 1073 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1074 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1075 "MISCELLANEOUS SYMBOLS", 1076 "MISCELLANEOUSSYMBOLS"); 1077 1078 /** 1079 * Constant for the "Dingbats" Unicode character block. 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock DINGBATS = 1083 new UnicodeBlock("DINGBATS"); 1084 1085 /** 1086 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1090 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1091 "CJK SYMBOLS AND PUNCTUATION", 1092 "CJKSYMBOLSANDPUNCTUATION"); 1093 1094 /** 1095 * Constant for the "Hiragana" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock HIRAGANA = 1099 new UnicodeBlock("HIRAGANA"); 1100 1101 /** 1102 * Constant for the "Katakana" Unicode character block. 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock KATAKANA = 1106 new UnicodeBlock("KATAKANA"); 1107 1108 /** 1109 * Constant for the "Bopomofo" Unicode character block. 1110 * @since 1.2 1111 */ 1112 public static final UnicodeBlock BOPOMOFO = 1113 new UnicodeBlock("BOPOMOFO"); 1114 1115 /** 1116 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1117 * @since 1.2 1118 */ 1119 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1120 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1121 "HANGUL COMPATIBILITY JAMO", 1122 "HANGULCOMPATIBILITYJAMO"); 1123 1124 /** 1125 * Constant for the "Kanbun" Unicode character block. 1126 * @since 1.2 1127 */ 1128 public static final UnicodeBlock KANBUN = 1129 new UnicodeBlock("KANBUN"); 1130 1131 /** 1132 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1136 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1137 "ENCLOSED CJK LETTERS AND MONTHS", 1138 "ENCLOSEDCJKLETTERSANDMONTHS"); 1139 1140 /** 1141 * Constant for the "CJK Compatibility" Unicode character block. 1142 * @since 1.2 1143 */ 1144 public static final UnicodeBlock CJK_COMPATIBILITY = 1145 new UnicodeBlock("CJK_COMPATIBILITY", 1146 "CJK COMPATIBILITY", 1147 "CJKCOMPATIBILITY"); 1148 1149 /** 1150 * Constant for the "CJK Unified Ideographs" Unicode character block. 1151 * @since 1.2 1152 */ 1153 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1154 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1155 "CJK UNIFIED IDEOGRAPHS", 1156 "CJKUNIFIEDIDEOGRAPHS"); 1157 1158 /** 1159 * Constant for the "Hangul Syllables" Unicode character block. 1160 * @since 1.2 1161 */ 1162 public static final UnicodeBlock HANGUL_SYLLABLES = 1163 new UnicodeBlock("HANGUL_SYLLABLES", 1164 "HANGUL SYLLABLES", 1165 "HANGULSYLLABLES"); 1166 1167 /** 1168 * Constant for the "Private Use Area" Unicode character block. 1169 * @since 1.2 1170 */ 1171 public static final UnicodeBlock PRIVATE_USE_AREA = 1172 new UnicodeBlock("PRIVATE_USE_AREA", 1173 "PRIVATE USE AREA", 1174 "PRIVATEUSEAREA"); 1175 1176 /** 1177 * Constant for the "CJK Compatibility Ideographs" Unicode character 1178 * block. 1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1182 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1183 "CJK COMPATIBILITY IDEOGRAPHS", 1184 "CJKCOMPATIBILITYIDEOGRAPHS"); 1185 1186 /** 1187 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1191 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1192 "ALPHABETIC PRESENTATION FORMS", 1193 "ALPHABETICPRESENTATIONFORMS"); 1194 1195 /** 1196 * Constant for the "Arabic Presentation Forms-A" Unicode character 1197 * block. 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1201 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1202 "ARABIC PRESENTATION FORMS-A", 1203 "ARABICPRESENTATIONFORMS-A"); 1204 1205 /** 1206 * Constant for the "Combining Half Marks" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock COMBINING_HALF_MARKS = 1210 new UnicodeBlock("COMBINING_HALF_MARKS", 1211 "COMBINING HALF MARKS", 1212 "COMBININGHALFMARKS"); 1213 1214 /** 1215 * Constant for the "CJK Compatibility Forms" Unicode character block. 1216 * @since 1.2 1217 */ 1218 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1219 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1220 "CJK COMPATIBILITY FORMS", 1221 "CJKCOMPATIBILITYFORMS"); 1222 1223 /** 1224 * Constant for the "Small Form Variants" Unicode character block. 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1228 new UnicodeBlock("SMALL_FORM_VARIANTS", 1229 "SMALL FORM VARIANTS", 1230 "SMALLFORMVARIANTS"); 1231 1232 /** 1233 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1234 * @since 1.2 1235 */ 1236 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1237 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1238 "ARABIC PRESENTATION FORMS-B", 1239 "ARABICPRESENTATIONFORMS-B"); 1240 1241 /** 1242 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1243 * block. 
1244 * @since 1.2 1245 */ 1246 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1247 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1248 "HALFWIDTH AND FULLWIDTH FORMS", 1249 "HALFWIDTHANDFULLWIDTHFORMS"); 1250 1251 /** 1252 * Constant for the "Specials" Unicode character block. 1253 * @since 1.2 1254 */ 1255 public static final UnicodeBlock SPECIALS = 1256 new UnicodeBlock("SPECIALS"); 1257 1258 /** 1259 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1260 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1261 * {@link #LOW_SURROGATES}. These new constants match 1262 * the block definitions of the Unicode Standard. 1263 * The {@link #of(char)} and {@link #of(int)} methods 1264 * return the new constants, not SURROGATES_AREA. 1265 */ 1266 @Deprecated 1267 public static final UnicodeBlock SURROGATES_AREA = 1268 new UnicodeBlock("SURROGATES_AREA"); 1269 1270 /** 1271 * Constant for the "Syriac" Unicode character block. 1272 * @since 1.4 1273 */ 1274 public static final UnicodeBlock SYRIAC = 1275 new UnicodeBlock("SYRIAC"); 1276 1277 /** 1278 * Constant for the "Thaana" Unicode character block. 1279 * @since 1.4 1280 */ 1281 public static final UnicodeBlock THAANA = 1282 new UnicodeBlock("THAANA"); 1283 1284 /** 1285 * Constant for the "Sinhala" Unicode character block. 1286 * @since 1.4 1287 */ 1288 public static final UnicodeBlock SINHALA = 1289 new UnicodeBlock("SINHALA"); 1290 1291 /** 1292 * Constant for the "Myanmar" Unicode character block. 1293 * @since 1.4 1294 */ 1295 public static final UnicodeBlock MYANMAR = 1296 new UnicodeBlock("MYANMAR"); 1297 1298 /** 1299 * Constant for the "Ethiopic" Unicode character block. 1300 * @since 1.4 1301 */ 1302 public static final UnicodeBlock ETHIOPIC = 1303 new UnicodeBlock("ETHIOPIC"); 1304 1305 /** 1306 * Constant for the "Cherokee" Unicode character block. 
1307 * @since 1.4 1308 */ 1309 public static final UnicodeBlock CHEROKEE = 1310 new UnicodeBlock("CHEROKEE"); 1311 1312 /** 1313 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1314 * @since 1.4 1315 */ 1316 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1317 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1318 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1319 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1320 1321 /** 1322 * Constant for the "Ogham" Unicode character block. 1323 * @since 1.4 1324 */ 1325 public static final UnicodeBlock OGHAM = 1326 new UnicodeBlock("OGHAM"); 1327 1328 /** 1329 * Constant for the "Runic" Unicode character block. 1330 * @since 1.4 1331 */ 1332 public static final UnicodeBlock RUNIC = 1333 new UnicodeBlock("RUNIC"); 1334 1335 /** 1336 * Constant for the "Khmer" Unicode character block. 1337 * @since 1.4 1338 */ 1339 public static final UnicodeBlock KHMER = 1340 new UnicodeBlock("KHMER"); 1341 1342 /** 1343 * Constant for the "Mongolian" Unicode character block. 1344 * @since 1.4 1345 */ 1346 public static final UnicodeBlock MONGOLIAN = 1347 new UnicodeBlock("MONGOLIAN"); 1348 1349 /** 1350 * Constant for the "Braille Patterns" Unicode character block. 1351 * @since 1.4 1352 */ 1353 public static final UnicodeBlock BRAILLE_PATTERNS = 1354 new UnicodeBlock("BRAILLE_PATTERNS", 1355 "BRAILLE PATTERNS", 1356 "BRAILLEPATTERNS"); 1357 1358 /** 1359 * Constant for the "CJK Radicals Supplement" Unicode character block. 1360 * @since 1.4 1361 */ 1362 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1363 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1364 "CJK RADICALS SUPPLEMENT", 1365 "CJKRADICALSSUPPLEMENT"); 1366 1367 /** 1368 * Constant for the "Kangxi Radicals" Unicode character block. 
1369 * @since 1.4 1370 */ 1371 public static final UnicodeBlock KANGXI_RADICALS = 1372 new UnicodeBlock("KANGXI_RADICALS", 1373 "KANGXI RADICALS", 1374 "KANGXIRADICALS"); 1375 1376 /** 1377 * Constant for the "Ideographic Description Characters" Unicode character block. 1378 * @since 1.4 1379 */ 1380 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1381 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1382 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1383 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1384 1385 /** 1386 * Constant for the "Bopomofo Extended" Unicode character block. 1387 * @since 1.4 1388 */ 1389 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1390 new UnicodeBlock("BOPOMOFO_EXTENDED", 1391 "BOPOMOFO EXTENDED", 1392 "BOPOMOFOEXTENDED"); 1393 1394 /** 1395 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1396 * @since 1.4 1397 */ 1398 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1399 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1400 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1401 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1402 1403 /** 1404 * Constant for the "Yi Syllables" Unicode character block. 1405 * @since 1.4 1406 */ 1407 public static final UnicodeBlock YI_SYLLABLES = 1408 new UnicodeBlock("YI_SYLLABLES", 1409 "YI SYLLABLES", 1410 "YISYLLABLES"); 1411 1412 /** 1413 * Constant for the "Yi Radicals" Unicode character block. 1414 * @since 1.4 1415 */ 1416 public static final UnicodeBlock YI_RADICALS = 1417 new UnicodeBlock("YI_RADICALS", 1418 "YI RADICALS", 1419 "YIRADICALS"); 1420 1421 /** 1422 * Constant for the "Cyrillic Supplementary" Unicode character block. 
1423 * @since 1.5 1424 */ 1425 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1426 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1427 "CYRILLIC SUPPLEMENTARY", 1428 "CYRILLICSUPPLEMENTARY", 1429 "CYRILLIC SUPPLEMENT", 1430 "CYRILLICSUPPLEMENT"); 1431 1432 /** 1433 * Constant for the "Tagalog" Unicode character block. 1434 * @since 1.5 1435 */ 1436 public static final UnicodeBlock TAGALOG = 1437 new UnicodeBlock("TAGALOG"); 1438 1439 /** 1440 * Constant for the "Hanunoo" Unicode character block. 1441 * @since 1.5 1442 */ 1443 public static final UnicodeBlock HANUNOO = 1444 new UnicodeBlock("HANUNOO"); 1445 1446 /** 1447 * Constant for the "Buhid" Unicode character block. 1448 * @since 1.5 1449 */ 1450 public static final UnicodeBlock BUHID = 1451 new UnicodeBlock("BUHID"); 1452 1453 /** 1454 * Constant for the "Tagbanwa" Unicode character block. 1455 * @since 1.5 1456 */ 1457 public static final UnicodeBlock TAGBANWA = 1458 new UnicodeBlock("TAGBANWA"); 1459 1460 /** 1461 * Constant for the "Limbu" Unicode character block. 1462 * @since 1.5 1463 */ 1464 public static final UnicodeBlock LIMBU = 1465 new UnicodeBlock("LIMBU"); 1466 1467 /** 1468 * Constant for the "Tai Le" Unicode character block. 1469 * @since 1.5 1470 */ 1471 public static final UnicodeBlock TAI_LE = 1472 new UnicodeBlock("TAI_LE", 1473 "TAI LE", 1474 "TAILE"); 1475 1476 /** 1477 * Constant for the "Khmer Symbols" Unicode character block. 1478 * @since 1.5 1479 */ 1480 public static final UnicodeBlock KHMER_SYMBOLS = 1481 new UnicodeBlock("KHMER_SYMBOLS", 1482 "KHMER SYMBOLS", 1483 "KHMERSYMBOLS"); 1484 1485 /** 1486 * Constant for the "Phonetic Extensions" Unicode character block. 1487 * @since 1.5 1488 */ 1489 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1490 new UnicodeBlock("PHONETIC_EXTENSIONS", 1491 "PHONETIC EXTENSIONS", 1492 "PHONETICEXTENSIONS"); 1493 1494 /** 1495 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1496 * @since 1.5 1497 */ 1498 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1499 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1500 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1501 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1502 1503 /** 1504 * Constant for the "Supplemental Arrows-A" Unicode character block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1508 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1509 "SUPPLEMENTAL ARROWS-A", 1510 "SUPPLEMENTALARROWS-A"); 1511 1512 /** 1513 * Constant for the "Supplemental Arrows-B" Unicode character block. 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1517 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1518 "SUPPLEMENTAL ARROWS-B", 1519 "SUPPLEMENTALARROWS-B"); 1520 1521 /** 1522 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1523 * character block. 1524 * @since 1.5 1525 */ 1526 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1527 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1528 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1529 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1530 1531 /** 1532 * Constant for the "Supplemental Mathematical Operators" Unicode 1533 * character block. 1534 * @since 1.5 1535 */ 1536 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1537 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1538 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1539 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1540 1541 /** 1542 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1543 * block. 1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1547 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1548 "MISCELLANEOUS SYMBOLS AND ARROWS", 1549 "MISCELLANEOUSSYMBOLSANDARROWS"); 1550 1551 /** 1552 * Constant for the "Katakana Phonetic Extensions" Unicode character 1553 * block. 
1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1557 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1558 "KATAKANA PHONETIC EXTENSIONS", 1559 "KATAKANAPHONETICEXTENSIONS"); 1560 1561 /** 1562 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1563 * @since 1.5 1564 */ 1565 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1566 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1567 "YIJING HEXAGRAM SYMBOLS", 1568 "YIJINGHEXAGRAMSYMBOLS"); 1569 1570 /** 1571 * Constant for the "Variation Selectors" Unicode character block. 1572 * @since 1.5 1573 */ 1574 public static final UnicodeBlock VARIATION_SELECTORS = 1575 new UnicodeBlock("VARIATION_SELECTORS", 1576 "VARIATION SELECTORS", 1577 "VARIATIONSELECTORS"); 1578 1579 /** 1580 * Constant for the "Linear B Syllabary" Unicode character block. 1581 * @since 1.5 1582 */ 1583 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1584 new UnicodeBlock("LINEAR_B_SYLLABARY", 1585 "LINEAR B SYLLABARY", 1586 "LINEARBSYLLABARY"); 1587 1588 /** 1589 * Constant for the "Linear B Ideograms" Unicode character block. 1590 * @since 1.5 1591 */ 1592 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1593 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1594 "LINEAR B IDEOGRAMS", 1595 "LINEARBIDEOGRAMS"); 1596 1597 /** 1598 * Constant for the "Aegean Numbers" Unicode character block. 1599 * @since 1.5 1600 */ 1601 public static final UnicodeBlock AEGEAN_NUMBERS = 1602 new UnicodeBlock("AEGEAN_NUMBERS", 1603 "AEGEAN NUMBERS", 1604 "AEGEANNUMBERS"); 1605 1606 /** 1607 * Constant for the "Old Italic" Unicode character block. 1608 * @since 1.5 1609 */ 1610 public static final UnicodeBlock OLD_ITALIC = 1611 new UnicodeBlock("OLD_ITALIC", 1612 "OLD ITALIC", 1613 "OLDITALIC"); 1614 1615 /** 1616 * Constant for the "Gothic" Unicode character block. 
1617 * @since 1.5 1618 */ 1619 public static final UnicodeBlock GOTHIC = 1620 new UnicodeBlock("GOTHIC"); 1621 1622 /** 1623 * Constant for the "Ugaritic" Unicode character block. 1624 * @since 1.5 1625 */ 1626 public static final UnicodeBlock UGARITIC = 1627 new UnicodeBlock("UGARITIC"); 1628 1629 /** 1630 * Constant for the "Deseret" Unicode character block. 1631 * @since 1.5 1632 */ 1633 public static final UnicodeBlock DESERET = 1634 new UnicodeBlock("DESERET"); 1635 1636 /** 1637 * Constant for the "Shavian" Unicode character block. 1638 * @since 1.5 1639 */ 1640 public static final UnicodeBlock SHAVIAN = 1641 new UnicodeBlock("SHAVIAN"); 1642 1643 /** 1644 * Constant for the "Osmanya" Unicode character block. 1645 * @since 1.5 1646 */ 1647 public static final UnicodeBlock OSMANYA = 1648 new UnicodeBlock("OSMANYA"); 1649 1650 /** 1651 * Constant for the "Cypriot Syllabary" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1655 new UnicodeBlock("CYPRIOT_SYLLABARY", 1656 "CYPRIOT SYLLABARY", 1657 "CYPRIOTSYLLABARY"); 1658 1659 /** 1660 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1664 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1665 "BYZANTINE MUSICAL SYMBOLS", 1666 "BYZANTINEMUSICALSYMBOLS"); 1667 1668 /** 1669 * Constant for the "Musical Symbols" Unicode character block. 1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock MUSICAL_SYMBOLS = 1673 new UnicodeBlock("MUSICAL_SYMBOLS", 1674 "MUSICAL SYMBOLS", 1675 "MUSICALSYMBOLS"); 1676 1677 /** 1678 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1682 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1683 "TAI XUAN JING SYMBOLS", 1684 "TAIXUANJINGSYMBOLS"); 1685 1686 /** 1687 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1688 * character block. 1689 * @since 1.5 1690 */ 1691 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1692 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1693 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1694 "MATHEMATICALALPHANUMERICSYMBOLS"); 1695 1696 /** 1697 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1698 * character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1702 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1703 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1704 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1705 1706 /** 1707 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1708 * @since 1.5 1709 */ 1710 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1711 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1712 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1713 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1714 1715 /** 1716 * Constant for the "Tags" Unicode character block. 1717 * @since 1.5 1718 */ 1719 public static final UnicodeBlock TAGS = 1720 new UnicodeBlock("TAGS"); 1721 1722 /** 1723 * Constant for the "Variation Selectors Supplement" Unicode character 1724 * block. 1725 * @since 1.5 1726 */ 1727 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1728 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1729 "VARIATION SELECTORS SUPPLEMENT", 1730 "VARIATIONSELECTORSSUPPLEMENT"); 1731 1732 /** 1733 * Constant for the "Supplementary Private Use Area-A" Unicode character 1734 * block. 
1735 * @since 1.5 1736 */ 1737 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1738 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1739 "SUPPLEMENTARY PRIVATE USE AREA-A", 1740 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1741 1742 /** 1743 * Constant for the "Supplementary Private Use Area-B" Unicode character 1744 * block. 1745 * @since 1.5 1746 */ 1747 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1748 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1749 "SUPPLEMENTARY PRIVATE USE AREA-B", 1750 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1751 1752 /** 1753 * Constant for the "High Surrogates" Unicode character block. 1754 * This block represents codepoint values in the high surrogate 1755 * range: U+D800 through U+DB7F 1756 * 1757 * @since 1.5 1758 */ 1759 public static final UnicodeBlock HIGH_SURROGATES = 1760 new UnicodeBlock("HIGH_SURROGATES", 1761 "HIGH SURROGATES", 1762 "HIGHSURROGATES"); 1763 1764 /** 1765 * Constant for the "High Private Use Surrogates" Unicode character 1766 * block. 1767 * This block represents codepoint values in the private use high 1768 * surrogate range: U+DB80 through U+DBFF 1769 * 1770 * @since 1.5 1771 */ 1772 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1773 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1774 "HIGH PRIVATE USE SURROGATES", 1775 "HIGHPRIVATEUSESURROGATES"); 1776 1777 /** 1778 * Constant for the "Low Surrogates" Unicode character block. 1779 * This block represents codepoint values in the low surrogate 1780 * range: U+DC00 through U+DFFF 1781 * 1782 * @since 1.5 1783 */ 1784 public static final UnicodeBlock LOW_SURROGATES = 1785 new UnicodeBlock("LOW_SURROGATES", 1786 "LOW SURROGATES", 1787 "LOWSURROGATES"); 1788 1789 /** 1790 * Constant for the "Arabic Supplement" Unicode character block. 
1791 * @since 1.7 1792 */ 1793 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1794 new UnicodeBlock("ARABIC_SUPPLEMENT", 1795 "ARABIC SUPPLEMENT", 1796 "ARABICSUPPLEMENT"); 1797 1798 /** 1799 * Constant for the "NKo" Unicode character block. 1800 * @since 1.7 1801 */ 1802 public static final UnicodeBlock NKO = 1803 new UnicodeBlock("NKO"); 1804 1805 /** 1806 * Constant for the "Samaritan" Unicode character block. 1807 * @since 1.7 1808 */ 1809 public static final UnicodeBlock SAMARITAN = 1810 new UnicodeBlock("SAMARITAN"); 1811 1812 /** 1813 * Constant for the "Mandaic" Unicode character block. 1814 * @since 1.7 1815 */ 1816 public static final UnicodeBlock MANDAIC = 1817 new UnicodeBlock("MANDAIC"); 1818 1819 /** 1820 * Constant for the "Ethiopic Supplement" Unicode character block. 1821 * @since 1.7 1822 */ 1823 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1824 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1825 "ETHIOPIC SUPPLEMENT", 1826 "ETHIOPICSUPPLEMENT"); 1827 1828 /** 1829 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1830 * Unicode character block. 1831 * @since 1.7 1832 */ 1833 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1834 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1835 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1836 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1837 1838 /** 1839 * Constant for the "New Tai Lue" Unicode character block. 1840 * @since 1.7 1841 */ 1842 public static final UnicodeBlock NEW_TAI_LUE = 1843 new UnicodeBlock("NEW_TAI_LUE", 1844 "NEW TAI LUE", 1845 "NEWTAILUE"); 1846 1847 /** 1848 * Constant for the "Buginese" Unicode character block. 1849 * @since 1.7 1850 */ 1851 public static final UnicodeBlock BUGINESE = 1852 new UnicodeBlock("BUGINESE"); 1853 1854 /** 1855 * Constant for the "Tai Tham" Unicode character block. 
1856 * @since 1.7 1857 */ 1858 public static final UnicodeBlock TAI_THAM = 1859 new UnicodeBlock("TAI_THAM", 1860 "TAI THAM", 1861 "TAITHAM"); 1862 1863 /** 1864 * Constant for the "Balinese" Unicode character block. 1865 * @since 1.7 1866 */ 1867 public static final UnicodeBlock BALINESE = 1868 new UnicodeBlock("BALINESE"); 1869 1870 /** 1871 * Constant for the "Sundanese" Unicode character block. 1872 * @since 1.7 1873 */ 1874 public static final UnicodeBlock SUNDANESE = 1875 new UnicodeBlock("SUNDANESE"); 1876 1877 /** 1878 * Constant for the "Batak" Unicode character block. 1879 * @since 1.7 1880 */ 1881 public static final UnicodeBlock BATAK = 1882 new UnicodeBlock("BATAK"); 1883 1884 /** 1885 * Constant for the "Lepcha" Unicode character block. 1886 * @since 1.7 1887 */ 1888 public static final UnicodeBlock LEPCHA = 1889 new UnicodeBlock("LEPCHA"); 1890 1891 /** 1892 * Constant for the "Ol Chiki" Unicode character block. 1893 * @since 1.7 1894 */ 1895 public static final UnicodeBlock OL_CHIKI = 1896 new UnicodeBlock("OL_CHIKI", 1897 "OL CHIKI", 1898 "OLCHIKI"); 1899 1900 /** 1901 * Constant for the "Vedic Extensions" Unicode character block. 1902 * @since 1.7 1903 */ 1904 public static final UnicodeBlock VEDIC_EXTENSIONS = 1905 new UnicodeBlock("VEDIC_EXTENSIONS", 1906 "VEDIC EXTENSIONS", 1907 "VEDICEXTENSIONS"); 1908 1909 /** 1910 * Constant for the "Phonetic Extensions Supplement" Unicode character 1911 * block. 1912 * @since 1.7 1913 */ 1914 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1915 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1916 "PHONETIC EXTENSIONS SUPPLEMENT", 1917 "PHONETICEXTENSIONSSUPPLEMENT"); 1918 1919 /** 1920 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1921 * character block. 
1922 * @since 1.7 1923 */ 1924 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1925 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1926 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1927 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1928 1929 /** 1930 * Constant for the "Glagolitic" Unicode character block. 1931 * @since 1.7 1932 */ 1933 public static final UnicodeBlock GLAGOLITIC = 1934 new UnicodeBlock("GLAGOLITIC"); 1935 1936 /** 1937 * Constant for the "Latin Extended-C" Unicode character block. 1938 * @since 1.7 1939 */ 1940 public static final UnicodeBlock LATIN_EXTENDED_C = 1941 new UnicodeBlock("LATIN_EXTENDED_C", 1942 "LATIN EXTENDED-C", 1943 "LATINEXTENDED-C"); 1944 1945 /** 1946 * Constant for the "Coptic" Unicode character block. 1947 * @since 1.7 1948 */ 1949 public static final UnicodeBlock COPTIC = 1950 new UnicodeBlock("COPTIC"); 1951 1952 /** 1953 * Constant for the "Georgian Supplement" Unicode character block. 1954 * @since 1.7 1955 */ 1956 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1957 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1958 "GEORGIAN SUPPLEMENT", 1959 "GEORGIANSUPPLEMENT"); 1960 1961 /** 1962 * Constant for the "Tifinagh" Unicode character block. 1963 * @since 1.7 1964 */ 1965 public static final UnicodeBlock TIFINAGH = 1966 new UnicodeBlock("TIFINAGH"); 1967 1968 /** 1969 * Constant for the "Ethiopic Extended" Unicode character block. 1970 * @since 1.7 1971 */ 1972 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1973 new UnicodeBlock("ETHIOPIC_EXTENDED", 1974 "ETHIOPIC EXTENDED", 1975 "ETHIOPICEXTENDED"); 1976 1977 /** 1978 * Constant for the "Cyrillic Extended-A" Unicode character block. 1979 * @since 1.7 1980 */ 1981 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1982 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1983 "CYRILLIC EXTENDED-A", 1984 "CYRILLICEXTENDED-A"); 1985 1986 /** 1987 * Constant for the "Supplemental Punctuation" Unicode character block. 
1988 * @since 1.7 1989 */ 1990 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1991 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1992 "SUPPLEMENTAL PUNCTUATION", 1993 "SUPPLEMENTALPUNCTUATION"); 1994 1995 /** 1996 * Constant for the "CJK Strokes" Unicode character block. 1997 * @since 1.7 1998 */ 1999 public static final UnicodeBlock CJK_STROKES = 2000 new UnicodeBlock("CJK_STROKES", 2001 "CJK STROKES", 2002 "CJKSTROKES"); 2003 2004 /** 2005 * Constant for the "Lisu" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock LISU = 2009 new UnicodeBlock("LISU"); 2010 2011 /** 2012 * Constant for the "Vai" Unicode character block. 2013 * @since 1.7 2014 */ 2015 public static final UnicodeBlock VAI = 2016 new UnicodeBlock("VAI"); 2017 2018 /** 2019 * Constant for the "Cyrillic Extended-B" Unicode character block. 2020 * @since 1.7 2021 */ 2022 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2023 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2024 "CYRILLIC EXTENDED-B", 2025 "CYRILLICEXTENDED-B"); 2026 2027 /** 2028 * Constant for the "Bamum" Unicode character block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock BAMUM = 2032 new UnicodeBlock("BAMUM"); 2033 2034 /** 2035 * Constant for the "Modifier Tone Letters" Unicode character block. 2036 * @since 1.7 2037 */ 2038 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2039 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2040 "MODIFIER TONE LETTERS", 2041 "MODIFIERTONELETTERS"); 2042 2043 /** 2044 * Constant for the "Latin Extended-D" Unicode character block. 2045 * @since 1.7 2046 */ 2047 public static final UnicodeBlock LATIN_EXTENDED_D = 2048 new UnicodeBlock("LATIN_EXTENDED_D", 2049 "LATIN EXTENDED-D", 2050 "LATINEXTENDED-D"); 2051 2052 /** 2053 * Constant for the "Syloti Nagri" Unicode character block. 
2054 * @since 1.7 2055 */ 2056 public static final UnicodeBlock SYLOTI_NAGRI = 2057 new UnicodeBlock("SYLOTI_NAGRI", 2058 "SYLOTI NAGRI", 2059 "SYLOTINAGRI"); 2060 2061 /** 2062 * Constant for the "Common Indic Number Forms" Unicode character block. 2063 * @since 1.7 2064 */ 2065 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2066 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2067 "COMMON INDIC NUMBER FORMS", 2068 "COMMONINDICNUMBERFORMS"); 2069 2070 /** 2071 * Constant for the "Phags-pa" Unicode character block. 2072 * @since 1.7 2073 */ 2074 public static final UnicodeBlock PHAGS_PA = 2075 new UnicodeBlock("PHAGS_PA", 2076 "PHAGS-PA"); 2077 2078 /** 2079 * Constant for the "Saurashtra" Unicode character block. 2080 * @since 1.7 2081 */ 2082 public static final UnicodeBlock SAURASHTRA = 2083 new UnicodeBlock("SAURASHTRA"); 2084 2085 /** 2086 * Constant for the "Devanagari Extended" Unicode character block. 2087 * @since 1.7 2088 */ 2089 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2090 new UnicodeBlock("DEVANAGARI_EXTENDED", 2091 "DEVANAGARI EXTENDED", 2092 "DEVANAGARIEXTENDED"); 2093 2094 /** 2095 * Constant for the "Kayah Li" Unicode character block. 2096 * @since 1.7 2097 */ 2098 public static final UnicodeBlock KAYAH_LI = 2099 new UnicodeBlock("KAYAH_LI", 2100 "KAYAH LI", 2101 "KAYAHLI"); 2102 2103 /** 2104 * Constant for the "Rejang" Unicode character block. 2105 * @since 1.7 2106 */ 2107 public static final UnicodeBlock REJANG = 2108 new UnicodeBlock("REJANG"); 2109 2110 /** 2111 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2112 * @since 1.7 2113 */ 2114 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2115 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2116 "HANGUL JAMO EXTENDED-A", 2117 "HANGULJAMOEXTENDED-A"); 2118 2119 /** 2120 * Constant for the "Javanese" Unicode character block. 
2121 * @since 1.7 2122 */ 2123 public static final UnicodeBlock JAVANESE = 2124 new UnicodeBlock("JAVANESE"); 2125 2126 /** 2127 * Constant for the "Cham" Unicode character block. 2128 * @since 1.7 2129 */ 2130 public static final UnicodeBlock CHAM = 2131 new UnicodeBlock("CHAM"); 2132 2133 /** 2134 * Constant for the "Myanmar Extended-A" Unicode character block. 2135 * @since 1.7 2136 */ 2137 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2138 new UnicodeBlock("MYANMAR_EXTENDED_A", 2139 "MYANMAR EXTENDED-A", 2140 "MYANMAREXTENDED-A"); 2141 2142 /** 2143 * Constant for the "Tai Viet" Unicode character block. 2144 * @since 1.7 2145 */ 2146 public static final UnicodeBlock TAI_VIET = 2147 new UnicodeBlock("TAI_VIET", 2148 "TAI VIET", 2149 "TAIVIET"); 2150 2151 /** 2152 * Constant for the "Ethiopic Extended-A" Unicode character block. 2153 * @since 1.7 2154 */ 2155 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2156 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2157 "ETHIOPIC EXTENDED-A", 2158 "ETHIOPICEXTENDED-A"); 2159 2160 /** 2161 * Constant for the "Meetei Mayek" Unicode character block. 2162 * @since 1.7 2163 */ 2164 public static final UnicodeBlock MEETEI_MAYEK = 2165 new UnicodeBlock("MEETEI_MAYEK", 2166 "MEETEI MAYEK", 2167 "MEETEIMAYEK"); 2168 2169 /** 2170 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2171 * @since 1.7 2172 */ 2173 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2174 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2175 "HANGUL JAMO EXTENDED-B", 2176 "HANGULJAMOEXTENDED-B"); 2177 2178 /** 2179 * Constant for the "Vertical Forms" Unicode character block. 2180 * @since 1.7 2181 */ 2182 public static final UnicodeBlock VERTICAL_FORMS = 2183 new UnicodeBlock("VERTICAL_FORMS", 2184 "VERTICAL FORMS", 2185 "VERTICALFORMS"); 2186 2187 /** 2188 * Constant for the "Ancient Greek Numbers" Unicode character block. 
* @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
                             "ANCIENT GREEK NUMBERS",
                             "ANCIENTGREEKNUMBERS");

        /**
         * Constant for the "Ancient Symbols" Unicode character block.
         * Covers code points U+10190..U+101CF.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
            new UnicodeBlock("ANCIENT_SYMBOLS",
                             "ANCIENT SYMBOLS",
                             "ANCIENTSYMBOLS");

        /**
         * Constant for the "Phaistos Disc" Unicode character block.
         * Covers code points U+101D0..U+101FF.
         * @since 1.7
         */
        public static final UnicodeBlock PHAISTOS_DISC =
            new UnicodeBlock("PHAISTOS_DISC",
                             "PHAISTOS DISC",
                             "PHAISTOSDISC");

        /**
         * Constant for the "Lycian" Unicode character block.
         * Covers code points U+10280..U+1029F.
         * @since 1.7
         */
        public static final UnicodeBlock LYCIAN =
            new UnicodeBlock("LYCIAN");

        /**
         * Constant for the "Carian" Unicode character block.
         * Covers code points U+102A0..U+102DF.
         * @since 1.7
         */
        public static final UnicodeBlock CARIAN =
            new UnicodeBlock("CARIAN");

        /**
         * Constant for the "Old Persian" Unicode character block.
         * Covers code points U+103A0..U+103DF.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_PERSIAN =
            new UnicodeBlock("OLD_PERSIAN",
                             "OLD PERSIAN",
                             "OLDPERSIAN");

        /**
         * Constant for the "Imperial Aramaic" Unicode character block.
         * Covers code points U+10840..U+1085F.
         * @since 1.7
         */
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
            new UnicodeBlock("IMPERIAL_ARAMAIC",
                             "IMPERIAL ARAMAIC",
                             "IMPERIALARAMAIC");

        /**
         * Constant for the "Phoenician" Unicode character block.
         * Covers code points U+10900..U+1091F.
         * @since 1.7
         */
        public static final UnicodeBlock PHOENICIAN =
            new UnicodeBlock("PHOENICIAN");

        /**
         * Constant for the "Lydian" Unicode character block.
         * Covers code points U+10920..U+1093F.
         * @since 1.7
         */
        public static final UnicodeBlock LYDIAN =
            new UnicodeBlock("LYDIAN");

        /**
         * Constant for the "Kharoshthi" Unicode character block.
         * Covers code points U+10A00..U+10A5F.
* @since 1.7
         */
        public static final UnicodeBlock KHAROSHTHI =
            new UnicodeBlock("KHAROSHTHI");

        /**
         * Constant for the "Old South Arabian" Unicode character block.
         * Covers code points U+10A60..U+10A7F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
            new UnicodeBlock("OLD_SOUTH_ARABIAN",
                             "OLD SOUTH ARABIAN",
                             "OLDSOUTHARABIAN");

        /**
         * Constant for the "Avestan" Unicode character block.
         * Covers code points U+10B00..U+10B3F.
         * @since 1.7
         */
        public static final UnicodeBlock AVESTAN =
            new UnicodeBlock("AVESTAN");

        /**
         * Constant for the "Inscriptional Parthian" Unicode character block.
         * Covers code points U+10B40..U+10B5F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
                             "INSCRIPTIONAL PARTHIAN",
                             "INSCRIPTIONALPARTHIAN");

        /**
         * Constant for the "Inscriptional Pahlavi" Unicode character block.
         * Covers code points U+10B60..U+10B7F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
                             "INSCRIPTIONAL PAHLAVI",
                             "INSCRIPTIONALPAHLAVI");

        /**
         * Constant for the "Old Turkic" Unicode character block.
         * Covers code points U+10C00..U+10C4F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_TURKIC =
            new UnicodeBlock("OLD_TURKIC",
                             "OLD TURKIC",
                             "OLDTURKIC");

        /**
         * Constant for the "Rumi Numeral Symbols" Unicode character block.
         * Covers code points U+10E60..U+10E7F.
         * @since 1.7
         */
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
                             "RUMI NUMERAL SYMBOLS",
                             "RUMINUMERALSYMBOLS");

        /**
         * Constant for the "Brahmi" Unicode character block.
         * Covers code points U+11000..U+1107F.
         * @since 1.7
         */
        public static final UnicodeBlock BRAHMI =
            new UnicodeBlock("BRAHMI");

        /**
         * Constant for the "Kaithi" Unicode character block.
         * Covers code points U+11080..U+110CF.
* @since 1.7
         */
        public static final UnicodeBlock KAITHI =
            new UnicodeBlock("KAITHI");

        /**
         * Constant for the "Cuneiform" Unicode character block.
         * Covers code points U+12000..U+123FF.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM =
            new UnicodeBlock("CUNEIFORM");

        /**
         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
         * character block.
         * Covers code points U+12400..U+1247F.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                             "CUNEIFORM NUMBERS AND PUNCTUATION",
                             "CUNEIFORMNUMBERSANDPUNCTUATION");

        /**
         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
         * Covers code points U+13000..U+1342F.
         * @since 1.7
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
                             "EGYPTIAN HIEROGLYPHS",
                             "EGYPTIANHIEROGLYPHS");

        /**
         * Constant for the "Bamum Supplement" Unicode character block.
         * Covers code points U+16800..U+16A3F.
         * @since 1.7
         */
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
            new UnicodeBlock("BAMUM_SUPPLEMENT",
                             "BAMUM SUPPLEMENT",
                             "BAMUMSUPPLEMENT");

        /**
         * Constant for the "Kana Supplement" Unicode character block.
         * Covers code points U+1B000..U+1B0FF.
         * @since 1.7
         */
        public static final UnicodeBlock KANA_SUPPLEMENT =
            new UnicodeBlock("KANA_SUPPLEMENT",
                             "KANA SUPPLEMENT",
                             "KANASUPPLEMENT");

        /**
         * Constant for the "Ancient Greek Musical Notation" Unicode character
         * block.
         * Covers code points U+1D200..U+1D24F.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                             "ANCIENT GREEK MUSICAL NOTATION",
                             "ANCIENTGREEKMUSICALNOTATION");

        /**
         * Constant for the "Counting Rod Numerals" Unicode character block.
         * Covers code points U+1D360..U+1D37F.
* @since 1.7
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
            new UnicodeBlock("COUNTING_ROD_NUMERALS",
                             "COUNTING ROD NUMERALS",
                             "COUNTINGRODNUMERALS");

        /**
         * Constant for the "Mahjong Tiles" Unicode character block.
         * Covers code points U+1F000..U+1F02F.
         * @since 1.7
         */
        public static final UnicodeBlock MAHJONG_TILES =
            new UnicodeBlock("MAHJONG_TILES",
                             "MAHJONG TILES",
                             "MAHJONGTILES");

        /**
         * Constant for the "Domino Tiles" Unicode character block.
         * Covers code points U+1F030..U+1F09F.
         * @since 1.7
         */
        public static final UnicodeBlock DOMINO_TILES =
            new UnicodeBlock("DOMINO_TILES",
                             "DOMINO TILES",
                             "DOMINOTILES");

        /**
         * Constant for the "Playing Cards" Unicode character block.
         * Covers code points U+1F0A0..U+1F0FF.
         * @since 1.7
         */
        public static final UnicodeBlock PLAYING_CARDS =
            new UnicodeBlock("PLAYING_CARDS",
                             "PLAYING CARDS",
                             "PLAYINGCARDS");

        /**
         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
         * block.
         * Covers code points U+1F100..U+1F1FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
                             "ENCLOSEDALPHANUMERICSUPPLEMENT");

        /**
         * Constant for the "Enclosed Ideographic Supplement" Unicode character
         * block.
         * Covers code points U+1F200..U+1F2FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");

        /**
         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
         * character block.
         * Covers code points U+1F300..U+1F5FF.
* @since 1.7
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");

        /**
         * Constant for the "Emoticons" Unicode character block.
         * Covers code points U+1F600..U+1F64F.
         * @since 1.7
         */
        public static final UnicodeBlock EMOTICONS =
            new UnicodeBlock("EMOTICONS");

        /**
         * Constant for the "Transport And Map Symbols" Unicode character block.
         * Covers code points U+1F680..U+1F6FF.
         * @since 1.7
         */
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
                             "TRANSPORT AND MAP SYMBOLS",
                             "TRANSPORTANDMAPSYMBOLS");

        /**
         * Constant for the "Alchemical Symbols" Unicode character block.
         * Covers code points U+1F700..U+1F77F.
         * @since 1.7
         */
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
                             "ALCHEMICAL SYMBOLS",
                             "ALCHEMICALSYMBOLS");

        /**
         * Constant for the "CJK Unified Ideographs Extension C" Unicode
         * character block.
         * Covers code points U+2A700..U+2B73F.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");

        /**
         * Constant for the "CJK Unified Ideographs Extension D" Unicode
         * character block.
         * Covers code points U+2B740..U+2B81F.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");

        /**
         * Constant for the "Arabic Extended-A" Unicode character block.
         * Covers code points U+08A0..U+08FF.
* @since 1.8
         */
        public static final UnicodeBlock ARABIC_EXTENDED_A =
            new UnicodeBlock("ARABIC_EXTENDED_A",
                             "ARABIC EXTENDED-A",
                             "ARABICEXTENDED-A");

        /**
         * Constant for the "Sundanese Supplement" Unicode character block.
         * Covers code points U+1CC0..U+1CCF.
         * @since 1.8
         */
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
                             "SUNDANESE SUPPLEMENT",
                             "SUNDANESESUPPLEMENT");

        /**
         * Constant for the "Meetei Mayek Extensions" Unicode character block.
         * Covers code points U+AAE0..U+AAFF.
         * @since 1.8
         */
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
                             "MEETEI MAYEK EXTENSIONS",
                             "MEETEIMAYEKEXTENSIONS");

        /**
         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
         * Covers code points U+10980..U+1099F.
         * @since 1.8
         */
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
                             "MEROITIC HIEROGLYPHS",
                             "MEROITICHIEROGLYPHS");

        /**
         * Constant for the "Meroitic Cursive" Unicode character block.
         * Covers code points U+109A0..U+109FF.
         * @since 1.8
         */
        public static final UnicodeBlock MEROITIC_CURSIVE =
            new UnicodeBlock("MEROITIC_CURSIVE",
                             "MEROITIC CURSIVE",
                             "MEROITICCURSIVE");

        /**
         * Constant for the "Sora Sompeng" Unicode character block.
         * Covers code points U+110D0..U+110FF.
         * @since 1.8
         */
        public static final UnicodeBlock SORA_SOMPENG =
            new UnicodeBlock("SORA_SOMPENG",
                             "SORA SOMPENG",
                             "SORASOMPENG");

        /**
         * Constant for the "Chakma" Unicode character block.
         * Covers code points U+11100..U+1114F.
         * @since 1.8
         */
        public static final UnicodeBlock CHAKMA =
            new UnicodeBlock("CHAKMA");

        /**
         * Constant for the "Sharada" Unicode character block.
         * Covers code points U+11180..U+111DF.
         * @since 1.8
         */
        public static final UnicodeBlock SHARADA =
            new UnicodeBlock("SHARADA");

        /**
         * Constant for the "Takri" Unicode character block.
         * Covers code points U+11680..U+116CF.
* @since 1.8
         */
        public static final UnicodeBlock TAKRI =
            new UnicodeBlock("TAKRI");

        /**
         * Constant for the "Miao" Unicode character block.
         * Covers code points U+16F00..U+16F9F.
         * @since 1.8
         */
        public static final UnicodeBlock MIAO =
            new UnicodeBlock("MIAO");

        /**
         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
         * character block.
         * Covers code points U+1EE00..U+1EEFF.
         * @since 1.8
         */
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
                             "ARABICMATHEMATICALALPHABETICSYMBOLS");

        /**
         * Constant for the "Combining Diacritical Marks Extended" Unicode
         * character block.
         * Covers code points U+1AB0..U+1AFF.
         * @since 1.9
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
                             "COMBINING DIACRITICAL MARKS EXTENDED",
                             "COMBININGDIACRITICALMARKSEXTENDED");

        /**
         * Constant for the "Myanmar Extended-B" Unicode character block.
         * Covers code points U+A9E0..U+A9FF.
         * @since 1.9
         */
        public static final UnicodeBlock MYANMAR_EXTENDED_B =
            new UnicodeBlock("MYANMAR_EXTENDED_B",
                             "MYANMAR EXTENDED-B",
                             "MYANMAREXTENDED-B");

        /**
         * Constant for the "Latin Extended-E" Unicode character block.
         * Covers code points U+AB30..U+AB6F.
         * @since 1.9
         */
        public static final UnicodeBlock LATIN_EXTENDED_E =
            new UnicodeBlock("LATIN_EXTENDED_E",
                             "LATIN EXTENDED-E",
                             "LATINEXTENDED-E");

        /**
         * Constant for the "Coptic Epact Numbers" Unicode character block.
         * Covers code points U+102E0..U+102FF.
         * @since 1.9
         */
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
            new UnicodeBlock("COPTIC_EPACT_NUMBERS",
                             "COPTIC EPACT NUMBERS",
                             "COPTICEPACTNUMBERS");

        /**
         * Constant for the "Old Permic" Unicode character block.
         * Covers code points U+10350..U+1037F.
* @since 1.9
         */
        public static final UnicodeBlock OLD_PERMIC =
            new UnicodeBlock("OLD_PERMIC",
                             "OLD PERMIC",
                             "OLDPERMIC");

        /**
         * Constant for the "Elbasan" Unicode character block.
         * Covers code points U+10500..U+1052F.
         * @since 1.9
         */
        public static final UnicodeBlock ELBASAN =
            new UnicodeBlock("ELBASAN");

        /**
         * Constant for the "Caucasian Albanian" Unicode character block.
         * Covers code points U+10530..U+1056F.
         * @since 1.9
         */
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
            new UnicodeBlock("CAUCASIAN_ALBANIAN",
                             "CAUCASIAN ALBANIAN",
                             "CAUCASIANALBANIAN");

        /**
         * Constant for the "Linear A" Unicode character block.
         * Covers code points U+10600..U+1077F.
         * @since 1.9
         */
        public static final UnicodeBlock LINEAR_A =
            new UnicodeBlock("LINEAR_A",
                             "LINEAR A",
                             "LINEARA");

        /**
         * Constant for the "Palmyrene" Unicode character block.
         * Covers code points U+10860..U+1087F.
         * @since 1.9
         */
        public static final UnicodeBlock PALMYRENE =
            new UnicodeBlock("PALMYRENE");

        /**
         * Constant for the "Nabataean" Unicode character block.
         * Covers code points U+10880..U+108AF.
         * @since 1.9
         */
        public static final UnicodeBlock NABATAEAN =
            new UnicodeBlock("NABATAEAN");

        /**
         * Constant for the "Old North Arabian" Unicode character block.
         * Covers code points U+10A80..U+10A9F.
         * @since 1.9
         */
        public static final UnicodeBlock OLD_NORTH_ARABIAN =
            new UnicodeBlock("OLD_NORTH_ARABIAN",
                             "OLD NORTH ARABIAN",
                             "OLDNORTHARABIAN");

        /**
         * Constant for the "Manichaean" Unicode character block.
         * Covers code points U+10AC0..U+10AFF.
         * @since 1.9
         */
        public static final UnicodeBlock MANICHAEAN =
            new UnicodeBlock("MANICHAEAN");

        /**
         * Constant for the "Psalter Pahlavi" Unicode character block.
         * Covers code points U+10B80..U+10BAF.
         * @since 1.9
         */
        public static final UnicodeBlock PSALTER_PAHLAVI =
            new UnicodeBlock("PSALTER_PAHLAVI",
                             "PSALTER PAHLAVI",
                             "PSALTERPAHLAVI");

        /**
         * Constant for the "Mahajani" Unicode character block.
         * Covers code points U+11150..U+1117F.
* @since 1.9
         */
        public static final UnicodeBlock MAHAJANI =
            new UnicodeBlock("MAHAJANI");

        /**
         * Constant for the "Sinhala Archaic Numbers" Unicode character block.
         * Covers code points U+111E0..U+111FF.
         * @since 1.9
         */
        public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
            new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
                             "SINHALA ARCHAIC NUMBERS",
                             "SINHALAARCHAICNUMBERS");

        /**
         * Constant for the "Khojki" Unicode character block.
         * Covers code points U+11200..U+1124F.
         * @since 1.9
         */
        public static final UnicodeBlock KHOJKI =
            new UnicodeBlock("KHOJKI");

        /**
         * Constant for the "Khudawadi" Unicode character block.
         * Covers code points U+112B0..U+112FF.
         * @since 1.9
         */
        public static final UnicodeBlock KHUDAWADI =
            new UnicodeBlock("KHUDAWADI");

        /**
         * Constant for the "Grantha" Unicode character block.
         * Covers code points U+11300..U+1137F.
         * @since 1.9
         */
        public static final UnicodeBlock GRANTHA =
            new UnicodeBlock("GRANTHA");

        /**
         * Constant for the "Tirhuta" Unicode character block.
         * Covers code points U+11480..U+114DF.
         * @since 1.9
         */
        public static final UnicodeBlock TIRHUTA =
            new UnicodeBlock("TIRHUTA");

        /**
         * Constant for the "Siddham" Unicode character block.
         * Covers code points U+11580..U+115FF.
         * @since 1.9
         */
        public static final UnicodeBlock SIDDHAM =
            new UnicodeBlock("SIDDHAM");

        /**
         * Constant for the "Modi" Unicode character block.
         * Covers code points U+11600..U+1165F.
         * @since 1.9
         */
        public static final UnicodeBlock MODI =
            new UnicodeBlock("MODI");

        /**
         * Constant for the "Warang Citi" Unicode character block.
         * Covers code points U+118A0..U+118FF.
         * @since 1.9
         */
        public static final UnicodeBlock WARANG_CITI =
            new UnicodeBlock("WARANG_CITI",
                             "WARANG CITI",
                             "WARANGCITI");

        /**
         * Constant for the "Pau Cin Hau" Unicode character block.
         * Covers code points U+11AC0..U+11AFF.
* @since 1.9
         */
        public static final UnicodeBlock PAU_CIN_HAU =
            new UnicodeBlock("PAU_CIN_HAU",
                             "PAU CIN HAU",
                             "PAUCINHAU");

        /**
         * Constant for the "Mro" Unicode character block.
         * Covers code points U+16A40..U+16A6F.
         * @since 1.9
         */
        public static final UnicodeBlock MRO =
            new UnicodeBlock("MRO");

        /**
         * Constant for the "Bassa Vah" Unicode character block.
         * Covers code points U+16AD0..U+16AFF.
         * @since 1.9
         */
        public static final UnicodeBlock BASSA_VAH =
            new UnicodeBlock("BASSA_VAH",
                             "BASSA VAH",
                             "BASSAVAH");

        /**
         * Constant for the "Pahawh Hmong" Unicode character block.
         * Covers code points U+16B00..U+16B8F.
         * @since 1.9
         */
        public static final UnicodeBlock PAHAWH_HMONG =
            new UnicodeBlock("PAHAWH_HMONG",
                             "PAHAWH HMONG",
                             "PAHAWHHMONG");

        /**
         * Constant for the "Duployan" Unicode character block.
         * Covers code points U+1BC00..U+1BC9F.
         * @since 1.9
         */
        public static final UnicodeBlock DUPLOYAN =
            new UnicodeBlock("DUPLOYAN");

        /**
         * Constant for the "Shorthand Format Controls" Unicode character block.
         * Covers code points U+1BCA0..U+1BCAF.
         * @since 1.9
         */
        public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
            new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
                             "SHORTHAND FORMAT CONTROLS",
                             "SHORTHANDFORMATCONTROLS");

        /**
         * Constant for the "Mende Kikakui" Unicode character block.
         * Covers code points U+1E800..U+1E8DF.
         * @since 1.9
         */
        public static final UnicodeBlock MENDE_KIKAKUI =
            new UnicodeBlock("MENDE_KIKAKUI",
                             "MENDE KIKAKUI",
                             "MENDEKIKAKUI");

        /**
         * Constant for the "Ornamental Dingbats" Unicode character block.
         * Covers code points U+1F650..U+1F67F.
         * @since 1.9
         */
        public static final UnicodeBlock ORNAMENTAL_DINGBATS =
            new UnicodeBlock("ORNAMENTAL_DINGBATS",
                             "ORNAMENTAL DINGBATS",
                             "ORNAMENTALDINGBATS");

        /**
         * Constant for the "Geometric Shapes Extended" Unicode character block.
         * Covers code points U+1F780..U+1F7FF.
2837 * @since 1.9 2838 */ 2839 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2840 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2841 "GEOMETRIC SHAPES EXTENDED", 2842 "GEOMETRICSHAPESEXTENDED"); 2843 2844 /** 2845 * Constant for the "Supplemental Arrows-C" Unicode character block. 2846 * @since 1.9 2847 */ 2848 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2849 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2850 "SUPPLEMENTAL ARROWS-C", 2851 "SUPPLEMENTALARROWS-C"); 2852 2853 private static final int blockStarts[] = { 2854 0x0000, // 0000..007F; Basic Latin 2855 0x0080, // 0080..00FF; Latin-1 Supplement 2856 0x0100, // 0100..017F; Latin Extended-A 2857 0x0180, // 0180..024F; Latin Extended-B 2858 0x0250, // 0250..02AF; IPA Extensions 2859 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2860 0x0300, // 0300..036F; Combining Diacritical Marks 2861 0x0370, // 0370..03FF; Greek and Coptic 2862 0x0400, // 0400..04FF; Cyrillic 2863 0x0500, // 0500..052F; Cyrillic Supplement 2864 0x0530, // 0530..058F; Armenian 2865 0x0590, // 0590..05FF; Hebrew 2866 0x0600, // 0600..06FF; Arabic 2867 0x0700, // 0700..074F; Syriac 2868 0x0750, // 0750..077F; Arabic Supplement 2869 0x0780, // 0780..07BF; Thaana 2870 0x07C0, // 07C0..07FF; NKo 2871 0x0800, // 0800..083F; Samaritan 2872 0x0840, // 0840..085F; Mandaic 2873 0x0860, // unassigned 2874 0x08A0, // 08A0..08FF; Arabic Extended-A 2875 0x0900, // 0900..097F; Devanagari 2876 0x0980, // 0980..09FF; Bengali 2877 0x0A00, // 0A00..0A7F; Gurmukhi 2878 0x0A80, // 0A80..0AFF; Gujarati 2879 0x0B00, // 0B00..0B7F; Oriya 2880 0x0B80, // 0B80..0BFF; Tamil 2881 0x0C00, // 0C00..0C7F; Telugu 2882 0x0C80, // 0C80..0CFF; Kannada 2883 0x0D00, // 0D00..0D7F; Malayalam 2884 0x0D80, // 0D80..0DFF; Sinhala 2885 0x0E00, // 0E00..0E7F; Thai 2886 0x0E80, // 0E80..0EFF; Lao 2887 0x0F00, // 0F00..0FFF; Tibetan 2888 0x1000, // 1000..109F; Myanmar 2889 0x10A0, // 10A0..10FF; Georgian 2890 0x1100, // 1100..11FF; Hangul Jamo 2891 0x1200, // 
1200..137F; Ethiopic 2892 0x1380, // 1380..139F; Ethiopic Supplement 2893 0x13A0, // 13A0..13FF; Cherokee 2894 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2895 0x1680, // 1680..169F; Ogham 2896 0x16A0, // 16A0..16FF; Runic 2897 0x1700, // 1700..171F; Tagalog 2898 0x1720, // 1720..173F; Hanunoo 2899 0x1740, // 1740..175F; Buhid 2900 0x1760, // 1760..177F; Tagbanwa 2901 0x1780, // 1780..17FF; Khmer 2902 0x1800, // 1800..18AF; Mongolian 2903 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2904 0x1900, // 1900..194F; Limbu 2905 0x1950, // 1950..197F; Tai Le 2906 0x1980, // 1980..19DF; New Tai Lue 2907 0x19E0, // 19E0..19FF; Khmer Symbols 2908 0x1A00, // 1A00..1A1F; Buginese 2909 0x1A20, // 1A20..1AAF; Tai Tham 2910 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 2911 0x1B00, // 1B00..1B7F; Balinese 2912 0x1B80, // 1B80..1BBF; Sundanese 2913 0x1BC0, // 1BC0..1BFF; Batak 2914 0x1C00, // 1C00..1C4F; Lepcha 2915 0x1C50, // 1C50..1C7F; Ol Chiki 2916 0x1C80, // unassigned 2917 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 2918 0x1CD0, // 1CD0..1CFF; Vedic Extensions 2919 0x1D00, // 1D00..1D7F; Phonetic Extensions 2920 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2921 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2922 0x1E00, // 1E00..1EFF; Latin Extended Additional 2923 0x1F00, // 1F00..1FFF; Greek Extended 2924 0x2000, // 2000..206F; General Punctuation 2925 0x2070, // 2070..209F; Superscripts and Subscripts 2926 0x20A0, // 20A0..20CF; Currency Symbols 2927 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2928 0x2100, // 2100..214F; Letterlike Symbols 2929 0x2150, // 2150..218F; Number Forms 2930 0x2190, // 2190..21FF; Arrows 2931 0x2200, // 2200..22FF; Mathematical Operators 2932 0x2300, // 2300..23FF; Miscellaneous Technical 2933 0x2400, // 2400..243F; Control Pictures 2934 0x2440, // 2440..245F; Optical Character Recognition 2935 0x2460, // 2460..24FF; Enclosed Alphanumerics 2936 
0x2500, // 2500..257F; Box Drawing 2937 0x2580, // 2580..259F; Block Elements 2938 0x25A0, // 25A0..25FF; Geometric Shapes 2939 0x2600, // 2600..26FF; Miscellaneous Symbols 2940 0x2700, // 2700..27BF; Dingbats 2941 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 2942 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2943 0x2800, // 2800..28FF; Braille Patterns 2944 0x2900, // 2900..297F; Supplemental Arrows-B 2945 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2946 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2947 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2948 0x2C00, // 2C00..2C5F; Glagolitic 2949 0x2C60, // 2C60..2C7F; Latin Extended-C 2950 0x2C80, // 2C80..2CFF; Coptic 2951 0x2D00, // 2D00..2D2F; Georgian Supplement 2952 0x2D30, // 2D30..2D7F; Tifinagh 2953 0x2D80, // 2D80..2DDF; Ethiopic Extended 2954 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2955 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2956 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2957 0x2F00, // 2F00..2FDF; Kangxi Radicals 2958 0x2FE0, // unassigned 2959 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2960 0x3000, // 3000..303F; CJK Symbols and Punctuation 2961 0x3040, // 3040..309F; Hiragana 2962 0x30A0, // 30A0..30FF; Katakana 2963 0x3100, // 3100..312F; Bopomofo 2964 0x3130, // 3130..318F; Hangul Compatibility Jamo 2965 0x3190, // 3190..319F; Kanbun 2966 0x31A0, // 31A0..31BF; Bopomofo Extended 2967 0x31C0, // 31C0..31EF; CJK Strokes 2968 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2969 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2970 0x3300, // 3300..33FF; CJK Compatibility 2971 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2972 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2973 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2974 0xA000, // A000..A48F; Yi Syllables 2975 0xA490, // A490..A4CF; Yi Radicals 2976 0xA4D0, // A4D0..A4FF; Lisu 2977 0xA500, // A500..A63F; Vai 2978 0xA640, // A640..A69F; 
Cyrillic Extended-B 2979 0xA6A0, // A6A0..A6FF; Bamum 2980 0xA700, // A700..A71F; Modifier Tone Letters 2981 0xA720, // A720..A7FF; Latin Extended-D 2982 0xA800, // A800..A82F; Syloti Nagri 2983 0xA830, // A830..A83F; Common Indic Number Forms 2984 0xA840, // A840..A87F; Phags-pa 2985 0xA880, // A880..A8DF; Saurashtra 2986 0xA8E0, // A8E0..A8FF; Devanagari Extended 2987 0xA900, // A900..A92F; Kayah Li 2988 0xA930, // A930..A95F; Rejang 2989 0xA960, // A960..A97F; Hangul Jamo Extended-A 2990 0xA980, // A980..A9DF; Javanese 2991 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 2992 0xAA00, // AA00..AA5F; Cham 2993 0xAA60, // AA60..AA7F; Myanmar Extended-A 2994 0xAA80, // AA80..AADF; Tai Viet 2995 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 2996 0xAB00, // AB00..AB2F; Ethiopic Extended-A 2997 0xAB30, // AB30..AB6F; Latin Extended-E 2998 0xAB70, // unassigned 2999 0xABC0, // ABC0..ABFF; Meetei Mayek 3000 0xAC00, // AC00..D7AF; Hangul Syllables 3001 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3002 0xD800, // D800..DB7F; High Surrogates 3003 0xDB80, // DB80..DBFF; High Private Use Surrogates 3004 0xDC00, // DC00..DFFF; Low Surrogates 3005 0xE000, // E000..F8FF; Private Use Area 3006 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3007 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3008 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3009 0xFE00, // FE00..FE0F; Variation Selectors 3010 0xFE10, // FE10..FE1F; Vertical Forms 3011 0xFE20, // FE20..FE2F; Combining Half Marks 3012 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3013 0xFE50, // FE50..FE6F; Small Form Variants 3014 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3015 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3016 0xFFF0, // FFF0..FFFF; Specials 3017 0x10000, // 10000..1007F; Linear B Syllabary 3018 0x10080, // 10080..100FF; Linear B Ideograms 3019 0x10100, // 10100..1013F; Aegean Numbers 3020 0x10140, // 10140..1018F; Ancient Greek Numbers 3021 0x10190, // 10190..101CF; Ancient Symbols 
3022 0x101D0, // 101D0..101FF; Phaistos Disc 3023 0x10200, // unassigned 3024 0x10280, // 10280..1029F; Lycian 3025 0x102A0, // 102A0..102DF; Carian 3026 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3027 0x10300, // 10300..1032F; Old Italic 3028 0x10330, // 10330..1034F; Gothic 3029 0x10350, // 10350..1037F; Old Permic 3030 0x10380, // 10380..1039F; Ugaritic 3031 0x103A0, // 103A0..103DF; Old Persian 3032 0x103E0, // unassigned 3033 0x10400, // 10400..1044F; Deseret 3034 0x10450, // 10450..1047F; Shavian 3035 0x10480, // 10480..104AF; Osmanya 3036 0x104B0, // unassigned 3037 0x10500, // 10500..1052F; Elbasan 3038 0x10530, // 10530..1056F; Caucasian Albanian 3039 0x10570, // unassigned 3040 0x10600, // 10600..1077F; Linear A 3041 0x10780, // unassigned 3042 0x10800, // 10800..1083F; Cypriot Syllabary 3043 0x10840, // 10840..1085F; Imperial Aramaic 3044 0x10860, // 10860..1087F; Palmyrene 3045 0x10880, // 10880..108AF; Nabataean 3046 0x108B0, // unassigned 3047 0x10900, // 10900..1091F; Phoenician 3048 0x10920, // 10920..1093F; Lydian 3049 0x10940, // unassigned 3050 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3051 0x109A0, // 109A0..109FF; Meroitic Cursive 3052 0x10A00, // 10A00..10A5F; Kharoshthi 3053 0x10A60, // 10A60..10A7F; Old South Arabian 3054 0x10A80, // 10A80..10A9F; Old North Arabian 3055 0x10AA0, // unassigned 3056 0x10AC0, // 10AC0..10AFF; Manichaean 3057 0x10B00, // 10B00..10B3F; Avestan 3058 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3059 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3060 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3061 0x10BB0, // unassigned 3062 0x10C00, // 10C00..10C4F; Old Turkic 3063 0x10C50, // unassigned 3064 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3065 0x10E80, // unassigned 3066 0x11000, // 11000..1107F; Brahmi 3067 0x11080, // 11080..110CF; Kaithi 3068 0x110D0, // 110D0..110FF; Sora Sompeng 3069 0x11100, // 11100..1114F; Chakma 3070 0x11150, // 11150..1117F; Mahajani 3071 0x11180, // 11180..111DF; Sharada 
3072 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3073 0x11200, // 11200..1124F; Khojki 3074 0x11250, // unassigned 3075 0x112B0, // 112B0..112FF; Khudawadi 3076 0x11300, // 11300..1137F; Grantha 3077 0x11380, // unassigned 3078 0x11480, // 11480..114DF; Tirhuta 3079 0x114E0, // unassigned 3080 0x11580, // 11580..115FF; Siddham 3081 0x11600, // 11600..1165F; Modi 3082 0x11660, // unassigned 3083 0x11680, // 11680..116CF; Takri 3084 0x116D0, // unassigned 3085 0x118A0, // 118A0..118FF; Warang Citi 3086 0x11900, // unassigned 3087 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3088 0x11B00, // unassigned 3089 0x12000, // 12000..123FF; Cuneiform 3090 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3091 0x12480, // unassigned 3092 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3093 0x13430, // unassigned 3094 0x16800, // 16800..16A3F; Bamum Supplement 3095 0x16A40, // 16A40..16A6F; Mro 3096 0x16A70, // unassigned 3097 0x16AD0, // 16AD0..16AFF; Bassa Vah 3098 0x16B00, // 16B00..16B8F; Pahawh Hmong 3099 0x16B90, // unassigned 3100 0x16F00, // 16F00..16F9F; Miao 3101 0x16FA0, // unassigned 3102 0x1B000, // 1B000..1B0FF; Kana Supplement 3103 0x1B100, // unassigned 3104 0x1BC00, // 1BC00..1BC9F; Duployan 3105 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3106 0x1BCB0, // unassigned 3107 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3108 0x1D100, // 1D100..1D1FF; Musical Symbols 3109 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3110 0x1D250, // unassigned 3111 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3112 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3113 0x1D380, // unassigned 3114 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3115 0x1D800, // unassigned 3116 0x1E800, // 1E800..1E8DF; Mende Kikakui 3117 0x1E8E0, // unassigned 3118 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3119 0x1EF00, // unassigned 3120 0x1F000, // 1F000..1F02F; Mahjong Tiles 3121 0x1F030, // 1F030..1F09F; Domino Tiles 3122 
// Lookup table used by of(int): the entry at index i is the block for the
// code point range that begins at blockStarts[i], so this array must stay
// index-aligned with blockStarts. A null entry marks a range that is not
// assigned to any Unicode block ("unassigned" in the blockStarts comments);
// of(int) returns that null unchanged.
private static final UnicodeBlock[] blocks = {
    BASIC_LATIN,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    CYRILLIC_SUPPLEMENTARY,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    ARABIC_SUPPLEMENT,
    THAANA,
    NKO,
    SAMARITAN,
    MANDAIC,
    null,
    ARABIC_EXTENDED_A,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    ETHIOPIC_SUPPLEMENT,
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    TAGALOG,
    HANUNOO,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
    LIMBU,
    TAI_LE,
    NEW_TAI_LUE,
    KHMER_SYMBOLS,
    BUGINESE,
    TAI_THAM,
    COMBINING_DIACRITICAL_MARKS_EXTENDED,
    BALINESE,
    SUNDANESE,
    BATAK,
    LEPCHA,
    OL_CHIKI,
    null,
    SUNDANESE_SUPPLEMENT,
    VEDIC_EXTENSIONS,
    PHONETIC_EXTENSIONS,
    PHONETIC_EXTENSIONS_SUPPLEMENT,
    COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    SUPPLEMENTAL_ARROWS_A,
    BRAILLE_PATTERNS,
    SUPPLEMENTAL_ARROWS_B,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    GLAGOLITIC,
    LATIN_EXTENDED_C,
    COPTIC,
    GEORGIAN_SUPPLEMENT,
    TIFINAGH,
    ETHIOPIC_EXTENDED,
    CYRILLIC_EXTENDED_A,
    SUPPLEMENTAL_PUNCTUATION,
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    null,
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    CJK_STROKES,
    KATAKANA_PHONETIC_EXTENSIONS,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    YIJING_HEXAGRAM_SYMBOLS,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    LISU,
    VAI,
    CYRILLIC_EXTENDED_B,
    BAMUM,
    MODIFIER_TONE_LETTERS,
    LATIN_EXTENDED_D,
    SYLOTI_NAGRI,
    COMMON_INDIC_NUMBER_FORMS,
    PHAGS_PA,
    SAURASHTRA,
    DEVANAGARI_EXTENDED,
    KAYAH_LI,
    REJANG,
    HANGUL_JAMO_EXTENDED_A,
    JAVANESE,
    MYANMAR_EXTENDED_B,
    CHAM,
    MYANMAR_EXTENDED_A,
    TAI_VIET,
    MEETEI_MAYEK_EXTENSIONS,
    ETHIOPIC_EXTENDED_A,
    LATIN_EXTENDED_E,
    null,
    MEETEI_MAYEK,
    HANGUL_SYLLABLES,
    HANGUL_JAMO_EXTENDED_B,
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    VARIATION_SELECTORS,
    VERTICAL_FORMS,
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SPECIALS,
    LINEAR_B_SYLLABARY,
    LINEAR_B_IDEOGRAMS,
    AEGEAN_NUMBERS,
    ANCIENT_GREEK_NUMBERS,
    ANCIENT_SYMBOLS,
    PHAISTOS_DISC,
    null,
    LYCIAN,
    CARIAN,
    COPTIC_EPACT_NUMBERS,
    OLD_ITALIC,
    GOTHIC,
    OLD_PERMIC,
    UGARITIC,
    OLD_PERSIAN,
    null,
    DESERET,
    SHAVIAN,
    OSMANYA,
    null,
    ELBASAN,
    CAUCASIAN_ALBANIAN,
    null,
    LINEAR_A,
    null,
    CYPRIOT_SYLLABARY,
    IMPERIAL_ARAMAIC,
    PALMYRENE,
    NABATAEAN,
    null,
    PHOENICIAN,
    LYDIAN,
    null,
    MEROITIC_HIEROGLYPHS,
    MEROITIC_CURSIVE,
    KHAROSHTHI,
    OLD_SOUTH_ARABIAN,
    OLD_NORTH_ARABIAN,
    null,
    MANICHAEAN,
    AVESTAN,
    INSCRIPTIONAL_PARTHIAN,
    INSCRIPTIONAL_PAHLAVI,
    PSALTER_PAHLAVI,
    null,
    OLD_TURKIC,
    null,
    RUMI_NUMERAL_SYMBOLS,
    null,
    BRAHMI,
    KAITHI,
    SORA_SOMPENG,
    CHAKMA,
    MAHAJANI,
    SHARADA,
    SINHALA_ARCHAIC_NUMBERS,
    KHOJKI,
    null,
    KHUDAWADI,
    GRANTHA,
    null,
    TIRHUTA,
    null,
    SIDDHAM,
    MODI,
    null,
    TAKRI,
    null,
    WARANG_CITI,
    null,
    PAU_CIN_HAU,
    null,
    CUNEIFORM,
    CUNEIFORM_NUMBERS_AND_PUNCTUATION,
    null,
    EGYPTIAN_HIEROGLYPHS,
    null,
    BAMUM_SUPPLEMENT,
    MRO,
    null,
    BASSA_VAH,
    PAHAWH_HMONG,
    null,
    MIAO,
    null,
    KANA_SUPPLEMENT,
    null,
    DUPLOYAN,
    SHORTHAND_FORMAT_CONTROLS,
    null,
    BYZANTINE_MUSICAL_SYMBOLS,
    MUSICAL_SYMBOLS,
    ANCIENT_GREEK_MUSICAL_NOTATION,
    null,
    TAI_XUAN_JING_SYMBOLS,
    COUNTING_ROD_NUMERALS,
    null,
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    null,
    MENDE_KIKAKUI,
    null,
    ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
    null,
    MAHJONG_TILES,
    DOMINO_TILES,
    PLAYING_CARDS,
    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
    MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
    EMOTICONS,
    ORNAMENTAL_DINGBATS,
    TRANSPORT_AND_MAP_SYMBOLS,
    ALCHEMICAL_SYMBOLS,
    GEOMETRIC_SHAPES_EXTENDED,
    SUPPLEMENTAL_ARROWS_C,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
    null,
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    null,
    TAGS,
    null,
    VARIATION_SELECTORS_SUPPLEMENT,
    null,
    SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    SUPPLEMENTARY_PRIVATE_USE_AREA_B
};
3453 * 3454 * @param c The character in question 3455 * @return The {@code UnicodeBlock} instance representing the 3456 * Unicode block of which this character is a member, or 3457 * {@code null} if the character is not a member of any 3458 * Unicode block 3459 */ 3460 public static UnicodeBlock of(char c) { 3461 return of((int)c); 3462 } 3463 3464 /** 3465 * Returns the object representing the Unicode block 3466 * containing the given character (Unicode code point), or 3467 * {@code null} if the character is not a member of a 3468 * defined block. 3469 * 3470 * @param codePoint the character (Unicode code point) in question. 3471 * @return The {@code UnicodeBlock} instance representing the 3472 * Unicode block of which this character is a member, or 3473 * {@code null} if the character is not a member of any 3474 * Unicode block 3475 * @exception IllegalArgumentException if the specified 3476 * {@code codePoint} is an invalid Unicode code point. 3477 * @see Character#isValidCodePoint(int) 3478 * @since 1.5 3479 */ 3480 public static UnicodeBlock of(int codePoint) { 3481 if (!isValidCodePoint(codePoint)) { 3482 throw new IllegalArgumentException(); 3483 } 3484 3485 int top, bottom, current; 3486 bottom = 0; 3487 top = blockStarts.length; 3488 current = top/2; 3489 3490 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3491 while (top - bottom > 1) { 3492 if (codePoint >= blockStarts[current]) { 3493 bottom = current; 3494 } else { 3495 top = current; 3496 } 3497 current = (top + bottom) / 2; 3498 } 3499 return blocks[current]; 3500 } 3501 3502 /** 3503 * Returns the UnicodeBlock with the given name. Block 3504 * names are determined by The Unicode Standard. The file 3505 * {@code Blocks-<version>.txt} defines blocks for a particular 3506 * version of the standard. The {@link Character} class specifies 3507 * the version of the standard that it supports. 
3508 * <p> 3509 * This method accepts block names in the following forms: 3510 * <ol> 3511 * <li> Canonical block names as defined by the Unicode Standard. 3512 * For example, the standard defines a "Basic Latin" block. Therefore, this 3513 * method accepts "Basic Latin" as a valid block name. The documentation of 3514 * each UnicodeBlock provides the canonical name. 3515 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3516 * is a valid block name for the "Basic Latin" block. 3517 * <li>The text representation of each constant UnicodeBlock identifier. 3518 * For example, this method will return the {@link #BASIC_LATIN} block if 3519 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3520 * hyphens in the canonical name with underscores. 3521 * </ol> 3522 * Finally, character case is ignored for all of the valid block name forms. 3523 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3524 * The en_US locale's case mapping rules are used to provide case-insensitive 3525 * string comparisons for block name validation. 3526 * <p> 3527 * If the Unicode Standard changes block names, both the previous and 3528 * current names will be accepted. 3529 * 3530 * @param blockName A {@code UnicodeBlock} name. 
3531 * @return The {@code UnicodeBlock} instance identified 3532 * by {@code blockName} 3533 * @throws IllegalArgumentException if {@code blockName} is an 3534 * invalid name 3535 * @throws NullPointerException if {@code blockName} is null 3536 * @since 1.5 3537 */ 3538 public static final UnicodeBlock forName(String blockName) { 3539 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3540 if (block == null) { 3541 throw new IllegalArgumentException(); 3542 } 3543 return block; 3544 } 3545 } 3546 3547 3548 /** 3549 * A family of character subsets representing the character scripts 3550 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3551 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3552 * character is assigned to a single Unicode script, either a specific 3553 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3554 * one of the following three special values, 3555 * {@link Character.UnicodeScript#INHERITED Inherited}, 3556 * {@link Character.UnicodeScript#COMMON Common} or 3557 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3558 * 3559 * @since 1.7 3560 */ 3561 public static enum UnicodeScript { 3562 /** 3563 * Unicode script "Common". 3564 */ 3565 COMMON, 3566 3567 /** 3568 * Unicode script "Latin". 3569 */ 3570 LATIN, 3571 3572 /** 3573 * Unicode script "Greek". 3574 */ 3575 GREEK, 3576 3577 /** 3578 * Unicode script "Cyrillic". 3579 */ 3580 CYRILLIC, 3581 3582 /** 3583 * Unicode script "Armenian". 3584 */ 3585 ARMENIAN, 3586 3587 /** 3588 * Unicode script "Hebrew". 3589 */ 3590 HEBREW, 3591 3592 /** 3593 * Unicode script "Arabic". 3594 */ 3595 ARABIC, 3596 3597 /** 3598 * Unicode script "Syriac". 3599 */ 3600 SYRIAC, 3601 3602 /** 3603 * Unicode script "Thaana". 3604 */ 3605 THAANA, 3606 3607 /** 3608 * Unicode script "Devanagari". 3609 */ 3610 DEVANAGARI, 3611 3612 /** 3613 * Unicode script "Bengali". 3614 */ 3615 BENGALI, 3616 3617 /** 3618 * Unicode script "Gurmukhi". 
3619 */ 3620 GURMUKHI, 3621 3622 /** 3623 * Unicode script "Gujarati". 3624 */ 3625 GUJARATI, 3626 3627 /** 3628 * Unicode script "Oriya". 3629 */ 3630 ORIYA, 3631 3632 /** 3633 * Unicode script "Tamil". 3634 */ 3635 TAMIL, 3636 3637 /** 3638 * Unicode script "Telugu". 3639 */ 3640 TELUGU, 3641 3642 /** 3643 * Unicode script "Kannada". 3644 */ 3645 KANNADA, 3646 3647 /** 3648 * Unicode script "Malayalam". 3649 */ 3650 MALAYALAM, 3651 3652 /** 3653 * Unicode script "Sinhala". 3654 */ 3655 SINHALA, 3656 3657 /** 3658 * Unicode script "Thai". 3659 */ 3660 THAI, 3661 3662 /** 3663 * Unicode script "Lao". 3664 */ 3665 LAO, 3666 3667 /** 3668 * Unicode script "Tibetan". 3669 */ 3670 TIBETAN, 3671 3672 /** 3673 * Unicode script "Myanmar". 3674 */ 3675 MYANMAR, 3676 3677 /** 3678 * Unicode script "Georgian". 3679 */ 3680 GEORGIAN, 3681 3682 /** 3683 * Unicode script "Hangul". 3684 */ 3685 HANGUL, 3686 3687 /** 3688 * Unicode script "Ethiopic". 3689 */ 3690 ETHIOPIC, 3691 3692 /** 3693 * Unicode script "Cherokee". 3694 */ 3695 CHEROKEE, 3696 3697 /** 3698 * Unicode script "Canadian_Aboriginal". 3699 */ 3700 CANADIAN_ABORIGINAL, 3701 3702 /** 3703 * Unicode script "Ogham". 3704 */ 3705 OGHAM, 3706 3707 /** 3708 * Unicode script "Runic". 3709 */ 3710 RUNIC, 3711 3712 /** 3713 * Unicode script "Khmer". 3714 */ 3715 KHMER, 3716 3717 /** 3718 * Unicode script "Mongolian". 3719 */ 3720 MONGOLIAN, 3721 3722 /** 3723 * Unicode script "Hiragana". 3724 */ 3725 HIRAGANA, 3726 3727 /** 3728 * Unicode script "Katakana". 3729 */ 3730 KATAKANA, 3731 3732 /** 3733 * Unicode script "Bopomofo". 3734 */ 3735 BOPOMOFO, 3736 3737 /** 3738 * Unicode script "Han". 3739 */ 3740 HAN, 3741 3742 /** 3743 * Unicode script "Yi". 3744 */ 3745 YI, 3746 3747 /** 3748 * Unicode script "Old_Italic". 3749 */ 3750 OLD_ITALIC, 3751 3752 /** 3753 * Unicode script "Gothic". 3754 */ 3755 GOTHIC, 3756 3757 /** 3758 * Unicode script "Deseret". 3759 */ 3760 DESERET, 3761 3762 /** 3763 * Unicode script "Inherited". 
3764 */ 3765 INHERITED, 3766 3767 /** 3768 * Unicode script "Tagalog". 3769 */ 3770 TAGALOG, 3771 3772 /** 3773 * Unicode script "Hanunoo". 3774 */ 3775 HANUNOO, 3776 3777 /** 3778 * Unicode script "Buhid". 3779 */ 3780 BUHID, 3781 3782 /** 3783 * Unicode script "Tagbanwa". 3784 */ 3785 TAGBANWA, 3786 3787 /** 3788 * Unicode script "Limbu". 3789 */ 3790 LIMBU, 3791 3792 /** 3793 * Unicode script "Tai_Le". 3794 */ 3795 TAI_LE, 3796 3797 /** 3798 * Unicode script "Linear_B". 3799 */ 3800 LINEAR_B, 3801 3802 /** 3803 * Unicode script "Ugaritic". 3804 */ 3805 UGARITIC, 3806 3807 /** 3808 * Unicode script "Shavian". 3809 */ 3810 SHAVIAN, 3811 3812 /** 3813 * Unicode script "Osmanya". 3814 */ 3815 OSMANYA, 3816 3817 /** 3818 * Unicode script "Cypriot". 3819 */ 3820 CYPRIOT, 3821 3822 /** 3823 * Unicode script "Braille". 3824 */ 3825 BRAILLE, 3826 3827 /** 3828 * Unicode script "Buginese". 3829 */ 3830 BUGINESE, 3831 3832 /** 3833 * Unicode script "Coptic". 3834 */ 3835 COPTIC, 3836 3837 /** 3838 * Unicode script "New_Tai_Lue". 3839 */ 3840 NEW_TAI_LUE, 3841 3842 /** 3843 * Unicode script "Glagolitic". 3844 */ 3845 GLAGOLITIC, 3846 3847 /** 3848 * Unicode script "Tifinagh". 3849 */ 3850 TIFINAGH, 3851 3852 /** 3853 * Unicode script "Syloti_Nagri". 3854 */ 3855 SYLOTI_NAGRI, 3856 3857 /** 3858 * Unicode script "Old_Persian". 3859 */ 3860 OLD_PERSIAN, 3861 3862 /** 3863 * Unicode script "Kharoshthi". 3864 */ 3865 KHAROSHTHI, 3866 3867 /** 3868 * Unicode script "Balinese". 3869 */ 3870 BALINESE, 3871 3872 /** 3873 * Unicode script "Cuneiform". 3874 */ 3875 CUNEIFORM, 3876 3877 /** 3878 * Unicode script "Phoenician". 3879 */ 3880 PHOENICIAN, 3881 3882 /** 3883 * Unicode script "Phags_Pa". 3884 */ 3885 PHAGS_PA, 3886 3887 /** 3888 * Unicode script "Nko". 3889 */ 3890 NKO, 3891 3892 /** 3893 * Unicode script "Sundanese". 3894 */ 3895 SUNDANESE, 3896 3897 /** 3898 * Unicode script "Batak". 3899 */ 3900 BATAK, 3901 3902 /** 3903 * Unicode script "Lepcha". 
3904 */ 3905 LEPCHA, 3906 3907 /** 3908 * Unicode script "Ol_Chiki". 3909 */ 3910 OL_CHIKI, 3911 3912 /** 3913 * Unicode script "Vai". 3914 */ 3915 VAI, 3916 3917 /** 3918 * Unicode script "Saurashtra". 3919 */ 3920 SAURASHTRA, 3921 3922 /** 3923 * Unicode script "Kayah_Li". 3924 */ 3925 KAYAH_LI, 3926 3927 /** 3928 * Unicode script "Rejang". 3929 */ 3930 REJANG, 3931 3932 /** 3933 * Unicode script "Lycian". 3934 */ 3935 LYCIAN, 3936 3937 /** 3938 * Unicode script "Carian". 3939 */ 3940 CARIAN, 3941 3942 /** 3943 * Unicode script "Lydian". 3944 */ 3945 LYDIAN, 3946 3947 /** 3948 * Unicode script "Cham". 3949 */ 3950 CHAM, 3951 3952 /** 3953 * Unicode script "Tai_Tham". 3954 */ 3955 TAI_THAM, 3956 3957 /** 3958 * Unicode script "Tai_Viet". 3959 */ 3960 TAI_VIET, 3961 3962 /** 3963 * Unicode script "Avestan". 3964 */ 3965 AVESTAN, 3966 3967 /** 3968 * Unicode script "Egyptian_Hieroglyphs". 3969 */ 3970 EGYPTIAN_HIEROGLYPHS, 3971 3972 /** 3973 * Unicode script "Samaritan". 3974 */ 3975 SAMARITAN, 3976 3977 /** 3978 * Unicode script "Mandaic". 3979 */ 3980 MANDAIC, 3981 3982 /** 3983 * Unicode script "Lisu". 3984 */ 3985 LISU, 3986 3987 /** 3988 * Unicode script "Bamum". 3989 */ 3990 BAMUM, 3991 3992 /** 3993 * Unicode script "Javanese". 3994 */ 3995 JAVANESE, 3996 3997 /** 3998 * Unicode script "Meetei_Mayek". 3999 */ 4000 MEETEI_MAYEK, 4001 4002 /** 4003 * Unicode script "Imperial_Aramaic". 4004 */ 4005 IMPERIAL_ARAMAIC, 4006 4007 /** 4008 * Unicode script "Old_South_Arabian". 4009 */ 4010 OLD_SOUTH_ARABIAN, 4011 4012 /** 4013 * Unicode script "Inscriptional_Parthian". 4014 */ 4015 INSCRIPTIONAL_PARTHIAN, 4016 4017 /** 4018 * Unicode script "Inscriptional_Pahlavi". 4019 */ 4020 INSCRIPTIONAL_PAHLAVI, 4021 4022 /** 4023 * Unicode script "Old_Turkic". 4024 */ 4025 OLD_TURKIC, 4026 4027 /** 4028 * Unicode script "Brahmi". 4029 */ 4030 BRAHMI, 4031 4032 /** 4033 * Unicode script "Kaithi". 4034 */ 4035 KAITHI, 4036 4037 /** 4038 * Unicode script "Meroitic Hieroglyphs". 
4039 * @since 1.8 4040 */ 4041 MEROITIC_HIEROGLYPHS, 4042 4043 /** 4044 * Unicode script "Meroitic Cursive". 4045 * @since 1.8 4046 */ 4047 MEROITIC_CURSIVE, 4048 4049 /** 4050 * Unicode script "Sora Sompeng". 4051 * @since 1.8 4052 */ 4053 SORA_SOMPENG, 4054 4055 /** 4056 * Unicode script "Chakma". 4057 * @since 1.8 4058 */ 4059 CHAKMA, 4060 4061 /** 4062 * Unicode script "Sharada". 4063 * @since 1.8 4064 */ 4065 SHARADA, 4066 4067 /** 4068 * Unicode script "Takri". 4069 * @since 1.8 4070 */ 4071 TAKRI, 4072 4073 /** 4074 * Unicode script "Miao". 4075 * @since 1.8 4076 */ 4077 MIAO, 4078 4079 /** 4080 * Unicode script "Caucasian Albanian". 4081 * @since 1.9 4082 */ 4083 CAUCASIAN_ALBANIAN, 4084 4085 /** 4086 * Unicode script "Bassa Vah". 4087 * @since 1.9 4088 */ 4089 BASSA_VAH, 4090 4091 /** 4092 * Unicode script "Duployan". 4093 * @since 1.9 4094 */ 4095 DUPLOYAN, 4096 4097 /** 4098 * Unicode script "Elbasan". 4099 * @since 1.9 4100 */ 4101 ELBASAN, 4102 4103 /** 4104 * Unicode script "Grantha". 4105 * @since 1.9 4106 */ 4107 GRANTHA, 4108 4109 /** 4110 * Unicode script "Pahawh Hmong". 4111 * @since 1.9 4112 */ 4113 PAHAWH_HMONG, 4114 4115 /** 4116 * Unicode script "Khojki". 4117 * @since 1.9 4118 */ 4119 KHOJKI, 4120 4121 /** 4122 * Unicode script "Linear A". 4123 * @since 1.9 4124 */ 4125 LINEAR_A, 4126 4127 /** 4128 * Unicode script "Mahajani". 4129 * @since 1.9 4130 */ 4131 MAHAJANI, 4132 4133 /** 4134 * Unicode script "Manichaean". 4135 * @since 1.9 4136 */ 4137 MANICHAEAN, 4138 4139 /** 4140 * Unicode script "Mende Kikakui". 4141 * @since 1.9 4142 */ 4143 MENDE_KIKAKUI, 4144 4145 /** 4146 * Unicode script "Modi". 4147 * @since 1.9 4148 */ 4149 MODI, 4150 4151 /** 4152 * Unicode script "Mro". 4153 * @since 1.9 4154 */ 4155 MRO, 4156 4157 /** 4158 * Unicode script "Old North Arabian". 4159 * @since 1.9 4160 */ 4161 OLD_NORTH_ARABIAN, 4162 4163 /** 4164 * Unicode script "Nabataean". 
4165 * @since 1.9 4166 */ 4167 NABATAEAN, 4168 4169 /** 4170 * Unicode script "Palmyrene". 4171 * @since 1.9 4172 */ 4173 PALMYRENE, 4174 4175 /** 4176 * Unicode script "Pau Cin Hau". 4177 * @since 1.9 4178 */ 4179 PAU_CIN_HAU, 4180 4181 /** 4182 * Unicode script "Old Permic". 4183 * @since 1.9 4184 */ 4185 OLD_PERMIC, 4186 4187 /** 4188 * Unicode script "Psalter Pahlavi". 4189 * @since 1.9 4190 */ 4191 PSALTER_PAHLAVI, 4192 4193 /** 4194 * Unicode script "Siddham". 4195 * @since 1.9 4196 */ 4197 SIDDHAM, 4198 4199 /** 4200 * Unicode script "Khudawadi". 4201 * @since 1.9 4202 */ 4203 KHUDAWADI, 4204 4205 /** 4206 * Unicode script "Tirhuta". 4207 * @since 1.9 4208 */ 4209 TIRHUTA, 4210 4211 /** 4212 * Unicode script "Warang Citi". 4213 * @since 1.9 4214 */ 4215 WARANG_CITI, 4216 4217 /** 4218 * Unicode script "Unknown". 4219 */ 4220 UNKNOWN; 4221 4222 private static final int[] scriptStarts = { 4223 0x0000, // 0000..0040; COMMON 4224 0x0041, // 0041..005A; LATIN 4225 0x005B, // 005B..0060; COMMON 4226 0x0061, // 0061..007A; LATIN 4227 0x007B, // 007B..00A9; COMMON 4228 0x00AA, // 00AA ; LATIN 4229 0x00AB, // 00AB..00B9; COMMON 4230 0x00BA, // 00BA ; LATIN 4231 0x00BB, // 00BB..00BF; COMMON 4232 0x00C0, // 00C0..00D6; LATIN 4233 0x00D7, // 00D7 ; COMMON 4234 0x00D8, // 00D8..00F6; LATIN 4235 0x00F7, // 00F7 ; COMMON 4236 0x00F8, // 00F8..02B8; LATIN 4237 0x02B9, // 02B9..02DF; COMMON 4238 0x02E0, // 02E0..02E4; LATIN 4239 0x02E5, // 02E5..02E9; COMMON 4240 0x02EA, // 02EA..02EB; BOPOMOFO 4241 0x02EC, // 02EC..02FF; COMMON 4242 0x0300, // 0300..036F; INHERITED 4243 0x0370, // 0370..0373; GREEK 4244 0x0374, // 0374 ; COMMON 4245 0x0375, // 0375..0377; GREEK 4246 0x0378, // 0378..0379; UNKNOWN 4247 0x037A, // 037A..037D; GREEK 4248 0x037E, // 037E ; COMMON 4249 0x037F, // 037F ; GREEK 4250 0x0380, // 0380..0383; UNKNOWN 4251 0x0384, // 0384 ; GREEK 4252 0x0385, // 0385 ; COMMON 4253 0x0386, // 0386 ; GREEK 4254 0x0387, // 0387 ; COMMON 4255 0x0388, // 0388..038A; GREEK 
4256 0x038B, // 038B ; UNKNOWN 4257 0x038C, // 038C ; GREEK 4258 0x038D, // 038D ; UNKNOWN 4259 0x038E, // 038E..03A1; GREEK 4260 0x03A2, // 03A2 ; UNKNOWN 4261 0x03A3, // 03A3..03E1; GREEK 4262 0x03E2, // 03E2..03EF; COPTIC 4263 0x03F0, // 03F0..03FF; GREEK 4264 0x0400, // 0400..0484; CYRILLIC 4265 0x0485, // 0485..0486; INHERITED 4266 0x0487, // 0487..052F; CYRILLIC 4267 0x0530, // 0530 ; UNKNOWN 4268 0x0531, // 0531..0556; ARMENIAN 4269 0x0557, // 0557..0558; UNKNOWN 4270 0x0559, // 0559..055F; ARMENIAN 4271 0x0560, // 0560 ; UNKNOWN 4272 0x0561, // 0561..0587; ARMENIAN 4273 0x0588, // 0588 ; UNKNOWN 4274 0x0589, // 0589 ; COMMON 4275 0x058A, // 058A ; ARMENIAN 4276 0x058B, // 058B..058C; UNKNOWN 4277 0x058D, // 058D..058F; ARMENIAN 4278 0x0590, // 0590 ; UNKNOWN 4279 0x0591, // 0591..05C7; HEBREW 4280 0x05C8, // 05C8..05CF; UNKNOWN 4281 0x05D0, // 05D0..05EA; HEBREW 4282 0x05EB, // 05EB..05EF; UNKNOWN 4283 0x05F0, // 05F0..05F4; HEBREW 4284 0x05F5, // 05F5..05FF; UNKNOWN 4285 0x0600, // 0600..0604; ARABIC 4286 0x0605, // 0605 ; COMMON 4287 0x0606, // 0606..060B; ARABIC 4288 0x060C, // 060C ; COMMON 4289 0x060D, // 060D..061A; ARABIC 4290 0x061B, // 061B..061C; COMMON 4291 0x061D, // 061D ; UNKNOWN 4292 0x061E, // 061E ; ARABIC 4293 0x061F, // 061F ; COMMON 4294 0x0620, // 0620..063F; ARABIC 4295 0x0640, // 0640 ; COMMON 4296 0x0641, // 0641..064A; ARABIC 4297 0x064B, // 064B..0655; INHERITED 4298 0x0656, // 0656..065F; ARABIC 4299 0x0660, // 0660..0669; COMMON 4300 0x066A, // 066A..066F; ARABIC 4301 0x0670, // 0670 ; INHERITED 4302 0x0671, // 0671..06DC; ARABIC 4303 0x06DD, // 06DD ; COMMON 4304 0x06DE, // 06DE..06FF; ARABIC 4305 0x0700, // 0700..070D; SYRIAC 4306 0x070E, // 070E ; UNKNOWN 4307 0x070F, // 070F..074A; SYRIAC 4308 0x074B, // 074B..074C; UNKNOWN 4309 0x074D, // 074D..074F; SYRIAC 4310 0x0750, // 0750..077F; ARABIC 4311 0x0780, // 0780..07B1; THAANA 4312 0x07B2, // 07B2..07BF; UNKNOWN 4313 0x07C0, // 07C0..07FA; NKO 4314 0x07FB, // 07FB..07FF; 
UNKNOWN 4315 0x0800, // 0800..082D; SAMARITAN 4316 0x082E, // 082E..082F; UNKNOWN 4317 0x0830, // 0830..083E; SAMARITAN 4318 0x083F, // 083F ; UNKNOWN 4319 0x0840, // 0840..085B; MANDAIC 4320 0x085C, // 085C..085D; UNKNOWN 4321 0x085E, // 085E ; MANDAIC 4322 0x085F, // 085F..089F; UNKNOWN 4323 0x08A0, // 08A0..08B2; ARABIC 4324 0x08B3, // 08B3..08E3; UNKNOWN 4325 0x08E4, // 08E4..08FF; ARABIC 4326 0x0900, // 0900..0950; DEVANAGARI 4327 0x0951, // 0951..0952; INHERITED 4328 0x0953, // 0953..0963; DEVANAGARI 4329 0x0964, // 0964..0965; COMMON 4330 0x0966, // 0966..097F; DEVANAGARI 4331 0x0980, // 0980..0983; BENGALI 4332 0x0984, // 0984 ; UNKNOWN 4333 0x0985, // 0985..098C; BENGALI 4334 0x098D, // 098D..098E; UNKNOWN 4335 0x098F, // 098F..0990; BENGALI 4336 0x0991, // 0991..0992; UNKNOWN 4337 0x0993, // 0993..09A8; BENGALI 4338 0x09A9, // 09A9 ; UNKNOWN 4339 0x09AA, // 09AA..09B0; BENGALI 4340 0x09B1, // 09B1 ; UNKNOWN 4341 0x09B2, // 09B2 ; BENGALI 4342 0x09B3, // 09B3..09B5; UNKNOWN 4343 0x09B6, // 09B6..09B9; BENGALI 4344 0x09BA, // 09BA..09BB; UNKNOWN 4345 0x09BC, // 09BC..09C4; BENGALI 4346 0x09C5, // 09C5..09C6; UNKNOWN 4347 0x09C7, // 09C7..09C8; BENGALI 4348 0x09C9, // 09C9..09CA; UNKNOWN 4349 0x09CB, // 09CB..09CE; BENGALI 4350 0x09CF, // 09CF..09D6; UNKNOWN 4351 0x09D7, // 09D7 ; BENGALI 4352 0x09D8, // 09D8..09DB; UNKNOWN 4353 0x09DC, // 09DC..09DD; BENGALI 4354 0x09DE, // 09DE ; UNKNOWN 4355 0x09DF, // 09DF..09E3; BENGALI 4356 0x09E4, // 09E4..09E5; UNKNOWN 4357 0x09E6, // 09E6..09FB; BENGALI 4358 0x09FC, // 09FC..0A00; UNKNOWN 4359 0x0A01, // 0A01..0A03; GURMUKHI 4360 0x0A04, // 0A04 ; UNKNOWN 4361 0x0A05, // 0A05..0A0A; GURMUKHI 4362 0x0A0B, // 0A0B..0A0E; UNKNOWN 4363 0x0A0F, // 0A0F..0A10; GURMUKHI 4364 0x0A11, // 0A11..0A12; UNKNOWN 4365 0x0A13, // 0A13..0A28; GURMUKHI 4366 0x0A29, // 0A29 ; UNKNOWN 4367 0x0A2A, // 0A2A..0A30; GURMUKHI 4368 0x0A31, // 0A31 ; UNKNOWN 4369 0x0A32, // 0A32..0A33; GURMUKHI 4370 0x0A34, // 0A34 ; UNKNOWN 4371 0x0A35, // 
0A35..0A36; GURMUKHI 4372 0x0A37, // 0A37 ; UNKNOWN 4373 0x0A38, // 0A38..0A39; GURMUKHI 4374 0x0A3A, // 0A3A..0A3B; UNKNOWN 4375 0x0A3C, // 0A3C ; GURMUKHI 4376 0x0A3D, // 0A3D ; UNKNOWN 4377 0x0A3E, // 0A3E..0A42; GURMUKHI 4378 0x0A43, // 0A43..0A46; UNKNOWN 4379 0x0A47, // 0A47..0A48; GURMUKHI 4380 0x0A49, // 0A49..0A4A; UNKNOWN 4381 0x0A4B, // 0A4B..0A4D; GURMUKHI 4382 0x0A4E, // 0A4E..0A50; UNKNOWN 4383 0x0A51, // 0A51 ; GURMUKHI 4384 0x0A52, // 0A52..0A58; UNKNOWN 4385 0x0A59, // 0A59..0A5C; GURMUKHI 4386 0x0A5D, // 0A5D ; UNKNOWN 4387 0x0A5E, // 0A5E ; GURMUKHI 4388 0x0A5F, // 0A5F..0A65; UNKNOWN 4389 0x0A66, // 0A66..0A75; GURMUKHI 4390 0x0A76, // 0A76..0A80; UNKNOWN 4391 0x0A81, // 0A81..0A83; GUJARATI 4392 0x0A84, // 0A84 ; UNKNOWN 4393 0x0A85, // 0A85..0A8D; GUJARATI 4394 0x0A8E, // 0A8E ; UNKNOWN 4395 0x0A8F, // 0A8F..0A91; GUJARATI 4396 0x0A92, // 0A92 ; UNKNOWN 4397 0x0A93, // 0A93..0AA8; GUJARATI 4398 0x0AA9, // 0AA9 ; UNKNOWN 4399 0x0AAA, // 0AAA..0AB0; GUJARATI 4400 0x0AB1, // 0AB1 ; UNKNOWN 4401 0x0AB2, // 0AB2..0AB3; GUJARATI 4402 0x0AB4, // 0AB4 ; UNKNOWN 4403 0x0AB5, // 0AB5..0AB9; GUJARATI 4404 0x0ABA, // 0ABA..0ABB; UNKNOWN 4405 0x0ABC, // 0ABC..0AC5; GUJARATI 4406 0x0AC6, // 0AC6 ; UNKNOWN 4407 0x0AC7, // 0AC7..0AC9; GUJARATI 4408 0x0ACA, // 0ACA ; UNKNOWN 4409 0x0ACB, // 0ACB..0ACD; GUJARATI 4410 0x0ACE, // 0ACE..0ACF; UNKNOWN 4411 0x0AD0, // 0AD0 ; GUJARATI 4412 0x0AD1, // 0AD1..0ADF; UNKNOWN 4413 0x0AE0, // 0AE0..0AE3; GUJARATI 4414 0x0AE4, // 0AE4..0AE5; UNKNOWN 4415 0x0AE6, // 0AE6..0AF1; GUJARATI 4416 0x0AF2, // 0AF2..0B00; UNKNOWN 4417 0x0B01, // 0B01..0B03; ORIYA 4418 0x0B04, // 0B04 ; UNKNOWN 4419 0x0B05, // 0B05..0B0C; ORIYA 4420 0x0B0D, // 0B0D..0B0E; UNKNOWN 4421 0x0B0F, // 0B0F..0B10; ORIYA 4422 0x0B11, // 0B11..0B12; UNKNOWN 4423 0x0B13, // 0B13..0B28; ORIYA 4424 0x0B29, // 0B29 ; UNKNOWN 4425 0x0B2A, // 0B2A..0B30; ORIYA 4426 0x0B31, // 0B31 ; UNKNOWN 4427 0x0B32, // 0B32..0B33; ORIYA 4428 0x0B34, // 0B34 ; UNKNOWN 4429 
0x0B35, // 0B35..0B39; ORIYA 4430 0x0B3A, // 0B3A..0B3B; UNKNOWN 4431 0x0B3C, // 0B3C..0B44; ORIYA 4432 0x0B45, // 0B45..0B46; UNKNOWN 4433 0x0B47, // 0B47..0B48; ORIYA 4434 0x0B49, // 0B49..0B4A; UNKNOWN 4435 0x0B4B, // 0B4B..0B4D; ORIYA 4436 0x0B4E, // 0B4E..0B55; UNKNOWN 4437 0x0B56, // 0B56..0B57; ORIYA 4438 0x0B58, // 0B58..0B5B; UNKNOWN 4439 0x0B5C, // 0B5C..0B5D; ORIYA 4440 0x0B5E, // 0B5E ; UNKNOWN 4441 0x0B5F, // 0B5F..0B63; ORIYA 4442 0x0B64, // 0B64..0B65; UNKNOWN 4443 0x0B66, // 0B66..0B77; ORIYA 4444 0x0B78, // 0B78..0B81; UNKNOWN 4445 0x0B82, // 0B82..0B83; TAMIL 4446 0x0B84, // 0B84 ; UNKNOWN 4447 0x0B85, // 0B85..0B8A; TAMIL 4448 0x0B8B, // 0B8B..0B8D; UNKNOWN 4449 0x0B8E, // 0B8E..0B90; TAMIL 4450 0x0B91, // 0B91 ; UNKNOWN 4451 0x0B92, // 0B92..0B95; TAMIL 4452 0x0B96, // 0B96..0B98; UNKNOWN 4453 0x0B99, // 0B99..0B9A; TAMIL 4454 0x0B9B, // 0B9B ; UNKNOWN 4455 0x0B9C, // 0B9C ; TAMIL 4456 0x0B9D, // 0B9D ; UNKNOWN 4457 0x0B9E, // 0B9E..0B9F; TAMIL 4458 0x0BA0, // 0BA0..0BA2; UNKNOWN 4459 0x0BA3, // 0BA3..0BA4; TAMIL 4460 0x0BA5, // 0BA5..0BA7; UNKNOWN 4461 0x0BA8, // 0BA8..0BAA; TAMIL 4462 0x0BAB, // 0BAB..0BAD; UNKNOWN 4463 0x0BAE, // 0BAE..0BB9; TAMIL 4464 0x0BBA, // 0BBA..0BBD; UNKNOWN 4465 0x0BBE, // 0BBE..0BC2; TAMIL 4466 0x0BC3, // 0BC3..0BC5; UNKNOWN 4467 0x0BC6, // 0BC6..0BC8; TAMIL 4468 0x0BC9, // 0BC9 ; UNKNOWN 4469 0x0BCA, // 0BCA..0BCD; TAMIL 4470 0x0BCE, // 0BCE..0BCF; UNKNOWN 4471 0x0BD0, // 0BD0 ; TAMIL 4472 0x0BD1, // 0BD1..0BD6; UNKNOWN 4473 0x0BD7, // 0BD7 ; TAMIL 4474 0x0BD8, // 0BD8..0BE5; UNKNOWN 4475 0x0BE6, // 0BE6..0BFA; TAMIL 4476 0x0BFB, // 0BFB..0BFF; UNKNOWN 4477 0x0C00, // 0C00..0C03; TELUGU 4478 0x0C04, // 0C04 ; UNKNOWN 4479 0x0C05, // 0C05..0C0C; TELUGU 4480 0x0C0D, // 0C0D ; UNKNOWN 4481 0x0C0E, // 0C0E..0C10; TELUGU 4482 0x0C11, // 0C11 ; UNKNOWN 4483 0x0C12, // 0C12..0C28; TELUGU 4484 0x0C29, // 0C29 ; UNKNOWN 4485 0x0C2A, // 0C2A..0C39; TELUGU 4486 0x0C3A, // 0C3A..0C3C; UNKNOWN 4487 0x0C3D, // 0C3D..0C44; TELUGU 
4488 0x0C45, // 0C45 ; UNKNOWN 4489 0x0C46, // 0C46..0C48; TELUGU 4490 0x0C49, // 0C49 ; UNKNOWN 4491 0x0C4A, // 0C4A..0C4D; TELUGU 4492 0x0C4E, // 0C4E..0C54; UNKNOWN 4493 0x0C55, // 0C55..0C56; TELUGU 4494 0x0C57, // 0C57 ; UNKNOWN 4495 0x0C58, // 0C58..0C59; TELUGU 4496 0x0C5A, // 0C5A..0C5F; UNKNOWN 4497 0x0C60, // 0C60..0C63; TELUGU 4498 0x0C64, // 0C64..0C65; UNKNOWN 4499 0x0C66, // 0C66..0C6F; TELUGU 4500 0x0C70, // 0C70..0C77; UNKNOWN 4501 0x0C78, // 0C78..0C7F; TELUGU 4502 0x0C80, // 0C80 ; UNKNOWN 4503 0x0C81, // 0C81..0C83; KANNADA 4504 0x0C84, // 0C84 ; UNKNOWN 4505 0x0C85, // 0C85..0C8C; KANNADA 4506 0x0C8D, // 0C8D ; UNKNOWN 4507 0x0C8E, // 0C8E..0C90; KANNADA 4508 0x0C91, // 0C91 ; UNKNOWN 4509 0x0C92, // 0C92..0CA8; KANNADA 4510 0x0CA9, // 0CA9 ; UNKNOWN 4511 0x0CAA, // 0CAA..0CB3; KANNADA 4512 0x0CB4, // 0CB4 ; UNKNOWN 4513 0x0CB5, // 0CB5..0CB9; KANNADA 4514 0x0CBA, // 0CBA..0CBB; UNKNOWN 4515 0x0CBC, // 0CBC..0CC4; KANNADA 4516 0x0CC5, // 0CC5 ; UNKNOWN 4517 0x0CC6, // 0CC6..0CC8; KANNADA 4518 0x0CC9, // 0CC9 ; UNKNOWN 4519 0x0CCA, // 0CCA..0CCD; KANNADA 4520 0x0CCE, // 0CCE..0CD4; UNKNOWN 4521 0x0CD5, // 0CD5..0CD6; KANNADA 4522 0x0CD7, // 0CD7..0CDD; UNKNOWN 4523 0x0CDE, // 0CDE ; KANNADA 4524 0x0CDF, // 0CDF ; UNKNOWN 4525 0x0CE0, // 0CE0..0CE3; KANNADA 4526 0x0CE4, // 0CE4..0CE5; UNKNOWN 4527 0x0CE6, // 0CE6..0CEF; KANNADA 4528 0x0CF0, // 0CF0 ; UNKNOWN 4529 0x0CF1, // 0CF1..0CF2; KANNADA 4530 0x0CF3, // 0CF3..0D00; UNKNOWN 4531 0x0D01, // 0D01..0D03; MALAYALAM 4532 0x0D04, // 0D04 ; UNKNOWN 4533 0x0D05, // 0D05..0D0C; MALAYALAM 4534 0x0D0D, // 0D0D ; UNKNOWN 4535 0x0D0E, // 0D0E..0D10; MALAYALAM 4536 0x0D11, // 0D11 ; UNKNOWN 4537 0x0D12, // 0D12..0D3A; MALAYALAM 4538 0x0D3B, // 0D3B..0D3C; UNKNOWN 4539 0x0D3D, // 0D3D..0D44; MALAYALAM 4540 0x0D45, // 0D45 ; UNKNOWN 4541 0x0D46, // 0D46..0D48; MALAYALAM 4542 0x0D49, // 0D49 ; UNKNOWN 4543 0x0D4A, // 0D4A..0D4E; MALAYALAM 4544 0x0D4F, // 0D4F..0D56; UNKNOWN 4545 0x0D57, // 0D57 ; MALAYALAM 
4546 0x0D58, // 0D58..0D5F; UNKNOWN 4547 0x0D60, // 0D60..0D63; MALAYALAM 4548 0x0D64, // 0D64..0D65; UNKNOWN 4549 0x0D66, // 0D66..0D75; MALAYALAM 4550 0x0D76, // 0D76..0D78; UNKNOWN 4551 0x0D79, // 0D79..0D7F; MALAYALAM 4552 0x0D80, // 0D80..0D81; UNKNOWN 4553 0x0D82, // 0D82..0D83; SINHALA 4554 0x0D84, // 0D84 ; UNKNOWN 4555 0x0D85, // 0D85..0D96; SINHALA 4556 0x0D97, // 0D97..0D99; UNKNOWN 4557 0x0D9A, // 0D9A..0DB1; SINHALA 4558 0x0DB2, // 0DB2 ; UNKNOWN 4559 0x0DB3, // 0DB3..0DBB; SINHALA 4560 0x0DBC, // 0DBC ; UNKNOWN 4561 0x0DBD, // 0DBD ; SINHALA 4562 0x0DBE, // 0DBE..0DBF; UNKNOWN 4563 0x0DC0, // 0DC0..0DC6; SINHALA 4564 0x0DC7, // 0DC7..0DC9; UNKNOWN 4565 0x0DCA, // 0DCA ; SINHALA 4566 0x0DCB, // 0DCB..0DCE; UNKNOWN 4567 0x0DCF, // 0DCF..0DD4; SINHALA 4568 0x0DD5, // 0DD5 ; UNKNOWN 4569 0x0DD6, // 0DD6 ; SINHALA 4570 0x0DD7, // 0DD7 ; UNKNOWN 4571 0x0DD8, // 0DD8..0DDF; SINHALA 4572 0x0DE0, // 0DE0..0DE5; UNKNOWN 4573 0x0DE6, // 0DE6..0DEF; SINHALA 4574 0x0DF0, // 0DF0..0DF1; UNKNOWN 4575 0x0DF2, // 0DF2..0DF4; SINHALA 4576 0x0DF5, // 0DF5..0E00; UNKNOWN 4577 0x0E01, // 0E01..0E3A; THAI 4578 0x0E3B, // 0E3B..0E3E; UNKNOWN 4579 0x0E3F, // 0E3F ; COMMON 4580 0x0E40, // 0E40..0E5B; THAI 4581 0x0E5C, // 0E5C..0E80; UNKNOWN 4582 0x0E81, // 0E81..0E82; LAO 4583 0x0E83, // 0E83 ; UNKNOWN 4584 0x0E84, // 0E84 ; LAO 4585 0x0E85, // 0E85..0E86; UNKNOWN 4586 0x0E87, // 0E87..0E88; LAO 4587 0x0E89, // 0E89 ; UNKNOWN 4588 0x0E8A, // 0E8A ; LAO 4589 0x0E8B, // 0E8B..0E8C; UNKNOWN 4590 0x0E8D, // 0E8D ; LAO 4591 0x0E8E, // 0E8E..0E93; UNKNOWN 4592 0x0E94, // 0E94..0E97; LAO 4593 0x0E98, // 0E98 ; UNKNOWN 4594 0x0E99, // 0E99..0E9F; LAO 4595 0x0EA0, // 0EA0 ; UNKNOWN 4596 0x0EA1, // 0EA1..0EA3; LAO 4597 0x0EA4, // 0EA4 ; UNKNOWN 4598 0x0EA5, // 0EA5 ; LAO 4599 0x0EA6, // 0EA6 ; UNKNOWN 4600 0x0EA7, // 0EA7 ; LAO 4601 0x0EA8, // 0EA8..0EA9; UNKNOWN 4602 0x0EAA, // 0EAA..0EAB; LAO 4603 0x0EAC, // 0EAC ; UNKNOWN 4604 0x0EAD, // 0EAD..0EB9; LAO 4605 0x0EBA, // 0EBA ; 
UNKNOWN 4606 0x0EBB, // 0EBB..0EBD; LAO 4607 0x0EBE, // 0EBE..0EBF; UNKNOWN 4608 0x0EC0, // 0EC0..0EC4; LAO 4609 0x0EC5, // 0EC5 ; UNKNOWN 4610 0x0EC6, // 0EC6 ; LAO 4611 0x0EC7, // 0EC7 ; UNKNOWN 4612 0x0EC8, // 0EC8..0ECD; LAO 4613 0x0ECE, // 0ECE..0ECF; UNKNOWN 4614 0x0ED0, // 0ED0..0ED9; LAO 4615 0x0EDA, // 0EDA..0EDB; UNKNOWN 4616 0x0EDC, // 0EDC..0EDF; LAO 4617 0x0EE0, // 0EE0..0EFF; UNKNOWN 4618 0x0F00, // 0F00..0F47; TIBETAN 4619 0x0F48, // 0F48 ; UNKNOWN 4620 0x0F49, // 0F49..0F6C; TIBETAN 4621 0x0F6D, // 0F6D..0F70; UNKNOWN 4622 0x0F71, // 0F71..0F97; TIBETAN 4623 0x0F98, // 0F98 ; UNKNOWN 4624 0x0F99, // 0F99..0FBC; TIBETAN 4625 0x0FBD, // 0FBD ; UNKNOWN 4626 0x0FBE, // 0FBE..0FCC; TIBETAN 4627 0x0FCD, // 0FCD ; UNKNOWN 4628 0x0FCE, // 0FCE..0FD4; TIBETAN 4629 0x0FD5, // 0FD5..0FD8; COMMON 4630 0x0FD9, // 0FD9..0FDA; TIBETAN 4631 0x0FDB, // 0FDB..0FFF; UNKNOWN 4632 0x1000, // 1000..109F; MYANMAR 4633 0x10A0, // 10A0..10C5; GEORGIAN 4634 0x10C6, // 10C6 ; UNKNOWN 4635 0x10C7, // 10C7 ; GEORGIAN 4636 0x10C8, // 10C8..10CC; UNKNOWN 4637 0x10CD, // 10CD ; GEORGIAN 4638 0x10CE, // 10CE..10CF; UNKNOWN 4639 0x10D0, // 10D0..10FA; GEORGIAN 4640 0x10FB, // 10FB ; COMMON 4641 0x10FC, // 10FC..10FF; GEORGIAN 4642 0x1100, // 1100..11FF; HANGUL 4643 0x1200, // 1200..1248; ETHIOPIC 4644 0x1249, // 1249 ; UNKNOWN 4645 0x124A, // 124A..124D; ETHIOPIC 4646 0x124E, // 124E..124F; UNKNOWN 4647 0x1250, // 1250..1256; ETHIOPIC 4648 0x1257, // 1257 ; UNKNOWN 4649 0x1258, // 1258 ; ETHIOPIC 4650 0x1259, // 1259 ; UNKNOWN 4651 0x125A, // 125A..125D; ETHIOPIC 4652 0x125E, // 125E..125F; UNKNOWN 4653 0x1260, // 1260..1288; ETHIOPIC 4654 0x1289, // 1289 ; UNKNOWN 4655 0x128A, // 128A..128D; ETHIOPIC 4656 0x128E, // 128E..128F; UNKNOWN 4657 0x1290, // 1290..12B0; ETHIOPIC 4658 0x12B1, // 12B1 ; UNKNOWN 4659 0x12B2, // 12B2..12B5; ETHIOPIC 4660 0x12B6, // 12B6..12B7; UNKNOWN 4661 0x12B8, // 12B8..12BE; ETHIOPIC 4662 0x12BF, // 12BF ; UNKNOWN 4663 0x12C0, // 12C0 ; ETHIOPIC 4664 
0x12C1, // 12C1 ; UNKNOWN 4665 0x12C2, // 12C2..12C5; ETHIOPIC 4666 0x12C6, // 12C6..12C7; UNKNOWN 4667 0x12C8, // 12C8..12D6; ETHIOPIC 4668 0x12D7, // 12D7 ; UNKNOWN 4669 0x12D8, // 12D8..1310; ETHIOPIC 4670 0x1311, // 1311 ; UNKNOWN 4671 0x1312, // 1312..1315; ETHIOPIC 4672 0x1316, // 1316..1317; UNKNOWN 4673 0x1318, // 1318..135A; ETHIOPIC 4674 0x135B, // 135B..135C; UNKNOWN 4675 0x135D, // 135D..137C; ETHIOPIC 4676 0x137D, // 137D..137F; UNKNOWN 4677 0x1380, // 1380..1399; ETHIOPIC 4678 0x139A, // 139A..139F; UNKNOWN 4679 0x13A0, // 13A0..13F4; CHEROKEE 4680 0x13F5, // 13F5..13FF; UNKNOWN 4681 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 4682 0x1680, // 1680..169C; OGHAM 4683 0x169D, // 169D..169F; UNKNOWN 4684 0x16A0, // 16A0..16EA; RUNIC 4685 0x16EB, // 16EB..16ED; COMMON 4686 0x16EE, // 16EE..16F8; RUNIC 4687 0x16F9, // 16F9..16FF; UNKNOWN 4688 0x1700, // 1700..170C; TAGALOG 4689 0x170D, // 170D ; UNKNOWN 4690 0x170E, // 170E..1714; TAGALOG 4691 0x1715, // 1715..171F; UNKNOWN 4692 0x1720, // 1720..1734; HANUNOO 4693 0x1735, // 1735..1736; COMMON 4694 0x1737, // 1737..173F; UNKNOWN 4695 0x1740, // 1740..1753; BUHID 4696 0x1754, // 1754..175F; UNKNOWN 4697 0x1760, // 1760..176C; TAGBANWA 4698 0x176D, // 176D ; UNKNOWN 4699 0x176E, // 176E..1770; TAGBANWA 4700 0x1771, // 1771 ; UNKNOWN 4701 0x1772, // 1772..1773; TAGBANWA 4702 0x1774, // 1774..177F; UNKNOWN 4703 0x1780, // 1780..17DD; KHMER 4704 0x17DE, // 17DE..17DF; UNKNOWN 4705 0x17E0, // 17E0..17E9; KHMER 4706 0x17EA, // 17EA..17EF; UNKNOWN 4707 0x17F0, // 17F0..17F9; KHMER 4708 0x17FA, // 17FA..17FF; UNKNOWN 4709 0x1800, // 1800..1801; MONGOLIAN 4710 0x1802, // 1802..1803; COMMON 4711 0x1804, // 1804 ; MONGOLIAN 4712 0x1805, // 1805 ; COMMON 4713 0x1806, // 1806..180E; MONGOLIAN 4714 0x180F, // 180F ; UNKNOWN 4715 0x1810, // 1810..1819; MONGOLIAN 4716 0x181A, // 181A..181F; UNKNOWN 4717 0x1820, // 1820..1877; MONGOLIAN 4718 0x1878, // 1878..187F; UNKNOWN 4719 0x1880, // 1880..18AA; MONGOLIAN 4720 0x18AB, // 
18AB..18AF; UNKNOWN 4721 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 4722 0x18F6, // 18F6..18FF; UNKNOWN 4723 0x1900, // 1900..191E; LIMBU 4724 0x191F, // 191F ; UNKNOWN 4725 0x1920, // 1920..192B; LIMBU 4726 0x192C, // 192C..192F; UNKNOWN 4727 0x1930, // 1930..193B; LIMBU 4728 0x193C, // 193C..193F; UNKNOWN 4729 0x1940, // 1940 ; LIMBU 4730 0x1941, // 1941..1943; UNKNOWN 4731 0x1944, // 1944..194F; LIMBU 4732 0x1950, // 1950..196D; TAI_LE 4733 0x196E, // 196E..196F; UNKNOWN 4734 0x1970, // 1970..1974; TAI_LE 4735 0x1975, // 1975..197F; UNKNOWN 4736 0x1980, // 1980..19AB; NEW_TAI_LUE 4737 0x19AC, // 19AC..19AF; UNKNOWN 4738 0x19B0, // 19B0..19C9; NEW_TAI_LUE 4739 0x19CA, // 19CA..19CF; UNKNOWN 4740 0x19D0, // 19D0..19DA; NEW_TAI_LUE 4741 0x19DB, // 19DB..19DD; UNKNOWN 4742 0x19DE, // 19DE..19DF; NEW_TAI_LUE 4743 0x19E0, // 19E0..19FF; KHMER 4744 0x1A00, // 1A00..1A1B; BUGINESE 4745 0x1A1C, // 1A1C..1A1D; UNKNOWN 4746 0x1A1E, // 1A1E..1A1F; BUGINESE 4747 0x1A20, // 1A20..1A5E; TAI_THAM 4748 0x1A5F, // 1A5F ; UNKNOWN 4749 0x1A60, // 1A60..1A7C; TAI_THAM 4750 0x1A7D, // 1A7D..1A7E; UNKNOWN 4751 0x1A7F, // 1A7F..1A89; TAI_THAM 4752 0x1A8A, // 1A8A..1A8F; UNKNOWN 4753 0x1A90, // 1A90..1A99; TAI_THAM 4754 0x1A9A, // 1A9A..1A9F; UNKNOWN 4755 0x1AA0, // 1AA0..1AAD; TAI_THAM 4756 0x1AAE, // 1AAE..1AAF; UNKNOWN 4757 0x1AB0, // 1AB0..1ABE; INHERITED 4758 0x1ABF, // 1ABF..1AFF; UNKNOWN 4759 0x1B00, // 1B00..1B4B; BALINESE 4760 0x1B4C, // 1B4C..1B4F; UNKNOWN 4761 0x1B50, // 1B50..1B7C; BALINESE 4762 0x1B7D, // 1B7D..1B7F; UNKNOWN 4763 0x1B80, // 1B80..1BBF; SUNDANESE 4764 0x1BC0, // 1BC0..1BF3; BATAK 4765 0x1BF4, // 1BF4..1BFB; UNKNOWN 4766 0x1BFC, // 1BFC..1BFF; BATAK 4767 0x1C00, // 1C00..1C37; LEPCHA 4768 0x1C38, // 1C38..1C3A; UNKNOWN 4769 0x1C3B, // 1C3B..1C49; LEPCHA 4770 0x1C4A, // 1C4A..1C4C; UNKNOWN 4771 0x1C4D, // 1C4D..1C4F; LEPCHA 4772 0x1C50, // 1C50..1C7F; OL_CHIKI 4773 0x1C80, // 1C80..1CBF; UNKNOWN 4774 0x1CC0, // 1CC0..1CC7; SUNDANESE 4775 0x1CC8, // 1CC8..1CCF; 
UNKNOWN 4776 0x1CD0, // 1CD0..1CD2; INHERITED 4777 0x1CD3, // 1CD3 ; COMMON 4778 0x1CD4, // 1CD4..1CE0; INHERITED 4779 0x1CE1, // 1CE1 ; COMMON 4780 0x1CE2, // 1CE2..1CE8; INHERITED 4781 0x1CE9, // 1CE9..1CEC; COMMON 4782 0x1CED, // 1CED ; INHERITED 4783 0x1CEE, // 1CEE..1CF3; COMMON 4784 0x1CF4, // 1CF4 ; INHERITED 4785 0x1CF5, // 1CF5..1CF6; COMMON 4786 0x1CF7, // 1CF7 ; UNKNOWN 4787 0x1CF8, // 1CF8..1CF9; INHERITED 4788 0x1CFA, // 1CFA..1CFF; UNKNOWN 4789 0x1D00, // 1D00..1D25; LATIN 4790 0x1D26, // 1D26..1D2A; GREEK 4791 0x1D2B, // 1D2B ; CYRILLIC 4792 0x1D2C, // 1D2C..1D5C; LATIN 4793 0x1D5D, // 1D5D..1D61; GREEK 4794 0x1D62, // 1D62..1D65; LATIN 4795 0x1D66, // 1D66..1D6A; GREEK 4796 0x1D6B, // 1D6B..1D77; LATIN 4797 0x1D78, // 1D78 ; CYRILLIC 4798 0x1D79, // 1D79..1DBE; LATIN 4799 0x1DBF, // 1DBF ; GREEK 4800 0x1DC0, // 1DC0..1DF5; INHERITED 4801 0x1DF6, // 1DF6..1DFB; UNKNOWN 4802 0x1DFC, // 1DFC..1DFF; INHERITED 4803 0x1E00, // 1E00..1EFF; LATIN 4804 0x1F00, // 1F00..1F15; GREEK 4805 0x1F16, // 1F16..1F17; UNKNOWN 4806 0x1F18, // 1F18..1F1D; GREEK 4807 0x1F1E, // 1F1E..1F1F; UNKNOWN 4808 0x1F20, // 1F20..1F45; GREEK 4809 0x1F46, // 1F46..1F47; UNKNOWN 4810 0x1F48, // 1F48..1F4D; GREEK 4811 0x1F4E, // 1F4E..1F4F; UNKNOWN 4812 0x1F50, // 1F50..1F57; GREEK 4813 0x1F58, // 1F58 ; UNKNOWN 4814 0x1F59, // 1F59 ; GREEK 4815 0x1F5A, // 1F5A ; UNKNOWN 4816 0x1F5B, // 1F5B ; GREEK 4817 0x1F5C, // 1F5C ; UNKNOWN 4818 0x1F5D, // 1F5D ; GREEK 4819 0x1F5E, // 1F5E ; UNKNOWN 4820 0x1F5F, // 1F5F..1F7D; GREEK 4821 0x1F7E, // 1F7E..1F7F; UNKNOWN 4822 0x1F80, // 1F80..1FB4; GREEK 4823 0x1FB5, // 1FB5 ; UNKNOWN 4824 0x1FB6, // 1FB6..1FC4; GREEK 4825 0x1FC5, // 1FC5 ; UNKNOWN 4826 0x1FC6, // 1FC6..1FD3; GREEK 4827 0x1FD4, // 1FD4..1FD5; UNKNOWN 4828 0x1FD6, // 1FD6..1FDB; GREEK 4829 0x1FDC, // 1FDC ; UNKNOWN 4830 0x1FDD, // 1FDD..1FEF; GREEK 4831 0x1FF0, // 1FF0..1FF1; UNKNOWN 4832 0x1FF2, // 1FF2..1FF4; GREEK 4833 0x1FF5, // 1FF5 ; UNKNOWN 4834 0x1FF6, // 1FF6..1FFE; GREEK 
4835 0x1FFF, // 1FFF ; UNKNOWN 4836 0x2000, // 2000..200B; COMMON 4837 0x200C, // 200C..200D; INHERITED 4838 0x200E, // 200E..2064; COMMON 4839 0x2065, // 2065 ; UNKNOWN 4840 0x2066, // 2066..2070; COMMON 4841 0x2071, // 2071 ; LATIN 4842 0x2072, // 2072..2073; UNKNOWN 4843 0x2074, // 2074..207E; COMMON 4844 0x207F, // 207F ; LATIN 4845 0x2080, // 2080..208E; COMMON 4846 0x208F, // 208F ; UNKNOWN 4847 0x2090, // 2090..209C; LATIN 4848 0x209D, // 209D..209F; UNKNOWN 4849 0x20A0, // 20A0..20BD; COMMON 4850 0x20BE, // 20BE..20CF; UNKNOWN 4851 0x20D0, // 20D0..20F0; INHERITED 4852 0x20F1, // 20F1..20FF; UNKNOWN 4853 0x2100, // 2100..2125; COMMON 4854 0x2126, // 2126 ; GREEK 4855 0x2127, // 2127..2129; COMMON 4856 0x212A, // 212A..212B; LATIN 4857 0x212C, // 212C..2131; COMMON 4858 0x2132, // 2132 ; LATIN 4859 0x2133, // 2133..214D; COMMON 4860 0x214E, // 214E ; LATIN 4861 0x214F, // 214F..215F; COMMON 4862 0x2160, // 2160..2188; LATIN 4863 0x2189, // 2189 ; COMMON 4864 0x218A, // 218A..218F; UNKNOWN 4865 0x2190, // 2190..23FA; COMMON 4866 0x23FB, // 23FB..23FF; UNKNOWN 4867 0x2400, // 2400..2426; COMMON 4868 0x2427, // 2427..243F; UNKNOWN 4869 0x2440, // 2440..244A; COMMON 4870 0x244B, // 244B..245F; UNKNOWN 4871 0x2460, // 2460..27FF; COMMON 4872 0x2800, // 2800..28FF; BRAILLE 4873 0x2900, // 2900..2B73; COMMON 4874 0x2B74, // 2B74..2B75; UNKNOWN 4875 0x2B76, // 2B76..2B95; COMMON 4876 0x2B96, // 2B96..2B97; UNKNOWN 4877 0x2B98, // 2B98..2BB9; COMMON 4878 0x2BBA, // 2BBA..2BBC; UNKNOWN 4879 0x2BBD, // 2BBD..2BC8; COMMON 4880 0x2BC9, // 2BC9 ; UNKNOWN 4881 0x2BCA, // 2BCA..2BD1; COMMON 4882 0x2BD2, // 2BD2..2BFF; UNKNOWN 4883 0x2C00, // 2C00..2C2E; GLAGOLITIC 4884 0x2C2F, // 2C2F ; UNKNOWN 4885 0x2C30, // 2C30..2C5E; GLAGOLITIC 4886 0x2C5F, // 2C5F ; UNKNOWN 4887 0x2C60, // 2C60..2C7F; LATIN 4888 0x2C80, // 2C80..2CF3; COPTIC 4889 0x2CF4, // 2CF4..2CF8; UNKNOWN 4890 0x2CF9, // 2CF9..2CFF; COPTIC 4891 0x2D00, // 2D00..2D25; GEORGIAN 4892 0x2D26, // 2D26 ; UNKNOWN 4893 
0x2D27, // 2D27 ; GEORGIAN 4894 0x2D28, // 2D28..2D2C; UNKNOWN 4895 0x2D2D, // 2D2D ; GEORGIAN 4896 0x2D2E, // 2D2E..2D2F; UNKNOWN 4897 0x2D30, // 2D30..2D67; TIFINAGH 4898 0x2D68, // 2D68..2D6E; UNKNOWN 4899 0x2D6F, // 2D6F..2D70; TIFINAGH 4900 0x2D71, // 2D71..2D7E; UNKNOWN 4901 0x2D7F, // 2D7F ; TIFINAGH 4902 0x2D80, // 2D80..2D96; ETHIOPIC 4903 0x2D97, // 2D97..2D9F; UNKNOWN 4904 0x2DA0, // 2DA0..2DA6; ETHIOPIC 4905 0x2DA7, // 2DA7 ; UNKNOWN 4906 0x2DA8, // 2DA8..2DAE; ETHIOPIC 4907 0x2DAF, // 2DAF ; UNKNOWN 4908 0x2DB0, // 2DB0..2DB6; ETHIOPIC 4909 0x2DB7, // 2DB7 ; UNKNOWN 4910 0x2DB8, // 2DB8..2DBE; ETHIOPIC 4911 0x2DBF, // 2DBF ; UNKNOWN 4912 0x2DC0, // 2DC0..2DC6; ETHIOPIC 4913 0x2DC7, // 2DC7 ; UNKNOWN 4914 0x2DC8, // 2DC8..2DCE; ETHIOPIC 4915 0x2DCF, // 2DCF ; UNKNOWN 4916 0x2DD0, // 2DD0..2DD6; ETHIOPIC 4917 0x2DD7, // 2DD7 ; UNKNOWN 4918 0x2DD8, // 2DD8..2DDE; ETHIOPIC 4919 0x2DDF, // 2DDF ; UNKNOWN 4920 0x2DE0, // 2DE0..2DFF; CYRILLIC 4921 0x2E00, // 2E00..2E42; COMMON 4922 0x2E43, // 2E43..2E7F; UNKNOWN 4923 0x2E80, // 2E80..2E99; HAN 4924 0x2E9A, // 2E9A ; UNKNOWN 4925 0x2E9B, // 2E9B..2EF3; HAN 4926 0x2EF4, // 2EF4..2EFF; UNKNOWN 4927 0x2F00, // 2F00..2FD5; HAN 4928 0x2FD6, // 2FD6..2FEF; UNKNOWN 4929 0x2FF0, // 2FF0..2FFB; COMMON 4930 0x2FFC, // 2FFC..2FFF; UNKNOWN 4931 0x3000, // 3000..3004; COMMON 4932 0x3005, // 3005 ; HAN 4933 0x3006, // 3006 ; COMMON 4934 0x3007, // 3007 ; HAN 4935 0x3008, // 3008..3020; COMMON 4936 0x3021, // 3021..3029; HAN 4937 0x302A, // 302A..302D; INHERITED 4938 0x302E, // 302E..302F; HANGUL 4939 0x3030, // 3030..3037; COMMON 4940 0x3038, // 3038..303B; HAN 4941 0x303C, // 303C..303F; COMMON 4942 0x3040, // 3040 ; UNKNOWN 4943 0x3041, // 3041..3096; HIRAGANA 4944 0x3097, // 3097..3098; UNKNOWN 4945 0x3099, // 3099..309A; INHERITED 4946 0x309B, // 309B..309C; COMMON 4947 0x309D, // 309D..309F; HIRAGANA 4948 0x30A0, // 30A0 ; COMMON 4949 0x30A1, // 30A1..30FA; KATAKANA 4950 0x30FB, // 30FB..30FC; COMMON 4951 0x30FD, // 
30FD..30FF; KATAKANA 4952 0x3100, // 3100..3104; UNKNOWN 4953 0x3105, // 3105..312D; BOPOMOFO 4954 0x312E, // 312E..3130; UNKNOWN 4955 0x3131, // 3131..318E; HANGUL 4956 0x318F, // 318F ; UNKNOWN 4957 0x3190, // 3190..319F; COMMON 4958 0x31A0, // 31A0..31BA; BOPOMOFO 4959 0x31BB, // 31BB..31BF; UNKNOWN 4960 0x31C0, // 31C0..31E3; COMMON 4961 0x31E4, // 31E4..31EF; UNKNOWN 4962 0x31F0, // 31F0..31FF; KATAKANA 4963 0x3200, // 3200..321E; HANGUL 4964 0x321F, // 321F ; UNKNOWN 4965 0x3220, // 3220..325F; COMMON 4966 0x3260, // 3260..327E; HANGUL 4967 0x327F, // 327F..32CF; COMMON 4968 0x32D0, // 32D0..32FE; KATAKANA 4969 0x32FF, // 32FF ; UNKNOWN 4970 0x3300, // 3300..3357; KATAKANA 4971 0x3358, // 3358..33FF; COMMON 4972 0x3400, // 3400..4DB5; HAN 4973 0x4DB6, // 4DB6..4DBF; UNKNOWN 4974 0x4DC0, // 4DC0..4DFF; COMMON 4975 0x4E00, // 4E00..9FCC; HAN 4976 0x9FCD, // 9FCD..9FFF; UNKNOWN 4977 0xA000, // A000..A48C; YI 4978 0xA48D, // A48D..A48F; UNKNOWN 4979 0xA490, // A490..A4C6; YI 4980 0xA4C7, // A4C7..A4CF; UNKNOWN 4981 0xA4D0, // A4D0..A4FF; LISU 4982 0xA500, // A500..A62B; VAI 4983 0xA62C, // A62C..A63F; UNKNOWN 4984 0xA640, // A640..A69D; CYRILLIC 4985 0xA69E, // A69E ; UNKNOWN 4986 0xA69F, // A69F ; CYRILLIC 4987 0xA6A0, // A6A0..A6F7; BAMUM 4988 0xA6F8, // A6F8..A6FF; UNKNOWN 4989 0xA700, // A700..A721; COMMON 4990 0xA722, // A722..A787; LATIN 4991 0xA788, // A788..A78A; COMMON 4992 0xA78B, // A78B..A78E; LATIN 4993 0xA78F, // A78F ; UNKNOWN 4994 0xA790, // A790..A7AD; LATIN 4995 0xA7AE, // A7AE..A7AF; UNKNOWN 4996 0xA7B0, // A7B0..A7B1; LATIN 4997 0xA7B2, // A7B2..A7F6; UNKNOWN 4998 0xA7F7, // A7F7..A7FF; LATIN 4999 0xA800, // A800..A82B; SYLOTI_NAGRI 5000 0xA82C, // A82C..A82F; UNKNOWN 5001 0xA830, // A830..A839; COMMON 5002 0xA83A, // A83A..A83F; UNKNOWN 5003 0xA840, // A840..A877; PHAGS_PA 5004 0xA878, // A878..A87F; UNKNOWN 5005 0xA880, // A880..A8C4; SAURASHTRA 5006 0xA8C5, // A8C5..A8CD; UNKNOWN 5007 0xA8CE, // A8CE..A8D9; SAURASHTRA 5008 0xA8DA, // 
A8DA..A8DF; UNKNOWN 5009 0xA8E0, // A8E0..A8FB; DEVANAGARI 5010 0xA8FC, // A8FC..A8FF; UNKNOWN 5011 0xA900, // A900..A92D; KAYAH_LI 5012 0xA92E, // A92E ; COMMON 5013 0xA92F, // A92F ; KAYAH_LI 5014 0xA930, // A930..A953; REJANG 5015 0xA954, // A954..A95E; UNKNOWN 5016 0xA95F, // A95F ; REJANG 5017 0xA960, // A960..A97C; HANGUL 5018 0xA97D, // A97D..A97F; UNKNOWN 5019 0xA980, // A980..A9CD; JAVANESE 5020 0xA9CE, // A9CE ; UNKNOWN 5021 0xA9CF, // A9CF ; COMMON 5022 0xA9D0, // A9D0..A9D9; JAVANESE 5023 0xA9DA, // A9DA..A9DD; UNKNOWN 5024 0xA9DE, // A9DE..A9DF; JAVANESE 5025 0xA9E0, // A9E0..A9FE; MYANMAR 5026 0xA9FF, // A9FF ; UNKNOWN 5027 0xAA00, // AA00..AA36; CHAM 5028 0xAA37, // AA37..AA3F; UNKNOWN 5029 0xAA40, // AA40..AA4D; CHAM 5030 0xAA4E, // AA4E..AA4F; UNKNOWN 5031 0xAA50, // AA50..AA59; CHAM 5032 0xAA5A, // AA5A..AA5B; UNKNOWN 5033 0xAA5C, // AA5C..AA5F; CHAM 5034 0xAA60, // AA60..AA7F; MYANMAR 5035 0xAA80, // AA80..AAC2; TAI_VIET 5036 0xAAC3, // AAC3..AADA; UNKNOWN 5037 0xAADB, // AADB..AADF; TAI_VIET 5038 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5039 0xAAF7, // AAF7..AB00; UNKNOWN 5040 0xAB01, // AB01..AB06; ETHIOPIC 5041 0xAB07, // AB07..AB08; UNKNOWN 5042 0xAB09, // AB09..AB0E; ETHIOPIC 5043 0xAB0F, // AB0F..AB10; UNKNOWN 5044 0xAB11, // AB11..AB16; ETHIOPIC 5045 0xAB17, // AB17..AB1F; UNKNOWN 5046 0xAB20, // AB20..AB26; ETHIOPIC 5047 0xAB27, // AB27 ; UNKNOWN 5048 0xAB28, // AB28..AB2E; ETHIOPIC 5049 0xAB2F, // AB2F ; UNKNOWN 5050 0xAB30, // AB30..AB5A; LATIN 5051 0xAB5B, // AB5B ; COMMON 5052 0xAB5C, // AB5C..AB5F; LATIN 5053 0xAB60, // AB60..AB63; UNKNOWN 5054 0xAB64, // AB64 ; LATIN 5055 0xAB65, // AB65 ; GREEK 5056 0xAB66, // AB66..ABBF; UNKNOWN 5057 0xABC0, // ABC0..ABED; MEETEI_MAYEK 5058 0xABEE, // ABEE..ABEF; UNKNOWN 5059 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5060 0xABFA, // ABFA..ABFF; UNKNOWN 5061 0xAC00, // AC00..D7A3; HANGUL 5062 0xD7A4, // D7A4..D7AF; UNKNOWN 5063 0xD7B0, // D7B0..D7C6; HANGUL 5064 0xD7C7, // D7C7..D7CA; UNKNOWN 5065 0xD7CB, // 
D7CB..D7FB; HANGUL 5066 0xD7FC, // D7FC..F8FF; UNKNOWN 5067 0xF900, // F900..FA6D; HAN 5068 0xFA6E, // FA6E..FA6F; UNKNOWN 5069 0xFA70, // FA70..FAD9; HAN 5070 0xFADA, // FADA..FAFF; UNKNOWN 5071 0xFB00, // FB00..FB06; LATIN 5072 0xFB07, // FB07..FB12; UNKNOWN 5073 0xFB13, // FB13..FB17; ARMENIAN 5074 0xFB18, // FB18..FB1C; UNKNOWN 5075 0xFB1D, // FB1D..FB36; HEBREW 5076 0xFB37, // FB37 ; UNKNOWN 5077 0xFB38, // FB38..FB3C; HEBREW 5078 0xFB3D, // FB3D ; UNKNOWN 5079 0xFB3E, // FB3E ; HEBREW 5080 0xFB3F, // FB3F ; UNKNOWN 5081 0xFB40, // FB40..FB41; HEBREW 5082 0xFB42, // FB42 ; UNKNOWN 5083 0xFB43, // FB43..FB44; HEBREW 5084 0xFB45, // FB45 ; UNKNOWN 5085 0xFB46, // FB46..FB4F; HEBREW 5086 0xFB50, // FB50..FBC1; ARABIC 5087 0xFBC2, // FBC2..FBD2; UNKNOWN 5088 0xFBD3, // FBD3..FD3D; ARABIC 5089 0xFD3E, // FD3E..FD3F; COMMON 5090 0xFD40, // FD40..FD4F; UNKNOWN 5091 0xFD50, // FD50..FD8F; ARABIC 5092 0xFD90, // FD90..FD91; UNKNOWN 5093 0xFD92, // FD92..FDC7; ARABIC 5094 0xFDC8, // FDC8..FDEF; UNKNOWN 5095 0xFDF0, // FDF0..FDFD; ARABIC 5096 0xFDFE, // FDFE..FDFF; UNKNOWN 5097 0xFE00, // FE00..FE0F; INHERITED 5098 0xFE10, // FE10..FE19; COMMON 5099 0xFE1A, // FE1A..FE1F; UNKNOWN 5100 0xFE20, // FE20..FE2D; INHERITED 5101 0xFE2E, // FE2E..FE2F; UNKNOWN 5102 0xFE30, // FE30..FE52; COMMON 5103 0xFE53, // FE53 ; UNKNOWN 5104 0xFE54, // FE54..FE66; COMMON 5105 0xFE67, // FE67 ; UNKNOWN 5106 0xFE68, // FE68..FE6B; COMMON 5107 0xFE6C, // FE6C..FE6F; UNKNOWN 5108 0xFE70, // FE70..FE74; ARABIC 5109 0xFE75, // FE75 ; UNKNOWN 5110 0xFE76, // FE76..FEFC; ARABIC 5111 0xFEFD, // FEFD..FEFE; UNKNOWN 5112 0xFEFF, // FEFF ; COMMON 5113 0xFF00, // FF00 ; UNKNOWN 5114 0xFF01, // FF01..FF20; COMMON 5115 0xFF21, // FF21..FF3A; LATIN 5116 0xFF3B, // FF3B..FF40; COMMON 5117 0xFF41, // FF41..FF5A; LATIN 5118 0xFF5B, // FF5B..FF65; COMMON 5119 0xFF66, // FF66..FF6F; KATAKANA 5120 0xFF70, // FF70 ; COMMON 5121 0xFF71, // FF71..FF9D; KATAKANA 5122 0xFF9E, // FF9E..FF9F; COMMON 5123 0xFFA0, // 
FFA0..FFBE; HANGUL 5124 0xFFBF, // FFBF..FFC1; UNKNOWN 5125 0xFFC2, // FFC2..FFC7; HANGUL 5126 0xFFC8, // FFC8..FFC9; UNKNOWN 5127 0xFFCA, // FFCA..FFCF; HANGUL 5128 0xFFD0, // FFD0..FFD1; UNKNOWN 5129 0xFFD2, // FFD2..FFD7; HANGUL 5130 0xFFD8, // FFD8..FFD9; UNKNOWN 5131 0xFFDA, // FFDA..FFDC; HANGUL 5132 0xFFDD, // FFDD..FFDF; UNKNOWN 5133 0xFFE0, // FFE0..FFE6; COMMON 5134 0xFFE7, // FFE7 ; UNKNOWN 5135 0xFFE8, // FFE8..FFEE; COMMON 5136 0xFFEF, // FFEF..FFF8; UNKNOWN 5137 0xFFF9, // FFF9..FFFD; COMMON 5138 0xFFFE, // FFFE..FFFF; UNKNOWN 5139 0x10000, // 10000..1000B; LINEAR_B 5140 0x1000C, // 1000C ; UNKNOWN 5141 0x1000D, // 1000D..10026; LINEAR_B 5142 0x10027, // 10027 ; UNKNOWN 5143 0x10028, // 10028..1003A; LINEAR_B 5144 0x1003B, // 1003B ; UNKNOWN 5145 0x1003C, // 1003C..1003D; LINEAR_B 5146 0x1003E, // 1003E ; UNKNOWN 5147 0x1003F, // 1003F..1004D; LINEAR_B 5148 0x1004E, // 1004E..1004F; UNKNOWN 5149 0x10050, // 10050..1005D; LINEAR_B 5150 0x1005E, // 1005E..1007F; UNKNOWN 5151 0x10080, // 10080..100FA; LINEAR_B 5152 0x100FB, // 100FB..100FF; UNKNOWN 5153 0x10100, // 10100..10102; COMMON 5154 0x10103, // 10103..10106; UNKNOWN 5155 0x10107, // 10107..10133; COMMON 5156 0x10134, // 10134..10136; UNKNOWN 5157 0x10137, // 10137..1013F; COMMON 5158 0x10140, // 10140..1018C; GREEK 5159 0x1018D, // 1018D..1018F; UNKNOWN 5160 0x10190, // 10190..1019B; COMMON 5161 0x1019C, // 1019C..1019F; UNKNOWN 5162 0x101A0, // 101A0 ; GREEK 5163 0x101A1, // 101A1..101CF; UNKNOWN 5164 0x101D0, // 101D0..101FC; COMMON 5165 0x101FD, // 101FD ; INHERITED 5166 0x101FE, // 101FE..1027F; UNKNOWN 5167 0x10280, // 10280..1029C; LYCIAN 5168 0x1029D, // 1029D..1029F; UNKNOWN 5169 0x102A0, // 102A0..102D0; CARIAN 5170 0x102D1, // 102D1..102DF; UNKNOWN 5171 0x102E0, // 102E0 ; INHERITED 5172 0x102E1, // 102E1..102FB; COMMON 5173 0x102FC, // 102FC..102FF; UNKNOWN 5174 0x10300, // 10300..10323; OLD_ITALIC 5175 0x10324, // 10324..1032F; UNKNOWN 5176 0x10330, // 10330..1034A; GOTHIC 5177 
0x1034B, // 1034B..1034F; UNKNOWN 5178 0x10350, // 10350..1037A; OLD_PERMIC 5179 0x1037B, // 1037B..1037F; UNKNOWN 5180 0x10380, // 10380..1039D; UGARITIC 5181 0x1039E, // 1039E ; UNKNOWN 5182 0x1039F, // 1039F ; UGARITIC 5183 0x103A0, // 103A0..103C3; OLD_PERSIAN 5184 0x103C4, // 103C4..103C7; UNKNOWN 5185 0x103C8, // 103C8..103D5; OLD_PERSIAN 5186 0x103D6, // 103D6..103FF; UNKNOWN 5187 0x10400, // 10400..1044F; DESERET 5188 0x10450, // 10450..1047F; SHAVIAN 5189 0x10480, // 10480..1049D; OSMANYA 5190 0x1049E, // 1049E..1049F; UNKNOWN 5191 0x104A0, // 104A0..104A9; OSMANYA 5192 0x104AA, // 104AA..104FF; UNKNOWN 5193 0x10500, // 10500..10527; ELBASAN 5194 0x10528, // 10528..1052F; UNKNOWN 5195 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 5196 0x10564, // 10564..1056E; UNKNOWN 5197 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 5198 0x10570, // 10570..105FF; UNKNOWN 5199 0x10600, // 10600..10736; LINEAR_A 5200 0x10737, // 10737..1073F; UNKNOWN 5201 0x10740, // 10740..10755; LINEAR_A 5202 0x10756, // 10756..1075F; UNKNOWN 5203 0x10760, // 10760..10767; LINEAR_A 5204 0x10768, // 10768..107FF; UNKNOWN 5205 0x10800, // 10800..10805; CYPRIOT 5206 0x10806, // 10806..10807; UNKNOWN 5207 0x10808, // 10808 ; CYPRIOT 5208 0x10809, // 10809 ; UNKNOWN 5209 0x1080A, // 1080A..10835; CYPRIOT 5210 0x10836, // 10836 ; UNKNOWN 5211 0x10837, // 10837..10838; CYPRIOT 5212 0x10839, // 10839..1083B; UNKNOWN 5213 0x1083C, // 1083C ; CYPRIOT 5214 0x1083D, // 1083D..1083E; UNKNOWN 5215 0x1083F, // 1083F ; CYPRIOT 5216 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 5217 0x10856, // 10856 ; UNKNOWN 5218 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 5219 0x10860, // 10860..1087F; PALMYRENE 5220 0x10880, // 10880..1089E; NABATAEAN 5221 0x1089F, // 1089F..108A6; UNKNOWN 5222 0x108A7, // 108A7..108AF; NABATAEAN 5223 0x108B0, // 108B0..108FF; UNKNOWN 5224 0x10900, // 10900..1091B; PHOENICIAN 5225 0x1091C, // 1091C..1091E; UNKNOWN 5226 0x1091F, // 1091F ; PHOENICIAN 5227 0x10920, // 10920..10939; LYDIAN 5228 
0x1093A, // 1093A..1093E; UNKNOWN 5229 0x1093F, // 1093F ; LYDIAN 5230 0x10940, // 10940..1097F; UNKNOWN 5231 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 5232 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 5233 0x109B8, // 109B8..109BD; UNKNOWN 5234 0x109BE, // 109BE..109BF; MEROITIC_CURSIVE 5235 0x109C0, // 109C0..109FF; UNKNOWN 5236 0x10A00, // 10A00..10A03; KHAROSHTHI 5237 0x10A04, // 10A04 ; UNKNOWN 5238 0x10A05, // 10A05..10A06; KHAROSHTHI 5239 0x10A07, // 10A07..10A0B; UNKNOWN 5240 0x10A0C, // 10A0C..10A13; KHAROSHTHI 5241 0x10A14, // 10A14 ; UNKNOWN 5242 0x10A15, // 10A15..10A17; KHAROSHTHI 5243 0x10A18, // 10A18 ; UNKNOWN 5244 0x10A19, // 10A19..10A33; KHAROSHTHI 5245 0x10A34, // 10A34..10A37; UNKNOWN 5246 0x10A38, // 10A38..10A3A; KHAROSHTHI 5247 0x10A3B, // 10A3B..10A3E; UNKNOWN 5248 0x10A3F, // 10A3F..10A47; KHAROSHTHI 5249 0x10A48, // 10A48..10A4F; UNKNOWN 5250 0x10A50, // 10A50..10A58; KHAROSHTHI 5251 0x10A59, // 10A59..10A5F; UNKNOWN 5252 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 5253 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 5254 0x10AA0, // 10AA0..10ABF; UNKNOWN 5255 0x10AC0, // 10AC0..10AE6; MANICHAEAN 5256 0x10AE7, // 10AE7..10AEA; UNKNOWN 5257 0x10AEB, // 10AEB..10AF6; MANICHAEAN 5258 0x10AF7, // 10AF7..10AFF; UNKNOWN 5259 0x10B00, // 10B00..10B35; AVESTAN 5260 0x10B36, // 10B36..10B38; UNKNOWN 5261 0x10B39, // 10B39..10B3F; AVESTAN 5262 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 5263 0x10B56, // 10B56..10B57; UNKNOWN 5264 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 5265 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 5266 0x10B73, // 10B73..10B77; UNKNOWN 5267 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 5268 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 5269 0x10B92, // 10B92..10B98; UNKNOWN 5270 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 5271 0x10B9D, // 10B9D..10BA8; UNKNOWN 5272 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 5273 0x10BB0, // 10BB0..10BFF; UNKNOWN 5274 0x10C00, // 10C00..10C48; OLD_TURKIC 5275 0x10C49, // 10C49..10E5F; 
UNKNOWN 5276 0x10E60, // 10E60..10E7E; ARABIC 5277 0x10E7F, // 10E7F..10FFF; UNKNOWN 5278 0x11000, // 11000..1104D; BRAHMI 5279 0x1104E, // 1104E..11051; UNKNOWN 5280 0x11052, // 11052..1106F; BRAHMI 5281 0x11070, // 11070..1107E; UNKNOWN 5282 0x1107F, // 1107F ; BRAHMI 5283 0x11080, // 11080..110C1; KAITHI 5284 0x110C2, // 110C2..110CF; UNKNOWN 5285 0x110D0, // 110D0..110E8; SORA_SOMPENG 5286 0x110E9, // 110E9..110EF; UNKNOWN 5287 0x110F0, // 110F0..110F9; SORA_SOMPENG 5288 0x110FA, // 110FA..110FF; UNKNOWN 5289 0x11100, // 11100..11134; CHAKMA 5290 0x11135, // 11135 ; UNKNOWN 5291 0x11136, // 11136..11143; CHAKMA 5292 0x11144, // 11144..1114F; UNKNOWN 5293 0x11150, // 11150..11176; MAHAJANI 5294 0x11177, // 11177..1117F; UNKNOWN 5295 0x11180, // 11180..111C8; SHARADA 5296 0x111C9, // 111C9..111CC; UNKNOWN 5297 0x111CD, // 111CD ; SHARADA 5298 0x111CE, // 111CE..111CF; UNKNOWN 5299 0x111D0, // 111D0..111DA; SHARADA 5300 0x111DB, // 111DB..111E0; UNKNOWN 5301 0x111E1, // 111E1..111F4; SINHALA 5302 0x111F5, // 111F5..111FF; UNKNOWN 5303 0x11200, // 11200..11211; KHOJKI 5304 0x11212, // 11212 ; UNKNOWN 5305 0x11213, // 11213..1123D; KHOJKI 5306 0x1123E, // 1123E..112AF; UNKNOWN 5307 0x112B0, // 112B0..112EA; KHUDAWADI 5308 0x112EB, // 112EB..112EF; UNKNOWN 5309 0x112F0, // 112F0..112F9; KHUDAWADI 5310 0x112FA, // 112FA..11300; UNKNOWN 5311 0x11301, // 11301..11303; GRANTHA 5312 0x11304, // 11304 ; UNKNOWN 5313 0x11305, // 11305..1130C; GRANTHA 5314 0x1130D, // 1130D..1130E; UNKNOWN 5315 0x1130F, // 1130F..11310; GRANTHA 5316 0x11311, // 11311..11312; UNKNOWN 5317 0x11313, // 11313..11328; GRANTHA 5318 0x11329, // 11329 ; UNKNOWN 5319 0x1132A, // 1132A..11330; GRANTHA 5320 0x11331, // 11331 ; UNKNOWN 5321 0x11332, // 11332..11333; GRANTHA 5322 0x11334, // 11334 ; UNKNOWN 5323 0x11335, // 11335..11339; GRANTHA 5324 0x1133A, // 1133A..1133B; UNKNOWN 5325 0x1133C, // 1133C..11344; GRANTHA 5326 0x11345, // 11345..11346; UNKNOWN 5327 0x11347, // 11347..11348; GRANTHA 5328 
0x11349, // 11349..1134A; UNKNOWN 5329 0x1134B, // 1134B..1134D; GRANTHA 5330 0x1134E, // 1134E..11356; UNKNOWN 5331 0x11357, // 11357 ; GRANTHA 5332 0x11358, // 11358..1135C; UNKNOWN 5333 0x1135D, // 1135D..11363; GRANTHA 5334 0x11364, // 11364..11365; UNKNOWN 5335 0x11366, // 11366..1136C; GRANTHA 5336 0x1136D, // 1136D..1136F; UNKNOWN 5337 0x11370, // 11370..11374; GRANTHA 5338 0x11375, // 11375..1147F; UNKNOWN 5339 0x11480, // 11480..114C7; TIRHUTA 5340 0x114C8, // 114C8..114CF; UNKNOWN 5341 0x114D0, // 114D0..114D9; TIRHUTA 5342 0x114DA, // 114DA..1157F; UNKNOWN 5343 0x11580, // 11580..115B5; SIDDHAM 5344 0x115B6, // 115B6..115B7; UNKNOWN 5345 0x115B8, // 115B8..115C9; SIDDHAM 5346 0x115CA, // 115CA..115FF; UNKNOWN 5347 0x11600, // 11600..11644; MODI 5348 0x11645, // 11645..1164F; UNKNOWN 5349 0x11650, // 11650..11659; MODI 5350 0x1165A, // 1165A..1167F; UNKNOWN 5351 0x11680, // 11680..116B7; TAKRI 5352 0x116B8, // 116B8..116BF; UNKNOWN 5353 0x116C0, // 116C0..116C9; TAKRI 5354 0x116CA, // 116CA..1189F; UNKNOWN 5355 0x118A0, // 118A0..118F2; WARANG_CITI 5356 0x118F3, // 118F3..118FE; UNKNOWN 5357 0x118FF, // 118FF ; WARANG_CITI 5358 0x11900, // 11900..11ABF; UNKNOWN 5359 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 5360 0x11AF9, // 11AF9..11FFF; UNKNOWN 5361 0x12000, // 12000..12398; CUNEIFORM 5362 0x12399, // 12399..123FF; UNKNOWN 5363 0x12400, // 12400..1246E; CUNEIFORM 5364 0x1246F, // 1246F ; UNKNOWN 5365 0x12470, // 12470..12474; CUNEIFORM 5366 0x12475, // 12475..12FFF; UNKNOWN 5367 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 5368 0x1342F, // 1342F..167FF; UNKNOWN 5369 0x16800, // 16800..16A38; BAMUM 5370 0x16A39, // 16A39..16A3F; UNKNOWN 5371 0x16A40, // 16A40..16A5E; MRO 5372 0x16A5F, // 16A5F ; UNKNOWN 5373 0x16A60, // 16A60..16A69; MRO 5374 0x16A6A, // 16A6A..16A6D; UNKNOWN 5375 0x16A6E, // 16A6E..16A6F; MRO 5376 0x16A70, // 16A70..16ACF; UNKNOWN 5377 0x16AD0, // 16AD0..16AED; BASSA_VAH 5378 0x16AEE, // 16AEE..16AEF; UNKNOWN 5379 0x16AF0, // 16AF0..16AF5; 
BASSA_VAH 5380 0x16AF6, // 16AF6..16AFF; UNKNOWN 5381 0x16B00, // 16B00..16B45; PAHAWH_HMONG 5382 0x16B46, // 16B46..16B4F; UNKNOWN 5383 0x16B50, // 16B50..16B59; PAHAWH_HMONG 5384 0x16B5A, // 16B5A ; UNKNOWN 5385 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 5386 0x16B62, // 16B62 ; UNKNOWN 5387 0x16B63, // 16B63..16B77; PAHAWH_HMONG 5388 0x16B78, // 16B78..16B7C; UNKNOWN 5389 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 5390 0x16B90, // 16B90..16EFF; UNKNOWN 5391 0x16F00, // 16F00..16F44; MIAO 5392 0x16F45, // 16F45..16F4F; UNKNOWN 5393 0x16F50, // 16F50..16F7E; MIAO 5394 0x16F7F, // 16F7F..16F8E; UNKNOWN 5395 0x16F8F, // 16F8F..16F9F; MIAO 5396 0x16FA0, // 16FA0..1AFFF; UNKNOWN 5397 0x1B000, // 1B000 ; KATAKANA 5398 0x1B001, // 1B001 ; HIRAGANA 5399 0x1B002, // 1B002..1BBFF; UNKNOWN 5400 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 5401 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 5402 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 5403 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 5404 0x1BC80, // 1BC80..1BC88; DUPLOYAN 5405 0x1BC89, // 1BC89..1BC8F; UNKNOWN 5406 0x1BC90, // 1BC90..1BC99; DUPLOYAN 5407 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 5408 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 5409 0x1BCA0, // 1BCA0..1BCA3; COMMON 5410 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 5411 0x1D000, // 1D000..1D0F5; COMMON 5412 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 5413 0x1D100, // 1D100..1D126; COMMON 5414 0x1D127, // 1D127..1D128; UNKNOWN 5415 0x1D129, // 1D129..1D166; COMMON 5416 0x1D167, // 1D167..1D169; INHERITED 5417 0x1D16A, // 1D16A..1D17A; COMMON 5418 0x1D17B, // 1D17B..1D182; INHERITED 5419 0x1D183, // 1D183..1D184; COMMON 5420 0x1D185, // 1D185..1D18B; INHERITED 5421 0x1D18C, // 1D18C..1D1A9; COMMON 5422 0x1D1AA, // 1D1AA..1D1AD; INHERITED 5423 0x1D1AE, // 1D1AE..1D1DD; COMMON 5424 0x1D1DE, // 1D1DE..1D1FF; UNKNOWN 5425 0x1D200, // 1D200..1D245; GREEK 5426 0x1D246, // 1D246..1D2FF; UNKNOWN 5427 0x1D300, // 1D300..1D356; COMMON 5428 0x1D357, // 1D357..1D35F; UNKNOWN 5429 0x1D360, // 1D360..1D371; COMMON 5430 0x1D372, // 1D372..1D3FF; UNKNOWN 5431 
0x1D400, // 1D400..1D454; COMMON 5432 0x1D455, // 1D455 ; UNKNOWN 5433 0x1D456, // 1D456..1D49C; COMMON 5434 0x1D49D, // 1D49D ; UNKNOWN 5435 0x1D49E, // 1D49E..1D49F; COMMON 5436 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 5437 0x1D4A2, // 1D4A2 ; COMMON 5438 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 5439 0x1D4A5, // 1D4A5..1D4A6; COMMON 5440 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 5441 0x1D4A9, // 1D4A9..1D4AC; COMMON 5442 0x1D4AD, // 1D4AD ; UNKNOWN 5443 0x1D4AE, // 1D4AE..1D4B9; COMMON 5444 0x1D4BA, // 1D4BA ; UNKNOWN 5445 0x1D4BB, // 1D4BB ; COMMON 5446 0x1D4BC, // 1D4BC ; UNKNOWN 5447 0x1D4BD, // 1D4BD..1D4C3; COMMON 5448 0x1D4C4, // 1D4C4 ; UNKNOWN 5449 0x1D4C5, // 1D4C5..1D505; COMMON 5450 0x1D506, // 1D506 ; UNKNOWN 5451 0x1D507, // 1D507..1D50A; COMMON 5452 0x1D50B, // 1D50B..1D50C; UNKNOWN 5453 0x1D50D, // 1D50D..1D514; COMMON 5454 0x1D515, // 1D515 ; UNKNOWN 5455 0x1D516, // 1D516..1D51C; COMMON 5456 0x1D51D, // 1D51D ; UNKNOWN 5457 0x1D51E, // 1D51E..1D539; COMMON 5458 0x1D53A, // 1D53A ; UNKNOWN 5459 0x1D53B, // 1D53B..1D53E; COMMON 5460 0x1D53F, // 1D53F ; UNKNOWN 5461 0x1D540, // 1D540..1D544; COMMON 5462 0x1D545, // 1D545 ; UNKNOWN 5463 0x1D546, // 1D546 ; COMMON 5464 0x1D547, // 1D547..1D549; UNKNOWN 5465 0x1D54A, // 1D54A..1D550; COMMON 5466 0x1D551, // 1D551 ; UNKNOWN 5467 0x1D552, // 1D552..1D6A5; COMMON 5468 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 5469 0x1D6A8, // 1D6A8..1D7CB; COMMON 5470 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 5471 0x1D7CE, // 1D7CE..1D7FF; COMMON 5472 0x1D800, // 1D800..1E7FF; UNKNOWN 5473 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 5474 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 5475 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 5476 0x1E8D7, // 1E8D7..1EDFF; UNKNOWN 5477 0x1EE00, // 1EE00..1EE03; ARABIC 5478 0x1EE04, // 1EE04 ; UNKNOWN 5479 0x1EE05, // 1EE05..1EE1F; ARABIC 5480 0x1EE20, // 1EE20 ; UNKNOWN 5481 0x1EE21, // 1EE21..1EE22; ARABIC 5482 0x1EE23, // 1EE23 ; UNKNOWN 5483 0x1EE24, // 1EE24 ; ARABIC 5484 0x1EE25, // 1EE25..1EE26; UNKNOWN 5485 0x1EE27, // 1EE27 ; ARABIC 
5486 0x1EE28, // 1EE28 ; UNKNOWN 5487 0x1EE29, // 1EE29..1EE32; ARABIC 5488 0x1EE33, // 1EE33 ; UNKNOWN 5489 0x1EE34, // 1EE34..1EE37; ARABIC 5490 0x1EE38, // 1EE38 ; UNKNOWN 5491 0x1EE39, // 1EE39 ; ARABIC 5492 0x1EE3A, // 1EE3A ; UNKNOWN 5493 0x1EE3B, // 1EE3B ; ARABIC 5494 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 5495 0x1EE42, // 1EE42 ; ARABIC 5496 0x1EE43, // 1EE43..1EE46; UNKNOWN 5497 0x1EE47, // 1EE47 ; ARABIC 5498 0x1EE48, // 1EE48 ; UNKNOWN 5499 0x1EE49, // 1EE49 ; ARABIC 5500 0x1EE4A, // 1EE4A ; UNKNOWN 5501 0x1EE4B, // 1EE4B ; ARABIC 5502 0x1EE4C, // 1EE4C ; UNKNOWN 5503 0x1EE4D, // 1EE4D..1EE4F; ARABIC 5504 0x1EE50, // 1EE50 ; UNKNOWN 5505 0x1EE51, // 1EE51..1EE52; ARABIC 5506 0x1EE53, // 1EE53 ; UNKNOWN 5507 0x1EE54, // 1EE54 ; ARABIC 5508 0x1EE55, // 1EE55..1EE56; UNKNOWN 5509 0x1EE57, // 1EE57 ; ARABIC 5510 0x1EE58, // 1EE58 ; UNKNOWN 5511 0x1EE59, // 1EE59 ; ARABIC 5512 0x1EE5A, // 1EE5A ; UNKNOWN 5513 0x1EE5B, // 1EE5B ; ARABIC 5514 0x1EE5C, // 1EE5C ; UNKNOWN 5515 0x1EE5D, // 1EE5D ; ARABIC 5516 0x1EE5E, // 1EE5E ; UNKNOWN 5517 0x1EE5F, // 1EE5F ; ARABIC 5518 0x1EE60, // 1EE60 ; UNKNOWN 5519 0x1EE61, // 1EE61..1EE62; ARABIC 5520 0x1EE63, // 1EE63 ; UNKNOWN 5521 0x1EE64, // 1EE64 ; ARABIC 5522 0x1EE65, // 1EE65..1EE66; UNKNOWN 5523 0x1EE67, // 1EE67..1EE6A; ARABIC 5524 0x1EE6B, // 1EE6B ; UNKNOWN 5525 0x1EE6C, // 1EE6C..1EE72; ARABIC 5526 0x1EE73, // 1EE73 ; UNKNOWN 5527 0x1EE74, // 1EE74..1EE77; ARABIC 5528 0x1EE78, // 1EE78 ; UNKNOWN 5529 0x1EE79, // 1EE79..1EE7C; ARABIC 5530 0x1EE7D, // 1EE7D ; UNKNOWN 5531 0x1EE7E, // 1EE7E ; ARABIC 5532 0x1EE7F, // 1EE7F ; UNKNOWN 5533 0x1EE80, // 1EE80..1EE89; ARABIC 5534 0x1EE8A, // 1EE8A ; UNKNOWN 5535 0x1EE8B, // 1EE8B..1EE9B; ARABIC 5536 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 5537 0x1EEA1, // 1EEA1..1EEA3; ARABIC 5538 0x1EEA4, // 1EEA4 ; UNKNOWN 5539 0x1EEA5, // 1EEA5..1EEA9; ARABIC 5540 0x1EEAA, // 1EEAA ; UNKNOWN 5541 0x1EEAB, // 1EEAB..1EEBB; ARABIC 5542 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 5543 0x1EEF0, // 
1EEF0..1EEF1; ARABIC 5544 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 5545 0x1F000, // 1F000..1F02B; COMMON 5546 0x1F02C, // 1F02C..1F02F; UNKNOWN 5547 0x1F030, // 1F030..1F093; COMMON 5548 0x1F094, // 1F094..1F09F; UNKNOWN 5549 0x1F0A0, // 1F0A0..1F0AE; COMMON 5550 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 5551 0x1F0B1, // 1F0B1..1F0BF; COMMON 5552 0x1F0C0, // 1F0C0 ; UNKNOWN 5553 0x1F0C1, // 1F0C1..1F0CF; COMMON 5554 0x1F0D0, // 1F0D0 ; UNKNOWN 5555 0x1F0D1, // 1F0D1..1F0F5; COMMON 5556 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 5557 0x1F100, // 1F100..1F10C; COMMON 5558 0x1F10D, // 1F10D..1F10F; UNKNOWN 5559 0x1F110, // 1F110..1F12E; COMMON 5560 0x1F12F, // 1F12F ; UNKNOWN 5561 0x1F130, // 1F130..1F16B; COMMON 5562 0x1F16C, // 1F16C..1F16F; UNKNOWN 5563 0x1F170, // 1F170..1F19A; COMMON 5564 0x1F19B, // 1F19B..1F1E5; UNKNOWN 5565 0x1F1E6, // 1F1E6..1F1FF; COMMON 5566 0x1F200, // 1F200 ; HIRAGANA 5567 0x1F201, // 1F201..1F202; COMMON 5568 0x1F203, // 1F203..1F20F; UNKNOWN 5569 0x1F210, // 1F210..1F23A; COMMON 5570 0x1F23B, // 1F23B..1F23F; UNKNOWN 5571 0x1F240, // 1F240..1F248; COMMON 5572 0x1F249, // 1F249..1F24F; UNKNOWN 5573 0x1F250, // 1F250..1F251; COMMON 5574 0x1F252, // 1F252..1F2FF; UNKNOWN 5575 0x1F300, // 1F300..1F32C; COMMON 5576 0x1F32D, // 1F32D..1F32F; UNKNOWN 5577 0x1F330, // 1F330..1F37D; COMMON 5578 0x1F37E, // 1F37E..1F37F; UNKNOWN 5579 0x1F380, // 1F380..1F3CE; COMMON 5580 0x1F3CF, // 1F3CF..1F3D3; UNKNOWN 5581 0x1F3D4, // 1F3D4..1F3F7; COMMON 5582 0x1F3F8, // 1F3F8..1F3FF; UNKNOWN 5583 0x1F400, // 1F400..1F4FE; COMMON 5584 0x1F4FF, // 1F4FF ; UNKNOWN 5585 0x1F500, // 1F500..1F54A; COMMON 5586 0x1F54B, // 1F54B..1F54F; UNKNOWN 5587 0x1F550, // 1F550..1F579; COMMON 5588 0x1F57A, // 1F57A ; UNKNOWN 5589 0x1F57B, // 1F57B..1F5A3; COMMON 5590 0x1F5A4, // 1F5A4 ; UNKNOWN 5591 0x1F5A5, // 1F5A5..1F642; COMMON 5592 0x1F643, // 1F643..1F644; UNKNOWN 5593 0x1F645, // 1F645..1F6CF; COMMON 5594 0x1F6D0, // 1F6D0..1F6DF; UNKNOWN 5595 0x1F6E0, // 1F6E0..1F6EC; COMMON 5596 0x1F6ED, // 
1F6ED..1F6EF; UNKNOWN 5597 0x1F6F0, // 1F6F0..1F6F3; COMMON 5598 0x1F6F4, // 1F6F4..1F6FF; UNKNOWN 5599 0x1F700, // 1F700..1F773; COMMON 5600 0x1F774, // 1F774..1F77F; UNKNOWN 5601 0x1F780, // 1F780..1F7D4; COMMON 5602 0x1F7D5, // 1F7D5..1F7FF; UNKNOWN 5603 0x1F800, // 1F800..1F80B; COMMON 5604 0x1F80C, // 1F80C..1F80F; UNKNOWN 5605 0x1F810, // 1F810..1F847; COMMON 5606 0x1F848, // 1F848..1F84F; UNKNOWN 5607 0x1F850, // 1F850..1F859; COMMON 5608 0x1F85A, // 1F85A..1F85F; UNKNOWN 5609 0x1F860, // 1F860..1F887; COMMON 5610 0x1F888, // 1F888..1F88F; UNKNOWN 5611 0x1F890, // 1F890..1F8AD; COMMON 5612 0x1F8AE, // 1F8AE..1FFFF; UNKNOWN 5613 0x20000, // 20000..2A6D6; HAN 5614 0x2A6D7, // 2A6D7..2A6FF; UNKNOWN 5615 0x2A700, // 2A700..2B734; HAN 5616 0x2B735, // 2B735..2B73F; UNKNOWN 5617 0x2B740, // 2B740..2B81D; HAN 5618 0x2B81E, // 2B81E..2F7FF; UNKNOWN 5619 0x2F800, // 2F800..2FA1D; HAN 5620 0x2FA1E, // 2FA1E..E0000; UNKNOWN 5621 0xE0001, // E0001 ; COMMON 5622 0xE0002, // E0002..E001F; UNKNOWN 5623 0xE0020, // E0020..E007F; COMMON 5624 0xE0080, // E0080..E00FF; UNKNOWN 5625 0xE0100, // E0100..E01EF; INHERITED 5626 0xE01F0 // E01F0..10FFFF; UNKNOWN 5627 }; 5628 5629 private static final UnicodeScript[] scripts = { 5630 COMMON, // 0000..0040 5631 LATIN, // 0041..005A 5632 COMMON, // 005B..0060 5633 LATIN, // 0061..007A 5634 COMMON, // 007B..00A9 5635 LATIN, // 00AA 5636 COMMON, // 00AB..00B9 5637 LATIN, // 00BA 5638 COMMON, // 00BB..00BF 5639 LATIN, // 00C0..00D6 5640 COMMON, // 00D7 5641 LATIN, // 00D8..00F6 5642 COMMON, // 00F7 5643 LATIN, // 00F8..02B8 5644 COMMON, // 02B9..02DF 5645 LATIN, // 02E0..02E4 5646 COMMON, // 02E5..02E9 5647 BOPOMOFO, // 02EA..02EB 5648 COMMON, // 02EC..02FF 5649 INHERITED, // 0300..036F 5650 GREEK, // 0370..0373 5651 COMMON, // 0374 5652 GREEK, // 0375..0377 5653 UNKNOWN, // 0378..0379 5654 GREEK, // 037A..037D 5655 COMMON, // 037E 5656 GREEK, // 037F 5657 UNKNOWN, // 0380..0383 5658 GREEK, // 0384 5659 COMMON, // 0385 5660 GREEK, // 0386 
5661 COMMON, // 0387 5662 GREEK, // 0388..038A 5663 UNKNOWN, // 038B 5664 GREEK, // 038C 5665 UNKNOWN, // 038D 5666 GREEK, // 038E..03A1 5667 UNKNOWN, // 03A2 5668 GREEK, // 03A3..03E1 5669 COPTIC, // 03E2..03EF 5670 GREEK, // 03F0..03FF 5671 CYRILLIC, // 0400..0484 5672 INHERITED, // 0485..0486 5673 CYRILLIC, // 0487..052F 5674 UNKNOWN, // 0530 5675 ARMENIAN, // 0531..0556 5676 UNKNOWN, // 0557..0558 5677 ARMENIAN, // 0559..055F 5678 UNKNOWN, // 0560 5679 ARMENIAN, // 0561..0587 5680 UNKNOWN, // 0588 5681 COMMON, // 0589 5682 ARMENIAN, // 058A 5683 UNKNOWN, // 058B..058C 5684 ARMENIAN, // 058D..058F 5685 UNKNOWN, // 0590 5686 HEBREW, // 0591..05C7 5687 UNKNOWN, // 05C8..05CF 5688 HEBREW, // 05D0..05EA 5689 UNKNOWN, // 05EB..05EF 5690 HEBREW, // 05F0..05F4 5691 UNKNOWN, // 05F5..05FF 5692 ARABIC, // 0600..0604 5693 COMMON, // 0605 5694 ARABIC, // 0606..060B 5695 COMMON, // 060C 5696 ARABIC, // 060D..061A 5697 COMMON, // 061B..061C 5698 UNKNOWN, // 061D 5699 ARABIC, // 061E 5700 COMMON, // 061F 5701 ARABIC, // 0620..063F 5702 COMMON, // 0640 5703 ARABIC, // 0641..064A 5704 INHERITED, // 064B..0655 5705 ARABIC, // 0656..065F 5706 COMMON, // 0660..0669 5707 ARABIC, // 066A..066F 5708 INHERITED, // 0670 5709 ARABIC, // 0671..06DC 5710 COMMON, // 06DD 5711 ARABIC, // 06DE..06FF 5712 SYRIAC, // 0700..070D 5713 UNKNOWN, // 070E 5714 SYRIAC, // 070F..074A 5715 UNKNOWN, // 074B..074C 5716 SYRIAC, // 074D..074F 5717 ARABIC, // 0750..077F 5718 THAANA, // 0780..07B1 5719 UNKNOWN, // 07B2..07BF 5720 NKO, // 07C0..07FA 5721 UNKNOWN, // 07FB..07FF 5722 SAMARITAN, // 0800..082D 5723 UNKNOWN, // 082E..082F 5724 SAMARITAN, // 0830..083E 5725 UNKNOWN, // 083F 5726 MANDAIC, // 0840..085B 5727 UNKNOWN, // 085C..085D 5728 MANDAIC, // 085E 5729 UNKNOWN, // 085F..089F 5730 ARABIC, // 08A0..08B2 5731 UNKNOWN, // 08B3..08E3 5732 ARABIC, // 08E4..08FF 5733 DEVANAGARI, // 0900..0950 5734 INHERITED, // 0951..0952 5735 DEVANAGARI, // 0953..0963 5736 COMMON, // 0964..0965 5737 DEVANAGARI, // 
0966..097F 5738 BENGALI, // 0980..0983 5739 UNKNOWN, // 0984 5740 BENGALI, // 0985..098C 5741 UNKNOWN, // 098D..098E 5742 BENGALI, // 098F..0990 5743 UNKNOWN, // 0991..0992 5744 BENGALI, // 0993..09A8 5745 UNKNOWN, // 09A9 5746 BENGALI, // 09AA..09B0 5747 UNKNOWN, // 09B1 5748 BENGALI, // 09B2 5749 UNKNOWN, // 09B3..09B5 5750 BENGALI, // 09B6..09B9 5751 UNKNOWN, // 09BA..09BB 5752 BENGALI, // 09BC..09C4 5753 UNKNOWN, // 09C5..09C6 5754 BENGALI, // 09C7..09C8 5755 UNKNOWN, // 09C9..09CA 5756 BENGALI, // 09CB..09CE 5757 UNKNOWN, // 09CF..09D6 5758 BENGALI, // 09D7 5759 UNKNOWN, // 09D8..09DB 5760 BENGALI, // 09DC..09DD 5761 UNKNOWN, // 09DE 5762 BENGALI, // 09DF..09E3 5763 UNKNOWN, // 09E4..09E5 5764 BENGALI, // 09E6..09FB 5765 UNKNOWN, // 09FC..0A00 5766 GURMUKHI, // 0A01..0A03 5767 UNKNOWN, // 0A04 5768 GURMUKHI, // 0A05..0A0A 5769 UNKNOWN, // 0A0B..0A0E 5770 GURMUKHI, // 0A0F..0A10 5771 UNKNOWN, // 0A11..0A12 5772 GURMUKHI, // 0A13..0A28 5773 UNKNOWN, // 0A29 5774 GURMUKHI, // 0A2A..0A30 5775 UNKNOWN, // 0A31 5776 GURMUKHI, // 0A32..0A33 5777 UNKNOWN, // 0A34 5778 GURMUKHI, // 0A35..0A36 5779 UNKNOWN, // 0A37 5780 GURMUKHI, // 0A38..0A39 5781 UNKNOWN, // 0A3A..0A3B 5782 GURMUKHI, // 0A3C 5783 UNKNOWN, // 0A3D 5784 GURMUKHI, // 0A3E..0A42 5785 UNKNOWN, // 0A43..0A46 5786 GURMUKHI, // 0A47..0A48 5787 UNKNOWN, // 0A49..0A4A 5788 GURMUKHI, // 0A4B..0A4D 5789 UNKNOWN, // 0A4E..0A50 5790 GURMUKHI, // 0A51 5791 UNKNOWN, // 0A52..0A58 5792 GURMUKHI, // 0A59..0A5C 5793 UNKNOWN, // 0A5D 5794 GURMUKHI, // 0A5E 5795 UNKNOWN, // 0A5F..0A65 5796 GURMUKHI, // 0A66..0A75 5797 UNKNOWN, // 0A76..0A80 5798 GUJARATI, // 0A81..0A83 5799 UNKNOWN, // 0A84 5800 GUJARATI, // 0A85..0A8D 5801 UNKNOWN, // 0A8E 5802 GUJARATI, // 0A8F..0A91 5803 UNKNOWN, // 0A92 5804 GUJARATI, // 0A93..0AA8 5805 UNKNOWN, // 0AA9 5806 GUJARATI, // 0AAA..0AB0 5807 UNKNOWN, // 0AB1 5808 GUJARATI, // 0AB2..0AB3 5809 UNKNOWN, // 0AB4 5810 GUJARATI, // 0AB5..0AB9 5811 UNKNOWN, // 0ABA..0ABB 5812 GUJARATI, // 
0ABC..0AC5 5813 UNKNOWN, // 0AC6 5814 GUJARATI, // 0AC7..0AC9 5815 UNKNOWN, // 0ACA 5816 GUJARATI, // 0ACB..0ACD 5817 UNKNOWN, // 0ACE..0ACF 5818 GUJARATI, // 0AD0 5819 UNKNOWN, // 0AD1..0ADF 5820 GUJARATI, // 0AE0..0AE3 5821 UNKNOWN, // 0AE4..0AE5 5822 GUJARATI, // 0AE6..0AF1 5823 UNKNOWN, // 0AF2..0B00 5824 ORIYA, // 0B01..0B03 5825 UNKNOWN, // 0B04 5826 ORIYA, // 0B05..0B0C 5827 UNKNOWN, // 0B0D..0B0E 5828 ORIYA, // 0B0F..0B10 5829 UNKNOWN, // 0B11..0B12 5830 ORIYA, // 0B13..0B28 5831 UNKNOWN, // 0B29 5832 ORIYA, // 0B2A..0B30 5833 UNKNOWN, // 0B31 5834 ORIYA, // 0B32..0B33 5835 UNKNOWN, // 0B34 5836 ORIYA, // 0B35..0B39 5837 UNKNOWN, // 0B3A..0B3B 5838 ORIYA, // 0B3C..0B44 5839 UNKNOWN, // 0B45..0B46 5840 ORIYA, // 0B47..0B48 5841 UNKNOWN, // 0B49..0B4A 5842 ORIYA, // 0B4B..0B4D 5843 UNKNOWN, // 0B4E..0B55 5844 ORIYA, // 0B56..0B57 5845 UNKNOWN, // 0B58..0B5B 5846 ORIYA, // 0B5C..0B5D 5847 UNKNOWN, // 0B5E 5848 ORIYA, // 0B5F..0B63 5849 UNKNOWN, // 0B64..0B65 5850 ORIYA, // 0B66..0B77 5851 UNKNOWN, // 0B78..0B81 5852 TAMIL, // 0B82..0B83 5853 UNKNOWN, // 0B84 5854 TAMIL, // 0B85..0B8A 5855 UNKNOWN, // 0B8B..0B8D 5856 TAMIL, // 0B8E..0B90 5857 UNKNOWN, // 0B91 5858 TAMIL, // 0B92..0B95 5859 UNKNOWN, // 0B96..0B98 5860 TAMIL, // 0B99..0B9A 5861 UNKNOWN, // 0B9B 5862 TAMIL, // 0B9C 5863 UNKNOWN, // 0B9D 5864 TAMIL, // 0B9E..0B9F 5865 UNKNOWN, // 0BA0..0BA2 5866 TAMIL, // 0BA3..0BA4 5867 UNKNOWN, // 0BA5..0BA7 5868 TAMIL, // 0BA8..0BAA 5869 UNKNOWN, // 0BAB..0BAD 5870 TAMIL, // 0BAE..0BB9 5871 UNKNOWN, // 0BBA..0BBD 5872 TAMIL, // 0BBE..0BC2 5873 UNKNOWN, // 0BC3..0BC5 5874 TAMIL, // 0BC6..0BC8 5875 UNKNOWN, // 0BC9 5876 TAMIL, // 0BCA..0BCD 5877 UNKNOWN, // 0BCE..0BCF 5878 TAMIL, // 0BD0 5879 UNKNOWN, // 0BD1..0BD6 5880 TAMIL, // 0BD7 5881 UNKNOWN, // 0BD8..0BE5 5882 TAMIL, // 0BE6..0BFA 5883 UNKNOWN, // 0BFB..0BFF 5884 TELUGU, // 0C00..0C03 5885 UNKNOWN, // 0C04 5886 TELUGU, // 0C05..0C0C 5887 UNKNOWN, // 0C0D 5888 TELUGU, // 0C0E..0C10 5889 UNKNOWN, // 0C11 5890 
TELUGU, // 0C12..0C28 5891 UNKNOWN, // 0C29 5892 TELUGU, // 0C2A..0C39 5893 UNKNOWN, // 0C3A..0C3C 5894 TELUGU, // 0C3D..0C44 5895 UNKNOWN, // 0C45 5896 TELUGU, // 0C46..0C48 5897 UNKNOWN, // 0C49 5898 TELUGU, // 0C4A..0C4D 5899 UNKNOWN, // 0C4E..0C54 5900 TELUGU, // 0C55..0C56 5901 UNKNOWN, // 0C57 5902 TELUGU, // 0C58..0C59 5903 UNKNOWN, // 0C5A..0C5F 5904 TELUGU, // 0C60..0C63 5905 UNKNOWN, // 0C64..0C65 5906 TELUGU, // 0C66..0C6F 5907 UNKNOWN, // 0C70..0C77 5908 TELUGU, // 0C78..0C7F 5909 UNKNOWN, // 0C80 5910 KANNADA, // 0C81..0C83 5911 UNKNOWN, // 0C84 5912 KANNADA, // 0C85..0C8C 5913 UNKNOWN, // 0C8D 5914 KANNADA, // 0C8E..0C90 5915 UNKNOWN, // 0C91 5916 KANNADA, // 0C92..0CA8 5917 UNKNOWN, // 0CA9 5918 KANNADA, // 0CAA..0CB3 5919 UNKNOWN, // 0CB4 5920 KANNADA, // 0CB5..0CB9 5921 UNKNOWN, // 0CBA..0CBB 5922 KANNADA, // 0CBC..0CC4 5923 UNKNOWN, // 0CC5 5924 KANNADA, // 0CC6..0CC8 5925 UNKNOWN, // 0CC9 5926 KANNADA, // 0CCA..0CCD 5927 UNKNOWN, // 0CCE..0CD4 5928 KANNADA, // 0CD5..0CD6 5929 UNKNOWN, // 0CD7..0CDD 5930 KANNADA, // 0CDE 5931 UNKNOWN, // 0CDF 5932 KANNADA, // 0CE0..0CE3 5933 UNKNOWN, // 0CE4..0CE5 5934 KANNADA, // 0CE6..0CEF 5935 UNKNOWN, // 0CF0 5936 KANNADA, // 0CF1..0CF2 5937 UNKNOWN, // 0CF3..0D00 5938 MALAYALAM, // 0D01..0D03 5939 UNKNOWN, // 0D04 5940 MALAYALAM, // 0D05..0D0C 5941 UNKNOWN, // 0D0D 5942 MALAYALAM, // 0D0E..0D10 5943 UNKNOWN, // 0D11 5944 MALAYALAM, // 0D12..0D3A 5945 UNKNOWN, // 0D3B..0D3C 5946 MALAYALAM, // 0D3D..0D44 5947 UNKNOWN, // 0D45 5948 MALAYALAM, // 0D46..0D48 5949 UNKNOWN, // 0D49 5950 MALAYALAM, // 0D4A..0D4E 5951 UNKNOWN, // 0D4F..0D56 5952 MALAYALAM, // 0D57 5953 UNKNOWN, // 0D58..0D5F 5954 MALAYALAM, // 0D60..0D63 5955 UNKNOWN, // 0D64..0D65 5956 MALAYALAM, // 0D66..0D75 5957 UNKNOWN, // 0D76..0D78 5958 MALAYALAM, // 0D79..0D7F 5959 UNKNOWN, // 0D80..0D81 5960 SINHALA, // 0D82..0D83 5961 UNKNOWN, // 0D84 5962 SINHALA, // 0D85..0D96 5963 UNKNOWN, // 0D97..0D99 5964 SINHALA, // 0D9A..0DB1 5965 UNKNOWN, // 0DB2 
5966 SINHALA, // 0DB3..0DBB 5967 UNKNOWN, // 0DBC 5968 SINHALA, // 0DBD 5969 UNKNOWN, // 0DBE..0DBF 5970 SINHALA, // 0DC0..0DC6 5971 UNKNOWN, // 0DC7..0DC9 5972 SINHALA, // 0DCA 5973 UNKNOWN, // 0DCB..0DCE 5974 SINHALA, // 0DCF..0DD4 5975 UNKNOWN, // 0DD5 5976 SINHALA, // 0DD6 5977 UNKNOWN, // 0DD7 5978 SINHALA, // 0DD8..0DDF 5979 UNKNOWN, // 0DE0..0DE5 5980 SINHALA, // 0DE6..0DEF 5981 UNKNOWN, // 0DF0..0DF1 5982 SINHALA, // 0DF2..0DF4 5983 UNKNOWN, // 0DF5..0E00 5984 THAI, // 0E01..0E3A 5985 UNKNOWN, // 0E3B..0E3E 5986 COMMON, // 0E3F 5987 THAI, // 0E40..0E5B 5988 UNKNOWN, // 0E5C..0E80 5989 LAO, // 0E81..0E82 5990 UNKNOWN, // 0E83 5991 LAO, // 0E84 5992 UNKNOWN, // 0E85..0E86 5993 LAO, // 0E87..0E88 5994 UNKNOWN, // 0E89 5995 LAO, // 0E8A 5996 UNKNOWN, // 0E8B..0E8C 5997 LAO, // 0E8D 5998 UNKNOWN, // 0E8E..0E93 5999 LAO, // 0E94..0E97 6000 UNKNOWN, // 0E98 6001 LAO, // 0E99..0E9F 6002 UNKNOWN, // 0EA0 6003 LAO, // 0EA1..0EA3 6004 UNKNOWN, // 0EA4 6005 LAO, // 0EA5 6006 UNKNOWN, // 0EA6 6007 LAO, // 0EA7 6008 UNKNOWN, // 0EA8..0EA9 6009 LAO, // 0EAA..0EAB 6010 UNKNOWN, // 0EAC 6011 LAO, // 0EAD..0EB9 6012 UNKNOWN, // 0EBA 6013 LAO, // 0EBB..0EBD 6014 UNKNOWN, // 0EBE..0EBF 6015 LAO, // 0EC0..0EC4 6016 UNKNOWN, // 0EC5 6017 LAO, // 0EC6 6018 UNKNOWN, // 0EC7 6019 LAO, // 0EC8..0ECD 6020 UNKNOWN, // 0ECE..0ECF 6021 LAO, // 0ED0..0ED9 6022 UNKNOWN, // 0EDA..0EDB 6023 LAO, // 0EDC..0EDF 6024 UNKNOWN, // 0EE0..0EFF 6025 TIBETAN, // 0F00..0F47 6026 UNKNOWN, // 0F48 6027 TIBETAN, // 0F49..0F6C 6028 UNKNOWN, // 0F6D..0F70 6029 TIBETAN, // 0F71..0F97 6030 UNKNOWN, // 0F98 6031 TIBETAN, // 0F99..0FBC 6032 UNKNOWN, // 0FBD 6033 TIBETAN, // 0FBE..0FCC 6034 UNKNOWN, // 0FCD 6035 TIBETAN, // 0FCE..0FD4 6036 COMMON, // 0FD5..0FD8 6037 TIBETAN, // 0FD9..0FDA 6038 UNKNOWN, // 0FDB..0FFF 6039 MYANMAR, // 1000..109F 6040 GEORGIAN, // 10A0..10C5 6041 UNKNOWN, // 10C6 6042 GEORGIAN, // 10C7 6043 UNKNOWN, // 10C8..10CC 6044 GEORGIAN, // 10CD 6045 UNKNOWN, // 10CE..10CF 6046 GEORGIAN, // 
10D0..10FA 6047 COMMON, // 10FB 6048 GEORGIAN, // 10FC..10FF 6049 HANGUL, // 1100..11FF 6050 ETHIOPIC, // 1200..1248 6051 UNKNOWN, // 1249 6052 ETHIOPIC, // 124A..124D 6053 UNKNOWN, // 124E..124F 6054 ETHIOPIC, // 1250..1256 6055 UNKNOWN, // 1257 6056 ETHIOPIC, // 1258 6057 UNKNOWN, // 1259 6058 ETHIOPIC, // 125A..125D 6059 UNKNOWN, // 125E..125F 6060 ETHIOPIC, // 1260..1288 6061 UNKNOWN, // 1289 6062 ETHIOPIC, // 128A..128D 6063 UNKNOWN, // 128E..128F 6064 ETHIOPIC, // 1290..12B0 6065 UNKNOWN, // 12B1 6066 ETHIOPIC, // 12B2..12B5 6067 UNKNOWN, // 12B6..12B7 6068 ETHIOPIC, // 12B8..12BE 6069 UNKNOWN, // 12BF 6070 ETHIOPIC, // 12C0 6071 UNKNOWN, // 12C1 6072 ETHIOPIC, // 12C2..12C5 6073 UNKNOWN, // 12C6..12C7 6074 ETHIOPIC, // 12C8..12D6 6075 UNKNOWN, // 12D7 6076 ETHIOPIC, // 12D8..1310 6077 UNKNOWN, // 1311 6078 ETHIOPIC, // 1312..1315 6079 UNKNOWN, // 1316..1317 6080 ETHIOPIC, // 1318..135A 6081 UNKNOWN, // 135B..135C 6082 ETHIOPIC, // 135D..137C 6083 UNKNOWN, // 137D..137F 6084 ETHIOPIC, // 1380..1399 6085 UNKNOWN, // 139A..139F 6086 CHEROKEE, // 13A0..13F4 6087 UNKNOWN, // 13F5..13FF 6088 CANADIAN_ABORIGINAL, // 1400..167F 6089 OGHAM, // 1680..169C 6090 UNKNOWN, // 169D..169F 6091 RUNIC, // 16A0..16EA 6092 COMMON, // 16EB..16ED 6093 RUNIC, // 16EE..16F8 6094 UNKNOWN, // 16F9..16FF 6095 TAGALOG, // 1700..170C 6096 UNKNOWN, // 170D 6097 TAGALOG, // 170E..1714 6098 UNKNOWN, // 1715..171F 6099 HANUNOO, // 1720..1734 6100 COMMON, // 1735..1736 6101 UNKNOWN, // 1737..173F 6102 BUHID, // 1740..1753 6103 UNKNOWN, // 1754..175F 6104 TAGBANWA, // 1760..176C 6105 UNKNOWN, // 176D 6106 TAGBANWA, // 176E..1770 6107 UNKNOWN, // 1771 6108 TAGBANWA, // 1772..1773 6109 UNKNOWN, // 1774..177F 6110 KHMER, // 1780..17DD 6111 UNKNOWN, // 17DE..17DF 6112 KHMER, // 17E0..17E9 6113 UNKNOWN, // 17EA..17EF 6114 KHMER, // 17F0..17F9 6115 UNKNOWN, // 17FA..17FF 6116 MONGOLIAN, // 1800..1801 6117 COMMON, // 1802..1803 6118 MONGOLIAN, // 1804 6119 COMMON, // 1805 6120 MONGOLIAN, // 
1806..180E 6121 UNKNOWN, // 180F 6122 MONGOLIAN, // 1810..1819 6123 UNKNOWN, // 181A..181F 6124 MONGOLIAN, // 1820..1877 6125 UNKNOWN, // 1878..187F 6126 MONGOLIAN, // 1880..18AA 6127 UNKNOWN, // 18AB..18AF 6128 CANADIAN_ABORIGINAL, // 18B0..18F5 6129 UNKNOWN, // 18F6..18FF 6130 LIMBU, // 1900..191E 6131 UNKNOWN, // 191F 6132 LIMBU, // 1920..192B 6133 UNKNOWN, // 192C..192F 6134 LIMBU, // 1930..193B 6135 UNKNOWN, // 193C..193F 6136 LIMBU, // 1940 6137 UNKNOWN, // 1941..1943 6138 LIMBU, // 1944..194F 6139 TAI_LE, // 1950..196D 6140 UNKNOWN, // 196E..196F 6141 TAI_LE, // 1970..1974 6142 UNKNOWN, // 1975..197F 6143 NEW_TAI_LUE, // 1980..19AB 6144 UNKNOWN, // 19AC..19AF 6145 NEW_TAI_LUE, // 19B0..19C9 6146 UNKNOWN, // 19CA..19CF 6147 NEW_TAI_LUE, // 19D0..19DA 6148 UNKNOWN, // 19DB..19DD 6149 NEW_TAI_LUE, // 19DE..19DF 6150 KHMER, // 19E0..19FF 6151 BUGINESE, // 1A00..1A1B 6152 UNKNOWN, // 1A1C..1A1D 6153 BUGINESE, // 1A1E..1A1F 6154 TAI_THAM, // 1A20..1A5E 6155 UNKNOWN, // 1A5F 6156 TAI_THAM, // 1A60..1A7C 6157 UNKNOWN, // 1A7D..1A7E 6158 TAI_THAM, // 1A7F..1A89 6159 UNKNOWN, // 1A8A..1A8F 6160 TAI_THAM, // 1A90..1A99 6161 UNKNOWN, // 1A9A..1A9F 6162 TAI_THAM, // 1AA0..1AAD 6163 UNKNOWN, // 1AAE..1AAF 6164 INHERITED, // 1AB0..1ABE 6165 UNKNOWN, // 1ABF..1AFF 6166 BALINESE, // 1B00..1B4B 6167 UNKNOWN, // 1B4C..1B4F 6168 BALINESE, // 1B50..1B7C 6169 UNKNOWN, // 1B7D..1B7F 6170 SUNDANESE, // 1B80..1BBF 6171 BATAK, // 1BC0..1BF3 6172 UNKNOWN, // 1BF4..1BFB 6173 BATAK, // 1BFC..1BFF 6174 LEPCHA, // 1C00..1C37 6175 UNKNOWN, // 1C38..1C3A 6176 LEPCHA, // 1C3B..1C49 6177 UNKNOWN, // 1C4A..1C4C 6178 LEPCHA, // 1C4D..1C4F 6179 OL_CHIKI, // 1C50..1C7F 6180 UNKNOWN, // 1C80..1CBF 6181 SUNDANESE, // 1CC0..1CC7 6182 UNKNOWN, // 1CC8..1CCF 6183 INHERITED, // 1CD0..1CD2 6184 COMMON, // 1CD3 6185 INHERITED, // 1CD4..1CE0 6186 COMMON, // 1CE1 6187 INHERITED, // 1CE2..1CE8 6188 COMMON, // 1CE9..1CEC 6189 INHERITED, // 1CED 6190 COMMON, // 1CEE..1CF3 6191 INHERITED, // 1CF4 6192 COMMON, 
// 1CF5..1CF6 6193 UNKNOWN, // 1CF7 6194 INHERITED, // 1CF8..1CF9 6195 UNKNOWN, // 1CFA..1CFF 6196 LATIN, // 1D00..1D25 6197 GREEK, // 1D26..1D2A 6198 CYRILLIC, // 1D2B 6199 LATIN, // 1D2C..1D5C 6200 GREEK, // 1D5D..1D61 6201 LATIN, // 1D62..1D65 6202 GREEK, // 1D66..1D6A 6203 LATIN, // 1D6B..1D77 6204 CYRILLIC, // 1D78 6205 LATIN, // 1D79..1DBE 6206 GREEK, // 1DBF 6207 INHERITED, // 1DC0..1DF5 6208 UNKNOWN, // 1DF6..1DFB 6209 INHERITED, // 1DFC..1DFF 6210 LATIN, // 1E00..1EFF 6211 GREEK, // 1F00..1F15 6212 UNKNOWN, // 1F16..1F17 6213 GREEK, // 1F18..1F1D 6214 UNKNOWN, // 1F1E..1F1F 6215 GREEK, // 1F20..1F45 6216 UNKNOWN, // 1F46..1F47 6217 GREEK, // 1F48..1F4D 6218 UNKNOWN, // 1F4E..1F4F 6219 GREEK, // 1F50..1F57 6220 UNKNOWN, // 1F58 6221 GREEK, // 1F59 6222 UNKNOWN, // 1F5A 6223 GREEK, // 1F5B 6224 UNKNOWN, // 1F5C 6225 GREEK, // 1F5D 6226 UNKNOWN, // 1F5E 6227 GREEK, // 1F5F..1F7D 6228 UNKNOWN, // 1F7E..1F7F 6229 GREEK, // 1F80..1FB4 6230 UNKNOWN, // 1FB5 6231 GREEK, // 1FB6..1FC4 6232 UNKNOWN, // 1FC5 6233 GREEK, // 1FC6..1FD3 6234 UNKNOWN, // 1FD4..1FD5 6235 GREEK, // 1FD6..1FDB 6236 UNKNOWN, // 1FDC 6237 GREEK, // 1FDD..1FEF 6238 UNKNOWN, // 1FF0..1FF1 6239 GREEK, // 1FF2..1FF4 6240 UNKNOWN, // 1FF5 6241 GREEK, // 1FF6..1FFE 6242 UNKNOWN, // 1FFF 6243 COMMON, // 2000..200B 6244 INHERITED, // 200C..200D 6245 COMMON, // 200E..2064 6246 UNKNOWN, // 2065 6247 COMMON, // 2066..2070 6248 LATIN, // 2071 6249 UNKNOWN, // 2072..2073 6250 COMMON, // 2074..207E 6251 LATIN, // 207F 6252 COMMON, // 2080..208E 6253 UNKNOWN, // 208F 6254 LATIN, // 2090..209C 6255 UNKNOWN, // 209D..209F 6256 COMMON, // 20A0..20BD 6257 UNKNOWN, // 20BE..20CF 6258 INHERITED, // 20D0..20F0 6259 UNKNOWN, // 20F1..20FF 6260 COMMON, // 2100..2125 6261 GREEK, // 2126 6262 COMMON, // 2127..2129 6263 LATIN, // 212A..212B 6264 COMMON, // 212C..2131 6265 LATIN, // 2132 6266 COMMON, // 2133..214D 6267 LATIN, // 214E 6268 COMMON, // 214F..215F 6269 LATIN, // 2160..2188 6270 COMMON, // 2189 6271 UNKNOWN, 
// 218A..218F 6272 COMMON, // 2190..23FA 6273 UNKNOWN, // 23FB..23FF 6274 COMMON, // 2400..2426 6275 UNKNOWN, // 2427..243F 6276 COMMON, // 2440..244A 6277 UNKNOWN, // 244B..245F 6278 COMMON, // 2460..27FF 6279 BRAILLE, // 2800..28FF 6280 COMMON, // 2900..2B73 6281 UNKNOWN, // 2B74..2B75 6282 COMMON, // 2B76..2B95 6283 UNKNOWN, // 2B96..2B97 6284 COMMON, // 2B98..2BB9 6285 UNKNOWN, // 2BBA..2BBC 6286 COMMON, // 2BBD..2BC8 6287 UNKNOWN, // 2BC9 6288 COMMON, // 2BCA..2BD1 6289 UNKNOWN, // 2BD2..2BFF 6290 GLAGOLITIC, // 2C00..2C2E 6291 UNKNOWN, // 2C2F 6292 GLAGOLITIC, // 2C30..2C5E 6293 UNKNOWN, // 2C5F 6294 LATIN, // 2C60..2C7F 6295 COPTIC, // 2C80..2CF3 6296 UNKNOWN, // 2CF4..2CF8 6297 COPTIC, // 2CF9..2CFF 6298 GEORGIAN, // 2D00..2D25 6299 UNKNOWN, // 2D26 6300 GEORGIAN, // 2D27 6301 UNKNOWN, // 2D28..2D2C 6302 GEORGIAN, // 2D2D 6303 UNKNOWN, // 2D2E..2D2F 6304 TIFINAGH, // 2D30..2D67 6305 UNKNOWN, // 2D68..2D6E 6306 TIFINAGH, // 2D6F..2D70 6307 UNKNOWN, // 2D71..2D7E 6308 TIFINAGH, // 2D7F 6309 ETHIOPIC, // 2D80..2D96 6310 UNKNOWN, // 2D97..2D9F 6311 ETHIOPIC, // 2DA0..2DA6 6312 UNKNOWN, // 2DA7 6313 ETHIOPIC, // 2DA8..2DAE 6314 UNKNOWN, // 2DAF 6315 ETHIOPIC, // 2DB0..2DB6 6316 UNKNOWN, // 2DB7 6317 ETHIOPIC, // 2DB8..2DBE 6318 UNKNOWN, // 2DBF 6319 ETHIOPIC, // 2DC0..2DC6 6320 UNKNOWN, // 2DC7 6321 ETHIOPIC, // 2DC8..2DCE 6322 UNKNOWN, // 2DCF 6323 ETHIOPIC, // 2DD0..2DD6 6324 UNKNOWN, // 2DD7 6325 ETHIOPIC, // 2DD8..2DDE 6326 UNKNOWN, // 2DDF 6327 CYRILLIC, // 2DE0..2DFF 6328 COMMON, // 2E00..2E42 6329 UNKNOWN, // 2E43..2E7F 6330 HAN, // 2E80..2E99 6331 UNKNOWN, // 2E9A 6332 HAN, // 2E9B..2EF3 6333 UNKNOWN, // 2EF4..2EFF 6334 HAN, // 2F00..2FD5 6335 UNKNOWN, // 2FD6..2FEF 6336 COMMON, // 2FF0..2FFB 6337 UNKNOWN, // 2FFC..2FFF 6338 COMMON, // 3000..3004 6339 HAN, // 3005 6340 COMMON, // 3006 6341 HAN, // 3007 6342 COMMON, // 3008..3020 6343 HAN, // 3021..3029 6344 INHERITED, // 302A..302D 6345 HANGUL, // 302E..302F 6346 COMMON, // 3030..3037 6347 HAN, // 
3038..303B 6348 COMMON, // 303C..303F 6349 UNKNOWN, // 3040 6350 HIRAGANA, // 3041..3096 6351 UNKNOWN, // 3097..3098 6352 INHERITED, // 3099..309A 6353 COMMON, // 309B..309C 6354 HIRAGANA, // 309D..309F 6355 COMMON, // 30A0 6356 KATAKANA, // 30A1..30FA 6357 COMMON, // 30FB..30FC 6358 KATAKANA, // 30FD..30FF 6359 UNKNOWN, // 3100..3104 6360 BOPOMOFO, // 3105..312D 6361 UNKNOWN, // 312E..3130 6362 HANGUL, // 3131..318E 6363 UNKNOWN, // 318F 6364 COMMON, // 3190..319F 6365 BOPOMOFO, // 31A0..31BA 6366 UNKNOWN, // 31BB..31BF 6367 COMMON, // 31C0..31E3 6368 UNKNOWN, // 31E4..31EF 6369 KATAKANA, // 31F0..31FF 6370 HANGUL, // 3200..321E 6371 UNKNOWN, // 321F 6372 COMMON, // 3220..325F 6373 HANGUL, // 3260..327E 6374 COMMON, // 327F..32CF 6375 KATAKANA, // 32D0..32FE 6376 UNKNOWN, // 32FF 6377 KATAKANA, // 3300..3357 6378 COMMON, // 3358..33FF 6379 HAN, // 3400..4DB5 6380 UNKNOWN, // 4DB6..4DBF 6381 COMMON, // 4DC0..4DFF 6382 HAN, // 4E00..9FCC 6383 UNKNOWN, // 9FCD..9FFF 6384 YI, // A000..A48C 6385 UNKNOWN, // A48D..A48F 6386 YI, // A490..A4C6 6387 UNKNOWN, // A4C7..A4CF 6388 LISU, // A4D0..A4FF 6389 VAI, // A500..A62B 6390 UNKNOWN, // A62C..A63F 6391 CYRILLIC, // A640..A69D 6392 UNKNOWN, // A69E 6393 CYRILLIC, // A69F 6394 BAMUM, // A6A0..A6F7 6395 UNKNOWN, // A6F8..A6FF 6396 COMMON, // A700..A721 6397 LATIN, // A722..A787 6398 COMMON, // A788..A78A 6399 LATIN, // A78B..A78E 6400 UNKNOWN, // A78F 6401 LATIN, // A790..A7AD 6402 UNKNOWN, // A7AE..A7AF 6403 LATIN, // A7B0..A7B1 6404 UNKNOWN, // A7B2..A7F6 6405 LATIN, // A7F7..A7FF 6406 SYLOTI_NAGRI, // A800..A82B 6407 UNKNOWN, // A82C..A82F 6408 COMMON, // A830..A839 6409 UNKNOWN, // A83A..A83F 6410 PHAGS_PA, // A840..A877 6411 UNKNOWN, // A878..A87F 6412 SAURASHTRA, // A880..A8C4 6413 UNKNOWN, // A8C5..A8CD 6414 SAURASHTRA, // A8CE..A8D9 6415 UNKNOWN, // A8DA..A8DF 6416 DEVANAGARI, // A8E0..A8FB 6417 UNKNOWN, // A8FC..A8FF 6418 KAYAH_LI, // A900..A92D 6419 COMMON, // A92E 6420 KAYAH_LI, // A92F 6421 REJANG, // A930..A953 
6422 UNKNOWN, // A954..A95E 6423 REJANG, // A95F 6424 HANGUL, // A960..A97C 6425 UNKNOWN, // A97D..A97F 6426 JAVANESE, // A980..A9CD 6427 UNKNOWN, // A9CE 6428 COMMON, // A9CF 6429 JAVANESE, // A9D0..A9D9 6430 UNKNOWN, // A9DA..A9DD 6431 JAVANESE, // A9DE..A9DF 6432 MYANMAR, // A9E0..A9FE 6433 UNKNOWN, // A9FF 6434 CHAM, // AA00..AA36 6435 UNKNOWN, // AA37..AA3F 6436 CHAM, // AA40..AA4D 6437 UNKNOWN, // AA4E..AA4F 6438 CHAM, // AA50..AA59 6439 UNKNOWN, // AA5A..AA5B 6440 CHAM, // AA5C..AA5F 6441 MYANMAR, // AA60..AA7F 6442 TAI_VIET, // AA80..AAC2 6443 UNKNOWN, // AAC3..AADA 6444 TAI_VIET, // AADB..AADF 6445 MEETEI_MAYEK, // AAE0..AAF6 6446 UNKNOWN, // AAF7..AB00 6447 ETHIOPIC, // AB01..AB06 6448 UNKNOWN, // AB07..AB08 6449 ETHIOPIC, // AB09..AB0E 6450 UNKNOWN, // AB0F..AB10 6451 ETHIOPIC, // AB11..AB16 6452 UNKNOWN, // AB17..AB1F 6453 ETHIOPIC, // AB20..AB26 6454 UNKNOWN, // AB27 6455 ETHIOPIC, // AB28..AB2E 6456 UNKNOWN, // AB2F 6457 LATIN, // AB30..AB5A 6458 COMMON, // AB5B 6459 LATIN, // AB5C..AB5F 6460 UNKNOWN, // AB60..AB63 6461 LATIN, // AB64 6462 GREEK, // AB65 6463 UNKNOWN, // AB66..ABBF 6464 MEETEI_MAYEK, // ABC0..ABED 6465 UNKNOWN, // ABEE..ABEF 6466 MEETEI_MAYEK, // ABF0..ABF9 6467 UNKNOWN, // ABFA..ABFF 6468 HANGUL, // AC00..D7A3 6469 UNKNOWN, // D7A4..D7AF 6470 HANGUL, // D7B0..D7C6 6471 UNKNOWN, // D7C7..D7CA 6472 HANGUL, // D7CB..D7FB 6473 UNKNOWN, // D7FC..F8FF 6474 HAN, // F900..FA6D 6475 UNKNOWN, // FA6E..FA6F 6476 HAN, // FA70..FAD9 6477 UNKNOWN, // FADA..FAFF 6478 LATIN, // FB00..FB06 6479 UNKNOWN, // FB07..FB12 6480 ARMENIAN, // FB13..FB17 6481 UNKNOWN, // FB18..FB1C 6482 HEBREW, // FB1D..FB36 6483 UNKNOWN, // FB37 6484 HEBREW, // FB38..FB3C 6485 UNKNOWN, // FB3D 6486 HEBREW, // FB3E 6487 UNKNOWN, // FB3F 6488 HEBREW, // FB40..FB41 6489 UNKNOWN, // FB42 6490 HEBREW, // FB43..FB44 6491 UNKNOWN, // FB45 6492 HEBREW, // FB46..FB4F 6493 ARABIC, // FB50..FBC1 6494 UNKNOWN, // FBC2..FBD2 6495 ARABIC, // FBD3..FD3D 6496 COMMON, // FD3E..FD3F 6497 
UNKNOWN, // FD40..FD4F 6498 ARABIC, // FD50..FD8F 6499 UNKNOWN, // FD90..FD91 6500 ARABIC, // FD92..FDC7 6501 UNKNOWN, // FDC8..FDEF 6502 ARABIC, // FDF0..FDFD 6503 UNKNOWN, // FDFE..FDFF 6504 INHERITED, // FE00..FE0F 6505 COMMON, // FE10..FE19 6506 UNKNOWN, // FE1A..FE1F 6507 INHERITED, // FE20..FE2D 6508 UNKNOWN, // FE2E..FE2F 6509 COMMON, // FE30..FE52 6510 UNKNOWN, // FE53 6511 COMMON, // FE54..FE66 6512 UNKNOWN, // FE67 6513 COMMON, // FE68..FE6B 6514 UNKNOWN, // FE6C..FE6F 6515 ARABIC, // FE70..FE74 6516 UNKNOWN, // FE75 6517 ARABIC, // FE76..FEFC 6518 UNKNOWN, // FEFD..FEFE 6519 COMMON, // FEFF 6520 UNKNOWN, // FF00 6521 COMMON, // FF01..FF20 6522 LATIN, // FF21..FF3A 6523 COMMON, // FF3B..FF40 6524 LATIN, // FF41..FF5A 6525 COMMON, // FF5B..FF65 6526 KATAKANA, // FF66..FF6F 6527 COMMON, // FF70 6528 KATAKANA, // FF71..FF9D 6529 COMMON, // FF9E..FF9F 6530 HANGUL, // FFA0..FFBE 6531 UNKNOWN, // FFBF..FFC1 6532 HANGUL, // FFC2..FFC7 6533 UNKNOWN, // FFC8..FFC9 6534 HANGUL, // FFCA..FFCF 6535 UNKNOWN, // FFD0..FFD1 6536 HANGUL, // FFD2..FFD7 6537 UNKNOWN, // FFD8..FFD9 6538 HANGUL, // FFDA..FFDC 6539 UNKNOWN, // FFDD..FFDF 6540 COMMON, // FFE0..FFE6 6541 UNKNOWN, // FFE7 6542 COMMON, // FFE8..FFEE 6543 UNKNOWN, // FFEF..FFF8 6544 COMMON, // FFF9..FFFD 6545 UNKNOWN, // FFFE..FFFF 6546 LINEAR_B, // 10000..1000B 6547 UNKNOWN, // 1000C 6548 LINEAR_B, // 1000D..10026 6549 UNKNOWN, // 10027 6550 LINEAR_B, // 10028..1003A 6551 UNKNOWN, // 1003B 6552 LINEAR_B, // 1003C..1003D 6553 UNKNOWN, // 1003E 6554 LINEAR_B, // 1003F..1004D 6555 UNKNOWN, // 1004E..1004F 6556 LINEAR_B, // 10050..1005D 6557 UNKNOWN, // 1005E..1007F 6558 LINEAR_B, // 10080..100FA 6559 UNKNOWN, // 100FB..100FF 6560 COMMON, // 10100..10102 6561 UNKNOWN, // 10103..10106 6562 COMMON, // 10107..10133 6563 UNKNOWN, // 10134..10136 6564 COMMON, // 10137..1013F 6565 GREEK, // 10140..1018C 6566 UNKNOWN, // 1018D..1018F 6567 COMMON, // 10190..1019B 6568 UNKNOWN, // 1019C..1019F 6569 GREEK, // 101A0 6570 
UNKNOWN, // 101A1..101CF 6571 COMMON, // 101D0..101FC 6572 INHERITED, // 101FD 6573 UNKNOWN, // 101FE..1027F 6574 LYCIAN, // 10280..1029C 6575 UNKNOWN, // 1029D..1029F 6576 CARIAN, // 102A0..102D0 6577 UNKNOWN, // 102D1..102DF 6578 INHERITED, // 102E0 6579 COMMON, // 102E1..102FB 6580 UNKNOWN, // 102FC..102FF 6581 OLD_ITALIC, // 10300..10323 6582 UNKNOWN, // 10324..1032F 6583 GOTHIC, // 10330..1034A 6584 UNKNOWN, // 1034B..1034F 6585 OLD_PERMIC, // 10350..1037A 6586 UNKNOWN, // 1037B..1037F 6587 UGARITIC, // 10380..1039D 6588 UNKNOWN, // 1039E 6589 UGARITIC, // 1039F 6590 OLD_PERSIAN, // 103A0..103C3 6591 UNKNOWN, // 103C4..103C7 6592 OLD_PERSIAN, // 103C8..103D5 6593 UNKNOWN, // 103D6..103FF 6594 DESERET, // 10400..1044F 6595 SHAVIAN, // 10450..1047F 6596 OSMANYA, // 10480..1049D 6597 UNKNOWN, // 1049E..1049F 6598 OSMANYA, // 104A0..104A9 6599 UNKNOWN, // 104AA..104FF 6600 ELBASAN, // 10500..10527 6601 UNKNOWN, // 10528..1052F 6602 CAUCASIAN_ALBANIAN, // 10530..10563 6603 UNKNOWN, // 10564..1056E 6604 CAUCASIAN_ALBANIAN, // 1056F 6605 UNKNOWN, // 10570..105FF 6606 LINEAR_A, // 10600..10736 6607 UNKNOWN, // 10737..1073F 6608 LINEAR_A, // 10740..10755 6609 UNKNOWN, // 10756..1075F 6610 LINEAR_A, // 10760..10767 6611 UNKNOWN, // 10768..107FF 6612 CYPRIOT, // 10800..10805 6613 UNKNOWN, // 10806..10807 6614 CYPRIOT, // 10808 6615 UNKNOWN, // 10809 6616 CYPRIOT, // 1080A..10835 6617 UNKNOWN, // 10836 6618 CYPRIOT, // 10837..10838 6619 UNKNOWN, // 10839..1083B 6620 CYPRIOT, // 1083C 6621 UNKNOWN, // 1083D..1083E 6622 CYPRIOT, // 1083F 6623 IMPERIAL_ARAMAIC, // 10840..10855 6624 UNKNOWN, // 10856 6625 IMPERIAL_ARAMAIC, // 10857..1085F 6626 PALMYRENE, // 10860..1087F 6627 NABATAEAN, // 10880..1089E 6628 UNKNOWN, // 1089F..108A6 6629 NABATAEAN, // 108A7..108AF 6630 UNKNOWN, // 108B0..108FF 6631 PHOENICIAN, // 10900..1091B 6632 UNKNOWN, // 1091C..1091E 6633 PHOENICIAN, // 1091F 6634 LYDIAN, // 10920..10939 6635 UNKNOWN, // 1093A..1093E 6636 LYDIAN, // 1093F 6637 UNKNOWN, // 
10940..1097F 6638 MEROITIC_HIEROGLYPHS, // 10980..1099F 6639 MEROITIC_CURSIVE, // 109A0..109B7 6640 UNKNOWN, // 109B8..109BD 6641 MEROITIC_CURSIVE, // 109BE..109BF 6642 UNKNOWN, // 109C0..109FF 6643 KHAROSHTHI, // 10A00..10A03 6644 UNKNOWN, // 10A04 6645 KHAROSHTHI, // 10A05..10A06 6646 UNKNOWN, // 10A07..10A0B 6647 KHAROSHTHI, // 10A0C..10A13 6648 UNKNOWN, // 10A14 6649 KHAROSHTHI, // 10A15..10A17 6650 UNKNOWN, // 10A18 6651 KHAROSHTHI, // 10A19..10A33 6652 UNKNOWN, // 10A34..10A37 6653 KHAROSHTHI, // 10A38..10A3A 6654 UNKNOWN, // 10A3B..10A3E 6655 KHAROSHTHI, // 10A3F..10A47 6656 UNKNOWN, // 10A48..10A4F 6657 KHAROSHTHI, // 10A50..10A58 6658 UNKNOWN, // 10A59..10A5F 6659 OLD_SOUTH_ARABIAN, // 10A60..10A7F 6660 OLD_NORTH_ARABIAN, // 10A80..10A9F 6661 UNKNOWN, // 10AA0..10ABF 6662 MANICHAEAN, // 10AC0..10AE6 6663 UNKNOWN, // 10AE7..10AEA 6664 MANICHAEAN, // 10AEB..10AF6 6665 UNKNOWN, // 10AF7..10AFF 6666 AVESTAN, // 10B00..10B35 6667 UNKNOWN, // 10B36..10B38 6668 AVESTAN, // 10B39..10B3F 6669 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 6670 UNKNOWN, // 10B56..10B57 6671 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 6672 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 6673 UNKNOWN, // 10B73..10B77 6674 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 6675 PSALTER_PAHLAVI, // 10B80..10B91 6676 UNKNOWN, // 10B92..10B98 6677 PSALTER_PAHLAVI, // 10B99..10B9C 6678 UNKNOWN, // 10B9D..10BA8 6679 PSALTER_PAHLAVI, // 10BA9..10BAF 6680 UNKNOWN, // 10BB0..10BFF 6681 OLD_TURKIC, // 10C00..10C48 6682 UNKNOWN, // 10C49..10E5F 6683 ARABIC, // 10E60..10E7E 6684 UNKNOWN, // 10E7F..10FFF 6685 BRAHMI, // 11000..1104D 6686 UNKNOWN, // 1104E..11051 6687 BRAHMI, // 11052..1106F 6688 UNKNOWN, // 11070..1107E 6689 BRAHMI, // 1107F 6690 KAITHI, // 11080..110C1 6691 UNKNOWN, // 110C2..110CF 6692 SORA_SOMPENG, // 110D0..110E8 6693 UNKNOWN, // 110E9..110EF 6694 SORA_SOMPENG, // 110F0..110F9 6695 UNKNOWN, // 110FA..110FF 6696 CHAKMA, // 11100..11134 6697 UNKNOWN, // 11135 6698 CHAKMA, // 11136..11143 6699 UNKNOWN, // 
11144..1114F 6700 MAHAJANI, // 11150..11176 6701 UNKNOWN, // 11177..1117F 6702 SHARADA, // 11180..111C8 6703 UNKNOWN, // 111C9..111CC 6704 SHARADA, // 111CD 6705 UNKNOWN, // 111CE..111CF 6706 SHARADA, // 111D0..111DA 6707 UNKNOWN, // 111DB..111E0 6708 SINHALA, // 111E1..111F4 6709 UNKNOWN, // 111F5..111FF 6710 KHOJKI, // 11200..11211 6711 UNKNOWN, // 11212 6712 KHOJKI, // 11213..1123D 6713 UNKNOWN, // 1123E..112AF 6714 KHUDAWADI, // 112B0..112EA 6715 UNKNOWN, // 112EB..112EF 6716 KHUDAWADI, // 112F0..112F9 6717 UNKNOWN, // 112FA..11300 6718 GRANTHA, // 11301..11303 6719 UNKNOWN, // 11304 6720 GRANTHA, // 11305..1130C 6721 UNKNOWN, // 1130D..1130E 6722 GRANTHA, // 1130F..11310 6723 UNKNOWN, // 11311..11312 6724 GRANTHA, // 11313..11328 6725 UNKNOWN, // 11329 6726 GRANTHA, // 1132A..11330 6727 UNKNOWN, // 11331 6728 GRANTHA, // 11332..11333 6729 UNKNOWN, // 11334 6730 GRANTHA, // 11335..11339 6731 UNKNOWN, // 1133A..1133B 6732 GRANTHA, // 1133C..11344 6733 UNKNOWN, // 11345..11346 6734 GRANTHA, // 11347..11348 6735 UNKNOWN, // 11349..1134A 6736 GRANTHA, // 1134B..1134D 6737 UNKNOWN, // 1134E..11356 6738 GRANTHA, // 11357 6739 UNKNOWN, // 11358..1135C 6740 GRANTHA, // 1135D..11363 6741 UNKNOWN, // 11364..11365 6742 GRANTHA, // 11366..1136C 6743 UNKNOWN, // 1136D..1136F 6744 GRANTHA, // 11370..11374 6745 UNKNOWN, // 11375..1147F 6746 TIRHUTA, // 11480..114C7 6747 UNKNOWN, // 114C8..114CF 6748 TIRHUTA, // 114D0..114D9 6749 UNKNOWN, // 114DA..1157F 6750 SIDDHAM, // 11580..115B5 6751 UNKNOWN, // 115B6..115B7 6752 SIDDHAM, // 115B8..115C9 6753 UNKNOWN, // 115CA..115FF 6754 MODI, // 11600..11644 6755 UNKNOWN, // 11645..1164F 6756 MODI, // 11650..11659 6757 UNKNOWN, // 1165A..1167F 6758 TAKRI, // 11680..116B7 6759 UNKNOWN, // 116B8..116BF 6760 TAKRI, // 116C0..116C9 6761 UNKNOWN, // 116CA..1189F 6762 WARANG_CITI, // 118A0..118F2 6763 UNKNOWN, // 118F3..118FE 6764 WARANG_CITI, // 118FF 6765 UNKNOWN, // 11900..11ABF 6766 PAU_CIN_HAU, // 11AC0..11AF8 6767 UNKNOWN, // 
11AF9..11FFF 6768 CUNEIFORM, // 12000..12398 6769 UNKNOWN, // 12399..123FF 6770 CUNEIFORM, // 12400..1246E 6771 UNKNOWN, // 1246F 6772 CUNEIFORM, // 12470..12474 6773 UNKNOWN, // 12475..12FFF 6774 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 6775 UNKNOWN, // 1342F..167FF 6776 BAMUM, // 16800..16A38 6777 UNKNOWN, // 16A39..16A3F 6778 MRO, // 16A40..16A5E 6779 UNKNOWN, // 16A5F 6780 MRO, // 16A60..16A69 6781 UNKNOWN, // 16A6A..16A6D 6782 MRO, // 16A6E..16A6F 6783 UNKNOWN, // 16A70..16ACF 6784 BASSA_VAH, // 16AD0..16AED 6785 UNKNOWN, // 16AEE..16AEF 6786 BASSA_VAH, // 16AF0..16AF5 6787 UNKNOWN, // 16AF6..16AFF 6788 PAHAWH_HMONG, // 16B00..16B45 6789 UNKNOWN, // 16B46..16B4F 6790 PAHAWH_HMONG, // 16B50..16B59 6791 UNKNOWN, // 16B5A 6792 PAHAWH_HMONG, // 16B5B..16B61 6793 UNKNOWN, // 16B62 6794 PAHAWH_HMONG, // 16B63..16B77 6795 UNKNOWN, // 16B78..16B7C 6796 PAHAWH_HMONG, // 16B7D..16B8F 6797 UNKNOWN, // 16B90..16EFF 6798 MIAO, // 16F00..16F44 6799 UNKNOWN, // 16F45..16F4F 6800 MIAO, // 16F50..16F7E 6801 UNKNOWN, // 16F7F..16F8E 6802 MIAO, // 16F8F..16F9F 6803 UNKNOWN, // 16FA0..1AFFF 6804 KATAKANA, // 1B000 6805 HIRAGANA, // 1B001 6806 UNKNOWN, // 1B002..1BBFF 6807 DUPLOYAN, // 1BC00..1BC6A 6808 UNKNOWN, // 1BC6B..1BC6F 6809 DUPLOYAN, // 1BC70..1BC7C 6810 UNKNOWN, // 1BC7D..1BC7F 6811 DUPLOYAN, // 1BC80..1BC88 6812 UNKNOWN, // 1BC89..1BC8F 6813 DUPLOYAN, // 1BC90..1BC99 6814 UNKNOWN, // 1BC9A..1BC9B 6815 DUPLOYAN, // 1BC9C..1BC9F 6816 COMMON, // 1BCA0..1BCA3 6817 UNKNOWN, // 1BCA4..1CFFF 6818 COMMON, // 1D000..1D0F5 6819 UNKNOWN, // 1D0F6..1D0FF 6820 COMMON, // 1D100..1D126 6821 UNKNOWN, // 1D127..1D128 6822 COMMON, // 1D129..1D166 6823 INHERITED, // 1D167..1D169 6824 COMMON, // 1D16A..1D17A 6825 INHERITED, // 1D17B..1D182 6826 COMMON, // 1D183..1D184 6827 INHERITED, // 1D185..1D18B 6828 COMMON, // 1D18C..1D1A9 6829 INHERITED, // 1D1AA..1D1AD 6830 COMMON, // 1D1AE..1D1DD 6831 UNKNOWN, // 1D1DE..1D1FF 6832 GREEK, // 1D200..1D245 6833 UNKNOWN, // 1D246..1D2FF 6834 COMMON, // 
1D300..1D356 6835 UNKNOWN, // 1D357..1D35F 6836 COMMON, // 1D360..1D371 6837 UNKNOWN, // 1D372..1D3FF 6838 COMMON, // 1D400..1D454 6839 UNKNOWN, // 1D455 6840 COMMON, // 1D456..1D49C 6841 UNKNOWN, // 1D49D 6842 COMMON, // 1D49E..1D49F 6843 UNKNOWN, // 1D4A0..1D4A1 6844 COMMON, // 1D4A2 6845 UNKNOWN, // 1D4A3..1D4A4 6846 COMMON, // 1D4A5..1D4A6 6847 UNKNOWN, // 1D4A7..1D4A8 6848 COMMON, // 1D4A9..1D4AC 6849 UNKNOWN, // 1D4AD 6850 COMMON, // 1D4AE..1D4B9 6851 UNKNOWN, // 1D4BA 6852 COMMON, // 1D4BB 6853 UNKNOWN, // 1D4BC 6854 COMMON, // 1D4BD..1D4C3 6855 UNKNOWN, // 1D4C4 6856 COMMON, // 1D4C5..1D505 6857 UNKNOWN, // 1D506 6858 COMMON, // 1D507..1D50A 6859 UNKNOWN, // 1D50B..1D50C 6860 COMMON, // 1D50D..1D514 6861 UNKNOWN, // 1D515 6862 COMMON, // 1D516..1D51C 6863 UNKNOWN, // 1D51D 6864 COMMON, // 1D51E..1D539 6865 UNKNOWN, // 1D53A 6866 COMMON, // 1D53B..1D53E 6867 UNKNOWN, // 1D53F 6868 COMMON, // 1D540..1D544 6869 UNKNOWN, // 1D545 6870 COMMON, // 1D546 6871 UNKNOWN, // 1D547..1D549 6872 COMMON, // 1D54A..1D550 6873 UNKNOWN, // 1D551 6874 COMMON, // 1D552..1D6A5 6875 UNKNOWN, // 1D6A6..1D6A7 6876 COMMON, // 1D6A8..1D7CB 6877 UNKNOWN, // 1D7CC..1D7CD 6878 COMMON, // 1D7CE..1D7FF 6879 UNKNOWN, // 1D800..1E7FF 6880 MENDE_KIKAKUI, // 1E800..1E8C4 6881 UNKNOWN, // 1E8C5..1E8C6 6882 MENDE_KIKAKUI, // 1E8C7..1E8D6 6883 UNKNOWN, // 1E8D7..1EDFF 6884 ARABIC, // 1EE00..1EE03 6885 UNKNOWN, // 1EE04 6886 ARABIC, // 1EE05..1EE1F 6887 UNKNOWN, // 1EE20 6888 ARABIC, // 1EE21..1EE22 6889 UNKNOWN, // 1EE23 6890 ARABIC, // 1EE24 6891 UNKNOWN, // 1EE25..1EE26 6892 ARABIC, // 1EE27 6893 UNKNOWN, // 1EE28 6894 ARABIC, // 1EE29..1EE32 6895 UNKNOWN, // 1EE33 6896 ARABIC, // 1EE34..1EE37 6897 UNKNOWN, // 1EE38 6898 ARABIC, // 1EE39 6899 UNKNOWN, // 1EE3A 6900 ARABIC, // 1EE3B 6901 UNKNOWN, // 1EE3C..1EE41 6902 ARABIC, // 1EE42 6903 UNKNOWN, // 1EE43..1EE46 6904 ARABIC, // 1EE47 6905 UNKNOWN, // 1EE48 6906 ARABIC, // 1EE49 6907 UNKNOWN, // 1EE4A 6908 ARABIC, // 1EE4B 6909 UNKNOWN, // 
1EE4C 6910 ARABIC, // 1EE4D..1EE4F 6911 UNKNOWN, // 1EE50 6912 ARABIC, // 1EE51..1EE52 6913 UNKNOWN, // 1EE53 6914 ARABIC, // 1EE54 6915 UNKNOWN, // 1EE55..1EE56 6916 ARABIC, // 1EE57 6917 UNKNOWN, // 1EE58 6918 ARABIC, // 1EE59 6919 UNKNOWN, // 1EE5A 6920 ARABIC, // 1EE5B 6921 UNKNOWN, // 1EE5C 6922 ARABIC, // 1EE5D 6923 UNKNOWN, // 1EE5E 6924 ARABIC, // 1EE5F 6925 UNKNOWN, // 1EE60 6926 ARABIC, // 1EE61..1EE62 6927 UNKNOWN, // 1EE63 6928 ARABIC, // 1EE64 6929 UNKNOWN, // 1EE65..1EE66 6930 ARABIC, // 1EE67..1EE6A 6931 UNKNOWN, // 1EE6B 6932 ARABIC, // 1EE6C..1EE72 6933 UNKNOWN, // 1EE73 6934 ARABIC, // 1EE74..1EE77 6935 UNKNOWN, // 1EE78 6936 ARABIC, // 1EE79..1EE7C 6937 UNKNOWN, // 1EE7D 6938 ARABIC, // 1EE7E 6939 UNKNOWN, // 1EE7F 6940 ARABIC, // 1EE80..1EE89 6941 UNKNOWN, // 1EE8A 6942 ARABIC, // 1EE8B..1EE9B 6943 UNKNOWN, // 1EE9C..1EEA0 6944 ARABIC, // 1EEA1..1EEA3 6945 UNKNOWN, // 1EEA4 6946 ARABIC, // 1EEA5..1EEA9 6947 UNKNOWN, // 1EEAA 6948 ARABIC, // 1EEAB..1EEBB 6949 UNKNOWN, // 1EEBC..1EEEF 6950 ARABIC, // 1EEF0..1EEF1 6951 UNKNOWN, // 1EEF2..1EFFF 6952 COMMON, // 1F000..1F02B 6953 UNKNOWN, // 1F02C..1F02F 6954 COMMON, // 1F030..1F093 6955 UNKNOWN, // 1F094..1F09F 6956 COMMON, // 1F0A0..1F0AE 6957 UNKNOWN, // 1F0AF..1F0B0 6958 COMMON, // 1F0B1..1F0BF 6959 UNKNOWN, // 1F0C0 6960 COMMON, // 1F0C1..1F0CF 6961 UNKNOWN, // 1F0D0 6962 COMMON, // 1F0D1..1F0F5 6963 UNKNOWN, // 1F0F6..1F0FF 6964 COMMON, // 1F100..1F10C 6965 UNKNOWN, // 1F10D..1F10F 6966 COMMON, // 1F110..1F12E 6967 UNKNOWN, // 1F12F 6968 COMMON, // 1F130..1F16B 6969 UNKNOWN, // 1F16C..1F16F 6970 COMMON, // 1F170..1F19A 6971 UNKNOWN, // 1F19B..1F1E5 6972 COMMON, // 1F1E6..1F1FF 6973 HIRAGANA, // 1F200 6974 COMMON, // 1F201..1F202 6975 UNKNOWN, // 1F203..1F20F 6976 COMMON, // 1F210..1F23A 6977 UNKNOWN, // 1F23B..1F23F 6978 COMMON, // 1F240..1F248 6979 UNKNOWN, // 1F249..1F24F 6980 COMMON, // 1F250..1F251 6981 UNKNOWN, // 1F252..1F2FF 6982 COMMON, // 1F300..1F32C 6983 UNKNOWN, // 1F32D..1F32F 6984 
COMMON, // 1F330..1F37D 6985 UNKNOWN, // 1F37E..1F37F 6986 COMMON, // 1F380..1F3CE 6987 UNKNOWN, // 1F3CF..1F3D3 6988 COMMON, // 1F3D4..1F3F7 6989 UNKNOWN, // 1F3F8..1F3FF 6990 COMMON, // 1F400..1F4FE 6991 UNKNOWN, // 1F4FF 6992 COMMON, // 1F500..1F54A 6993 UNKNOWN, // 1F54B..1F54F 6994 COMMON, // 1F550..1F579 6995 UNKNOWN, // 1F57A 6996 COMMON, // 1F57B..1F5A3 6997 UNKNOWN, // 1F5A4 6998 COMMON, // 1F5A5..1F642 6999 UNKNOWN, // 1F643..1F644 7000 COMMON, // 1F645..1F6CF 7001 UNKNOWN, // 1F6D0..1F6DF 7002 COMMON, // 1F6E0..1F6EC 7003 UNKNOWN, // 1F6ED..1F6EF 7004 COMMON, // 1F6F0..1F6F3 7005 UNKNOWN, // 1F6F4..1F6FF 7006 COMMON, // 1F700..1F773 7007 UNKNOWN, // 1F774..1F77F 7008 COMMON, // 1F780..1F7D4 7009 UNKNOWN, // 1F7D5..1F7FF 7010 COMMON, // 1F800..1F80B 7011 UNKNOWN, // 1F80C..1F80F 7012 COMMON, // 1F810..1F847 7013 UNKNOWN, // 1F848..1F84F 7014 COMMON, // 1F850..1F859 7015 UNKNOWN, // 1F85A..1F85F 7016 COMMON, // 1F860..1F887 7017 UNKNOWN, // 1F888..1F88F 7018 COMMON, // 1F890..1F8AD 7019 UNKNOWN, // 1F8AE..1FFFF 7020 HAN, // 20000..2A6D6 7021 UNKNOWN, // 2A6D7..2A6FF 7022 HAN, // 2A700..2B734 7023 UNKNOWN, // 2B735..2B73F 7024 HAN, // 2B740..2B81D 7025 UNKNOWN, // 2B81E..2F7FF 7026 HAN, // 2F800..2FA1D 7027 UNKNOWN, // 2FA1E..E0000 7028 COMMON, // E0001 7029 UNKNOWN, // E0002..E001F 7030 COMMON, // E0020..E007F 7031 UNKNOWN, // E0080..E00FF 7032 INHERITED, // E0100..E01EF 7033 UNKNOWN // E01F0..10FFFF 7034 }; 7035 7036 private static HashMap<String, Character.UnicodeScript> aliases; 7037 static { 7038 aliases = new HashMap<>(128); 7039 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 7040 aliases.put("ARAB", ARABIC); 7041 aliases.put("ARMI", IMPERIAL_ARAMAIC); 7042 aliases.put("ARMN", ARMENIAN); 7043 aliases.put("AVST", AVESTAN); 7044 aliases.put("BALI", BALINESE); 7045 aliases.put("BAMU", BAMUM); 7046 aliases.put("BASS", BASSA_VAH); 7047 aliases.put("BATK", BATAK); 7048 aliases.put("BENG", BENGALI); 7049 aliases.put("BOPO", BOPOMOFO); 7050 aliases.put("BRAH", 
BRAHMI); 7051 aliases.put("BRAI", BRAILLE); 7052 aliases.put("BUGI", BUGINESE); 7053 aliases.put("BUHD", BUHID); 7054 aliases.put("CAKM", CHAKMA); 7055 aliases.put("CANS", CANADIAN_ABORIGINAL); 7056 aliases.put("CARI", CARIAN); 7057 aliases.put("CHAM", CHAM); 7058 aliases.put("CHER", CHEROKEE); 7059 aliases.put("COPT", COPTIC); 7060 aliases.put("CPRT", CYPRIOT); 7061 aliases.put("CYRL", CYRILLIC); 7062 aliases.put("DEVA", DEVANAGARI); 7063 aliases.put("DSRT", DESERET); 7064 aliases.put("DUPL", DUPLOYAN); 7065 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 7066 aliases.put("ELBA", ELBASAN); 7067 aliases.put("ETHI", ETHIOPIC); 7068 aliases.put("GEOR", GEORGIAN); 7069 aliases.put("GLAG", GLAGOLITIC); 7070 aliases.put("GOTH", GOTHIC); 7071 aliases.put("GRAN", GRANTHA); 7072 aliases.put("GREK", GREEK); 7073 aliases.put("GUJR", GUJARATI); 7074 aliases.put("GURU", GURMUKHI); 7075 aliases.put("HANG", HANGUL); 7076 aliases.put("HANI", HAN); 7077 aliases.put("HANO", HANUNOO); 7078 aliases.put("HEBR", HEBREW); 7079 aliases.put("HIRA", HIRAGANA); 7080 aliases.put("HMNG", PAHAWH_HMONG); 7081 // it appears we don't have the KATAKANA_OR_HIRAGANA 7082 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 7083 aliases.put("ITAL", OLD_ITALIC); 7084 aliases.put("JAVA", JAVANESE); 7085 aliases.put("KALI", KAYAH_LI); 7086 aliases.put("KANA", KATAKANA); 7087 aliases.put("KHAR", KHAROSHTHI); 7088 aliases.put("KHMR", KHMER); 7089 aliases.put("KHOJ", KHOJKI); 7090 aliases.put("KNDA", KANNADA); 7091 aliases.put("KTHI", KAITHI); 7092 aliases.put("LANA", TAI_THAM); 7093 aliases.put("LAOO", LAO); 7094 aliases.put("LATN", LATIN); 7095 aliases.put("LEPC", LEPCHA); 7096 aliases.put("LIMB", LIMBU); 7097 aliases.put("LINA", LINEAR_A); 7098 aliases.put("LINB", LINEAR_B); 7099 aliases.put("LISU", LISU); 7100 aliases.put("LYCI", LYCIAN); 7101 aliases.put("LYDI", LYDIAN); 7102 aliases.put("MAHJ", MAHAJANI); 7103 aliases.put("MAND", MANDAIC); 7104 aliases.put("MANI", MANICHAEAN); 7105 aliases.put("MEND", 
MENDE_KIKAKUI); 7106 aliases.put("MERC", MEROITIC_CURSIVE); 7107 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 7108 aliases.put("MLYM", MALAYALAM); 7109 aliases.put("MODI", MODI); 7110 aliases.put("MONG", MONGOLIAN); 7111 aliases.put("MROO", MRO); 7112 aliases.put("MTEI", MEETEI_MAYEK); 7113 aliases.put("MYMR", MYANMAR); 7114 aliases.put("NARB", OLD_NORTH_ARABIAN); 7115 aliases.put("NBAT", NABATAEAN); 7116 aliases.put("NKOO", NKO); 7117 aliases.put("OGAM", OGHAM); 7118 aliases.put("OLCK", OL_CHIKI); 7119 aliases.put("ORKH", OLD_TURKIC); 7120 aliases.put("ORYA", ORIYA); 7121 aliases.put("OSMA", OSMANYA); 7122 aliases.put("PALM", PALMYRENE); 7123 aliases.put("PAUC", PAU_CIN_HAU); 7124 aliases.put("PERM", OLD_PERMIC); 7125 aliases.put("PHAG", PHAGS_PA); 7126 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 7127 aliases.put("PHLP", PSALTER_PAHLAVI); 7128 aliases.put("PHNX", PHOENICIAN); 7129 aliases.put("PLRD", MIAO); 7130 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 7131 aliases.put("RJNG", REJANG); 7132 aliases.put("RUNR", RUNIC); 7133 aliases.put("SAMR", SAMARITAN); 7134 aliases.put("SARB", OLD_SOUTH_ARABIAN); 7135 aliases.put("SAUR", SAURASHTRA); 7136 aliases.put("SHAW", SHAVIAN); 7137 aliases.put("SHRD", SHARADA); 7138 aliases.put("SIDD", SIDDHAM); 7139 aliases.put("SIND", KHUDAWADI); 7140 aliases.put("SINH", SINHALA); 7141 aliases.put("SORA", SORA_SOMPENG); 7142 aliases.put("SUND", SUNDANESE); 7143 aliases.put("SYLO", SYLOTI_NAGRI); 7144 aliases.put("SYRC", SYRIAC); 7145 aliases.put("TAGB", TAGBANWA); 7146 aliases.put("TAKR", TAKRI); 7147 aliases.put("TALE", TAI_LE); 7148 aliases.put("TALU", NEW_TAI_LUE); 7149 aliases.put("TAML", TAMIL); 7150 aliases.put("TAVT", TAI_VIET); 7151 aliases.put("TELU", TELUGU); 7152 aliases.put("TFNG", TIFINAGH); 7153 aliases.put("TGLG", TAGALOG); 7154 aliases.put("THAA", THAANA); 7155 aliases.put("THAI", THAI); 7156 aliases.put("TIBT", TIBETAN); 7157 aliases.put("TIRH", TIRHUTA); 7158 aliases.put("UGAR", UGARITIC); 7159 aliases.put("VAII", 
VAI); 7160 aliases.put("WARA", WARANG_CITI); 7161 aliases.put("XPEO", OLD_PERSIAN); 7162 aliases.put("XSUX", CUNEIFORM); 7163 aliases.put("YIII", YI); 7164 aliases.put("ZINH", INHERITED); 7165 aliases.put("ZYYY", COMMON); 7166 aliases.put("ZZZZ", UNKNOWN); 7167 } 7168 7169 /** 7170 * Returns the enum constant representing the Unicode script of which 7171 * the given character (Unicode code point) is assigned to. 7172 * 7173 * @param codePoint the character (Unicode code point) in question. 7174 * @return The {@code UnicodeScript} constant representing the 7175 * Unicode script of which this character is assigned to. 7176 * 7177 * @exception IllegalArgumentException if the specified 7178 * {@code codePoint} is an invalid Unicode code point. 7179 * @see Character#isValidCodePoint(int) 7180 * 7181 */ 7182 public static UnicodeScript of(int codePoint) { 7183 if (!isValidCodePoint(codePoint)) 7184 throw new IllegalArgumentException(); 7185 int type = getType(codePoint); 7186 // leave SURROGATE and PRIVATE_USE for table lookup 7187 if (type == UNASSIGNED) 7188 return UNKNOWN; 7189 int index = Arrays.binarySearch(scriptStarts, codePoint); 7190 if (index < 0) 7191 index = -index - 2; 7192 return scripts[index]; 7193 } 7194 7195 /** 7196 * Returns the UnicodeScript constant with the given Unicode script 7197 * name or the script name alias. Script names and their aliases are 7198 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 7199 * and {@code PropertyValueAliases<version>.txt} define script names 7200 * and the script name aliases for a particular version of the 7201 * standard. The {@link Character} class specifies the version of 7202 * the standard that it supports. 7203 * <p> 7204 * Character case is ignored for all of the valid script names. 7205 * The en_US locale's case mapping rules are used to provide 7206 * case-insensitive string comparisons for script name validation. 7207 * 7208 * @param scriptName A {@code UnicodeScript} name. 
7209 * @return The {@code UnicodeScript} constant identified 7210 * by {@code scriptName} 7211 * @throws IllegalArgumentException if {@code scriptName} is an 7212 * invalid name 7213 * @throws NullPointerException if {@code scriptName} is null 7214 */ 7215 public static final UnicodeScript forName(String scriptName) { 7216 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 7217 //.replace(' ', '_')); 7218 UnicodeScript sc = aliases.get(scriptName); 7219 if (sc != null) 7220 return sc; 7221 return valueOf(scriptName); 7222 } 7223 } 7224 7225 /** 7226 * The value of the {@code Character}. 7227 * 7228 * @serial 7229 */ 7230 private final char value; 7231 7232 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 7233 private static final long serialVersionUID = 3786198910865385080L; 7234 7235 /** 7236 * Constructs a newly allocated {@code Character} object that 7237 * represents the specified {@code char} value. 7238 * 7239 * @param value the value to be represented by the 7240 * {@code Character} object. 7241 */ 7242 public Character(char value) { 7243 this.value = value; 7244 } 7245 7246 private static class CharacterCache { 7247 private CharacterCache(){} 7248 7249 static final Character cache[] = new Character[127 + 1]; 7250 7251 static { 7252 for (int i = 0; i < cache.length; i++) 7253 cache[i] = new Character((char)i); 7254 } 7255 } 7256 7257 /** 7258 * Returns a {@code Character} instance representing the specified 7259 * {@code char} value. 7260 * If a new {@code Character} instance is not required, this method 7261 * should generally be used in preference to the constructor 7262 * {@link #Character(char)}, as this method is likely to yield 7263 * significantly better space and time performance by caching 7264 * frequently requested values. 7265 * 7266 * This method will always cache values in the range {@code 7267 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 7268 * cache other values outside of this range. 
7269 * 7270 * @param c a char value. 7271 * @return a {@code Character} instance representing {@code c}. 7272 * @since 1.5 7273 */ 7274 @HotSpotIntrinsicCandidate 7275 public static Character valueOf(char c) { 7276 if (c <= 127) { // must cache 7277 return CharacterCache.cache[(int)c]; 7278 } 7279 return new Character(c); 7280 } 7281 7282 /** 7283 * Returns the value of this {@code Character} object. 7284 * @return the primitive {@code char} value represented by 7285 * this object. 7286 */ 7287 @HotSpotIntrinsicCandidate 7288 public char charValue() { 7289 return value; 7290 } 7291 7292 /** 7293 * Returns a hash code for this {@code Character}; equal to the result 7294 * of invoking {@code charValue()}. 7295 * 7296 * @return a hash code value for this {@code Character} 7297 */ 7298 @Override 7299 public int hashCode() { 7300 return Character.hashCode(value); 7301 } 7302 7303 /** 7304 * Returns a hash code for a {@code char} value; compatible with 7305 * {@code Character.hashCode()}. 7306 * 7307 * @since 1.8 7308 * 7309 * @param value The {@code char} for which to return a hash code. 7310 * @return a hash code value for a {@code char} value. 7311 */ 7312 public static int hashCode(char value) { 7313 return (int)value; 7314 } 7315 7316 /** 7317 * Compares this object against the specified object. 7318 * The result is {@code true} if and only if the argument is not 7319 * {@code null} and is a {@code Character} object that 7320 * represents the same {@code char} value as this object. 7321 * 7322 * @param obj the object to compare with. 7323 * @return {@code true} if the objects are the same; 7324 * {@code false} otherwise. 7325 */ 7326 public boolean equals(Object obj) { 7327 if (obj instanceof Character) { 7328 return value == ((Character)obj).charValue(); 7329 } 7330 return false; 7331 } 7332 7333 /** 7334 * Returns a {@code String} object representing this 7335 * {@code Character}'s value. 
The result is a string of 7336 * length 1 whose sole component is the primitive 7337 * {@code char} value represented by this 7338 * {@code Character} object. 7339 * 7340 * @return a string representation of this object. 7341 */ 7342 public String toString() { 7343 char buf[] = {value}; 7344 return String.valueOf(buf); 7345 } 7346 7347 /** 7348 * Returns a {@code String} object representing the 7349 * specified {@code char}. The result is a string of length 7350 * 1 consisting solely of the specified {@code char}. 7351 * 7352 * @param c the {@code char} to be converted 7353 * @return the string representation of the specified {@code char} 7354 * @since 1.4 7355 */ 7356 public static String toString(char c) { 7357 return String.valueOf(c); 7358 } 7359 7360 /** 7361 * Determines whether the specified code point is a valid 7362 * <a href="http://www.unicode.org/glossary/#code_point"> 7363 * Unicode code point value</a>. 7364 * 7365 * @param codePoint the Unicode code point to be tested 7366 * @return {@code true} if the specified code point value is between 7367 * {@link #MIN_CODE_POINT} and 7368 * {@link #MAX_CODE_POINT} inclusive; 7369 * {@code false} otherwise. 7370 * @since 1.5 7371 */ 7372 public static boolean isValidCodePoint(int codePoint) { 7373 // Optimized form of: 7374 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 7375 int plane = codePoint >>> 16; 7376 return plane < ((MAX_CODE_POINT + 1) >>> 16); 7377 } 7378 7379 /** 7380 * Determines whether the specified character (Unicode code point) 7381 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 7382 * Such code points can be represented using a single {@code char}. 7383 * 7384 * @param codePoint the character (Unicode code point) to be tested 7385 * @return {@code true} if the specified code point is between 7386 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 7387 * {@code false} otherwise. 
7388 * @since 1.7 7389 */ 7390 public static boolean isBmpCodePoint(int codePoint) { 7391 return codePoint >>> 16 == 0; 7392 // Optimized form of: 7393 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 7394 // We consistently use logical shift (>>>) to facilitate 7395 // additional runtime optimizations. 7396 } 7397 7398 /** 7399 * Determines whether the specified character (Unicode code point) 7400 * is in the <a href="#supplementary">supplementary character</a> range. 7401 * 7402 * @param codePoint the character (Unicode code point) to be tested 7403 * @return {@code true} if the specified code point is between 7404 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 7405 * {@link #MAX_CODE_POINT} inclusive; 7406 * {@code false} otherwise. 7407 * @since 1.5 7408 */ 7409 public static boolean isSupplementaryCodePoint(int codePoint) { 7410 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 7411 && codePoint < MAX_CODE_POINT + 1; 7412 } 7413 7414 /** 7415 * Determines if the given {@code char} value is a 7416 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 7417 * Unicode high-surrogate code unit</a> 7418 * (also known as <i>leading-surrogate code unit</i>). 7419 * 7420 * <p>Such values do not represent characters by themselves, 7421 * but are used in the representation of 7422 * <a href="#supplementary">supplementary characters</a> 7423 * in the UTF-16 encoding. 7424 * 7425 * @param ch the {@code char} value to be tested. 7426 * @return {@code true} if the {@code char} value is between 7427 * {@link #MIN_HIGH_SURROGATE} and 7428 * {@link #MAX_HIGH_SURROGATE} inclusive; 7429 * {@code false} otherwise. 
7430 * @see Character#isLowSurrogate(char) 7431 * @see Character.UnicodeBlock#of(int) 7432 * @since 1.5 7433 */ 7434 public static boolean isHighSurrogate(char ch) { 7435 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 7436 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 7437 } 7438 7439 /** 7440 * Determines if the given {@code char} value is a 7441 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 7442 * Unicode low-surrogate code unit</a> 7443 * (also known as <i>trailing-surrogate code unit</i>). 7444 * 7445 * <p>Such values do not represent characters by themselves, 7446 * but are used in the representation of 7447 * <a href="#supplementary">supplementary characters</a> 7448 * in the UTF-16 encoding. 7449 * 7450 * @param ch the {@code char} value to be tested. 7451 * @return {@code true} if the {@code char} value is between 7452 * {@link #MIN_LOW_SURROGATE} and 7453 * {@link #MAX_LOW_SURROGATE} inclusive; 7454 * {@code false} otherwise. 7455 * @see Character#isHighSurrogate(char) 7456 * @since 1.5 7457 */ 7458 public static boolean isLowSurrogate(char ch) { 7459 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 7460 } 7461 7462 /** 7463 * Determines if the given {@code char} value is a Unicode 7464 * <i>surrogate code unit</i>. 7465 * 7466 * <p>Such values do not represent characters by themselves, 7467 * but are used in the representation of 7468 * <a href="#supplementary">supplementary characters</a> 7469 * in the UTF-16 encoding. 7470 * 7471 * <p>A char value is a surrogate code unit if and only if it is either 7472 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 7473 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 7474 * 7475 * @param ch the {@code char} value to be tested. 7476 * @return {@code true} if the {@code char} value is between 7477 * {@link #MIN_SURROGATE} and 7478 * {@link #MAX_SURROGATE} inclusive; 7479 * {@code false} otherwise. 
7480 * @since 1.7 7481 */ 7482 public static boolean isSurrogate(char ch) { 7483 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 7484 } 7485 7486 /** 7487 * Determines whether the specified pair of {@code char} 7488 * values is a valid 7489 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 7490 * Unicode surrogate pair</a>. 7491 7492 * <p>This method is equivalent to the expression: 7493 * <blockquote><pre>{@code 7494 * isHighSurrogate(high) && isLowSurrogate(low) 7495 * }</pre></blockquote> 7496 * 7497 * @param high the high-surrogate code value to be tested 7498 * @param low the low-surrogate code value to be tested 7499 * @return {@code true} if the specified high and 7500 * low-surrogate code values represent a valid surrogate pair; 7501 * {@code false} otherwise. 7502 * @since 1.5 7503 */ 7504 public static boolean isSurrogatePair(char high, char low) { 7505 return isHighSurrogate(high) && isLowSurrogate(low); 7506 } 7507 7508 /** 7509 * Determines the number of {@code char} values needed to 7510 * represent the specified character (Unicode code point). If the 7511 * specified character is equal to or greater than 0x10000, then 7512 * the method returns 2. Otherwise, the method returns 1. 7513 * 7514 * <p>This method doesn't validate the specified character to be a 7515 * valid Unicode code point. The caller must validate the 7516 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 7517 * if necessary. 7518 * 7519 * @param codePoint the character (Unicode code point) to be tested. 7520 * @return 2 if the character is a valid supplementary character; 1 otherwise. 7521 * @see Character#isSupplementaryCodePoint(int) 7522 * @since 1.5 7523 */ 7524 public static int charCount(int codePoint) { 7525 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 7526 } 7527 7528 /** 7529 * Converts the specified surrogate pair to its supplementary code 7530 * point value. 
This method does not validate the specified 7531 * surrogate pair. The caller must validate it using {@link 7532 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 7533 * 7534 * @param high the high-surrogate code unit 7535 * @param low the low-surrogate code unit 7536 * @return the supplementary code point composed from the 7537 * specified surrogate pair. 7538 * @since 1.5 7539 */ 7540 public static int toCodePoint(char high, char low) { 7541 // Optimized form of: 7542 // return ((high - MIN_HIGH_SURROGATE) << 10) 7543 // + (low - MIN_LOW_SURROGATE) 7544 // + MIN_SUPPLEMENTARY_CODE_POINT; 7545 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 7546 - (MIN_HIGH_SURROGATE << 10) 7547 - MIN_LOW_SURROGATE); 7548 } 7549 7550 /** 7551 * Returns the code point at the given index of the 7552 * {@code CharSequence}. If the {@code char} value at 7553 * the given index in the {@code CharSequence} is in the 7554 * high-surrogate range, the following index is less than the 7555 * length of the {@code CharSequence}, and the 7556 * {@code char} value at the following index is in the 7557 * low-surrogate range, then the supplementary code point 7558 * corresponding to this surrogate pair is returned. Otherwise, 7559 * the {@code char} value at the given index is returned. 7560 * 7561 * @param seq a sequence of {@code char} values (Unicode code 7562 * units) 7563 * @param index the index to the {@code char} values (Unicode 7564 * code units) in {@code seq} to be converted 7565 * @return the Unicode code point at the given index 7566 * @exception NullPointerException if {@code seq} is null. 7567 * @exception IndexOutOfBoundsException if the value 7568 * {@code index} is negative or not less than 7569 * {@link CharSequence#length() seq.length()}. 
7570 * @since 1.5 7571 */ 7572 public static int codePointAt(CharSequence seq, int index) { 7573 char c1 = seq.charAt(index); 7574 if (isHighSurrogate(c1) && ++index < seq.length()) { 7575 char c2 = seq.charAt(index); 7576 if (isLowSurrogate(c2)) { 7577 return toCodePoint(c1, c2); 7578 } 7579 } 7580 return c1; 7581 } 7582 7583 /** 7584 * Returns the code point at the given index of the 7585 * {@code char} array. If the {@code char} value at 7586 * the given index in the {@code char} array is in the 7587 * high-surrogate range, the following index is less than the 7588 * length of the {@code char} array, and the 7589 * {@code char} value at the following index is in the 7590 * low-surrogate range, then the supplementary code point 7591 * corresponding to this surrogate pair is returned. Otherwise, 7592 * the {@code char} value at the given index is returned. 7593 * 7594 * @param a the {@code char} array 7595 * @param index the index to the {@code char} values (Unicode 7596 * code units) in the {@code char} array to be converted 7597 * @return the Unicode code point at the given index 7598 * @exception NullPointerException if {@code a} is null. 7599 * @exception IndexOutOfBoundsException if the value 7600 * {@code index} is negative or not less than 7601 * the length of the {@code char} array. 7602 * @since 1.5 7603 */ 7604 public static int codePointAt(char[] a, int index) { 7605 return codePointAtImpl(a, index, a.length); 7606 } 7607 7608 /** 7609 * Returns the code point at the given index of the 7610 * {@code char} array, where only array elements with 7611 * {@code index} less than {@code limit} can be used. 
If 7612 * the {@code char} value at the given index in the 7613 * {@code char} array is in the high-surrogate range, the 7614 * following index is less than the {@code limit}, and the 7615 * {@code char} value at the following index is in the 7616 * low-surrogate range, then the supplementary code point 7617 * corresponding to this surrogate pair is returned. Otherwise, 7618 * the {@code char} value at the given index is returned. 7619 * 7620 * @param a the {@code char} array 7621 * @param index the index to the {@code char} values (Unicode 7622 * code units) in the {@code char} array to be converted 7623 * @param limit the index after the last array element that 7624 * can be used in the {@code char} array 7625 * @return the Unicode code point at the given index 7626 * @exception NullPointerException if {@code a} is null. 7627 * @exception IndexOutOfBoundsException if the {@code index} 7628 * argument is negative or not less than the {@code limit} 7629 * argument, or if the {@code limit} argument is negative or 7630 * greater than the length of the {@code char} array. 7631 * @since 1.5 7632 */ 7633 public static int codePointAt(char[] a, int index, int limit) { 7634 if (index >= limit || limit < 0 || limit > a.length) { 7635 throw new IndexOutOfBoundsException(); 7636 } 7637 return codePointAtImpl(a, index, limit); 7638 } 7639 7640 // throws ArrayIndexOutOfBoundsException if index out of bounds 7641 static int codePointAtImpl(char[] a, int index, int limit) { 7642 char c1 = a[index]; 7643 if (isHighSurrogate(c1) && ++index < limit) { 7644 char c2 = a[index]; 7645 if (isLowSurrogate(c2)) { 7646 return toCodePoint(c1, c2); 7647 } 7648 } 7649 return c1; 7650 } 7651 7652 /** 7653 * Returns the code point preceding the given index of the 7654 * {@code CharSequence}. 
If the {@code char} value at 7655 * {@code (index - 1)} in the {@code CharSequence} is in 7656 * the low-surrogate range, {@code (index - 2)} is not 7657 * negative, and the {@code char} value at {@code (index - 2)} 7658 * in the {@code CharSequence} is in the 7659 * high-surrogate range, then the supplementary code point 7660 * corresponding to this surrogate pair is returned. Otherwise, 7661 * the {@code char} value at {@code (index - 1)} is 7662 * returned. 7663 * 7664 * @param seq the {@code CharSequence} instance 7665 * @param index the index following the code point that should be returned 7666 * @return the Unicode code point value before the given index. 7667 * @exception NullPointerException if {@code seq} is null. 7668 * @exception IndexOutOfBoundsException if the {@code index} 7669 * argument is less than 1 or greater than {@link 7670 * CharSequence#length() seq.length()}. 7671 * @since 1.5 7672 */ 7673 public static int codePointBefore(CharSequence seq, int index) { 7674 char c2 = seq.charAt(--index); 7675 if (isLowSurrogate(c2) && index > 0) { 7676 char c1 = seq.charAt(--index); 7677 if (isHighSurrogate(c1)) { 7678 return toCodePoint(c1, c2); 7679 } 7680 } 7681 return c2; 7682 } 7683 7684 /** 7685 * Returns the code point preceding the given index of the 7686 * {@code char} array. If the {@code char} value at 7687 * {@code (index - 1)} in the {@code char} array is in 7688 * the low-surrogate range, {@code (index - 2)} is not 7689 * negative, and the {@code char} value at {@code (index - 2)} 7690 * in the {@code char} array is in the 7691 * high-surrogate range, then the supplementary code point 7692 * corresponding to this surrogate pair is returned. Otherwise, 7693 * the {@code char} value at {@code (index - 1)} is 7694 * returned. 7695 * 7696 * @param a the {@code char} array 7697 * @param index the index following the code point that should be returned 7698 * @return the Unicode code point value before the given index. 
7699 * @exception NullPointerException if {@code a} is null. 7700 * @exception IndexOutOfBoundsException if the {@code index} 7701 * argument is less than 1 or greater than the length of the 7702 * {@code char} array 7703 * @since 1.5 7704 */ 7705 public static int codePointBefore(char[] a, int index) { 7706 return codePointBeforeImpl(a, index, 0); 7707 } 7708 7709 /** 7710 * Returns the code point preceding the given index of the 7711 * {@code char} array, where only array elements with 7712 * {@code index} greater than or equal to {@code start} 7713 * can be used. If the {@code char} value at {@code (index - 1)} 7714 * in the {@code char} array is in the 7715 * low-surrogate range, {@code (index - 2)} is not less than 7716 * {@code start}, and the {@code char} value at 7717 * {@code (index - 2)} in the {@code char} array is in 7718 * the high-surrogate range, then the supplementary code point 7719 * corresponding to this surrogate pair is returned. Otherwise, 7720 * the {@code char} value at {@code (index - 1)} is 7721 * returned. 7722 * 7723 * @param a the {@code char} array 7724 * @param index the index following the code point that should be returned 7725 * @param start the index of the first array element in the 7726 * {@code char} array 7727 * @return the Unicode code point value before the given index. 7728 * @exception NullPointerException if {@code a} is null. 7729 * @exception IndexOutOfBoundsException if the {@code index} 7730 * argument is not greater than the {@code start} argument or 7731 * is greater than the length of the {@code char} array, or 7732 * if the {@code start} argument is negative or not less than 7733 * the length of the {@code char} array. 
7734 * @since 1.5 7735 */ 7736 public static int codePointBefore(char[] a, int index, int start) { 7737 if (index <= start || start < 0 || start >= a.length) { 7738 throw new IndexOutOfBoundsException(); 7739 } 7740 return codePointBeforeImpl(a, index, start); 7741 } 7742 7743 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 7744 static int codePointBeforeImpl(char[] a, int index, int start) { 7745 char c2 = a[--index]; 7746 if (isLowSurrogate(c2) && index > start) { 7747 char c1 = a[--index]; 7748 if (isHighSurrogate(c1)) { 7749 return toCodePoint(c1, c2); 7750 } 7751 } 7752 return c2; 7753 } 7754 7755 /** 7756 * Returns the leading surrogate (a 7757 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 7758 * high surrogate code unit</a>) of the 7759 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 7760 * surrogate pair</a> 7761 * representing the specified supplementary character (Unicode 7762 * code point) in the UTF-16 encoding. If the specified character 7763 * is not a 7764 * <a href="Character.html#supplementary">supplementary character</a>, 7765 * an unspecified {@code char} is returned. 7766 * 7767 * <p>If 7768 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 7769 * is {@code true}, then 7770 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 7771 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 7772 * are also always {@code true}. 
7773 * 7774 * @param codePoint a supplementary character (Unicode code point) 7775 * @return the leading surrogate code unit used to represent the 7776 * character in the UTF-16 encoding 7777 * @since 1.7 7778 */ 7779 public static char highSurrogate(int codePoint) { 7780 return (char) ((codePoint >>> 10) 7781 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 7782 } 7783 7784 /** 7785 * Returns the trailing surrogate (a 7786 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 7787 * low surrogate code unit</a>) of the 7788 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 7789 * surrogate pair</a> 7790 * representing the specified supplementary character (Unicode 7791 * code point) in the UTF-16 encoding. If the specified character 7792 * is not a 7793 * <a href="Character.html#supplementary">supplementary character</a>, 7794 * an unspecified {@code char} is returned. 7795 * 7796 * <p>If 7797 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 7798 * is {@code true}, then 7799 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 7800 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 7801 * are also always {@code true}. 7802 * 7803 * @param codePoint a supplementary character (Unicode code point) 7804 * @return the trailing surrogate code unit used to represent the 7805 * character in the UTF-16 encoding 7806 * @since 1.7 7807 */ 7808 public static char lowSurrogate(int codePoint) { 7809 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 7810 } 7811 7812 /** 7813 * Converts the specified character (Unicode code point) to its 7814 * UTF-16 representation. If the specified code point is a BMP 7815 * (Basic Multilingual Plane or Plane 0) value, the same value is 7816 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 7817 * specified code point is a supplementary character, its 7818 * surrogate values are stored in {@code dst[dstIndex]} 7819 * (high-surrogate) and {@code dst[dstIndex+1]} 7820 * (low-surrogate), and 2 is returned. 7821 * 7822 * @param codePoint the character (Unicode code point) to be converted. 7823 * @param dst an array of {@code char} in which the 7824 * {@code codePoint}'s UTF-16 value is stored. 7825 * @param dstIndex the start index into the {@code dst} 7826 * array where the converted value is stored. 7827 * @return 1 if the code point is a BMP code point, 2 if the 7828 * code point is a supplementary code point. 7829 * @exception IllegalArgumentException if the specified 7830 * {@code codePoint} is not a valid Unicode code point. 7831 * @exception NullPointerException if the specified {@code dst} is null. 7832 * @exception IndexOutOfBoundsException if {@code dstIndex} 7833 * is negative or not less than {@code dst.length}, or if 7834 * {@code dst} at {@code dstIndex} doesn't have enough 7835 * array element(s) to store the resulting {@code char} 7836 * value(s). (If {@code dstIndex} is equal to 7837 * {@code dst.length-1} and the specified 7838 * {@code codePoint} is a supplementary character, the 7839 * high-surrogate value is not stored in 7840 * {@code dst[dstIndex]}.) 7841 * @since 1.5 7842 */ 7843 public static int toChars(int codePoint, char[] dst, int dstIndex) { 7844 if (isBmpCodePoint(codePoint)) { 7845 dst[dstIndex] = (char) codePoint; 7846 return 1; 7847 } else if (isValidCodePoint(codePoint)) { 7848 toSurrogates(codePoint, dst, dstIndex); 7849 return 2; 7850 } else { 7851 throw new IllegalArgumentException(); 7852 } 7853 } 7854 7855 /** 7856 * Converts the specified character (Unicode code point) to its 7857 * UTF-16 representation stored in a {@code char} array. 
If 7858 * the specified code point is a BMP (Basic Multilingual Plane or 7859 * Plane 0) value, the resulting {@code char} array has 7860 * the same value as {@code codePoint}. If the specified code 7861 * point is a supplementary code point, the resulting 7862 * {@code char} array has the corresponding surrogate pair. 7863 * 7864 * @param codePoint a Unicode code point 7865 * @return a {@code char} array having 7866 * {@code codePoint}'s UTF-16 representation. 7867 * @exception IllegalArgumentException if the specified 7868 * {@code codePoint} is not a valid Unicode code point. 7869 * @since 1.5 7870 */ 7871 public static char[] toChars(int codePoint) { 7872 if (isBmpCodePoint(codePoint)) { 7873 return new char[] { (char) codePoint }; 7874 } else if (isValidCodePoint(codePoint)) { 7875 char[] result = new char[2]; 7876 toSurrogates(codePoint, result, 0); 7877 return result; 7878 } else { 7879 throw new IllegalArgumentException(); 7880 } 7881 } 7882 7883 static void toSurrogates(int codePoint, char[] dst, int index) { 7884 // We write elements "backwards" to guarantee all-or-nothing 7885 dst[index+1] = lowSurrogate(codePoint); 7886 dst[index] = highSurrogate(codePoint); 7887 } 7888 7889 /** 7890 * Returns the number of Unicode code points in the text range of 7891 * the specified char sequence. The text range begins at the 7892 * specified {@code beginIndex} and extends to the 7893 * {@code char} at index {@code endIndex - 1}. Thus the 7894 * length (in {@code char}s) of the text range is 7895 * {@code endIndex-beginIndex}. Unpaired surrogates within 7896 * the text range count as one code point each. 7897 * 7898 * @param seq the char sequence 7899 * @param beginIndex the index to the first {@code char} of 7900 * the text range. 7901 * @param endIndex the index after the last {@code char} of 7902 * the text range. 7903 * @return the number of Unicode code points in the specified text 7904 * range 7905 * @exception NullPointerException if {@code seq} is null. 
7906 * @exception IndexOutOfBoundsException if the 7907 * {@code beginIndex} is negative, or {@code endIndex} 7908 * is larger than the length of the given sequence, or 7909 * {@code beginIndex} is larger than {@code endIndex}. 7910 * @since 1.5 7911 */ 7912 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 7913 int length = seq.length(); 7914 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 7915 throw new IndexOutOfBoundsException(); 7916 } 7917 int n = endIndex - beginIndex; 7918 for (int i = beginIndex; i < endIndex; ) { 7919 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 7920 isLowSurrogate(seq.charAt(i))) { 7921 n--; 7922 i++; 7923 } 7924 } 7925 return n; 7926 } 7927 7928 /** 7929 * Returns the number of Unicode code points in a subarray of the 7930 * {@code char} array argument. The {@code offset} 7931 * argument is the index of the first {@code char} of the 7932 * subarray and the {@code count} argument specifies the 7933 * length of the subarray in {@code char}s. Unpaired 7934 * surrogates within the subarray count as one code point each. 7935 * 7936 * @param a the {@code char} array 7937 * @param offset the index of the first {@code char} in the 7938 * given {@code char} array 7939 * @param count the length of the subarray in {@code char}s 7940 * @return the number of Unicode code points in the specified subarray 7941 * @exception NullPointerException if {@code a} is null. 7942 * @exception IndexOutOfBoundsException if {@code offset} or 7943 * {@code count} is negative, or if {@code offset + 7944 * count} is larger than the length of the given array. 
7945 * @since 1.5 7946 */ 7947 public static int codePointCount(char[] a, int offset, int count) { 7948 if (count > a.length - offset || offset < 0 || count < 0) { 7949 throw new IndexOutOfBoundsException(); 7950 } 7951 return codePointCountImpl(a, offset, count); 7952 } 7953 7954 static int codePointCountImpl(char[] a, int offset, int count) { 7955 int endIndex = offset + count; 7956 int n = count; 7957 for (int i = offset; i < endIndex; ) { 7958 if (isHighSurrogate(a[i++]) && i < endIndex && 7959 isLowSurrogate(a[i])) { 7960 n--; 7961 i++; 7962 } 7963 } 7964 return n; 7965 } 7966 7967 /** 7968 * Returns the index within the given char sequence that is offset 7969 * from the given {@code index} by {@code codePointOffset} 7970 * code points. Unpaired surrogates within the text range given by 7971 * {@code index} and {@code codePointOffset} count as 7972 * one code point each. 7973 * 7974 * @param seq the char sequence 7975 * @param index the index to be offset 7976 * @param codePointOffset the offset in code points 7977 * @return the index within the char sequence 7978 * @exception NullPointerException if {@code seq} is null. 7979 * @exception IndexOutOfBoundsException if {@code index} 7980 * is negative or larger then the length of the char sequence, 7981 * or if {@code codePointOffset} is positive and the 7982 * subsequence starting with {@code index} has fewer than 7983 * {@code codePointOffset} code points, or if 7984 * {@code codePointOffset} is negative and the subsequence 7985 * before {@code index} has fewer than the absolute value 7986 * of {@code codePointOffset} code points. 
7987 * @since 1.5 7988 */ 7989 public static int offsetByCodePoints(CharSequence seq, int index, 7990 int codePointOffset) { 7991 int length = seq.length(); 7992 if (index < 0 || index > length) { 7993 throw new IndexOutOfBoundsException(); 7994 } 7995 7996 int x = index; 7997 if (codePointOffset >= 0) { 7998 int i; 7999 for (i = 0; x < length && i < codePointOffset; i++) { 8000 if (isHighSurrogate(seq.charAt(x++)) && x < length && 8001 isLowSurrogate(seq.charAt(x))) { 8002 x++; 8003 } 8004 } 8005 if (i < codePointOffset) { 8006 throw new IndexOutOfBoundsException(); 8007 } 8008 } else { 8009 int i; 8010 for (i = codePointOffset; x > 0 && i < 0; i++) { 8011 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 8012 isHighSurrogate(seq.charAt(x-1))) { 8013 x--; 8014 } 8015 } 8016 if (i < 0) { 8017 throw new IndexOutOfBoundsException(); 8018 } 8019 } 8020 return x; 8021 } 8022 8023 /** 8024 * Returns the index within the given {@code char} subarray 8025 * that is offset from the given {@code index} by 8026 * {@code codePointOffset} code points. The 8027 * {@code start} and {@code count} arguments specify a 8028 * subarray of the {@code char} array. Unpaired surrogates 8029 * within the text range given by {@code index} and 8030 * {@code codePointOffset} count as one code point each. 8031 * 8032 * @param a the {@code char} array 8033 * @param start the index of the first {@code char} of the 8034 * subarray 8035 * @param count the length of the subarray in {@code char}s 8036 * @param index the index to be offset 8037 * @param codePointOffset the offset in code points 8038 * @return the index within the subarray 8039 * @exception NullPointerException if {@code a} is null. 
8040 * @exception IndexOutOfBoundsException 8041 * if {@code start} or {@code count} is negative, 8042 * or if {@code start + count} is larger than the length of 8043 * the given array, 8044 * or if {@code index} is less than {@code start} or 8045 * larger then {@code start + count}, 8046 * or if {@code codePointOffset} is positive and the text range 8047 * starting with {@code index} and ending with {@code start + count - 1} 8048 * has fewer than {@code codePointOffset} code 8049 * points, 8050 * or if {@code codePointOffset} is negative and the text range 8051 * starting with {@code start} and ending with {@code index - 1} 8052 * has fewer than the absolute value of 8053 * {@code codePointOffset} code points. 8054 * @since 1.5 8055 */ 8056 public static int offsetByCodePoints(char[] a, int start, int count, 8057 int index, int codePointOffset) { 8058 if (count > a.length-start || start < 0 || count < 0 8059 || index < start || index > start+count) { 8060 throw new IndexOutOfBoundsException(); 8061 } 8062 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 8063 } 8064 8065 static int offsetByCodePointsImpl(char[]a, int start, int count, 8066 int index, int codePointOffset) { 8067 int x = index; 8068 if (codePointOffset >= 0) { 8069 int limit = start + count; 8070 int i; 8071 for (i = 0; x < limit && i < codePointOffset; i++) { 8072 if (isHighSurrogate(a[x++]) && x < limit && 8073 isLowSurrogate(a[x])) { 8074 x++; 8075 } 8076 } 8077 if (i < codePointOffset) { 8078 throw new IndexOutOfBoundsException(); 8079 } 8080 } else { 8081 int i; 8082 for (i = codePointOffset; x > start && i < 0; i++) { 8083 if (isLowSurrogate(a[--x]) && x > start && 8084 isHighSurrogate(a[x-1])) { 8085 x--; 8086 } 8087 } 8088 if (i < 0) { 8089 throw new IndexOutOfBoundsException(); 8090 } 8091 } 8092 return x; 8093 } 8094 8095 /** 8096 * Determines if the specified character is a lowercase character. 
8097 * <p> 8098 * A character is lowercase if its general category type, provided 8099 * by {@code Character.getType(ch)}, is 8100 * {@code LOWERCASE_LETTER}, or it has contributory property 8101 * Other_Lowercase as defined by the Unicode Standard. 8102 * <p> 8103 * The following are examples of lowercase characters: 8104 * <blockquote><pre> 8105 * a b c d e f g h i j k l m n o p q r s t u v w x y z 8106 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 8107 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 8108 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 8109 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 8110 * </pre></blockquote> 8111 * <p> Many other Unicode characters are lowercase too. 8112 * 8113 * <p><b>Note:</b> This method cannot handle <a 8114 * href="#supplementary"> supplementary characters</a>. To support 8115 * all Unicode characters, including supplementary characters, use 8116 * the {@link #isLowerCase(int)} method. 8117 * 8118 * @param ch the character to be tested. 8119 * @return {@code true} if the character is lowercase; 8120 * {@code false} otherwise. 8121 * @see Character#isLowerCase(char) 8122 * @see Character#isTitleCase(char) 8123 * @see Character#toLowerCase(char) 8124 * @see Character#getType(char) 8125 */ 8126 public static boolean isLowerCase(char ch) { 8127 return isLowerCase((int)ch); 8128 } 8129 8130 /** 8131 * Determines if the specified character (Unicode code point) is a 8132 * lowercase character. 8133 * <p> 8134 * A character is lowercase if its general category type, provided 8135 * by {@link Character#getType getType(codePoint)}, is 8136 * {@code LOWERCASE_LETTER}, or it has contributory property 8137 * Other_Lowercase as defined by the Unicode Standard. 
8138 * <p> 8139 * The following are examples of lowercase characters: 8140 * <blockquote><pre> 8141 * a b c d e f g h i j k l m n o p q r s t u v w x y z 8142 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 8143 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 8144 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 8145 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 8146 * </pre></blockquote> 8147 * <p> Many other Unicode characters are lowercase too. 8148 * 8149 * @param codePoint the character (Unicode code point) to be tested. 8150 * @return {@code true} if the character is lowercase; 8151 * {@code false} otherwise. 8152 * @see Character#isLowerCase(int) 8153 * @see Character#isTitleCase(int) 8154 * @see Character#toLowerCase(int) 8155 * @see Character#getType(int) 8156 * @since 1.5 8157 */ 8158 public static boolean isLowerCase(int codePoint) { 8159 return getType(codePoint) == Character.LOWERCASE_LETTER || 8160 CharacterData.of(codePoint).isOtherLowercase(codePoint); 8161 } 8162 8163 /** 8164 * Determines if the specified character is an uppercase character. 8165 * <p> 8166 * A character is uppercase if its general category type, provided by 8167 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 8168 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 8169 * <p> 8170 * The following are examples of uppercase characters: 8171 * <blockquote><pre> 8172 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 8173 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 8174 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 8175 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 8176 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 8177 * </pre></blockquote> 8178 * <p> Many other Unicode characters are uppercase too. 
8179 * 8180 * <p><b>Note:</b> This method cannot handle <a 8181 * href="#supplementary"> supplementary characters</a>. To support 8182 * all Unicode characters, including supplementary characters, use 8183 * the {@link #isUpperCase(int)} method. 8184 * 8185 * @param ch the character to be tested. 8186 * @return {@code true} if the character is uppercase; 8187 * {@code false} otherwise. 8188 * @see Character#isLowerCase(char) 8189 * @see Character#isTitleCase(char) 8190 * @see Character#toUpperCase(char) 8191 * @see Character#getType(char) 8192 * @since 1.0 8193 */ 8194 public static boolean isUpperCase(char ch) { 8195 return isUpperCase((int)ch); 8196 } 8197 8198 /** 8199 * Determines if the specified character (Unicode code point) is an uppercase character. 8200 * <p> 8201 * A character is uppercase if its general category type, provided by 8202 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 8203 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 8204 * <p> 8205 * The following are examples of uppercase characters: 8206 * <blockquote><pre> 8207 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 8208 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 8209 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 8210 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 8211 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 8212 * </pre></blockquote> 8213 * <p> Many other Unicode characters are uppercase too. 8214 * 8215 * @param codePoint the character (Unicode code point) to be tested. 8216 * @return {@code true} if the character is uppercase; 8217 * {@code false} otherwise. 
8218 * @see Character#isLowerCase(int) 8219 * @see Character#isTitleCase(int) 8220 * @see Character#toUpperCase(int) 8221 * @see Character#getType(int) 8222 * @since 1.5 8223 */ 8224 public static boolean isUpperCase(int codePoint) { 8225 return getType(codePoint) == Character.UPPERCASE_LETTER || 8226 CharacterData.of(codePoint).isOtherUppercase(codePoint); 8227 } 8228 8229 /** 8230 * Determines if the specified character is a titlecase character. 8231 * <p> 8232 * A character is a titlecase character if its general 8233 * category type, provided by {@code Character.getType(ch)}, 8234 * is {@code TITLECASE_LETTER}. 8235 * <p> 8236 * Some characters look like pairs of Latin letters. For example, there 8237 * is an uppercase letter that looks like "LJ" and has a corresponding 8238 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 8239 * is the appropriate form to use when rendering a word in lowercase 8240 * with initial capitals, as for a book title. 8241 * <p> 8242 * These are some of the Unicode characters for which this method returns 8243 * {@code true}: 8244 * <ul> 8245 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 8246 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 8247 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 8248 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 8249 * </ul> 8250 * <p> Many other Unicode characters are titlecase too. 8251 * 8252 * <p><b>Note:</b> This method cannot handle <a 8253 * href="#supplementary"> supplementary characters</a>. To support 8254 * all Unicode characters, including supplementary characters, use 8255 * the {@link #isTitleCase(int)} method. 8256 * 8257 * @param ch the character to be tested. 8258 * @return {@code true} if the character is titlecase; 8259 * {@code false} otherwise. 
8260 * @see Character#isLowerCase(char) 8261 * @see Character#isUpperCase(char) 8262 * @see Character#toTitleCase(char) 8263 * @see Character#getType(char) 8264 * @since 1.0.2 8265 */ 8266 public static boolean isTitleCase(char ch) { 8267 return isTitleCase((int)ch); 8268 } 8269 8270 /** 8271 * Determines if the specified character (Unicode code point) is a titlecase character. 8272 * <p> 8273 * A character is a titlecase character if its general 8274 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 8275 * is {@code TITLECASE_LETTER}. 8276 * <p> 8277 * Some characters look like pairs of Latin letters. For example, there 8278 * is an uppercase letter that looks like "LJ" and has a corresponding 8279 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 8280 * is the appropriate form to use when rendering a word in lowercase 8281 * with initial capitals, as for a book title. 8282 * <p> 8283 * These are some of the Unicode characters for which this method returns 8284 * {@code true}: 8285 * <ul> 8286 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 8287 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 8288 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 8289 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 8290 * </ul> 8291 * <p> Many other Unicode characters are titlecase too. 8292 * 8293 * @param codePoint the character (Unicode code point) to be tested. 8294 * @return {@code true} if the character is titlecase; 8295 * {@code false} otherwise. 8296 * @see Character#isLowerCase(int) 8297 * @see Character#isUpperCase(int) 8298 * @see Character#toTitleCase(int) 8299 * @see Character#getType(int) 8300 * @since 1.5 8301 */ 8302 public static boolean isTitleCase(int codePoint) { 8303 return getType(codePoint) == Character.TITLECASE_LETTER; 8304 } 8305 8306 /** 8307 * Determines if the specified character is a digit. 
8308 * <p> 8309 * A character is a digit if its general category type, provided 8310 * by {@code Character.getType(ch)}, is 8311 * {@code DECIMAL_DIGIT_NUMBER}. 8312 * <p> 8313 * Some Unicode character ranges that contain digits: 8314 * <ul> 8315 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 8316 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 8317 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 8318 * Arabic-Indic digits 8319 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 8320 * Extended Arabic-Indic digits 8321 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 8322 * Devanagari digits 8323 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 8324 * Fullwidth digits 8325 * </ul> 8326 * 8327 * Many other character ranges contain digits as well. 8328 * 8329 * <p><b>Note:</b> This method cannot handle <a 8330 * href="#supplementary"> supplementary characters</a>. To support 8331 * all Unicode characters, including supplementary characters, use 8332 * the {@link #isDigit(int)} method. 8333 * 8334 * @param ch the character to be tested. 8335 * @return {@code true} if the character is a digit; 8336 * {@code false} otherwise. 8337 * @see Character#digit(char, int) 8338 * @see Character#forDigit(int, int) 8339 * @see Character#getType(char) 8340 */ 8341 public static boolean isDigit(char ch) { 8342 return isDigit((int)ch); 8343 } 8344 8345 /** 8346 * Determines if the specified character (Unicode code point) is a digit. 8347 * <p> 8348 * A character is a digit if its general category type, provided 8349 * by {@link Character#getType(int) getType(codePoint)}, is 8350 * {@code DECIMAL_DIGIT_NUMBER}. 
8351 * <p> 8352 * Some Unicode character ranges that contain digits: 8353 * <ul> 8354 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 8355 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 8356 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 8357 * Arabic-Indic digits 8358 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 8359 * Extended Arabic-Indic digits 8360 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 8361 * Devanagari digits 8362 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 8363 * Fullwidth digits 8364 * </ul> 8365 * 8366 * Many other character ranges contain digits as well. 8367 * 8368 * @param codePoint the character (Unicode code point) to be tested. 8369 * @return {@code true} if the character is a digit; 8370 * {@code false} otherwise. 8371 * @see Character#forDigit(int, int) 8372 * @see Character#getType(int) 8373 * @since 1.5 8374 */ 8375 public static boolean isDigit(int codePoint) { 8376 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 8377 } 8378 8379 /** 8380 * Determines if a character is defined in Unicode. 8381 * <p> 8382 * A character is defined if at least one of the following is true: 8383 * <ul> 8384 * <li>It has an entry in the UnicodeData file. 8385 * <li>It has a value in a range defined by the UnicodeData file. 8386 * </ul> 8387 * 8388 * <p><b>Note:</b> This method cannot handle <a 8389 * href="#supplementary"> supplementary characters</a>. To support 8390 * all Unicode characters, including supplementary characters, use 8391 * the {@link #isDefined(int)} method. 8392 * 8393 * @param ch the character to be tested 8394 * @return {@code true} if the character has a defined meaning 8395 * in Unicode; {@code false} otherwise. 
8396 * @see Character#isDigit(char) 8397 * @see Character#isLetter(char) 8398 * @see Character#isLetterOrDigit(char) 8399 * @see Character#isLowerCase(char) 8400 * @see Character#isTitleCase(char) 8401 * @see Character#isUpperCase(char) 8402 * @since 1.0.2 8403 */ 8404 public static boolean isDefined(char ch) { 8405 return isDefined((int)ch); 8406 } 8407 8408 /** 8409 * Determines if a character (Unicode code point) is defined in Unicode. 8410 * <p> 8411 * A character is defined if at least one of the following is true: 8412 * <ul> 8413 * <li>It has an entry in the UnicodeData file. 8414 * <li>It has a value in a range defined by the UnicodeData file. 8415 * </ul> 8416 * 8417 * @param codePoint the character (Unicode code point) to be tested. 8418 * @return {@code true} if the character has a defined meaning 8419 * in Unicode; {@code false} otherwise. 8420 * @see Character#isDigit(int) 8421 * @see Character#isLetter(int) 8422 * @see Character#isLetterOrDigit(int) 8423 * @see Character#isLowerCase(int) 8424 * @see Character#isTitleCase(int) 8425 * @see Character#isUpperCase(int) 8426 * @since 1.5 8427 */ 8428 public static boolean isDefined(int codePoint) { 8429 return getType(codePoint) != Character.UNASSIGNED; 8430 } 8431 8432 /** 8433 * Determines if the specified character is a letter. 8434 * <p> 8435 * A character is considered to be a letter if its general 8436 * category type, provided by {@code Character.getType(ch)}, 8437 * is any of the following: 8438 * <ul> 8439 * <li> {@code UPPERCASE_LETTER} 8440 * <li> {@code LOWERCASE_LETTER} 8441 * <li> {@code TITLECASE_LETTER} 8442 * <li> {@code MODIFIER_LETTER} 8443 * <li> {@code OTHER_LETTER} 8444 * </ul> 8445 * 8446 * Not all letters have case. Many characters are 8447 * letters but are neither uppercase nor lowercase nor titlecase. 8448 * 8449 * <p><b>Note:</b> This method cannot handle <a 8450 * href="#supplementary"> supplementary characters</a>. 
To support 8451 * all Unicode characters, including supplementary characters, use 8452 * the {@link #isLetter(int)} method. 8453 * 8454 * @param ch the character to be tested. 8455 * @return {@code true} if the character is a letter; 8456 * {@code false} otherwise. 8457 * @see Character#isDigit(char) 8458 * @see Character#isJavaIdentifierStart(char) 8459 * @see Character#isJavaLetter(char) 8460 * @see Character#isJavaLetterOrDigit(char) 8461 * @see Character#isLetterOrDigit(char) 8462 * @see Character#isLowerCase(char) 8463 * @see Character#isTitleCase(char) 8464 * @see Character#isUnicodeIdentifierStart(char) 8465 * @see Character#isUpperCase(char) 8466 */ 8467 public static boolean isLetter(char ch) { 8468 return isLetter((int)ch); 8469 } 8470 8471 /** 8472 * Determines if the specified character (Unicode code point) is a letter. 8473 * <p> 8474 * A character is considered to be a letter if its general 8475 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 8476 * is any of the following: 8477 * <ul> 8478 * <li> {@code UPPERCASE_LETTER} 8479 * <li> {@code LOWERCASE_LETTER} 8480 * <li> {@code TITLECASE_LETTER} 8481 * <li> {@code MODIFIER_LETTER} 8482 * <li> {@code OTHER_LETTER} 8483 * </ul> 8484 * 8485 * Not all letters have case. Many characters are 8486 * letters but are neither uppercase nor lowercase nor titlecase. 8487 * 8488 * @param codePoint the character (Unicode code point) to be tested. 8489 * @return {@code true} if the character is a letter; 8490 * {@code false} otherwise. 
8491 * @see Character#isDigit(int) 8492 * @see Character#isJavaIdentifierStart(int) 8493 * @see Character#isLetterOrDigit(int) 8494 * @see Character#isLowerCase(int) 8495 * @see Character#isTitleCase(int) 8496 * @see Character#isUnicodeIdentifierStart(int) 8497 * @see Character#isUpperCase(int) 8498 * @since 1.5 8499 */ 8500 public static boolean isLetter(int codePoint) { 8501 return ((((1 << Character.UPPERCASE_LETTER) | 8502 (1 << Character.LOWERCASE_LETTER) | 8503 (1 << Character.TITLECASE_LETTER) | 8504 (1 << Character.MODIFIER_LETTER) | 8505 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 8506 != 0; 8507 } 8508 8509 /** 8510 * Determines if the specified character is a letter or digit. 8511 * <p> 8512 * A character is considered to be a letter or digit if either 8513 * {@code Character.isLetter(char ch)} or 8514 * {@code Character.isDigit(char ch)} returns 8515 * {@code true} for the character. 8516 * 8517 * <p><b>Note:</b> This method cannot handle <a 8518 * href="#supplementary"> supplementary characters</a>. To support 8519 * all Unicode characters, including supplementary characters, use 8520 * the {@link #isLetterOrDigit(int)} method. 8521 * 8522 * @param ch the character to be tested. 8523 * @return {@code true} if the character is a letter or digit; 8524 * {@code false} otherwise. 8525 * @see Character#isDigit(char) 8526 * @see Character#isJavaIdentifierPart(char) 8527 * @see Character#isJavaLetter(char) 8528 * @see Character#isJavaLetterOrDigit(char) 8529 * @see Character#isLetter(char) 8530 * @see Character#isUnicodeIdentifierPart(char) 8531 * @since 1.0.2 8532 */ 8533 public static boolean isLetterOrDigit(char ch) { 8534 return isLetterOrDigit((int)ch); 8535 } 8536 8537 /** 8538 * Determines if the specified character (Unicode code point) is a letter or digit. 
8539 * <p> 8540 * A character is considered to be a letter or digit if either 8541 * {@link #isLetter(int) isLetter(codePoint)} or 8542 * {@link #isDigit(int) isDigit(codePoint)} returns 8543 * {@code true} for the character. 8544 * 8545 * @param codePoint the character (Unicode code point) to be tested. 8546 * @return {@code true} if the character is a letter or digit; 8547 * {@code false} otherwise. 8548 * @see Character#isDigit(int) 8549 * @see Character#isJavaIdentifierPart(int) 8550 * @see Character#isLetter(int) 8551 * @see Character#isUnicodeIdentifierPart(int) 8552 * @since 1.5 8553 */ 8554 public static boolean isLetterOrDigit(int codePoint) { 8555 return ((((1 << Character.UPPERCASE_LETTER) | 8556 (1 << Character.LOWERCASE_LETTER) | 8557 (1 << Character.TITLECASE_LETTER) | 8558 (1 << Character.MODIFIER_LETTER) | 8559 (1 << Character.OTHER_LETTER) | 8560 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 8561 != 0; 8562 } 8563 8564 /** 8565 * Determines if the specified character is permissible as the first 8566 * character in a Java identifier. 8567 * <p> 8568 * A character may start a Java identifier if and only if 8569 * one of the following is true: 8570 * <ul> 8571 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 8572 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 8573 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 8574 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 8575 * </ul> 8576 * 8577 * @param ch the character to be tested. 8578 * @return {@code true} if the character may start a Java 8579 * identifier; {@code false} otherwise. 
8580 * @see Character#isJavaLetterOrDigit(char) 8581 * @see Character#isJavaIdentifierStart(char) 8582 * @see Character#isJavaIdentifierPart(char) 8583 * @see Character#isLetter(char) 8584 * @see Character#isLetterOrDigit(char) 8585 * @see Character#isUnicodeIdentifierStart(char) 8586 * @since 1.0.2 8587 * @deprecated Replaced by isJavaIdentifierStart(char). 8588 */ 8589 @Deprecated 8590 public static boolean isJavaLetter(char ch) { 8591 return isJavaIdentifierStart(ch); 8592 } 8593 8594 /** 8595 * Determines if the specified character may be part of a Java 8596 * identifier as other than the first character. 8597 * <p> 8598 * A character may be part of a Java identifier if and only if any 8599 * of the following are true: 8600 * <ul> 8601 * <li> it is a letter 8602 * <li> it is a currency symbol (such as {@code '$'}) 8603 * <li> it is a connecting punctuation character (such as {@code '_'}) 8604 * <li> it is a digit 8605 * <li> it is a numeric letter (such as a Roman numeral character) 8606 * <li> it is a combining mark 8607 * <li> it is a non-spacing mark 8608 * <li> {@code isIdentifierIgnorable} returns 8609 * {@code true} for the character. 8610 * </ul> 8611 * 8612 * @param ch the character to be tested. 8613 * @return {@code true} if the character may be part of a 8614 * Java identifier; {@code false} otherwise. 8615 * @see Character#isJavaLetter(char) 8616 * @see Character#isJavaIdentifierStart(char) 8617 * @see Character#isJavaIdentifierPart(char) 8618 * @see Character#isLetter(char) 8619 * @see Character#isLetterOrDigit(char) 8620 * @see Character#isUnicodeIdentifierPart(char) 8621 * @see Character#isIdentifierIgnorable(char) 8622 * @since 1.0.2 8623 * @deprecated Replaced by isJavaIdentifierPart(char). 8624 */ 8625 @Deprecated 8626 public static boolean isJavaLetterOrDigit(char ch) { 8627 return isJavaIdentifierPart(ch); 8628 } 8629 8630 /** 8631 * Determines if the specified character (Unicode code point) is an alphabet. 
8632 * <p> 8633 * A character is considered to be alphabetic if its general category type, 8634 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 8635 * the following: 8636 * <ul> 8637 * <li> <code>UPPERCASE_LETTER</code> 8638 * <li> <code>LOWERCASE_LETTER</code> 8639 * <li> <code>TITLECASE_LETTER</code> 8640 * <li> <code>MODIFIER_LETTER</code> 8641 * <li> <code>OTHER_LETTER</code> 8642 * <li> <code>LETTER_NUMBER</code> 8643 * </ul> 8644 * or it has contributory property Other_Alphabetic as defined by the 8645 * Unicode Standard. 8646 * 8647 * @param codePoint the character (Unicode code point) to be tested. 8648 * @return <code>true</code> if the character is a Unicode alphabet 8649 * character, <code>false</code> otherwise. 8650 * @since 1.7 8651 */ 8652 public static boolean isAlphabetic(int codePoint) { 8653 return (((((1 << Character.UPPERCASE_LETTER) | 8654 (1 << Character.LOWERCASE_LETTER) | 8655 (1 << Character.TITLECASE_LETTER) | 8656 (1 << Character.MODIFIER_LETTER) | 8657 (1 << Character.OTHER_LETTER) | 8658 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 8659 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 8660 } 8661 8662 /** 8663 * Determines if the specified character (Unicode code point) is a CJKV 8664 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 8665 * the Unicode Standard. 8666 * 8667 * @param codePoint the character (Unicode code point) to be tested. 8668 * @return <code>true</code> if the character is a Unicode ideograph 8669 * character, <code>false</code> otherwise. 8670 * @since 1.7 8671 */ 8672 public static boolean isIdeographic(int codePoint) { 8673 return CharacterData.of(codePoint).isIdeographic(codePoint); 8674 } 8675 8676 /** 8677 * Determines if the specified character is 8678 * permissible as the first character in a Java identifier. 
8679 * <p> 8680 * A character may start a Java identifier if and only if 8681 * one of the following conditions is true: 8682 * <ul> 8683 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 8684 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 8685 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 8686 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 8687 * </ul> 8688 * 8689 * <p><b>Note:</b> This method cannot handle <a 8690 * href="#supplementary"> supplementary characters</a>. To support 8691 * all Unicode characters, including supplementary characters, use 8692 * the {@link #isJavaIdentifierStart(int)} method. 8693 * 8694 * @param ch the character to be tested. 8695 * @return {@code true} if the character may start a Java identifier; 8696 * {@code false} otherwise. 8697 * @see Character#isJavaIdentifierPart(char) 8698 * @see Character#isLetter(char) 8699 * @see Character#isUnicodeIdentifierStart(char) 8700 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8701 * @since 1.1 8702 */ 8703 public static boolean isJavaIdentifierStart(char ch) { 8704 return isJavaIdentifierStart((int)ch); 8705 } 8706 8707 /** 8708 * Determines if the character (Unicode code point) is 8709 * permissible as the first character in a Java identifier. 8710 * <p> 8711 * A character may start a Java identifier if and only if 8712 * one of the following conditions is true: 8713 * <ul> 8714 * <li> {@link #isLetter(int) isLetter(codePoint)} 8715 * returns {@code true} 8716 * <li> {@link #getType(int) getType(codePoint)} 8717 * returns {@code LETTER_NUMBER} 8718 * <li> the referenced character is a currency symbol (such as {@code '$'}) 8719 * <li> the referenced character is a connecting punctuation character 8720 * (such as {@code '_'}). 8721 * </ul> 8722 * 8723 * @param codePoint the character (Unicode code point) to be tested. 
8724 * @return {@code true} if the character may start a Java identifier; 8725 * {@code false} otherwise. 8726 * @see Character#isJavaIdentifierPart(int) 8727 * @see Character#isLetter(int) 8728 * @see Character#isUnicodeIdentifierStart(int) 8729 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8730 * @since 1.5 8731 */ 8732 public static boolean isJavaIdentifierStart(int codePoint) { 8733 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 8734 } 8735 8736 /** 8737 * Determines if the specified character may be part of a Java 8738 * identifier as other than the first character. 8739 * <p> 8740 * A character may be part of a Java identifier if any of the following 8741 * are true: 8742 * <ul> 8743 * <li> it is a letter 8744 * <li> it is a currency symbol (such as {@code '$'}) 8745 * <li> it is a connecting punctuation character (such as {@code '_'}) 8746 * <li> it is a digit 8747 * <li> it is a numeric letter (such as a Roman numeral character) 8748 * <li> it is a combining mark 8749 * <li> it is a non-spacing mark 8750 * <li> {@code isIdentifierIgnorable} returns 8751 * {@code true} for the character 8752 * </ul> 8753 * 8754 * <p><b>Note:</b> This method cannot handle <a 8755 * href="#supplementary"> supplementary characters</a>. To support 8756 * all Unicode characters, including supplementary characters, use 8757 * the {@link #isJavaIdentifierPart(int)} method. 8758 * 8759 * @param ch the character to be tested. 8760 * @return {@code true} if the character may be part of a 8761 * Java identifier; {@code false} otherwise. 
8762 * @see Character#isIdentifierIgnorable(char) 8763 * @see Character#isJavaIdentifierStart(char) 8764 * @see Character#isLetterOrDigit(char) 8765 * @see Character#isUnicodeIdentifierPart(char) 8766 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8767 * @since 1.1 8768 */ 8769 public static boolean isJavaIdentifierPart(char ch) { 8770 return isJavaIdentifierPart((int)ch); 8771 } 8772 8773 /** 8774 * Determines if the character (Unicode code point) may be part of a Java 8775 * identifier as other than the first character. 8776 * <p> 8777 * A character may be part of a Java identifier if any of the following 8778 * are true: 8779 * <ul> 8780 * <li> it is a letter 8781 * <li> it is a currency symbol (such as {@code '$'}) 8782 * <li> it is a connecting punctuation character (such as {@code '_'}) 8783 * <li> it is a digit 8784 * <li> it is a numeric letter (such as a Roman numeral character) 8785 * <li> it is a combining mark 8786 * <li> it is a non-spacing mark 8787 * <li> {@link #isIdentifierIgnorable(int) 8788 * isIdentifierIgnorable(codePoint)} returns {@code true} for 8789 * the character 8790 * </ul> 8791 * 8792 * @param codePoint the character (Unicode code point) to be tested. 8793 * @return {@code true} if the character may be part of a 8794 * Java identifier; {@code false} otherwise. 8795 * @see Character#isIdentifierIgnorable(int) 8796 * @see Character#isJavaIdentifierStart(int) 8797 * @see Character#isLetterOrDigit(int) 8798 * @see Character#isUnicodeIdentifierPart(int) 8799 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 8800 * @since 1.5 8801 */ 8802 public static boolean isJavaIdentifierPart(int codePoint) { 8803 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 8804 } 8805 8806 /** 8807 * Determines if the specified character is permissible as the 8808 * first character in a Unicode identifier. 
8809 * <p> 8810 * A character may start a Unicode identifier if and only if 8811 * one of the following conditions is true: 8812 * <ul> 8813 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 8814 * <li> {@link #getType(char) getType(ch)} returns 8815 * {@code LETTER_NUMBER}. 8816 * </ul> 8817 * 8818 * <p><b>Note:</b> This method cannot handle <a 8819 * href="#supplementary"> supplementary characters</a>. To support 8820 * all Unicode characters, including supplementary characters, use 8821 * the {@link #isUnicodeIdentifierStart(int)} method. 8822 * 8823 * @param ch the character to be tested. 8824 * @return {@code true} if the character may start a Unicode 8825 * identifier; {@code false} otherwise. 8826 * @see Character#isJavaIdentifierStart(char) 8827 * @see Character#isLetter(char) 8828 * @see Character#isUnicodeIdentifierPart(char) 8829 * @since 1.1 8830 */ 8831 public static boolean isUnicodeIdentifierStart(char ch) { 8832 return isUnicodeIdentifierStart((int)ch); 8833 } 8834 8835 /** 8836 * Determines if the specified character (Unicode code point) is permissible as the 8837 * first character in a Unicode identifier. 8838 * <p> 8839 * A character may start a Unicode identifier if and only if 8840 * one of the following conditions is true: 8841 * <ul> 8842 * <li> {@link #isLetter(int) isLetter(codePoint)} 8843 * returns {@code true} 8844 * <li> {@link #getType(int) getType(codePoint)} 8845 * returns {@code LETTER_NUMBER}. 8846 * </ul> 8847 * @param codePoint the character (Unicode code point) to be tested. 8848 * @return {@code true} if the character may start a Unicode 8849 * identifier; {@code false} otherwise. 
8850 * @see Character#isJavaIdentifierStart(int) 8851 * @see Character#isLetter(int) 8852 * @see Character#isUnicodeIdentifierPart(int) 8853 * @since 1.5 8854 */ 8855 public static boolean isUnicodeIdentifierStart(int codePoint) { 8856 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 8857 } 8858 8859 /** 8860 * Determines if the specified character may be part of a Unicode 8861 * identifier as other than the first character. 8862 * <p> 8863 * A character may be part of a Unicode identifier if and only if 8864 * one of the following statements is true: 8865 * <ul> 8866 * <li> it is a letter 8867 * <li> it is a connecting punctuation character (such as {@code '_'}) 8868 * <li> it is a digit 8869 * <li> it is a numeric letter (such as a Roman numeral character) 8870 * <li> it is a combining mark 8871 * <li> it is a non-spacing mark 8872 * <li> {@code isIdentifierIgnorable} returns 8873 * {@code true} for this character. 8874 * </ul> 8875 * 8876 * <p><b>Note:</b> This method cannot handle <a 8877 * href="#supplementary"> supplementary characters</a>. To support 8878 * all Unicode characters, including supplementary characters, use 8879 * the {@link #isUnicodeIdentifierPart(int)} method. 8880 * 8881 * @param ch the character to be tested. 8882 * @return {@code true} if the character may be part of a 8883 * Unicode identifier; {@code false} otherwise. 8884 * @see Character#isIdentifierIgnorable(char) 8885 * @see Character#isJavaIdentifierPart(char) 8886 * @see Character#isLetterOrDigit(char) 8887 * @see Character#isUnicodeIdentifierStart(char) 8888 * @since 1.1 8889 */ 8890 public static boolean isUnicodeIdentifierPart(char ch) { 8891 return isUnicodeIdentifierPart((int)ch); 8892 } 8893 8894 /** 8895 * Determines if the specified character (Unicode code point) may be part of a Unicode 8896 * identifier as other than the first character. 
8897 * <p> 8898 * A character may be part of a Unicode identifier if and only if 8899 * one of the following statements is true: 8900 * <ul> 8901 * <li> it is a letter 8902 * <li> it is a connecting punctuation character (such as {@code '_'}) 8903 * <li> it is a digit 8904 * <li> it is a numeric letter (such as a Roman numeral character) 8905 * <li> it is a combining mark 8906 * <li> it is a non-spacing mark 8907 * <li> {@code isIdentifierIgnorable} returns 8908 * {@code true} for this character. 8909 * </ul> 8910 * @param codePoint the character (Unicode code point) to be tested. 8911 * @return {@code true} if the character may be part of a 8912 * Unicode identifier; {@code false} otherwise. 8913 * @see Character#isIdentifierIgnorable(int) 8914 * @see Character#isJavaIdentifierPart(int) 8915 * @see Character#isLetterOrDigit(int) 8916 * @see Character#isUnicodeIdentifierStart(int) 8917 * @since 1.5 8918 */ 8919 public static boolean isUnicodeIdentifierPart(int codePoint) { 8920 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 8921 } 8922 8923 /** 8924 * Determines if the specified character should be regarded as 8925 * an ignorable character in a Java identifier or a Unicode identifier. 8926 * <p> 8927 * The following Unicode characters are ignorable in a Java identifier 8928 * or a Unicode identifier: 8929 * <ul> 8930 * <li>ISO control characters that are not whitespace 8931 * <ul> 8932 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 8933 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 8934 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 8935 * </ul> 8936 * 8937 * <li>all characters that have the {@code FORMAT} general 8938 * category value 8939 * </ul> 8940 * 8941 * <p><b>Note:</b> This method cannot handle <a 8942 * href="#supplementary"> supplementary characters</a>. To support 8943 * all Unicode characters, including supplementary characters, use 8944 * the {@link #isIdentifierIgnorable(int)} method. 
8945 * 8946 * @param ch the character to be tested. 8947 * @return {@code true} if the character is an ignorable control 8948 * character that may be part of a Java or Unicode identifier; 8949 * {@code false} otherwise. 8950 * @see Character#isJavaIdentifierPart(char) 8951 * @see Character#isUnicodeIdentifierPart(char) 8952 * @since 1.1 8953 */ 8954 public static boolean isIdentifierIgnorable(char ch) { 8955 return isIdentifierIgnorable((int)ch); 8956 } 8957 8958 /** 8959 * Determines if the specified character (Unicode code point) should be regarded as 8960 * an ignorable character in a Java identifier or a Unicode identifier. 8961 * <p> 8962 * The following Unicode characters are ignorable in a Java identifier 8963 * or a Unicode identifier: 8964 * <ul> 8965 * <li>ISO control characters that are not whitespace 8966 * <ul> 8967 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 8968 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 8969 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 8970 * </ul> 8971 * 8972 * <li>all characters that have the {@code FORMAT} general 8973 * category value 8974 * </ul> 8975 * 8976 * @param codePoint the character (Unicode code point) to be tested. 8977 * @return {@code true} if the character is an ignorable control 8978 * character that may be part of a Java or Unicode identifier; 8979 * {@code false} otherwise. 8980 * @see Character#isJavaIdentifierPart(int) 8981 * @see Character#isUnicodeIdentifierPart(int) 8982 * @since 1.5 8983 */ 8984 public static boolean isIdentifierIgnorable(int codePoint) { 8985 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 8986 } 8987 8988 /** 8989 * Converts the character argument to lowercase using case 8990 * mapping information from the UnicodeData file. 
8991 * <p> 8992 * Note that 8993 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 8994 * does not always return {@code true} for some ranges of 8995 * characters, particularly those that are symbols or ideographs. 8996 * 8997 * <p>In general, {@link String#toLowerCase()} should be used to map 8998 * characters to lowercase. {@code String} case mapping methods 8999 * have several benefits over {@code Character} case mapping methods. 9000 * {@code String} case mapping methods can perform locale-sensitive 9001 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9002 * the {@code Character} case mapping methods cannot. 9003 * 9004 * <p><b>Note:</b> This method cannot handle <a 9005 * href="#supplementary"> supplementary characters</a>. To support 9006 * all Unicode characters, including supplementary characters, use 9007 * the {@link #toLowerCase(int)} method. 9008 * 9009 * @param ch the character to be converted. 9010 * @return the lowercase equivalent of the character, if any; 9011 * otherwise, the character itself. 9012 * @see Character#isLowerCase(char) 9013 * @see String#toLowerCase() 9014 */ 9015 public static char toLowerCase(char ch) { 9016 return (char)toLowerCase((int)ch); 9017 } 9018 9019 /** 9020 * Converts the character (Unicode code point) argument to 9021 * lowercase using case mapping information from the UnicodeData 9022 * file. 9023 * 9024 * <p> Note that 9025 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 9026 * does not always return {@code true} for some ranges of 9027 * characters, particularly those that are symbols or ideographs. 9028 * 9029 * <p>In general, {@link String#toLowerCase()} should be used to map 9030 * characters to lowercase. {@code String} case mapping methods 9031 * have several benefits over {@code Character} case mapping methods. 
9032 * {@code String} case mapping methods can perform locale-sensitive 9033 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9034 * the {@code Character} case mapping methods cannot. 9035 * 9036 * @param codePoint the character (Unicode code point) to be converted. 9037 * @return the lowercase equivalent of the character (Unicode code 9038 * point), if any; otherwise, the character itself. 9039 * @see Character#isLowerCase(int) 9040 * @see String#toLowerCase() 9041 * 9042 * @since 1.5 9043 */ 9044 public static int toLowerCase(int codePoint) { 9045 return CharacterData.of(codePoint).toLowerCase(codePoint); 9046 } 9047 9048 /** 9049 * Converts the character argument to uppercase using case mapping 9050 * information from the UnicodeData file. 9051 * <p> 9052 * Note that 9053 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 9054 * does not always return {@code true} for some ranges of 9055 * characters, particularly those that are symbols or ideographs. 9056 * 9057 * <p>In general, {@link String#toUpperCase()} should be used to map 9058 * characters to uppercase. {@code String} case mapping methods 9059 * have several benefits over {@code Character} case mapping methods. 9060 * {@code String} case mapping methods can perform locale-sensitive 9061 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9062 * the {@code Character} case mapping methods cannot. 9063 * 9064 * <p><b>Note:</b> This method cannot handle <a 9065 * href="#supplementary"> supplementary characters</a>. To support 9066 * all Unicode characters, including supplementary characters, use 9067 * the {@link #toUpperCase(int)} method. 9068 * 9069 * @param ch the character to be converted. 9070 * @return the uppercase equivalent of the character, if any; 9071 * otherwise, the character itself. 
9072 * @see Character#isUpperCase(char) 9073 * @see String#toUpperCase() 9074 */ 9075 public static char toUpperCase(char ch) { 9076 return (char)toUpperCase((int)ch); 9077 } 9078 9079 /** 9080 * Converts the character (Unicode code point) argument to 9081 * uppercase using case mapping information from the UnicodeData 9082 * file. 9083 * 9084 * <p>Note that 9085 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 9086 * does not always return {@code true} for some ranges of 9087 * characters, particularly those that are symbols or ideographs. 9088 * 9089 * <p>In general, {@link String#toUpperCase()} should be used to map 9090 * characters to uppercase. {@code String} case mapping methods 9091 * have several benefits over {@code Character} case mapping methods. 9092 * {@code String} case mapping methods can perform locale-sensitive 9093 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 9094 * the {@code Character} case mapping methods cannot. 9095 * 9096 * @param codePoint the character (Unicode code point) to be converted. 9097 * @return the uppercase equivalent of the character, if any; 9098 * otherwise, the character itself. 9099 * @see Character#isUpperCase(int) 9100 * @see String#toUpperCase() 9101 * 9102 * @since 1.5 9103 */ 9104 public static int toUpperCase(int codePoint) { 9105 return CharacterData.of(codePoint).toUpperCase(codePoint); 9106 } 9107 9108 /** 9109 * Converts the character argument to titlecase using case mapping 9110 * information from the UnicodeData file. If a character has no 9111 * explicit titlecase mapping and is not itself a titlecase char 9112 * according to UnicodeData, then the uppercase mapping is 9113 * returned as an equivalent titlecase mapping. If the 9114 * {@code char} argument is already a titlecase 9115 * {@code char}, the same {@code char} value will be 9116 * returned. 
9117 * <p> 9118 * Note that 9119 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 9120 * does not always return {@code true} for some ranges of 9121 * characters. 9122 * 9123 * <p><b>Note:</b> This method cannot handle <a 9124 * href="#supplementary"> supplementary characters</a>. To support 9125 * all Unicode characters, including supplementary characters, use 9126 * the {@link #toTitleCase(int)} method. 9127 * 9128 * @param ch the character to be converted. 9129 * @return the titlecase equivalent of the character, if any; 9130 * otherwise, the character itself. 9131 * @see Character#isTitleCase(char) 9132 * @see Character#toLowerCase(char) 9133 * @see Character#toUpperCase(char) 9134 * @since 1.0.2 9135 */ 9136 public static char toTitleCase(char ch) { 9137 return (char)toTitleCase((int)ch); 9138 } 9139 9140 /** 9141 * Converts the character (Unicode code point) argument to titlecase using case mapping 9142 * information from the UnicodeData file. If a character has no 9143 * explicit titlecase mapping and is not itself a titlecase char 9144 * according to UnicodeData, then the uppercase mapping is 9145 * returned as an equivalent titlecase mapping. If the 9146 * character argument is already a titlecase 9147 * character, the same character value will be 9148 * returned. 9149 * 9150 * <p>Note that 9151 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 9152 * does not always return {@code true} for some ranges of 9153 * characters. 9154 * 9155 * @param codePoint the character (Unicode code point) to be converted. 9156 * @return the titlecase equivalent of the character, if any; 9157 * otherwise, the character itself. 
9158 * @see Character#isTitleCase(int) 9159 * @see Character#toLowerCase(int) 9160 * @see Character#toUpperCase(int) 9161 * @since 1.5 9162 */ 9163 public static int toTitleCase(int codePoint) { 9164 return CharacterData.of(codePoint).toTitleCase(codePoint); 9165 } 9166 9167 /** 9168 * Returns the numeric value of the character {@code ch} in the 9169 * specified radix. 9170 * <p> 9171 * If the radix is not in the range {@code MIN_RADIX} ≤ 9172 * {@code radix} ≤ {@code MAX_RADIX} or if the 9173 * value of {@code ch} is not a valid digit in the specified 9174 * radix, {@code -1} is returned. A character is a valid digit 9175 * if at least one of the following is true: 9176 * <ul> 9177 * <li>The method {@code isDigit} is {@code true} of the character 9178 * and the Unicode decimal digit value of the character (or its 9179 * single-character decomposition) is less than the specified radix. 9180 * In this case the decimal digit value is returned. 9181 * <li>The character is one of the uppercase Latin letters 9182 * {@code 'A'} through {@code 'Z'} and its code is less than 9183 * {@code radix + 'A' - 10}. 9184 * In this case, {@code ch - 'A' + 10} 9185 * is returned. 9186 * <li>The character is one of the lowercase Latin letters 9187 * {@code 'a'} through {@code 'z'} and its code is less than 9188 * {@code radix + 'a' - 10}. 9189 * In this case, {@code ch - 'a' + 10} 9190 * is returned. 9191 * <li>The character is one of the fullwidth uppercase Latin letters A 9192 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 9193 * and its code is less than 9194 * {@code radix + '\u005CuFF21' - 10}. 9195 * In this case, {@code ch - '\u005CuFF21' + 10} 9196 * is returned. 9197 * <li>The character is one of the fullwidth lowercase Latin letters a 9198 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 9199 * and its code is less than 9200 * {@code radix + '\u005CuFF41' - 10}. 9201 * In this case, {@code ch - '\u005CuFF41' + 10} 9202 * is returned. 
9203 * </ul> 9204 * 9205 * <p><b>Note:</b> This method cannot handle <a 9206 * href="#supplementary"> supplementary characters</a>. To support 9207 * all Unicode characters, including supplementary characters, use 9208 * the {@link #digit(int, int)} method. 9209 * 9210 * @param ch the character to be converted. 9211 * @param radix the radix. 9212 * @return the numeric value represented by the character in the 9213 * specified radix. 9214 * @see Character#forDigit(int, int) 9215 * @see Character#isDigit(char) 9216 */ 9217 public static int digit(char ch, int radix) { 9218 return digit((int)ch, radix); 9219 } 9220 9221 /** 9222 * Returns the numeric value of the specified character (Unicode 9223 * code point) in the specified radix. 9224 * 9225 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 9226 * {@code radix} ≤ {@code MAX_RADIX} or if the 9227 * character is not a valid digit in the specified 9228 * radix, {@code -1} is returned. A character is a valid digit 9229 * if at least one of the following is true: 9230 * <ul> 9231 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 9232 * and the Unicode decimal digit value of the character (or its 9233 * single-character decomposition) is less than the specified radix. 9234 * In this case the decimal digit value is returned. 9235 * <li>The character is one of the uppercase Latin letters 9236 * {@code 'A'} through {@code 'Z'} and its code is less than 9237 * {@code radix + 'A' - 10}. 9238 * In this case, {@code codePoint - 'A' + 10} 9239 * is returned. 9240 * <li>The character is one of the lowercase Latin letters 9241 * {@code 'a'} through {@code 'z'} and its code is less than 9242 * {@code radix + 'a' - 10}. 9243 * In this case, {@code codePoint - 'a' + 10} 9244 * is returned. 
9245 * <li>The character is one of the fullwidth uppercase Latin letters A 9246 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 9247 * and its code is less than 9248 * {@code radix + '\u005CuFF21' - 10}. 9249 * In this case, 9250 * {@code codePoint - '\u005CuFF21' + 10} 9251 * is returned. 9252 * <li>The character is one of the fullwidth lowercase Latin letters a 9253 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 9254 * and its code is less than 9255 * {@code radix + '\u005CuFF41'- 10}. 9256 * In this case, 9257 * {@code codePoint - '\u005CuFF41' + 10} 9258 * is returned. 9259 * </ul> 9260 * 9261 * @param codePoint the character (Unicode code point) to be converted. 9262 * @param radix the radix. 9263 * @return the numeric value represented by the character in the 9264 * specified radix. 9265 * @see Character#forDigit(int, int) 9266 * @see Character#isDigit(int) 9267 * @since 1.5 9268 */ 9269 public static int digit(int codePoint, int radix) { 9270 return CharacterData.of(codePoint).digit(codePoint, radix); 9271 } 9272 9273 /** 9274 * Returns the {@code int} value that the specified Unicode 9275 * character represents. For example, the character 9276 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 9277 * an int with a value of 50. 9278 * <p> 9279 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 9280 * {@code '\u005Cu005A'}), lowercase 9281 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 9282 * full width variant ({@code '\u005CuFF21'} through 9283 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 9284 * {@code '\u005CuFF5A'}) forms have numeric values from 10 9285 * through 35. This is independent of the Unicode specification, 9286 * which does not assign numeric values to these {@code char} 9287 * values. 9288 * <p> 9289 * If the character does not have a numeric value, then -1 is returned. 
9290 * If the character has a numeric value that cannot be represented as a 9291 * nonnegative integer (for example, a fractional value), then -2 9292 * is returned. 9293 * 9294 * <p><b>Note:</b> This method cannot handle <a 9295 * href="#supplementary"> supplementary characters</a>. To support 9296 * all Unicode characters, including supplementary characters, use 9297 * the {@link #getNumericValue(int)} method. 9298 * 9299 * @param ch the character to be converted. 9300 * @return the numeric value of the character, as a nonnegative {@code int} 9301 * value; -2 if the character has a numeric value but the value 9302 * can not be represented as a nonnegative {@code int} value; 9303 * -1 if the character has no numeric value. 9304 * @see Character#forDigit(int, int) 9305 * @see Character#isDigit(char) 9306 * @since 1.1 9307 */ 9308 public static int getNumericValue(char ch) { 9309 return getNumericValue((int)ch); 9310 } 9311 9312 /** 9313 * Returns the {@code int} value that the specified 9314 * character (Unicode code point) represents. For example, the character 9315 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 9316 * an {@code int} with a value of 50. 9317 * <p> 9318 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 9319 * {@code '\u005Cu005A'}), lowercase 9320 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 9321 * full width variant ({@code '\u005CuFF21'} through 9322 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 9323 * {@code '\u005CuFF5A'}) forms have numeric values from 10 9324 * through 35. This is independent of the Unicode specification, 9325 * which does not assign numeric values to these {@code char} 9326 * values. 9327 * <p> 9328 * If the character does not have a numeric value, then -1 is returned. 9329 * If the character has a numeric value that cannot be represented as a 9330 * nonnegative integer (for example, a fractional value), then -2 9331 * is returned. 
9332 * 9333 * @param codePoint the character (Unicode code point) to be converted. 9334 * @return the numeric value of the character, as a nonnegative {@code int} 9335 * value; -2 if the character has a numeric value but the value 9336 * can not be represented as a nonnegative {@code int} value; 9337 * -1 if the character has no numeric value. 9338 * @see Character#forDigit(int, int) 9339 * @see Character#isDigit(int) 9340 * @since 1.5 9341 */ 9342 public static int getNumericValue(int codePoint) { 9343 return CharacterData.of(codePoint).getNumericValue(codePoint); 9344 } 9345 9346 /** 9347 * Determines if the specified character is ISO-LATIN-1 white space. 9348 * This method returns {@code true} for the following five 9349 * characters only: 9350 * <table summary="truechars"> 9351 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 9352 * <td>{@code HORIZONTAL TABULATION}</td></tr> 9353 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 9354 * <td>{@code NEW LINE}</td></tr> 9355 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 9356 * <td>{@code FORM FEED}</td></tr> 9357 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 9358 * <td>{@code CARRIAGE RETURN}</td></tr> 9359 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 9360 * <td>{@code SPACE}</td></tr> 9361 * </table> 9362 * 9363 * @param ch the character to be tested. 9364 * @return {@code true} if the character is ISO-LATIN-1 white 9365 * space; {@code false} otherwise. 9366 * @see Character#isSpaceChar(char) 9367 * @see Character#isWhitespace(char) 9368 * @deprecated Replaced by isWhitespace(char). 9369 */ 9370 @Deprecated 9371 public static boolean isSpace(char ch) { 9372 return (ch <= 0x0020) && 9373 (((((1L << 0x0009) | 9374 (1L << 0x000A) | 9375 (1L << 0x000C) | 9376 (1L << 0x000D) | 9377 (1L << 0x0020)) >> ch) & 1L) != 0); 9378 } 9379 9380 9381 /** 9382 * Determines if the specified character is a Unicode space character. 
9383 * A character is considered to be a space character if and only if 9384 * it is specified to be a space character by the Unicode Standard. This 9385 * method returns true if the character's general category type is any of 9386 * the following: 9387 * <ul> 9388 * <li> {@code SPACE_SEPARATOR} 9389 * <li> {@code LINE_SEPARATOR} 9390 * <li> {@code PARAGRAPH_SEPARATOR} 9391 * </ul> 9392 * 9393 * <p><b>Note:</b> This method cannot handle <a 9394 * href="#supplementary"> supplementary characters</a>. To support 9395 * all Unicode characters, including supplementary characters, use 9396 * the {@link #isSpaceChar(int)} method. 9397 * 9398 * @param ch the character to be tested. 9399 * @return {@code true} if the character is a space character; 9400 * {@code false} otherwise. 9401 * @see Character#isWhitespace(char) 9402 * @since 1.1 9403 */ 9404 public static boolean isSpaceChar(char ch) { 9405 return isSpaceChar((int)ch); 9406 } 9407 9408 /** 9409 * Determines if the specified character (Unicode code point) is a 9410 * Unicode space character. A character is considered to be a 9411 * space character if and only if it is specified to be a space 9412 * character by the Unicode Standard. This method returns true if 9413 * the character's general category type is any of the following: 9414 * 9415 * <ul> 9416 * <li> {@link #SPACE_SEPARATOR} 9417 * <li> {@link #LINE_SEPARATOR} 9418 * <li> {@link #PARAGRAPH_SEPARATOR} 9419 * </ul> 9420 * 9421 * @param codePoint the character (Unicode code point) to be tested. 9422 * @return {@code true} if the character is a space character; 9423 * {@code false} otherwise. 
9424 * @see Character#isWhitespace(int) 9425 * @since 1.5 9426 */ 9427 public static boolean isSpaceChar(int codePoint) { 9428 return ((((1 << Character.SPACE_SEPARATOR) | 9429 (1 << Character.LINE_SEPARATOR) | 9430 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 9431 != 0; 9432 } 9433 9434 /** 9435 * Determines if the specified character is white space according to Java. 9436 * A character is a Java whitespace character if and only if it satisfies 9437 * one of the following criteria: 9438 * <ul> 9439 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 9440 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 9441 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 9442 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 9443 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 9444 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 9445 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 9446 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 9447 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 9448 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 9449 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 9450 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 9451 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 9452 * </ul> 9453 * 9454 * <p><b>Note:</b> This method cannot handle <a 9455 * href="#supplementary"> supplementary characters</a>. To support 9456 * all Unicode characters, including supplementary characters, use 9457 * the {@link #isWhitespace(int)} method. 9458 * 9459 * @param ch the character to be tested. 9460 * @return {@code true} if the character is a Java whitespace 9461 * character; {@code false} otherwise. 
9462 * @see Character#isSpaceChar(char) 9463 * @since 1.1 9464 */ 9465 public static boolean isWhitespace(char ch) { 9466 return isWhitespace((int)ch); 9467 } 9468 9469 /** 9470 * Determines if the specified character (Unicode code point) is 9471 * white space according to Java. A character is a Java 9472 * whitespace character if and only if it satisfies one of the 9473 * following criteria: 9474 * <ul> 9475 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 9476 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 9477 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 9478 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 9479 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 9480 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 9481 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 9482 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 9483 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 9484 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 9485 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 9486 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 9487 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 9488 * </ul> 9489 * 9490 * @param codePoint the character (Unicode code point) to be tested. 9491 * @return {@code true} if the character is a Java whitespace 9492 * character; {@code false} otherwise. 9493 * @see Character#isSpaceChar(int) 9494 * @since 1.5 9495 */ 9496 public static boolean isWhitespace(int codePoint) { 9497 return CharacterData.of(codePoint).isWhitespace(codePoint); 9498 } 9499 9500 /** 9501 * Determines if the specified character is an ISO control 9502 * character. A character is considered to be an ISO control 9503 * character if its code is in the range {@code '\u005Cu0000'} 9504 * through {@code '\u005Cu001F'} or in the range 9505 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
9506 * 9507 * <p><b>Note:</b> This method cannot handle <a 9508 * href="#supplementary"> supplementary characters</a>. To support 9509 * all Unicode characters, including supplementary characters, use 9510 * the {@link #isISOControl(int)} method. 9511 * 9512 * @param ch the character to be tested. 9513 * @return {@code true} if the character is an ISO control character; 9514 * {@code false} otherwise. 9515 * 9516 * @see Character#isSpaceChar(char) 9517 * @see Character#isWhitespace(char) 9518 * @since 1.1 9519 */ 9520 public static boolean isISOControl(char ch) { 9521 return isISOControl((int)ch); 9522 } 9523 9524 /** 9525 * Determines if the referenced character (Unicode code point) is an ISO control 9526 * character. A character is considered to be an ISO control 9527 * character if its code is in the range {@code '\u005Cu0000'} 9528 * through {@code '\u005Cu001F'} or in the range 9529 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 9530 * 9531 * @param codePoint the character (Unicode code point) to be tested. 9532 * @return {@code true} if the character is an ISO control character; 9533 * {@code false} otherwise. 9534 * @see Character#isSpaceChar(int) 9535 * @see Character#isWhitespace(int) 9536 * @since 1.5 9537 */ 9538 public static boolean isISOControl(int codePoint) { 9539 // Optimized form of: 9540 // (codePoint >= 0x00 && codePoint <= 0x1F) || 9541 // (codePoint >= 0x7F && codePoint <= 0x9F); 9542 return codePoint <= 0x9F && 9543 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 9544 } 9545 9546 /** 9547 * Returns a value indicating a character's general category. 9548 * 9549 * <p><b>Note:</b> This method cannot handle <a 9550 * href="#supplementary"> supplementary characters</a>. To support 9551 * all Unicode characters, including supplementary characters, use 9552 * the {@link #getType(int)} method. 9553 * 9554 * @param ch the character to be tested. 9555 * @return a value of type {@code int} representing the 9556 * character's general category. 
9557 * @see Character#COMBINING_SPACING_MARK 9558 * @see Character#CONNECTOR_PUNCTUATION 9559 * @see Character#CONTROL 9560 * @see Character#CURRENCY_SYMBOL 9561 * @see Character#DASH_PUNCTUATION 9562 * @see Character#DECIMAL_DIGIT_NUMBER 9563 * @see Character#ENCLOSING_MARK 9564 * @see Character#END_PUNCTUATION 9565 * @see Character#FINAL_QUOTE_PUNCTUATION 9566 * @see Character#FORMAT 9567 * @see Character#INITIAL_QUOTE_PUNCTUATION 9568 * @see Character#LETTER_NUMBER 9569 * @see Character#LINE_SEPARATOR 9570 * @see Character#LOWERCASE_LETTER 9571 * @see Character#MATH_SYMBOL 9572 * @see Character#MODIFIER_LETTER 9573 * @see Character#MODIFIER_SYMBOL 9574 * @see Character#NON_SPACING_MARK 9575 * @see Character#OTHER_LETTER 9576 * @see Character#OTHER_NUMBER 9577 * @see Character#OTHER_PUNCTUATION 9578 * @see Character#OTHER_SYMBOL 9579 * @see Character#PARAGRAPH_SEPARATOR 9580 * @see Character#PRIVATE_USE 9581 * @see Character#SPACE_SEPARATOR 9582 * @see Character#START_PUNCTUATION 9583 * @see Character#SURROGATE 9584 * @see Character#TITLECASE_LETTER 9585 * @see Character#UNASSIGNED 9586 * @see Character#UPPERCASE_LETTER 9587 * @since 1.1 9588 */ 9589 public static int getType(char ch) { 9590 return getType((int)ch); 9591 } 9592 9593 /** 9594 * Returns a value indicating a character's general category. 9595 * 9596 * @param codePoint the character (Unicode code point) to be tested. 9597 * @return a value of type {@code int} representing the 9598 * character's general category. 
9599 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 9600 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 9601 * @see Character#CONTROL CONTROL 9602 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 9603 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 9604 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 9605 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 9606 * @see Character#END_PUNCTUATION END_PUNCTUATION 9607 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 9608 * @see Character#FORMAT FORMAT 9609 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 9610 * @see Character#LETTER_NUMBER LETTER_NUMBER 9611 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 9612 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 9613 * @see Character#MATH_SYMBOL MATH_SYMBOL 9614 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 9615 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 9616 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 9617 * @see Character#OTHER_LETTER OTHER_LETTER 9618 * @see Character#OTHER_NUMBER OTHER_NUMBER 9619 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 9620 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 9621 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 9622 * @see Character#PRIVATE_USE PRIVATE_USE 9623 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 9624 * @see Character#START_PUNCTUATION START_PUNCTUATION 9625 * @see Character#SURROGATE SURROGATE 9626 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 9627 * @see Character#UNASSIGNED UNASSIGNED 9628 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 9629 * @since 1.5 9630 */ 9631 public static int getType(int codePoint) { 9632 return CharacterData.of(codePoint).getType(codePoint); 9633 } 9634 9635 /** 9636 * Determines the character representation for a specific digit in 9637 * the specified radix. 
If the value of {@code radix} is not a 9638 * valid radix, or the value of {@code digit} is not a valid 9639 * digit in the specified radix, the null character 9640 * ({@code '\u005Cu0000'}) is returned. 9641 * <p> 9642 * The {@code radix} argument is valid if it is greater than or 9643 * equal to {@code MIN_RADIX} and less than or equal to 9644 * {@code MAX_RADIX}. The {@code digit} argument is valid if 9645 * {@code 0 <= digit < radix}. 9646 * <p> 9647 * If the digit is less than 10, then 9648 * {@code '0' + digit} is returned. Otherwise, the value 9649 * {@code 'a' + digit - 10} is returned. 9650 * 9651 * @param digit the number to convert to a character. 9652 * @param radix the radix. 9653 * @return the {@code char} representation of the specified digit 9654 * in the specified radix. 9655 * @see Character#MIN_RADIX 9656 * @see Character#MAX_RADIX 9657 * @see Character#digit(char, int) 9658 */ 9659 public static char forDigit(int digit, int radix) { 9660 if ((digit >= radix) || (digit < 0)) { 9661 return '\0'; 9662 } 9663 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 9664 return '\0'; 9665 } 9666 if (digit < 10) { 9667 return (char)('0' + digit); 9668 } 9669 return (char)('a' - 10 + digit); 9670 } 9671 9672 /** 9673 * Returns the Unicode directionality property for the given 9674 * character. Character directionality is used to calculate the 9675 * visual ordering of text. The directionality value of undefined 9676 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 9677 * 9678 * <p><b>Note:</b> This method cannot handle <a 9679 * href="#supplementary"> supplementary characters</a>. To support 9680 * all Unicode characters, including supplementary characters, use 9681 * the {@link #getDirectionality(int)} method. 9682 * 9683 * @param ch {@code char} for which the directionality property 9684 * is requested. 9685 * @return the directionality property of the {@code char} value. 
9686 * 9687 * @see Character#DIRECTIONALITY_UNDEFINED 9688 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 9689 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 9690 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 9691 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 9692 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 9693 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 9694 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 9695 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 9696 * @see Character#DIRECTIONALITY_NONSPACING_MARK 9697 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 9698 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 9699 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 9700 * @see Character#DIRECTIONALITY_WHITESPACE 9701 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 9702 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 9703 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 9704 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 9705 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 9706 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 9707 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 9708 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 9709 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 9710 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 9711 * @since 1.4 9712 */ 9713 public static byte getDirectionality(char ch) { 9714 return getDirectionality((int)ch); 9715 } 9716 9717 /** 9718 * Returns the Unicode directionality property for the given 9719 * character (Unicode code point). Character directionality is 9720 * used to calculate the visual ordering of text. The 9721 * directionality value of undefined character is {@link 9722 * #DIRECTIONALITY_UNDEFINED}. 9723 * 9724 * @param codePoint the character (Unicode code point) for which 9725 * the directionality property is requested. 9726 * @return the directionality property of the character. 
9727 * 9728 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 9729 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 9730 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 9731 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 9732 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 9733 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 9734 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 9735 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 9736 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 9737 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 9738 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 9739 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 9740 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 9741 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 9742 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 9743 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 9744 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 9745 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 9746 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 9747 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 9748 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 9749 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 
9750 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 9751 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 9752 * @since 1.5 9753 */ 9754 public static byte getDirectionality(int codePoint) { 9755 return CharacterData.of(codePoint).getDirectionality(codePoint); 9756 } 9757 9758 /** 9759 * Determines whether the character is mirrored according to the 9760 * Unicode specification. Mirrored characters should have their 9761 * glyphs horizontally mirrored when displayed in text that is 9762 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 9763 * PARENTHESIS is semantically defined to be an <i>opening 9764 * parenthesis</i>. This will appear as a "(" in text that is 9765 * left-to-right but as a ")" in text that is right-to-left. 9766 * 9767 * <p><b>Note:</b> This method cannot handle <a 9768 * href="#supplementary"> supplementary characters</a>. To support 9769 * all Unicode characters, including supplementary characters, use 9770 * the {@link #isMirrored(int)} method. 9771 * 9772 * @param ch {@code char} for which the mirrored property is requested 9773 * @return {@code true} if the char is mirrored, {@code false} 9774 * if the {@code char} is not mirrored or is not defined. 9775 * @since 1.4 9776 */ 9777 public static boolean isMirrored(char ch) { 9778 return isMirrored((int)ch); 9779 } 9780 9781 /** 9782 * Determines whether the specified character (Unicode code point) 9783 * is mirrored according to the Unicode specification. Mirrored 9784 * characters should have their glyphs horizontally mirrored when 9785 * displayed in text that is right-to-left. For example, 9786 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 9787 * defined to be an <i>opening parenthesis</i>. This will appear 9788 * as a "(" in text that is left-to-right but as a ")" in text 9789 * that is right-to-left. 9790 * 9791 * @param codePoint the character (Unicode code point) to be tested. 
9792 * @return {@code true} if the character is mirrored, {@code false} 9793 * if the character is not mirrored or is not defined. 9794 * @since 1.5 9795 */ 9796 public static boolean isMirrored(int codePoint) { 9797 return CharacterData.of(codePoint).isMirrored(codePoint); 9798 } 9799 9800 /** 9801 * Compares two {@code Character} objects numerically. 9802 * 9803 * @param anotherCharacter the {@code Character} to be compared. 9804 9805 * @return the value {@code 0} if the argument {@code Character} 9806 * is equal to this {@code Character}; a value less than 9807 * {@code 0} if this {@code Character} is numerically less 9808 * than the {@code Character} argument; and a value greater than 9809 * {@code 0} if this {@code Character} is numerically greater 9810 * than the {@code Character} argument (unsigned comparison). 9811 * Note that this is strictly a numerical comparison; it is not 9812 * locale-dependent. 9813 * @since 1.2 9814 */ 9815 public int compareTo(Character anotherCharacter) { 9816 return compare(this.value, anotherCharacter.value); 9817 } 9818 9819 /** 9820 * Compares two {@code char} values numerically. 9821 * The value returned is identical to what would be returned by: 9822 * <pre> 9823 * Character.valueOf(x).compareTo(Character.valueOf(y)) 9824 * </pre> 9825 * 9826 * @param x the first {@code char} to compare 9827 * @param y the second {@code char} to compare 9828 * @return the value {@code 0} if {@code x == y}; 9829 * a value less than {@code 0} if {@code x < y}; and 9830 * a value greater than {@code 0} if {@code x > y} 9831 * @since 1.7 9832 */ 9833 public static int compare(char x, char y) { 9834 return x - y; 9835 } 9836 9837 /** 9838 * Converts the character (Unicode code point) argument to uppercase using 9839 * information from the UnicodeData file. 9840 * 9841 * @param codePoint the character (Unicode code point) to be converted. 
9842 * @return either the uppercase equivalent of the character, if 9843 * any, or an error flag ({@code Character.ERROR}) 9844 * that indicates that a 1:M {@code char} mapping exists. 9845 * @see Character#isLowerCase(char) 9846 * @see Character#isUpperCase(char) 9847 * @see Character#toLowerCase(char) 9848 * @see Character#toTitleCase(char) 9849 * @since 1.4 9850 */ 9851 static int toUpperCaseEx(int codePoint) { 9852 assert isValidCodePoint(codePoint); 9853 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 9854 } 9855 9856 /** 9857 * Converts the character (Unicode code point) argument to uppercase using case 9858 * mapping information from the SpecialCasing file in the Unicode 9859 * specification. If a character has no explicit uppercase 9860 * mapping, then the {@code char} itself is returned in the 9861 * {@code char[]}. 9862 * 9863 * @param codePoint the character (Unicode code point) to be converted. 9864 * @return a {@code char[]} with the uppercased character. 9865 * @since 1.4 9866 */ 9867 static char[] toUpperCaseCharArray(int codePoint) { 9868 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 9869 assert isBmpCodePoint(codePoint); 9870 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 9871 } 9872 9873 /** 9874 * The number of bits used to represent a {@code char} value in unsigned 9875 * binary form, constant {@code 16}. 9876 * 9877 * @since 1.5 9878 */ 9879 public static final int SIZE = 16; 9880 9881 /** 9882 * The number of bytes used to represent a {@code char} value in unsigned 9883 * binary form. 9884 * 9885 * @since 1.8 9886 */ 9887 public static final int BYTES = SIZE / Byte.SIZE; 9888 9889 /** 9890 * Returns the value obtained by reversing the order of the bytes in the 9891 * specified {@code char} value. 9892 * 9893 * @param ch The {@code char} of which to reverse the byte order. 
9894 * @return the value obtained by reversing (or, equivalently, swapping) 9895 * the bytes in the specified {@code char} value. 9896 * @since 1.5 9897 */ 9898 @HotSpotIntrinsicCandidate 9899 public static char reverseBytes(char ch) { 9900 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 9901 } 9902 9903 /** 9904 * Returns the Unicode name of the specified character 9905 * {@code codePoint}, or null if the code point is 9906 * {@link #UNASSIGNED unassigned}. 9907 * <p> 9908 * Note: if the specified character is not assigned a name by 9909 * the <i>UnicodeData</i> file (part of the Unicode Character 9910 * Database maintained by the Unicode Consortium), the returned 9911 * name is the same as the result of expression. 9912 * 9913 * <blockquote>{@code 9914 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 9915 * + " " 9916 * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 9917 * 9918 * }</blockquote> 9919 * 9920 * @param codePoint the character (Unicode code point) 9921 * 9922 * @return the Unicode name of the specified character, or null if 9923 * the code point is unassigned. 9924 * 9925 * @exception IllegalArgumentException if the specified 9926 * {@code codePoint} is not a valid Unicode 9927 * code point. 9928 * 9929 * @since 1.7 9930 */ 9931 public static String getName(int codePoint) { 9932 if (!isValidCodePoint(codePoint)) { 9933 throw new IllegalArgumentException(); 9934 } 9935 String name = CharacterName.get(codePoint); 9936 if (name != null) 9937 return name; 9938 if (getType(codePoint) == UNASSIGNED) 9939 return null; 9940 UnicodeBlock block = UnicodeBlock.of(codePoint); 9941 if (block != null) 9942 return block.toString().replace('_', ' ') + " " 9943 + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 9944 // should never come here 9945 return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 9946 } 9947 }