1 /* 2 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 import jdk.internal.HotSpotIntrinsicCandidate; 34 35 /** 36 * The {@code Character} class wraps a value of the primitive 37 * type {@code char} in an object. An object of type 38 * {@code Character} contains a single field whose type is 39 * {@code char}. 40 * <p> 41 * In addition, this class provides several methods for determining 42 * a character's category (lowercase letter, digit, etc.) and for converting 43 * characters from uppercase to lowercase and vice versa. 44 * <p> 45 * Character information is based on the Unicode Standard, version 6.2.0. 
46 * <p> 47 * The methods and data of class {@code Character} are defined by 48 * the information in the <i>UnicodeData</i> file that is part of the 49 * Unicode Character Database maintained by the Unicode 50 * Consortium. This file specifies various properties including name 51 * and general category for every defined Unicode code point or 52 * character range. 53 * <p> 54 * The file and its description are available from the Unicode Consortium at: 55 * <ul> 56 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 57 * </ul> 58 * 59 * <h3><a name="unicode">Unicode Character Representations</a></h3> 60 * 61 * <p>The {@code char} data type (and therefore the value that a 62 * {@code Character} object encapsulates) are based on the 63 * original Unicode specification, which defined characters as 64 * fixed-width 16-bit entities. The Unicode Standard has since been 65 * changed to allow for characters whose representation requires more 66 * than 16 bits. The range of legal <em>code point</em>s is now 67 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 68 * (Refer to the <a 69 * href="http://www.unicode.org/reports/tr27/#notation"><i> 70 * definition</i></a> of the U+<i>n</i> notation in the Unicode 71 * Standard.) 72 * 73 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 74 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 75 * <a name="supplementary">Characters</a> whose code points are greater 76 * than U+FFFF are called <em>supplementary character</em>s. The Java 77 * platform uses the UTF-16 representation in {@code char} arrays and 78 * in the {@code String} and {@code StringBuffer} classes. In 79 * this representation, supplementary characters are represented as a pair 80 * of {@code char} values, the first from the <em>high-surrogates</em> 81 * range, (\uD800-\uDBFF), the second from the 82 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
83 * 84 * <p>A {@code char} value, therefore, represents Basic 85 * Multilingual Plane (BMP) code points, including the surrogate 86 * code points, or code units of the UTF-16 encoding. An 87 * {@code int} value represents all Unicode code points, 88 * including supplementary code points. The lower (least significant) 89 * 21 bits of {@code int} are used to represent Unicode code 90 * points and the upper (most significant) 11 bits must be zero. 91 * Unless otherwise specified, the behavior with respect to 92 * supplementary characters and surrogate {@code char} values is 93 * as follows: 94 * 95 * <ul> 96 * <li>The methods that only accept a {@code char} value cannot support 97 * supplementary characters. They treat {@code char} values from the 98 * surrogate ranges as undefined characters. For example, 99 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 100 * this specific value if followed by any low-surrogate value in a string 101 * would represent a letter. 102 * 103 * <li>The methods that accept an {@code int} value support all 104 * Unicode characters, including supplementary characters. For 105 * example, {@code Character.isLetter(0x2F81A)} returns 106 * {@code true} because the code point value represents a letter 107 * (a CJK ideograph). 108 * </ul> 109 * 110 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 111 * used for character values in the range between U+0000 and U+10FFFF, 112 * and <em>Unicode code unit</em> is used for 16-bit 113 * {@code char} values that are code units of the <em>UTF-16</em> 114 * encoding. For more information on Unicode terminology, refer to the 115 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
*
 * @author Lee Boynton
 * @author Guy Steele
 * @author Akira Tanaka
 * @author Martin Buchholz
 * @author Ulf Zibis
 * @since 1.0
 */
public final
class Character implements java.io.Serializable, Comparable<Character> {
    /**
     * The minimum radix available for conversion to and from strings.
     * The constant value of this field is the smallest value permitted
     * for the radix argument in radix-conversion methods such as the
     * {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     *
     * @see Character#digit(char, int)
     * @see Character#forDigit(int, int)
     * @see Integer#toString(int, int)
     * @see Integer#valueOf(String)
     * @see #MAX_RADIX
     */
    public static final int MIN_RADIX = 2;

    /**
     * The maximum radix available for conversion to and from strings.
     * The constant value of this field is the largest value permitted
     * for the radix argument in radix-conversion methods such as the
     * {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     *
     * @see Character#digit(char, int)
     * @see Character#forDigit(int, int)
     * @see Integer#toString(int, int)
     * @see Integer#valueOf(String)
     * @see #MIN_RADIX
     */
    public static final int MAX_RADIX = 36;

    /**
     * The constant value of this field is the smallest value of type
     * {@code char}, {@code '\u005Cu0000'}.
     *
     * @see #MAX_VALUE
     * @since 1.0.2
     */
    public static final char MIN_VALUE = '\u0000';

    /**
     * The constant value of this field is the largest value of type
     * {@code char}, {@code '\u005CuFFFF'}.
     *
     * @see #MIN_VALUE
     * @since 1.0.2
     */
    public static final char MAX_VALUE = '\uFFFF';

    /**
     * The {@code Class} instance representing the primitive type
     * {@code char}.
*
     * @since 1.1
     */
    // NOTE(review): the suppressed unchecked warning presumably comes from
    // casting the result of Class.getPrimitiveClass to Class<Character>;
    // that method's declared return type is not visible here -- confirm.
    @SuppressWarnings("unchecked")
    public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");

    /*
     * Normative general types
     */

    /*
     * General character types
     */

    /**
     * General category "Cn" in the Unicode specification.
     * @since 1.1
     */
    public static final byte UNASSIGNED = 0;

    /**
     * General category "Lu" in the Unicode specification.
     * @since 1.1
     */
    public static final byte UPPERCASE_LETTER = 1;

    /**
     * General category "Ll" in the Unicode specification.
     * @since 1.1
     */
    public static final byte LOWERCASE_LETTER = 2;

    /**
     * General category "Lt" in the Unicode specification.
     * @since 1.1
     */
    public static final byte TITLECASE_LETTER = 3;

    /**
     * General category "Lm" in the Unicode specification.
     * @since 1.1
     */
    public static final byte MODIFIER_LETTER = 4;

    /**
     * General category "Lo" in the Unicode specification.
     * @since 1.1
     */
    public static final byte OTHER_LETTER = 5;

    /**
     * General category "Mn" in the Unicode specification.
     * @since 1.1
     */
    public static final byte NON_SPACING_MARK = 6;

    /**
     * General category "Me" in the Unicode specification.
     * @since 1.1
     */
    public static final byte ENCLOSING_MARK = 7;

    /**
     * General category "Mc" in the Unicode specification.
     * @since 1.1
     */
    public static final byte COMBINING_SPACING_MARK = 8;

    /**
     * General category "Nd" in the Unicode specification.
     * @since 1.1
     */
    public static final byte DECIMAL_DIGIT_NUMBER = 9;

    /**
     * General category "Nl" in the Unicode specification.
     * @since 1.1
     */
    public static final byte LETTER_NUMBER = 10;

    /**
     * General category "No" in the Unicode specification.
* @since 1.1
     */
    public static final byte OTHER_NUMBER = 11;

    /**
     * General category "Zs" in the Unicode specification.
     * @since 1.1
     */
    public static final byte SPACE_SEPARATOR = 12;

    /**
     * General category "Zl" in the Unicode specification.
     * @since 1.1
     */
    public static final byte LINE_SEPARATOR = 13;

    /**
     * General category "Zp" in the Unicode specification.
     * @since 1.1
     */
    public static final byte PARAGRAPH_SEPARATOR = 14;

    /**
     * General category "Cc" in the Unicode specification.
     * @since 1.1
     */
    public static final byte CONTROL = 15;

    /**
     * General category "Cf" in the Unicode specification.
     * @since 1.1
     */
    public static final byte FORMAT = 16;

    // Note: no constant in this list is assigned the value 17; the next
    // category constant, PRIVATE_USE, continues at 18.

    /**
     * General category "Co" in the Unicode specification.
     * @since 1.1
     */
    public static final byte PRIVATE_USE = 18;

    /**
     * General category "Cs" in the Unicode specification.
     * @since 1.1
     */
    public static final byte SURROGATE = 19;

    /**
     * General category "Pd" in the Unicode specification.
     * @since 1.1
     */
    public static final byte DASH_PUNCTUATION = 20;

    /**
     * General category "Ps" in the Unicode specification.
     * @since 1.1
     */
    public static final byte START_PUNCTUATION = 21;

    /**
     * General category "Pe" in the Unicode specification.
     * @since 1.1
     */
    public static final byte END_PUNCTUATION = 22;

    /**
     * General category "Pc" in the Unicode specification.
     * @since 1.1
     */
    public static final byte CONNECTOR_PUNCTUATION = 23;

    /**
     * General category "Po" in the Unicode specification.
     * @since 1.1
     */
    public static final byte OTHER_PUNCTUATION = 24;

    /**
     * General category "Sm" in the Unicode specification.
* @since 1.1
     */
    public static final byte MATH_SYMBOL = 25;

    /**
     * General category "Sc" in the Unicode specification.
     * @since 1.1
     */
    public static final byte CURRENCY_SYMBOL = 26;

    /**
     * General category "Sk" in the Unicode specification.
     * @since 1.1
     */
    public static final byte MODIFIER_SYMBOL = 27;

    /**
     * General category "So" in the Unicode specification.
     * @since 1.1
     */
    public static final byte OTHER_SYMBOL = 28;

    /**
     * General category "Pi" in the Unicode specification.
     * @since 1.4
     */
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

    /**
     * General category "Pf" in the Unicode specification.
     * @since 1.4
     */
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;

    /**
     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
     * The literal {@code 0xFFFFFFFF} is the {@code int} value {@code -1}.
     */
    static final int ERROR = 0xFFFFFFFF;


    /**
     * Undefined bidirectional character type. Undefined {@code char}
     * values have undefined directionality in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_UNDEFINED = -1;

    /**
     * Strong bidirectional character type "L" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

    /**
     * Strong bidirectional character type "R" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

    /**
     * Strong bidirectional character type "AL" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

    /**
     * Weak bidirectional character type "EN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

    /**
     * Weak bidirectional character type "ES" in the Unicode specification.
* @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

    /**
     * Weak bidirectional character type "ET" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

    /**
     * Weak bidirectional character type "AN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

    /**
     * Weak bidirectional character type "CS" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

    /**
     * Weak bidirectional character type "NSM" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

    /**
     * Weak bidirectional character type "BN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

    /**
     * Neutral bidirectional character type "B" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

    /**
     * Neutral bidirectional character type "S" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

    /**
     * Neutral bidirectional character type "WS" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_WHITESPACE = 12;

    /**
     * Neutral bidirectional character type "ON" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

    /**
     * Strong bidirectional character type "LRE" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

    /**
     * Strong bidirectional character type "LRO" in the Unicode specification.
* @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

    /**
     * Strong bidirectional character type "RLE" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

    /**
     * Strong bidirectional character type "RLO" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

    /**
     * Weak bidirectional character type "PDF" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     *
     * @see #MAX_HIGH_SURROGATE
     * @since 1.5
     */
    public static final char MIN_HIGH_SURROGATE = '\uD800';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     *
     * @see #MIN_HIGH_SURROGATE
     * @since 1.5
     */
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
*
     * @since 1.5
     */
    public static final char MIN_LOW_SURROGATE = '\uDC00';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
     *
     * @since 1.5
     */
    public static final char MAX_LOW_SURROGATE = '\uDFFF';

    /**
     * The minimum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuD800'}.
     * Defined as {@link #MIN_HIGH_SURROGATE}.
     *
     * @since 1.5
     */
    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

    /**
     * The maximum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * Defined as {@link #MAX_LOW_SURROGATE}.
     *
     * @since 1.5
     */
    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
     * Unicode supplementary code point</a>, constant {@code U+10000}.
     *
     * @since 1.5
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+0000}.
     *
     * @since 1.5
     */
    public static final int MIN_CODE_POINT = 0x000000;

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#code_point">
     * Unicode code point</a>, constant {@code U+10FFFF}.
     *
     * @since 1.5
     */
    public static final int MAX_CODE_POINT = 0X10FFFF;


    /**
     * Instances of this class represent particular subsets of the Unicode
     * character set. The only family of subsets defined in the
     * {@code Character} class is {@link Character.UnicodeBlock}.
     * Other portions of the Java API may define other subsets for their
     * own purposes.
588 * 589 * @since 1.2 590 */ 591 public static class Subset { 592 593 private String name; 594 595 /** 596 * Constructs a new {@code Subset} instance. 597 * 598 * @param name The name of this subset 599 * @exception NullPointerException if name is {@code null} 600 */ 601 protected Subset(String name) { 602 if (name == null) { 603 throw new NullPointerException("name"); 604 } 605 this.name = name; 606 } 607 608 /** 609 * Compares two {@code Subset} objects for equality. 610 * This method returns {@code true} if and only if 611 * {@code this} and the argument refer to the same 612 * object; since this method is {@code final}, this 613 * guarantee holds for all subclasses. 614 */ 615 public final boolean equals(Object obj) { 616 return (this == obj); 617 } 618 619 /** 620 * Returns the standard hash code as defined by the 621 * {@link Object#hashCode} method. This method 622 * is {@code final} in order to ensure that the 623 * {@code equals} and {@code hashCode} methods will 624 * be consistent in all subclasses. 625 */ 626 public final int hashCode() { 627 return super.hashCode(); 628 } 629 630 /** 631 * Returns the name of this subset. 632 */ 633 public final String toString() { 634 return name; 635 } 636 } 637 638 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 639 // for the latest specification of Unicode Blocks. 640 641 /** 642 * A family of character subsets representing the character blocks in the 643 * Unicode specification. Character blocks generally define characters 644 * used for a specific script or purpose. A character is contained by 645 * at most one Unicode block. 646 * 647 * @since 1.2 648 */ 649 public static final class UnicodeBlock extends Subset { 650 /** 651 * 510 - the expected number of entities 652 * 0.75 - the default load factor of HashMap 653 */ 654 private static Map<String, UnicodeBlock> map = 655 new HashMap<>((int)(510 / 0.75f + 1.0f)); 656 657 /** 658 * Creates a UnicodeBlock with the given identifier name. 
659 * This name must be the same as the block identifier. 660 */ 661 private UnicodeBlock(String idName) { 662 super(idName); 663 map.put(idName, this); 664 } 665 666 /** 667 * Creates a UnicodeBlock with the given identifier name and 668 * alias name. 669 */ 670 private UnicodeBlock(String idName, String alias) { 671 this(idName); 672 map.put(alias, this); 673 } 674 675 /** 676 * Creates a UnicodeBlock with the given identifier name and 677 * alias names. 678 */ 679 private UnicodeBlock(String idName, String... aliases) { 680 this(idName); 681 for (String alias : aliases) 682 map.put(alias, this); 683 } 684 685 /** 686 * Constant for the "Basic Latin" Unicode character block. 687 * @since 1.2 688 */ 689 public static final UnicodeBlock BASIC_LATIN = 690 new UnicodeBlock("BASIC_LATIN", 691 "BASIC LATIN", 692 "BASICLATIN"); 693 694 /** 695 * Constant for the "Latin-1 Supplement" Unicode character block. 696 * @since 1.2 697 */ 698 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 699 new UnicodeBlock("LATIN_1_SUPPLEMENT", 700 "LATIN-1 SUPPLEMENT", 701 "LATIN-1SUPPLEMENT"); 702 703 /** 704 * Constant for the "Latin Extended-A" Unicode character block. 705 * @since 1.2 706 */ 707 public static final UnicodeBlock LATIN_EXTENDED_A = 708 new UnicodeBlock("LATIN_EXTENDED_A", 709 "LATIN EXTENDED-A", 710 "LATINEXTENDED-A"); 711 712 /** 713 * Constant for the "Latin Extended-B" Unicode character block. 714 * @since 1.2 715 */ 716 public static final UnicodeBlock LATIN_EXTENDED_B = 717 new UnicodeBlock("LATIN_EXTENDED_B", 718 "LATIN EXTENDED-B", 719 "LATINEXTENDED-B"); 720 721 /** 722 * Constant for the "IPA Extensions" Unicode character block. 723 * @since 1.2 724 */ 725 public static final UnicodeBlock IPA_EXTENSIONS = 726 new UnicodeBlock("IPA_EXTENSIONS", 727 "IPA EXTENSIONS", 728 "IPAEXTENSIONS"); 729 730 /** 731 * Constant for the "Spacing Modifier Letters" Unicode character block. 
* @since 1.2
         */
        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
                             "SPACING MODIFIER LETTERS",
                             "SPACINGMODIFIERLETTERS");

        /**
         * Constant for the "Combining Diacritical Marks" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
                             "COMBINING DIACRITICAL MARKS",
                             "COMBININGDIACRITICALMARKS");

        /**
         * Constant for the "Greek and Coptic" Unicode character block.
         * <p>
         * This block was previously known as the "Greek" block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock GREEK =
            new UnicodeBlock("GREEK",             // identifier keeps the previous block name
                             "GREEK AND COPTIC",  // aliases carry the current block name
                             "GREEKANDCOPTIC");

        /**
         * Constant for the "Cyrillic" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock CYRILLIC =
            new UnicodeBlock("CYRILLIC");

        /**
         * Constant for the "Armenian" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ARMENIAN =
            new UnicodeBlock("ARMENIAN");

        /**
         * Constant for the "Hebrew" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock HEBREW =
            new UnicodeBlock("HEBREW");

        /**
         * Constant for the "Arabic" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC =
            new UnicodeBlock("ARABIC");

        /**
         * Constant for the "Devanagari" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock DEVANAGARI =
            new UnicodeBlock("DEVANAGARI");

        /**
         * Constant for the "Bengali" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock BENGALI =
            new UnicodeBlock("BENGALI");

        /**
         * Constant for the "Gurmukhi" Unicode character block.
* @since 1.2
         */
        // GURMUKHI through GEORGIAN below are each created with the
        // identifier name only; no alias names are passed.
        public static final UnicodeBlock GURMUKHI =
            new UnicodeBlock("GURMUKHI");

        /**
         * Constant for the "Gujarati" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GUJARATI =
            new UnicodeBlock("GUJARATI");

        /**
         * Constant for the "Oriya" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ORIYA =
            new UnicodeBlock("ORIYA");

        /**
         * Constant for the "Tamil" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TAMIL =
            new UnicodeBlock("TAMIL");

        /**
         * Constant for the "Telugu" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TELUGU =
            new UnicodeBlock("TELUGU");

        /**
         * Constant for the "Kannada" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock KANNADA =
            new UnicodeBlock("KANNADA");

        /**
         * Constant for the "Malayalam" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock MALAYALAM =
            new UnicodeBlock("MALAYALAM");

        /**
         * Constant for the "Thai" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock THAI =
            new UnicodeBlock("THAI");

        /**
         * Constant for the "Lao" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LAO =
            new UnicodeBlock("LAO");

        /**
         * Constant for the "Tibetan" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TIBETAN =
            new UnicodeBlock("TIBETAN");

        /**
         * Constant for the "Georgian" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GEORGIAN =
            new UnicodeBlock("GEORGIAN");

        /**
         * Constant for the "Hangul Jamo" Unicode character block.
* @since 1.2
         */
        // Each constant declared below passes two aliases after the
        // identifier name: the block name written with spaces, and the same
        // name with the spaces removed.
        public static final UnicodeBlock HANGUL_JAMO =
            new UnicodeBlock("HANGUL_JAMO",
                             "HANGUL JAMO",
                             "HANGULJAMO");

        /**
         * Constant for the "Latin Extended Additional" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
                             "LATIN EXTENDED ADDITIONAL",
                             "LATINEXTENDEDADDITIONAL");

        /**
         * Constant for the "Greek Extended" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GREEK_EXTENDED =
            new UnicodeBlock("GREEK_EXTENDED",
                             "GREEK EXTENDED",
                             "GREEKEXTENDED");

        /**
         * Constant for the "General Punctuation" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GENERAL_PUNCTUATION =
            new UnicodeBlock("GENERAL_PUNCTUATION",
                             "GENERAL PUNCTUATION",
                             "GENERALPUNCTUATION");

        /**
         * Constant for the "Superscripts and Subscripts" Unicode character
         * block.
         * @since 1.2
         */
        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
                             "SUPERSCRIPTS AND SUBSCRIPTS",
                             "SUPERSCRIPTSANDSUBSCRIPTS");

        /**
         * Constant for the "Currency Symbols" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock CURRENCY_SYMBOLS =
            new UnicodeBlock("CURRENCY_SYMBOLS",
                             "CURRENCY SYMBOLS",
                             "CURRENCYSYMBOLS");

        /**
         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
         * character block.
         * <p>
         * This block was previously known as "Combining Marks for Symbols".
* @since 1.2
         */
        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",  // current block name
                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
                             "COMBINING MARKS FOR SYMBOLS",              // previous block name
                             "COMBININGMARKSFORSYMBOLS");

        /**
         * Constant for the "Letterlike Symbols" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
            new UnicodeBlock("LETTERLIKE_SYMBOLS",
                             "LETTERLIKE SYMBOLS",
                             "LETTERLIKESYMBOLS");

        /**
         * Constant for the "Number Forms" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock NUMBER_FORMS =
            new UnicodeBlock("NUMBER_FORMS",
                             "NUMBER FORMS",
                             "NUMBERFORMS");

        /**
         * Constant for the "Arrows" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ARROWS =
            new UnicodeBlock("ARROWS");

        /**
         * Constant for the "Mathematical Operators" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
            new UnicodeBlock("MATHEMATICAL_OPERATORS",
                             "MATHEMATICAL OPERATORS",
                             "MATHEMATICALOPERATORS");

        /**
         * Constant for the "Miscellaneous Technical" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
                             "MISCELLANEOUS TECHNICAL",
                             "MISCELLANEOUSTECHNICAL");

        /**
         * Constant for the "Control Pictures" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock CONTROL_PICTURES =
            new UnicodeBlock("CONTROL_PICTURES",
                             "CONTROL PICTURES",
                             "CONTROLPICTURES");

        /**
         * Constant for the "Optical Character Recognition" Unicode character block.
* @since 1.2
         */
        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
                             "OPTICAL CHARACTER RECOGNITION",
                             "OPTICALCHARACTERRECOGNITION");

        /**
         * Constant for the "Enclosed Alphanumerics" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
                             "ENCLOSED ALPHANUMERICS",
                             "ENCLOSEDALPHANUMERICS");

        /**
         * Constant for the "Box Drawing" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock BOX_DRAWING =
            new UnicodeBlock("BOX_DRAWING",
                             "BOX DRAWING",
                             "BOXDRAWING");

        /**
         * Constant for the "Block Elements" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock BLOCK_ELEMENTS =
            new UnicodeBlock("BLOCK_ELEMENTS",
                             "BLOCK ELEMENTS",
                             "BLOCKELEMENTS");

        /**
         * Constant for the "Geometric Shapes" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GEOMETRIC_SHAPES =
            new UnicodeBlock("GEOMETRIC_SHAPES",
                             "GEOMETRIC SHAPES",
                             "GEOMETRICSHAPES");

        /**
         * Constant for the "Miscellaneous Symbols" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
                             "MISCELLANEOUS SYMBOLS",
                             "MISCELLANEOUSSYMBOLS");

        /**
         * Constant for the "Dingbats" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock DINGBATS =
            new UnicodeBlock("DINGBATS");  // identifier name only, no aliases passed

        /**
         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1063 * @since 1.2 1064 */ 1065 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1066 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1067 "CJK SYMBOLS AND PUNCTUATION", 1068 "CJKSYMBOLSANDPUNCTUATION"); 1069 1070 /** 1071 * Constant for the "Hiragana" Unicode character block. 1072 * @since 1.2 1073 */ 1074 public static final UnicodeBlock HIRAGANA = 1075 new UnicodeBlock("HIRAGANA"); 1076 1077 /** 1078 * Constant for the "Katakana" Unicode character block. 1079 * @since 1.2 1080 */ 1081 public static final UnicodeBlock KATAKANA = 1082 new UnicodeBlock("KATAKANA"); 1083 1084 /** 1085 * Constant for the "Bopomofo" Unicode character block. 1086 * @since 1.2 1087 */ 1088 public static final UnicodeBlock BOPOMOFO = 1089 new UnicodeBlock("BOPOMOFO"); 1090 1091 /** 1092 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1093 * @since 1.2 1094 */ 1095 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1096 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1097 "HANGUL COMPATIBILITY JAMO", 1098 "HANGULCOMPATIBILITYJAMO"); 1099 1100 /** 1101 * Constant for the "Kanbun" Unicode character block. 1102 * @since 1.2 1103 */ 1104 public static final UnicodeBlock KANBUN = 1105 new UnicodeBlock("KANBUN"); 1106 1107 /** 1108 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1109 * @since 1.2 1110 */ 1111 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1112 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1113 "ENCLOSED CJK LETTERS AND MONTHS", 1114 "ENCLOSEDCJKLETTERSANDMONTHS"); 1115 1116 /** 1117 * Constant for the "CJK Compatibility" Unicode character block. 1118 * @since 1.2 1119 */ 1120 public static final UnicodeBlock CJK_COMPATIBILITY = 1121 new UnicodeBlock("CJK_COMPATIBILITY", 1122 "CJK COMPATIBILITY", 1123 "CJKCOMPATIBILITY"); 1124 1125 /** 1126 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1127 * @since 1.2 1128 */ 1129 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1130 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1131 "CJK UNIFIED IDEOGRAPHS", 1132 "CJKUNIFIEDIDEOGRAPHS"); 1133 1134 /** 1135 * Constant for the "Hangul Syllables" Unicode character block. 1136 * @since 1.2 1137 */ 1138 public static final UnicodeBlock HANGUL_SYLLABLES = 1139 new UnicodeBlock("HANGUL_SYLLABLES", 1140 "HANGUL SYLLABLES", 1141 "HANGULSYLLABLES"); 1142 1143 /** 1144 * Constant for the "Private Use Area" Unicode character block. 1145 * @since 1.2 1146 */ 1147 public static final UnicodeBlock PRIVATE_USE_AREA = 1148 new UnicodeBlock("PRIVATE_USE_AREA", 1149 "PRIVATE USE AREA", 1150 "PRIVATEUSEAREA"); 1151 1152 /** 1153 * Constant for the "CJK Compatibility Ideographs" Unicode character 1154 * block. 1155 * @since 1.2 1156 */ 1157 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1158 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1159 "CJK COMPATIBILITY IDEOGRAPHS", 1160 "CJKCOMPATIBILITYIDEOGRAPHS"); 1161 1162 /** 1163 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1164 * @since 1.2 1165 */ 1166 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1167 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1168 "ALPHABETIC PRESENTATION FORMS", 1169 "ALPHABETICPRESENTATIONFORMS"); 1170 1171 /** 1172 * Constant for the "Arabic Presentation Forms-A" Unicode character 1173 * block. 1174 * @since 1.2 1175 */ 1176 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1177 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1178 "ARABIC PRESENTATION FORMS-A", 1179 "ARABICPRESENTATIONFORMS-A"); 1180 1181 /** 1182 * Constant for the "Combining Half Marks" Unicode character block. 
1183 * @since 1.2 1184 */ 1185 public static final UnicodeBlock COMBINING_HALF_MARKS = 1186 new UnicodeBlock("COMBINING_HALF_MARKS", 1187 "COMBINING HALF MARKS", 1188 "COMBININGHALFMARKS"); 1189 1190 /** 1191 * Constant for the "CJK Compatibility Forms" Unicode character block. 1192 * @since 1.2 1193 */ 1194 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1195 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1196 "CJK COMPATIBILITY FORMS", 1197 "CJKCOMPATIBILITYFORMS"); 1198 1199 /** 1200 * Constant for the "Small Form Variants" Unicode character block. 1201 * @since 1.2 1202 */ 1203 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1204 new UnicodeBlock("SMALL_FORM_VARIANTS", 1205 "SMALL FORM VARIANTS", 1206 "SMALLFORMVARIANTS"); 1207 1208 /** 1209 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1210 * @since 1.2 1211 */ 1212 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1213 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1214 "ARABIC PRESENTATION FORMS-B", 1215 "ARABICPRESENTATIONFORMS-B"); 1216 1217 /** 1218 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1219 * block. 1220 * @since 1.2 1221 */ 1222 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1223 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1224 "HALFWIDTH AND FULLWIDTH FORMS", 1225 "HALFWIDTHANDFULLWIDTHFORMS"); 1226 1227 /** 1228 * Constant for the "Specials" Unicode character block. 1229 * @since 1.2 1230 */ 1231 public static final UnicodeBlock SPECIALS = 1232 new UnicodeBlock("SPECIALS"); 1233 1234 /** 1235 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1236 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1237 * {@link #LOW_SURROGATES}. These new constants match 1238 * the block definitions of the Unicode Standard. 1239 * The {@link #of(char)} and {@link #of(int)} methods 1240 * return the new constants, not SURROGATES_AREA. 
1241 */ 1242 @Deprecated 1243 public static final UnicodeBlock SURROGATES_AREA = 1244 new UnicodeBlock("SURROGATES_AREA"); 1245 1246 /** 1247 * Constant for the "Syriac" Unicode character block. 1248 * @since 1.4 1249 */ 1250 public static final UnicodeBlock SYRIAC = 1251 new UnicodeBlock("SYRIAC"); 1252 1253 /** 1254 * Constant for the "Thaana" Unicode character block. 1255 * @since 1.4 1256 */ 1257 public static final UnicodeBlock THAANA = 1258 new UnicodeBlock("THAANA"); 1259 1260 /** 1261 * Constant for the "Sinhala" Unicode character block. 1262 * @since 1.4 1263 */ 1264 public static final UnicodeBlock SINHALA = 1265 new UnicodeBlock("SINHALA"); 1266 1267 /** 1268 * Constant for the "Myanmar" Unicode character block. 1269 * @since 1.4 1270 */ 1271 public static final UnicodeBlock MYANMAR = 1272 new UnicodeBlock("MYANMAR"); 1273 1274 /** 1275 * Constant for the "Ethiopic" Unicode character block. 1276 * @since 1.4 1277 */ 1278 public static final UnicodeBlock ETHIOPIC = 1279 new UnicodeBlock("ETHIOPIC"); 1280 1281 /** 1282 * Constant for the "Cherokee" Unicode character block. 1283 * @since 1.4 1284 */ 1285 public static final UnicodeBlock CHEROKEE = 1286 new UnicodeBlock("CHEROKEE"); 1287 1288 /** 1289 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1290 * @since 1.4 1291 */ 1292 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1293 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1294 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1295 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1296 1297 /** 1298 * Constant for the "Ogham" Unicode character block. 1299 * @since 1.4 1300 */ 1301 public static final UnicodeBlock OGHAM = 1302 new UnicodeBlock("OGHAM"); 1303 1304 /** 1305 * Constant for the "Runic" Unicode character block. 1306 * @since 1.4 1307 */ 1308 public static final UnicodeBlock RUNIC = 1309 new UnicodeBlock("RUNIC"); 1310 1311 /** 1312 * Constant for the "Khmer" Unicode character block. 
1313 * @since 1.4 1314 */ 1315 public static final UnicodeBlock KHMER = 1316 new UnicodeBlock("KHMER"); 1317 1318 /** 1319 * Constant for the "Mongolian" Unicode character block. 1320 * @since 1.4 1321 */ 1322 public static final UnicodeBlock MONGOLIAN = 1323 new UnicodeBlock("MONGOLIAN"); 1324 1325 /** 1326 * Constant for the "Braille Patterns" Unicode character block. 1327 * @since 1.4 1328 */ 1329 public static final UnicodeBlock BRAILLE_PATTERNS = 1330 new UnicodeBlock("BRAILLE_PATTERNS", 1331 "BRAILLE PATTERNS", 1332 "BRAILLEPATTERNS"); 1333 1334 /** 1335 * Constant for the "CJK Radicals Supplement" Unicode character block. 1336 * @since 1.4 1337 */ 1338 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1339 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1340 "CJK RADICALS SUPPLEMENT", 1341 "CJKRADICALSSUPPLEMENT"); 1342 1343 /** 1344 * Constant for the "Kangxi Radicals" Unicode character block. 1345 * @since 1.4 1346 */ 1347 public static final UnicodeBlock KANGXI_RADICALS = 1348 new UnicodeBlock("KANGXI_RADICALS", 1349 "KANGXI RADICALS", 1350 "KANGXIRADICALS"); 1351 1352 /** 1353 * Constant for the "Ideographic Description Characters" Unicode character block. 1354 * @since 1.4 1355 */ 1356 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1357 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1358 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1359 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1360 1361 /** 1362 * Constant for the "Bopomofo Extended" Unicode character block. 1363 * @since 1.4 1364 */ 1365 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1366 new UnicodeBlock("BOPOMOFO_EXTENDED", 1367 "BOPOMOFO EXTENDED", 1368 "BOPOMOFOEXTENDED"); 1369 1370 /** 1371 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1372 * @since 1.4 1373 */ 1374 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1375 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1376 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1377 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1378 1379 /** 1380 * Constant for the "Yi Syllables" Unicode character block. 1381 * @since 1.4 1382 */ 1383 public static final UnicodeBlock YI_SYLLABLES = 1384 new UnicodeBlock("YI_SYLLABLES", 1385 "YI SYLLABLES", 1386 "YISYLLABLES"); 1387 1388 /** 1389 * Constant for the "Yi Radicals" Unicode character block. 1390 * @since 1.4 1391 */ 1392 public static final UnicodeBlock YI_RADICALS = 1393 new UnicodeBlock("YI_RADICALS", 1394 "YI RADICALS", 1395 "YIRADICALS"); 1396 1397 /** 1398 * Constant for the "Cyrillic Supplementary" Unicode character block. 1399 * @since 1.5 1400 */ 1401 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1402 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1403 "CYRILLIC SUPPLEMENTARY", 1404 "CYRILLICSUPPLEMENTARY", 1405 "CYRILLIC SUPPLEMENT", 1406 "CYRILLICSUPPLEMENT"); 1407 1408 /** 1409 * Constant for the "Tagalog" Unicode character block. 1410 * @since 1.5 1411 */ 1412 public static final UnicodeBlock TAGALOG = 1413 new UnicodeBlock("TAGALOG"); 1414 1415 /** 1416 * Constant for the "Hanunoo" Unicode character block. 1417 * @since 1.5 1418 */ 1419 public static final UnicodeBlock HANUNOO = 1420 new UnicodeBlock("HANUNOO"); 1421 1422 /** 1423 * Constant for the "Buhid" Unicode character block. 1424 * @since 1.5 1425 */ 1426 public static final UnicodeBlock BUHID = 1427 new UnicodeBlock("BUHID"); 1428 1429 /** 1430 * Constant for the "Tagbanwa" Unicode character block. 1431 * @since 1.5 1432 */ 1433 public static final UnicodeBlock TAGBANWA = 1434 new UnicodeBlock("TAGBANWA"); 1435 1436 /** 1437 * Constant for the "Limbu" Unicode character block. 
1438 * @since 1.5 1439 */ 1440 public static final UnicodeBlock LIMBU = 1441 new UnicodeBlock("LIMBU"); 1442 1443 /** 1444 * Constant for the "Tai Le" Unicode character block. 1445 * @since 1.5 1446 */ 1447 public static final UnicodeBlock TAI_LE = 1448 new UnicodeBlock("TAI_LE", 1449 "TAI LE", 1450 "TAILE"); 1451 1452 /** 1453 * Constant for the "Khmer Symbols" Unicode character block. 1454 * @since 1.5 1455 */ 1456 public static final UnicodeBlock KHMER_SYMBOLS = 1457 new UnicodeBlock("KHMER_SYMBOLS", 1458 "KHMER SYMBOLS", 1459 "KHMERSYMBOLS"); 1460 1461 /** 1462 * Constant for the "Phonetic Extensions" Unicode character block. 1463 * @since 1.5 1464 */ 1465 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1466 new UnicodeBlock("PHONETIC_EXTENSIONS", 1467 "PHONETIC EXTENSIONS", 1468 "PHONETICEXTENSIONS"); 1469 1470 /** 1471 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1472 * @since 1.5 1473 */ 1474 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1475 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1476 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1477 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1478 1479 /** 1480 * Constant for the "Supplemental Arrows-A" Unicode character block. 1481 * @since 1.5 1482 */ 1483 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1484 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1485 "SUPPLEMENTAL ARROWS-A", 1486 "SUPPLEMENTALARROWS-A"); 1487 1488 /** 1489 * Constant for the "Supplemental Arrows-B" Unicode character block. 1490 * @since 1.5 1491 */ 1492 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1493 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1494 "SUPPLEMENTAL ARROWS-B", 1495 "SUPPLEMENTALARROWS-B"); 1496 1497 /** 1498 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1499 * character block. 
1500 * @since 1.5 1501 */ 1502 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1503 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1504 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1505 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1506 1507 /** 1508 * Constant for the "Supplemental Mathematical Operators" Unicode 1509 * character block. 1510 * @since 1.5 1511 */ 1512 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1513 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1514 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1515 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1516 1517 /** 1518 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1519 * block. 1520 * @since 1.5 1521 */ 1522 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1523 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1524 "MISCELLANEOUS SYMBOLS AND ARROWS", 1525 "MISCELLANEOUSSYMBOLSANDARROWS"); 1526 1527 /** 1528 * Constant for the "Katakana Phonetic Extensions" Unicode character 1529 * block. 1530 * @since 1.5 1531 */ 1532 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1533 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1534 "KATAKANA PHONETIC EXTENSIONS", 1535 "KATAKANAPHONETICEXTENSIONS"); 1536 1537 /** 1538 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1539 * @since 1.5 1540 */ 1541 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1542 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1543 "YIJING HEXAGRAM SYMBOLS", 1544 "YIJINGHEXAGRAMSYMBOLS"); 1545 1546 /** 1547 * Constant for the "Variation Selectors" Unicode character block. 1548 * @since 1.5 1549 */ 1550 public static final UnicodeBlock VARIATION_SELECTORS = 1551 new UnicodeBlock("VARIATION_SELECTORS", 1552 "VARIATION SELECTORS", 1553 "VARIATIONSELECTORS"); 1554 1555 /** 1556 * Constant for the "Linear B Syllabary" Unicode character block. 
1557 * @since 1.5 1558 */ 1559 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1560 new UnicodeBlock("LINEAR_B_SYLLABARY", 1561 "LINEAR B SYLLABARY", 1562 "LINEARBSYLLABARY"); 1563 1564 /** 1565 * Constant for the "Linear B Ideograms" Unicode character block. 1566 * @since 1.5 1567 */ 1568 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1569 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1570 "LINEAR B IDEOGRAMS", 1571 "LINEARBIDEOGRAMS"); 1572 1573 /** 1574 * Constant for the "Aegean Numbers" Unicode character block. 1575 * @since 1.5 1576 */ 1577 public static final UnicodeBlock AEGEAN_NUMBERS = 1578 new UnicodeBlock("AEGEAN_NUMBERS", 1579 "AEGEAN NUMBERS", 1580 "AEGEANNUMBERS"); 1581 1582 /** 1583 * Constant for the "Old Italic" Unicode character block. 1584 * @since 1.5 1585 */ 1586 public static final UnicodeBlock OLD_ITALIC = 1587 new UnicodeBlock("OLD_ITALIC", 1588 "OLD ITALIC", 1589 "OLDITALIC"); 1590 1591 /** 1592 * Constant for the "Gothic" Unicode character block. 1593 * @since 1.5 1594 */ 1595 public static final UnicodeBlock GOTHIC = 1596 new UnicodeBlock("GOTHIC"); 1597 1598 /** 1599 * Constant for the "Ugaritic" Unicode character block. 1600 * @since 1.5 1601 */ 1602 public static final UnicodeBlock UGARITIC = 1603 new UnicodeBlock("UGARITIC"); 1604 1605 /** 1606 * Constant for the "Deseret" Unicode character block. 1607 * @since 1.5 1608 */ 1609 public static final UnicodeBlock DESERET = 1610 new UnicodeBlock("DESERET"); 1611 1612 /** 1613 * Constant for the "Shavian" Unicode character block. 1614 * @since 1.5 1615 */ 1616 public static final UnicodeBlock SHAVIAN = 1617 new UnicodeBlock("SHAVIAN"); 1618 1619 /** 1620 * Constant for the "Osmanya" Unicode character block. 1621 * @since 1.5 1622 */ 1623 public static final UnicodeBlock OSMANYA = 1624 new UnicodeBlock("OSMANYA"); 1625 1626 /** 1627 * Constant for the "Cypriot Syllabary" Unicode character block. 
1628 * @since 1.5 1629 */ 1630 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1631 new UnicodeBlock("CYPRIOT_SYLLABARY", 1632 "CYPRIOT SYLLABARY", 1633 "CYPRIOTSYLLABARY"); 1634 1635 /** 1636 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1637 * @since 1.5 1638 */ 1639 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1640 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1641 "BYZANTINE MUSICAL SYMBOLS", 1642 "BYZANTINEMUSICALSYMBOLS"); 1643 1644 /** 1645 * Constant for the "Musical Symbols" Unicode character block. 1646 * @since 1.5 1647 */ 1648 public static final UnicodeBlock MUSICAL_SYMBOLS = 1649 new UnicodeBlock("MUSICAL_SYMBOLS", 1650 "MUSICAL SYMBOLS", 1651 "MUSICALSYMBOLS"); 1652 1653 /** 1654 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1655 * @since 1.5 1656 */ 1657 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1658 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1659 "TAI XUAN JING SYMBOLS", 1660 "TAIXUANJINGSYMBOLS"); 1661 1662 /** 1663 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1664 * character block. 1665 * @since 1.5 1666 */ 1667 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1668 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1669 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1670 "MATHEMATICALALPHANUMERICSYMBOLS"); 1671 1672 /** 1673 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1674 * character block. 1675 * @since 1.5 1676 */ 1677 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1678 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1679 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1680 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1681 1682 /** 1683 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1684 * @since 1.5 1685 */ 1686 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1687 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1688 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1689 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1690 1691 /** 1692 * Constant for the "Tags" Unicode character block. 1693 * @since 1.5 1694 */ 1695 public static final UnicodeBlock TAGS = 1696 new UnicodeBlock("TAGS"); 1697 1698 /** 1699 * Constant for the "Variation Selectors Supplement" Unicode character 1700 * block. 1701 * @since 1.5 1702 */ 1703 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1704 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1705 "VARIATION SELECTORS SUPPLEMENT", 1706 "VARIATIONSELECTORSSUPPLEMENT"); 1707 1708 /** 1709 * Constant for the "Supplementary Private Use Area-A" Unicode character 1710 * block. 1711 * @since 1.5 1712 */ 1713 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1714 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1715 "SUPPLEMENTARY PRIVATE USE AREA-A", 1716 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1717 1718 /** 1719 * Constant for the "Supplementary Private Use Area-B" Unicode character 1720 * block. 1721 * @since 1.5 1722 */ 1723 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1724 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1725 "SUPPLEMENTARY PRIVATE USE AREA-B", 1726 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1727 1728 /** 1729 * Constant for the "High Surrogates" Unicode character block. 1730 * This block represents codepoint values in the high surrogate 1731 * range: U+D800 through U+DB7F 1732 * 1733 * @since 1.5 1734 */ 1735 public static final UnicodeBlock HIGH_SURROGATES = 1736 new UnicodeBlock("HIGH_SURROGATES", 1737 "HIGH SURROGATES", 1738 "HIGHSURROGATES"); 1739 1740 /** 1741 * Constant for the "High Private Use Surrogates" Unicode character 1742 * block. 
1743 * This block represents codepoint values in the private use high 1744 * surrogate range: U+DB80 through U+DBFF 1745 * 1746 * @since 1.5 1747 */ 1748 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1749 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1750 "HIGH PRIVATE USE SURROGATES", 1751 "HIGHPRIVATEUSESURROGATES"); 1752 1753 /** 1754 * Constant for the "Low Surrogates" Unicode character block. 1755 * This block represents codepoint values in the low surrogate 1756 * range: U+DC00 through U+DFFF 1757 * 1758 * @since 1.5 1759 */ 1760 public static final UnicodeBlock LOW_SURROGATES = 1761 new UnicodeBlock("LOW_SURROGATES", 1762 "LOW SURROGATES", 1763 "LOWSURROGATES"); 1764 1765 /** 1766 * Constant for the "Arabic Supplement" Unicode character block. 1767 * @since 1.7 1768 */ 1769 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1770 new UnicodeBlock("ARABIC_SUPPLEMENT", 1771 "ARABIC SUPPLEMENT", 1772 "ARABICSUPPLEMENT"); 1773 1774 /** 1775 * Constant for the "NKo" Unicode character block. 1776 * @since 1.7 1777 */ 1778 public static final UnicodeBlock NKO = 1779 new UnicodeBlock("NKO"); 1780 1781 /** 1782 * Constant for the "Samaritan" Unicode character block. 1783 * @since 1.7 1784 */ 1785 public static final UnicodeBlock SAMARITAN = 1786 new UnicodeBlock("SAMARITAN"); 1787 1788 /** 1789 * Constant for the "Mandaic" Unicode character block. 1790 * @since 1.7 1791 */ 1792 public static final UnicodeBlock MANDAIC = 1793 new UnicodeBlock("MANDAIC"); 1794 1795 /** 1796 * Constant for the "Ethiopic Supplement" Unicode character block. 1797 * @since 1.7 1798 */ 1799 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1800 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1801 "ETHIOPIC SUPPLEMENT", 1802 "ETHIOPICSUPPLEMENT"); 1803 1804 /** 1805 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1806 * Unicode character block. 
1807 * @since 1.7 1808 */ 1809 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1810 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1811 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1812 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1813 1814 /** 1815 * Constant for the "New Tai Lue" Unicode character block. 1816 * @since 1.7 1817 */ 1818 public static final UnicodeBlock NEW_TAI_LUE = 1819 new UnicodeBlock("NEW_TAI_LUE", 1820 "NEW TAI LUE", 1821 "NEWTAILUE"); 1822 1823 /** 1824 * Constant for the "Buginese" Unicode character block. 1825 * @since 1.7 1826 */ 1827 public static final UnicodeBlock BUGINESE = 1828 new UnicodeBlock("BUGINESE"); 1829 1830 /** 1831 * Constant for the "Tai Tham" Unicode character block. 1832 * @since 1.7 1833 */ 1834 public static final UnicodeBlock TAI_THAM = 1835 new UnicodeBlock("TAI_THAM", 1836 "TAI THAM", 1837 "TAITHAM"); 1838 1839 /** 1840 * Constant for the "Balinese" Unicode character block. 1841 * @since 1.7 1842 */ 1843 public static final UnicodeBlock BALINESE = 1844 new UnicodeBlock("BALINESE"); 1845 1846 /** 1847 * Constant for the "Sundanese" Unicode character block. 1848 * @since 1.7 1849 */ 1850 public static final UnicodeBlock SUNDANESE = 1851 new UnicodeBlock("SUNDANESE"); 1852 1853 /** 1854 * Constant for the "Batak" Unicode character block. 1855 * @since 1.7 1856 */ 1857 public static final UnicodeBlock BATAK = 1858 new UnicodeBlock("BATAK"); 1859 1860 /** 1861 * Constant for the "Lepcha" Unicode character block. 1862 * @since 1.7 1863 */ 1864 public static final UnicodeBlock LEPCHA = 1865 new UnicodeBlock("LEPCHA"); 1866 1867 /** 1868 * Constant for the "Ol Chiki" Unicode character block. 1869 * @since 1.7 1870 */ 1871 public static final UnicodeBlock OL_CHIKI = 1872 new UnicodeBlock("OL_CHIKI", 1873 "OL CHIKI", 1874 "OLCHIKI"); 1875 1876 /** 1877 * Constant for the "Vedic Extensions" Unicode character block. 
1878 * @since 1.7 1879 */ 1880 public static final UnicodeBlock VEDIC_EXTENSIONS = 1881 new UnicodeBlock("VEDIC_EXTENSIONS", 1882 "VEDIC EXTENSIONS", 1883 "VEDICEXTENSIONS"); 1884 1885 /** 1886 * Constant for the "Phonetic Extensions Supplement" Unicode character 1887 * block. 1888 * @since 1.7 1889 */ 1890 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1891 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1892 "PHONETIC EXTENSIONS SUPPLEMENT", 1893 "PHONETICEXTENSIONSSUPPLEMENT"); 1894 1895 /** 1896 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1897 * character block. 1898 * @since 1.7 1899 */ 1900 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1901 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1902 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1903 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1904 1905 /** 1906 * Constant for the "Glagolitic" Unicode character block. 1907 * @since 1.7 1908 */ 1909 public static final UnicodeBlock GLAGOLITIC = 1910 new UnicodeBlock("GLAGOLITIC"); 1911 1912 /** 1913 * Constant for the "Latin Extended-C" Unicode character block. 1914 * @since 1.7 1915 */ 1916 public static final UnicodeBlock LATIN_EXTENDED_C = 1917 new UnicodeBlock("LATIN_EXTENDED_C", 1918 "LATIN EXTENDED-C", 1919 "LATINEXTENDED-C"); 1920 1921 /** 1922 * Constant for the "Coptic" Unicode character block. 1923 * @since 1.7 1924 */ 1925 public static final UnicodeBlock COPTIC = 1926 new UnicodeBlock("COPTIC"); 1927 1928 /** 1929 * Constant for the "Georgian Supplement" Unicode character block. 1930 * @since 1.7 1931 */ 1932 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1933 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1934 "GEORGIAN SUPPLEMENT", 1935 "GEORGIANSUPPLEMENT"); 1936 1937 /** 1938 * Constant for the "Tifinagh" Unicode character block. 
1939 * @since 1.7 1940 */ 1941 public static final UnicodeBlock TIFINAGH = 1942 new UnicodeBlock("TIFINAGH"); 1943 1944 /** 1945 * Constant for the "Ethiopic Extended" Unicode character block. 1946 * @since 1.7 1947 */ 1948 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1949 new UnicodeBlock("ETHIOPIC_EXTENDED", 1950 "ETHIOPIC EXTENDED", 1951 "ETHIOPICEXTENDED"); 1952 1953 /** 1954 * Constant for the "Cyrillic Extended-A" Unicode character block. 1955 * @since 1.7 1956 */ 1957 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1958 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1959 "CYRILLIC EXTENDED-A", 1960 "CYRILLICEXTENDED-A"); 1961 1962 /** 1963 * Constant for the "Supplemental Punctuation" Unicode character block. 1964 * @since 1.7 1965 */ 1966 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1967 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1968 "SUPPLEMENTAL PUNCTUATION", 1969 "SUPPLEMENTALPUNCTUATION"); 1970 1971 /** 1972 * Constant for the "CJK Strokes" Unicode character block. 1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock CJK_STROKES = 1976 new UnicodeBlock("CJK_STROKES", 1977 "CJK STROKES", 1978 "CJKSTROKES"); 1979 1980 /** 1981 * Constant for the "Lisu" Unicode character block. 1982 * @since 1.7 1983 */ 1984 public static final UnicodeBlock LISU = 1985 new UnicodeBlock("LISU"); 1986 1987 /** 1988 * Constant for the "Vai" Unicode character block. 1989 * @since 1.7 1990 */ 1991 public static final UnicodeBlock VAI = 1992 new UnicodeBlock("VAI"); 1993 1994 /** 1995 * Constant for the "Cyrillic Extended-B" Unicode character block. 1996 * @since 1.7 1997 */ 1998 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1999 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2000 "CYRILLIC EXTENDED-B", 2001 "CYRILLICEXTENDED-B"); 2002 2003 /** 2004 * Constant for the "Bamum" Unicode character block. 
2005 * @since 1.7 2006 */ 2007 public static final UnicodeBlock BAMUM = 2008 new UnicodeBlock("BAMUM"); 2009 2010 /** 2011 * Constant for the "Modifier Tone Letters" Unicode character block. 2012 * @since 1.7 2013 */ 2014 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2015 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2016 "MODIFIER TONE LETTERS", 2017 "MODIFIERTONELETTERS"); 2018 2019 /** 2020 * Constant for the "Latin Extended-D" Unicode character block. 2021 * @since 1.7 2022 */ 2023 public static final UnicodeBlock LATIN_EXTENDED_D = 2024 new UnicodeBlock("LATIN_EXTENDED_D", 2025 "LATIN EXTENDED-D", 2026 "LATINEXTENDED-D"); 2027 2028 /** 2029 * Constant for the "Syloti Nagri" Unicode character block. 2030 * @since 1.7 2031 */ 2032 public static final UnicodeBlock SYLOTI_NAGRI = 2033 new UnicodeBlock("SYLOTI_NAGRI", 2034 "SYLOTI NAGRI", 2035 "SYLOTINAGRI"); 2036 2037 /** 2038 * Constant for the "Common Indic Number Forms" Unicode character block. 2039 * @since 1.7 2040 */ 2041 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2042 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2043 "COMMON INDIC NUMBER FORMS", 2044 "COMMONINDICNUMBERFORMS"); 2045 2046 /** 2047 * Constant for the "Phags-pa" Unicode character block. 2048 * @since 1.7 2049 */ 2050 public static final UnicodeBlock PHAGS_PA = 2051 new UnicodeBlock("PHAGS_PA", 2052 "PHAGS-PA"); 2053 2054 /** 2055 * Constant for the "Saurashtra" Unicode character block. 2056 * @since 1.7 2057 */ 2058 public static final UnicodeBlock SAURASHTRA = 2059 new UnicodeBlock("SAURASHTRA"); 2060 2061 /** 2062 * Constant for the "Devanagari Extended" Unicode character block. 2063 * @since 1.7 2064 */ 2065 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2066 new UnicodeBlock("DEVANAGARI_EXTENDED", 2067 "DEVANAGARI EXTENDED", 2068 "DEVANAGARIEXTENDED"); 2069 2070 /** 2071 * Constant for the "Kayah Li" Unicode character block. 
2072 * @since 1.7 2073 */ 2074 public static final UnicodeBlock KAYAH_LI = 2075 new UnicodeBlock("KAYAH_LI", 2076 "KAYAH LI", 2077 "KAYAHLI"); 2078 2079 /** 2080 * Constant for the "Rejang" Unicode character block. 2081 * @since 1.7 2082 */ 2083 public static final UnicodeBlock REJANG = 2084 new UnicodeBlock("REJANG"); 2085 2086 /** 2087 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2088 * @since 1.7 2089 */ 2090 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2091 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2092 "HANGUL JAMO EXTENDED-A", 2093 "HANGULJAMOEXTENDED-A"); 2094 2095 /** 2096 * Constant for the "Javanese" Unicode character block. 2097 * @since 1.7 2098 */ 2099 public static final UnicodeBlock JAVANESE = 2100 new UnicodeBlock("JAVANESE"); 2101 2102 /** 2103 * Constant for the "Cham" Unicode character block. 2104 * @since 1.7 2105 */ 2106 public static final UnicodeBlock CHAM = 2107 new UnicodeBlock("CHAM"); 2108 2109 /** 2110 * Constant for the "Myanmar Extended-A" Unicode character block. 2111 * @since 1.7 2112 */ 2113 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2114 new UnicodeBlock("MYANMAR_EXTENDED_A", 2115 "MYANMAR EXTENDED-A", 2116 "MYANMAREXTENDED-A"); 2117 2118 /** 2119 * Constant for the "Tai Viet" Unicode character block. 2120 * @since 1.7 2121 */ 2122 public static final UnicodeBlock TAI_VIET = 2123 new UnicodeBlock("TAI_VIET", 2124 "TAI VIET", 2125 "TAIVIET"); 2126 2127 /** 2128 * Constant for the "Ethiopic Extended-A" Unicode character block. 2129 * @since 1.7 2130 */ 2131 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2132 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2133 "ETHIOPIC EXTENDED-A", 2134 "ETHIOPICEXTENDED-A"); 2135 2136 /** 2137 * Constant for the "Meetei Mayek" Unicode character block. 
2138 * @since 1.7 2139 */ 2140 public static final UnicodeBlock MEETEI_MAYEK = 2141 new UnicodeBlock("MEETEI_MAYEK", 2142 "MEETEI MAYEK", 2143 "MEETEIMAYEK"); 2144 2145 /** 2146 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2147 * @since 1.7 2148 */ 2149 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2150 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2151 "HANGUL JAMO EXTENDED-B", 2152 "HANGULJAMOEXTENDED-B"); 2153 2154 /** 2155 * Constant for the "Vertical Forms" Unicode character block. 2156 * @since 1.7 2157 */ 2158 public static final UnicodeBlock VERTICAL_FORMS = 2159 new UnicodeBlock("VERTICAL_FORMS", 2160 "VERTICAL FORMS", 2161 "VERTICALFORMS"); 2162 2163 /** 2164 * Constant for the "Ancient Greek Numbers" Unicode character block. 2165 * @since 1.7 2166 */ 2167 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2168 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2169 "ANCIENT GREEK NUMBERS", 2170 "ANCIENTGREEKNUMBERS"); 2171 2172 /** 2173 * Constant for the "Ancient Symbols" Unicode character block. 2174 * @since 1.7 2175 */ 2176 public static final UnicodeBlock ANCIENT_SYMBOLS = 2177 new UnicodeBlock("ANCIENT_SYMBOLS", 2178 "ANCIENT SYMBOLS", 2179 "ANCIENTSYMBOLS"); 2180 2181 /** 2182 * Constant for the "Phaistos Disc" Unicode character block. 2183 * @since 1.7 2184 */ 2185 public static final UnicodeBlock PHAISTOS_DISC = 2186 new UnicodeBlock("PHAISTOS_DISC", 2187 "PHAISTOS DISC", 2188 "PHAISTOSDISC"); 2189 2190 /** 2191 * Constant for the "Lycian" Unicode character block. 2192 * @since 1.7 2193 */ 2194 public static final UnicodeBlock LYCIAN = 2195 new UnicodeBlock("LYCIAN"); 2196 2197 /** 2198 * Constant for the "Carian" Unicode character block. 2199 * @since 1.7 2200 */ 2201 public static final UnicodeBlock CARIAN = 2202 new UnicodeBlock("CARIAN"); 2203 2204 /** 2205 * Constant for the "Old Persian" Unicode character block. 
2206 * @since 1.7 2207 */ 2208 public static final UnicodeBlock OLD_PERSIAN = 2209 new UnicodeBlock("OLD_PERSIAN", 2210 "OLD PERSIAN", 2211 "OLDPERSIAN"); 2212 2213 /** 2214 * Constant for the "Imperial Aramaic" Unicode character block. 2215 * @since 1.7 2216 */ 2217 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2218 new UnicodeBlock("IMPERIAL_ARAMAIC", 2219 "IMPERIAL ARAMAIC", 2220 "IMPERIALARAMAIC"); 2221 2222 /** 2223 * Constant for the "Phoenician" Unicode character block. 2224 * @since 1.7 2225 */ 2226 public static final UnicodeBlock PHOENICIAN = 2227 new UnicodeBlock("PHOENICIAN"); 2228 2229 /** 2230 * Constant for the "Lydian" Unicode character block. 2231 * @since 1.7 2232 */ 2233 public static final UnicodeBlock LYDIAN = 2234 new UnicodeBlock("LYDIAN"); 2235 2236 /** 2237 * Constant for the "Kharoshthi" Unicode character block. 2238 * @since 1.7 2239 */ 2240 public static final UnicodeBlock KHAROSHTHI = 2241 new UnicodeBlock("KHAROSHTHI"); 2242 2243 /** 2244 * Constant for the "Old South Arabian" Unicode character block. 2245 * @since 1.7 2246 */ 2247 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2248 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2249 "OLD SOUTH ARABIAN", 2250 "OLDSOUTHARABIAN"); 2251 2252 /** 2253 * Constant for the "Avestan" Unicode character block. 2254 * @since 1.7 2255 */ 2256 public static final UnicodeBlock AVESTAN = 2257 new UnicodeBlock("AVESTAN"); 2258 2259 /** 2260 * Constant for the "Inscriptional Parthian" Unicode character block. 2261 * @since 1.7 2262 */ 2263 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2264 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2265 "INSCRIPTIONAL PARTHIAN", 2266 "INSCRIPTIONALPARTHIAN"); 2267 2268 /** 2269 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2270 * @since 1.7 2271 */ 2272 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2273 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2274 "INSCRIPTIONAL PAHLAVI", 2275 "INSCRIPTIONALPAHLAVI"); 2276 2277 /** 2278 * Constant for the "Old Turkic" Unicode character block. 2279 * @since 1.7 2280 */ 2281 public static final UnicodeBlock OLD_TURKIC = 2282 new UnicodeBlock("OLD_TURKIC", 2283 "OLD TURKIC", 2284 "OLDTURKIC"); 2285 2286 /** 2287 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2288 * @since 1.7 2289 */ 2290 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2291 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2292 "RUMI NUMERAL SYMBOLS", 2293 "RUMINUMERALSYMBOLS"); 2294 2295 /** 2296 * Constant for the "Brahmi" Unicode character block. 2297 * @since 1.7 2298 */ 2299 public static final UnicodeBlock BRAHMI = 2300 new UnicodeBlock("BRAHMI"); 2301 2302 /** 2303 * Constant for the "Kaithi" Unicode character block. 2304 * @since 1.7 2305 */ 2306 public static final UnicodeBlock KAITHI = 2307 new UnicodeBlock("KAITHI"); 2308 2309 /** 2310 * Constant for the "Cuneiform" Unicode character block. 2311 * @since 1.7 2312 */ 2313 public static final UnicodeBlock CUNEIFORM = 2314 new UnicodeBlock("CUNEIFORM"); 2315 2316 /** 2317 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2318 * character block. 2319 * @since 1.7 2320 */ 2321 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2322 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2323 "CUNEIFORM NUMBERS AND PUNCTUATION", 2324 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2325 2326 /** 2327 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2328 * @since 1.7 2329 */ 2330 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2331 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2332 "EGYPTIAN HIEROGLYPHS", 2333 "EGYPTIANHIEROGLYPHS"); 2334 2335 /** 2336 * Constant for the "Bamum Supplement" Unicode character block. 
2337 * @since 1.7 2338 */ 2339 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2340 new UnicodeBlock("BAMUM_SUPPLEMENT", 2341 "BAMUM SUPPLEMENT", 2342 "BAMUMSUPPLEMENT"); 2343 2344 /** 2345 * Constant for the "Kana Supplement" Unicode character block. 2346 * @since 1.7 2347 */ 2348 public static final UnicodeBlock KANA_SUPPLEMENT = 2349 new UnicodeBlock("KANA_SUPPLEMENT", 2350 "KANA SUPPLEMENT", 2351 "KANASUPPLEMENT"); 2352 2353 /** 2354 * Constant for the "Ancient Greek Musical Notation" Unicode character 2355 * block. 2356 * @since 1.7 2357 */ 2358 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2359 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2360 "ANCIENT GREEK MUSICAL NOTATION", 2361 "ANCIENTGREEKMUSICALNOTATION"); 2362 2363 /** 2364 * Constant for the "Counting Rod Numerals" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2368 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2369 "COUNTING ROD NUMERALS", 2370 "COUNTINGRODNUMERALS"); 2371 2372 /** 2373 * Constant for the "Mahjong Tiles" Unicode character block. 2374 * @since 1.7 2375 */ 2376 public static final UnicodeBlock MAHJONG_TILES = 2377 new UnicodeBlock("MAHJONG_TILES", 2378 "MAHJONG TILES", 2379 "MAHJONGTILES"); 2380 2381 /** 2382 * Constant for the "Domino Tiles" Unicode character block. 2383 * @since 1.7 2384 */ 2385 public static final UnicodeBlock DOMINO_TILES = 2386 new UnicodeBlock("DOMINO_TILES", 2387 "DOMINO TILES", 2388 "DOMINOTILES"); 2389 2390 /** 2391 * Constant for the "Playing Cards" Unicode character block. 2392 * @since 1.7 2393 */ 2394 public static final UnicodeBlock PLAYING_CARDS = 2395 new UnicodeBlock("PLAYING_CARDS", 2396 "PLAYING CARDS", 2397 "PLAYINGCARDS"); 2398 2399 /** 2400 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2401 * block. 
2402 * @since 1.7 2403 */ 2404 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2405 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2406 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2407 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2408 2409 /** 2410 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2411 * block. 2412 * @since 1.7 2413 */ 2414 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2415 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2416 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2417 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2418 2419 /** 2420 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2421 * character block. 2422 * @since 1.7 2423 */ 2424 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2425 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2426 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2427 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2428 2429 /** 2430 * Constant for the "Emoticons" Unicode character block. 2431 * @since 1.7 2432 */ 2433 public static final UnicodeBlock EMOTICONS = 2434 new UnicodeBlock("EMOTICONS"); 2435 2436 /** 2437 * Constant for the "Transport And Map Symbols" Unicode character block. 2438 * @since 1.7 2439 */ 2440 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2441 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2442 "TRANSPORT AND MAP SYMBOLS", 2443 "TRANSPORTANDMAPSYMBOLS"); 2444 2445 /** 2446 * Constant for the "Alchemical Symbols" Unicode character block. 2447 * @since 1.7 2448 */ 2449 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2450 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2451 "ALCHEMICAL SYMBOLS", 2452 "ALCHEMICALSYMBOLS"); 2453 2454 /** 2455 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2456 * character block. 
2457 * @since 1.7 2458 */ 2459 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2460 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2461 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2462 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2463 2464 /** 2465 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2466 * character block. 2467 * @since 1.7 2468 */ 2469 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2470 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2471 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2472 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2473 2474 /** 2475 * Constant for the "Arabic Extended-A" Unicode character block. 2476 * @since 1.8 2477 */ 2478 public static final UnicodeBlock ARABIC_EXTENDED_A = 2479 new UnicodeBlock("ARABIC_EXTENDED_A", 2480 "ARABIC EXTENDED-A", 2481 "ARABICEXTENDED-A"); 2482 2483 /** 2484 * Constant for the "Sundanese Supplement" Unicode character block. 2485 * @since 1.8 2486 */ 2487 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2488 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2489 "SUNDANESE SUPPLEMENT", 2490 "SUNDANESESUPPLEMENT"); 2491 2492 /** 2493 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2494 * @since 1.8 2495 */ 2496 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2497 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2498 "MEETEI MAYEK EXTENSIONS", 2499 "MEETEIMAYEKEXTENSIONS"); 2500 2501 /** 2502 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2503 * @since 1.8 2504 */ 2505 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2506 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2507 "MEROITIC HIEROGLYPHS", 2508 "MEROITICHIEROGLYPHS"); 2509 2510 /** 2511 * Constant for the "Meroitic Cursive" Unicode character block. 
2512 * @since 1.8 2513 */ 2514 public static final UnicodeBlock MEROITIC_CURSIVE = 2515 new UnicodeBlock("MEROITIC_CURSIVE", 2516 "MEROITIC CURSIVE", 2517 "MEROITICCURSIVE"); 2518 2519 /** 2520 * Constant for the "Sora Sompeng" Unicode character block. 2521 * @since 1.8 2522 */ 2523 public static final UnicodeBlock SORA_SOMPENG = 2524 new UnicodeBlock("SORA_SOMPENG", 2525 "SORA SOMPENG", 2526 "SORASOMPENG"); 2527 2528 /** 2529 * Constant for the "Chakma" Unicode character block. 2530 * @since 1.8 2531 */ 2532 public static final UnicodeBlock CHAKMA = 2533 new UnicodeBlock("CHAKMA"); 2534 2535 /** 2536 * Constant for the "Sharada" Unicode character block. 2537 * @since 1.8 2538 */ 2539 public static final UnicodeBlock SHARADA = 2540 new UnicodeBlock("SHARADA"); 2541 2542 /** 2543 * Constant for the "Takri" Unicode character block. 2544 * @since 1.8 2545 */ 2546 public static final UnicodeBlock TAKRI = 2547 new UnicodeBlock("TAKRI"); 2548 2549 /** 2550 * Constant for the "Miao" Unicode character block. 2551 * @since 1.8 2552 */ 2553 public static final UnicodeBlock MIAO = 2554 new UnicodeBlock("MIAO"); 2555 2556 /** 2557 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2558 * character block. 
2559 * @since 1.8 2560 */ 2561 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2562 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2563 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2564 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2565 2566 private static final int blockStarts[] = { 2567 0x0000, // 0000..007F; Basic Latin 2568 0x0080, // 0080..00FF; Latin-1 Supplement 2569 0x0100, // 0100..017F; Latin Extended-A 2570 0x0180, // 0180..024F; Latin Extended-B 2571 0x0250, // 0250..02AF; IPA Extensions 2572 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2573 0x0300, // 0300..036F; Combining Diacritical Marks 2574 0x0370, // 0370..03FF; Greek and Coptic 2575 0x0400, // 0400..04FF; Cyrillic 2576 0x0500, // 0500..052F; Cyrillic Supplement 2577 0x0530, // 0530..058F; Armenian 2578 0x0590, // 0590..05FF; Hebrew 2579 0x0600, // 0600..06FF; Arabic 2580 0x0700, // 0700..074F; Syriac 2581 0x0750, // 0750..077F; Arabic Supplement 2582 0x0780, // 0780..07BF; Thaana 2583 0x07C0, // 07C0..07FF; NKo 2584 0x0800, // 0800..083F; Samaritan 2585 0x0840, // 0840..085F; Mandaic 2586 0x0860, // unassigned 2587 0x08A0, // 08A0..08FF; Arabic Extended-A 2588 0x0900, // 0900..097F; Devanagari 2589 0x0980, // 0980..09FF; Bengali 2590 0x0A00, // 0A00..0A7F; Gurmukhi 2591 0x0A80, // 0A80..0AFF; Gujarati 2592 0x0B00, // 0B00..0B7F; Oriya 2593 0x0B80, // 0B80..0BFF; Tamil 2594 0x0C00, // 0C00..0C7F; Telugu 2595 0x0C80, // 0C80..0CFF; Kannada 2596 0x0D00, // 0D00..0D7F; Malayalam 2597 0x0D80, // 0D80..0DFF; Sinhala 2598 0x0E00, // 0E00..0E7F; Thai 2599 0x0E80, // 0E80..0EFF; Lao 2600 0x0F00, // 0F00..0FFF; Tibetan 2601 0x1000, // 1000..109F; Myanmar 2602 0x10A0, // 10A0..10FF; Georgian 2603 0x1100, // 1100..11FF; Hangul Jamo 2604 0x1200, // 1200..137F; Ethiopic 2605 0x1380, // 1380..139F; Ethiopic Supplement 2606 0x13A0, // 13A0..13FF; Cherokee 2607 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2608 0x1680, // 1680..169F; Ogham 2609 0x16A0, // 
16A0..16FF; Runic 2610 0x1700, // 1700..171F; Tagalog 2611 0x1720, // 1720..173F; Hanunoo 2612 0x1740, // 1740..175F; Buhid 2613 0x1760, // 1760..177F; Tagbanwa 2614 0x1780, // 1780..17FF; Khmer 2615 0x1800, // 1800..18AF; Mongolian 2616 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2617 0x1900, // 1900..194F; Limbu 2618 0x1950, // 1950..197F; Tai Le 2619 0x1980, // 1980..19DF; New Tai Lue 2620 0x19E0, // 19E0..19FF; Khmer Symbols 2621 0x1A00, // 1A00..1A1F; Buginese 2622 0x1A20, // 1A20..1AAF; Tai Tham 2623 0x1AB0, // unassigned 2624 0x1B00, // 1B00..1B7F; Balinese 2625 0x1B80, // 1B80..1BBF; Sundanese 2626 0x1BC0, // 1BC0..1BFF; Batak 2627 0x1C00, // 1C00..1C4F; Lepcha 2628 0x1C50, // 1C50..1C7F; Ol Chiki 2629 0x1C80, // unassigned 2630 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 2631 0x1CD0, // 1CD0..1CFF; Vedic Extensions 2632 0x1D00, // 1D00..1D7F; Phonetic Extensions 2633 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2634 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2635 0x1E00, // 1E00..1EFF; Latin Extended Additional 2636 0x1F00, // 1F00..1FFF; Greek Extended 2637 0x2000, // 2000..206F; General Punctuation 2638 0x2070, // 2070..209F; Superscripts and Subscripts 2639 0x20A0, // 20A0..20CF; Currency Symbols 2640 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2641 0x2100, // 2100..214F; Letterlike Symbols 2642 0x2150, // 2150..218F; Number Forms 2643 0x2190, // 2190..21FF; Arrows 2644 0x2200, // 2200..22FF; Mathematical Operators 2645 0x2300, // 2300..23FF; Miscellaneous Technical 2646 0x2400, // 2400..243F; Control Pictures 2647 0x2440, // 2440..245F; Optical Character Recognition 2648 0x2460, // 2460..24FF; Enclosed Alphanumerics 2649 0x2500, // 2500..257F; Box Drawing 2650 0x2580, // 2580..259F; Block Elements 2651 0x25A0, // 25A0..25FF; Geometric Shapes 2652 0x2600, // 2600..26FF; Miscellaneous Symbols 2653 0x2700, // 2700..27BF; Dingbats 2654 0x27C0, // 27C0..27EF; Miscellaneous Mathematical 
Symbols-A 2655 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2656 0x2800, // 2800..28FF; Braille Patterns 2657 0x2900, // 2900..297F; Supplemental Arrows-B 2658 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2659 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2660 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2661 0x2C00, // 2C00..2C5F; Glagolitic 2662 0x2C60, // 2C60..2C7F; Latin Extended-C 2663 0x2C80, // 2C80..2CFF; Coptic 2664 0x2D00, // 2D00..2D2F; Georgian Supplement 2665 0x2D30, // 2D30..2D7F; Tifinagh 2666 0x2D80, // 2D80..2DDF; Ethiopic Extended 2667 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2668 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2669 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2670 0x2F00, // 2F00..2FDF; Kangxi Radicals 2671 0x2FE0, // unassigned 2672 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2673 0x3000, // 3000..303F; CJK Symbols and Punctuation 2674 0x3040, // 3040..309F; Hiragana 2675 0x30A0, // 30A0..30FF; Katakana 2676 0x3100, // 3100..312F; Bopomofo 2677 0x3130, // 3130..318F; Hangul Compatibility Jamo 2678 0x3190, // 3190..319F; Kanbun 2679 0x31A0, // 31A0..31BF; Bopomofo Extended 2680 0x31C0, // 31C0..31EF; CJK Strokes 2681 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2682 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2683 0x3300, // 3300..33FF; CJK Compatibility 2684 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2685 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2686 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2687 0xA000, // A000..A48F; Yi Syllables 2688 0xA490, // A490..A4CF; Yi Radicals 2689 0xA4D0, // A4D0..A4FF; Lisu 2690 0xA500, // A500..A63F; Vai 2691 0xA640, // A640..A69F; Cyrillic Extended-B 2692 0xA6A0, // A6A0..A6FF; Bamum 2693 0xA700, // A700..A71F; Modifier Tone Letters 2694 0xA720, // A720..A7FF; Latin Extended-D 2695 0xA800, // A800..A82F; Syloti Nagri 2696 0xA830, // A830..A83F; Common Indic Number Forms 2697 0xA840, // A840..A87F; 
Phags-pa 2698 0xA880, // A880..A8DF; Saurashtra 2699 0xA8E0, // A8E0..A8FF; Devanagari Extended 2700 0xA900, // A900..A92F; Kayah Li 2701 0xA930, // A930..A95F; Rejang 2702 0xA960, // A960..A97F; Hangul Jamo Extended-A 2703 0xA980, // A980..A9DF; Javanese 2704 0xA9E0, // unassigned 2705 0xAA00, // AA00..AA5F; Cham 2706 0xAA60, // AA60..AA7F; Myanmar Extended-A 2707 0xAA80, // AA80..AADF; Tai Viet 2708 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 2709 0xAB00, // AB00..AB2F; Ethiopic Extended-A 2710 0xAB30, // unassigned 2711 0xABC0, // ABC0..ABFF; Meetei Mayek 2712 0xAC00, // AC00..D7AF; Hangul Syllables 2713 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 2714 0xD800, // D800..DB7F; High Surrogates 2715 0xDB80, // DB80..DBFF; High Private Use Surrogates 2716 0xDC00, // DC00..DFFF; Low Surrogates 2717 0xE000, // E000..F8FF; Private Use Area 2718 0xF900, // F900..FAFF; CJK Compatibility Ideographs 2719 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 2720 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 2721 0xFE00, // FE00..FE0F; Variation Selectors 2722 0xFE10, // FE10..FE1F; Vertical Forms 2723 0xFE20, // FE20..FE2F; Combining Half Marks 2724 0xFE30, // FE30..FE4F; CJK Compatibility Forms 2725 0xFE50, // FE50..FE6F; Small Form Variants 2726 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 2727 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 2728 0xFFF0, // FFF0..FFFF; Specials 2729 0x10000, // 10000..1007F; Linear B Syllabary 2730 0x10080, // 10080..100FF; Linear B Ideograms 2731 0x10100, // 10100..1013F; Aegean Numbers 2732 0x10140, // 10140..1018F; Ancient Greek Numbers 2733 0x10190, // 10190..101CF; Ancient Symbols 2734 0x101D0, // 101D0..101FF; Phaistos Disc 2735 0x10200, // unassigned 2736 0x10280, // 10280..1029F; Lycian 2737 0x102A0, // 102A0..102DF; Carian 2738 0x102E0, // unassigned 2739 0x10300, // 10300..1032F; Old Italic 2740 0x10330, // 10330..1034F; Gothic 2741 0x10350, // unassigned 2742 0x10380, // 10380..1039F; Ugaritic 2743 0x103A0, 
// 103A0..103DF; Old Persian 2744 0x103E0, // unassigned 2745 0x10400, // 10400..1044F; Deseret 2746 0x10450, // 10450..1047F; Shavian 2747 0x10480, // 10480..104AF; Osmanya 2748 0x104B0, // unassigned 2749 0x10800, // 10800..1083F; Cypriot Syllabary 2750 0x10840, // 10840..1085F; Imperial Aramaic 2751 0x10860, // unassigned 2752 0x10900, // 10900..1091F; Phoenician 2753 0x10920, // 10920..1093F; Lydian 2754 0x10940, // unassigned 2755 0x10980, // 10980..1099F; Meroitic Hieroglyphs 2756 0x109A0, // 109A0..109FF; Meroitic Cursive 2757 0x10A00, // 10A00..10A5F; Kharoshthi 2758 0x10A60, // 10A60..10A7F; Old South Arabian 2759 0x10A80, // unassigned 2760 0x10B00, // 10B00..10B3F; Avestan 2761 0x10B40, // 10B40..10B5F; Inscriptional Parthian 2762 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 2763 0x10B80, // unassigned 2764 0x10C00, // 10C00..10C4F; Old Turkic 2765 0x10C50, // unassigned 2766 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 2767 0x10E80, // unassigned 2768 0x11000, // 11000..1107F; Brahmi 2769 0x11080, // 11080..110CF; Kaithi 2770 0x110D0, // 110D0..110FF; Sora Sompeng 2771 0x11100, // 11100..1114F; Chakma 2772 0x11150, // unassigned 2773 0x11180, // 11180..111DF; Sharada 2774 0x111E0, // unassigned 2775 0x11680, // 11680..116CF; Takri 2776 0x116D0, // unassigned 2777 0x12000, // 12000..123FF; Cuneiform 2778 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 2779 0x12480, // unassigned 2780 0x13000, // 13000..1342F; Egyptian Hieroglyphs 2781 0x13430, // unassigned 2782 0x16800, // 16800..16A3F; Bamum Supplement 2783 0x16A40, // unassigned 2784 0x16F00, // 16F00..16F9F; Miao 2785 0x16FA0, // unassigned 2786 0x1B000, // 1B000..1B0FF; Kana Supplement 2787 0x1B100, // unassigned 2788 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 2789 0x1D100, // 1D100..1D1FF; Musical Symbols 2790 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 2791 0x1D250, // unassigned 2792 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 2793 0x1D360, // 
1D360..1D37F; Counting Rod Numerals 2794 0x1D380, // unassigned 2795 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 2796 0x1D800, // unassigned 2797 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 2798 0x1EF00, // unassigned 2799 0x1F000, // 1F000..1F02F; Mahjong Tiles 2800 0x1F030, // 1F030..1F09F; Domino Tiles 2801 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 2802 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 2803 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 2804 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs 2805 0x1F600, // 1F600..1F64F; Emoticons 2806 0x1F650, // unassigned 2807 0x1F680, // 1F680..1F6FF; Transport And Map Symbols 2808 0x1F700, // 1F700..1F77F; Alchemical Symbols 2809 0x1F780, // unassigned 2810 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 2811 0x2A6E0, // unassigned 2812 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 2813 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 2814 0x2B820, // unassigned 2815 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2816 0x2FA20, // unassigned 2817 0xE0000, // E0000..E007F; Tags 2818 0xE0080, // unassigned 2819 0xE0100, // E0100..E01EF; Variation Selectors Supplement 2820 0xE01F0, // unassigned 2821 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 2822 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 2823 }; 2824 2825 private static final UnicodeBlock[] blocks = { 2826 BASIC_LATIN, 2827 LATIN_1_SUPPLEMENT, 2828 LATIN_EXTENDED_A, 2829 LATIN_EXTENDED_B, 2830 IPA_EXTENSIONS, 2831 SPACING_MODIFIER_LETTERS, 2832 COMBINING_DIACRITICAL_MARKS, 2833 GREEK, 2834 CYRILLIC, 2835 CYRILLIC_SUPPLEMENTARY, 2836 ARMENIAN, 2837 HEBREW, 2838 ARABIC, 2839 SYRIAC, 2840 ARABIC_SUPPLEMENT, 2841 THAANA, 2842 NKO, 2843 SAMARITAN, 2844 MANDAIC, 2845 null, 2846 ARABIC_EXTENDED_A, 2847 DEVANAGARI, 2848 BENGALI, 2849 GURMUKHI, 2850 GUJARATI, 2851 ORIYA, 2852 TAMIL, 2853 TELUGU, 2854 KANNADA, 
2855 MALAYALAM, 2856 SINHALA, 2857 THAI, 2858 LAO, 2859 TIBETAN, 2860 MYANMAR, 2861 GEORGIAN, 2862 HANGUL_JAMO, 2863 ETHIOPIC, 2864 ETHIOPIC_SUPPLEMENT, 2865 CHEROKEE, 2866 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 2867 OGHAM, 2868 RUNIC, 2869 TAGALOG, 2870 HANUNOO, 2871 BUHID, 2872 TAGBANWA, 2873 KHMER, 2874 MONGOLIAN, 2875 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 2876 LIMBU, 2877 TAI_LE, 2878 NEW_TAI_LUE, 2879 KHMER_SYMBOLS, 2880 BUGINESE, 2881 TAI_THAM, 2882 null, 2883 BALINESE, 2884 SUNDANESE, 2885 BATAK, 2886 LEPCHA, 2887 OL_CHIKI, 2888 null, 2889 SUNDANESE_SUPPLEMENT, 2890 VEDIC_EXTENSIONS, 2891 PHONETIC_EXTENSIONS, 2892 PHONETIC_EXTENSIONS_SUPPLEMENT, 2893 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 2894 LATIN_EXTENDED_ADDITIONAL, 2895 GREEK_EXTENDED, 2896 GENERAL_PUNCTUATION, 2897 SUPERSCRIPTS_AND_SUBSCRIPTS, 2898 CURRENCY_SYMBOLS, 2899 COMBINING_MARKS_FOR_SYMBOLS, 2900 LETTERLIKE_SYMBOLS, 2901 NUMBER_FORMS, 2902 ARROWS, 2903 MATHEMATICAL_OPERATORS, 2904 MISCELLANEOUS_TECHNICAL, 2905 CONTROL_PICTURES, 2906 OPTICAL_CHARACTER_RECOGNITION, 2907 ENCLOSED_ALPHANUMERICS, 2908 BOX_DRAWING, 2909 BLOCK_ELEMENTS, 2910 GEOMETRIC_SHAPES, 2911 MISCELLANEOUS_SYMBOLS, 2912 DINGBATS, 2913 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 2914 SUPPLEMENTAL_ARROWS_A, 2915 BRAILLE_PATTERNS, 2916 SUPPLEMENTAL_ARROWS_B, 2917 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 2918 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 2919 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 2920 GLAGOLITIC, 2921 LATIN_EXTENDED_C, 2922 COPTIC, 2923 GEORGIAN_SUPPLEMENT, 2924 TIFINAGH, 2925 ETHIOPIC_EXTENDED, 2926 CYRILLIC_EXTENDED_A, 2927 SUPPLEMENTAL_PUNCTUATION, 2928 CJK_RADICALS_SUPPLEMENT, 2929 KANGXI_RADICALS, 2930 null, 2931 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 2932 CJK_SYMBOLS_AND_PUNCTUATION, 2933 HIRAGANA, 2934 KATAKANA, 2935 BOPOMOFO, 2936 HANGUL_COMPATIBILITY_JAMO, 2937 KANBUN, 2938 BOPOMOFO_EXTENDED, 2939 CJK_STROKES, 2940 KATAKANA_PHONETIC_EXTENSIONS, 2941 ENCLOSED_CJK_LETTERS_AND_MONTHS, 2942 CJK_COMPATIBILITY, 2943 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 2944 YIJING_HEXAGRAM_SYMBOLS, 2945 CJK_UNIFIED_IDEOGRAPHS, 2946 YI_SYLLABLES, 2947 YI_RADICALS, 2948 LISU, 2949 VAI, 2950 CYRILLIC_EXTENDED_B, 2951 BAMUM, 2952 MODIFIER_TONE_LETTERS, 2953 LATIN_EXTENDED_D, 2954 SYLOTI_NAGRI, 2955 COMMON_INDIC_NUMBER_FORMS, 2956 PHAGS_PA, 2957 SAURASHTRA, 2958 DEVANAGARI_EXTENDED, 2959 KAYAH_LI, 2960 REJANG, 2961 HANGUL_JAMO_EXTENDED_A, 2962 JAVANESE, 2963 null, 2964 CHAM, 2965 MYANMAR_EXTENDED_A, 2966 TAI_VIET, 2967 MEETEI_MAYEK_EXTENSIONS, 2968 ETHIOPIC_EXTENDED_A, 2969 null, 2970 MEETEI_MAYEK, 2971 HANGUL_SYLLABLES, 2972 HANGUL_JAMO_EXTENDED_B, 2973 HIGH_SURROGATES, 2974 HIGH_PRIVATE_USE_SURROGATES, 2975 LOW_SURROGATES, 2976 PRIVATE_USE_AREA, 2977 CJK_COMPATIBILITY_IDEOGRAPHS, 2978 ALPHABETIC_PRESENTATION_FORMS, 2979 ARABIC_PRESENTATION_FORMS_A, 2980 VARIATION_SELECTORS, 2981 VERTICAL_FORMS, 2982 COMBINING_HALF_MARKS, 2983 CJK_COMPATIBILITY_FORMS, 2984 SMALL_FORM_VARIANTS, 2985 ARABIC_PRESENTATION_FORMS_B, 2986 HALFWIDTH_AND_FULLWIDTH_FORMS, 2987 SPECIALS, 2988 LINEAR_B_SYLLABARY, 2989 LINEAR_B_IDEOGRAMS, 2990 AEGEAN_NUMBERS, 2991 ANCIENT_GREEK_NUMBERS, 2992 ANCIENT_SYMBOLS, 2993 PHAISTOS_DISC, 2994 null, 2995 LYCIAN, 2996 CARIAN, 2997 null, 2998 OLD_ITALIC, 2999 GOTHIC, 3000 null, 3001 UGARITIC, 3002 OLD_PERSIAN, 3003 null, 3004 DESERET, 3005 SHAVIAN, 3006 OSMANYA, 3007 null, 3008 CYPRIOT_SYLLABARY, 3009 IMPERIAL_ARAMAIC, 3010 null, 3011 PHOENICIAN, 3012 LYDIAN, 3013 null, 3014 MEROITIC_HIEROGLYPHS, 3015 MEROITIC_CURSIVE, 3016 KHAROSHTHI, 3017 OLD_SOUTH_ARABIAN, 3018 null, 3019 AVESTAN, 3020 INSCRIPTIONAL_PARTHIAN, 3021 INSCRIPTIONAL_PAHLAVI, 3022 null, 3023 OLD_TURKIC, 3024 null, 3025 RUMI_NUMERAL_SYMBOLS, 3026 null, 3027 BRAHMI, 3028 KAITHI, 3029 SORA_SOMPENG, 3030 CHAKMA, 3031 null, 3032 SHARADA, 3033 null, 3034 TAKRI, 3035 null, 3036 CUNEIFORM, 3037 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3038 null, 3039 EGYPTIAN_HIEROGLYPHS, 3040 null, 3041 BAMUM_SUPPLEMENT, 3042 null, 3043 MIAO, 3044 null, 
3045 KANA_SUPPLEMENT, 3046 null, 3047 BYZANTINE_MUSICAL_SYMBOLS, 3048 MUSICAL_SYMBOLS, 3049 ANCIENT_GREEK_MUSICAL_NOTATION, 3050 null, 3051 TAI_XUAN_JING_SYMBOLS, 3052 COUNTING_ROD_NUMERALS, 3053 null, 3054 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3055 null, 3056 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3057 null, 3058 MAHJONG_TILES, 3059 DOMINO_TILES, 3060 PLAYING_CARDS, 3061 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3062 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3063 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3064 EMOTICONS, 3065 null, 3066 TRANSPORT_AND_MAP_SYMBOLS, 3067 ALCHEMICAL_SYMBOLS, 3068 null, 3069 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3070 null, 3071 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3072 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3073 null, 3074 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3075 null, 3076 TAGS, 3077 null, 3078 VARIATION_SELECTORS_SUPPLEMENT, 3079 null, 3080 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3081 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3082 }; 3083 3084 3085 /** 3086 * Returns the object representing the Unicode block containing the 3087 * given character, or {@code null} if the character is not a 3088 * member of a defined block. 3089 * 3090 * <p><b>Note:</b> This method cannot handle 3091 * <a href="Character.html#supplementary"> supplementary 3092 * characters</a>. To support all Unicode characters, including 3093 * supplementary characters, use the {@link #of(int)} method. 3094 * 3095 * @param c The character in question 3096 * @return The {@code UnicodeBlock} instance representing the 3097 * Unicode block of which this character is a member, or 3098 * {@code null} if the character is not a member of any 3099 * Unicode block 3100 */ 3101 public static UnicodeBlock of(char c) { 3102 return of((int)c); 3103 } 3104 3105 /** 3106 * Returns the object representing the Unicode block 3107 * containing the given character (Unicode code point), or 3108 * {@code null} if the character is not a member of a 3109 * defined block. 
3110 * 3111 * @param codePoint the character (Unicode code point) in question. 3112 * @return The {@code UnicodeBlock} instance representing the 3113 * Unicode block of which this character is a member, or 3114 * {@code null} if the character is not a member of any 3115 * Unicode block 3116 * @exception IllegalArgumentException if the specified 3117 * {@code codePoint} is an invalid Unicode code point. 3118 * @see Character#isValidCodePoint(int) 3119 * @since 1.5 3120 */ 3121 public static UnicodeBlock of(int codePoint) { 3122 if (!isValidCodePoint(codePoint)) { 3123 throw new IllegalArgumentException(); 3124 } 3125 3126 int top, bottom, current; 3127 bottom = 0; 3128 top = blockStarts.length; 3129 current = top/2; 3130 3131 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3132 while (top - bottom > 1) { 3133 if (codePoint >= blockStarts[current]) { 3134 bottom = current; 3135 } else { 3136 top = current; 3137 } 3138 current = (top + bottom) / 2; 3139 } 3140 return blocks[current]; 3141 } 3142 3143 /** 3144 * Returns the UnicodeBlock with the given name. Block 3145 * names are determined by The Unicode Standard. The file 3146 * Blocks-<version>.txt defines blocks for a particular 3147 * version of the standard. The {@link Character} class specifies 3148 * the version of the standard that it supports. 3149 * <p> 3150 * This method accepts block names in the following forms: 3151 * <ol> 3152 * <li> Canonical block names as defined by the Unicode Standard. 3153 * For example, the standard defines a "Basic Latin" block. Therefore, this 3154 * method accepts "Basic Latin" as a valid block name. The documentation of 3155 * each UnicodeBlock provides the canonical name. 3156 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3157 * is a valid block name for the "Basic Latin" block. 3158 * <li>The text representation of each constant UnicodeBlock identifier. 
3159 * For example, this method will return the {@link #BASIC_LATIN} block if 3160 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3161 * hyphens in the canonical name with underscores. 3162 * </ol> 3163 * Finally, character case is ignored for all of the valid block name forms. 3164 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3165 * The en_US locale's case mapping rules are used to provide case-insensitive 3166 * string comparisons for block name validation. 3167 * <p> 3168 * If the Unicode Standard changes block names, both the previous and 3169 * current names will be accepted. 3170 * 3171 * @param blockName A {@code UnicodeBlock} name. 3172 * @return The {@code UnicodeBlock} instance identified 3173 * by {@code blockName} 3174 * @throws IllegalArgumentException if {@code blockName} is an 3175 * invalid name 3176 * @throws NullPointerException if {@code blockName} is null 3177 * @since 1.5 3178 */ 3179 public static final UnicodeBlock forName(String blockName) { 3180 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3181 if (block == null) { 3182 throw new IllegalArgumentException(); 3183 } 3184 return block; 3185 } 3186 } 3187 3188 3189 /** 3190 * A family of character subsets representing the character scripts 3191 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3192 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3193 * character is assigned to a single Unicode script, either a specific 3194 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3195 * one of the following three special values, 3196 * {@link Character.UnicodeScript#INHERITED Inherited}, 3197 * {@link Character.UnicodeScript#COMMON Common} or 3198 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3199 * 3200 * @since 1.7 3201 */ 3202 public static enum UnicodeScript { 3203 /** 3204 * Unicode script "Common". 3205 */ 3206 COMMON, 3207 3208 /** 3209 * Unicode script "Latin". 
3210 */ 3211 LATIN, 3212 3213 /** 3214 * Unicode script "Greek". 3215 */ 3216 GREEK, 3217 3218 /** 3219 * Unicode script "Cyrillic". 3220 */ 3221 CYRILLIC, 3222 3223 /** 3224 * Unicode script "Armenian". 3225 */ 3226 ARMENIAN, 3227 3228 /** 3229 * Unicode script "Hebrew". 3230 */ 3231 HEBREW, 3232 3233 /** 3234 * Unicode script "Arabic". 3235 */ 3236 ARABIC, 3237 3238 /** 3239 * Unicode script "Syriac". 3240 */ 3241 SYRIAC, 3242 3243 /** 3244 * Unicode script "Thaana". 3245 */ 3246 THAANA, 3247 3248 /** 3249 * Unicode script "Devanagari". 3250 */ 3251 DEVANAGARI, 3252 3253 /** 3254 * Unicode script "Bengali". 3255 */ 3256 BENGALI, 3257 3258 /** 3259 * Unicode script "Gurmukhi". 3260 */ 3261 GURMUKHI, 3262 3263 /** 3264 * Unicode script "Gujarati". 3265 */ 3266 GUJARATI, 3267 3268 /** 3269 * Unicode script "Oriya". 3270 */ 3271 ORIYA, 3272 3273 /** 3274 * Unicode script "Tamil". 3275 */ 3276 TAMIL, 3277 3278 /** 3279 * Unicode script "Telugu". 3280 */ 3281 TELUGU, 3282 3283 /** 3284 * Unicode script "Kannada". 3285 */ 3286 KANNADA, 3287 3288 /** 3289 * Unicode script "Malayalam". 3290 */ 3291 MALAYALAM, 3292 3293 /** 3294 * Unicode script "Sinhala". 3295 */ 3296 SINHALA, 3297 3298 /** 3299 * Unicode script "Thai". 3300 */ 3301 THAI, 3302 3303 /** 3304 * Unicode script "Lao". 3305 */ 3306 LAO, 3307 3308 /** 3309 * Unicode script "Tibetan". 3310 */ 3311 TIBETAN, 3312 3313 /** 3314 * Unicode script "Myanmar". 3315 */ 3316 MYANMAR, 3317 3318 /** 3319 * Unicode script "Georgian". 3320 */ 3321 GEORGIAN, 3322 3323 /** 3324 * Unicode script "Hangul". 3325 */ 3326 HANGUL, 3327 3328 /** 3329 * Unicode script "Ethiopic". 3330 */ 3331 ETHIOPIC, 3332 3333 /** 3334 * Unicode script "Cherokee". 3335 */ 3336 CHEROKEE, 3337 3338 /** 3339 * Unicode script "Canadian_Aboriginal". 3340 */ 3341 CANADIAN_ABORIGINAL, 3342 3343 /** 3344 * Unicode script "Ogham". 3345 */ 3346 OGHAM, 3347 3348 /** 3349 * Unicode script "Runic". 3350 */ 3351 RUNIC, 3352 3353 /** 3354 * Unicode script "Khmer". 
3355 */ 3356 KHMER, 3357 3358 /** 3359 * Unicode script "Mongolian". 3360 */ 3361 MONGOLIAN, 3362 3363 /** 3364 * Unicode script "Hiragana". 3365 */ 3366 HIRAGANA, 3367 3368 /** 3369 * Unicode script "Katakana". 3370 */ 3371 KATAKANA, 3372 3373 /** 3374 * Unicode script "Bopomofo". 3375 */ 3376 BOPOMOFO, 3377 3378 /** 3379 * Unicode script "Han". 3380 */ 3381 HAN, 3382 3383 /** 3384 * Unicode script "Yi". 3385 */ 3386 YI, 3387 3388 /** 3389 * Unicode script "Old_Italic". 3390 */ 3391 OLD_ITALIC, 3392 3393 /** 3394 * Unicode script "Gothic". 3395 */ 3396 GOTHIC, 3397 3398 /** 3399 * Unicode script "Deseret". 3400 */ 3401 DESERET, 3402 3403 /** 3404 * Unicode script "Inherited". 3405 */ 3406 INHERITED, 3407 3408 /** 3409 * Unicode script "Tagalog". 3410 */ 3411 TAGALOG, 3412 3413 /** 3414 * Unicode script "Hanunoo". 3415 */ 3416 HANUNOO, 3417 3418 /** 3419 * Unicode script "Buhid". 3420 */ 3421 BUHID, 3422 3423 /** 3424 * Unicode script "Tagbanwa". 3425 */ 3426 TAGBANWA, 3427 3428 /** 3429 * Unicode script "Limbu". 3430 */ 3431 LIMBU, 3432 3433 /** 3434 * Unicode script "Tai_Le". 3435 */ 3436 TAI_LE, 3437 3438 /** 3439 * Unicode script "Linear_B". 3440 */ 3441 LINEAR_B, 3442 3443 /** 3444 * Unicode script "Ugaritic". 3445 */ 3446 UGARITIC, 3447 3448 /** 3449 * Unicode script "Shavian". 3450 */ 3451 SHAVIAN, 3452 3453 /** 3454 * Unicode script "Osmanya". 3455 */ 3456 OSMANYA, 3457 3458 /** 3459 * Unicode script "Cypriot". 3460 */ 3461 CYPRIOT, 3462 3463 /** 3464 * Unicode script "Braille". 3465 */ 3466 BRAILLE, 3467 3468 /** 3469 * Unicode script "Buginese". 3470 */ 3471 BUGINESE, 3472 3473 /** 3474 * Unicode script "Coptic". 3475 */ 3476 COPTIC, 3477 3478 /** 3479 * Unicode script "New_Tai_Lue". 3480 */ 3481 NEW_TAI_LUE, 3482 3483 /** 3484 * Unicode script "Glagolitic". 3485 */ 3486 GLAGOLITIC, 3487 3488 /** 3489 * Unicode script "Tifinagh". 3490 */ 3491 TIFINAGH, 3492 3493 /** 3494 * Unicode script "Syloti_Nagri". 
3495 */ 3496 SYLOTI_NAGRI, 3497 3498 /** 3499 * Unicode script "Old_Persian". 3500 */ 3501 OLD_PERSIAN, 3502 3503 /** 3504 * Unicode script "Kharoshthi". 3505 */ 3506 KHAROSHTHI, 3507 3508 /** 3509 * Unicode script "Balinese". 3510 */ 3511 BALINESE, 3512 3513 /** 3514 * Unicode script "Cuneiform". 3515 */ 3516 CUNEIFORM, 3517 3518 /** 3519 * Unicode script "Phoenician". 3520 */ 3521 PHOENICIAN, 3522 3523 /** 3524 * Unicode script "Phags_Pa". 3525 */ 3526 PHAGS_PA, 3527 3528 /** 3529 * Unicode script "Nko". 3530 */ 3531 NKO, 3532 3533 /** 3534 * Unicode script "Sundanese". 3535 */ 3536 SUNDANESE, 3537 3538 /** 3539 * Unicode script "Batak". 3540 */ 3541 BATAK, 3542 3543 /** 3544 * Unicode script "Lepcha". 3545 */ 3546 LEPCHA, 3547 3548 /** 3549 * Unicode script "Ol_Chiki". 3550 */ 3551 OL_CHIKI, 3552 3553 /** 3554 * Unicode script "Vai". 3555 */ 3556 VAI, 3557 3558 /** 3559 * Unicode script "Saurashtra". 3560 */ 3561 SAURASHTRA, 3562 3563 /** 3564 * Unicode script "Kayah_Li". 3565 */ 3566 KAYAH_LI, 3567 3568 /** 3569 * Unicode script "Rejang". 3570 */ 3571 REJANG, 3572 3573 /** 3574 * Unicode script "Lycian". 3575 */ 3576 LYCIAN, 3577 3578 /** 3579 * Unicode script "Carian". 3580 */ 3581 CARIAN, 3582 3583 /** 3584 * Unicode script "Lydian". 3585 */ 3586 LYDIAN, 3587 3588 /** 3589 * Unicode script "Cham". 3590 */ 3591 CHAM, 3592 3593 /** 3594 * Unicode script "Tai_Tham". 3595 */ 3596 TAI_THAM, 3597 3598 /** 3599 * Unicode script "Tai_Viet". 3600 */ 3601 TAI_VIET, 3602 3603 /** 3604 * Unicode script "Avestan". 3605 */ 3606 AVESTAN, 3607 3608 /** 3609 * Unicode script "Egyptian_Hieroglyphs". 3610 */ 3611 EGYPTIAN_HIEROGLYPHS, 3612 3613 /** 3614 * Unicode script "Samaritan". 3615 */ 3616 SAMARITAN, 3617 3618 /** 3619 * Unicode script "Mandaic". 3620 */ 3621 MANDAIC, 3622 3623 /** 3624 * Unicode script "Lisu". 3625 */ 3626 LISU, 3627 3628 /** 3629 * Unicode script "Bamum". 3630 */ 3631 BAMUM, 3632 3633 /** 3634 * Unicode script "Javanese". 
3635 */ 3636 JAVANESE, 3637 3638 /** 3639 * Unicode script "Meetei_Mayek". 3640 */ 3641 MEETEI_MAYEK, 3642 3643 /** 3644 * Unicode script "Imperial_Aramaic". 3645 */ 3646 IMPERIAL_ARAMAIC, 3647 3648 /** 3649 * Unicode script "Old_South_Arabian". 3650 */ 3651 OLD_SOUTH_ARABIAN, 3652 3653 /** 3654 * Unicode script "Inscriptional_Parthian". 3655 */ 3656 INSCRIPTIONAL_PARTHIAN, 3657 3658 /** 3659 * Unicode script "Inscriptional_Pahlavi". 3660 */ 3661 INSCRIPTIONAL_PAHLAVI, 3662 3663 /** 3664 * Unicode script "Old_Turkic". 3665 */ 3666 OLD_TURKIC, 3667 3668 /** 3669 * Unicode script "Brahmi". 3670 */ 3671 BRAHMI, 3672 3673 /** 3674 * Unicode script "Kaithi". 3675 */ 3676 KAITHI, 3677 3678 /** 3679 * Unicode script "Meroitic Hieroglyphs". 3680 */ 3681 MEROITIC_HIEROGLYPHS, 3682 3683 /** 3684 * Unicode script "Meroitic Cursive". 3685 */ 3686 MEROITIC_CURSIVE, 3687 3688 /** 3689 * Unicode script "Sora Sompeng". 3690 */ 3691 SORA_SOMPENG, 3692 3693 /** 3694 * Unicode script "Chakma". 3695 */ 3696 CHAKMA, 3697 3698 /** 3699 * Unicode script "Sharada". 3700 */ 3701 SHARADA, 3702 3703 /** 3704 * Unicode script "Takri". 3705 */ 3706 TAKRI, 3707 3708 /** 3709 * Unicode script "Miao". 3710 */ 3711 MIAO, 3712 3713 /** 3714 * Unicode script "Unknown". 
3715 */ 3716 UNKNOWN; 3717 3718 private static final int[] scriptStarts = { 3719 0x0000, // 0000..0040; COMMON 3720 0x0041, // 0041..005A; LATIN 3721 0x005B, // 005B..0060; COMMON 3722 0x0061, // 0061..007A; LATIN 3723 0x007B, // 007B..00A9; COMMON 3724 0x00AA, // 00AA..00AA; LATIN 3725 0x00AB, // 00AB..00B9; COMMON 3726 0x00BA, // 00BA..00BA; LATIN 3727 0x00BB, // 00BB..00BF; COMMON 3728 0x00C0, // 00C0..00D6; LATIN 3729 0x00D7, // 00D7..00D7; COMMON 3730 0x00D8, // 00D8..00F6; LATIN 3731 0x00F7, // 00F7..00F7; COMMON 3732 0x00F8, // 00F8..02B8; LATIN 3733 0x02B9, // 02B9..02DF; COMMON 3734 0x02E0, // 02E0..02E4; LATIN 3735 0x02E5, // 02E5..02E9; COMMON 3736 0x02EA, // 02EA..02EB; BOPOMOFO 3737 0x02EC, // 02EC..02FF; COMMON 3738 0x0300, // 0300..036F; INHERITED 3739 0x0370, // 0370..0373; GREEK 3740 0x0374, // 0374..0374; COMMON 3741 0x0375, // 0375..037D; GREEK 3742 0x037E, // 037E..0383; COMMON 3743 0x0384, // 0384..0384; GREEK 3744 0x0385, // 0385..0385; COMMON 3745 0x0386, // 0386..0386; GREEK 3746 0x0387, // 0387..0387; COMMON 3747 0x0388, // 0388..03E1; GREEK 3748 0x03E2, // 03E2..03EF; COPTIC 3749 0x03F0, // 03F0..03FF; GREEK 3750 0x0400, // 0400..0484; CYRILLIC 3751 0x0485, // 0485..0486; INHERITED 3752 0x0487, // 0487..0530; CYRILLIC 3753 0x0531, // 0531..0588; ARMENIAN 3754 0x0589, // 0589..0589; COMMON 3755 0x058A, // 058A..0590; ARMENIAN 3756 0x0591, // 0591..05FF; HEBREW 3757 0x0600, // 0600..060B; ARABIC 3758 0x060C, // 060C..060C; COMMON 3759 0x060D, // 060D..061A; ARABIC 3760 0x061B, // 061B..061D; COMMON 3761 0x061E, // 061E..061E; ARABIC 3762 0x061F, // 061F..061F; COMMON 3763 0x0620, // 0620..063F; ARABIC 3764 0x0640, // 0640..0640; COMMON 3765 0x0641, // 0641..064A; ARABIC 3766 0x064B, // 064B..0655; INHERITED 3767 0x0656, // 0656..065F; ARABIC 3768 0x0660, // 0660..0669; COMMON 3769 0x066A, // 066A..066F; ARABIC 3770 0x0670, // 0670..0670; INHERITED 3771 0x0671, // 0671..06DC; ARABIC 3772 0x06DD, // 06DD..06DD; COMMON 3773 0x06DE, // 
06DE..06FF; ARABIC 3774 0x0700, // 0700..074F; SYRIAC 3775 0x0750, // 0750..077F; ARABIC 3776 0x0780, // 0780..07BF; THAANA 3777 0x07C0, // 07C0..07FF; NKO 3778 0x0800, // 0800..083F; SAMARITAN 3779 0x0840, // 0840..089F; MANDAIC 3780 0x08A0, // 08A0..08FF; ARABIC 3781 0x0900, // 0900..0950; DEVANAGARI 3782 0x0951, // 0951..0952; INHERITED 3783 0x0953, // 0953..0963; DEVANAGARI 3784 0x0964, // 0964..0965; COMMON 3785 0x0966, // 0966..0980; DEVANAGARI 3786 0x0981, // 0981..0A00; BENGALI 3787 0x0A01, // 0A01..0A80; GURMUKHI 3788 0x0A81, // 0A81..0B00; GUJARATI 3789 0x0B01, // 0B01..0B81; ORIYA 3790 0x0B82, // 0B82..0C00; TAMIL 3791 0x0C01, // 0C01..0C81; TELUGU 3792 0x0C82, // 0C82..0CF0; KANNADA 3793 0x0D02, // 0D02..0D81; MALAYALAM 3794 0x0D82, // 0D82..0E00; SINHALA 3795 0x0E01, // 0E01..0E3E; THAI 3796 0x0E3F, // 0E3F..0E3F; COMMON 3797 0x0E40, // 0E40..0E80; THAI 3798 0x0E81, // 0E81..0EFF; LAO 3799 0x0F00, // 0F00..0FD4; TIBETAN 3800 0x0FD5, // 0FD5..0FD8; COMMON 3801 0x0FD9, // 0FD9..0FFF; TIBETAN 3802 0x1000, // 1000..109F; MYANMAR 3803 0x10A0, // 10A0..10FA; GEORGIAN 3804 0x10FB, // 10FB..10FB; COMMON 3805 0x10FC, // 10FC..10FF; GEORGIAN 3806 0x1100, // 1100..11FF; HANGUL 3807 0x1200, // 1200..139F; ETHIOPIC 3808 0x13A0, // 13A0..13FF; CHEROKEE 3809 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3810 0x1680, // 1680..169F; OGHAM 3811 0x16A0, // 16A0..16EA; RUNIC 3812 0x16EB, // 16EB..16ED; COMMON 3813 0x16EE, // 16EE..16FF; RUNIC 3814 0x1700, // 1700..171F; TAGALOG 3815 0x1720, // 1720..1734; HANUNOO 3816 0x1735, // 1735..173F; COMMON 3817 0x1740, // 1740..175F; BUHID 3818 0x1760, // 1760..177F; TAGBANWA 3819 0x1780, // 1780..17FF; KHMER 3820 0x1800, // 1800..1801; MONGOLIAN 3821 0x1802, // 1802..1803; COMMON 3822 0x1804, // 1804..1804; MONGOLIAN 3823 0x1805, // 1805..1805; COMMON 3824 0x1806, // 1806..18AF; MONGOLIAN 3825 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3826 0x1900, // 1900..194F; LIMBU 3827 0x1950, // 1950..197F; TAI_LE 3828 0x1980, // 1980..19DF; 
NEW_TAI_LUE 3829 0x19E0, // 19E0..19FF; KHMER 3830 0x1A00, // 1A00..1A1F; BUGINESE 3831 0x1A20, // 1A20..1AFF; TAI_THAM 3832 0x1B00, // 1B00..1B7F; BALINESE 3833 0x1B80, // 1B80..1BBF; SUNDANESE 3834 0x1BC0, // 1BC0..1BFF; BATAK 3835 0x1C00, // 1C00..1C4F; LEPCHA 3836 0x1C50, // 1C50..1CBF; OL_CHIKI 3837 0x1CC0, // 1CC0..1CCF; SUNDANESE 3838 0x1CD0, // 1CD0..1CD2; INHERITED 3839 0x1CD3, // 1CD3..1CD3; COMMON 3840 0x1CD4, // 1CD4..1CE0; INHERITED 3841 0x1CE1, // 1CE1..1CE1; COMMON 3842 0x1CE2, // 1CE2..1CE8; INHERITED 3843 0x1CE9, // 1CE9..1CEC; COMMON 3844 0x1CED, // 1CED..1CED; INHERITED 3845 0x1CEE, // 1CEE..1CF3; COMMON 3846 0x1CF4, // 1CF4..1CF4; INHERITED 3847 0x1CF5, // 1CF5..1CFF; COMMON 3848 0x1D00, // 1D00..1D25; LATIN 3849 0x1D26, // 1D26..1D2A; GREEK 3850 0x1D2B, // 1D2B..1D2B; CYRILLIC 3851 0x1D2C, // 1D2C..1D5C; LATIN 3852 0x1D5D, // 1D5D..1D61; GREEK 3853 0x1D62, // 1D62..1D65; LATIN 3854 0x1D66, // 1D66..1D6A; GREEK 3855 0x1D6B, // 1D6B..1D77; LATIN 3856 0x1D78, // 1D78..1D78; CYRILLIC 3857 0x1D79, // 1D79..1DBE; LATIN 3858 0x1DBF, // 1DBF..1DBF; GREEK 3859 0x1DC0, // 1DC0..1DFF; INHERITED 3860 0x1E00, // 1E00..1EFF; LATIN 3861 0x1F00, // 1F00..1FFF; GREEK 3862 0x2000, // 2000..200B; COMMON 3863 0x200C, // 200C..200D; INHERITED 3864 0x200E, // 200E..2070; COMMON 3865 0x2071, // 2071..2073; LATIN 3866 0x2074, // 2074..207E; COMMON 3867 0x207F, // 207F..207F; LATIN 3868 0x2080, // 2080..208F; COMMON 3869 0x2090, // 2090..209F; LATIN 3870 0x20A0, // 20A0..20CF; COMMON 3871 0x20D0, // 20D0..20FF; INHERITED 3872 0x2100, // 2100..2125; COMMON 3873 0x2126, // 2126..2126; GREEK 3874 0x2127, // 2127..2129; COMMON 3875 0x212A, // 212A..212B; LATIN 3876 0x212C, // 212C..2131; COMMON 3877 0x2132, // 2132..2132; LATIN 3878 0x2133, // 2133..214D; COMMON 3879 0x214E, // 214E..214E; LATIN 3880 0x214F, // 214F..215F; COMMON 3881 0x2160, // 2160..2188; LATIN 3882 0x2189, // 2189..27FF; COMMON 3883 0x2800, // 2800..28FF; BRAILLE 3884 0x2900, // 2900..2BFF; COMMON 3885 
0x2C00, // 2C00..2C5F; GLAGOLITIC 3886 0x2C60, // 2C60..2C7F; LATIN 3887 0x2C80, // 2C80..2CFF; COPTIC 3888 0x2D00, // 2D00..2D2F; GEORGIAN 3889 0x2D30, // 2D30..2D7F; TIFINAGH 3890 0x2D80, // 2D80..2DDF; ETHIOPIC 3891 0x2DE0, // 2DE0..2DFF; CYRILLIC 3892 0x2E00, // 2E00..2E7F; COMMON 3893 0x2E80, // 2E80..2FEF; HAN 3894 0x2FF0, // 2FF0..3004; COMMON 3895 0x3005, // 3005..3005; HAN 3896 0x3006, // 3006..3006; COMMON 3897 0x3007, // 3007..3007; HAN 3898 0x3008, // 3008..3020; COMMON 3899 0x3021, // 3021..3029; HAN 3900 0x302A, // 302A..302D; INHERITED 3901 0x302E, // 302E..302F; HANGUL 3902 0x3030, // 3030..3037; COMMON 3903 0x3038, // 3038..303B; HAN 3904 0x303C, // 303C..3040; COMMON 3905 0x3041, // 3041..3098; HIRAGANA 3906 0x3099, // 3099..309A; INHERITED 3907 0x309B, // 309B..309C; COMMON 3908 0x309D, // 309D..309F; HIRAGANA 3909 0x30A0, // 30A0..30A0; COMMON 3910 0x30A1, // 30A1..30FA; KATAKANA 3911 0x30FB, // 30FB..30FC; COMMON 3912 0x30FD, // 30FD..3104; KATAKANA 3913 0x3105, // 3105..3130; BOPOMOFO 3914 0x3131, // 3131..318F; HANGUL 3915 0x3190, // 3190..319F; COMMON 3916 0x31A0, // 31A0..31BF; BOPOMOFO 3917 0x31C0, // 31C0..31EF; COMMON 3918 0x31F0, // 31F0..31FF; KATAKANA 3919 0x3200, // 3200..321F; HANGUL 3920 0x3220, // 3220..325F; COMMON 3921 0x3260, // 3260..327E; HANGUL 3922 0x327F, // 327F..32CF; COMMON 3923 0x32D0, // 32D0..3357; KATAKANA 3924 0x3358, // 3358..33FF; COMMON 3925 0x3400, // 3400..4DBF; HAN 3926 0x4DC0, // 4DC0..4DFF; COMMON 3927 0x4E00, // 4E00..9FFF; HAN 3928 0xA000, // A000..A4CF; YI 3929 0xA4D0, // A4D0..A4FF; LISU 3930 0xA500, // A500..A63F; VAI 3931 0xA640, // A640..A69F; CYRILLIC 3932 0xA6A0, // A6A0..A6FF; BAMUM 3933 0xA700, // A700..A721; COMMON 3934 0xA722, // A722..A787; LATIN 3935 0xA788, // A788..A78A; COMMON 3936 0xA78B, // A78B..A7FF; LATIN 3937 0xA800, // A800..A82F; SYLOTI_NAGRI 3938 0xA830, // A830..A83F; COMMON 3939 0xA840, // A840..A87F; PHAGS_PA 3940 0xA880, // A880..A8DF; SAURASHTRA 3941 0xA8E0, // A8E0..A8FF; 
DEVANAGARI 3942 0xA900, // A900..A92F; KAYAH_LI 3943 0xA930, // A930..A95F; REJANG 3944 0xA960, // A960..A97F; HANGUL 3945 0xA980, // A980..A9FF; JAVANESE 3946 0xAA00, // AA00..AA5F; CHAM 3947 0xAA60, // AA60..AA7F; MYANMAR 3948 0xAA80, // AA80..AADF; TAI_VIET 3949 0xAAE0, // AAE0..AB00; MEETEI_MAYEK 3950 0xAB01, // AB01..ABBF; ETHIOPIC 3951 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3952 0xAC00, // AC00..D7FB; HANGUL 3953 0xD7FC, // D7FC..F8FF; UNKNOWN 3954 0xF900, // F900..FAFF; HAN 3955 0xFB00, // FB00..FB12; LATIN 3956 0xFB13, // FB13..FB1C; ARMENIAN 3957 0xFB1D, // FB1D..FB4F; HEBREW 3958 0xFB50, // FB50..FD3D; ARABIC 3959 0xFD3E, // FD3E..FD4F; COMMON 3960 0xFD50, // FD50..FDFC; ARABIC 3961 0xFDFD, // FDFD..FDFF; COMMON 3962 0xFE00, // FE00..FE0F; INHERITED 3963 0xFE10, // FE10..FE1F; COMMON 3964 0xFE20, // FE20..FE2F; INHERITED 3965 0xFE30, // FE30..FE6F; COMMON 3966 0xFE70, // FE70..FEFE; ARABIC 3967 0xFEFF, // FEFF..FF20; COMMON 3968 0xFF21, // FF21..FF3A; LATIN 3969 0xFF3B, // FF3B..FF40; COMMON 3970 0xFF41, // FF41..FF5A; LATIN 3971 0xFF5B, // FF5B..FF65; COMMON 3972 0xFF66, // FF66..FF6F; KATAKANA 3973 0xFF70, // FF70..FF70; COMMON 3974 0xFF71, // FF71..FF9D; KATAKANA 3975 0xFF9E, // FF9E..FF9F; COMMON 3976 0xFFA0, // FFA0..FFDF; HANGUL 3977 0xFFE0, // FFE0..FFFF; COMMON 3978 0x10000, // 10000..100FF; LINEAR_B 3979 0x10100, // 10100..1013F; COMMON 3980 0x10140, // 10140..1018F; GREEK 3981 0x10190, // 10190..101FC; COMMON 3982 0x101FD, // 101FD..1027F; INHERITED 3983 0x10280, // 10280..1029F; LYCIAN 3984 0x102A0, // 102A0..102FF; CARIAN 3985 0x10300, // 10300..1032F; OLD_ITALIC 3986 0x10330, // 10330..1037F; GOTHIC 3987 0x10380, // 10380..1039F; UGARITIC 3988 0x103A0, // 103A0..103FF; OLD_PERSIAN 3989 0x10400, // 10400..1044F; DESERET 3990 0x10450, // 10450..1047F; SHAVIAN 3991 0x10480, // 10480..107FF; OSMANYA 3992 0x10800, // 10800..1083F; CYPRIOT 3993 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3994 0x10900, // 10900..1091F; PHOENICIAN 3995 0x10920, // 
10920..1097F; LYDIAN 3996 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 3997 0x109A0, // 109A0..109FF; MEROITIC_CURSIVE 3998 0x10A00, // 10A00..10A5F; KHAROSHTHI 3999 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 4000 0x10B00, // 10B00..10B3F; AVESTAN 4001 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 4002 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 4003 0x10C00, // 10C00..10E5F; OLD_TURKIC 4004 0x10E60, // 10E60..10FFF; ARABIC 4005 0x11000, // 11000..1107F; BRAHMI 4006 0x11080, // 11080..110CF; KAITHI 4007 0x110D0, // 110D0..110FF; SORA_SOMPENG 4008 0x11100, // 11100..1117F; CHAKMA 4009 0x11180, // 11180..1167F; SHARADA 4010 0x11680, // 11680..116CF; TAKRI 4011 0x12000, // 12000..12FFF; CUNEIFORM 4012 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 4013 0x16800, // 16800..16A38; BAMUM 4014 0x16F00, // 16F00..16F9F; MIAO 4015 0x1B000, // 1B000..1B000; KATAKANA 4016 0x1B001, // 1B001..1CFFF; HIRAGANA 4017 0x1D000, // 1D000..1D166; COMMON 4018 0x1D167, // 1D167..1D169; INHERITED 4019 0x1D16A, // 1D16A..1D17A; COMMON 4020 0x1D17B, // 1D17B..1D182; INHERITED 4021 0x1D183, // 1D183..1D184; COMMON 4022 0x1D185, // 1D185..1D18B; INHERITED 4023 0x1D18C, // 1D18C..1D1A9; COMMON 4024 0x1D1AA, // 1D1AA..1D1AD; INHERITED 4025 0x1D1AE, // 1D1AE..1D1FF; COMMON 4026 0x1D200, // 1D200..1D2FF; GREEK 4027 0x1D300, // 1D300..1EDFF; COMMON 4028 0x1EE00, // 1EE00..1EFFF; ARABIC 4029 0x1F000, // 1F000..1F1FF; COMMON 4030 0x1F200, // 1F200..1F200; HIRAGANA 4031 0x1F201, // 1F210..1FFFF; COMMON 4032 0x20000, // 20000..E0000; HAN 4033 0xE0001, // E0001..E00FF; COMMON 4034 0xE0100, // E0100..E01EF; INHERITED 4035 0xE01F0 // E01F0..10FFFF; UNKNOWN 4036 4037 }; 4038 4039 private static final UnicodeScript[] scripts = { 4040 COMMON, 4041 LATIN, 4042 COMMON, 4043 LATIN, 4044 COMMON, 4045 LATIN, 4046 COMMON, 4047 LATIN, 4048 COMMON, 4049 LATIN, 4050 COMMON, 4051 LATIN, 4052 COMMON, 4053 LATIN, 4054 COMMON, 4055 LATIN, 4056 COMMON, 4057 BOPOMOFO, 4058 COMMON, 4059 INHERITED, 4060 GREEK, 4061 
COMMON, 4062 GREEK, 4063 COMMON, 4064 GREEK, 4065 COMMON, 4066 GREEK, 4067 COMMON, 4068 GREEK, 4069 COPTIC, 4070 GREEK, 4071 CYRILLIC, 4072 INHERITED, 4073 CYRILLIC, 4074 ARMENIAN, 4075 COMMON, 4076 ARMENIAN, 4077 HEBREW, 4078 ARABIC, 4079 COMMON, 4080 ARABIC, 4081 COMMON, 4082 ARABIC, 4083 COMMON, 4084 ARABIC, 4085 COMMON, 4086 ARABIC, 4087 INHERITED, 4088 ARABIC, 4089 COMMON, 4090 ARABIC, 4091 INHERITED, 4092 ARABIC, 4093 COMMON, 4094 ARABIC, 4095 SYRIAC, 4096 ARABIC, 4097 THAANA, 4098 NKO, 4099 SAMARITAN, 4100 MANDAIC, 4101 ARABIC, 4102 DEVANAGARI, 4103 INHERITED, 4104 DEVANAGARI, 4105 COMMON, 4106 DEVANAGARI, 4107 BENGALI, 4108 GURMUKHI, 4109 GUJARATI, 4110 ORIYA, 4111 TAMIL, 4112 TELUGU, 4113 KANNADA, 4114 MALAYALAM, 4115 SINHALA, 4116 THAI, 4117 COMMON, 4118 THAI, 4119 LAO, 4120 TIBETAN, 4121 COMMON, 4122 TIBETAN, 4123 MYANMAR, 4124 GEORGIAN, 4125 COMMON, 4126 GEORGIAN, 4127 HANGUL, 4128 ETHIOPIC, 4129 CHEROKEE, 4130 CANADIAN_ABORIGINAL, 4131 OGHAM, 4132 RUNIC, 4133 COMMON, 4134 RUNIC, 4135 TAGALOG, 4136 HANUNOO, 4137 COMMON, 4138 BUHID, 4139 TAGBANWA, 4140 KHMER, 4141 MONGOLIAN, 4142 COMMON, 4143 MONGOLIAN, 4144 COMMON, 4145 MONGOLIAN, 4146 CANADIAN_ABORIGINAL, 4147 LIMBU, 4148 TAI_LE, 4149 NEW_TAI_LUE, 4150 KHMER, 4151 BUGINESE, 4152 TAI_THAM, 4153 BALINESE, 4154 SUNDANESE, 4155 BATAK, 4156 LEPCHA, 4157 OL_CHIKI, 4158 SUNDANESE, 4159 INHERITED, 4160 COMMON, 4161 INHERITED, 4162 COMMON, 4163 INHERITED, 4164 COMMON, 4165 INHERITED, 4166 COMMON, 4167 INHERITED, 4168 COMMON, 4169 LATIN, 4170 GREEK, 4171 CYRILLIC, 4172 LATIN, 4173 GREEK, 4174 LATIN, 4175 GREEK, 4176 LATIN, 4177 CYRILLIC, 4178 LATIN, 4179 GREEK, 4180 INHERITED, 4181 LATIN, 4182 GREEK, 4183 COMMON, 4184 INHERITED, 4185 COMMON, 4186 LATIN, 4187 COMMON, 4188 LATIN, 4189 COMMON, 4190 LATIN, 4191 COMMON, 4192 INHERITED, 4193 COMMON, 4194 GREEK, 4195 COMMON, 4196 LATIN, 4197 COMMON, 4198 LATIN, 4199 COMMON, 4200 LATIN, 4201 COMMON, 4202 LATIN, 4203 COMMON, 4204 BRAILLE, 4205 COMMON, 4206 GLAGOLITIC, 
4207 LATIN, 4208 COPTIC, 4209 GEORGIAN, 4210 TIFINAGH, 4211 ETHIOPIC, 4212 CYRILLIC, 4213 COMMON, 4214 HAN, 4215 COMMON, 4216 HAN, 4217 COMMON, 4218 HAN, 4219 COMMON, 4220 HAN, 4221 INHERITED, 4222 HANGUL, 4223 COMMON, 4224 HAN, 4225 COMMON, 4226 HIRAGANA, 4227 INHERITED, 4228 COMMON, 4229 HIRAGANA, 4230 COMMON, 4231 KATAKANA, 4232 COMMON, 4233 KATAKANA, 4234 BOPOMOFO, 4235 HANGUL, 4236 COMMON, 4237 BOPOMOFO, 4238 COMMON, 4239 KATAKANA, 4240 HANGUL, 4241 COMMON, 4242 HANGUL, 4243 COMMON, 4244 KATAKANA, 4245 COMMON, 4246 HAN, 4247 COMMON, 4248 HAN, 4249 YI, 4250 LISU, 4251 VAI, 4252 CYRILLIC, 4253 BAMUM, 4254 COMMON, 4255 LATIN, 4256 COMMON, 4257 LATIN, 4258 SYLOTI_NAGRI, 4259 COMMON, 4260 PHAGS_PA, 4261 SAURASHTRA, 4262 DEVANAGARI, 4263 KAYAH_LI, 4264 REJANG, 4265 HANGUL, 4266 JAVANESE, 4267 CHAM, 4268 MYANMAR, 4269 TAI_VIET, 4270 MEETEI_MAYEK, 4271 ETHIOPIC, 4272 MEETEI_MAYEK, 4273 HANGUL, 4274 UNKNOWN , 4275 HAN, 4276 LATIN, 4277 ARMENIAN, 4278 HEBREW, 4279 ARABIC, 4280 COMMON, 4281 ARABIC, 4282 COMMON, 4283 INHERITED, 4284 COMMON, 4285 INHERITED, 4286 COMMON, 4287 ARABIC, 4288 COMMON, 4289 LATIN, 4290 COMMON, 4291 LATIN, 4292 COMMON, 4293 KATAKANA, 4294 COMMON, 4295 KATAKANA, 4296 COMMON, 4297 HANGUL, 4298 COMMON, 4299 LINEAR_B, 4300 COMMON, 4301 GREEK, 4302 COMMON, 4303 INHERITED, 4304 LYCIAN, 4305 CARIAN, 4306 OLD_ITALIC, 4307 GOTHIC, 4308 UGARITIC, 4309 OLD_PERSIAN, 4310 DESERET, 4311 SHAVIAN, 4312 OSMANYA, 4313 CYPRIOT, 4314 IMPERIAL_ARAMAIC, 4315 PHOENICIAN, 4316 LYDIAN, 4317 MEROITIC_HIEROGLYPHS, 4318 MEROITIC_CURSIVE, 4319 KHAROSHTHI, 4320 OLD_SOUTH_ARABIAN, 4321 AVESTAN, 4322 INSCRIPTIONAL_PARTHIAN, 4323 INSCRIPTIONAL_PAHLAVI, 4324 OLD_TURKIC, 4325 ARABIC, 4326 BRAHMI, 4327 KAITHI, 4328 SORA_SOMPENG, 4329 CHAKMA, 4330 SHARADA, 4331 TAKRI, 4332 CUNEIFORM, 4333 EGYPTIAN_HIEROGLYPHS, 4334 BAMUM, 4335 MIAO, 4336 KATAKANA, 4337 HIRAGANA, 4338 COMMON, 4339 INHERITED, 4340 COMMON, 4341 INHERITED, 4342 COMMON, 4343 INHERITED, 4344 COMMON, 4345 INHERITED, 4346 
COMMON, 4347 GREEK, 4348 COMMON, 4349 ARABIC, 4350 COMMON, 4351 HIRAGANA, 4352 COMMON, 4353 HAN, 4354 COMMON, 4355 INHERITED, 4356 UNKNOWN 4357 }; 4358 4359 private static HashMap<String, Character.UnicodeScript> aliases; 4360 static { 4361 aliases = new HashMap<>(128); 4362 aliases.put("ARAB", ARABIC); 4363 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4364 aliases.put("ARMN", ARMENIAN); 4365 aliases.put("AVST", AVESTAN); 4366 aliases.put("BALI", BALINESE); 4367 aliases.put("BAMU", BAMUM); 4368 aliases.put("BATK", BATAK); 4369 aliases.put("BENG", BENGALI); 4370 aliases.put("BOPO", BOPOMOFO); 4371 aliases.put("BRAI", BRAILLE); 4372 aliases.put("BRAH", BRAHMI); 4373 aliases.put("BUGI", BUGINESE); 4374 aliases.put("BUHD", BUHID); 4375 aliases.put("CAKM", CHAKMA); 4376 aliases.put("CANS", CANADIAN_ABORIGINAL); 4377 aliases.put("CARI", CARIAN); 4378 aliases.put("CHAM", CHAM); 4379 aliases.put("CHER", CHEROKEE); 4380 aliases.put("COPT", COPTIC); 4381 aliases.put("CPRT", CYPRIOT); 4382 aliases.put("CYRL", CYRILLIC); 4383 aliases.put("DEVA", DEVANAGARI); 4384 aliases.put("DSRT", DESERET); 4385 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4386 aliases.put("ETHI", ETHIOPIC); 4387 aliases.put("GEOR", GEORGIAN); 4388 aliases.put("GLAG", GLAGOLITIC); 4389 aliases.put("GOTH", GOTHIC); 4390 aliases.put("GREK", GREEK); 4391 aliases.put("GUJR", GUJARATI); 4392 aliases.put("GURU", GURMUKHI); 4393 aliases.put("HANG", HANGUL); 4394 aliases.put("HANI", HAN); 4395 aliases.put("HANO", HANUNOO); 4396 aliases.put("HEBR", HEBREW); 4397 aliases.put("HIRA", HIRAGANA); 4398 // it appears we don't have the KATAKANA_OR_HIRAGANA 4399 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4400 aliases.put("ITAL", OLD_ITALIC); 4401 aliases.put("JAVA", JAVANESE); 4402 aliases.put("KALI", KAYAH_LI); 4403 aliases.put("KANA", KATAKANA); 4404 aliases.put("KHAR", KHAROSHTHI); 4405 aliases.put("KHMR", KHMER); 4406 aliases.put("KNDA", KANNADA); 4407 aliases.put("KTHI", KAITHI); 4408 aliases.put("LANA", TAI_THAM); 4409 
aliases.put("LAOO", LAO); 4410 aliases.put("LATN", LATIN); 4411 aliases.put("LEPC", LEPCHA); 4412 aliases.put("LIMB", LIMBU); 4413 aliases.put("LINB", LINEAR_B); 4414 aliases.put("LISU", LISU); 4415 aliases.put("LYCI", LYCIAN); 4416 aliases.put("LYDI", LYDIAN); 4417 aliases.put("MAND", MANDAIC); 4418 aliases.put("MERC", MEROITIC_CURSIVE); 4419 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 4420 aliases.put("MLYM", MALAYALAM); 4421 aliases.put("MONG", MONGOLIAN); 4422 aliases.put("MTEI", MEETEI_MAYEK); 4423 aliases.put("MYMR", MYANMAR); 4424 aliases.put("NKOO", NKO); 4425 aliases.put("OGAM", OGHAM); 4426 aliases.put("OLCK", OL_CHIKI); 4427 aliases.put("ORKH", OLD_TURKIC); 4428 aliases.put("ORYA", ORIYA); 4429 aliases.put("OSMA", OSMANYA); 4430 aliases.put("PHAG", PHAGS_PA); 4431 aliases.put("PLRD", MIAO); 4432 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4433 aliases.put("PHNX", PHOENICIAN); 4434 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4435 aliases.put("RJNG", REJANG); 4436 aliases.put("RUNR", RUNIC); 4437 aliases.put("SAMR", SAMARITAN); 4438 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4439 aliases.put("SAUR", SAURASHTRA); 4440 aliases.put("SHAW", SHAVIAN); 4441 aliases.put("SHRD", SHARADA); 4442 aliases.put("SINH", SINHALA); 4443 aliases.put("SORA", SORA_SOMPENG); 4444 aliases.put("SUND", SUNDANESE); 4445 aliases.put("SYLO", SYLOTI_NAGRI); 4446 aliases.put("SYRC", SYRIAC); 4447 aliases.put("TAGB", TAGBANWA); 4448 aliases.put("TALE", TAI_LE); 4449 aliases.put("TAKR", TAKRI); 4450 aliases.put("TALU", NEW_TAI_LUE); 4451 aliases.put("TAML", TAMIL); 4452 aliases.put("TAVT", TAI_VIET); 4453 aliases.put("TELU", TELUGU); 4454 aliases.put("TFNG", TIFINAGH); 4455 aliases.put("TGLG", TAGALOG); 4456 aliases.put("THAA", THAANA); 4457 aliases.put("THAI", THAI); 4458 aliases.put("TIBT", TIBETAN); 4459 aliases.put("UGAR", UGARITIC); 4460 aliases.put("VAII", VAI); 4461 aliases.put("XPEO", OLD_PERSIAN); 4462 aliases.put("XSUX", CUNEIFORM); 4463 aliases.put("YIII", YI); 4464 
aliases.put("ZINH", INHERITED); 4465 aliases.put("ZYYY", COMMON); 4466 aliases.put("ZZZZ", UNKNOWN); 4467 } 4468 4469 /** 4470 * Returns the enum constant representing the Unicode script of which 4471 * the given character (Unicode code point) is assigned to. 4472 * 4473 * @param codePoint the character (Unicode code point) in question. 4474 * @return The {@code UnicodeScript} constant representing the 4475 * Unicode script of which this character is assigned to. 4476 * 4477 * @exception IllegalArgumentException if the specified 4478 * {@code codePoint} is an invalid Unicode code point. 4479 * @see Character#isValidCodePoint(int) 4480 * 4481 */ 4482 public static UnicodeScript of(int codePoint) { 4483 if (!isValidCodePoint(codePoint)) 4484 throw new IllegalArgumentException(); 4485 int type = getType(codePoint); 4486 // leave SURROGATE and PRIVATE_USE for table lookup 4487 if (type == UNASSIGNED) 4488 return UNKNOWN; 4489 int index = Arrays.binarySearch(scriptStarts, codePoint); 4490 if (index < 0) 4491 index = -index - 2; 4492 return scripts[index]; 4493 } 4494 4495 /** 4496 * Returns the UnicodeScript constant with the given Unicode script 4497 * name or the script name alias. Script names and their aliases are 4498 * determined by The Unicode Standard. The files Scripts<version>.txt 4499 * and PropertyValueAliases<version>.txt define script names 4500 * and the script name aliases for a particular version of the 4501 * standard. The {@link Character} class specifies the version of 4502 * the standard that it supports. 4503 * <p> 4504 * Character case is ignored for all of the valid script names. 4505 * The en_US locale's case mapping rules are used to provide 4506 * case-insensitive string comparisons for script name validation. 4507 * 4508 * @param scriptName A {@code UnicodeScript} name. 
4509 * @return The {@code UnicodeScript} constant identified 4510 * by {@code scriptName} 4511 * @throws IllegalArgumentException if {@code scriptName} is an 4512 * invalid name 4513 * @throws NullPointerException if {@code scriptName} is null 4514 */ 4515 public static final UnicodeScript forName(String scriptName) { 4516 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4517 //.replace(' ', '_')); 4518 UnicodeScript sc = aliases.get(scriptName); 4519 if (sc != null) 4520 return sc; 4521 return valueOf(scriptName); 4522 } 4523 } 4524 4525 /** 4526 * The value of the {@code Character}. 4527 * 4528 * @serial 4529 */ 4530 private final char value; 4531 4532 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4533 private static final long serialVersionUID = 3786198910865385080L; 4534 4535 /** 4536 * Constructs a newly allocated {@code Character} object that 4537 * represents the specified {@code char} value. 4538 * 4539 * @param value the value to be represented by the 4540 * {@code Character} object. 4541 */ 4542 public Character(char value) { 4543 this.value = value; 4544 } 4545 4546 private static class CharacterCache { 4547 private CharacterCache(){} 4548 4549 static final Character cache[] = new Character[127 + 1]; 4550 4551 static { 4552 for (int i = 0; i < cache.length; i++) 4553 cache[i] = new Character((char)i); 4554 } 4555 } 4556 4557 /** 4558 * Returns a <tt>Character</tt> instance representing the specified 4559 * <tt>char</tt> value. 4560 * If a new <tt>Character</tt> instance is not required, this method 4561 * should generally be used in preference to the constructor 4562 * {@link #Character(char)}, as this method is likely to yield 4563 * significantly better space and time performance by caching 4564 * frequently requested values. 4565 * 4566 * This method will always cache values in the range {@code 4567 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 4568 * cache other values outside of this range. 
4569 * 4570 * @param c a char value. 4571 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4572 * @since 1.5 4573 */ 4574 @HotSpotIntrinsicCandidate 4575 public static Character valueOf(char c) { 4576 if (c <= 127) { // must cache 4577 return CharacterCache.cache[(int)c]; 4578 } 4579 return new Character(c); 4580 } 4581 4582 /** 4583 * Returns the value of this {@code Character} object. 4584 * @return the primitive {@code char} value represented by 4585 * this object. 4586 */ 4587 @HotSpotIntrinsicCandidate 4588 public char charValue() { 4589 return value; 4590 } 4591 4592 /** 4593 * Returns a hash code for this {@code Character}; equal to the result 4594 * of invoking {@code charValue()}. 4595 * 4596 * @return a hash code value for this {@code Character} 4597 */ 4598 @Override 4599 public int hashCode() { 4600 return Character.hashCode(value); 4601 } 4602 4603 /** 4604 * Returns a hash code for a {@code char} value; compatible with 4605 * {@code Character.hashCode()}. 4606 * 4607 * @since 1.8 4608 * 4609 * @param value The {@code char} for which to return a hash code. 4610 * @return a hash code value for a {@code char} value. 4611 */ 4612 public static int hashCode(char value) { 4613 return (int)value; 4614 } 4615 4616 /** 4617 * Compares this object against the specified object. 4618 * The result is {@code true} if and only if the argument is not 4619 * {@code null} and is a {@code Character} object that 4620 * represents the same {@code char} value as this object. 4621 * 4622 * @param obj the object to compare with. 4623 * @return {@code true} if the objects are the same; 4624 * {@code false} otherwise. 4625 */ 4626 public boolean equals(Object obj) { 4627 if (obj instanceof Character) { 4628 return value == ((Character)obj).charValue(); 4629 } 4630 return false; 4631 } 4632 4633 /** 4634 * Returns a {@code String} object representing this 4635 * {@code Character}'s value. 
The result is a string of 4636 * length 1 whose sole component is the primitive 4637 * {@code char} value represented by this 4638 * {@code Character} object. 4639 * 4640 * @return a string representation of this object. 4641 */ 4642 public String toString() { 4643 char buf[] = {value}; 4644 return String.valueOf(buf); 4645 } 4646 4647 /** 4648 * Returns a {@code String} object representing the 4649 * specified {@code char}. The result is a string of length 4650 * 1 consisting solely of the specified {@code char}. 4651 * 4652 * @param c the {@code char} to be converted 4653 * @return the string representation of the specified {@code char} 4654 * @since 1.4 4655 */ 4656 public static String toString(char c) { 4657 return String.valueOf(c); 4658 } 4659 4660 /** 4661 * Determines whether the specified code point is a valid 4662 * <a href="http://www.unicode.org/glossary/#code_point"> 4663 * Unicode code point value</a>. 4664 * 4665 * @param codePoint the Unicode code point to be tested 4666 * @return {@code true} if the specified code point value is between 4667 * {@link #MIN_CODE_POINT} and 4668 * {@link #MAX_CODE_POINT} inclusive; 4669 * {@code false} otherwise. 4670 * @since 1.5 4671 */ 4672 public static boolean isValidCodePoint(int codePoint) { 4673 // Optimized form of: 4674 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4675 int plane = codePoint >>> 16; 4676 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4677 } 4678 4679 /** 4680 * Determines whether the specified character (Unicode code point) 4681 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4682 * Such code points can be represented using a single {@code char}. 4683 * 4684 * @param codePoint the character (Unicode code point) to be tested 4685 * @return {@code true} if the specified code point is between 4686 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4687 * {@code false} otherwise. 
4688 * @since 1.7 4689 */ 4690 public static boolean isBmpCodePoint(int codePoint) { 4691 return codePoint >>> 16 == 0; 4692 // Optimized form of: 4693 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4694 // We consistently use logical shift (>>>) to facilitate 4695 // additional runtime optimizations. 4696 } 4697 4698 /** 4699 * Determines whether the specified character (Unicode code point) 4700 * is in the <a href="#supplementary">supplementary character</a> range. 4701 * 4702 * @param codePoint the character (Unicode code point) to be tested 4703 * @return {@code true} if the specified code point is between 4704 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4705 * {@link #MAX_CODE_POINT} inclusive; 4706 * {@code false} otherwise. 4707 * @since 1.5 4708 */ 4709 public static boolean isSupplementaryCodePoint(int codePoint) { 4710 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4711 && codePoint < MAX_CODE_POINT + 1; 4712 } 4713 4714 /** 4715 * Determines if the given {@code char} value is a 4716 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4717 * Unicode high-surrogate code unit</a> 4718 * (also known as <i>leading-surrogate code unit</i>). 4719 * 4720 * <p>Such values do not represent characters by themselves, 4721 * but are used in the representation of 4722 * <a href="#supplementary">supplementary characters</a> 4723 * in the UTF-16 encoding. 4724 * 4725 * @param ch the {@code char} value to be tested. 4726 * @return {@code true} if the {@code char} value is between 4727 * {@link #MIN_HIGH_SURROGATE} and 4728 * {@link #MAX_HIGH_SURROGATE} inclusive; 4729 * {@code false} otherwise. 
4730 * @see Character#isLowSurrogate(char) 4731 * @see Character.UnicodeBlock#of(int) 4732 * @since 1.5 4733 */ 4734 public static boolean isHighSurrogate(char ch) { 4735 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4736 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4737 } 4738 4739 /** 4740 * Determines if the given {@code char} value is a 4741 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4742 * Unicode low-surrogate code unit</a> 4743 * (also known as <i>trailing-surrogate code unit</i>). 4744 * 4745 * <p>Such values do not represent characters by themselves, 4746 * but are used in the representation of 4747 * <a href="#supplementary">supplementary characters</a> 4748 * in the UTF-16 encoding. 4749 * 4750 * @param ch the {@code char} value to be tested. 4751 * @return {@code true} if the {@code char} value is between 4752 * {@link #MIN_LOW_SURROGATE} and 4753 * {@link #MAX_LOW_SURROGATE} inclusive; 4754 * {@code false} otherwise. 4755 * @see Character#isHighSurrogate(char) 4756 * @since 1.5 4757 */ 4758 public static boolean isLowSurrogate(char ch) { 4759 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4760 } 4761 4762 /** 4763 * Determines if the given {@code char} value is a Unicode 4764 * <i>surrogate code unit</i>. 4765 * 4766 * <p>Such values do not represent characters by themselves, 4767 * but are used in the representation of 4768 * <a href="#supplementary">supplementary characters</a> 4769 * in the UTF-16 encoding. 4770 * 4771 * <p>A char value is a surrogate code unit if and only if it is either 4772 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4773 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4774 * 4775 * @param ch the {@code char} value to be tested. 4776 * @return {@code true} if the {@code char} value is between 4777 * {@link #MIN_SURROGATE} and 4778 * {@link #MAX_SURROGATE} inclusive; 4779 * {@code false} otherwise. 
4780 * @since 1.7 4781 */ 4782 public static boolean isSurrogate(char ch) { 4783 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4784 } 4785 4786 /** 4787 * Determines whether the specified pair of {@code char} 4788 * values is a valid 4789 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4790 * Unicode surrogate pair</a>. 4791 4792 * <p>This method is equivalent to the expression: 4793 * <blockquote><pre>{@code 4794 * isHighSurrogate(high) && isLowSurrogate(low) 4795 * }</pre></blockquote> 4796 * 4797 * @param high the high-surrogate code value to be tested 4798 * @param low the low-surrogate code value to be tested 4799 * @return {@code true} if the specified high and 4800 * low-surrogate code values represent a valid surrogate pair; 4801 * {@code false} otherwise. 4802 * @since 1.5 4803 */ 4804 public static boolean isSurrogatePair(char high, char low) { 4805 return isHighSurrogate(high) && isLowSurrogate(low); 4806 } 4807 4808 /** 4809 * Determines the number of {@code char} values needed to 4810 * represent the specified character (Unicode code point). If the 4811 * specified character is equal to or greater than 0x10000, then 4812 * the method returns 2. Otherwise, the method returns 1. 4813 * 4814 * <p>This method doesn't validate the specified character to be a 4815 * valid Unicode code point. The caller must validate the 4816 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4817 * if necessary. 4818 * 4819 * @param codePoint the character (Unicode code point) to be tested. 4820 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4821 * @see Character#isSupplementaryCodePoint(int) 4822 * @since 1.5 4823 */ 4824 public static int charCount(int codePoint) { 4825 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4826 } 4827 4828 /** 4829 * Converts the specified surrogate pair to its supplementary code 4830 * point value. 
This method does not validate the specified 4831 * surrogate pair. The caller must validate it using {@link 4832 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4833 * 4834 * @param high the high-surrogate code unit 4835 * @param low the low-surrogate code unit 4836 * @return the supplementary code point composed from the 4837 * specified surrogate pair. 4838 * @since 1.5 4839 */ 4840 public static int toCodePoint(char high, char low) { 4841 // Optimized form of: 4842 // return ((high - MIN_HIGH_SURROGATE) << 10) 4843 // + (low - MIN_LOW_SURROGATE) 4844 // + MIN_SUPPLEMENTARY_CODE_POINT; 4845 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4846 - (MIN_HIGH_SURROGATE << 10) 4847 - MIN_LOW_SURROGATE); 4848 } 4849 4850 /** 4851 * Returns the code point at the given index of the 4852 * {@code CharSequence}. If the {@code char} value at 4853 * the given index in the {@code CharSequence} is in the 4854 * high-surrogate range, the following index is less than the 4855 * length of the {@code CharSequence}, and the 4856 * {@code char} value at the following index is in the 4857 * low-surrogate range, then the supplementary code point 4858 * corresponding to this surrogate pair is returned. Otherwise, 4859 * the {@code char} value at the given index is returned. 4860 * 4861 * @param seq a sequence of {@code char} values (Unicode code 4862 * units) 4863 * @param index the index to the {@code char} values (Unicode 4864 * code units) in {@code seq} to be converted 4865 * @return the Unicode code point at the given index 4866 * @exception NullPointerException if {@code seq} is null. 4867 * @exception IndexOutOfBoundsException if the value 4868 * {@code index} is negative or not less than 4869 * {@link CharSequence#length() seq.length()}. 
4870 * @since 1.5 4871 */ 4872 public static int codePointAt(CharSequence seq, int index) { 4873 char c1 = seq.charAt(index); 4874 if (isHighSurrogate(c1) && ++index < seq.length()) { 4875 char c2 = seq.charAt(index); 4876 if (isLowSurrogate(c2)) { 4877 return toCodePoint(c1, c2); 4878 } 4879 } 4880 return c1; 4881 } 4882 4883 /** 4884 * Returns the code point at the given index of the 4885 * {@code char} array. If the {@code char} value at 4886 * the given index in the {@code char} array is in the 4887 * high-surrogate range, the following index is less than the 4888 * length of the {@code char} array, and the 4889 * {@code char} value at the following index is in the 4890 * low-surrogate range, then the supplementary code point 4891 * corresponding to this surrogate pair is returned. Otherwise, 4892 * the {@code char} value at the given index is returned. 4893 * 4894 * @param a the {@code char} array 4895 * @param index the index to the {@code char} values (Unicode 4896 * code units) in the {@code char} array to be converted 4897 * @return the Unicode code point at the given index 4898 * @exception NullPointerException if {@code a} is null. 4899 * @exception IndexOutOfBoundsException if the value 4900 * {@code index} is negative or not less than 4901 * the length of the {@code char} array. 4902 * @since 1.5 4903 */ 4904 public static int codePointAt(char[] a, int index) { 4905 return codePointAtImpl(a, index, a.length); 4906 } 4907 4908 /** 4909 * Returns the code point at the given index of the 4910 * {@code char} array, where only array elements with 4911 * {@code index} less than {@code limit} can be used. 
If 4912 * the {@code char} value at the given index in the 4913 * {@code char} array is in the high-surrogate range, the 4914 * following index is less than the {@code limit}, and the 4915 * {@code char} value at the following index is in the 4916 * low-surrogate range, then the supplementary code point 4917 * corresponding to this surrogate pair is returned. Otherwise, 4918 * the {@code char} value at the given index is returned. 4919 * 4920 * @param a the {@code char} array 4921 * @param index the index to the {@code char} values (Unicode 4922 * code units) in the {@code char} array to be converted 4923 * @param limit the index after the last array element that 4924 * can be used in the {@code char} array 4925 * @return the Unicode code point at the given index 4926 * @exception NullPointerException if {@code a} is null. 4927 * @exception IndexOutOfBoundsException if the {@code index} 4928 * argument is negative or not less than the {@code limit} 4929 * argument, or if the {@code limit} argument is negative or 4930 * greater than the length of the {@code char} array. 4931 * @since 1.5 4932 */ 4933 public static int codePointAt(char[] a, int index, int limit) { 4934 if (index >= limit || limit < 0 || limit > a.length) { 4935 throw new IndexOutOfBoundsException(); 4936 } 4937 return codePointAtImpl(a, index, limit); 4938 } 4939 4940 // throws ArrayIndexOutOfBoundsException if index out of bounds 4941 static int codePointAtImpl(char[] a, int index, int limit) { 4942 char c1 = a[index]; 4943 if (isHighSurrogate(c1) && ++index < limit) { 4944 char c2 = a[index]; 4945 if (isLowSurrogate(c2)) { 4946 return toCodePoint(c1, c2); 4947 } 4948 } 4949 return c1; 4950 } 4951 4952 /** 4953 * Returns the code point preceding the given index of the 4954 * {@code CharSequence}. 
If the {@code char} value at 4955 * {@code (index - 1)} in the {@code CharSequence} is in 4956 * the low-surrogate range, {@code (index - 2)} is not 4957 * negative, and the {@code char} value at {@code (index - 2)} 4958 * in the {@code CharSequence} is in the 4959 * high-surrogate range, then the supplementary code point 4960 * corresponding to this surrogate pair is returned. Otherwise, 4961 * the {@code char} value at {@code (index - 1)} is 4962 * returned. 4963 * 4964 * @param seq the {@code CharSequence} instance 4965 * @param index the index following the code point that should be returned 4966 * @return the Unicode code point value before the given index. 4967 * @exception NullPointerException if {@code seq} is null. 4968 * @exception IndexOutOfBoundsException if the {@code index} 4969 * argument is less than 1 or greater than {@link 4970 * CharSequence#length() seq.length()}. 4971 * @since 1.5 4972 */ 4973 public static int codePointBefore(CharSequence seq, int index) { 4974 char c2 = seq.charAt(--index); 4975 if (isLowSurrogate(c2) && index > 0) { 4976 char c1 = seq.charAt(--index); 4977 if (isHighSurrogate(c1)) { 4978 return toCodePoint(c1, c2); 4979 } 4980 } 4981 return c2; 4982 } 4983 4984 /** 4985 * Returns the code point preceding the given index of the 4986 * {@code char} array. If the {@code char} value at 4987 * {@code (index - 1)} in the {@code char} array is in 4988 * the low-surrogate range, {@code (index - 2)} is not 4989 * negative, and the {@code char} value at {@code (index - 2)} 4990 * in the {@code char} array is in the 4991 * high-surrogate range, then the supplementary code point 4992 * corresponding to this surrogate pair is returned. Otherwise, 4993 * the {@code char} value at {@code (index - 1)} is 4994 * returned. 4995 * 4996 * @param a the {@code char} array 4997 * @param index the index following the code point that should be returned 4998 * @return the Unicode code point value before the given index. 
4999 * @exception NullPointerException if {@code a} is null. 5000 * @exception IndexOutOfBoundsException if the {@code index} 5001 * argument is less than 1 or greater than the length of the 5002 * {@code char} array 5003 * @since 1.5 5004 */ 5005 public static int codePointBefore(char[] a, int index) { 5006 return codePointBeforeImpl(a, index, 0); 5007 } 5008 5009 /** 5010 * Returns the code point preceding the given index of the 5011 * {@code char} array, where only array elements with 5012 * {@code index} greater than or equal to {@code start} 5013 * can be used. If the {@code char} value at {@code (index - 1)} 5014 * in the {@code char} array is in the 5015 * low-surrogate range, {@code (index - 2)} is not less than 5016 * {@code start}, and the {@code char} value at 5017 * {@code (index - 2)} in the {@code char} array is in 5018 * the high-surrogate range, then the supplementary code point 5019 * corresponding to this surrogate pair is returned. Otherwise, 5020 * the {@code char} value at {@code (index - 1)} is 5021 * returned. 5022 * 5023 * @param a the {@code char} array 5024 * @param index the index following the code point that should be returned 5025 * @param start the index of the first array element in the 5026 * {@code char} array 5027 * @return the Unicode code point value before the given index. 5028 * @exception NullPointerException if {@code a} is null. 5029 * @exception IndexOutOfBoundsException if the {@code index} 5030 * argument is not greater than the {@code start} argument or 5031 * is greater than the length of the {@code char} array, or 5032 * if the {@code start} argument is negative or not less than 5033 * the length of the {@code char} array. 
5034 * @since 1.5 5035 */ 5036 public static int codePointBefore(char[] a, int index, int start) { 5037 if (index <= start || start < 0 || start >= a.length) { 5038 throw new IndexOutOfBoundsException(); 5039 } 5040 return codePointBeforeImpl(a, index, start); 5041 } 5042 5043 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 5044 static int codePointBeforeImpl(char[] a, int index, int start) { 5045 char c2 = a[--index]; 5046 if (isLowSurrogate(c2) && index > start) { 5047 char c1 = a[--index]; 5048 if (isHighSurrogate(c1)) { 5049 return toCodePoint(c1, c2); 5050 } 5051 } 5052 return c2; 5053 } 5054 5055 /** 5056 * Returns the leading surrogate (a 5057 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 5058 * high surrogate code unit</a>) of the 5059 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5060 * surrogate pair</a> 5061 * representing the specified supplementary character (Unicode 5062 * code point) in the UTF-16 encoding. If the specified character 5063 * is not a 5064 * <a href="Character.html#supplementary">supplementary character</a>, 5065 * an unspecified {@code char} is returned. 5066 * 5067 * <p>If 5068 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5069 * is {@code true}, then 5070 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 5071 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 5072 * are also always {@code true}. 
5073 * 5074 * @param codePoint a supplementary character (Unicode code point) 5075 * @return the leading surrogate code unit used to represent the 5076 * character in the UTF-16 encoding 5077 * @since 1.7 5078 */ 5079 public static char highSurrogate(int codePoint) { 5080 return (char) ((codePoint >>> 10) 5081 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 5082 } 5083 5084 /** 5085 * Returns the trailing surrogate (a 5086 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 5087 * low surrogate code unit</a>) of the 5088 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5089 * surrogate pair</a> 5090 * representing the specified supplementary character (Unicode 5091 * code point) in the UTF-16 encoding. If the specified character 5092 * is not a 5093 * <a href="Character.html#supplementary">supplementary character</a>, 5094 * an unspecified {@code char} is returned. 5095 * 5096 * <p>If 5097 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5098 * is {@code true}, then 5099 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 5100 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 5101 * are also always {@code true}. 5102 * 5103 * @param codePoint a supplementary character (Unicode code point) 5104 * @return the trailing surrogate code unit used to represent the 5105 * character in the UTF-16 encoding 5106 * @since 1.7 5107 */ 5108 public static char lowSurrogate(int codePoint) { 5109 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 5110 } 5111 5112 /** 5113 * Converts the specified character (Unicode code point) to its 5114 * UTF-16 representation. If the specified code point is a BMP 5115 * (Basic Multilingual Plane or Plane 0) value, the same value is 5116 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 5117 * specified code point is a supplementary character, its 5118 * surrogate values are stored in {@code dst[dstIndex]} 5119 * (high-surrogate) and {@code dst[dstIndex+1]} 5120 * (low-surrogate), and 2 is returned. 5121 * 5122 * @param codePoint the character (Unicode code point) to be converted. 5123 * @param dst an array of {@code char} in which the 5124 * {@code codePoint}'s UTF-16 value is stored. 5125 * @param dstIndex the start index into the {@code dst} 5126 * array where the converted value is stored. 5127 * @return 1 if the code point is a BMP code point, 2 if the 5128 * code point is a supplementary code point. 5129 * @exception IllegalArgumentException if the specified 5130 * {@code codePoint} is not a valid Unicode code point. 5131 * @exception NullPointerException if the specified {@code dst} is null. 5132 * @exception IndexOutOfBoundsException if {@code dstIndex} 5133 * is negative or not less than {@code dst.length}, or if 5134 * {@code dst} at {@code dstIndex} doesn't have enough 5135 * array element(s) to store the resulting {@code char} 5136 * value(s). (If {@code dstIndex} is equal to 5137 * {@code dst.length-1} and the specified 5138 * {@code codePoint} is a supplementary character, the 5139 * high-surrogate value is not stored in 5140 * {@code dst[dstIndex]}.) 5141 * @since 1.5 5142 */ 5143 public static int toChars(int codePoint, char[] dst, int dstIndex) { 5144 if (isBmpCodePoint(codePoint)) { 5145 dst[dstIndex] = (char) codePoint; 5146 return 1; 5147 } else if (isValidCodePoint(codePoint)) { 5148 toSurrogates(codePoint, dst, dstIndex); 5149 return 2; 5150 } else { 5151 throw new IllegalArgumentException(); 5152 } 5153 } 5154 5155 /** 5156 * Converts the specified character (Unicode code point) to its 5157 * UTF-16 representation stored in a {@code char} array. 
If 5158 * the specified code point is a BMP (Basic Multilingual Plane or 5159 * Plane 0) value, the resulting {@code char} array has 5160 * the same value as {@code codePoint}. If the specified code 5161 * point is a supplementary code point, the resulting 5162 * {@code char} array has the corresponding surrogate pair. 5163 * 5164 * @param codePoint a Unicode code point 5165 * @return a {@code char} array having 5166 * {@code codePoint}'s UTF-16 representation. 5167 * @exception IllegalArgumentException if the specified 5168 * {@code codePoint} is not a valid Unicode code point. 5169 * @since 1.5 5170 */ 5171 public static char[] toChars(int codePoint) { 5172 if (isBmpCodePoint(codePoint)) { 5173 return new char[] { (char) codePoint }; 5174 } else if (isValidCodePoint(codePoint)) { 5175 char[] result = new char[2]; 5176 toSurrogates(codePoint, result, 0); 5177 return result; 5178 } else { 5179 throw new IllegalArgumentException(); 5180 } 5181 } 5182 5183 static void toSurrogates(int codePoint, char[] dst, int index) { 5184 // We write elements "backwards" to guarantee all-or-nothing 5185 dst[index+1] = lowSurrogate(codePoint); 5186 dst[index] = highSurrogate(codePoint); 5187 } 5188 5189 /** 5190 * Returns the number of Unicode code points in the text range of 5191 * the specified char sequence. The text range begins at the 5192 * specified {@code beginIndex} and extends to the 5193 * {@code char} at index {@code endIndex - 1}. Thus the 5194 * length (in {@code char}s) of the text range is 5195 * {@code endIndex-beginIndex}. Unpaired surrogates within 5196 * the text range count as one code point each. 5197 * 5198 * @param seq the char sequence 5199 * @param beginIndex the index to the first {@code char} of 5200 * the text range. 5201 * @param endIndex the index after the last {@code char} of 5202 * the text range. 5203 * @return the number of Unicode code points in the specified text 5204 * range 5205 * @exception NullPointerException if {@code seq} is null. 
5206 * @exception IndexOutOfBoundsException if the 5207 * {@code beginIndex} is negative, or {@code endIndex} 5208 * is larger than the length of the given sequence, or 5209 * {@code beginIndex} is larger than {@code endIndex}. 5210 * @since 1.5 5211 */ 5212 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5213 int length = seq.length(); 5214 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5215 throw new IndexOutOfBoundsException(); 5216 } 5217 int n = endIndex - beginIndex; 5218 for (int i = beginIndex; i < endIndex; ) { 5219 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5220 isLowSurrogate(seq.charAt(i))) { 5221 n--; 5222 i++; 5223 } 5224 } 5225 return n; 5226 } 5227 5228 /** 5229 * Returns the number of Unicode code points in a subarray of the 5230 * {@code char} array argument. The {@code offset} 5231 * argument is the index of the first {@code char} of the 5232 * subarray and the {@code count} argument specifies the 5233 * length of the subarray in {@code char}s. Unpaired 5234 * surrogates within the subarray count as one code point each. 5235 * 5236 * @param a the {@code char} array 5237 * @param offset the index of the first {@code char} in the 5238 * given {@code char} array 5239 * @param count the length of the subarray in {@code char}s 5240 * @return the number of Unicode code points in the specified subarray 5241 * @exception NullPointerException if {@code a} is null. 5242 * @exception IndexOutOfBoundsException if {@code offset} or 5243 * {@code count} is negative, or if {@code offset + 5244 * count} is larger than the length of the given array. 
5245 * @since 1.5 5246 */ 5247 public static int codePointCount(char[] a, int offset, int count) { 5248 if (count > a.length - offset || offset < 0 || count < 0) { 5249 throw new IndexOutOfBoundsException(); 5250 } 5251 return codePointCountImpl(a, offset, count); 5252 } 5253 5254 static int codePointCountImpl(char[] a, int offset, int count) { 5255 int endIndex = offset + count; 5256 int n = count; 5257 for (int i = offset; i < endIndex; ) { 5258 if (isHighSurrogate(a[i++]) && i < endIndex && 5259 isLowSurrogate(a[i])) { 5260 n--; 5261 i++; 5262 } 5263 } 5264 return n; 5265 } 5266 5267 /** 5268 * Returns the index within the given char sequence that is offset 5269 * from the given {@code index} by {@code codePointOffset} 5270 * code points. Unpaired surrogates within the text range given by 5271 * {@code index} and {@code codePointOffset} count as 5272 * one code point each. 5273 * 5274 * @param seq the char sequence 5275 * @param index the index to be offset 5276 * @param codePointOffset the offset in code points 5277 * @return the index within the char sequence 5278 * @exception NullPointerException if {@code seq} is null. 5279 * @exception IndexOutOfBoundsException if {@code index} 5280 * is negative or larger then the length of the char sequence, 5281 * or if {@code codePointOffset} is positive and the 5282 * subsequence starting with {@code index} has fewer than 5283 * {@code codePointOffset} code points, or if 5284 * {@code codePointOffset} is negative and the subsequence 5285 * before {@code index} has fewer than the absolute value 5286 * of {@code codePointOffset} code points. 
5287 * @since 1.5 5288 */ 5289 public static int offsetByCodePoints(CharSequence seq, int index, 5290 int codePointOffset) { 5291 int length = seq.length(); 5292 if (index < 0 || index > length) { 5293 throw new IndexOutOfBoundsException(); 5294 } 5295 5296 int x = index; 5297 if (codePointOffset >= 0) { 5298 int i; 5299 for (i = 0; x < length && i < codePointOffset; i++) { 5300 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5301 isLowSurrogate(seq.charAt(x))) { 5302 x++; 5303 } 5304 } 5305 if (i < codePointOffset) { 5306 throw new IndexOutOfBoundsException(); 5307 } 5308 } else { 5309 int i; 5310 for (i = codePointOffset; x > 0 && i < 0; i++) { 5311 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5312 isHighSurrogate(seq.charAt(x-1))) { 5313 x--; 5314 } 5315 } 5316 if (i < 0) { 5317 throw new IndexOutOfBoundsException(); 5318 } 5319 } 5320 return x; 5321 } 5322 5323 /** 5324 * Returns the index within the given {@code char} subarray 5325 * that is offset from the given {@code index} by 5326 * {@code codePointOffset} code points. The 5327 * {@code start} and {@code count} arguments specify a 5328 * subarray of the {@code char} array. Unpaired surrogates 5329 * within the text range given by {@code index} and 5330 * {@code codePointOffset} count as one code point each. 5331 * 5332 * @param a the {@code char} array 5333 * @param start the index of the first {@code char} of the 5334 * subarray 5335 * @param count the length of the subarray in {@code char}s 5336 * @param index the index to be offset 5337 * @param codePointOffset the offset in code points 5338 * @return the index within the subarray 5339 * @exception NullPointerException if {@code a} is null. 
5340 * @exception IndexOutOfBoundsException 5341 * if {@code start} or {@code count} is negative, 5342 * or if {@code start + count} is larger than the length of 5343 * the given array, 5344 * or if {@code index} is less than {@code start} or 5345 * larger then {@code start + count}, 5346 * or if {@code codePointOffset} is positive and the text range 5347 * starting with {@code index} and ending with {@code start + count - 1} 5348 * has fewer than {@code codePointOffset} code 5349 * points, 5350 * or if {@code codePointOffset} is negative and the text range 5351 * starting with {@code start} and ending with {@code index - 1} 5352 * has fewer than the absolute value of 5353 * {@code codePointOffset} code points. 5354 * @since 1.5 5355 */ 5356 public static int offsetByCodePoints(char[] a, int start, int count, 5357 int index, int codePointOffset) { 5358 if (count > a.length-start || start < 0 || count < 0 5359 || index < start || index > start+count) { 5360 throw new IndexOutOfBoundsException(); 5361 } 5362 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5363 } 5364 5365 static int offsetByCodePointsImpl(char[]a, int start, int count, 5366 int index, int codePointOffset) { 5367 int x = index; 5368 if (codePointOffset >= 0) { 5369 int limit = start + count; 5370 int i; 5371 for (i = 0; x < limit && i < codePointOffset; i++) { 5372 if (isHighSurrogate(a[x++]) && x < limit && 5373 isLowSurrogate(a[x])) { 5374 x++; 5375 } 5376 } 5377 if (i < codePointOffset) { 5378 throw new IndexOutOfBoundsException(); 5379 } 5380 } else { 5381 int i; 5382 for (i = codePointOffset; x > start && i < 0; i++) { 5383 if (isLowSurrogate(a[--x]) && x > start && 5384 isHighSurrogate(a[x-1])) { 5385 x--; 5386 } 5387 } 5388 if (i < 0) { 5389 throw new IndexOutOfBoundsException(); 5390 } 5391 } 5392 return x; 5393 } 5394 5395 /** 5396 * Determines if the specified character is a lowercase character. 
5397 * <p> 5398 * A character is lowercase if its general category type, provided 5399 * by {@code Character.getType(ch)}, is 5400 * {@code LOWERCASE_LETTER}, or it has contributory property 5401 * Other_Lowercase as defined by the Unicode Standard. 5402 * <p> 5403 * The following are examples of lowercase characters: 5404 * <blockquote><pre> 5405 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5406 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5407 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5408 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5409 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5410 * </pre></blockquote> 5411 * <p> Many other Unicode characters are lowercase too. 5412 * 5413 * <p><b>Note:</b> This method cannot handle <a 5414 * href="#supplementary"> supplementary characters</a>. To support 5415 * all Unicode characters, including supplementary characters, use 5416 * the {@link #isLowerCase(int)} method. 5417 * 5418 * @param ch the character to be tested. 5419 * @return {@code true} if the character is lowercase; 5420 * {@code false} otherwise. 5421 * @see Character#isLowerCase(char) 5422 * @see Character#isTitleCase(char) 5423 * @see Character#toLowerCase(char) 5424 * @see Character#getType(char) 5425 */ 5426 public static boolean isLowerCase(char ch) { 5427 return isLowerCase((int)ch); 5428 } 5429 5430 /** 5431 * Determines if the specified character (Unicode code point) is a 5432 * lowercase character. 5433 * <p> 5434 * A character is lowercase if its general category type, provided 5435 * by {@link Character#getType getType(codePoint)}, is 5436 * {@code LOWERCASE_LETTER}, or it has contributory property 5437 * Other_Lowercase as defined by the Unicode Standard. 
5438 * <p> 5439 * The following are examples of lowercase characters: 5440 * <blockquote><pre> 5441 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5442 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5443 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5444 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5445 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5446 * </pre></blockquote> 5447 * <p> Many other Unicode characters are lowercase too. 5448 * 5449 * @param codePoint the character (Unicode code point) to be tested. 5450 * @return {@code true} if the character is lowercase; 5451 * {@code false} otherwise. 5452 * @see Character#isLowerCase(int) 5453 * @see Character#isTitleCase(int) 5454 * @see Character#toLowerCase(int) 5455 * @see Character#getType(int) 5456 * @since 1.5 5457 */ 5458 public static boolean isLowerCase(int codePoint) { 5459 return getType(codePoint) == Character.LOWERCASE_LETTER || 5460 CharacterData.of(codePoint).isOtherLowercase(codePoint); 5461 } 5462 5463 /** 5464 * Determines if the specified character is an uppercase character. 5465 * <p> 5466 * A character is uppercase if its general category type, provided by 5467 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 5468 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5469 * <p> 5470 * The following are examples of uppercase characters: 5471 * <blockquote><pre> 5472 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5473 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5474 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5475 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5476 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5477 * </pre></blockquote> 5478 * <p> Many other Unicode characters are uppercase too. 
5479 * 5480 * <p><b>Note:</b> This method cannot handle <a 5481 * href="#supplementary"> supplementary characters</a>. To support 5482 * all Unicode characters, including supplementary characters, use 5483 * the {@link #isUpperCase(int)} method. 5484 * 5485 * @param ch the character to be tested. 5486 * @return {@code true} if the character is uppercase; 5487 * {@code false} otherwise. 5488 * @see Character#isLowerCase(char) 5489 * @see Character#isTitleCase(char) 5490 * @see Character#toUpperCase(char) 5491 * @see Character#getType(char) 5492 * @since 1.0 5493 */ 5494 public static boolean isUpperCase(char ch) { 5495 return isUpperCase((int)ch); 5496 } 5497 5498 /** 5499 * Determines if the specified character (Unicode code point) is an uppercase character. 5500 * <p> 5501 * A character is uppercase if its general category type, provided by 5502 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 5503 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5504 * <p> 5505 * The following are examples of uppercase characters: 5506 * <blockquote><pre> 5507 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5508 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5509 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5510 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5511 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5512 * </pre></blockquote> 5513 * <p> Many other Unicode characters are uppercase too. 5514 * 5515 * @param codePoint the character (Unicode code point) to be tested. 5516 * @return {@code true} if the character is uppercase; 5517 * {@code false} otherwise. 
5518 * @see Character#isLowerCase(int) 5519 * @see Character#isTitleCase(int) 5520 * @see Character#toUpperCase(int) 5521 * @see Character#getType(int) 5522 * @since 1.5 5523 */ 5524 public static boolean isUpperCase(int codePoint) { 5525 return getType(codePoint) == Character.UPPERCASE_LETTER || 5526 CharacterData.of(codePoint).isOtherUppercase(codePoint); 5527 } 5528 5529 /** 5530 * Determines if the specified character is a titlecase character. 5531 * <p> 5532 * A character is a titlecase character if its general 5533 * category type, provided by {@code Character.getType(ch)}, 5534 * is {@code TITLECASE_LETTER}. 5535 * <p> 5536 * Some characters look like pairs of Latin letters. For example, there 5537 * is an uppercase letter that looks like "LJ" and has a corresponding 5538 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5539 * is the appropriate form to use when rendering a word in lowercase 5540 * with initial capitals, as for a book title. 5541 * <p> 5542 * These are some of the Unicode characters for which this method returns 5543 * {@code true}: 5544 * <ul> 5545 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5546 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5547 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5548 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5549 * </ul> 5550 * <p> Many other Unicode characters are titlecase too. 5551 * 5552 * <p><b>Note:</b> This method cannot handle <a 5553 * href="#supplementary"> supplementary characters</a>. To support 5554 * all Unicode characters, including supplementary characters, use 5555 * the {@link #isTitleCase(int)} method. 5556 * 5557 * @param ch the character to be tested. 5558 * @return {@code true} if the character is titlecase; 5559 * {@code false} otherwise. 
5560 * @see Character#isLowerCase(char) 5561 * @see Character#isUpperCase(char) 5562 * @see Character#toTitleCase(char) 5563 * @see Character#getType(char) 5564 * @since 1.0.2 5565 */ 5566 public static boolean isTitleCase(char ch) { 5567 return isTitleCase((int)ch); 5568 } 5569 5570 /** 5571 * Determines if the specified character (Unicode code point) is a titlecase character. 5572 * <p> 5573 * A character is a titlecase character if its general 5574 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5575 * is {@code TITLECASE_LETTER}. 5576 * <p> 5577 * Some characters look like pairs of Latin letters. For example, there 5578 * is an uppercase letter that looks like "LJ" and has a corresponding 5579 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5580 * is the appropriate form to use when rendering a word in lowercase 5581 * with initial capitals, as for a book title. 5582 * <p> 5583 * These are some of the Unicode characters for which this method returns 5584 * {@code true}: 5585 * <ul> 5586 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5587 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5588 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5589 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5590 * </ul> 5591 * <p> Many other Unicode characters are titlecase too. 5592 * 5593 * @param codePoint the character (Unicode code point) to be tested. 5594 * @return {@code true} if the character is titlecase; 5595 * {@code false} otherwise. 5596 * @see Character#isLowerCase(int) 5597 * @see Character#isUpperCase(int) 5598 * @see Character#toTitleCase(int) 5599 * @see Character#getType(int) 5600 * @since 1.5 5601 */ 5602 public static boolean isTitleCase(int codePoint) { 5603 return getType(codePoint) == Character.TITLECASE_LETTER; 5604 } 5605 5606 /** 5607 * Determines if the specified character is a digit. 
5608 * <p> 5609 * A character is a digit if its general category type, provided 5610 * by {@code Character.getType(ch)}, is 5611 * {@code DECIMAL_DIGIT_NUMBER}. 5612 * <p> 5613 * Some Unicode character ranges that contain digits: 5614 * <ul> 5615 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5616 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5617 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5618 * Arabic-Indic digits 5619 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5620 * Extended Arabic-Indic digits 5621 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5622 * Devanagari digits 5623 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5624 * Fullwidth digits 5625 * </ul> 5626 * 5627 * Many other character ranges contain digits as well. 5628 * 5629 * <p><b>Note:</b> This method cannot handle <a 5630 * href="#supplementary"> supplementary characters</a>. To support 5631 * all Unicode characters, including supplementary characters, use 5632 * the {@link #isDigit(int)} method. 5633 * 5634 * @param ch the character to be tested. 5635 * @return {@code true} if the character is a digit; 5636 * {@code false} otherwise. 5637 * @see Character#digit(char, int) 5638 * @see Character#forDigit(int, int) 5639 * @see Character#getType(char) 5640 */ 5641 public static boolean isDigit(char ch) { 5642 return isDigit((int)ch); 5643 } 5644 5645 /** 5646 * Determines if the specified character (Unicode code point) is a digit. 5647 * <p> 5648 * A character is a digit if its general category type, provided 5649 * by {@link Character#getType(int) getType(codePoint)}, is 5650 * {@code DECIMAL_DIGIT_NUMBER}. 
5651 * <p> 5652 * Some Unicode character ranges that contain digits: 5653 * <ul> 5654 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5655 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5656 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5657 * Arabic-Indic digits 5658 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5659 * Extended Arabic-Indic digits 5660 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5661 * Devanagari digits 5662 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5663 * Fullwidth digits 5664 * </ul> 5665 * 5666 * Many other character ranges contain digits as well. 5667 * 5668 * @param codePoint the character (Unicode code point) to be tested. 5669 * @return {@code true} if the character is a digit; 5670 * {@code false} otherwise. 5671 * @see Character#forDigit(int, int) 5672 * @see Character#getType(int) 5673 * @since 1.5 5674 */ 5675 public static boolean isDigit(int codePoint) { 5676 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5677 } 5678 5679 /** 5680 * Determines if a character is defined in Unicode. 5681 * <p> 5682 * A character is defined if at least one of the following is true: 5683 * <ul> 5684 * <li>It has an entry in the UnicodeData file. 5685 * <li>It has a value in a range defined by the UnicodeData file. 5686 * </ul> 5687 * 5688 * <p><b>Note:</b> This method cannot handle <a 5689 * href="#supplementary"> supplementary characters</a>. To support 5690 * all Unicode characters, including supplementary characters, use 5691 * the {@link #isDefined(int)} method. 5692 * 5693 * @param ch the character to be tested 5694 * @return {@code true} if the character has a defined meaning 5695 * in Unicode; {@code false} otherwise. 
5696 * @see Character#isDigit(char) 5697 * @see Character#isLetter(char) 5698 * @see Character#isLetterOrDigit(char) 5699 * @see Character#isLowerCase(char) 5700 * @see Character#isTitleCase(char) 5701 * @see Character#isUpperCase(char) 5702 * @since 1.0.2 5703 */ 5704 public static boolean isDefined(char ch) { 5705 return isDefined((int)ch); 5706 } 5707 5708 /** 5709 * Determines if a character (Unicode code point) is defined in Unicode. 5710 * <p> 5711 * A character is defined if at least one of the following is true: 5712 * <ul> 5713 * <li>It has an entry in the UnicodeData file. 5714 * <li>It has a value in a range defined by the UnicodeData file. 5715 * </ul> 5716 * 5717 * @param codePoint the character (Unicode code point) to be tested. 5718 * @return {@code true} if the character has a defined meaning 5719 * in Unicode; {@code false} otherwise. 5720 * @see Character#isDigit(int) 5721 * @see Character#isLetter(int) 5722 * @see Character#isLetterOrDigit(int) 5723 * @see Character#isLowerCase(int) 5724 * @see Character#isTitleCase(int) 5725 * @see Character#isUpperCase(int) 5726 * @since 1.5 5727 */ 5728 public static boolean isDefined(int codePoint) { 5729 return getType(codePoint) != Character.UNASSIGNED; 5730 } 5731 5732 /** 5733 * Determines if the specified character is a letter. 5734 * <p> 5735 * A character is considered to be a letter if its general 5736 * category type, provided by {@code Character.getType(ch)}, 5737 * is any of the following: 5738 * <ul> 5739 * <li> {@code UPPERCASE_LETTER} 5740 * <li> {@code LOWERCASE_LETTER} 5741 * <li> {@code TITLECASE_LETTER} 5742 * <li> {@code MODIFIER_LETTER} 5743 * <li> {@code OTHER_LETTER} 5744 * </ul> 5745 * 5746 * Not all letters have case. Many characters are 5747 * letters but are neither uppercase nor lowercase nor titlecase. 5748 * 5749 * <p><b>Note:</b> This method cannot handle <a 5750 * href="#supplementary"> supplementary characters</a>. 
To support 5751 * all Unicode characters, including supplementary characters, use 5752 * the {@link #isLetter(int)} method. 5753 * 5754 * @param ch the character to be tested. 5755 * @return {@code true} if the character is a letter; 5756 * {@code false} otherwise. 5757 * @see Character#isDigit(char) 5758 * @see Character#isJavaIdentifierStart(char) 5759 * @see Character#isJavaLetter(char) 5760 * @see Character#isJavaLetterOrDigit(char) 5761 * @see Character#isLetterOrDigit(char) 5762 * @see Character#isLowerCase(char) 5763 * @see Character#isTitleCase(char) 5764 * @see Character#isUnicodeIdentifierStart(char) 5765 * @see Character#isUpperCase(char) 5766 */ 5767 public static boolean isLetter(char ch) { 5768 return isLetter((int)ch); 5769 } 5770 5771 /** 5772 * Determines if the specified character (Unicode code point) is a letter. 5773 * <p> 5774 * A character is considered to be a letter if its general 5775 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5776 * is any of the following: 5777 * <ul> 5778 * <li> {@code UPPERCASE_LETTER} 5779 * <li> {@code LOWERCASE_LETTER} 5780 * <li> {@code TITLECASE_LETTER} 5781 * <li> {@code MODIFIER_LETTER} 5782 * <li> {@code OTHER_LETTER} 5783 * </ul> 5784 * 5785 * Not all letters have case. Many characters are 5786 * letters but are neither uppercase nor lowercase nor titlecase. 5787 * 5788 * @param codePoint the character (Unicode code point) to be tested. 5789 * @return {@code true} if the character is a letter; 5790 * {@code false} otherwise. 
5791 * @see Character#isDigit(int) 5792 * @see Character#isJavaIdentifierStart(int) 5793 * @see Character#isLetterOrDigit(int) 5794 * @see Character#isLowerCase(int) 5795 * @see Character#isTitleCase(int) 5796 * @see Character#isUnicodeIdentifierStart(int) 5797 * @see Character#isUpperCase(int) 5798 * @since 1.5 5799 */ 5800 public static boolean isLetter(int codePoint) { 5801 return ((((1 << Character.UPPERCASE_LETTER) | 5802 (1 << Character.LOWERCASE_LETTER) | 5803 (1 << Character.TITLECASE_LETTER) | 5804 (1 << Character.MODIFIER_LETTER) | 5805 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5806 != 0; 5807 } 5808 5809 /** 5810 * Determines if the specified character is a letter or digit. 5811 * <p> 5812 * A character is considered to be a letter or digit if either 5813 * {@code Character.isLetter(char ch)} or 5814 * {@code Character.isDigit(char ch)} returns 5815 * {@code true} for the character. 5816 * 5817 * <p><b>Note:</b> This method cannot handle <a 5818 * href="#supplementary"> supplementary characters</a>. To support 5819 * all Unicode characters, including supplementary characters, use 5820 * the {@link #isLetterOrDigit(int)} method. 5821 * 5822 * @param ch the character to be tested. 5823 * @return {@code true} if the character is a letter or digit; 5824 * {@code false} otherwise. 5825 * @see Character#isDigit(char) 5826 * @see Character#isJavaIdentifierPart(char) 5827 * @see Character#isJavaLetter(char) 5828 * @see Character#isJavaLetterOrDigit(char) 5829 * @see Character#isLetter(char) 5830 * @see Character#isUnicodeIdentifierPart(char) 5831 * @since 1.0.2 5832 */ 5833 public static boolean isLetterOrDigit(char ch) { 5834 return isLetterOrDigit((int)ch); 5835 } 5836 5837 /** 5838 * Determines if the specified character (Unicode code point) is a letter or digit. 
5839 * <p> 5840 * A character is considered to be a letter or digit if either 5841 * {@link #isLetter(int) isLetter(codePoint)} or 5842 * {@link #isDigit(int) isDigit(codePoint)} returns 5843 * {@code true} for the character. 5844 * 5845 * @param codePoint the character (Unicode code point) to be tested. 5846 * @return {@code true} if the character is a letter or digit; 5847 * {@code false} otherwise. 5848 * @see Character#isDigit(int) 5849 * @see Character#isJavaIdentifierPart(int) 5850 * @see Character#isLetter(int) 5851 * @see Character#isUnicodeIdentifierPart(int) 5852 * @since 1.5 5853 */ 5854 public static boolean isLetterOrDigit(int codePoint) { 5855 return ((((1 << Character.UPPERCASE_LETTER) | 5856 (1 << Character.LOWERCASE_LETTER) | 5857 (1 << Character.TITLECASE_LETTER) | 5858 (1 << Character.MODIFIER_LETTER) | 5859 (1 << Character.OTHER_LETTER) | 5860 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5861 != 0; 5862 } 5863 5864 /** 5865 * Determines if the specified character is permissible as the first 5866 * character in a Java identifier. 5867 * <p> 5868 * A character may start a Java identifier if and only if 5869 * one of the following is true: 5870 * <ul> 5871 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5872 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5873 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5874 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5875 * </ul> 5876 * 5877 * @param ch the character to be tested. 5878 * @return {@code true} if the character may start a Java 5879 * identifier; {@code false} otherwise. 
5880 * @see Character#isJavaLetterOrDigit(char) 5881 * @see Character#isJavaIdentifierStart(char) 5882 * @see Character#isJavaIdentifierPart(char) 5883 * @see Character#isLetter(char) 5884 * @see Character#isLetterOrDigit(char) 5885 * @see Character#isUnicodeIdentifierStart(char) 5886 * @since 1.0.2 5887 * @deprecated Replaced by isJavaIdentifierStart(char). 5888 */ 5889 @Deprecated 5890 public static boolean isJavaLetter(char ch) { 5891 return isJavaIdentifierStart(ch); 5892 } 5893 5894 /** 5895 * Determines if the specified character may be part of a Java 5896 * identifier as other than the first character. 5897 * <p> 5898 * A character may be part of a Java identifier if and only if any 5899 * of the following are true: 5900 * <ul> 5901 * <li> it is a letter 5902 * <li> it is a currency symbol (such as {@code '$'}) 5903 * <li> it is a connecting punctuation character (such as {@code '_'}) 5904 * <li> it is a digit 5905 * <li> it is a numeric letter (such as a Roman numeral character) 5906 * <li> it is a combining mark 5907 * <li> it is a non-spacing mark 5908 * <li> {@code isIdentifierIgnorable} returns 5909 * {@code true} for the character. 5910 * </ul> 5911 * 5912 * @param ch the character to be tested. 5913 * @return {@code true} if the character may be part of a 5914 * Java identifier; {@code false} otherwise. 5915 * @see Character#isJavaLetter(char) 5916 * @see Character#isJavaIdentifierStart(char) 5917 * @see Character#isJavaIdentifierPart(char) 5918 * @see Character#isLetter(char) 5919 * @see Character#isLetterOrDigit(char) 5920 * @see Character#isUnicodeIdentifierPart(char) 5921 * @see Character#isIdentifierIgnorable(char) 5922 * @since 1.0.2 5923 * @deprecated Replaced by isJavaIdentifierPart(char). 5924 */ 5925 @Deprecated 5926 public static boolean isJavaLetterOrDigit(char ch) { 5927 return isJavaIdentifierPart(ch); 5928 } 5929 5930 /** 5931 * Determines if the specified character (Unicode code point) is an alphabet. 
5932 * <p> 5933 * A character is considered to be alphabetic if its general category type, 5934 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 5935 * the following: 5936 * <ul> 5937 * <li> <code>UPPERCASE_LETTER</code> 5938 * <li> <code>LOWERCASE_LETTER</code> 5939 * <li> <code>TITLECASE_LETTER</code> 5940 * <li> <code>MODIFIER_LETTER</code> 5941 * <li> <code>OTHER_LETTER</code> 5942 * <li> <code>LETTER_NUMBER</code> 5943 * </ul> 5944 * or it has contributory property Other_Alphabetic as defined by the 5945 * Unicode Standard. 5946 * 5947 * @param codePoint the character (Unicode code point) to be tested. 5948 * @return <code>true</code> if the character is a Unicode alphabet 5949 * character, <code>false</code> otherwise. 5950 * @since 1.7 5951 */ 5952 public static boolean isAlphabetic(int codePoint) { 5953 return (((((1 << Character.UPPERCASE_LETTER) | 5954 (1 << Character.LOWERCASE_LETTER) | 5955 (1 << Character.TITLECASE_LETTER) | 5956 (1 << Character.MODIFIER_LETTER) | 5957 (1 << Character.OTHER_LETTER) | 5958 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 5959 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 5960 } 5961 5962 /** 5963 * Determines if the specified character (Unicode code point) is a CJKV 5964 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 5965 * the Unicode Standard. 5966 * 5967 * @param codePoint the character (Unicode code point) to be tested. 5968 * @return <code>true</code> if the character is a Unicode ideograph 5969 * character, <code>false</code> otherwise. 5970 * @since 1.7 5971 */ 5972 public static boolean isIdeographic(int codePoint) { 5973 return CharacterData.of(codePoint).isIdeographic(codePoint); 5974 } 5975 5976 /** 5977 * Determines if the specified character is 5978 * permissible as the first character in a Java identifier. 
5979 * <p> 5980 * A character may start a Java identifier if and only if 5981 * one of the following conditions is true: 5982 * <ul> 5983 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5984 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5985 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5986 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5987 * </ul> 5988 * 5989 * <p><b>Note:</b> This method cannot handle <a 5990 * href="#supplementary"> supplementary characters</a>. To support 5991 * all Unicode characters, including supplementary characters, use 5992 * the {@link #isJavaIdentifierStart(int)} method. 5993 * 5994 * @param ch the character to be tested. 5995 * @return {@code true} if the character may start a Java identifier; 5996 * {@code false} otherwise. 5997 * @see Character#isJavaIdentifierPart(char) 5998 * @see Character#isLetter(char) 5999 * @see Character#isUnicodeIdentifierStart(char) 6000 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6001 * @since 1.1 6002 */ 6003 public static boolean isJavaIdentifierStart(char ch) { 6004 return isJavaIdentifierStart((int)ch); 6005 } 6006 6007 /** 6008 * Determines if the character (Unicode code point) is 6009 * permissible as the first character in a Java identifier. 6010 * <p> 6011 * A character may start a Java identifier if and only if 6012 * one of the following conditions is true: 6013 * <ul> 6014 * <li> {@link #isLetter(int) isLetter(codePoint)} 6015 * returns {@code true} 6016 * <li> {@link #getType(int) getType(codePoint)} 6017 * returns {@code LETTER_NUMBER} 6018 * <li> the referenced character is a currency symbol (such as {@code '$'}) 6019 * <li> the referenced character is a connecting punctuation character 6020 * (such as {@code '_'}). 6021 * </ul> 6022 * 6023 * @param codePoint the character (Unicode code point) to be tested. 
6024 * @return {@code true} if the character may start a Java identifier; 6025 * {@code false} otherwise. 6026 * @see Character#isJavaIdentifierPart(int) 6027 * @see Character#isLetter(int) 6028 * @see Character#isUnicodeIdentifierStart(int) 6029 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6030 * @since 1.5 6031 */ 6032 public static boolean isJavaIdentifierStart(int codePoint) { 6033 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 6034 } 6035 6036 /** 6037 * Determines if the specified character may be part of a Java 6038 * identifier as other than the first character. 6039 * <p> 6040 * A character may be part of a Java identifier if any of the following 6041 * are true: 6042 * <ul> 6043 * <li> it is a letter 6044 * <li> it is a currency symbol (such as {@code '$'}) 6045 * <li> it is a connecting punctuation character (such as {@code '_'}) 6046 * <li> it is a digit 6047 * <li> it is a numeric letter (such as a Roman numeral character) 6048 * <li> it is a combining mark 6049 * <li> it is a non-spacing mark 6050 * <li> {@code isIdentifierIgnorable} returns 6051 * {@code true} for the character 6052 * </ul> 6053 * 6054 * <p><b>Note:</b> This method cannot handle <a 6055 * href="#supplementary"> supplementary characters</a>. To support 6056 * all Unicode characters, including supplementary characters, use 6057 * the {@link #isJavaIdentifierPart(int)} method. 6058 * 6059 * @param ch the character to be tested. 6060 * @return {@code true} if the character may be part of a 6061 * Java identifier; {@code false} otherwise. 
6062 * @see Character#isIdentifierIgnorable(char) 6063 * @see Character#isJavaIdentifierStart(char) 6064 * @see Character#isLetterOrDigit(char) 6065 * @see Character#isUnicodeIdentifierPart(char) 6066 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6067 * @since 1.1 6068 */ 6069 public static boolean isJavaIdentifierPart(char ch) { 6070 return isJavaIdentifierPart((int)ch); 6071 } 6072 6073 /** 6074 * Determines if the character (Unicode code point) may be part of a Java 6075 * identifier as other than the first character. 6076 * <p> 6077 * A character may be part of a Java identifier if any of the following 6078 * are true: 6079 * <ul> 6080 * <li> it is a letter 6081 * <li> it is a currency symbol (such as {@code '$'}) 6082 * <li> it is a connecting punctuation character (such as {@code '_'}) 6083 * <li> it is a digit 6084 * <li> it is a numeric letter (such as a Roman numeral character) 6085 * <li> it is a combining mark 6086 * <li> it is a non-spacing mark 6087 * <li> {@link #isIdentifierIgnorable(int) 6088 * isIdentifierIgnorable(codePoint)} returns {@code true} for 6089 * the character 6090 * </ul> 6091 * 6092 * @param codePoint the character (Unicode code point) to be tested. 6093 * @return {@code true} if the character may be part of a 6094 * Java identifier; {@code false} otherwise. 6095 * @see Character#isIdentifierIgnorable(int) 6096 * @see Character#isJavaIdentifierStart(int) 6097 * @see Character#isLetterOrDigit(int) 6098 * @see Character#isUnicodeIdentifierPart(int) 6099 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6100 * @since 1.5 6101 */ 6102 public static boolean isJavaIdentifierPart(int codePoint) { 6103 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 6104 } 6105 6106 /** 6107 * Determines if the specified character is permissible as the 6108 * first character in a Unicode identifier. 
6109 * <p> 6110 * A character may start a Unicode identifier if and only if 6111 * one of the following conditions is true: 6112 * <ul> 6113 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 6114 * <li> {@link #getType(char) getType(ch)} returns 6115 * {@code LETTER_NUMBER}. 6116 * </ul> 6117 * 6118 * <p><b>Note:</b> This method cannot handle <a 6119 * href="#supplementary"> supplementary characters</a>. To support 6120 * all Unicode characters, including supplementary characters, use 6121 * the {@link #isUnicodeIdentifierStart(int)} method. 6122 * 6123 * @param ch the character to be tested. 6124 * @return {@code true} if the character may start a Unicode 6125 * identifier; {@code false} otherwise. 6126 * @see Character#isJavaIdentifierStart(char) 6127 * @see Character#isLetter(char) 6128 * @see Character#isUnicodeIdentifierPart(char) 6129 * @since 1.1 6130 */ 6131 public static boolean isUnicodeIdentifierStart(char ch) { 6132 return isUnicodeIdentifierStart((int)ch); 6133 } 6134 6135 /** 6136 * Determines if the specified character (Unicode code point) is permissible as the 6137 * first character in a Unicode identifier. 6138 * <p> 6139 * A character may start a Unicode identifier if and only if 6140 * one of the following conditions is true: 6141 * <ul> 6142 * <li> {@link #isLetter(int) isLetter(codePoint)} 6143 * returns {@code true} 6144 * <li> {@link #getType(int) getType(codePoint)} 6145 * returns {@code LETTER_NUMBER}. 6146 * </ul> 6147 * @param codePoint the character (Unicode code point) to be tested. 6148 * @return {@code true} if the character may start a Unicode 6149 * identifier; {@code false} otherwise. 
6150 * @see Character#isJavaIdentifierStart(int) 6151 * @see Character#isLetter(int) 6152 * @see Character#isUnicodeIdentifierPart(int) 6153 * @since 1.5 6154 */ 6155 public static boolean isUnicodeIdentifierStart(int codePoint) { 6156 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 6157 } 6158 6159 /** 6160 * Determines if the specified character may be part of a Unicode 6161 * identifier as other than the first character. 6162 * <p> 6163 * A character may be part of a Unicode identifier if and only if 6164 * one of the following statements is true: 6165 * <ul> 6166 * <li> it is a letter 6167 * <li> it is a connecting punctuation character (such as {@code '_'}) 6168 * <li> it is a digit 6169 * <li> it is a numeric letter (such as a Roman numeral character) 6170 * <li> it is a combining mark 6171 * <li> it is a non-spacing mark 6172 * <li> {@code isIdentifierIgnorable} returns 6173 * {@code true} for this character. 6174 * </ul> 6175 * 6176 * <p><b>Note:</b> This method cannot handle <a 6177 * href="#supplementary"> supplementary characters</a>. To support 6178 * all Unicode characters, including supplementary characters, use 6179 * the {@link #isUnicodeIdentifierPart(int)} method. 6180 * 6181 * @param ch the character to be tested. 6182 * @return {@code true} if the character may be part of a 6183 * Unicode identifier; {@code false} otherwise. 6184 * @see Character#isIdentifierIgnorable(char) 6185 * @see Character#isJavaIdentifierPart(char) 6186 * @see Character#isLetterOrDigit(char) 6187 * @see Character#isUnicodeIdentifierStart(char) 6188 * @since 1.1 6189 */ 6190 public static boolean isUnicodeIdentifierPart(char ch) { 6191 return isUnicodeIdentifierPart((int)ch); 6192 } 6193 6194 /** 6195 * Determines if the specified character (Unicode code point) may be part of a Unicode 6196 * identifier as other than the first character. 
6197 * <p> 6198 * A character may be part of a Unicode identifier if and only if 6199 * one of the following statements is true: 6200 * <ul> 6201 * <li> it is a letter 6202 * <li> it is a connecting punctuation character (such as {@code '_'}) 6203 * <li> it is a digit 6204 * <li> it is a numeric letter (such as a Roman numeral character) 6205 * <li> it is a combining mark 6206 * <li> it is a non-spacing mark 6207 * <li> {@code isIdentifierIgnorable} returns 6208 * {@code true} for this character. 6209 * </ul> 6210 * @param codePoint the character (Unicode code point) to be tested. 6211 * @return {@code true} if the character may be part of a 6212 * Unicode identifier; {@code false} otherwise. 6213 * @see Character#isIdentifierIgnorable(int) 6214 * @see Character#isJavaIdentifierPart(int) 6215 * @see Character#isLetterOrDigit(int) 6216 * @see Character#isUnicodeIdentifierStart(int) 6217 * @since 1.5 6218 */ 6219 public static boolean isUnicodeIdentifierPart(int codePoint) { 6220 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 6221 } 6222 6223 /** 6224 * Determines if the specified character should be regarded as 6225 * an ignorable character in a Java identifier or a Unicode identifier. 6226 * <p> 6227 * The following Unicode characters are ignorable in a Java identifier 6228 * or a Unicode identifier: 6229 * <ul> 6230 * <li>ISO control characters that are not whitespace 6231 * <ul> 6232 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6233 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6234 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6235 * </ul> 6236 * 6237 * <li>all characters that have the {@code FORMAT} general 6238 * category value 6239 * </ul> 6240 * 6241 * <p><b>Note:</b> This method cannot handle <a 6242 * href="#supplementary"> supplementary characters</a>. To support 6243 * all Unicode characters, including supplementary characters, use 6244 * the {@link #isIdentifierIgnorable(int)} method. 
6245 * 6246 * @param ch the character to be tested. 6247 * @return {@code true} if the character is an ignorable control 6248 * character that may be part of a Java or Unicode identifier; 6249 * {@code false} otherwise. 6250 * @see Character#isJavaIdentifierPart(char) 6251 * @see Character#isUnicodeIdentifierPart(char) 6252 * @since 1.1 6253 */ 6254 public static boolean isIdentifierIgnorable(char ch) { 6255 return isIdentifierIgnorable((int)ch); 6256 } 6257 6258 /** 6259 * Determines if the specified character (Unicode code point) should be regarded as 6260 * an ignorable character in a Java identifier or a Unicode identifier. 6261 * <p> 6262 * The following Unicode characters are ignorable in a Java identifier 6263 * or a Unicode identifier: 6264 * <ul> 6265 * <li>ISO control characters that are not whitespace 6266 * <ul> 6267 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6268 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6269 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6270 * </ul> 6271 * 6272 * <li>all characters that have the {@code FORMAT} general 6273 * category value 6274 * </ul> 6275 * 6276 * @param codePoint the character (Unicode code point) to be tested. 6277 * @return {@code true} if the character is an ignorable control 6278 * character that may be part of a Java or Unicode identifier; 6279 * {@code false} otherwise. 6280 * @see Character#isJavaIdentifierPart(int) 6281 * @see Character#isUnicodeIdentifierPart(int) 6282 * @since 1.5 6283 */ 6284 public static boolean isIdentifierIgnorable(int codePoint) { 6285 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6286 } 6287 6288 /** 6289 * Converts the character argument to lowercase using case 6290 * mapping information from the UnicodeData file. 
6291 * <p> 6292 * Note that 6293 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 6294 * does not always return {@code true} for some ranges of 6295 * characters, particularly those that are symbols or ideographs. 6296 * 6297 * <p>In general, {@link String#toLowerCase()} should be used to map 6298 * characters to lowercase. {@code String} case mapping methods 6299 * have several benefits over {@code Character} case mapping methods. 6300 * {@code String} case mapping methods can perform locale-sensitive 6301 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6302 * the {@code Character} case mapping methods cannot. 6303 * 6304 * <p><b>Note:</b> This method cannot handle <a 6305 * href="#supplementary"> supplementary characters</a>. To support 6306 * all Unicode characters, including supplementary characters, use 6307 * the {@link #toLowerCase(int)} method. 6308 * 6309 * @param ch the character to be converted. 6310 * @return the lowercase equivalent of the character, if any; 6311 * otherwise, the character itself. 6312 * @see Character#isLowerCase(char) 6313 * @see String#toLowerCase() 6314 */ 6315 public static char toLowerCase(char ch) { 6316 return (char)toLowerCase((int)ch); 6317 } 6318 6319 /** 6320 * Converts the character (Unicode code point) argument to 6321 * lowercase using case mapping information from the UnicodeData 6322 * file. 6323 * 6324 * <p> Note that 6325 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 6326 * does not always return {@code true} for some ranges of 6327 * characters, particularly those that are symbols or ideographs. 6328 * 6329 * <p>In general, {@link String#toLowerCase()} should be used to map 6330 * characters to lowercase. {@code String} case mapping methods 6331 * have several benefits over {@code Character} case mapping methods. 
6332 * {@code String} case mapping methods can perform locale-sensitive 6333 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6334 * the {@code Character} case mapping methods cannot. 6335 * 6336 * @param codePoint the character (Unicode code point) to be converted. 6337 * @return the lowercase equivalent of the character (Unicode code 6338 * point), if any; otherwise, the character itself. 6339 * @see Character#isLowerCase(int) 6340 * @see String#toLowerCase() 6341 * 6342 * @since 1.5 6343 */ 6344 public static int toLowerCase(int codePoint) { 6345 return CharacterData.of(codePoint).toLowerCase(codePoint); 6346 } 6347 6348 /** 6349 * Converts the character argument to uppercase using case mapping 6350 * information from the UnicodeData file. 6351 * <p> 6352 * Note that 6353 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 6354 * does not always return {@code true} for some ranges of 6355 * characters, particularly those that are symbols or ideographs. 6356 * 6357 * <p>In general, {@link String#toUpperCase()} should be used to map 6358 * characters to uppercase. {@code String} case mapping methods 6359 * have several benefits over {@code Character} case mapping methods. 6360 * {@code String} case mapping methods can perform locale-sensitive 6361 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6362 * the {@code Character} case mapping methods cannot. 6363 * 6364 * <p><b>Note:</b> This method cannot handle <a 6365 * href="#supplementary"> supplementary characters</a>. To support 6366 * all Unicode characters, including supplementary characters, use 6367 * the {@link #toUpperCase(int)} method. 6368 * 6369 * @param ch the character to be converted. 6370 * @return the uppercase equivalent of the character, if any; 6371 * otherwise, the character itself. 
6372 * @see Character#isUpperCase(char) 6373 * @see String#toUpperCase() 6374 */ 6375 public static char toUpperCase(char ch) { 6376 return (char)toUpperCase((int)ch); 6377 } 6378 6379 /** 6380 * Converts the character (Unicode code point) argument to 6381 * uppercase using case mapping information from the UnicodeData 6382 * file. 6383 * 6384 * <p>Note that 6385 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 6386 * does not always return {@code true} for some ranges of 6387 * characters, particularly those that are symbols or ideographs. 6388 * 6389 * <p>In general, {@link String#toUpperCase()} should be used to map 6390 * characters to uppercase. {@code String} case mapping methods 6391 * have several benefits over {@code Character} case mapping methods. 6392 * {@code String} case mapping methods can perform locale-sensitive 6393 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6394 * the {@code Character} case mapping methods cannot. 6395 * 6396 * @param codePoint the character (Unicode code point) to be converted. 6397 * @return the uppercase equivalent of the character, if any; 6398 * otherwise, the character itself. 6399 * @see Character#isUpperCase(int) 6400 * @see String#toUpperCase() 6401 * 6402 * @since 1.5 6403 */ 6404 public static int toUpperCase(int codePoint) { 6405 return CharacterData.of(codePoint).toUpperCase(codePoint); 6406 } 6407 6408 /** 6409 * Converts the character argument to titlecase using case mapping 6410 * information from the UnicodeData file. If a character has no 6411 * explicit titlecase mapping and is not itself a titlecase char 6412 * according to UnicodeData, then the uppercase mapping is 6413 * returned as an equivalent titlecase mapping. If the 6414 * {@code char} argument is already a titlecase 6415 * {@code char}, the same {@code char} value will be 6416 * returned. 
6417 * <p> 6418 * Note that 6419 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 6420 * does not always return {@code true} for some ranges of 6421 * characters. 6422 * 6423 * <p><b>Note:</b> This method cannot handle <a 6424 * href="#supplementary"> supplementary characters</a>. To support 6425 * all Unicode characters, including supplementary characters, use 6426 * the {@link #toTitleCase(int)} method. 6427 * 6428 * @param ch the character to be converted. 6429 * @return the titlecase equivalent of the character, if any; 6430 * otherwise, the character itself. 6431 * @see Character#isTitleCase(char) 6432 * @see Character#toLowerCase(char) 6433 * @see Character#toUpperCase(char) 6434 * @since 1.0.2 6435 */ 6436 public static char toTitleCase(char ch) { 6437 return (char)toTitleCase((int)ch); 6438 } 6439 6440 /** 6441 * Converts the character (Unicode code point) argument to titlecase using case mapping 6442 * information from the UnicodeData file. If a character has no 6443 * explicit titlecase mapping and is not itself a titlecase char 6444 * according to UnicodeData, then the uppercase mapping is 6445 * returned as an equivalent titlecase mapping. If the 6446 * character argument is already a titlecase 6447 * character, the same character value will be 6448 * returned. 6449 * 6450 * <p>Note that 6451 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 6452 * does not always return {@code true} for some ranges of 6453 * characters. 6454 * 6455 * @param codePoint the character (Unicode code point) to be converted. 6456 * @return the titlecase equivalent of the character, if any; 6457 * otherwise, the character itself. 
6458 * @see Character#isTitleCase(int) 6459 * @see Character#toLowerCase(int) 6460 * @see Character#toUpperCase(int) 6461 * @since 1.5 6462 */ 6463 public static int toTitleCase(int codePoint) { 6464 return CharacterData.of(codePoint).toTitleCase(codePoint); 6465 } 6466 6467 /** 6468 * Returns the numeric value of the character {@code ch} in the 6469 * specified radix. 6470 * <p> 6471 * If the radix is not in the range {@code MIN_RADIX} ≤ 6472 * {@code radix} ≤ {@code MAX_RADIX} or if the 6473 * value of {@code ch} is not a valid digit in the specified 6474 * radix, {@code -1} is returned. A character is a valid digit 6475 * if at least one of the following is true: 6476 * <ul> 6477 * <li>The method {@code isDigit} is {@code true} of the character 6478 * and the Unicode decimal digit value of the character (or its 6479 * single-character decomposition) is less than the specified radix. 6480 * In this case the decimal digit value is returned. 6481 * <li>The character is one of the uppercase Latin letters 6482 * {@code 'A'} through {@code 'Z'} and its code is less than 6483 * {@code radix + 'A' - 10}. 6484 * In this case, {@code ch - 'A' + 10} 6485 * is returned. 6486 * <li>The character is one of the lowercase Latin letters 6487 * {@code 'a'} through {@code 'z'} and its code is less than 6488 * {@code radix + 'a' - 10}. 6489 * In this case, {@code ch - 'a' + 10} 6490 * is returned. 6491 * <li>The character is one of the fullwidth uppercase Latin letters A 6492 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6493 * and its code is less than 6494 * {@code radix + '\u005CuFF21' - 10}. 6495 * In this case, {@code ch - '\u005CuFF21' + 10} 6496 * is returned. 6497 * <li>The character is one of the fullwidth lowercase Latin letters a 6498 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6499 * and its code is less than 6500 * {@code radix + '\u005CuFF41' - 10}. 6501 * In this case, {@code ch - '\u005CuFF41' + 10} 6502 * is returned. 
6503 * </ul> 6504 * 6505 * <p><b>Note:</b> This method cannot handle <a 6506 * href="#supplementary"> supplementary characters</a>. To support 6507 * all Unicode characters, including supplementary characters, use 6508 * the {@link #digit(int, int)} method. 6509 * 6510 * @param ch the character to be converted. 6511 * @param radix the radix. 6512 * @return the numeric value represented by the character in the 6513 * specified radix. 6514 * @see Character#forDigit(int, int) 6515 * @see Character#isDigit(char) 6516 */ 6517 public static int digit(char ch, int radix) { 6518 return digit((int)ch, radix); 6519 } 6520 6521 /** 6522 * Returns the numeric value of the specified character (Unicode 6523 * code point) in the specified radix. 6524 * 6525 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 6526 * {@code radix} ≤ {@code MAX_RADIX} or if the 6527 * character is not a valid digit in the specified 6528 * radix, {@code -1} is returned. A character is a valid digit 6529 * if at least one of the following is true: 6530 * <ul> 6531 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 6532 * and the Unicode decimal digit value of the character (or its 6533 * single-character decomposition) is less than the specified radix. 6534 * In this case the decimal digit value is returned. 6535 * <li>The character is one of the uppercase Latin letters 6536 * {@code 'A'} through {@code 'Z'} and its code is less than 6537 * {@code radix + 'A' - 10}. 6538 * In this case, {@code codePoint - 'A' + 10} 6539 * is returned. 6540 * <li>The character is one of the lowercase Latin letters 6541 * {@code 'a'} through {@code 'z'} and its code is less than 6542 * {@code radix + 'a' - 10}. 6543 * In this case, {@code codePoint - 'a' + 10} 6544 * is returned. 
6545 * <li>The character is one of the fullwidth uppercase Latin letters A 6546 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6547 * and its code is less than 6548 * {@code radix + '\u005CuFF21' - 10}. 6549 * In this case, 6550 * {@code codePoint - '\u005CuFF21' + 10} 6551 * is returned. 6552 * <li>The character is one of the fullwidth lowercase Latin letters a 6553 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6554 * and its code is less than 6555 * {@code radix + '\u005CuFF41'- 10}. 6556 * In this case, 6557 * {@code codePoint - '\u005CuFF41' + 10} 6558 * is returned. 6559 * </ul> 6560 * 6561 * @param codePoint the character (Unicode code point) to be converted. 6562 * @param radix the radix. 6563 * @return the numeric value represented by the character in the 6564 * specified radix. 6565 * @see Character#forDigit(int, int) 6566 * @see Character#isDigit(int) 6567 * @since 1.5 6568 */ 6569 public static int digit(int codePoint, int radix) { 6570 return CharacterData.of(codePoint).digit(codePoint, radix); 6571 } 6572 6573 /** 6574 * Returns the {@code int} value that the specified Unicode 6575 * character represents. For example, the character 6576 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 6577 * an int with a value of 50. 6578 * <p> 6579 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6580 * {@code '\u005Cu005A'}), lowercase 6581 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6582 * full width variant ({@code '\u005CuFF21'} through 6583 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6584 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6585 * through 35. This is independent of the Unicode specification, 6586 * which does not assign numeric values to these {@code char} 6587 * values. 6588 * <p> 6589 * If the character does not have a numeric value, then -1 is returned. 
6590 * If the character has a numeric value that cannot be represented as a 6591 * nonnegative integer (for example, a fractional value), then -2 6592 * is returned. 6593 * 6594 * <p><b>Note:</b> This method cannot handle <a 6595 * href="#supplementary"> supplementary characters</a>. To support 6596 * all Unicode characters, including supplementary characters, use 6597 * the {@link #getNumericValue(int)} method. 6598 * 6599 * @param ch the character to be converted. 6600 * @return the numeric value of the character, as a nonnegative {@code int} 6601 * value; -2 if the character has a numeric value that is not a 6602 * nonnegative integer; -1 if the character has no numeric value. 6603 * @see Character#forDigit(int, int) 6604 * @see Character#isDigit(char) 6605 * @since 1.1 6606 */ 6607 public static int getNumericValue(char ch) { 6608 return getNumericValue((int)ch); 6609 } 6610 6611 /** 6612 * Returns the {@code int} value that the specified 6613 * character (Unicode code point) represents. For example, the character 6614 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 6615 * an {@code int} with a value of 50. 6616 * <p> 6617 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6618 * {@code '\u005Cu005A'}), lowercase 6619 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6620 * full width variant ({@code '\u005CuFF21'} through 6621 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6622 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6623 * through 35. This is independent of the Unicode specification, 6624 * which does not assign numeric values to these {@code char} 6625 * values. 6626 * <p> 6627 * If the character does not have a numeric value, then -1 is returned. 6628 * If the character has a numeric value that cannot be represented as a 6629 * nonnegative integer (for example, a fractional value), then -2 6630 * is returned. 
6631 * 6632 * @param codePoint the character (Unicode code point) to be converted. 6633 * @return the numeric value of the character, as a nonnegative {@code int} 6634 * value; -2 if the character has a numeric value that is not a 6635 * nonnegative integer; -1 if the character has no numeric value. 6636 * @see Character#forDigit(int, int) 6637 * @see Character#isDigit(int) 6638 * @since 1.5 6639 */ 6640 public static int getNumericValue(int codePoint) { 6641 return CharacterData.of(codePoint).getNumericValue(codePoint); 6642 } 6643 6644 /** 6645 * Determines if the specified character is ISO-LATIN-1 white space. 6646 * This method returns {@code true} for the following five 6647 * characters only: 6648 * <table summary="truechars"> 6649 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 6650 * <td>{@code HORIZONTAL TABULATION}</td></tr> 6651 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 6652 * <td>{@code NEW LINE}</td></tr> 6653 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 6654 * <td>{@code FORM FEED}</td></tr> 6655 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 6656 * <td>{@code CARRIAGE RETURN}</td></tr> 6657 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 6658 * <td>{@code SPACE}</td></tr> 6659 * </table> 6660 * 6661 * @param ch the character to be tested. 6662 * @return {@code true} if the character is ISO-LATIN-1 white 6663 * space; {@code false} otherwise. 6664 * @see Character#isSpaceChar(char) 6665 * @see Character#isWhitespace(char) 6666 * @deprecated Replaced by isWhitespace(char). 6667 */ 6668 @Deprecated 6669 public static boolean isSpace(char ch) { 6670 return (ch <= 0x0020) && 6671 (((((1L << 0x0009) | 6672 (1L << 0x000A) | 6673 (1L << 0x000C) | 6674 (1L << 0x000D) | 6675 (1L << 0x0020)) >> ch) & 1L) != 0); 6676 } 6677 6678 6679 /** 6680 * Determines if the specified character is a Unicode space character. 
6681 * A character is considered to be a space character if and only if 6682 * it is specified to be a space character by the Unicode Standard. This 6683 * method returns true if the character's general category type is any of 6684 * the following: 6685 * <ul> 6686 * <li> {@code SPACE_SEPARATOR} 6687 * <li> {@code LINE_SEPARATOR} 6688 * <li> {@code PARAGRAPH_SEPARATOR} 6689 * </ul> 6690 * 6691 * <p><b>Note:</b> This method cannot handle <a 6692 * href="#supplementary"> supplementary characters</a>. To support 6693 * all Unicode characters, including supplementary characters, use 6694 * the {@link #isSpaceChar(int)} method. 6695 * 6696 * @param ch the character to be tested. 6697 * @return {@code true} if the character is a space character; 6698 * {@code false} otherwise. 6699 * @see Character#isWhitespace(char) 6700 * @since 1.1 6701 */ 6702 public static boolean isSpaceChar(char ch) { 6703 return isSpaceChar((int)ch); 6704 } 6705 6706 /** 6707 * Determines if the specified character (Unicode code point) is a 6708 * Unicode space character. A character is considered to be a 6709 * space character if and only if it is specified to be a space 6710 * character by the Unicode Standard. This method returns true if 6711 * the character's general category type is any of the following: 6712 * 6713 * <ul> 6714 * <li> {@link #SPACE_SEPARATOR} 6715 * <li> {@link #LINE_SEPARATOR} 6716 * <li> {@link #PARAGRAPH_SEPARATOR} 6717 * </ul> 6718 * 6719 * @param codePoint the character (Unicode code point) to be tested. 6720 * @return {@code true} if the character is a space character; 6721 * {@code false} otherwise. 
6722 * @see Character#isWhitespace(int) 6723 * @since 1.5 6724 */ 6725 public static boolean isSpaceChar(int codePoint) { 6726 return ((((1 << Character.SPACE_SEPARATOR) | 6727 (1 << Character.LINE_SEPARATOR) | 6728 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6729 != 0; 6730 } 6731 6732 /** 6733 * Determines if the specified character is white space according to Java. 6734 * A character is a Java whitespace character if and only if it satisfies 6735 * one of the following criteria: 6736 * <ul> 6737 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 6738 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 6739 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6740 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6741 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6742 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6743 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6744 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6745 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6746 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6747 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6748 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6749 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6750 * </ul> 6751 * 6752 * <p><b>Note:</b> This method cannot handle <a 6753 * href="#supplementary"> supplementary characters</a>. To support 6754 * all Unicode characters, including supplementary characters, use 6755 * the {@link #isWhitespace(int)} method. 6756 * 6757 * @param ch the character to be tested. 6758 * @return {@code true} if the character is a Java whitespace 6759 * character; {@code false} otherwise. 
6760 * @see Character#isSpaceChar(char) 6761 * @since 1.1 6762 */ 6763 public static boolean isWhitespace(char ch) { 6764 return isWhitespace((int)ch); 6765 } 6766 6767 /** 6768 * Determines if the specified character (Unicode code point) is 6769 * white space according to Java. A character is a Java 6770 * whitespace character if and only if it satisfies one of the 6771 * following criteria: 6772 * <ul> 6773 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6774 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6775 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6776 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6777 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6778 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6779 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6780 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6781 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6782 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6783 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6784 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6785 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6786 * </ul> 6787 * 6788 * @param codePoint the character (Unicode code point) to be tested. 6789 * @return {@code true} if the character is a Java whitespace 6790 * character; {@code false} otherwise. 6791 * @see Character#isSpaceChar(int) 6792 * @since 1.5 6793 */ 6794 public static boolean isWhitespace(int codePoint) { 6795 return CharacterData.of(codePoint).isWhitespace(codePoint); 6796 } 6797 6798 /** 6799 * Determines if the specified character is an ISO control 6800 * character. A character is considered to be an ISO control 6801 * character if its code is in the range {@code '\u005Cu0000'} 6802 * through {@code '\u005Cu001F'} or in the range 6803 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
6804 * 6805 * <p><b>Note:</b> This method cannot handle <a 6806 * href="#supplementary"> supplementary characters</a>. To support 6807 * all Unicode characters, including supplementary characters, use 6808 * the {@link #isISOControl(int)} method. 6809 * 6810 * @param ch the character to be tested. 6811 * @return {@code true} if the character is an ISO control character; 6812 * {@code false} otherwise. 6813 * 6814 * @see Character#isSpaceChar(char) 6815 * @see Character#isWhitespace(char) 6816 * @since 1.1 6817 */ 6818 public static boolean isISOControl(char ch) { 6819 return isISOControl((int)ch); 6820 } 6821 6822 /** 6823 * Determines if the referenced character (Unicode code point) is an ISO control 6824 * character. A character is considered to be an ISO control 6825 * character if its code is in the range {@code '\u005Cu0000'} 6826 * through {@code '\u005Cu001F'} or in the range 6827 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6828 * 6829 * @param codePoint the character (Unicode code point) to be tested. 6830 * @return {@code true} if the character is an ISO control character; 6831 * {@code false} otherwise. 6832 * @see Character#isSpaceChar(int) 6833 * @see Character#isWhitespace(int) 6834 * @since 1.5 6835 */ 6836 public static boolean isISOControl(int codePoint) { 6837 // Optimized form of: 6838 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6839 // (codePoint >= 0x7F && codePoint <= 0x9F); 6840 return codePoint <= 0x9F && 6841 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6842 } 6843 6844 /** 6845 * Returns a value indicating a character's general category. 6846 * 6847 * <p><b>Note:</b> This method cannot handle <a 6848 * href="#supplementary"> supplementary characters</a>. To support 6849 * all Unicode characters, including supplementary characters, use 6850 * the {@link #getType(int)} method. 6851 * 6852 * @param ch the character to be tested. 6853 * @return a value of type {@code int} representing the 6854 * character's general category. 
6855 * @see Character#COMBINING_SPACING_MARK 6856 * @see Character#CONNECTOR_PUNCTUATION 6857 * @see Character#CONTROL 6858 * @see Character#CURRENCY_SYMBOL 6859 * @see Character#DASH_PUNCTUATION 6860 * @see Character#DECIMAL_DIGIT_NUMBER 6861 * @see Character#ENCLOSING_MARK 6862 * @see Character#END_PUNCTUATION 6863 * @see Character#FINAL_QUOTE_PUNCTUATION 6864 * @see Character#FORMAT 6865 * @see Character#INITIAL_QUOTE_PUNCTUATION 6866 * @see Character#LETTER_NUMBER 6867 * @see Character#LINE_SEPARATOR 6868 * @see Character#LOWERCASE_LETTER 6869 * @see Character#MATH_SYMBOL 6870 * @see Character#MODIFIER_LETTER 6871 * @see Character#MODIFIER_SYMBOL 6872 * @see Character#NON_SPACING_MARK 6873 * @see Character#OTHER_LETTER 6874 * @see Character#OTHER_NUMBER 6875 * @see Character#OTHER_PUNCTUATION 6876 * @see Character#OTHER_SYMBOL 6877 * @see Character#PARAGRAPH_SEPARATOR 6878 * @see Character#PRIVATE_USE 6879 * @see Character#SPACE_SEPARATOR 6880 * @see Character#START_PUNCTUATION 6881 * @see Character#SURROGATE 6882 * @see Character#TITLECASE_LETTER 6883 * @see Character#UNASSIGNED 6884 * @see Character#UPPERCASE_LETTER 6885 * @since 1.1 6886 */ 6887 public static int getType(char ch) { 6888 return getType((int)ch); 6889 } 6890 6891 /** 6892 * Returns a value indicating a character's general category. 6893 * 6894 * @param codePoint the character (Unicode code point) to be tested. 6895 * @return a value of type {@code int} representing the 6896 * character's general category. 
6897 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6898 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6899 * @see Character#CONTROL CONTROL 6900 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6901 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6902 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6903 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6904 * @see Character#END_PUNCTUATION END_PUNCTUATION 6905 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6906 * @see Character#FORMAT FORMAT 6907 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6908 * @see Character#LETTER_NUMBER LETTER_NUMBER 6909 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6910 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6911 * @see Character#MATH_SYMBOL MATH_SYMBOL 6912 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6913 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6914 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6915 * @see Character#OTHER_LETTER OTHER_LETTER 6916 * @see Character#OTHER_NUMBER OTHER_NUMBER 6917 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6918 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6919 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6920 * @see Character#PRIVATE_USE PRIVATE_USE 6921 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6922 * @see Character#START_PUNCTUATION START_PUNCTUATION 6923 * @see Character#SURROGATE SURROGATE 6924 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6925 * @see Character#UNASSIGNED UNASSIGNED 6926 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6927 * @since 1.5 6928 */ 6929 public static int getType(int codePoint) { 6930 return CharacterData.of(codePoint).getType(codePoint); 6931 } 6932 6933 /** 6934 * Determines the character representation for a specific digit in 6935 * the specified radix. 
If the value of {@code radix} is not a 6936 * valid radix, or the value of {@code digit} is not a valid 6937 * digit in the specified radix, the null character 6938 * ({@code '\u005Cu0000'}) is returned. 6939 * <p> 6940 * The {@code radix} argument is valid if it is greater than or 6941 * equal to {@code MIN_RADIX} and less than or equal to 6942 * {@code MAX_RADIX}. The {@code digit} argument is valid if 6943 * {@code 0 <= digit < radix}. 6944 * <p> 6945 * If the digit is less than 10, then 6946 * {@code '0' + digit} is returned. Otherwise, the value 6947 * {@code 'a' + digit - 10} is returned. 6948 * 6949 * @param digit the number to convert to a character. 6950 * @param radix the radix. 6951 * @return the {@code char} representation of the specified digit 6952 * in the specified radix. 6953 * @see Character#MIN_RADIX 6954 * @see Character#MAX_RADIX 6955 * @see Character#digit(char, int) 6956 */ 6957 public static char forDigit(int digit, int radix) { 6958 if ((digit >= radix) || (digit < 0)) { 6959 return '\0'; 6960 } 6961 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6962 return '\0'; 6963 } 6964 if (digit < 10) { 6965 return (char)('0' + digit); 6966 } 6967 return (char)('a' - 10 + digit); 6968 } 6969 6970 /** 6971 * Returns the Unicode directionality property for the given 6972 * character. Character directionality is used to calculate the 6973 * visual ordering of text. The directionality value of undefined 6974 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 6975 * 6976 * <p><b>Note:</b> This method cannot handle <a 6977 * href="#supplementary"> supplementary characters</a>. To support 6978 * all Unicode characters, including supplementary characters, use 6979 * the {@link #getDirectionality(int)} method. 6980 * 6981 * @param ch {@code char} for which the directionality property 6982 * is requested. 6983 * @return the directionality property of the {@code char} value. 
6984 * 6985 * @see Character#DIRECTIONALITY_UNDEFINED 6986 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6987 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6988 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6989 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6990 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6991 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6992 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6993 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6994 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6995 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6996 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6997 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6998 * @see Character#DIRECTIONALITY_WHITESPACE 6999 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 7000 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 7001 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 7002 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 7003 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7004 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7005 * @since 1.4 7006 */ 7007 public static byte getDirectionality(char ch) { 7008 return getDirectionality((int)ch); 7009 } 7010 7011 /** 7012 * Returns the Unicode directionality property for the given 7013 * character (Unicode code point). Character directionality is 7014 * used to calculate the visual ordering of text. The 7015 * directionality value of undefined character is {@link 7016 * #DIRECTIONALITY_UNDEFINED}. 7017 * 7018 * @param codePoint the character (Unicode code point) for which 7019 * the directionality property is requested. 7020 * @return the directionality property of the character. 
7021 * 7022 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 7023 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 7024 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 7025 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 7026 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 7027 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 7028 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 7029 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 7030 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 7031 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 7032 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 7033 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 7034 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 7035 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 7036 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 7037 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 7038 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 7039 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 7040 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7041 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7042 * @since 1.5 7043 */ 7044 public static byte getDirectionality(int codePoint) { 7045 return CharacterData.of(codePoint).getDirectionality(codePoint); 7046 } 7047 7048 /** 7049 * 
Determines whether the character is mirrored according to the 7050 * Unicode specification. Mirrored characters should have their 7051 * glyphs horizontally mirrored when displayed in text that is 7052 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 7053 * PARENTHESIS is semantically defined to be an <i>opening 7054 * parenthesis</i>. This will appear as a "(" in text that is 7055 * left-to-right but as a ")" in text that is right-to-left. 7056 * 7057 * <p><b>Note:</b> This method cannot handle <a 7058 * href="#supplementary"> supplementary characters</a>. To support 7059 * all Unicode characters, including supplementary characters, use 7060 * the {@link #isMirrored(int)} method. 7061 * 7062 * @param ch {@code char} for which the mirrored property is requested 7063 * @return {@code true} if the char is mirrored, {@code false} 7064 * if the {@code char} is not mirrored or is not defined. 7065 * @since 1.4 7066 */ 7067 public static boolean isMirrored(char ch) { 7068 return isMirrored((int)ch); 7069 } 7070 7071 /** 7072 * Determines whether the specified character (Unicode code point) 7073 * is mirrored according to the Unicode specification. Mirrored 7074 * characters should have their glyphs horizontally mirrored when 7075 * displayed in text that is right-to-left. For example, 7076 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 7077 * defined to be an <i>opening parenthesis</i>. This will appear 7078 * as a "(" in text that is left-to-right but as a ")" in text 7079 * that is right-to-left. 7080 * 7081 * @param codePoint the character (Unicode code point) to be tested. 7082 * @return {@code true} if the character is mirrored, {@code false} 7083 * if the character is not mirrored or is not defined. 7084 * @since 1.5 7085 */ 7086 public static boolean isMirrored(int codePoint) { 7087 return CharacterData.of(codePoint).isMirrored(codePoint); 7088 } 7089 7090 /** 7091 * Compares two {@code Character} objects numerically. 
7092 * 7093 * @param anotherCharacter the {@code Character} to be compared. 7094 7095 * @return the value {@code 0} if the argument {@code Character} 7096 * is equal to this {@code Character}; a value less than 7097 * {@code 0} if this {@code Character} is numerically less 7098 * than the {@code Character} argument; and a value greater than 7099 * {@code 0} if this {@code Character} is numerically greater 7100 * than the {@code Character} argument (unsigned comparison). 7101 * Note that this is strictly a numerical comparison; it is not 7102 * locale-dependent. 7103 * @since 1.2 7104 */ 7105 public int compareTo(Character anotherCharacter) { 7106 return compare(this.value, anotherCharacter.value); 7107 } 7108 7109 /** 7110 * Compares two {@code char} values numerically. 7111 * The value returned is identical to what would be returned by: 7112 * <pre> 7113 * Character.valueOf(x).compareTo(Character.valueOf(y)) 7114 * </pre> 7115 * 7116 * @param x the first {@code char} to compare 7117 * @param y the second {@code char} to compare 7118 * @return the value {@code 0} if {@code x == y}; 7119 * a value less than {@code 0} if {@code x < y}; and 7120 * a value greater than {@code 0} if {@code x > y} 7121 * @since 1.7 7122 */ 7123 public static int compare(char x, char y) { 7124 return x - y; 7125 } 7126 7127 /** 7128 * Converts the character (Unicode code point) argument to uppercase using 7129 * information from the UnicodeData file. 7130 * 7131 * @param codePoint the character (Unicode code point) to be converted. 7132 * @return either the uppercase equivalent of the character, if 7133 * any, or an error flag ({@code Character.ERROR}) 7134 * that indicates that a 1:M {@code char} mapping exists. 
7135 * @see Character#isLowerCase(char) 7136 * @see Character#isUpperCase(char) 7137 * @see Character#toLowerCase(char) 7138 * @see Character#toTitleCase(char) 7139 * @since 1.4 7140 */ 7141 static int toUpperCaseEx(int codePoint) { 7142 assert isValidCodePoint(codePoint); 7143 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 7144 } 7145 7146 /** 7147 * Converts the character (Unicode code point) argument to uppercase using case 7148 * mapping information from the SpecialCasing file in the Unicode 7149 * specification. If a character has no explicit uppercase 7150 * mapping, then the {@code char} itself is returned in the 7151 * {@code char[]}. 7152 * 7153 * @param codePoint the character (Unicode code point) to be converted. 7154 * @return a {@code char[]} with the uppercased character. 7155 * @since 1.4 7156 */ 7157 static char[] toUpperCaseCharArray(int codePoint) { 7158 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 7159 assert isBmpCodePoint(codePoint); 7160 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 7161 } 7162 7163 /** 7164 * The number of bits used to represent a <tt>char</tt> value in unsigned 7165 * binary form, constant {@code 16}. 7166 * 7167 * @since 1.5 7168 */ 7169 public static final int SIZE = 16; 7170 7171 /** 7172 * The number of bytes used to represent a {@code char} value in unsigned 7173 * binary form. 7174 * 7175 * @since 1.8 7176 */ 7177 public static final int BYTES = SIZE / Byte.SIZE; 7178 7179 /** 7180 * Returns the value obtained by reversing the order of the bytes in the 7181 * specified <tt>char</tt> value. 7182 * 7183 * @param ch The {@code char} of which to reverse the byte order. 7184 * @return the value obtained by reversing (or, equivalently, swapping) 7185 * the bytes in the specified <tt>char</tt> value. 
7186 * @since 1.5 7187 */ 7188 @HotSpotIntrinsicCandidate 7189 public static char reverseBytes(char ch) { 7190 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 7191 } 7192 7193 /** 7194 * Returns the Unicode name of the specified character 7195 * {@code codePoint}, or null if the code point is 7196 * {@link #UNASSIGNED unassigned}. 7197 * <p> 7198 * Note: if the specified character is not assigned a name by 7199 * the <i>UnicodeData</i> file (part of the Unicode Character 7200 * Database maintained by the Unicode Consortium), the returned 7201 * name is the same as the result of expression. 7202 * 7203 * <blockquote>{@code 7204 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 7205 * + " " 7206 * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 7207 * 7208 * }</blockquote> 7209 * 7210 * @param codePoint the character (Unicode code point) 7211 * 7212 * @return the Unicode name of the specified character, or null if 7213 * the code point is unassigned. 7214 * 7215 * @exception IllegalArgumentException if the specified 7216 * {@code codePoint} is not a valid Unicode 7217 * code point. 7218 * 7219 * @since 1.7 7220 */ 7221 public static String getName(int codePoint) { 7222 if (!isValidCodePoint(codePoint)) { 7223 throw new IllegalArgumentException(); 7224 } 7225 String name = CharacterName.get(codePoint); 7226 if (name != null) 7227 return name; 7228 if (getType(codePoint) == UNASSIGNED) 7229 return null; 7230 UnicodeBlock block = UnicodeBlock.of(codePoint); 7231 if (block != null) 7232 return block.toString().replace('_', ' ') + " " 7233 + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 7234 // should never come here 7235 return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 7236 } 7237 }