1 /* 2 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The {@code Character} class wraps a value of the primitive 35 * type {@code char} in an object. An object of type 36 * {@code Character} contains a single field whose type is 37 * {@code char}. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.2.0. 
44 * <p> 45 * The methods and data of class {@code Character} are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h3><a name="unicode">Unicode Character Representations</a></h3> 58 * 59 * <p>The {@code char} data type (and therefore the value that a 60 * {@code Character} object encapsulates) are based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode Standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * Standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in {@code char} arrays and 76 * in the {@code String} and {@code StringBuffer} classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of {@code char} values, the first from the <em>high-surrogates</em> 79 * range, (\uD800-\uDBFF), the second from the 80 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
81 * 82 * <p>A {@code char} value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * {@code int} value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of {@code int} are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate {@code char} values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a {@code char} value cannot support 95 * supplementary characters. They treat {@code char} values from the 96 * surrogate ranges as undefined characters. For example, 97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an {@code int} value support all 102 * Unicode characters, including supplementary characters. For 103 * example, {@code Character.isLetter(0x2F81A)} returns 104 * {@code true} because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * {@code char} values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * {@code digit} method, the {@code forDigit} method, and the 129 * {@code toString} method of class {@code Integer}. 130 * 131 * @see Character#digit(char, int) 132 * @see Character#forDigit(int, int) 133 * @see Integer#toString(int, int) 134 * @see Integer#valueOf(String) 135 */ 136 public static final int MIN_RADIX = 2; 137 138 /** 139 * The maximum radix available for conversion to and from strings. 140 * The constant value of this field is the largest value permitted 141 * for the radix argument in radix-conversion methods such as the 142 * {@code digit} method, the {@code forDigit} method, and the 143 * {@code toString} method of class {@code Integer}. 144 * 145 * @see Character#digit(char, int) 146 * @see Character#forDigit(int, int) 147 * @see Integer#toString(int, int) 148 * @see Integer#valueOf(String) 149 */ 150 public static final int MAX_RADIX = 36; 151 152 /** 153 * The constant value of this field is the smallest value of type 154 * {@code char}, {@code '\u005Cu0000'}. 155 * 156 * @since 1.0.2 157 */ 158 public static final char MIN_VALUE = '\u0000'; 159 160 /** 161 * The constant value of this field is the largest value of type 162 * {@code char}, {@code '\u005CuFFFF'}. 163 * 164 * @since 1.0.2 165 */ 166 public static final char MAX_VALUE = '\uFFFF'; 167 168 /** 169 * The {@code Class} instance representing the primitive type 170 * {@code char}. 
171 * 172 * @since 1.1 173 */ 174 @SuppressWarnings("unchecked") 175 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 176 177 /* 178 * Normative general types 179 */ 180 181 /* 182 * General character types 183 */ 184 185 /** 186 * General category "Cn" in the Unicode specification. 187 * @since 1.1 188 */ 189 public static final byte UNASSIGNED = 0; 190 191 /** 192 * General category "Lu" in the Unicode specification. 193 * @since 1.1 194 */ 195 public static final byte UPPERCASE_LETTER = 1; 196 197 /** 198 * General category "Ll" in the Unicode specification. 199 * @since 1.1 200 */ 201 public static final byte LOWERCASE_LETTER = 2; 202 203 /** 204 * General category "Lt" in the Unicode specification. 205 * @since 1.1 206 */ 207 public static final byte TITLECASE_LETTER = 3; 208 209 /** 210 * General category "Lm" in the Unicode specification. 211 * @since 1.1 212 */ 213 public static final byte MODIFIER_LETTER = 4; 214 215 /** 216 * General category "Lo" in the Unicode specification. 217 * @since 1.1 218 */ 219 public static final byte OTHER_LETTER = 5; 220 221 /** 222 * General category "Mn" in the Unicode specification. 223 * @since 1.1 224 */ 225 public static final byte NON_SPACING_MARK = 6; 226 227 /** 228 * General category "Me" in the Unicode specification. 229 * @since 1.1 230 */ 231 public static final byte ENCLOSING_MARK = 7; 232 233 /** 234 * General category "Mc" in the Unicode specification. 235 * @since 1.1 236 */ 237 public static final byte COMBINING_SPACING_MARK = 8; 238 239 /** 240 * General category "Nd" in the Unicode specification. 241 * @since 1.1 242 */ 243 public static final byte DECIMAL_DIGIT_NUMBER = 9; 244 245 /** 246 * General category "Nl" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte LETTER_NUMBER = 10; 250 251 /** 252 * General category "No" in the Unicode specification. 
253 * @since 1.1 254 */ 255 public static final byte OTHER_NUMBER = 11; 256 257 /** 258 * General category "Zs" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte SPACE_SEPARATOR = 12; 262 263 /** 264 * General category "Zl" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte LINE_SEPARATOR = 13; 268 269 /** 270 * General category "Zp" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte PARAGRAPH_SEPARATOR = 14; 274 275 /** 276 * General category "Cc" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte CONTROL = 15; 280 281 /** 282 * General category "Cf" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte FORMAT = 16; 286 287 /** 288 * General category "Co" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte PRIVATE_USE = 18; 292 293 /** 294 * General category "Cs" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte SURROGATE = 19; 298 299 /** 300 * General category "Pd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DASH_PUNCTUATION = 20; 304 305 /** 306 * General category "Ps" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte START_PUNCTUATION = 21; 310 311 /** 312 * General category "Pe" in the Unicode specification. 313 * @since 1.1 314 */ 315 public static final byte END_PUNCTUATION = 22; 316 317 /** 318 * General category "Pc" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte CONNECTOR_PUNCTUATION = 23; 322 323 /** 324 * General category "Po" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte OTHER_PUNCTUATION = 24; 328 329 /** 330 * General category "Sm" in the Unicode specification. 
331 * @since 1.1 332 */ 333 public static final byte MATH_SYMBOL = 25; 334 335 /** 336 * General category "Sc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CURRENCY_SYMBOL = 26; 340 341 /** 342 * General category "Sk" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte MODIFIER_SYMBOL = 27; 346 347 /** 348 * General category "So" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte OTHER_SYMBOL = 28; 352 353 /** 354 * General category "Pi" in the Unicode specification. 355 * @since 1.4 356 */ 357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 358 359 /** 360 * General category "Pf" in the Unicode specification. 361 * @since 1.4 362 */ 363 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 364 365 /** 366 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 367 */ 368 static final int ERROR = 0xFFFFFFFF; 369 370 371 /** 372 * Undefined bidirectional character type. Undefined {@code char} 373 * values have undefined directionality in the Unicode specification. 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_UNDEFINED = -1; 377 378 /** 379 * Strong bidirectional character type "L" in the Unicode specification. 380 * @since 1.4 381 */ 382 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 383 384 /** 385 * Strong bidirectional character type "R" in the Unicode specification. 386 * @since 1.4 387 */ 388 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 389 390 /** 391 * Strong bidirectional character type "AL" in the Unicode specification. 392 * @since 1.4 393 */ 394 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 395 396 /** 397 * Weak bidirectional character type "EN" in the Unicode specification. 398 * @since 1.4 399 */ 400 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 401 402 /** 403 * Weak bidirectional character type "ES" in the Unicode specification. 
404 * @since 1.4 405 */ 406 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 407 408 /** 409 * Weak bidirectional character type "ET" in the Unicode specification. 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 413 414 /** 415 * Weak bidirectional character type "AN" in the Unicode specification. 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 419 420 /** 421 * Weak bidirectional character type "CS" in the Unicode specification. 422 * @since 1.4 423 */ 424 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 425 426 /** 427 * Weak bidirectional character type "NSM" in the Unicode specification. 428 * @since 1.4 429 */ 430 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 431 432 /** 433 * Weak bidirectional character type "BN" in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 437 438 /** 439 * Neutral bidirectional character type "B" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 443 444 /** 445 * Neutral bidirectional character type "S" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 449 450 /** 451 * Neutral bidirectional character type "WS" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_WHITESPACE = 12; 455 456 /** 457 * Neutral bidirectional character type "ON" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 461 462 /** 463 * Strong bidirectional character type "LRE" in the Unicode specification. 464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 467 468 /** 469 * Strong bidirectional character type "LRO" in the Unicode specification. 
470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 473 474 /** 475 * Strong bidirectional character type "RLE" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 479 480 /** 481 * Strong bidirectional character type "RLO" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 485 486 /** 487 * Weak bidirectional character type "PDF" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 491 492 /** 493 * The minimum value of a 494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 495 * Unicode high-surrogate code unit</a> 496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 497 * A high-surrogate is also known as a <i>leading-surrogate</i>. 498 * 499 * @since 1.5 500 */ 501 public static final char MIN_HIGH_SURROGATE = '\uD800'; 502 503 /** 504 * The maximum value of a 505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 506 * Unicode high-surrogate code unit</a> 507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 508 * A high-surrogate is also known as a <i>leading-surrogate</i>. 509 * 510 * @since 1.5 511 */ 512 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 513 514 /** 515 * The minimum value of a 516 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 517 * Unicode low-surrogate code unit</a> 518 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 519 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
520 * 521 * @since 1.5 522 */ 523 public static final char MIN_LOW_SURROGATE = '\uDC00'; 524 525 /** 526 * The maximum value of a 527 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 528 * Unicode low-surrogate code unit</a> 529 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 530 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 531 * 532 * @since 1.5 533 */ 534 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 535 536 /** 537 * The minimum value of a Unicode surrogate code unit in the 538 * UTF-16 encoding, constant {@code '\u005CuD800'}. 539 * 540 * @since 1.5 541 */ 542 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 543 544 /** 545 * The maximum value of a Unicode surrogate code unit in the 546 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 547 * 548 * @since 1.5 549 */ 550 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 551 552 /** 553 * The minimum value of a 554 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 555 * Unicode supplementary code point</a>, constant {@code U+10000}. 556 * 557 * @since 1.5 558 */ 559 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 560 561 /** 562 * The minimum value of a 563 * <a href="http://www.unicode.org/glossary/#code_point"> 564 * Unicode code point</a>, constant {@code U+0000}. 565 * 566 * @since 1.5 567 */ 568 public static final int MIN_CODE_POINT = 0x000000; 569 570 /** 571 * The maximum value of a 572 * <a href="http://www.unicode.org/glossary/#code_point"> 573 * Unicode code point</a>, constant {@code U+10FFFF}. 574 * 575 * @since 1.5 576 */ 577 public static final int MAX_CODE_POINT = 0X10FFFF; 578 579 580 /** 581 * Instances of this class represent particular subsets of the Unicode 582 * character set. The only family of subsets defined in the 583 * {@code Character} class is {@link Character.UnicodeBlock}. 584 * Other portions of the Java API may define other subsets for their 585 * own purposes. 
586 * 587 * @since 1.2 588 */ 589 public static class Subset { 590 591 private String name; 592 593 /** 594 * Constructs a new {@code Subset} instance. 595 * 596 * @param name The name of this subset 597 * @exception NullPointerException if name is {@code null} 598 */ 599 protected Subset(String name) { 600 if (name == null) { 601 throw new NullPointerException("name"); 602 } 603 this.name = name; 604 } 605 606 /** 607 * Compares two {@code Subset} objects for equality. 608 * This method returns {@code true} if and only if 609 * {@code this} and the argument refer to the same 610 * object; since this method is {@code final}, this 611 * guarantee holds for all subclasses. 612 */ 613 public final boolean equals(Object obj) { 614 return (this == obj); 615 } 616 617 /** 618 * Returns the standard hash code as defined by the 619 * {@link Object#hashCode} method. This method 620 * is {@code final} in order to ensure that the 621 * {@code equals} and {@code hashCode} methods will 622 * be consistent in all subclasses. 623 */ 624 public final int hashCode() { 625 return super.hashCode(); 626 } 627 628 /** 629 * Returns the name of this subset. 630 */ 631 public final String toString() { 632 return name; 633 } 634 } 635 636 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 637 // for the latest specification of Unicode Blocks. 638 639 /** 640 * A family of character subsets representing the character blocks in the 641 * Unicode specification. Character blocks generally define characters 642 * used for a specific script or purpose. A character is contained by 643 * at most one Unicode block. 644 * 645 * @since 1.2 646 */ 647 public static final class UnicodeBlock extends Subset { 648 private static final int EXPECTED_MAP_SIZE = 510; 649 private static Map<String, UnicodeBlock> map = 650 new HashMap<>(EXPECTED_MAP_SIZE); 651 652 /** 653 * Creates a UnicodeBlock with the given identifier name. 654 * This name must be the same as the block identifier. 
655 */ 656 private UnicodeBlock(String idName) { 657 super(idName); 658 map.put(idName, this); 659 } 660 661 /** 662 * Creates a UnicodeBlock with the given identifier name and 663 * alias name. 664 */ 665 private UnicodeBlock(String idName, String alias) { 666 this(idName); 667 map.put(alias, this); 668 } 669 670 /** 671 * Creates a UnicodeBlock with the given identifier name and 672 * alias names. 673 */ 674 private UnicodeBlock(String idName, String... aliases) { 675 this(idName); 676 for (String alias : aliases) 677 map.put(alias, this); 678 } 679 680 /** 681 * Constant for the "Basic Latin" Unicode character block. 682 * @since 1.2 683 */ 684 public static final UnicodeBlock BASIC_LATIN = 685 new UnicodeBlock("BASIC_LATIN", 686 "BASIC LATIN", 687 "BASICLATIN"); 688 689 /** 690 * Constant for the "Latin-1 Supplement" Unicode character block. 691 * @since 1.2 692 */ 693 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 694 new UnicodeBlock("LATIN_1_SUPPLEMENT", 695 "LATIN-1 SUPPLEMENT", 696 "LATIN-1SUPPLEMENT"); 697 698 /** 699 * Constant for the "Latin Extended-A" Unicode character block. 700 * @since 1.2 701 */ 702 public static final UnicodeBlock LATIN_EXTENDED_A = 703 new UnicodeBlock("LATIN_EXTENDED_A", 704 "LATIN EXTENDED-A", 705 "LATINEXTENDED-A"); 706 707 /** 708 * Constant for the "Latin Extended-B" Unicode character block. 709 * @since 1.2 710 */ 711 public static final UnicodeBlock LATIN_EXTENDED_B = 712 new UnicodeBlock("LATIN_EXTENDED_B", 713 "LATIN EXTENDED-B", 714 "LATINEXTENDED-B"); 715 716 /** 717 * Constant for the "IPA Extensions" Unicode character block. 718 * @since 1.2 719 */ 720 public static final UnicodeBlock IPA_EXTENSIONS = 721 new UnicodeBlock("IPA_EXTENSIONS", 722 "IPA EXTENSIONS", 723 "IPAEXTENSIONS"); 724 725 /** 726 * Constant for the "Spacing Modifier Letters" Unicode character block. 
727 * @since 1.2 728 */ 729 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 730 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 731 "SPACING MODIFIER LETTERS", 732 "SPACINGMODIFIERLETTERS"); 733 734 /** 735 * Constant for the "Combining Diacritical Marks" Unicode character block. 736 * @since 1.2 737 */ 738 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 739 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 740 "COMBINING DIACRITICAL MARKS", 741 "COMBININGDIACRITICALMARKS"); 742 743 /** 744 * Constant for the "Greek and Coptic" Unicode character block. 745 * <p> 746 * This block was previously known as the "Greek" block. 747 * 748 * @since 1.2 749 */ 750 public static final UnicodeBlock GREEK = 751 new UnicodeBlock("GREEK", 752 "GREEK AND COPTIC", 753 "GREEKANDCOPTIC"); 754 755 /** 756 * Constant for the "Cyrillic" Unicode character block. 757 * @since 1.2 758 */ 759 public static final UnicodeBlock CYRILLIC = 760 new UnicodeBlock("CYRILLIC"); 761 762 /** 763 * Constant for the "Armenian" Unicode character block. 764 * @since 1.2 765 */ 766 public static final UnicodeBlock ARMENIAN = 767 new UnicodeBlock("ARMENIAN"); 768 769 /** 770 * Constant for the "Hebrew" Unicode character block. 771 * @since 1.2 772 */ 773 public static final UnicodeBlock HEBREW = 774 new UnicodeBlock("HEBREW"); 775 776 /** 777 * Constant for the "Arabic" Unicode character block. 778 * @since 1.2 779 */ 780 public static final UnicodeBlock ARABIC = 781 new UnicodeBlock("ARABIC"); 782 783 /** 784 * Constant for the "Devanagari" Unicode character block. 785 * @since 1.2 786 */ 787 public static final UnicodeBlock DEVANAGARI = 788 new UnicodeBlock("DEVANAGARI"); 789 790 /** 791 * Constant for the "Bengali" Unicode character block. 792 * @since 1.2 793 */ 794 public static final UnicodeBlock BENGALI = 795 new UnicodeBlock("BENGALI"); 796 797 /** 798 * Constant for the "Gurmukhi" Unicode character block. 
799 * @since 1.2 800 */ 801 public static final UnicodeBlock GURMUKHI = 802 new UnicodeBlock("GURMUKHI"); 803 804 /** 805 * Constant for the "Gujarati" Unicode character block. 806 * @since 1.2 807 */ 808 public static final UnicodeBlock GUJARATI = 809 new UnicodeBlock("GUJARATI"); 810 811 /** 812 * Constant for the "Oriya" Unicode character block. 813 * @since 1.2 814 */ 815 public static final UnicodeBlock ORIYA = 816 new UnicodeBlock("ORIYA"); 817 818 /** 819 * Constant for the "Tamil" Unicode character block. 820 * @since 1.2 821 */ 822 public static final UnicodeBlock TAMIL = 823 new UnicodeBlock("TAMIL"); 824 825 /** 826 * Constant for the "Telugu" Unicode character block. 827 * @since 1.2 828 */ 829 public static final UnicodeBlock TELUGU = 830 new UnicodeBlock("TELUGU"); 831 832 /** 833 * Constant for the "Kannada" Unicode character block. 834 * @since 1.2 835 */ 836 public static final UnicodeBlock KANNADA = 837 new UnicodeBlock("KANNADA"); 838 839 /** 840 * Constant for the "Malayalam" Unicode character block. 841 * @since 1.2 842 */ 843 public static final UnicodeBlock MALAYALAM = 844 new UnicodeBlock("MALAYALAM"); 845 846 /** 847 * Constant for the "Thai" Unicode character block. 848 * @since 1.2 849 */ 850 public static final UnicodeBlock THAI = 851 new UnicodeBlock("THAI"); 852 853 /** 854 * Constant for the "Lao" Unicode character block. 855 * @since 1.2 856 */ 857 public static final UnicodeBlock LAO = 858 new UnicodeBlock("LAO"); 859 860 /** 861 * Constant for the "Tibetan" Unicode character block. 862 * @since 1.2 863 */ 864 public static final UnicodeBlock TIBETAN = 865 new UnicodeBlock("TIBETAN"); 866 867 /** 868 * Constant for the "Georgian" Unicode character block. 869 * @since 1.2 870 */ 871 public static final UnicodeBlock GEORGIAN = 872 new UnicodeBlock("GEORGIAN"); 873 874 /** 875 * Constant for the "Hangul Jamo" Unicode character block. 
876 * @since 1.2 877 */ 878 public static final UnicodeBlock HANGUL_JAMO = 879 new UnicodeBlock("HANGUL_JAMO", 880 "HANGUL JAMO", 881 "HANGULJAMO"); 882 883 /** 884 * Constant for the "Latin Extended Additional" Unicode character block. 885 * @since 1.2 886 */ 887 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 888 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 889 "LATIN EXTENDED ADDITIONAL", 890 "LATINEXTENDEDADDITIONAL"); 891 892 /** 893 * Constant for the "Greek Extended" Unicode character block. 894 * @since 1.2 895 */ 896 public static final UnicodeBlock GREEK_EXTENDED = 897 new UnicodeBlock("GREEK_EXTENDED", 898 "GREEK EXTENDED", 899 "GREEKEXTENDED"); 900 901 /** 902 * Constant for the "General Punctuation" Unicode character block. 903 * @since 1.2 904 */ 905 public static final UnicodeBlock GENERAL_PUNCTUATION = 906 new UnicodeBlock("GENERAL_PUNCTUATION", 907 "GENERAL PUNCTUATION", 908 "GENERALPUNCTUATION"); 909 910 /** 911 * Constant for the "Superscripts and Subscripts" Unicode character 912 * block. 913 * @since 1.2 914 */ 915 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 916 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 917 "SUPERSCRIPTS AND SUBSCRIPTS", 918 "SUPERSCRIPTSANDSUBSCRIPTS"); 919 920 /** 921 * Constant for the "Currency Symbols" Unicode character block. 922 * @since 1.2 923 */ 924 public static final UnicodeBlock CURRENCY_SYMBOLS = 925 new UnicodeBlock("CURRENCY_SYMBOLS", 926 "CURRENCY SYMBOLS", 927 "CURRENCYSYMBOLS"); 928 929 /** 930 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 931 * character block. 932 * <p> 933 * This block was previously known as "Combining Marks for Symbols". 
934 * @since 1.2 935 */ 936 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 937 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 938 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 939 "COMBININGDIACRITICALMARKSFORSYMBOLS", 940 "COMBINING MARKS FOR SYMBOLS", 941 "COMBININGMARKSFORSYMBOLS"); 942 943 /** 944 * Constant for the "Letterlike Symbols" Unicode character block. 945 * @since 1.2 946 */ 947 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 948 new UnicodeBlock("LETTERLIKE_SYMBOLS", 949 "LETTERLIKE SYMBOLS", 950 "LETTERLIKESYMBOLS"); 951 952 /** 953 * Constant for the "Number Forms" Unicode character block. 954 * @since 1.2 955 */ 956 public static final UnicodeBlock NUMBER_FORMS = 957 new UnicodeBlock("NUMBER_FORMS", 958 "NUMBER FORMS", 959 "NUMBERFORMS"); 960 961 /** 962 * Constant for the "Arrows" Unicode character block. 963 * @since 1.2 964 */ 965 public static final UnicodeBlock ARROWS = 966 new UnicodeBlock("ARROWS"); 967 968 /** 969 * Constant for the "Mathematical Operators" Unicode character block. 970 * @since 1.2 971 */ 972 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 973 new UnicodeBlock("MATHEMATICAL_OPERATORS", 974 "MATHEMATICAL OPERATORS", 975 "MATHEMATICALOPERATORS"); 976 977 /** 978 * Constant for the "Miscellaneous Technical" Unicode character block. 979 * @since 1.2 980 */ 981 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 982 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 983 "MISCELLANEOUS TECHNICAL", 984 "MISCELLANEOUSTECHNICAL"); 985 986 /** 987 * Constant for the "Control Pictures" Unicode character block. 988 * @since 1.2 989 */ 990 public static final UnicodeBlock CONTROL_PICTURES = 991 new UnicodeBlock("CONTROL_PICTURES", 992 "CONTROL PICTURES", 993 "CONTROLPICTURES"); 994 995 /** 996 * Constant for the "Optical Character Recognition" Unicode character block. 
997 * @since 1.2 998 */ 999 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1000 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1001 "OPTICAL CHARACTER RECOGNITION", 1002 "OPTICALCHARACTERRECOGNITION"); 1003 1004 /** 1005 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1006 * @since 1.2 1007 */ 1008 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1009 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1010 "ENCLOSED ALPHANUMERICS", 1011 "ENCLOSEDALPHANUMERICS"); 1012 1013 /** 1014 * Constant for the "Box Drawing" Unicode character block. 1015 * @since 1.2 1016 */ 1017 public static final UnicodeBlock BOX_DRAWING = 1018 new UnicodeBlock("BOX_DRAWING", 1019 "BOX DRAWING", 1020 "BOXDRAWING"); 1021 1022 /** 1023 * Constant for the "Block Elements" Unicode character block. 1024 * @since 1.2 1025 */ 1026 public static final UnicodeBlock BLOCK_ELEMENTS = 1027 new UnicodeBlock("BLOCK_ELEMENTS", 1028 "BLOCK ELEMENTS", 1029 "BLOCKELEMENTS"); 1030 1031 /** 1032 * Constant for the "Geometric Shapes" Unicode character block. 1033 * @since 1.2 1034 */ 1035 public static final UnicodeBlock GEOMETRIC_SHAPES = 1036 new UnicodeBlock("GEOMETRIC_SHAPES", 1037 "GEOMETRIC SHAPES", 1038 "GEOMETRICSHAPES"); 1039 1040 /** 1041 * Constant for the "Miscellaneous Symbols" Unicode character block. 1042 * @since 1.2 1043 */ 1044 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1045 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1046 "MISCELLANEOUS SYMBOLS", 1047 "MISCELLANEOUSSYMBOLS"); 1048 1049 /** 1050 * Constant for the "Dingbats" Unicode character block. 1051 * @since 1.2 1052 */ 1053 public static final UnicodeBlock DINGBATS = 1054 new UnicodeBlock("DINGBATS"); 1055 1056 /** 1057 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1058 * @since 1.2 1059 */ 1060 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1061 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1062 "CJK SYMBOLS AND PUNCTUATION", 1063 "CJKSYMBOLSANDPUNCTUATION"); 1064 1065 /** 1066 * Constant for the "Hiragana" Unicode character block. 1067 * @since 1.2 1068 */ 1069 public static final UnicodeBlock HIRAGANA = 1070 new UnicodeBlock("HIRAGANA"); 1071 1072 /** 1073 * Constant for the "Katakana" Unicode character block. 1074 * @since 1.2 1075 */ 1076 public static final UnicodeBlock KATAKANA = 1077 new UnicodeBlock("KATAKANA"); 1078 1079 /** 1080 * Constant for the "Bopomofo" Unicode character block. 1081 * @since 1.2 1082 */ 1083 public static final UnicodeBlock BOPOMOFO = 1084 new UnicodeBlock("BOPOMOFO"); 1085 1086 /** 1087 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1088 * @since 1.2 1089 */ 1090 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1091 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1092 "HANGUL COMPATIBILITY JAMO", 1093 "HANGULCOMPATIBILITYJAMO"); 1094 1095 /** 1096 * Constant for the "Kanbun" Unicode character block. 1097 * @since 1.2 1098 */ 1099 public static final UnicodeBlock KANBUN = 1100 new UnicodeBlock("KANBUN"); 1101 1102 /** 1103 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1104 * @since 1.2 1105 */ 1106 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1107 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1108 "ENCLOSED CJK LETTERS AND MONTHS", 1109 "ENCLOSEDCJKLETTERSANDMONTHS"); 1110 1111 /** 1112 * Constant for the "CJK Compatibility" Unicode character block. 1113 * @since 1.2 1114 */ 1115 public static final UnicodeBlock CJK_COMPATIBILITY = 1116 new UnicodeBlock("CJK_COMPATIBILITY", 1117 "CJK COMPATIBILITY", 1118 "CJKCOMPATIBILITY"); 1119 1120 /** 1121 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1122 * @since 1.2 1123 */ 1124 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1125 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1126 "CJK UNIFIED IDEOGRAPHS", 1127 "CJKUNIFIEDIDEOGRAPHS"); 1128 1129 /** 1130 * Constant for the "Hangul Syllables" Unicode character block. 1131 * @since 1.2 1132 */ 1133 public static final UnicodeBlock HANGUL_SYLLABLES = 1134 new UnicodeBlock("HANGUL_SYLLABLES", 1135 "HANGUL SYLLABLES", 1136 "HANGULSYLLABLES"); 1137 1138 /** 1139 * Constant for the "Private Use Area" Unicode character block. 1140 * @since 1.2 1141 */ 1142 public static final UnicodeBlock PRIVATE_USE_AREA = 1143 new UnicodeBlock("PRIVATE_USE_AREA", 1144 "PRIVATE USE AREA", 1145 "PRIVATEUSEAREA"); 1146 1147 /** 1148 * Constant for the "CJK Compatibility Ideographs" Unicode character 1149 * block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1153 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1154 "CJK COMPATIBILITY IDEOGRAPHS", 1155 "CJKCOMPATIBILITYIDEOGRAPHS"); 1156 1157 /** 1158 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1159 * @since 1.2 1160 */ 1161 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1162 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1163 "ALPHABETIC PRESENTATION FORMS", 1164 "ALPHABETICPRESENTATIONFORMS"); 1165 1166 /** 1167 * Constant for the "Arabic Presentation Forms-A" Unicode character 1168 * block. 1169 * @since 1.2 1170 */ 1171 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1172 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1173 "ARABIC PRESENTATION FORMS-A", 1174 "ARABICPRESENTATIONFORMS-A"); 1175 1176 /** 1177 * Constant for the "Combining Half Marks" Unicode character block. 
1178 * @since 1.2 1179 */ 1180 public static final UnicodeBlock COMBINING_HALF_MARKS = 1181 new UnicodeBlock("COMBINING_HALF_MARKS", 1182 "COMBINING HALF MARKS", 1183 "COMBININGHALFMARKS"); 1184 1185 /** 1186 * Constant for the "CJK Compatibility Forms" Unicode character block. 1187 * @since 1.2 1188 */ 1189 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1190 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1191 "CJK COMPATIBILITY FORMS", 1192 "CJKCOMPATIBILITYFORMS"); 1193 1194 /** 1195 * Constant for the "Small Form Variants" Unicode character block. 1196 * @since 1.2 1197 */ 1198 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1199 new UnicodeBlock("SMALL_FORM_VARIANTS", 1200 "SMALL FORM VARIANTS", 1201 "SMALLFORMVARIANTS"); 1202 1203 /** 1204 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1205 * @since 1.2 1206 */ 1207 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1208 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1209 "ARABIC PRESENTATION FORMS-B", 1210 "ARABICPRESENTATIONFORMS-B"); 1211 1212 /** 1213 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1214 * block. 1215 * @since 1.2 1216 */ 1217 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1218 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1219 "HALFWIDTH AND FULLWIDTH FORMS", 1220 "HALFWIDTHANDFULLWIDTHFORMS"); 1221 1222 /** 1223 * Constant for the "Specials" Unicode character block. 1224 * @since 1.2 1225 */ 1226 public static final UnicodeBlock SPECIALS = 1227 new UnicodeBlock("SPECIALS"); 1228 1229 /** 1230 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1231 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1232 * {@link #LOW_SURROGATES}. These new constants match 1233 * the block definitions of the Unicode Standard. 1234 * The {@link #of(char)} and {@link #of(int)} methods 1235 * return the new constants, not SURROGATES_AREA. 
1236 */ 1237 @Deprecated 1238 public static final UnicodeBlock SURROGATES_AREA = 1239 new UnicodeBlock("SURROGATES_AREA"); 1240 1241 /** 1242 * Constant for the "Syriac" Unicode character block. 1243 * @since 1.4 1244 */ 1245 public static final UnicodeBlock SYRIAC = 1246 new UnicodeBlock("SYRIAC"); 1247 1248 /** 1249 * Constant for the "Thaana" Unicode character block. 1250 * @since 1.4 1251 */ 1252 public static final UnicodeBlock THAANA = 1253 new UnicodeBlock("THAANA"); 1254 1255 /** 1256 * Constant for the "Sinhala" Unicode character block. 1257 * @since 1.4 1258 */ 1259 public static final UnicodeBlock SINHALA = 1260 new UnicodeBlock("SINHALA"); 1261 1262 /** 1263 * Constant for the "Myanmar" Unicode character block. 1264 * @since 1.4 1265 */ 1266 public static final UnicodeBlock MYANMAR = 1267 new UnicodeBlock("MYANMAR"); 1268 1269 /** 1270 * Constant for the "Ethiopic" Unicode character block. 1271 * @since 1.4 1272 */ 1273 public static final UnicodeBlock ETHIOPIC = 1274 new UnicodeBlock("ETHIOPIC"); 1275 1276 /** 1277 * Constant for the "Cherokee" Unicode character block. 1278 * @since 1.4 1279 */ 1280 public static final UnicodeBlock CHEROKEE = 1281 new UnicodeBlock("CHEROKEE"); 1282 1283 /** 1284 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1285 * @since 1.4 1286 */ 1287 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1288 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1289 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1290 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1291 1292 /** 1293 * Constant for the "Ogham" Unicode character block. 1294 * @since 1.4 1295 */ 1296 public static final UnicodeBlock OGHAM = 1297 new UnicodeBlock("OGHAM"); 1298 1299 /** 1300 * Constant for the "Runic" Unicode character block. 1301 * @since 1.4 1302 */ 1303 public static final UnicodeBlock RUNIC = 1304 new UnicodeBlock("RUNIC"); 1305 1306 /** 1307 * Constant for the "Khmer" Unicode character block. 
1308 * @since 1.4 1309 */ 1310 public static final UnicodeBlock KHMER = 1311 new UnicodeBlock("KHMER"); 1312 1313 /** 1314 * Constant for the "Mongolian" Unicode character block. 1315 * @since 1.4 1316 */ 1317 public static final UnicodeBlock MONGOLIAN = 1318 new UnicodeBlock("MONGOLIAN"); 1319 1320 /** 1321 * Constant for the "Braille Patterns" Unicode character block. 1322 * @since 1.4 1323 */ 1324 public static final UnicodeBlock BRAILLE_PATTERNS = 1325 new UnicodeBlock("BRAILLE_PATTERNS", 1326 "BRAILLE PATTERNS", 1327 "BRAILLEPATTERNS"); 1328 1329 /** 1330 * Constant for the "CJK Radicals Supplement" Unicode character block. 1331 * @since 1.4 1332 */ 1333 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1334 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1335 "CJK RADICALS SUPPLEMENT", 1336 "CJKRADICALSSUPPLEMENT"); 1337 1338 /** 1339 * Constant for the "Kangxi Radicals" Unicode character block. 1340 * @since 1.4 1341 */ 1342 public static final UnicodeBlock KANGXI_RADICALS = 1343 new UnicodeBlock("KANGXI_RADICALS", 1344 "KANGXI RADICALS", 1345 "KANGXIRADICALS"); 1346 1347 /** 1348 * Constant for the "Ideographic Description Characters" Unicode character block. 1349 * @since 1.4 1350 */ 1351 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1352 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1353 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1354 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1355 1356 /** 1357 * Constant for the "Bopomofo Extended" Unicode character block. 1358 * @since 1.4 1359 */ 1360 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1361 new UnicodeBlock("BOPOMOFO_EXTENDED", 1362 "BOPOMOFO EXTENDED", 1363 "BOPOMOFOEXTENDED"); 1364 1365 /** 1366 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1367 * @since 1.4 1368 */ 1369 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1370 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1371 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1372 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1373 1374 /** 1375 * Constant for the "Yi Syllables" Unicode character block. 1376 * @since 1.4 1377 */ 1378 public static final UnicodeBlock YI_SYLLABLES = 1379 new UnicodeBlock("YI_SYLLABLES", 1380 "YI SYLLABLES", 1381 "YISYLLABLES"); 1382 1383 /** 1384 * Constant for the "Yi Radicals" Unicode character block. 1385 * @since 1.4 1386 */ 1387 public static final UnicodeBlock YI_RADICALS = 1388 new UnicodeBlock("YI_RADICALS", 1389 "YI RADICALS", 1390 "YIRADICALS"); 1391 1392 /** 1393 * Constant for the "Cyrillic Supplementary" Unicode character block. 1394 * @since 1.5 1395 */ 1396 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1397 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1398 "CYRILLIC SUPPLEMENTARY", 1399 "CYRILLICSUPPLEMENTARY", 1400 "CYRILLIC SUPPLEMENT", 1401 "CYRILLICSUPPLEMENT"); 1402 1403 /** 1404 * Constant for the "Tagalog" Unicode character block. 1405 * @since 1.5 1406 */ 1407 public static final UnicodeBlock TAGALOG = 1408 new UnicodeBlock("TAGALOG"); 1409 1410 /** 1411 * Constant for the "Hanunoo" Unicode character block. 1412 * @since 1.5 1413 */ 1414 public static final UnicodeBlock HANUNOO = 1415 new UnicodeBlock("HANUNOO"); 1416 1417 /** 1418 * Constant for the "Buhid" Unicode character block. 1419 * @since 1.5 1420 */ 1421 public static final UnicodeBlock BUHID = 1422 new UnicodeBlock("BUHID"); 1423 1424 /** 1425 * Constant for the "Tagbanwa" Unicode character block. 1426 * @since 1.5 1427 */ 1428 public static final UnicodeBlock TAGBANWA = 1429 new UnicodeBlock("TAGBANWA"); 1430 1431 /** 1432 * Constant for the "Limbu" Unicode character block. 
1433 * @since 1.5 1434 */ 1435 public static final UnicodeBlock LIMBU = 1436 new UnicodeBlock("LIMBU"); 1437 1438 /** 1439 * Constant for the "Tai Le" Unicode character block. 1440 * @since 1.5 1441 */ 1442 public static final UnicodeBlock TAI_LE = 1443 new UnicodeBlock("TAI_LE", 1444 "TAI LE", 1445 "TAILE"); 1446 1447 /** 1448 * Constant for the "Khmer Symbols" Unicode character block. 1449 * @since 1.5 1450 */ 1451 public static final UnicodeBlock KHMER_SYMBOLS = 1452 new UnicodeBlock("KHMER_SYMBOLS", 1453 "KHMER SYMBOLS", 1454 "KHMERSYMBOLS"); 1455 1456 /** 1457 * Constant for the "Phonetic Extensions" Unicode character block. 1458 * @since 1.5 1459 */ 1460 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1461 new UnicodeBlock("PHONETIC_EXTENSIONS", 1462 "PHONETIC EXTENSIONS", 1463 "PHONETICEXTENSIONS"); 1464 1465 /** 1466 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1467 * @since 1.5 1468 */ 1469 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1470 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1471 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1472 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1473 1474 /** 1475 * Constant for the "Supplemental Arrows-A" Unicode character block. 1476 * @since 1.5 1477 */ 1478 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1479 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1480 "SUPPLEMENTAL ARROWS-A", 1481 "SUPPLEMENTALARROWS-A"); 1482 1483 /** 1484 * Constant for the "Supplemental Arrows-B" Unicode character block. 1485 * @since 1.5 1486 */ 1487 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1488 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1489 "SUPPLEMENTAL ARROWS-B", 1490 "SUPPLEMENTALARROWS-B"); 1491 1492 /** 1493 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1494 * character block. 
1495 * @since 1.5 1496 */ 1497 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1498 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1499 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1500 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1501 1502 /** 1503 * Constant for the "Supplemental Mathematical Operators" Unicode 1504 * character block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1508 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1509 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1510 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1511 1512 /** 1513 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1514 * block. 1515 * @since 1.5 1516 */ 1517 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1518 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1519 "MISCELLANEOUS SYMBOLS AND ARROWS", 1520 "MISCELLANEOUSSYMBOLSANDARROWS"); 1521 1522 /** 1523 * Constant for the "Katakana Phonetic Extensions" Unicode character 1524 * block. 1525 * @since 1.5 1526 */ 1527 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1528 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1529 "KATAKANA PHONETIC EXTENSIONS", 1530 "KATAKANAPHONETICEXTENSIONS"); 1531 1532 /** 1533 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1534 * @since 1.5 1535 */ 1536 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1537 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1538 "YIJING HEXAGRAM SYMBOLS", 1539 "YIJINGHEXAGRAMSYMBOLS"); 1540 1541 /** 1542 * Constant for the "Variation Selectors" Unicode character block. 1543 * @since 1.5 1544 */ 1545 public static final UnicodeBlock VARIATION_SELECTORS = 1546 new UnicodeBlock("VARIATION_SELECTORS", 1547 "VARIATION SELECTORS", 1548 "VARIATIONSELECTORS"); 1549 1550 /** 1551 * Constant for the "Linear B Syllabary" Unicode character block. 
1552 * @since 1.5 1553 */ 1554 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1555 new UnicodeBlock("LINEAR_B_SYLLABARY", 1556 "LINEAR B SYLLABARY", 1557 "LINEARBSYLLABARY"); 1558 1559 /** 1560 * Constant for the "Linear B Ideograms" Unicode character block. 1561 * @since 1.5 1562 */ 1563 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1564 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1565 "LINEAR B IDEOGRAMS", 1566 "LINEARBIDEOGRAMS"); 1567 1568 /** 1569 * Constant for the "Aegean Numbers" Unicode character block. 1570 * @since 1.5 1571 */ 1572 public static final UnicodeBlock AEGEAN_NUMBERS = 1573 new UnicodeBlock("AEGEAN_NUMBERS", 1574 "AEGEAN NUMBERS", 1575 "AEGEANNUMBERS"); 1576 1577 /** 1578 * Constant for the "Old Italic" Unicode character block. 1579 * @since 1.5 1580 */ 1581 public static final UnicodeBlock OLD_ITALIC = 1582 new UnicodeBlock("OLD_ITALIC", 1583 "OLD ITALIC", 1584 "OLDITALIC"); 1585 1586 /** 1587 * Constant for the "Gothic" Unicode character block. 1588 * @since 1.5 1589 */ 1590 public static final UnicodeBlock GOTHIC = 1591 new UnicodeBlock("GOTHIC"); 1592 1593 /** 1594 * Constant for the "Ugaritic" Unicode character block. 1595 * @since 1.5 1596 */ 1597 public static final UnicodeBlock UGARITIC = 1598 new UnicodeBlock("UGARITIC"); 1599 1600 /** 1601 * Constant for the "Deseret" Unicode character block. 1602 * @since 1.5 1603 */ 1604 public static final UnicodeBlock DESERET = 1605 new UnicodeBlock("DESERET"); 1606 1607 /** 1608 * Constant for the "Shavian" Unicode character block. 1609 * @since 1.5 1610 */ 1611 public static final UnicodeBlock SHAVIAN = 1612 new UnicodeBlock("SHAVIAN"); 1613 1614 /** 1615 * Constant for the "Osmanya" Unicode character block. 1616 * @since 1.5 1617 */ 1618 public static final UnicodeBlock OSMANYA = 1619 new UnicodeBlock("OSMANYA"); 1620 1621 /** 1622 * Constant for the "Cypriot Syllabary" Unicode character block. 
1623 * @since 1.5 1624 */ 1625 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1626 new UnicodeBlock("CYPRIOT_SYLLABARY", 1627 "CYPRIOT SYLLABARY", 1628 "CYPRIOTSYLLABARY"); 1629 1630 /** 1631 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1632 * @since 1.5 1633 */ 1634 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1635 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1636 "BYZANTINE MUSICAL SYMBOLS", 1637 "BYZANTINEMUSICALSYMBOLS"); 1638 1639 /** 1640 * Constant for the "Musical Symbols" Unicode character block. 1641 * @since 1.5 1642 */ 1643 public static final UnicodeBlock MUSICAL_SYMBOLS = 1644 new UnicodeBlock("MUSICAL_SYMBOLS", 1645 "MUSICAL SYMBOLS", 1646 "MUSICALSYMBOLS"); 1647 1648 /** 1649 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1650 * @since 1.5 1651 */ 1652 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1653 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1654 "TAI XUAN JING SYMBOLS", 1655 "TAIXUANJINGSYMBOLS"); 1656 1657 /** 1658 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1659 * character block. 1660 * @since 1.5 1661 */ 1662 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1663 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1664 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1665 "MATHEMATICALALPHANUMERICSYMBOLS"); 1666 1667 /** 1668 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1669 * character block. 1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1673 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1674 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1675 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1676 1677 /** 1678 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1682 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1683 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1684 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1685 1686 /** 1687 * Constant for the "Tags" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock TAGS = 1691 new UnicodeBlock("TAGS"); 1692 1693 /** 1694 * Constant for the "Variation Selectors Supplement" Unicode character 1695 * block. 1696 * @since 1.5 1697 */ 1698 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1699 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1700 "VARIATION SELECTORS SUPPLEMENT", 1701 "VARIATIONSELECTORSSUPPLEMENT"); 1702 1703 /** 1704 * Constant for the "Supplementary Private Use Area-A" Unicode character 1705 * block. 1706 * @since 1.5 1707 */ 1708 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1709 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1710 "SUPPLEMENTARY PRIVATE USE AREA-A", 1711 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1712 1713 /** 1714 * Constant for the "Supplementary Private Use Area-B" Unicode character 1715 * block. 1716 * @since 1.5 1717 */ 1718 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1719 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1720 "SUPPLEMENTARY PRIVATE USE AREA-B", 1721 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1722 1723 /** 1724 * Constant for the "High Surrogates" Unicode character block. 1725 * This block represents codepoint values in the high surrogate 1726 * range: U+D800 through U+DB7F 1727 * 1728 * @since 1.5 1729 */ 1730 public static final UnicodeBlock HIGH_SURROGATES = 1731 new UnicodeBlock("HIGH_SURROGATES", 1732 "HIGH SURROGATES", 1733 "HIGHSURROGATES"); 1734 1735 /** 1736 * Constant for the "High Private Use Surrogates" Unicode character 1737 * block. 
1738 * This block represents codepoint values in the private use high 1739 * surrogate range: U+DB80 through U+DBFF 1740 * 1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1744 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1745 "HIGH PRIVATE USE SURROGATES", 1746 "HIGHPRIVATEUSESURROGATES"); 1747 1748 /** 1749 * Constant for the "Low Surrogates" Unicode character block. 1750 * This block represents codepoint values in the low surrogate 1751 * range: U+DC00 through U+DFFF 1752 * 1753 * @since 1.5 1754 */ 1755 public static final UnicodeBlock LOW_SURROGATES = 1756 new UnicodeBlock("LOW_SURROGATES", 1757 "LOW SURROGATES", 1758 "LOWSURROGATES"); 1759 1760 /** 1761 * Constant for the "Arabic Supplement" Unicode character block. 1762 * @since 1.7 1763 */ 1764 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1765 new UnicodeBlock("ARABIC_SUPPLEMENT", 1766 "ARABIC SUPPLEMENT", 1767 "ARABICSUPPLEMENT"); 1768 1769 /** 1770 * Constant for the "NKo" Unicode character block. 1771 * @since 1.7 1772 */ 1773 public static final UnicodeBlock NKO = 1774 new UnicodeBlock("NKO"); 1775 1776 /** 1777 * Constant for the "Samaritan" Unicode character block. 1778 * @since 1.7 1779 */ 1780 public static final UnicodeBlock SAMARITAN = 1781 new UnicodeBlock("SAMARITAN"); 1782 1783 /** 1784 * Constant for the "Mandaic" Unicode character block. 1785 * @since 1.7 1786 */ 1787 public static final UnicodeBlock MANDAIC = 1788 new UnicodeBlock("MANDAIC"); 1789 1790 /** 1791 * Constant for the "Ethiopic Supplement" Unicode character block. 1792 * @since 1.7 1793 */ 1794 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1795 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1796 "ETHIOPIC SUPPLEMENT", 1797 "ETHIOPICSUPPLEMENT"); 1798 1799 /** 1800 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1801 * Unicode character block. 
1802 * @since 1.7 1803 */ 1804 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1805 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1806 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1807 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1808 1809 /** 1810 * Constant for the "New Tai Lue" Unicode character block. 1811 * @since 1.7 1812 */ 1813 public static final UnicodeBlock NEW_TAI_LUE = 1814 new UnicodeBlock("NEW_TAI_LUE", 1815 "NEW TAI LUE", 1816 "NEWTAILUE"); 1817 1818 /** 1819 * Constant for the "Buginese" Unicode character block. 1820 * @since 1.7 1821 */ 1822 public static final UnicodeBlock BUGINESE = 1823 new UnicodeBlock("BUGINESE"); 1824 1825 /** 1826 * Constant for the "Tai Tham" Unicode character block. 1827 * @since 1.7 1828 */ 1829 public static final UnicodeBlock TAI_THAM = 1830 new UnicodeBlock("TAI_THAM", 1831 "TAI THAM", 1832 "TAITHAM"); 1833 1834 /** 1835 * Constant for the "Balinese" Unicode character block. 1836 * @since 1.7 1837 */ 1838 public static final UnicodeBlock BALINESE = 1839 new UnicodeBlock("BALINESE"); 1840 1841 /** 1842 * Constant for the "Sundanese" Unicode character block. 1843 * @since 1.7 1844 */ 1845 public static final UnicodeBlock SUNDANESE = 1846 new UnicodeBlock("SUNDANESE"); 1847 1848 /** 1849 * Constant for the "Batak" Unicode character block. 1850 * @since 1.7 1851 */ 1852 public static final UnicodeBlock BATAK = 1853 new UnicodeBlock("BATAK"); 1854 1855 /** 1856 * Constant for the "Lepcha" Unicode character block. 1857 * @since 1.7 1858 */ 1859 public static final UnicodeBlock LEPCHA = 1860 new UnicodeBlock("LEPCHA"); 1861 1862 /** 1863 * Constant for the "Ol Chiki" Unicode character block. 1864 * @since 1.7 1865 */ 1866 public static final UnicodeBlock OL_CHIKI = 1867 new UnicodeBlock("OL_CHIKI", 1868 "OL CHIKI", 1869 "OLCHIKI"); 1870 1871 /** 1872 * Constant for the "Vedic Extensions" Unicode character block. 
1873 * @since 1.7 1874 */ 1875 public static final UnicodeBlock VEDIC_EXTENSIONS = 1876 new UnicodeBlock("VEDIC_EXTENSIONS", 1877 "VEDIC EXTENSIONS", 1878 "VEDICEXTENSIONS"); 1879 1880 /** 1881 * Constant for the "Phonetic Extensions Supplement" Unicode character 1882 * block. 1883 * @since 1.7 1884 */ 1885 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1886 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1887 "PHONETIC EXTENSIONS SUPPLEMENT", 1888 "PHONETICEXTENSIONSSUPPLEMENT"); 1889 1890 /** 1891 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1892 * character block. 1893 * @since 1.7 1894 */ 1895 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1896 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1897 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1898 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1899 1900 /** 1901 * Constant for the "Glagolitic" Unicode character block. 1902 * @since 1.7 1903 */ 1904 public static final UnicodeBlock GLAGOLITIC = 1905 new UnicodeBlock("GLAGOLITIC"); 1906 1907 /** 1908 * Constant for the "Latin Extended-C" Unicode character block. 1909 * @since 1.7 1910 */ 1911 public static final UnicodeBlock LATIN_EXTENDED_C = 1912 new UnicodeBlock("LATIN_EXTENDED_C", 1913 "LATIN EXTENDED-C", 1914 "LATINEXTENDED-C"); 1915 1916 /** 1917 * Constant for the "Coptic" Unicode character block. 1918 * @since 1.7 1919 */ 1920 public static final UnicodeBlock COPTIC = 1921 new UnicodeBlock("COPTIC"); 1922 1923 /** 1924 * Constant for the "Georgian Supplement" Unicode character block. 1925 * @since 1.7 1926 */ 1927 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1928 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1929 "GEORGIAN SUPPLEMENT", 1930 "GEORGIANSUPPLEMENT"); 1931 1932 /** 1933 * Constant for the "Tifinagh" Unicode character block. 
1934 * @since 1.7 1935 */ 1936 public static final UnicodeBlock TIFINAGH = 1937 new UnicodeBlock("TIFINAGH"); 1938 1939 /** 1940 * Constant for the "Ethiopic Extended" Unicode character block. 1941 * @since 1.7 1942 */ 1943 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1944 new UnicodeBlock("ETHIOPIC_EXTENDED", 1945 "ETHIOPIC EXTENDED", 1946 "ETHIOPICEXTENDED"); 1947 1948 /** 1949 * Constant for the "Cyrillic Extended-A" Unicode character block. 1950 * @since 1.7 1951 */ 1952 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1953 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1954 "CYRILLIC EXTENDED-A", 1955 "CYRILLICEXTENDED-A"); 1956 1957 /** 1958 * Constant for the "Supplemental Punctuation" Unicode character block. 1959 * @since 1.7 1960 */ 1961 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1962 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1963 "SUPPLEMENTAL PUNCTUATION", 1964 "SUPPLEMENTALPUNCTUATION"); 1965 1966 /** 1967 * Constant for the "CJK Strokes" Unicode character block. 1968 * @since 1.7 1969 */ 1970 public static final UnicodeBlock CJK_STROKES = 1971 new UnicodeBlock("CJK_STROKES", 1972 "CJK STROKES", 1973 "CJKSTROKES"); 1974 1975 /** 1976 * Constant for the "Lisu" Unicode character block. 1977 * @since 1.7 1978 */ 1979 public static final UnicodeBlock LISU = 1980 new UnicodeBlock("LISU"); 1981 1982 /** 1983 * Constant for the "Vai" Unicode character block. 1984 * @since 1.7 1985 */ 1986 public static final UnicodeBlock VAI = 1987 new UnicodeBlock("VAI"); 1988 1989 /** 1990 * Constant for the "Cyrillic Extended-B" Unicode character block. 1991 * @since 1.7 1992 */ 1993 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1994 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1995 "CYRILLIC EXTENDED-B", 1996 "CYRILLICEXTENDED-B"); 1997 1998 /** 1999 * Constant for the "Bamum" Unicode character block. 
2000 * @since 1.7 2001 */ 2002 public static final UnicodeBlock BAMUM = 2003 new UnicodeBlock("BAMUM"); 2004 2005 /** 2006 * Constant for the "Modifier Tone Letters" Unicode character block. 2007 * @since 1.7 2008 */ 2009 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2010 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2011 "MODIFIER TONE LETTERS", 2012 "MODIFIERTONELETTERS"); 2013 2014 /** 2015 * Constant for the "Latin Extended-D" Unicode character block. 2016 * @since 1.7 2017 */ 2018 public static final UnicodeBlock LATIN_EXTENDED_D = 2019 new UnicodeBlock("LATIN_EXTENDED_D", 2020 "LATIN EXTENDED-D", 2021 "LATINEXTENDED-D"); 2022 2023 /** 2024 * Constant for the "Syloti Nagri" Unicode character block. 2025 * @since 1.7 2026 */ 2027 public static final UnicodeBlock SYLOTI_NAGRI = 2028 new UnicodeBlock("SYLOTI_NAGRI", 2029 "SYLOTI NAGRI", 2030 "SYLOTINAGRI"); 2031 2032 /** 2033 * Constant for the "Common Indic Number Forms" Unicode character block. 2034 * @since 1.7 2035 */ 2036 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2037 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2038 "COMMON INDIC NUMBER FORMS", 2039 "COMMONINDICNUMBERFORMS"); 2040 2041 /** 2042 * Constant for the "Phags-pa" Unicode character block. 2043 * @since 1.7 2044 */ 2045 public static final UnicodeBlock PHAGS_PA = 2046 new UnicodeBlock("PHAGS_PA", 2047 "PHAGS-PA"); 2048 2049 /** 2050 * Constant for the "Saurashtra" Unicode character block. 2051 * @since 1.7 2052 */ 2053 public static final UnicodeBlock SAURASHTRA = 2054 new UnicodeBlock("SAURASHTRA"); 2055 2056 /** 2057 * Constant for the "Devanagari Extended" Unicode character block. 2058 * @since 1.7 2059 */ 2060 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2061 new UnicodeBlock("DEVANAGARI_EXTENDED", 2062 "DEVANAGARI EXTENDED", 2063 "DEVANAGARIEXTENDED"); 2064 2065 /** 2066 * Constant for the "Kayah Li" Unicode character block. 
2067 * @since 1.7 2068 */ 2069 public static final UnicodeBlock KAYAH_LI = 2070 new UnicodeBlock("KAYAH_LI", 2071 "KAYAH LI", 2072 "KAYAHLI"); 2073 2074 /** 2075 * Constant for the "Rejang" Unicode character block. 2076 * @since 1.7 2077 */ 2078 public static final UnicodeBlock REJANG = 2079 new UnicodeBlock("REJANG"); 2080 2081 /** 2082 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2083 * @since 1.7 2084 */ 2085 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2086 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2087 "HANGUL JAMO EXTENDED-A", 2088 "HANGULJAMOEXTENDED-A"); 2089 2090 /** 2091 * Constant for the "Javanese" Unicode character block. 2092 * @since 1.7 2093 */ 2094 public static final UnicodeBlock JAVANESE = 2095 new UnicodeBlock("JAVANESE"); 2096 2097 /** 2098 * Constant for the "Cham" Unicode character block. 2099 * @since 1.7 2100 */ 2101 public static final UnicodeBlock CHAM = 2102 new UnicodeBlock("CHAM"); 2103 2104 /** 2105 * Constant for the "Myanmar Extended-A" Unicode character block. 2106 * @since 1.7 2107 */ 2108 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2109 new UnicodeBlock("MYANMAR_EXTENDED_A", 2110 "MYANMAR EXTENDED-A", 2111 "MYANMAREXTENDED-A"); 2112 2113 /** 2114 * Constant for the "Tai Viet" Unicode character block. 2115 * @since 1.7 2116 */ 2117 public static final UnicodeBlock TAI_VIET = 2118 new UnicodeBlock("TAI_VIET", 2119 "TAI VIET", 2120 "TAIVIET"); 2121 2122 /** 2123 * Constant for the "Ethiopic Extended-A" Unicode character block. 2124 * @since 1.7 2125 */ 2126 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2127 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2128 "ETHIOPIC EXTENDED-A", 2129 "ETHIOPICEXTENDED-A"); 2130 2131 /** 2132 * Constant for the "Meetei Mayek" Unicode character block. 
2133 * @since 1.7 2134 */ 2135 public static final UnicodeBlock MEETEI_MAYEK = 2136 new UnicodeBlock("MEETEI_MAYEK", 2137 "MEETEI MAYEK", 2138 "MEETEIMAYEK"); 2139 2140 /** 2141 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2142 * @since 1.7 2143 */ 2144 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2145 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2146 "HANGUL JAMO EXTENDED-B", 2147 "HANGULJAMOEXTENDED-B"); 2148 2149 /** 2150 * Constant for the "Vertical Forms" Unicode character block. 2151 * @since 1.7 2152 */ 2153 public static final UnicodeBlock VERTICAL_FORMS = 2154 new UnicodeBlock("VERTICAL_FORMS", 2155 "VERTICAL FORMS", 2156 "VERTICALFORMS"); 2157 2158 /** 2159 * Constant for the "Ancient Greek Numbers" Unicode character block. 2160 * @since 1.7 2161 */ 2162 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2163 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2164 "ANCIENT GREEK NUMBERS", 2165 "ANCIENTGREEKNUMBERS"); 2166 2167 /** 2168 * Constant for the "Ancient Symbols" Unicode character block. 2169 * @since 1.7 2170 */ 2171 public static final UnicodeBlock ANCIENT_SYMBOLS = 2172 new UnicodeBlock("ANCIENT_SYMBOLS", 2173 "ANCIENT SYMBOLS", 2174 "ANCIENTSYMBOLS"); 2175 2176 /** 2177 * Constant for the "Phaistos Disc" Unicode character block. 2178 * @since 1.7 2179 */ 2180 public static final UnicodeBlock PHAISTOS_DISC = 2181 new UnicodeBlock("PHAISTOS_DISC", 2182 "PHAISTOS DISC", 2183 "PHAISTOSDISC"); 2184 2185 /** 2186 * Constant for the "Lycian" Unicode character block. 2187 * @since 1.7 2188 */ 2189 public static final UnicodeBlock LYCIAN = 2190 new UnicodeBlock("LYCIAN"); 2191 2192 /** 2193 * Constant for the "Carian" Unicode character block. 2194 * @since 1.7 2195 */ 2196 public static final UnicodeBlock CARIAN = 2197 new UnicodeBlock("CARIAN"); 2198 2199 /** 2200 * Constant for the "Old Persian" Unicode character block. 
2201 * @since 1.7 2202 */ 2203 public static final UnicodeBlock OLD_PERSIAN = 2204 new UnicodeBlock("OLD_PERSIAN", 2205 "OLD PERSIAN", 2206 "OLDPERSIAN"); 2207 2208 /** 2209 * Constant for the "Imperial Aramaic" Unicode character block. 2210 * @since 1.7 2211 */ 2212 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2213 new UnicodeBlock("IMPERIAL_ARAMAIC", 2214 "IMPERIAL ARAMAIC", 2215 "IMPERIALARAMAIC"); 2216 2217 /** 2218 * Constant for the "Phoenician" Unicode character block. 2219 * @since 1.7 2220 */ 2221 public static final UnicodeBlock PHOENICIAN = 2222 new UnicodeBlock("PHOENICIAN"); 2223 2224 /** 2225 * Constant for the "Lydian" Unicode character block. 2226 * @since 1.7 2227 */ 2228 public static final UnicodeBlock LYDIAN = 2229 new UnicodeBlock("LYDIAN"); 2230 2231 /** 2232 * Constant for the "Kharoshthi" Unicode character block. 2233 * @since 1.7 2234 */ 2235 public static final UnicodeBlock KHAROSHTHI = 2236 new UnicodeBlock("KHAROSHTHI"); 2237 2238 /** 2239 * Constant for the "Old South Arabian" Unicode character block. 2240 * @since 1.7 2241 */ 2242 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2243 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2244 "OLD SOUTH ARABIAN", 2245 "OLDSOUTHARABIAN"); 2246 2247 /** 2248 * Constant for the "Avestan" Unicode character block. 2249 * @since 1.7 2250 */ 2251 public static final UnicodeBlock AVESTAN = 2252 new UnicodeBlock("AVESTAN"); 2253 2254 /** 2255 * Constant for the "Inscriptional Parthian" Unicode character block. 2256 * @since 1.7 2257 */ 2258 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2259 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2260 "INSCRIPTIONAL PARTHIAN", 2261 "INSCRIPTIONALPARTHIAN"); 2262 2263 /** 2264 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2265 * @since 1.7 2266 */ 2267 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2268 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2269 "INSCRIPTIONAL PAHLAVI", 2270 "INSCRIPTIONALPAHLAVI"); 2271 2272 /** 2273 * Constant for the "Old Turkic" Unicode character block. 2274 * @since 1.7 2275 */ 2276 public static final UnicodeBlock OLD_TURKIC = 2277 new UnicodeBlock("OLD_TURKIC", 2278 "OLD TURKIC", 2279 "OLDTURKIC"); 2280 2281 /** 2282 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2283 * @since 1.7 2284 */ 2285 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2286 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2287 "RUMI NUMERAL SYMBOLS", 2288 "RUMINUMERALSYMBOLS"); 2289 2290 /** 2291 * Constant for the "Brahmi" Unicode character block. 2292 * @since 1.7 2293 */ 2294 public static final UnicodeBlock BRAHMI = 2295 new UnicodeBlock("BRAHMI"); 2296 2297 /** 2298 * Constant for the "Kaithi" Unicode character block. 2299 * @since 1.7 2300 */ 2301 public static final UnicodeBlock KAITHI = 2302 new UnicodeBlock("KAITHI"); 2303 2304 /** 2305 * Constant for the "Cuneiform" Unicode character block. 2306 * @since 1.7 2307 */ 2308 public static final UnicodeBlock CUNEIFORM = 2309 new UnicodeBlock("CUNEIFORM"); 2310 2311 /** 2312 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2313 * character block. 2314 * @since 1.7 2315 */ 2316 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2317 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2318 "CUNEIFORM NUMBERS AND PUNCTUATION", 2319 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2320 2321 /** 2322 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2323 * @since 1.7 2324 */ 2325 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2326 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2327 "EGYPTIAN HIEROGLYPHS", 2328 "EGYPTIANHIEROGLYPHS"); 2329 2330 /** 2331 * Constant for the "Bamum Supplement" Unicode character block. 
2332 * @since 1.7 2333 */ 2334 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2335 new UnicodeBlock("BAMUM_SUPPLEMENT", 2336 "BAMUM SUPPLEMENT", 2337 "BAMUMSUPPLEMENT"); 2338 2339 /** 2340 * Constant for the "Kana Supplement" Unicode character block. 2341 * @since 1.7 2342 */ 2343 public static final UnicodeBlock KANA_SUPPLEMENT = 2344 new UnicodeBlock("KANA_SUPPLEMENT", 2345 "KANA SUPPLEMENT", 2346 "KANASUPPLEMENT"); 2347 2348 /** 2349 * Constant for the "Ancient Greek Musical Notation" Unicode character 2350 * block. 2351 * @since 1.7 2352 */ 2353 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2354 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2355 "ANCIENT GREEK MUSICAL NOTATION", 2356 "ANCIENTGREEKMUSICALNOTATION"); 2357 2358 /** 2359 * Constant for the "Counting Rod Numerals" Unicode character block. 2360 * @since 1.7 2361 */ 2362 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2363 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2364 "COUNTING ROD NUMERALS", 2365 "COUNTINGRODNUMERALS"); 2366 2367 /** 2368 * Constant for the "Mahjong Tiles" Unicode character block. 2369 * @since 1.7 2370 */ 2371 public static final UnicodeBlock MAHJONG_TILES = 2372 new UnicodeBlock("MAHJONG_TILES", 2373 "MAHJONG TILES", 2374 "MAHJONGTILES"); 2375 2376 /** 2377 * Constant for the "Domino Tiles" Unicode character block. 2378 * @since 1.7 2379 */ 2380 public static final UnicodeBlock DOMINO_TILES = 2381 new UnicodeBlock("DOMINO_TILES", 2382 "DOMINO TILES", 2383 "DOMINOTILES"); 2384 2385 /** 2386 * Constant for the "Playing Cards" Unicode character block. 2387 * @since 1.7 2388 */ 2389 public static final UnicodeBlock PLAYING_CARDS = 2390 new UnicodeBlock("PLAYING_CARDS", 2391 "PLAYING CARDS", 2392 "PLAYINGCARDS"); 2393 2394 /** 2395 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2396 * block. 
2397 * @since 1.7 2398 */ 2399 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2400 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2401 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2402 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2403 2404 /** 2405 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2406 * block. 2407 * @since 1.7 2408 */ 2409 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2410 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2411 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2412 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2413 2414 /** 2415 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2416 * character block. 2417 * @since 1.7 2418 */ 2419 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2420 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2421 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2422 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2423 2424 /** 2425 * Constant for the "Emoticons" Unicode character block. 2426 * @since 1.7 2427 */ 2428 public static final UnicodeBlock EMOTICONS = 2429 new UnicodeBlock("EMOTICONS"); 2430 2431 /** 2432 * Constant for the "Transport And Map Symbols" Unicode character block. 2433 * @since 1.7 2434 */ 2435 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2436 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2437 "TRANSPORT AND MAP SYMBOLS", 2438 "TRANSPORTANDMAPSYMBOLS"); 2439 2440 /** 2441 * Constant for the "Alchemical Symbols" Unicode character block. 2442 * @since 1.7 2443 */ 2444 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2445 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2446 "ALCHEMICAL SYMBOLS", 2447 "ALCHEMICALSYMBOLS"); 2448 2449 /** 2450 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2451 * character block. 
2452 * @since 1.7 2453 */ 2454 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2455 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2456 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2457 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2458 2459 /** 2460 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2461 * character block. 2462 * @since 1.7 2463 */ 2464 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2465 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2466 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2467 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2468 2469 /** 2470 * Constant for the "Arabic Extended-A" Unicode character block. 2471 * @since 1.8 2472 */ 2473 public static final UnicodeBlock ARABIC_EXTENDED_A = 2474 new UnicodeBlock("ARABIC_EXTENDED_A", 2475 "ARABIC EXTENDED-A", 2476 "ARABICEXTENDED-A"); 2477 2478 /** 2479 * Constant for the "Sundanese Supplement" Unicode character block. 2480 * @since 1.8 2481 */ 2482 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2483 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2484 "SUNDANESE SUPPLEMENT", 2485 "SUNDANESESUPPLEMENT"); 2486 2487 /** 2488 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2489 * @since 1.8 2490 */ 2491 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2492 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2493 "MEETEI MAYEK EXTENSIONS", 2494 "MEETEIMAYEKEXTENSIONS"); 2495 2496 /** 2497 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2498 * @since 1.8 2499 */ 2500 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2501 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2502 "MEROITIC HIEROGLYPHS", 2503 "MEROITICHIEROGLYPHS"); 2504 2505 /** 2506 * Constant for the "Meroitic Cursive" Unicode character block. 
2507 * @since 1.8 2508 */ 2509 public static final UnicodeBlock MEROITIC_CURSIVE = 2510 new UnicodeBlock("MEROITIC_CURSIVE", 2511 "MEROITIC CURSIVE", 2512 "MEROITICCURSIVE"); 2513 2514 /** 2515 * Constant for the "Sora Sompeng" Unicode character block. 2516 * @since 1.8 2517 */ 2518 public static final UnicodeBlock SORA_SOMPENG = 2519 new UnicodeBlock("SORA_SOMPENG", 2520 "SORA SOMPENG", 2521 "SORASOMPENG"); 2522 2523 /** 2524 * Constant for the "Chakma" Unicode character block. 2525 * @since 1.8 2526 */ 2527 public static final UnicodeBlock CHAKMA = 2528 new UnicodeBlock("CHAKMA"); 2529 2530 /** 2531 * Constant for the "Sharada" Unicode character block. 2532 * @since 1.8 2533 */ 2534 public static final UnicodeBlock SHARADA = 2535 new UnicodeBlock("SHARADA"); 2536 2537 /** 2538 * Constant for the "Takri" Unicode character block. 2539 * @since 1.8 2540 */ 2541 public static final UnicodeBlock TAKRI = 2542 new UnicodeBlock("TAKRI"); 2543 2544 /** 2545 * Constant for the "Miao" Unicode character block. 2546 * @since 1.8 2547 */ 2548 public static final UnicodeBlock MIAO = 2549 new UnicodeBlock("MIAO"); 2550 2551 /** 2552 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2553 * character block. 
2554 * @since 1.8 2555 */ 2556 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2557 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2558 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2559 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2560 2561 private static final int blockStarts[] = { 2562 0x0000, // 0000..007F; Basic Latin 2563 0x0080, // 0080..00FF; Latin-1 Supplement 2564 0x0100, // 0100..017F; Latin Extended-A 2565 0x0180, // 0180..024F; Latin Extended-B 2566 0x0250, // 0250..02AF; IPA Extensions 2567 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2568 0x0300, // 0300..036F; Combining Diacritical Marks 2569 0x0370, // 0370..03FF; Greek and Coptic 2570 0x0400, // 0400..04FF; Cyrillic 2571 0x0500, // 0500..052F; Cyrillic Supplement 2572 0x0530, // 0530..058F; Armenian 2573 0x0590, // 0590..05FF; Hebrew 2574 0x0600, // 0600..06FF; Arabic 2575 0x0700, // 0700..074F; Syriac 2576 0x0750, // 0750..077F; Arabic Supplement 2577 0x0780, // 0780..07BF; Thaana 2578 0x07C0, // 07C0..07FF; NKo 2579 0x0800, // 0800..083F; Samaritan 2580 0x0840, // 0840..085F; Mandaic 2581 0x0860, // unassigned 2582 0x08A0, // 08A0..08FF; Arabic Extended-A 2583 0x0900, // 0900..097F; Devanagari 2584 0x0980, // 0980..09FF; Bengali 2585 0x0A00, // 0A00..0A7F; Gurmukhi 2586 0x0A80, // 0A80..0AFF; Gujarati 2587 0x0B00, // 0B00..0B7F; Oriya 2588 0x0B80, // 0B80..0BFF; Tamil 2589 0x0C00, // 0C00..0C7F; Telugu 2590 0x0C80, // 0C80..0CFF; Kannada 2591 0x0D00, // 0D00..0D7F; Malayalam 2592 0x0D80, // 0D80..0DFF; Sinhala 2593 0x0E00, // 0E00..0E7F; Thai 2594 0x0E80, // 0E80..0EFF; Lao 2595 0x0F00, // 0F00..0FFF; Tibetan 2596 0x1000, // 1000..109F; Myanmar 2597 0x10A0, // 10A0..10FF; Georgian 2598 0x1100, // 1100..11FF; Hangul Jamo 2599 0x1200, // 1200..137F; Ethiopic 2600 0x1380, // 1380..139F; Ethiopic Supplement 2601 0x13A0, // 13A0..13FF; Cherokee 2602 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2603 0x1680, // 1680..169F; Ogham 2604 0x16A0, // 
16A0..16FF; Runic 2605 0x1700, // 1700..171F; Tagalog 2606 0x1720, // 1720..173F; Hanunoo 2607 0x1740, // 1740..175F; Buhid 2608 0x1760, // 1760..177F; Tagbanwa 2609 0x1780, // 1780..17FF; Khmer 2610 0x1800, // 1800..18AF; Mongolian 2611 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2612 0x1900, // 1900..194F; Limbu 2613 0x1950, // 1950..197F; Tai Le 2614 0x1980, // 1980..19DF; New Tai Lue 2615 0x19E0, // 19E0..19FF; Khmer Symbols 2616 0x1A00, // 1A00..1A1F; Buginese 2617 0x1A20, // 1A20..1AAF; Tai Tham 2618 0x1AB0, // unassigned 2619 0x1B00, // 1B00..1B7F; Balinese 2620 0x1B80, // 1B80..1BBF; Sundanese 2621 0x1BC0, // 1BC0..1BFF; Batak 2622 0x1C00, // 1C00..1C4F; Lepcha 2623 0x1C50, // 1C50..1C7F; Ol Chiki 2624 0x1C80, // unassigned 2625 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 2626 0x1CD0, // 1CD0..1CFF; Vedic Extensions 2627 0x1D00, // 1D00..1D7F; Phonetic Extensions 2628 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2629 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2630 0x1E00, // 1E00..1EFF; Latin Extended Additional 2631 0x1F00, // 1F00..1FFF; Greek Extended 2632 0x2000, // 2000..206F; General Punctuation 2633 0x2070, // 2070..209F; Superscripts and Subscripts 2634 0x20A0, // 20A0..20CF; Currency Symbols 2635 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2636 0x2100, // 2100..214F; Letterlike Symbols 2637 0x2150, // 2150..218F; Number Forms 2638 0x2190, // 2190..21FF; Arrows 2639 0x2200, // 2200..22FF; Mathematical Operators 2640 0x2300, // 2300..23FF; Miscellaneous Technical 2641 0x2400, // 2400..243F; Control Pictures 2642 0x2440, // 2440..245F; Optical Character Recognition 2643 0x2460, // 2460..24FF; Enclosed Alphanumerics 2644 0x2500, // 2500..257F; Box Drawing 2645 0x2580, // 2580..259F; Block Elements 2646 0x25A0, // 25A0..25FF; Geometric Shapes 2647 0x2600, // 2600..26FF; Miscellaneous Symbols 2648 0x2700, // 2700..27BF; Dingbats 2649 0x27C0, // 27C0..27EF; Miscellaneous Mathematical 
Symbols-A 2650 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2651 0x2800, // 2800..28FF; Braille Patterns 2652 0x2900, // 2900..297F; Supplemental Arrows-B 2653 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2654 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2655 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2656 0x2C00, // 2C00..2C5F; Glagolitic 2657 0x2C60, // 2C60..2C7F; Latin Extended-C 2658 0x2C80, // 2C80..2CFF; Coptic 2659 0x2D00, // 2D00..2D2F; Georgian Supplement 2660 0x2D30, // 2D30..2D7F; Tifinagh 2661 0x2D80, // 2D80..2DDF; Ethiopic Extended 2662 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2663 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2664 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2665 0x2F00, // 2F00..2FDF; Kangxi Radicals 2666 0x2FE0, // unassigned 2667 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2668 0x3000, // 3000..303F; CJK Symbols and Punctuation 2669 0x3040, // 3040..309F; Hiragana 2670 0x30A0, // 30A0..30FF; Katakana 2671 0x3100, // 3100..312F; Bopomofo 2672 0x3130, // 3130..318F; Hangul Compatibility Jamo 2673 0x3190, // 3190..319F; Kanbun 2674 0x31A0, // 31A0..31BF; Bopomofo Extended 2675 0x31C0, // 31C0..31EF; CJK Strokes 2676 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2677 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2678 0x3300, // 3300..33FF; CJK Compatibility 2679 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2680 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2681 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2682 0xA000, // A000..A48F; Yi Syllables 2683 0xA490, // A490..A4CF; Yi Radicals 2684 0xA4D0, // A4D0..A4FF; Lisu 2685 0xA500, // A500..A63F; Vai 2686 0xA640, // A640..A69F; Cyrillic Extended-B 2687 0xA6A0, // A6A0..A6FF; Bamum 2688 0xA700, // A700..A71F; Modifier Tone Letters 2689 0xA720, // A720..A7FF; Latin Extended-D 2690 0xA800, // A800..A82F; Syloti Nagri 2691 0xA830, // A830..A83F; Common Indic Number Forms 2692 0xA840, // A840..A87F; 
Phags-pa 2693 0xA880, // A880..A8DF; Saurashtra 2694 0xA8E0, // A8E0..A8FF; Devanagari Extended 2695 0xA900, // A900..A92F; Kayah Li 2696 0xA930, // A930..A95F; Rejang 2697 0xA960, // A960..A97F; Hangul Jamo Extended-A 2698 0xA980, // A980..A9DF; Javanese 2699 0xA9E0, // unassigned 2700 0xAA00, // AA00..AA5F; Cham 2701 0xAA60, // AA60..AA7F; Myanmar Extended-A 2702 0xAA80, // AA80..AADF; Tai Viet 2703 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 2704 0xAB00, // AB00..AB2F; Ethiopic Extended-A 2705 0xAB30, // unassigned 2706 0xABC0, // ABC0..ABFF; Meetei Mayek 2707 0xAC00, // AC00..D7AF; Hangul Syllables 2708 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 2709 0xD800, // D800..DB7F; High Surrogates 2710 0xDB80, // DB80..DBFF; High Private Use Surrogates 2711 0xDC00, // DC00..DFFF; Low Surrogates 2712 0xE000, // E000..F8FF; Private Use Area 2713 0xF900, // F900..FAFF; CJK Compatibility Ideographs 2714 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 2715 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 2716 0xFE00, // FE00..FE0F; Variation Selectors 2717 0xFE10, // FE10..FE1F; Vertical Forms 2718 0xFE20, // FE20..FE2F; Combining Half Marks 2719 0xFE30, // FE30..FE4F; CJK Compatibility Forms 2720 0xFE50, // FE50..FE6F; Small Form Variants 2721 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 2722 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 2723 0xFFF0, // FFF0..FFFF; Specials 2724 0x10000, // 10000..1007F; Linear B Syllabary 2725 0x10080, // 10080..100FF; Linear B Ideograms 2726 0x10100, // 10100..1013F; Aegean Numbers 2727 0x10140, // 10140..1018F; Ancient Greek Numbers 2728 0x10190, // 10190..101CF; Ancient Symbols 2729 0x101D0, // 101D0..101FF; Phaistos Disc 2730 0x10200, // unassigned 2731 0x10280, // 10280..1029F; Lycian 2732 0x102A0, // 102A0..102DF; Carian 2733 0x102E0, // unassigned 2734 0x10300, // 10300..1032F; Old Italic 2735 0x10330, // 10330..1034F; Gothic 2736 0x10350, // unassigned 2737 0x10380, // 10380..1039F; Ugaritic 2738 0x103A0, 
// 103A0..103DF; Old Persian 2739 0x103E0, // unassigned 2740 0x10400, // 10400..1044F; Deseret 2741 0x10450, // 10450..1047F; Shavian 2742 0x10480, // 10480..104AF; Osmanya 2743 0x104B0, // unassigned 2744 0x10800, // 10800..1083F; Cypriot Syllabary 2745 0x10840, // 10840..1085F; Imperial Aramaic 2746 0x10860, // unassigned 2747 0x10900, // 10900..1091F; Phoenician 2748 0x10920, // 10920..1093F; Lydian 2749 0x10940, // unassigned 2750 0x10980, // 10980..1099F; Meroitic Hieroglyphs 2751 0x109A0, // 109A0..109FF; Meroitic Cursive 2752 0x10A00, // 10A00..10A5F; Kharoshthi 2753 0x10A60, // 10A60..10A7F; Old South Arabian 2754 0x10A80, // unassigned 2755 0x10B00, // 10B00..10B3F; Avestan 2756 0x10B40, // 10B40..10B5F; Inscriptional Parthian 2757 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 2758 0x10B80, // unassigned 2759 0x10C00, // 10C00..10C4F; Old Turkic 2760 0x10C50, // unassigned 2761 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 2762 0x10E80, // unassigned 2763 0x11000, // 11000..1107F; Brahmi 2764 0x11080, // 11080..110CF; Kaithi 2765 0x110D0, // 110D0..110FF; Sora Sompeng 2766 0x11100, // 11100..1114F; Chakma 2767 0x11150, // unassigned 2768 0x11180, // 11180..111DF; Sharada 2769 0x111E0, // unassigned 2770 0x11680, // 11680..116CF; Takri 2771 0x116D0, // unassigned 2772 0x12000, // 12000..123FF; Cuneiform 2773 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 2774 0x12480, // unassigned 2775 0x13000, // 13000..1342F; Egyptian Hieroglyphs 2776 0x13430, // unassigned 2777 0x16800, // 16800..16A3F; Bamum Supplement 2778 0x16A40, // unassigned 2779 0x16F00, // 16F00..16F9F; Miao 2780 0x16FA0, // unassigned 2781 0x1B000, // 1B000..1B0FF; Kana Supplement 2782 0x1B100, // unassigned 2783 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 2784 0x1D100, // 1D100..1D1FF; Musical Symbols 2785 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 2786 0x1D250, // unassigned 2787 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 2788 0x1D360, // 
1D360..1D37F; Counting Rod Numerals 2789 0x1D380, // unassigned 2790 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 2791 0x1D800, // unassigned 2792 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 2793 0x1EF00, // unassigned 2794 0x1F000, // 1F000..1F02F; Mahjong Tiles 2795 0x1F030, // 1F030..1F09F; Domino Tiles 2796 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 2797 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 2798 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 2799 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs 2800 0x1F600, // 1F600..1F64F; Emoticons 2801 0x1F650, // unassigned 2802 0x1F680, // 1F680..1F6FF; Transport And Map Symbols 2803 0x1F700, // 1F700..1F77F; Alchemical Symbols 2804 0x1F780, // unassigned 2805 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 2806 0x2A6E0, // unassigned 2807 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 2808 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 2809 0x2B820, // unassigned 2810 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2811 0x2FA20, // unassigned 2812 0xE0000, // E0000..E007F; Tags 2813 0xE0080, // unassigned 2814 0xE0100, // E0100..E01EF; Variation Selectors Supplement 2815 0xE01F0, // unassigned 2816 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 2817 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 2818 }; 2819 2820 private static final UnicodeBlock[] blocks = { 2821 BASIC_LATIN, 2822 LATIN_1_SUPPLEMENT, 2823 LATIN_EXTENDED_A, 2824 LATIN_EXTENDED_B, 2825 IPA_EXTENSIONS, 2826 SPACING_MODIFIER_LETTERS, 2827 COMBINING_DIACRITICAL_MARKS, 2828 GREEK, 2829 CYRILLIC, 2830 CYRILLIC_SUPPLEMENTARY, 2831 ARMENIAN, 2832 HEBREW, 2833 ARABIC, 2834 SYRIAC, 2835 ARABIC_SUPPLEMENT, 2836 THAANA, 2837 NKO, 2838 SAMARITAN, 2839 MANDAIC, 2840 null, 2841 ARABIC_EXTENDED_A, 2842 DEVANAGARI, 2843 BENGALI, 2844 GURMUKHI, 2845 GUJARATI, 2846 ORIYA, 2847 TAMIL, 2848 TELUGU, 2849 KANNADA, 
2850 MALAYALAM, 2851 SINHALA, 2852 THAI, 2853 LAO, 2854 TIBETAN, 2855 MYANMAR, 2856 GEORGIAN, 2857 HANGUL_JAMO, 2858 ETHIOPIC, 2859 ETHIOPIC_SUPPLEMENT, 2860 CHEROKEE, 2861 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 2862 OGHAM, 2863 RUNIC, 2864 TAGALOG, 2865 HANUNOO, 2866 BUHID, 2867 TAGBANWA, 2868 KHMER, 2869 MONGOLIAN, 2870 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 2871 LIMBU, 2872 TAI_LE, 2873 NEW_TAI_LUE, 2874 KHMER_SYMBOLS, 2875 BUGINESE, 2876 TAI_THAM, 2877 null, 2878 BALINESE, 2879 SUNDANESE, 2880 BATAK, 2881 LEPCHA, 2882 OL_CHIKI, 2883 null, 2884 SUNDANESE_SUPPLEMENT, 2885 VEDIC_EXTENSIONS, 2886 PHONETIC_EXTENSIONS, 2887 PHONETIC_EXTENSIONS_SUPPLEMENT, 2888 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 2889 LATIN_EXTENDED_ADDITIONAL, 2890 GREEK_EXTENDED, 2891 GENERAL_PUNCTUATION, 2892 SUPERSCRIPTS_AND_SUBSCRIPTS, 2893 CURRENCY_SYMBOLS, 2894 COMBINING_MARKS_FOR_SYMBOLS, 2895 LETTERLIKE_SYMBOLS, 2896 NUMBER_FORMS, 2897 ARROWS, 2898 MATHEMATICAL_OPERATORS, 2899 MISCELLANEOUS_TECHNICAL, 2900 CONTROL_PICTURES, 2901 OPTICAL_CHARACTER_RECOGNITION, 2902 ENCLOSED_ALPHANUMERICS, 2903 BOX_DRAWING, 2904 BLOCK_ELEMENTS, 2905 GEOMETRIC_SHAPES, 2906 MISCELLANEOUS_SYMBOLS, 2907 DINGBATS, 2908 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 2909 SUPPLEMENTAL_ARROWS_A, 2910 BRAILLE_PATTERNS, 2911 SUPPLEMENTAL_ARROWS_B, 2912 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 2913 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 2914 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 2915 GLAGOLITIC, 2916 LATIN_EXTENDED_C, 2917 COPTIC, 2918 GEORGIAN_SUPPLEMENT, 2919 TIFINAGH, 2920 ETHIOPIC_EXTENDED, 2921 CYRILLIC_EXTENDED_A, 2922 SUPPLEMENTAL_PUNCTUATION, 2923 CJK_RADICALS_SUPPLEMENT, 2924 KANGXI_RADICALS, 2925 null, 2926 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 2927 CJK_SYMBOLS_AND_PUNCTUATION, 2928 HIRAGANA, 2929 KATAKANA, 2930 BOPOMOFO, 2931 HANGUL_COMPATIBILITY_JAMO, 2932 KANBUN, 2933 BOPOMOFO_EXTENDED, 2934 CJK_STROKES, 2935 KATAKANA_PHONETIC_EXTENSIONS, 2936 ENCLOSED_CJK_LETTERS_AND_MONTHS, 2937 CJK_COMPATIBILITY, 2938 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 2939 YIJING_HEXAGRAM_SYMBOLS, 2940 CJK_UNIFIED_IDEOGRAPHS, 2941 YI_SYLLABLES, 2942 YI_RADICALS, 2943 LISU, 2944 VAI, 2945 CYRILLIC_EXTENDED_B, 2946 BAMUM, 2947 MODIFIER_TONE_LETTERS, 2948 LATIN_EXTENDED_D, 2949 SYLOTI_NAGRI, 2950 COMMON_INDIC_NUMBER_FORMS, 2951 PHAGS_PA, 2952 SAURASHTRA, 2953 DEVANAGARI_EXTENDED, 2954 KAYAH_LI, 2955 REJANG, 2956 HANGUL_JAMO_EXTENDED_A, 2957 JAVANESE, 2958 null, 2959 CHAM, 2960 MYANMAR_EXTENDED_A, 2961 TAI_VIET, 2962 MEETEI_MAYEK_EXTENSIONS, 2963 ETHIOPIC_EXTENDED_A, 2964 null, 2965 MEETEI_MAYEK, 2966 HANGUL_SYLLABLES, 2967 HANGUL_JAMO_EXTENDED_B, 2968 HIGH_SURROGATES, 2969 HIGH_PRIVATE_USE_SURROGATES, 2970 LOW_SURROGATES, 2971 PRIVATE_USE_AREA, 2972 CJK_COMPATIBILITY_IDEOGRAPHS, 2973 ALPHABETIC_PRESENTATION_FORMS, 2974 ARABIC_PRESENTATION_FORMS_A, 2975 VARIATION_SELECTORS, 2976 VERTICAL_FORMS, 2977 COMBINING_HALF_MARKS, 2978 CJK_COMPATIBILITY_FORMS, 2979 SMALL_FORM_VARIANTS, 2980 ARABIC_PRESENTATION_FORMS_B, 2981 HALFWIDTH_AND_FULLWIDTH_FORMS, 2982 SPECIALS, 2983 LINEAR_B_SYLLABARY, 2984 LINEAR_B_IDEOGRAMS, 2985 AEGEAN_NUMBERS, 2986 ANCIENT_GREEK_NUMBERS, 2987 ANCIENT_SYMBOLS, 2988 PHAISTOS_DISC, 2989 null, 2990 LYCIAN, 2991 CARIAN, 2992 null, 2993 OLD_ITALIC, 2994 GOTHIC, 2995 null, 2996 UGARITIC, 2997 OLD_PERSIAN, 2998 null, 2999 DESERET, 3000 SHAVIAN, 3001 OSMANYA, 3002 null, 3003 CYPRIOT_SYLLABARY, 3004 IMPERIAL_ARAMAIC, 3005 null, 3006 PHOENICIAN, 3007 LYDIAN, 3008 null, 3009 MEROITIC_HIEROGLYPHS, 3010 MEROITIC_CURSIVE, 3011 KHAROSHTHI, 3012 OLD_SOUTH_ARABIAN, 3013 null, 3014 AVESTAN, 3015 INSCRIPTIONAL_PARTHIAN, 3016 INSCRIPTIONAL_PAHLAVI, 3017 null, 3018 OLD_TURKIC, 3019 null, 3020 RUMI_NUMERAL_SYMBOLS, 3021 null, 3022 BRAHMI, 3023 KAITHI, 3024 SORA_SOMPENG, 3025 CHAKMA, 3026 null, 3027 SHARADA, 3028 null, 3029 TAKRI, 3030 null, 3031 CUNEIFORM, 3032 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3033 null, 3034 EGYPTIAN_HIEROGLYPHS, 3035 null, 3036 BAMUM_SUPPLEMENT, 3037 null, 3038 MIAO, 3039 null, 
3040 KANA_SUPPLEMENT, 3041 null, 3042 BYZANTINE_MUSICAL_SYMBOLS, 3043 MUSICAL_SYMBOLS, 3044 ANCIENT_GREEK_MUSICAL_NOTATION, 3045 null, 3046 TAI_XUAN_JING_SYMBOLS, 3047 COUNTING_ROD_NUMERALS, 3048 null, 3049 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3050 null, 3051 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3052 null, 3053 MAHJONG_TILES, 3054 DOMINO_TILES, 3055 PLAYING_CARDS, 3056 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3057 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3058 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3059 EMOTICONS, 3060 null, 3061 TRANSPORT_AND_MAP_SYMBOLS, 3062 ALCHEMICAL_SYMBOLS, 3063 null, 3064 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3065 null, 3066 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3067 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3068 null, 3069 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3070 null, 3071 TAGS, 3072 null, 3073 VARIATION_SELECTORS_SUPPLEMENT, 3074 null, 3075 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3076 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3077 }; 3078 3079 3080 /** 3081 * Returns the object representing the Unicode block containing the 3082 * given character, or {@code null} if the character is not a 3083 * member of a defined block. 3084 * 3085 * <p><b>Note:</b> This method cannot handle 3086 * <a href="Character.html#supplementary"> supplementary 3087 * characters</a>. To support all Unicode characters, including 3088 * supplementary characters, use the {@link #of(int)} method. 3089 * 3090 * @param c The character in question 3091 * @return The {@code UnicodeBlock} instance representing the 3092 * Unicode block of which this character is a member, or 3093 * {@code null} if the character is not a member of any 3094 * Unicode block 3095 */ 3096 public static UnicodeBlock of(char c) { 3097 return of((int)c); 3098 } 3099 3100 /** 3101 * Returns the object representing the Unicode block 3102 * containing the given character (Unicode code point), or 3103 * {@code null} if the character is not a member of a 3104 * defined block. 
3105 * 3106 * @param codePoint the character (Unicode code point) in question. 3107 * @return The {@code UnicodeBlock} instance representing the 3108 * Unicode block of which this character is a member, or 3109 * {@code null} if the character is not a member of any 3110 * Unicode block 3111 * @exception IllegalArgumentException if the specified 3112 * {@code codePoint} is an invalid Unicode code point. 3113 * @see Character#isValidCodePoint(int) 3114 * @since 1.5 3115 */ 3116 public static UnicodeBlock of(int codePoint) { 3117 if (!isValidCodePoint(codePoint)) { 3118 throw new IllegalArgumentException(); 3119 } 3120 3121 int top, bottom, current; 3122 bottom = 0; 3123 top = blockStarts.length; 3124 current = top/2; 3125 3126 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3127 while (top - bottom > 1) { 3128 if (codePoint >= blockStarts[current]) { 3129 bottom = current; 3130 } else { 3131 top = current; 3132 } 3133 current = (top + bottom) / 2; 3134 } 3135 return blocks[current]; 3136 } 3137 3138 /** 3139 * Returns the UnicodeBlock with the given name. Block 3140 * names are determined by The Unicode Standard. The file 3141 * Blocks-<version>.txt defines blocks for a particular 3142 * version of the standard. The {@link Character} class specifies 3143 * the version of the standard that it supports. 3144 * <p> 3145 * This method accepts block names in the following forms: 3146 * <ol> 3147 * <li> Canonical block names as defined by the Unicode Standard. 3148 * For example, the standard defines a "Basic Latin" block. Therefore, this 3149 * method accepts "Basic Latin" as a valid block name. The documentation of 3150 * each UnicodeBlock provides the canonical name. 3151 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3152 * is a valid block name for the "Basic Latin" block. 3153 * <li>The text representation of each constant UnicodeBlock identifier. 
3154 * For example, this method will return the {@link #BASIC_LATIN} block if 3155 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3156 * hyphens in the canonical name with underscores. 3157 * </ol> 3158 * Finally, character case is ignored for all of the valid block name forms. 3159 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3160 * The en_US locale's case mapping rules are used to provide case-insensitive 3161 * string comparisons for block name validation. 3162 * <p> 3163 * If the Unicode Standard changes block names, both the previous and 3164 * current names will be accepted. 3165 * 3166 * @param blockName A {@code UnicodeBlock} name. 3167 * @return The {@code UnicodeBlock} instance identified 3168 * by {@code blockName} 3169 * @throws IllegalArgumentException if {@code blockName} is an 3170 * invalid name 3171 * @throws NullPointerException if {@code blockName} is null 3172 * @since 1.5 3173 */ 3174 public static final UnicodeBlock forName(String blockName) { 3175 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3176 if (block == null) { 3177 throw new IllegalArgumentException(); 3178 } 3179 return block; 3180 } 3181 } 3182 3183 3184 /** 3185 * A family of character subsets representing the character scripts 3186 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3187 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3188 * character is assigned to a single Unicode script, either a specific 3189 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3190 * one of the following three special values, 3191 * {@link Character.UnicodeScript#INHERITED Inherited}, 3192 * {@link Character.UnicodeScript#COMMON Common} or 3193 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3194 * 3195 * @since 1.7 3196 */ 3197 public static enum UnicodeScript { 3198 /** 3199 * Unicode script "Common". 3200 */ 3201 COMMON, 3202 3203 /** 3204 * Unicode script "Latin". 
3205 */ 3206 LATIN, 3207 3208 /** 3209 * Unicode script "Greek". 3210 */ 3211 GREEK, 3212 3213 /** 3214 * Unicode script "Cyrillic". 3215 */ 3216 CYRILLIC, 3217 3218 /** 3219 * Unicode script "Armenian". 3220 */ 3221 ARMENIAN, 3222 3223 /** 3224 * Unicode script "Hebrew". 3225 */ 3226 HEBREW, 3227 3228 /** 3229 * Unicode script "Arabic". 3230 */ 3231 ARABIC, 3232 3233 /** 3234 * Unicode script "Syriac". 3235 */ 3236 SYRIAC, 3237 3238 /** 3239 * Unicode script "Thaana". 3240 */ 3241 THAANA, 3242 3243 /** 3244 * Unicode script "Devanagari". 3245 */ 3246 DEVANAGARI, 3247 3248 /** 3249 * Unicode script "Bengali". 3250 */ 3251 BENGALI, 3252 3253 /** 3254 * Unicode script "Gurmukhi". 3255 */ 3256 GURMUKHI, 3257 3258 /** 3259 * Unicode script "Gujarati". 3260 */ 3261 GUJARATI, 3262 3263 /** 3264 * Unicode script "Oriya". 3265 */ 3266 ORIYA, 3267 3268 /** 3269 * Unicode script "Tamil". 3270 */ 3271 TAMIL, 3272 3273 /** 3274 * Unicode script "Telugu". 3275 */ 3276 TELUGU, 3277 3278 /** 3279 * Unicode script "Kannada". 3280 */ 3281 KANNADA, 3282 3283 /** 3284 * Unicode script "Malayalam". 3285 */ 3286 MALAYALAM, 3287 3288 /** 3289 * Unicode script "Sinhala". 3290 */ 3291 SINHALA, 3292 3293 /** 3294 * Unicode script "Thai". 3295 */ 3296 THAI, 3297 3298 /** 3299 * Unicode script "Lao". 3300 */ 3301 LAO, 3302 3303 /** 3304 * Unicode script "Tibetan". 3305 */ 3306 TIBETAN, 3307 3308 /** 3309 * Unicode script "Myanmar". 3310 */ 3311 MYANMAR, 3312 3313 /** 3314 * Unicode script "Georgian". 3315 */ 3316 GEORGIAN, 3317 3318 /** 3319 * Unicode script "Hangul". 3320 */ 3321 HANGUL, 3322 3323 /** 3324 * Unicode script "Ethiopic". 3325 */ 3326 ETHIOPIC, 3327 3328 /** 3329 * Unicode script "Cherokee". 3330 */ 3331 CHEROKEE, 3332 3333 /** 3334 * Unicode script "Canadian_Aboriginal". 3335 */ 3336 CANADIAN_ABORIGINAL, 3337 3338 /** 3339 * Unicode script "Ogham". 3340 */ 3341 OGHAM, 3342 3343 /** 3344 * Unicode script "Runic". 3345 */ 3346 RUNIC, 3347 3348 /** 3349 * Unicode script "Khmer". 
3350 */ 3351 KHMER, 3352 3353 /** 3354 * Unicode script "Mongolian". 3355 */ 3356 MONGOLIAN, 3357 3358 /** 3359 * Unicode script "Hiragana". 3360 */ 3361 HIRAGANA, 3362 3363 /** 3364 * Unicode script "Katakana". 3365 */ 3366 KATAKANA, 3367 3368 /** 3369 * Unicode script "Bopomofo". 3370 */ 3371 BOPOMOFO, 3372 3373 /** 3374 * Unicode script "Han". 3375 */ 3376 HAN, 3377 3378 /** 3379 * Unicode script "Yi". 3380 */ 3381 YI, 3382 3383 /** 3384 * Unicode script "Old_Italic". 3385 */ 3386 OLD_ITALIC, 3387 3388 /** 3389 * Unicode script "Gothic". 3390 */ 3391 GOTHIC, 3392 3393 /** 3394 * Unicode script "Deseret". 3395 */ 3396 DESERET, 3397 3398 /** 3399 * Unicode script "Inherited". 3400 */ 3401 INHERITED, 3402 3403 /** 3404 * Unicode script "Tagalog". 3405 */ 3406 TAGALOG, 3407 3408 /** 3409 * Unicode script "Hanunoo". 3410 */ 3411 HANUNOO, 3412 3413 /** 3414 * Unicode script "Buhid". 3415 */ 3416 BUHID, 3417 3418 /** 3419 * Unicode script "Tagbanwa". 3420 */ 3421 TAGBANWA, 3422 3423 /** 3424 * Unicode script "Limbu". 3425 */ 3426 LIMBU, 3427 3428 /** 3429 * Unicode script "Tai_Le". 3430 */ 3431 TAI_LE, 3432 3433 /** 3434 * Unicode script "Linear_B". 3435 */ 3436 LINEAR_B, 3437 3438 /** 3439 * Unicode script "Ugaritic". 3440 */ 3441 UGARITIC, 3442 3443 /** 3444 * Unicode script "Shavian". 3445 */ 3446 SHAVIAN, 3447 3448 /** 3449 * Unicode script "Osmanya". 3450 */ 3451 OSMANYA, 3452 3453 /** 3454 * Unicode script "Cypriot". 3455 */ 3456 CYPRIOT, 3457 3458 /** 3459 * Unicode script "Braille". 3460 */ 3461 BRAILLE, 3462 3463 /** 3464 * Unicode script "Buginese". 3465 */ 3466 BUGINESE, 3467 3468 /** 3469 * Unicode script "Coptic". 3470 */ 3471 COPTIC, 3472 3473 /** 3474 * Unicode script "New_Tai_Lue". 3475 */ 3476 NEW_TAI_LUE, 3477 3478 /** 3479 * Unicode script "Glagolitic". 3480 */ 3481 GLAGOLITIC, 3482 3483 /** 3484 * Unicode script "Tifinagh". 3485 */ 3486 TIFINAGH, 3487 3488 /** 3489 * Unicode script "Syloti_Nagri". 
3490 */ 3491 SYLOTI_NAGRI, 3492 3493 /** 3494 * Unicode script "Old_Persian". 3495 */ 3496 OLD_PERSIAN, 3497 3498 /** 3499 * Unicode script "Kharoshthi". 3500 */ 3501 KHAROSHTHI, 3502 3503 /** 3504 * Unicode script "Balinese". 3505 */ 3506 BALINESE, 3507 3508 /** 3509 * Unicode script "Cuneiform". 3510 */ 3511 CUNEIFORM, 3512 3513 /** 3514 * Unicode script "Phoenician". 3515 */ 3516 PHOENICIAN, 3517 3518 /** 3519 * Unicode script "Phags_Pa". 3520 */ 3521 PHAGS_PA, 3522 3523 /** 3524 * Unicode script "Nko". 3525 */ 3526 NKO, 3527 3528 /** 3529 * Unicode script "Sundanese". 3530 */ 3531 SUNDANESE, 3532 3533 /** 3534 * Unicode script "Batak". 3535 */ 3536 BATAK, 3537 3538 /** 3539 * Unicode script "Lepcha". 3540 */ 3541 LEPCHA, 3542 3543 /** 3544 * Unicode script "Ol_Chiki". 3545 */ 3546 OL_CHIKI, 3547 3548 /** 3549 * Unicode script "Vai". 3550 */ 3551 VAI, 3552 3553 /** 3554 * Unicode script "Saurashtra". 3555 */ 3556 SAURASHTRA, 3557 3558 /** 3559 * Unicode script "Kayah_Li". 3560 */ 3561 KAYAH_LI, 3562 3563 /** 3564 * Unicode script "Rejang". 3565 */ 3566 REJANG, 3567 3568 /** 3569 * Unicode script "Lycian". 3570 */ 3571 LYCIAN, 3572 3573 /** 3574 * Unicode script "Carian". 3575 */ 3576 CARIAN, 3577 3578 /** 3579 * Unicode script "Lydian". 3580 */ 3581 LYDIAN, 3582 3583 /** 3584 * Unicode script "Cham". 3585 */ 3586 CHAM, 3587 3588 /** 3589 * Unicode script "Tai_Tham". 3590 */ 3591 TAI_THAM, 3592 3593 /** 3594 * Unicode script "Tai_Viet". 3595 */ 3596 TAI_VIET, 3597 3598 /** 3599 * Unicode script "Avestan". 3600 */ 3601 AVESTAN, 3602 3603 /** 3604 * Unicode script "Egyptian_Hieroglyphs". 3605 */ 3606 EGYPTIAN_HIEROGLYPHS, 3607 3608 /** 3609 * Unicode script "Samaritan". 3610 */ 3611 SAMARITAN, 3612 3613 /** 3614 * Unicode script "Mandaic". 3615 */ 3616 MANDAIC, 3617 3618 /** 3619 * Unicode script "Lisu". 3620 */ 3621 LISU, 3622 3623 /** 3624 * Unicode script "Bamum". 3625 */ 3626 BAMUM, 3627 3628 /** 3629 * Unicode script "Javanese". 
3630 */ 3631 JAVANESE, 3632 3633 /** 3634 * Unicode script "Meetei_Mayek". 3635 */ 3636 MEETEI_MAYEK, 3637 3638 /** 3639 * Unicode script "Imperial_Aramaic". 3640 */ 3641 IMPERIAL_ARAMAIC, 3642 3643 /** 3644 * Unicode script "Old_South_Arabian". 3645 */ 3646 OLD_SOUTH_ARABIAN, 3647 3648 /** 3649 * Unicode script "Inscriptional_Parthian". 3650 */ 3651 INSCRIPTIONAL_PARTHIAN, 3652 3653 /** 3654 * Unicode script "Inscriptional_Pahlavi". 3655 */ 3656 INSCRIPTIONAL_PAHLAVI, 3657 3658 /** 3659 * Unicode script "Old_Turkic". 3660 */ 3661 OLD_TURKIC, 3662 3663 /** 3664 * Unicode script "Brahmi". 3665 */ 3666 BRAHMI, 3667 3668 /** 3669 * Unicode script "Kaithi". 3670 */ 3671 KAITHI, 3672 3673 /** 3674 * Unicode script "Meroitic_Hieroglyphs". 3675 */ 3676 MEROITIC_HIEROGLYPHS, 3677 3678 /** 3679 * Unicode script "Meroitic_Cursive". 3680 */ 3681 MEROITIC_CURSIVE, 3682 3683 /** 3684 * Unicode script "Sora_Sompeng". 3685 */ 3686 SORA_SOMPENG, 3687 3688 /** 3689 * Unicode script "Chakma". 3690 */ 3691 CHAKMA, 3692 3693 /** 3694 * Unicode script "Sharada". 3695 */ 3696 SHARADA, 3697 3698 /** 3699 * Unicode script "Takri". 3700 */ 3701 TAKRI, 3702 3703 /** 3704 * Unicode script "Miao". 3705 */ 3706 MIAO, 3707 3708 /** 3709 * Unicode script "Unknown".
3710 */ 3711 UNKNOWN; 3712 3713 private static final int[] scriptStarts = { 3714 0x0000, // 0000..0040; COMMON 3715 0x0041, // 0041..005A; LATIN 3716 0x005B, // 005B..0060; COMMON 3717 0x0061, // 0061..007A; LATIN 3718 0x007B, // 007B..00A9; COMMON 3719 0x00AA, // 00AA..00AA; LATIN 3720 0x00AB, // 00AB..00B9; COMMON 3721 0x00BA, // 00BA..00BA; LATIN 3722 0x00BB, // 00BB..00BF; COMMON 3723 0x00C0, // 00C0..00D6; LATIN 3724 0x00D7, // 00D7..00D7; COMMON 3725 0x00D8, // 00D8..00F6; LATIN 3726 0x00F7, // 00F7..00F7; COMMON 3727 0x00F8, // 00F8..02B8; LATIN 3728 0x02B9, // 02B9..02DF; COMMON 3729 0x02E0, // 02E0..02E4; LATIN 3730 0x02E5, // 02E5..02E9; COMMON 3731 0x02EA, // 02EA..02EB; BOPOMOFO 3732 0x02EC, // 02EC..02FF; COMMON 3733 0x0300, // 0300..036F; INHERITED 3734 0x0370, // 0370..0373; GREEK 3735 0x0374, // 0374..0374; COMMON 3736 0x0375, // 0375..037D; GREEK 3737 0x037E, // 037E..0383; COMMON 3738 0x0384, // 0384..0384; GREEK 3739 0x0385, // 0385..0385; COMMON 3740 0x0386, // 0386..0386; GREEK 3741 0x0387, // 0387..0387; COMMON 3742 0x0388, // 0388..03E1; GREEK 3743 0x03E2, // 03E2..03EF; COPTIC 3744 0x03F0, // 03F0..03FF; GREEK 3745 0x0400, // 0400..0484; CYRILLIC 3746 0x0485, // 0485..0486; INHERITED 3747 0x0487, // 0487..0530; CYRILLIC 3748 0x0531, // 0531..0588; ARMENIAN 3749 0x0589, // 0589..0589; COMMON 3750 0x058A, // 058A..0590; ARMENIAN 3751 0x0591, // 0591..05FF; HEBREW 3752 0x0600, // 0600..060B; ARABIC 3753 0x060C, // 060C..060C; COMMON 3754 0x060D, // 060D..061A; ARABIC 3755 0x061B, // 061B..061D; COMMON 3756 0x061E, // 061E..061E; ARABIC 3757 0x061F, // 061F..061F; COMMON 3758 0x0620, // 0620..063F; ARABIC 3759 0x0640, // 0640..0640; COMMON 3760 0x0641, // 0641..064A; ARABIC 3761 0x064B, // 064B..0655; INHERITED 3762 0x0656, // 0656..065F; ARABIC 3763 0x0660, // 0660..0669; COMMON 3764 0x066A, // 066A..066F; ARABIC 3765 0x0670, // 0670..0670; INHERITED 3766 0x0671, // 0671..06DC; ARABIC 3767 0x06DD, // 06DD..06DD; COMMON 3768 0x06DE, // 
06DE..06FF; ARABIC 3769 0x0700, // 0700..074F; SYRIAC 3770 0x0750, // 0750..077F; ARABIC 3771 0x0780, // 0780..07BF; THAANA 3772 0x07C0, // 07C0..07FF; NKO 3773 0x0800, // 0800..083F; SAMARITAN 3774 0x0840, // 0840..089F; MANDAIC 3775 0x08A0, // 08A0..08FF; ARABIC 3776 0x0900, // 0900..0950; DEVANAGARI 3777 0x0951, // 0951..0952; INHERITED 3778 0x0953, // 0953..0963; DEVANAGARI 3779 0x0964, // 0964..0965; COMMON 3780 0x0966, // 0966..0980; DEVANAGARI 3781 0x0981, // 0981..0A00; BENGALI 3782 0x0A01, // 0A01..0A80; GURMUKHI 3783 0x0A81, // 0A81..0B00; GUJARATI 3784 0x0B01, // 0B01..0B81; ORIYA 3785 0x0B82, // 0B82..0C00; TAMIL 3786 0x0C01, // 0C01..0C81; TELUGU 3787 0x0C82, // 0C82..0CF0; KANNADA 3788 0x0D02, // 0D02..0D81; MALAYALAM 3789 0x0D82, // 0D82..0E00; SINHALA 3790 0x0E01, // 0E01..0E3E; THAI 3791 0x0E3F, // 0E3F..0E3F; COMMON 3792 0x0E40, // 0E40..0E80; THAI 3793 0x0E81, // 0E81..0EFF; LAO 3794 0x0F00, // 0F00..0FD4; TIBETAN 3795 0x0FD5, // 0FD5..0FD8; COMMON 3796 0x0FD9, // 0FD9..0FFF; TIBETAN 3797 0x1000, // 1000..109F; MYANMAR 3798 0x10A0, // 10A0..10FA; GEORGIAN 3799 0x10FB, // 10FB..10FB; COMMON 3800 0x10FC, // 10FC..10FF; GEORGIAN 3801 0x1100, // 1100..11FF; HANGUL 3802 0x1200, // 1200..139F; ETHIOPIC 3803 0x13A0, // 13A0..13FF; CHEROKEE 3804 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3805 0x1680, // 1680..169F; OGHAM 3806 0x16A0, // 16A0..16EA; RUNIC 3807 0x16EB, // 16EB..16ED; COMMON 3808 0x16EE, // 16EE..16FF; RUNIC 3809 0x1700, // 1700..171F; TAGALOG 3810 0x1720, // 1720..1734; HANUNOO 3811 0x1735, // 1735..173F; COMMON 3812 0x1740, // 1740..175F; BUHID 3813 0x1760, // 1760..177F; TAGBANWA 3814 0x1780, // 1780..17FF; KHMER 3815 0x1800, // 1800..1801; MONGOLIAN 3816 0x1802, // 1802..1803; COMMON 3817 0x1804, // 1804..1804; MONGOLIAN 3818 0x1805, // 1805..1805; COMMON 3819 0x1806, // 1806..18AF; MONGOLIAN 3820 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3821 0x1900, // 1900..194F; LIMBU 3822 0x1950, // 1950..197F; TAI_LE 3823 0x1980, // 1980..19DF; 
NEW_TAI_LUE 3824 0x19E0, // 19E0..19FF; KHMER 3825 0x1A00, // 1A00..1A1F; BUGINESE 3826 0x1A20, // 1A20..1AFF; TAI_THAM 3827 0x1B00, // 1B00..1B7F; BALINESE 3828 0x1B80, // 1B80..1BBF; SUNDANESE 3829 0x1BC0, // 1BC0..1BFF; BATAK 3830 0x1C00, // 1C00..1C4F; LEPCHA 3831 0x1C50, // 1C50..1CBF; OL_CHIKI 3832 0x1CC0, // 1CC0..1CCF; SUNDANESE 3833 0x1CD0, // 1CD0..1CD2; INHERITED 3834 0x1CD3, // 1CD3..1CD3; COMMON 3835 0x1CD4, // 1CD4..1CE0; INHERITED 3836 0x1CE1, // 1CE1..1CE1; COMMON 3837 0x1CE2, // 1CE2..1CE8; INHERITED 3838 0x1CE9, // 1CE9..1CEC; COMMON 3839 0x1CED, // 1CED..1CED; INHERITED 3840 0x1CEE, // 1CEE..1CF3; COMMON 3841 0x1CF4, // 1CF4..1CF4; INHERITED 3842 0x1CF5, // 1CF5..1CFF; COMMON 3843 0x1D00, // 1D00..1D25; LATIN 3844 0x1D26, // 1D26..1D2A; GREEK 3845 0x1D2B, // 1D2B..1D2B; CYRILLIC 3846 0x1D2C, // 1D2C..1D5C; LATIN 3847 0x1D5D, // 1D5D..1D61; GREEK 3848 0x1D62, // 1D62..1D65; LATIN 3849 0x1D66, // 1D66..1D6A; GREEK 3850 0x1D6B, // 1D6B..1D77; LATIN 3851 0x1D78, // 1D78..1D78; CYRILLIC 3852 0x1D79, // 1D79..1DBE; LATIN 3853 0x1DBF, // 1DBF..1DBF; GREEK 3854 0x1DC0, // 1DC0..1DFF; INHERITED 3855 0x1E00, // 1E00..1EFF; LATIN 3856 0x1F00, // 1F00..1FFF; GREEK 3857 0x2000, // 2000..200B; COMMON 3858 0x200C, // 200C..200D; INHERITED 3859 0x200E, // 200E..2070; COMMON 3860 0x2071, // 2071..2073; LATIN 3861 0x2074, // 2074..207E; COMMON 3862 0x207F, // 207F..207F; LATIN 3863 0x2080, // 2080..208F; COMMON 3864 0x2090, // 2090..209F; LATIN 3865 0x20A0, // 20A0..20CF; COMMON 3866 0x20D0, // 20D0..20FF; INHERITED 3867 0x2100, // 2100..2125; COMMON 3868 0x2126, // 2126..2126; GREEK 3869 0x2127, // 2127..2129; COMMON 3870 0x212A, // 212A..212B; LATIN 3871 0x212C, // 212C..2131; COMMON 3872 0x2132, // 2132..2132; LATIN 3873 0x2133, // 2133..214D; COMMON 3874 0x214E, // 214E..214E; LATIN 3875 0x214F, // 214F..215F; COMMON 3876 0x2160, // 2160..2188; LATIN 3877 0x2189, // 2189..27FF; COMMON 3878 0x2800, // 2800..28FF; BRAILLE 3879 0x2900, // 2900..2BFF; COMMON 3880 
0x2C00, // 2C00..2C5F; GLAGOLITIC 3881 0x2C60, // 2C60..2C7F; LATIN 3882 0x2C80, // 2C80..2CFF; COPTIC 3883 0x2D00, // 2D00..2D2F; GEORGIAN 3884 0x2D30, // 2D30..2D7F; TIFINAGH 3885 0x2D80, // 2D80..2DDF; ETHIOPIC 3886 0x2DE0, // 2DE0..2DFF; CYRILLIC 3887 0x2E00, // 2E00..2E7F; COMMON 3888 0x2E80, // 2E80..2FEF; HAN 3889 0x2FF0, // 2FF0..3004; COMMON 3890 0x3005, // 3005..3005; HAN 3891 0x3006, // 3006..3006; COMMON 3892 0x3007, // 3007..3007; HAN 3893 0x3008, // 3008..3020; COMMON 3894 0x3021, // 3021..3029; HAN 3895 0x302A, // 302A..302D; INHERITED 3896 0x302E, // 302E..302F; HANGUL 3897 0x3030, // 3030..3037; COMMON 3898 0x3038, // 3038..303B; HAN 3899 0x303C, // 303C..3040; COMMON 3900 0x3041, // 3041..3098; HIRAGANA 3901 0x3099, // 3099..309A; INHERITED 3902 0x309B, // 309B..309C; COMMON 3903 0x309D, // 309D..309F; HIRAGANA 3904 0x30A0, // 30A0..30A0; COMMON 3905 0x30A1, // 30A1..30FA; KATAKANA 3906 0x30FB, // 30FB..30FC; COMMON 3907 0x30FD, // 30FD..3104; KATAKANA 3908 0x3105, // 3105..3130; BOPOMOFO 3909 0x3131, // 3131..318F; HANGUL 3910 0x3190, // 3190..319F; COMMON 3911 0x31A0, // 31A0..31BF; BOPOMOFO 3912 0x31C0, // 31C0..31EF; COMMON 3913 0x31F0, // 31F0..31FF; KATAKANA 3914 0x3200, // 3200..321F; HANGUL 3915 0x3220, // 3220..325F; COMMON 3916 0x3260, // 3260..327E; HANGUL 3917 0x327F, // 327F..32CF; COMMON 3918 0x32D0, // 32D0..3357; KATAKANA 3919 0x3358, // 3358..33FF; COMMON 3920 0x3400, // 3400..4DBF; HAN 3921 0x4DC0, // 4DC0..4DFF; COMMON 3922 0x4E00, // 4E00..9FFF; HAN 3923 0xA000, // A000..A4CF; YI 3924 0xA4D0, // A4D0..A4FF; LISU 3925 0xA500, // A500..A63F; VAI 3926 0xA640, // A640..A69F; CYRILLIC 3927 0xA6A0, // A6A0..A6FF; BAMUM 3928 0xA700, // A700..A721; COMMON 3929 0xA722, // A722..A787; LATIN 3930 0xA788, // A788..A78A; COMMON 3931 0xA78B, // A78B..A7FF; LATIN 3932 0xA800, // A800..A82F; SYLOTI_NAGRI 3933 0xA830, // A830..A83F; COMMON 3934 0xA840, // A840..A87F; PHAGS_PA 3935 0xA880, // A880..A8DF; SAURASHTRA 3936 0xA8E0, // A8E0..A8FF; 
DEVANAGARI 3937 0xA900, // A900..A92F; KAYAH_LI 3938 0xA930, // A930..A95F; REJANG 3939 0xA960, // A960..A97F; HANGUL 3940 0xA980, // A980..A9FF; JAVANESE 3941 0xAA00, // AA00..AA5F; CHAM 3942 0xAA60, // AA60..AA7F; MYANMAR 3943 0xAA80, // AA80..AADF; TAI_VIET 3944 0xAAE0, // AAE0..AB00; MEETEI_MAYEK 3945 0xAB01, // AB01..ABBF; ETHIOPIC 3946 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3947 0xAC00, // AC00..D7FB; HANGUL 3948 0xD7FC, // D7FC..F8FF; UNKNOWN 3949 0xF900, // F900..FAFF; HAN 3950 0xFB00, // FB00..FB12; LATIN 3951 0xFB13, // FB13..FB1C; ARMENIAN 3952 0xFB1D, // FB1D..FB4F; HEBREW 3953 0xFB50, // FB50..FD3D; ARABIC 3954 0xFD3E, // FD3E..FD4F; COMMON 3955 0xFD50, // FD50..FDFC; ARABIC 3956 0xFDFD, // FDFD..FDFF; COMMON 3957 0xFE00, // FE00..FE0F; INHERITED 3958 0xFE10, // FE10..FE1F; COMMON 3959 0xFE20, // FE20..FE2F; INHERITED 3960 0xFE30, // FE30..FE6F; COMMON 3961 0xFE70, // FE70..FEFE; ARABIC 3962 0xFEFF, // FEFF..FF20; COMMON 3963 0xFF21, // FF21..FF3A; LATIN 3964 0xFF3B, // FF3B..FF40; COMMON 3965 0xFF41, // FF41..FF5A; LATIN 3966 0xFF5B, // FF5B..FF65; COMMON 3967 0xFF66, // FF66..FF6F; KATAKANA 3968 0xFF70, // FF70..FF70; COMMON 3969 0xFF71, // FF71..FF9D; KATAKANA 3970 0xFF9E, // FF9E..FF9F; COMMON 3971 0xFFA0, // FFA0..FFDF; HANGUL 3972 0xFFE0, // FFE0..FFFF; COMMON 3973 0x10000, // 10000..100FF; LINEAR_B 3974 0x10100, // 10100..1013F; COMMON 3975 0x10140, // 10140..1018F; GREEK 3976 0x10190, // 10190..101FC; COMMON 3977 0x101FD, // 101FD..1027F; INHERITED 3978 0x10280, // 10280..1029F; LYCIAN 3979 0x102A0, // 102A0..102FF; CARIAN 3980 0x10300, // 10300..1032F; OLD_ITALIC 3981 0x10330, // 10330..1037F; GOTHIC 3982 0x10380, // 10380..1039F; UGARITIC 3983 0x103A0, // 103A0..103FF; OLD_PERSIAN 3984 0x10400, // 10400..1044F; DESERET 3985 0x10450, // 10450..1047F; SHAVIAN 3986 0x10480, // 10480..107FF; OSMANYA 3987 0x10800, // 10800..1083F; CYPRIOT 3988 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3989 0x10900, // 10900..1091F; PHOENICIAN 3990 0x10920, // 
10920..1097F; LYDIAN 3991 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 3992 0x109A0, // 109A0..109FF; MEROITIC_CURSIVE 3993 0x10A00, // 10A00..10A5F; KHAROSHTHI 3994 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 3995 0x10B00, // 10B00..10B3F; AVESTAN 3996 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 3997 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 3998 0x10C00, // 10C00..10E5F; OLD_TURKIC 3999 0x10E60, // 10E60..10FFF; ARABIC 4000 0x11000, // 11000..1107F; BRAHMI 4001 0x11080, // 11080..110CF; KAITHI 4002 0x110D0, // 110D0..110FF; SORA_SOMPENG 4003 0x11100, // 11100..1117F; CHAKMA 4004 0x11180, // 11180..1167F; SHARADA 4005 0x11680, // 11680..116CF; TAKRI 4006 0x12000, // 12000..12FFF; CUNEIFORM 4007 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 4008 0x16800, // 16800..16A38; BAMUM 4009 0x16F00, // 16F00..16F9F; MIAO 4010 0x1B000, // 1B000..1B000; KATAKANA 4011 0x1B001, // 1B001..1CFFF; HIRAGANA 4012 0x1D000, // 1D000..1D166; COMMON 4013 0x1D167, // 1D167..1D169; INHERITED 4014 0x1D16A, // 1D16A..1D17A; COMMON 4015 0x1D17B, // 1D17B..1D182; INHERITED 4016 0x1D183, // 1D183..1D184; COMMON 4017 0x1D185, // 1D185..1D18B; INHERITED 4018 0x1D18C, // 1D18C..1D1A9; COMMON 4019 0x1D1AA, // 1D1AA..1D1AD; INHERITED 4020 0x1D1AE, // 1D1AE..1D1FF; COMMON 4021 0x1D200, // 1D200..1D2FF; GREEK 4022 0x1D300, // 1D300..1EDFF; COMMON 4023 0x1EE00, // 1EE00..1EFFF; ARABIC 4024 0x1F000, // 1F000..1F1FF; COMMON 4025 0x1F200, // 1F200..1F200; HIRAGANA 4026 0x1F201, // 1F210..1FFFF; COMMON 4027 0x20000, // 20000..E0000; HAN 4028 0xE0001, // E0001..E00FF; COMMON 4029 0xE0100, // E0100..E01EF; INHERITED 4030 0xE01F0 // E01F0..10FFFF; UNKNOWN 4031 4032 }; 4033 4034 private static final UnicodeScript[] scripts = { 4035 COMMON, 4036 LATIN, 4037 COMMON, 4038 LATIN, 4039 COMMON, 4040 LATIN, 4041 COMMON, 4042 LATIN, 4043 COMMON, 4044 LATIN, 4045 COMMON, 4046 LATIN, 4047 COMMON, 4048 LATIN, 4049 COMMON, 4050 LATIN, 4051 COMMON, 4052 BOPOMOFO, 4053 COMMON, 4054 INHERITED, 4055 GREEK, 4056 
COMMON, 4057 GREEK, 4058 COMMON, 4059 GREEK, 4060 COMMON, 4061 GREEK, 4062 COMMON, 4063 GREEK, 4064 COPTIC, 4065 GREEK, 4066 CYRILLIC, 4067 INHERITED, 4068 CYRILLIC, 4069 ARMENIAN, 4070 COMMON, 4071 ARMENIAN, 4072 HEBREW, 4073 ARABIC, 4074 COMMON, 4075 ARABIC, 4076 COMMON, 4077 ARABIC, 4078 COMMON, 4079 ARABIC, 4080 COMMON, 4081 ARABIC, 4082 INHERITED, 4083 ARABIC, 4084 COMMON, 4085 ARABIC, 4086 INHERITED, 4087 ARABIC, 4088 COMMON, 4089 ARABIC, 4090 SYRIAC, 4091 ARABIC, 4092 THAANA, 4093 NKO, 4094 SAMARITAN, 4095 MANDAIC, 4096 ARABIC, 4097 DEVANAGARI, 4098 INHERITED, 4099 DEVANAGARI, 4100 COMMON, 4101 DEVANAGARI, 4102 BENGALI, 4103 GURMUKHI, 4104 GUJARATI, 4105 ORIYA, 4106 TAMIL, 4107 TELUGU, 4108 KANNADA, 4109 MALAYALAM, 4110 SINHALA, 4111 THAI, 4112 COMMON, 4113 THAI, 4114 LAO, 4115 TIBETAN, 4116 COMMON, 4117 TIBETAN, 4118 MYANMAR, 4119 GEORGIAN, 4120 COMMON, 4121 GEORGIAN, 4122 HANGUL, 4123 ETHIOPIC, 4124 CHEROKEE, 4125 CANADIAN_ABORIGINAL, 4126 OGHAM, 4127 RUNIC, 4128 COMMON, 4129 RUNIC, 4130 TAGALOG, 4131 HANUNOO, 4132 COMMON, 4133 BUHID, 4134 TAGBANWA, 4135 KHMER, 4136 MONGOLIAN, 4137 COMMON, 4138 MONGOLIAN, 4139 COMMON, 4140 MONGOLIAN, 4141 CANADIAN_ABORIGINAL, 4142 LIMBU, 4143 TAI_LE, 4144 NEW_TAI_LUE, 4145 KHMER, 4146 BUGINESE, 4147 TAI_THAM, 4148 BALINESE, 4149 SUNDANESE, 4150 BATAK, 4151 LEPCHA, 4152 OL_CHIKI, 4153 SUNDANESE, 4154 INHERITED, 4155 COMMON, 4156 INHERITED, 4157 COMMON, 4158 INHERITED, 4159 COMMON, 4160 INHERITED, 4161 COMMON, 4162 INHERITED, 4163 COMMON, 4164 LATIN, 4165 GREEK, 4166 CYRILLIC, 4167 LATIN, 4168 GREEK, 4169 LATIN, 4170 GREEK, 4171 LATIN, 4172 CYRILLIC, 4173 LATIN, 4174 GREEK, 4175 INHERITED, 4176 LATIN, 4177 GREEK, 4178 COMMON, 4179 INHERITED, 4180 COMMON, 4181 LATIN, 4182 COMMON, 4183 LATIN, 4184 COMMON, 4185 LATIN, 4186 COMMON, 4187 INHERITED, 4188 COMMON, 4189 GREEK, 4190 COMMON, 4191 LATIN, 4192 COMMON, 4193 LATIN, 4194 COMMON, 4195 LATIN, 4196 COMMON, 4197 LATIN, 4198 COMMON, 4199 BRAILLE, 4200 COMMON, 4201 GLAGOLITIC, 
4202 LATIN, 4203 COPTIC, 4204 GEORGIAN, 4205 TIFINAGH, 4206 ETHIOPIC, 4207 CYRILLIC, 4208 COMMON, 4209 HAN, 4210 COMMON, 4211 HAN, 4212 COMMON, 4213 HAN, 4214 COMMON, 4215 HAN, 4216 INHERITED, 4217 HANGUL, 4218 COMMON, 4219 HAN, 4220 COMMON, 4221 HIRAGANA, 4222 INHERITED, 4223 COMMON, 4224 HIRAGANA, 4225 COMMON, 4226 KATAKANA, 4227 COMMON, 4228 KATAKANA, 4229 BOPOMOFO, 4230 HANGUL, 4231 COMMON, 4232 BOPOMOFO, 4233 COMMON, 4234 KATAKANA, 4235 HANGUL, 4236 COMMON, 4237 HANGUL, 4238 COMMON, 4239 KATAKANA, 4240 COMMON, 4241 HAN, 4242 COMMON, 4243 HAN, 4244 YI, 4245 LISU, 4246 VAI, 4247 CYRILLIC, 4248 BAMUM, 4249 COMMON, 4250 LATIN, 4251 COMMON, 4252 LATIN, 4253 SYLOTI_NAGRI, 4254 COMMON, 4255 PHAGS_PA, 4256 SAURASHTRA, 4257 DEVANAGARI, 4258 KAYAH_LI, 4259 REJANG, 4260 HANGUL, 4261 JAVANESE, 4262 CHAM, 4263 MYANMAR, 4264 TAI_VIET, 4265 MEETEI_MAYEK, 4266 ETHIOPIC, 4267 MEETEI_MAYEK, 4268 HANGUL, 4269 UNKNOWN , 4270 HAN, 4271 LATIN, 4272 ARMENIAN, 4273 HEBREW, 4274 ARABIC, 4275 COMMON, 4276 ARABIC, 4277 COMMON, 4278 INHERITED, 4279 COMMON, 4280 INHERITED, 4281 COMMON, 4282 ARABIC, 4283 COMMON, 4284 LATIN, 4285 COMMON, 4286 LATIN, 4287 COMMON, 4288 KATAKANA, 4289 COMMON, 4290 KATAKANA, 4291 COMMON, 4292 HANGUL, 4293 COMMON, 4294 LINEAR_B, 4295 COMMON, 4296 GREEK, 4297 COMMON, 4298 INHERITED, 4299 LYCIAN, 4300 CARIAN, 4301 OLD_ITALIC, 4302 GOTHIC, 4303 UGARITIC, 4304 OLD_PERSIAN, 4305 DESERET, 4306 SHAVIAN, 4307 OSMANYA, 4308 CYPRIOT, 4309 IMPERIAL_ARAMAIC, 4310 PHOENICIAN, 4311 LYDIAN, 4312 MEROITIC_HIEROGLYPHS, 4313 MEROITIC_CURSIVE, 4314 KHAROSHTHI, 4315 OLD_SOUTH_ARABIAN, 4316 AVESTAN, 4317 INSCRIPTIONAL_PARTHIAN, 4318 INSCRIPTIONAL_PAHLAVI, 4319 OLD_TURKIC, 4320 ARABIC, 4321 BRAHMI, 4322 KAITHI, 4323 SORA_SOMPENG, 4324 CHAKMA, 4325 SHARADA, 4326 TAKRI, 4327 CUNEIFORM, 4328 EGYPTIAN_HIEROGLYPHS, 4329 BAMUM, 4330 MIAO, 4331 KATAKANA, 4332 HIRAGANA, 4333 COMMON, 4334 INHERITED, 4335 COMMON, 4336 INHERITED, 4337 COMMON, 4338 INHERITED, 4339 COMMON, 4340 INHERITED, 4341 
COMMON, 4342 GREEK, 4343 COMMON, 4344 ARABIC, 4345 COMMON, 4346 HIRAGANA, 4347 COMMON, 4348 HAN, 4349 COMMON, 4350 INHERITED, 4351 UNKNOWN 4352 }; 4353 4354 private static HashMap<String, Character.UnicodeScript> aliases; 4355 static { 4356 aliases = new HashMap<>(128); 4357 aliases.put("ARAB", ARABIC); 4358 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4359 aliases.put("ARMN", ARMENIAN); 4360 aliases.put("AVST", AVESTAN); 4361 aliases.put("BALI", BALINESE); 4362 aliases.put("BAMU", BAMUM); 4363 aliases.put("BATK", BATAK); 4364 aliases.put("BENG", BENGALI); 4365 aliases.put("BOPO", BOPOMOFO); 4366 aliases.put("BRAI", BRAILLE); 4367 aliases.put("BRAH", BRAHMI); 4368 aliases.put("BUGI", BUGINESE); 4369 aliases.put("BUHD", BUHID); 4370 aliases.put("CAKM", CHAKMA); 4371 aliases.put("CANS", CANADIAN_ABORIGINAL); 4372 aliases.put("CARI", CARIAN); 4373 aliases.put("CHAM", CHAM); 4374 aliases.put("CHER", CHEROKEE); 4375 aliases.put("COPT", COPTIC); 4376 aliases.put("CPRT", CYPRIOT); 4377 aliases.put("CYRL", CYRILLIC); 4378 aliases.put("DEVA", DEVANAGARI); 4379 aliases.put("DSRT", DESERET); 4380 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4381 aliases.put("ETHI", ETHIOPIC); 4382 aliases.put("GEOR", GEORGIAN); 4383 aliases.put("GLAG", GLAGOLITIC); 4384 aliases.put("GOTH", GOTHIC); 4385 aliases.put("GREK", GREEK); 4386 aliases.put("GUJR", GUJARATI); 4387 aliases.put("GURU", GURMUKHI); 4388 aliases.put("HANG", HANGUL); 4389 aliases.put("HANI", HAN); 4390 aliases.put("HANO", HANUNOO); 4391 aliases.put("HEBR", HEBREW); 4392 aliases.put("HIRA", HIRAGANA); 4393 // it appears we don't have the KATAKANA_OR_HIRAGANA 4394 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4395 aliases.put("ITAL", OLD_ITALIC); 4396 aliases.put("JAVA", JAVANESE); 4397 aliases.put("KALI", KAYAH_LI); 4398 aliases.put("KANA", KATAKANA); 4399 aliases.put("KHAR", KHAROSHTHI); 4400 aliases.put("KHMR", KHMER); 4401 aliases.put("KNDA", KANNADA); 4402 aliases.put("KTHI", KAITHI); 4403 aliases.put("LANA", TAI_THAM); 4404 
aliases.put("LAOO", LAO); 4405 aliases.put("LATN", LATIN); 4406 aliases.put("LEPC", LEPCHA); 4407 aliases.put("LIMB", LIMBU); 4408 aliases.put("LINB", LINEAR_B); 4409 aliases.put("LISU", LISU); 4410 aliases.put("LYCI", LYCIAN); 4411 aliases.put("LYDI", LYDIAN); 4412 aliases.put("MAND", MANDAIC); 4413 aliases.put("MERC", MEROITIC_CURSIVE); 4414 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 4415 aliases.put("MLYM", MALAYALAM); 4416 aliases.put("MONG", MONGOLIAN); 4417 aliases.put("MTEI", MEETEI_MAYEK); 4418 aliases.put("MYMR", MYANMAR); 4419 aliases.put("NKOO", NKO); 4420 aliases.put("OGAM", OGHAM); 4421 aliases.put("OLCK", OL_CHIKI); 4422 aliases.put("ORKH", OLD_TURKIC); 4423 aliases.put("ORYA", ORIYA); 4424 aliases.put("OSMA", OSMANYA); 4425 aliases.put("PHAG", PHAGS_PA); 4426 aliases.put("PLRD", MIAO); 4427 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4428 aliases.put("PHNX", PHOENICIAN); 4429 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4430 aliases.put("RJNG", REJANG); 4431 aliases.put("RUNR", RUNIC); 4432 aliases.put("SAMR", SAMARITAN); 4433 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4434 aliases.put("SAUR", SAURASHTRA); 4435 aliases.put("SHAW", SHAVIAN); 4436 aliases.put("SHRD", SHARADA); 4437 aliases.put("SINH", SINHALA); 4438 aliases.put("SORA", SORA_SOMPENG); 4439 aliases.put("SUND", SUNDANESE); 4440 aliases.put("SYLO", SYLOTI_NAGRI); 4441 aliases.put("SYRC", SYRIAC); 4442 aliases.put("TAGB", TAGBANWA); 4443 aliases.put("TALE", TAI_LE); 4444 aliases.put("TAKR", TAKRI); 4445 aliases.put("TALU", NEW_TAI_LUE); 4446 aliases.put("TAML", TAMIL); 4447 aliases.put("TAVT", TAI_VIET); 4448 aliases.put("TELU", TELUGU); 4449 aliases.put("TFNG", TIFINAGH); 4450 aliases.put("TGLG", TAGALOG); 4451 aliases.put("THAA", THAANA); 4452 aliases.put("THAI", THAI); 4453 aliases.put("TIBT", TIBETAN); 4454 aliases.put("UGAR", UGARITIC); 4455 aliases.put("VAII", VAI); 4456 aliases.put("XPEO", OLD_PERSIAN); 4457 aliases.put("XSUX", CUNEIFORM); 4458 aliases.put("YIII", YI); 4459 
aliases.put("ZINH", INHERITED); 4460 aliases.put("ZYYY", COMMON); 4461 aliases.put("ZZZZ", UNKNOWN); 4462 } 4463 4464 /** 4465 * Returns the enum constant representing the Unicode script of which 4466 * the given character (Unicode code point) is assigned to. 4467 * 4468 * @param codePoint the character (Unicode code point) in question. 4469 * @return The {@code UnicodeScript} constant representing the 4470 * Unicode script of which this character is assigned to. 4471 * 4472 * @exception IllegalArgumentException if the specified 4473 * {@code codePoint} is an invalid Unicode code point. 4474 * @see Character#isValidCodePoint(int) 4475 * 4476 */ 4477 public static UnicodeScript of(int codePoint) { 4478 if (!isValidCodePoint(codePoint)) 4479 throw new IllegalArgumentException(); 4480 int type = getType(codePoint); 4481 // leave SURROGATE and PRIVATE_USE for table lookup 4482 if (type == UNASSIGNED) 4483 return UNKNOWN; 4484 int index = Arrays.binarySearch(scriptStarts, codePoint); 4485 if (index < 0) 4486 index = -index - 2; 4487 return scripts[index]; 4488 } 4489 4490 /** 4491 * Returns the UnicodeScript constant with the given Unicode script 4492 * name or the script name alias. Script names and their aliases are 4493 * determined by The Unicode Standard. The files Scripts<version>.txt 4494 * and PropertyValueAliases<version>.txt define script names 4495 * and the script name aliases for a particular version of the 4496 * standard. The {@link Character} class specifies the version of 4497 * the standard that it supports. 4498 * <p> 4499 * Character case is ignored for all of the valid script names. 4500 * The en_US locale's case mapping rules are used to provide 4501 * case-insensitive string comparisons for script name validation. 4502 * 4503 * @param scriptName A {@code UnicodeScript} name. 
4504 * @return The {@code UnicodeScript} constant identified 4505 * by {@code scriptName} 4506 * @throws IllegalArgumentException if {@code scriptName} is an 4507 * invalid name 4508 * @throws NullPointerException if {@code scriptName} is null 4509 */ 4510 public static final UnicodeScript forName(String scriptName) { 4511 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4512 //.replace(' ', '_')); 4513 UnicodeScript sc = aliases.get(scriptName); 4514 if (sc != null) 4515 return sc; 4516 return valueOf(scriptName); 4517 } 4518 } 4519 4520 /** 4521 * The value of the {@code Character}. 4522 * 4523 * @serial 4524 */ 4525 private final char value; 4526 4527 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4528 private static final long serialVersionUID = 3786198910865385080L; 4529 4530 /** 4531 * Constructs a newly allocated {@code Character} object that 4532 * represents the specified {@code char} value. 4533 * 4534 * @param value the value to be represented by the 4535 * {@code Character} object. 4536 */ 4537 public Character(char value) { 4538 this.value = value; 4539 } 4540 4541 private static class CharacterCache { 4542 private CharacterCache(){} 4543 4544 static final Character cache[] = new Character[127 + 1]; 4545 4546 static { 4547 for (int i = 0; i < cache.length; i++) 4548 cache[i] = new Character((char)i); 4549 } 4550 } 4551 4552 /** 4553 * Returns a <tt>Character</tt> instance representing the specified 4554 * <tt>char</tt> value. 4555 * If a new <tt>Character</tt> instance is not required, this method 4556 * should generally be used in preference to the constructor 4557 * {@link #Character(char)}, as this method is likely to yield 4558 * significantly better space and time performance by caching 4559 * frequently requested values. 4560 * 4561 * This method will always cache values in the range {@code 4562 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 4563 * cache other values outside of this range. 
4564 * 4565 * @param c a char value. 4566 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4567 * @since 1.5 4568 */ 4569 public static Character valueOf(char c) { 4570 if (c <= 127) { // must cache 4571 return CharacterCache.cache[(int)c]; 4572 } 4573 return new Character(c); 4574 } 4575 4576 /** 4577 * Returns the value of this {@code Character} object. 4578 * @return the primitive {@code char} value represented by 4579 * this object. 4580 */ 4581 public char charValue() { 4582 return value; 4583 } 4584 4585 /** 4586 * Returns a hash code for this {@code Character}; equal to the result 4587 * of invoking {@code charValue()}. 4588 * 4589 * @return a hash code value for this {@code Character} 4590 */ 4591 @Override 4592 public int hashCode() { 4593 return Character.hashCode(value); 4594 } 4595 4596 /** 4597 * Returns a hash code for a {@code char} value; compatible with 4598 * {@code Character.hashCode()}. 4599 * 4600 * @since 1.8 4601 * 4602 * @param value The {@code char} for which to return a hash code. 4603 * @return a hash code value for a {@code char} value. 4604 */ 4605 public static int hashCode(char value) { 4606 return (int)value; 4607 } 4608 4609 /** 4610 * Compares this object against the specified object. 4611 * The result is {@code true} if and only if the argument is not 4612 * {@code null} and is a {@code Character} object that 4613 * represents the same {@code char} value as this object. 4614 * 4615 * @param obj the object to compare with. 4616 * @return {@code true} if the objects are the same; 4617 * {@code false} otherwise. 4618 */ 4619 public boolean equals(Object obj) { 4620 if (obj instanceof Character) { 4621 return value == ((Character)obj).charValue(); 4622 } 4623 return false; 4624 } 4625 4626 /** 4627 * Returns a {@code String} object representing this 4628 * {@code Character}'s value. 
The result is a string of 4629 * length 1 whose sole component is the primitive 4630 * {@code char} value represented by this 4631 * {@code Character} object. 4632 * 4633 * @return a string representation of this object. 4634 */ 4635 public String toString() { 4636 char buf[] = {value}; 4637 return String.valueOf(buf); 4638 } 4639 4640 /** 4641 * Returns a {@code String} object representing the 4642 * specified {@code char}. The result is a string of length 4643 * 1 consisting solely of the specified {@code char}. 4644 * 4645 * @param c the {@code char} to be converted 4646 * @return the string representation of the specified {@code char} 4647 * @since 1.4 4648 */ 4649 public static String toString(char c) { 4650 return String.valueOf(c); 4651 } 4652 4653 /** 4654 * Determines whether the specified code point is a valid 4655 * <a href="http://www.unicode.org/glossary/#code_point"> 4656 * Unicode code point value</a>. 4657 * 4658 * @param codePoint the Unicode code point to be tested 4659 * @return {@code true} if the specified code point value is between 4660 * {@link #MIN_CODE_POINT} and 4661 * {@link #MAX_CODE_POINT} inclusive; 4662 * {@code false} otherwise. 4663 * @since 1.5 4664 */ 4665 public static boolean isValidCodePoint(int codePoint) { 4666 // Optimized form of: 4667 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4668 int plane = codePoint >>> 16; 4669 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4670 } 4671 4672 /** 4673 * Determines whether the specified character (Unicode code point) 4674 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4675 * Such code points can be represented using a single {@code char}. 4676 * 4677 * @param codePoint the character (Unicode code point) to be tested 4678 * @return {@code true} if the specified code point is between 4679 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4680 * {@code false} otherwise. 
4681 * @since 1.7 4682 */ 4683 public static boolean isBmpCodePoint(int codePoint) { 4684 return codePoint >>> 16 == 0; 4685 // Optimized form of: 4686 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4687 // We consistently use logical shift (>>>) to facilitate 4688 // additional runtime optimizations. 4689 } 4690 4691 /** 4692 * Determines whether the specified character (Unicode code point) 4693 * is in the <a href="#supplementary">supplementary character</a> range. 4694 * 4695 * @param codePoint the character (Unicode code point) to be tested 4696 * @return {@code true} if the specified code point is between 4697 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4698 * {@link #MAX_CODE_POINT} inclusive; 4699 * {@code false} otherwise. 4700 * @since 1.5 4701 */ 4702 public static boolean isSupplementaryCodePoint(int codePoint) { 4703 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4704 && codePoint < MAX_CODE_POINT + 1; 4705 } 4706 4707 /** 4708 * Determines if the given {@code char} value is a 4709 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4710 * Unicode high-surrogate code unit</a> 4711 * (also known as <i>leading-surrogate code unit</i>). 4712 * 4713 * <p>Such values do not represent characters by themselves, 4714 * but are used in the representation of 4715 * <a href="#supplementary">supplementary characters</a> 4716 * in the UTF-16 encoding. 4717 * 4718 * @param ch the {@code char} value to be tested. 4719 * @return {@code true} if the {@code char} value is between 4720 * {@link #MIN_HIGH_SURROGATE} and 4721 * {@link #MAX_HIGH_SURROGATE} inclusive; 4722 * {@code false} otherwise. 
4723 * @see Character#isLowSurrogate(char) 4724 * @see Character.UnicodeBlock#of(int) 4725 * @since 1.5 4726 */ 4727 public static boolean isHighSurrogate(char ch) { 4728 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4729 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4730 } 4731 4732 /** 4733 * Determines if the given {@code char} value is a 4734 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4735 * Unicode low-surrogate code unit</a> 4736 * (also known as <i>trailing-surrogate code unit</i>). 4737 * 4738 * <p>Such values do not represent characters by themselves, 4739 * but are used in the representation of 4740 * <a href="#supplementary">supplementary characters</a> 4741 * in the UTF-16 encoding. 4742 * 4743 * @param ch the {@code char} value to be tested. 4744 * @return {@code true} if the {@code char} value is between 4745 * {@link #MIN_LOW_SURROGATE} and 4746 * {@link #MAX_LOW_SURROGATE} inclusive; 4747 * {@code false} otherwise. 4748 * @see Character#isHighSurrogate(char) 4749 * @since 1.5 4750 */ 4751 public static boolean isLowSurrogate(char ch) { 4752 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4753 } 4754 4755 /** 4756 * Determines if the given {@code char} value is a Unicode 4757 * <i>surrogate code unit</i>. 4758 * 4759 * <p>Such values do not represent characters by themselves, 4760 * but are used in the representation of 4761 * <a href="#supplementary">supplementary characters</a> 4762 * in the UTF-16 encoding. 4763 * 4764 * <p>A char value is a surrogate code unit if and only if it is either 4765 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4766 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4767 * 4768 * @param ch the {@code char} value to be tested. 4769 * @return {@code true} if the {@code char} value is between 4770 * {@link #MIN_SURROGATE} and 4771 * {@link #MAX_SURROGATE} inclusive; 4772 * {@code false} otherwise. 
4773 * @since 1.7 4774 */ 4775 public static boolean isSurrogate(char ch) { 4776 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4777 } 4778 4779 /** 4780 * Determines whether the specified pair of {@code char} 4781 * values is a valid 4782 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4783 * Unicode surrogate pair</a>. 4784 4785 * <p>This method is equivalent to the expression: 4786 * <blockquote><pre>{@code 4787 * isHighSurrogate(high) && isLowSurrogate(low) 4788 * }</pre></blockquote> 4789 * 4790 * @param high the high-surrogate code value to be tested 4791 * @param low the low-surrogate code value to be tested 4792 * @return {@code true} if the specified high and 4793 * low-surrogate code values represent a valid surrogate pair; 4794 * {@code false} otherwise. 4795 * @since 1.5 4796 */ 4797 public static boolean isSurrogatePair(char high, char low) { 4798 return isHighSurrogate(high) && isLowSurrogate(low); 4799 } 4800 4801 /** 4802 * Determines the number of {@code char} values needed to 4803 * represent the specified character (Unicode code point). If the 4804 * specified character is equal to or greater than 0x10000, then 4805 * the method returns 2. Otherwise, the method returns 1. 4806 * 4807 * <p>This method doesn't validate the specified character to be a 4808 * valid Unicode code point. The caller must validate the 4809 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4810 * if necessary. 4811 * 4812 * @param codePoint the character (Unicode code point) to be tested. 4813 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4814 * @see Character#isSupplementaryCodePoint(int) 4815 * @since 1.5 4816 */ 4817 public static int charCount(int codePoint) { 4818 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4819 } 4820 4821 /** 4822 * Converts the specified surrogate pair to its supplementary code 4823 * point value. 
This method does not validate the specified 4824 * surrogate pair. The caller must validate it using {@link 4825 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4826 * 4827 * @param high the high-surrogate code unit 4828 * @param low the low-surrogate code unit 4829 * @return the supplementary code point composed from the 4830 * specified surrogate pair. 4831 * @since 1.5 4832 */ 4833 public static int toCodePoint(char high, char low) { 4834 // Optimized form of: 4835 // return ((high - MIN_HIGH_SURROGATE) << 10) 4836 // + (low - MIN_LOW_SURROGATE) 4837 // + MIN_SUPPLEMENTARY_CODE_POINT; 4838 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4839 - (MIN_HIGH_SURROGATE << 10) 4840 - MIN_LOW_SURROGATE); 4841 } 4842 4843 /** 4844 * Returns the code point at the given index of the 4845 * {@code CharSequence}. If the {@code char} value at 4846 * the given index in the {@code CharSequence} is in the 4847 * high-surrogate range, the following index is less than the 4848 * length of the {@code CharSequence}, and the 4849 * {@code char} value at the following index is in the 4850 * low-surrogate range, then the supplementary code point 4851 * corresponding to this surrogate pair is returned. Otherwise, 4852 * the {@code char} value at the given index is returned. 4853 * 4854 * @param seq a sequence of {@code char} values (Unicode code 4855 * units) 4856 * @param index the index to the {@code char} values (Unicode 4857 * code units) in {@code seq} to be converted 4858 * @return the Unicode code point at the given index 4859 * @exception NullPointerException if {@code seq} is null. 4860 * @exception IndexOutOfBoundsException if the value 4861 * {@code index} is negative or not less than 4862 * {@link CharSequence#length() seq.length()}. 
4863 * @since 1.5 4864 */ 4865 public static int codePointAt(CharSequence seq, int index) { 4866 char c1 = seq.charAt(index); 4867 if (isHighSurrogate(c1) && ++index < seq.length()) { 4868 char c2 = seq.charAt(index); 4869 if (isLowSurrogate(c2)) { 4870 return toCodePoint(c1, c2); 4871 } 4872 } 4873 return c1; 4874 } 4875 4876 /** 4877 * Returns the code point at the given index of the 4878 * {@code char} array. If the {@code char} value at 4879 * the given index in the {@code char} array is in the 4880 * high-surrogate range, the following index is less than the 4881 * length of the {@code char} array, and the 4882 * {@code char} value at the following index is in the 4883 * low-surrogate range, then the supplementary code point 4884 * corresponding to this surrogate pair is returned. Otherwise, 4885 * the {@code char} value at the given index is returned. 4886 * 4887 * @param a the {@code char} array 4888 * @param index the index to the {@code char} values (Unicode 4889 * code units) in the {@code char} array to be converted 4890 * @return the Unicode code point at the given index 4891 * @exception NullPointerException if {@code a} is null. 4892 * @exception IndexOutOfBoundsException if the value 4893 * {@code index} is negative or not less than 4894 * the length of the {@code char} array. 4895 * @since 1.5 4896 */ 4897 public static int codePointAt(char[] a, int index) { 4898 return codePointAtImpl(a, index, a.length); 4899 } 4900 4901 /** 4902 * Returns the code point at the given index of the 4903 * {@code char} array, where only array elements with 4904 * {@code index} less than {@code limit} can be used. 
If 4905 * the {@code char} value at the given index in the 4906 * {@code char} array is in the high-surrogate range, the 4907 * following index is less than the {@code limit}, and the 4908 * {@code char} value at the following index is in the 4909 * low-surrogate range, then the supplementary code point 4910 * corresponding to this surrogate pair is returned. Otherwise, 4911 * the {@code char} value at the given index is returned. 4912 * 4913 * @param a the {@code char} array 4914 * @param index the index to the {@code char} values (Unicode 4915 * code units) in the {@code char} array to be converted 4916 * @param limit the index after the last array element that 4917 * can be used in the {@code char} array 4918 * @return the Unicode code point at the given index 4919 * @exception NullPointerException if {@code a} is null. 4920 * @exception IndexOutOfBoundsException if the {@code index} 4921 * argument is negative or not less than the {@code limit} 4922 * argument, or if the {@code limit} argument is negative or 4923 * greater than the length of the {@code char} array. 4924 * @since 1.5 4925 */ 4926 public static int codePointAt(char[] a, int index, int limit) { 4927 if (index >= limit || limit < 0 || limit > a.length) { 4928 throw new IndexOutOfBoundsException(); 4929 } 4930 return codePointAtImpl(a, index, limit); 4931 } 4932 4933 // throws ArrayIndexOutOfBoundsException if index out of bounds 4934 static int codePointAtImpl(char[] a, int index, int limit) { 4935 char c1 = a[index]; 4936 if (isHighSurrogate(c1) && ++index < limit) { 4937 char c2 = a[index]; 4938 if (isLowSurrogate(c2)) { 4939 return toCodePoint(c1, c2); 4940 } 4941 } 4942 return c1; 4943 } 4944 4945 /** 4946 * Returns the code point preceding the given index of the 4947 * {@code CharSequence}. 
If the {@code char} value at 4948 * {@code (index - 1)} in the {@code CharSequence} is in 4949 * the low-surrogate range, {@code (index - 2)} is not 4950 * negative, and the {@code char} value at {@code (index - 2)} 4951 * in the {@code CharSequence} is in the 4952 * high-surrogate range, then the supplementary code point 4953 * corresponding to this surrogate pair is returned. Otherwise, 4954 * the {@code char} value at {@code (index - 1)} is 4955 * returned. 4956 * 4957 * @param seq the {@code CharSequence} instance 4958 * @param index the index following the code point that should be returned 4959 * @return the Unicode code point value before the given index. 4960 * @exception NullPointerException if {@code seq} is null. 4961 * @exception IndexOutOfBoundsException if the {@code index} 4962 * argument is less than 1 or greater than {@link 4963 * CharSequence#length() seq.length()}. 4964 * @since 1.5 4965 */ 4966 public static int codePointBefore(CharSequence seq, int index) { 4967 char c2 = seq.charAt(--index); 4968 if (isLowSurrogate(c2) && index > 0) { 4969 char c1 = seq.charAt(--index); 4970 if (isHighSurrogate(c1)) { 4971 return toCodePoint(c1, c2); 4972 } 4973 } 4974 return c2; 4975 } 4976 4977 /** 4978 * Returns the code point preceding the given index of the 4979 * {@code char} array. If the {@code char} value at 4980 * {@code (index - 1)} in the {@code char} array is in 4981 * the low-surrogate range, {@code (index - 2)} is not 4982 * negative, and the {@code char} value at {@code (index - 2)} 4983 * in the {@code char} array is in the 4984 * high-surrogate range, then the supplementary code point 4985 * corresponding to this surrogate pair is returned. Otherwise, 4986 * the {@code char} value at {@code (index - 1)} is 4987 * returned. 4988 * 4989 * @param a the {@code char} array 4990 * @param index the index following the code point that should be returned 4991 * @return the Unicode code point value before the given index. 
4992 * @exception NullPointerException if {@code a} is null. 4993 * @exception IndexOutOfBoundsException if the {@code index} 4994 * argument is less than 1 or greater than the length of the 4995 * {@code char} array 4996 * @since 1.5 4997 */ 4998 public static int codePointBefore(char[] a, int index) { 4999 return codePointBeforeImpl(a, index, 0); 5000 } 5001 5002 /** 5003 * Returns the code point preceding the given index of the 5004 * {@code char} array, where only array elements with 5005 * {@code index} greater than or equal to {@code start} 5006 * can be used. If the {@code char} value at {@code (index - 1)} 5007 * in the {@code char} array is in the 5008 * low-surrogate range, {@code (index - 2)} is not less than 5009 * {@code start}, and the {@code char} value at 5010 * {@code (index - 2)} in the {@code char} array is in 5011 * the high-surrogate range, then the supplementary code point 5012 * corresponding to this surrogate pair is returned. Otherwise, 5013 * the {@code char} value at {@code (index - 1)} is 5014 * returned. 5015 * 5016 * @param a the {@code char} array 5017 * @param index the index following the code point that should be returned 5018 * @param start the index of the first array element in the 5019 * {@code char} array 5020 * @return the Unicode code point value before the given index. 5021 * @exception NullPointerException if {@code a} is null. 5022 * @exception IndexOutOfBoundsException if the {@code index} 5023 * argument is not greater than the {@code start} argument or 5024 * is greater than the length of the {@code char} array, or 5025 * if the {@code start} argument is negative or not less than 5026 * the length of the {@code char} array. 
5027 * @since 1.5 5028 */ 5029 public static int codePointBefore(char[] a, int index, int start) { 5030 if (index <= start || start < 0 || start >= a.length) { 5031 throw new IndexOutOfBoundsException(); 5032 } 5033 return codePointBeforeImpl(a, index, start); 5034 } 5035 5036 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 5037 static int codePointBeforeImpl(char[] a, int index, int start) { 5038 char c2 = a[--index]; 5039 if (isLowSurrogate(c2) && index > start) { 5040 char c1 = a[--index]; 5041 if (isHighSurrogate(c1)) { 5042 return toCodePoint(c1, c2); 5043 } 5044 } 5045 return c2; 5046 } 5047 5048 /** 5049 * Returns the leading surrogate (a 5050 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 5051 * high surrogate code unit</a>) of the 5052 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5053 * surrogate pair</a> 5054 * representing the specified supplementary character (Unicode 5055 * code point) in the UTF-16 encoding. If the specified character 5056 * is not a 5057 * <a href="Character.html#supplementary">supplementary character</a>, 5058 * an unspecified {@code char} is returned. 5059 * 5060 * <p>If 5061 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5062 * is {@code true}, then 5063 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 5064 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 5065 * are also always {@code true}. 
5066 * 5067 * @param codePoint a supplementary character (Unicode code point) 5068 * @return the leading surrogate code unit used to represent the 5069 * character in the UTF-16 encoding 5070 * @since 1.7 5071 */ 5072 public static char highSurrogate(int codePoint) { 5073 return (char) ((codePoint >>> 10) 5074 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 5075 } 5076 5077 /** 5078 * Returns the trailing surrogate (a 5079 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 5080 * low surrogate code unit</a>) of the 5081 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5082 * surrogate pair</a> 5083 * representing the specified supplementary character (Unicode 5084 * code point) in the UTF-16 encoding. If the specified character 5085 * is not a 5086 * <a href="Character.html#supplementary">supplementary character</a>, 5087 * an unspecified {@code char} is returned. 5088 * 5089 * <p>If 5090 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5091 * is {@code true}, then 5092 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 5093 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 5094 * are also always {@code true}. 5095 * 5096 * @param codePoint a supplementary character (Unicode code point) 5097 * @return the trailing surrogate code unit used to represent the 5098 * character in the UTF-16 encoding 5099 * @since 1.7 5100 */ 5101 public static char lowSurrogate(int codePoint) { 5102 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 5103 } 5104 5105 /** 5106 * Converts the specified character (Unicode code point) to its 5107 * UTF-16 representation. If the specified code point is a BMP 5108 * (Basic Multilingual Plane or Plane 0) value, the same value is 5109 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 5110 * specified code point is a supplementary character, its 5111 * surrogate values are stored in {@code dst[dstIndex]} 5112 * (high-surrogate) and {@code dst[dstIndex+1]} 5113 * (low-surrogate), and 2 is returned. 5114 * 5115 * @param codePoint the character (Unicode code point) to be converted. 5116 * @param dst an array of {@code char} in which the 5117 * {@code codePoint}'s UTF-16 value is stored. 5118 * @param dstIndex the start index into the {@code dst} 5119 * array where the converted value is stored. 5120 * @return 1 if the code point is a BMP code point, 2 if the 5121 * code point is a supplementary code point. 5122 * @exception IllegalArgumentException if the specified 5123 * {@code codePoint} is not a valid Unicode code point. 5124 * @exception NullPointerException if the specified {@code dst} is null. 5125 * @exception IndexOutOfBoundsException if {@code dstIndex} 5126 * is negative or not less than {@code dst.length}, or if 5127 * {@code dst} at {@code dstIndex} doesn't have enough 5128 * array element(s) to store the resulting {@code char} 5129 * value(s). (If {@code dstIndex} is equal to 5130 * {@code dst.length-1} and the specified 5131 * {@code codePoint} is a supplementary character, the 5132 * high-surrogate value is not stored in 5133 * {@code dst[dstIndex]}.) 5134 * @since 1.5 5135 */ 5136 public static int toChars(int codePoint, char[] dst, int dstIndex) { 5137 if (isBmpCodePoint(codePoint)) { 5138 dst[dstIndex] = (char) codePoint; 5139 return 1; 5140 } else if (isValidCodePoint(codePoint)) { 5141 toSurrogates(codePoint, dst, dstIndex); 5142 return 2; 5143 } else { 5144 throw new IllegalArgumentException(); 5145 } 5146 } 5147 5148 /** 5149 * Converts the specified character (Unicode code point) to its 5150 * UTF-16 representation stored in a {@code char} array. 
If 5151 * the specified code point is a BMP (Basic Multilingual Plane or 5152 * Plane 0) value, the resulting {@code char} array has 5153 * the same value as {@code codePoint}. If the specified code 5154 * point is a supplementary code point, the resulting 5155 * {@code char} array has the corresponding surrogate pair. 5156 * 5157 * @param codePoint a Unicode code point 5158 * @return a {@code char} array having 5159 * {@code codePoint}'s UTF-16 representation. 5160 * @exception IllegalArgumentException if the specified 5161 * {@code codePoint} is not a valid Unicode code point. 5162 * @since 1.5 5163 */ 5164 public static char[] toChars(int codePoint) { 5165 if (isBmpCodePoint(codePoint)) { 5166 return new char[] { (char) codePoint }; 5167 } else if (isValidCodePoint(codePoint)) { 5168 char[] result = new char[2]; 5169 toSurrogates(codePoint, result, 0); 5170 return result; 5171 } else { 5172 throw new IllegalArgumentException(); 5173 } 5174 } 5175 5176 static void toSurrogates(int codePoint, char[] dst, int index) { 5177 // We write elements "backwards" to guarantee all-or-nothing 5178 dst[index+1] = lowSurrogate(codePoint); 5179 dst[index] = highSurrogate(codePoint); 5180 } 5181 5182 /** 5183 * Returns the number of Unicode code points in the text range of 5184 * the specified char sequence. The text range begins at the 5185 * specified {@code beginIndex} and extends to the 5186 * {@code char} at index {@code endIndex - 1}. Thus the 5187 * length (in {@code char}s) of the text range is 5188 * {@code endIndex-beginIndex}. Unpaired surrogates within 5189 * the text range count as one code point each. 5190 * 5191 * @param seq the char sequence 5192 * @param beginIndex the index to the first {@code char} of 5193 * the text range. 5194 * @param endIndex the index after the last {@code char} of 5195 * the text range. 5196 * @return the number of Unicode code points in the specified text 5197 * range 5198 * @exception NullPointerException if {@code seq} is null. 
5199 * @exception IndexOutOfBoundsException if the 5200 * {@code beginIndex} is negative, or {@code endIndex} 5201 * is larger than the length of the given sequence, or 5202 * {@code beginIndex} is larger than {@code endIndex}. 5203 * @since 1.5 5204 */ 5205 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5206 int length = seq.length(); 5207 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5208 throw new IndexOutOfBoundsException(); 5209 } 5210 int n = endIndex - beginIndex; 5211 for (int i = beginIndex; i < endIndex; ) { 5212 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5213 isLowSurrogate(seq.charAt(i))) { 5214 n--; 5215 i++; 5216 } 5217 } 5218 return n; 5219 } 5220 5221 /** 5222 * Returns the number of Unicode code points in a subarray of the 5223 * {@code char} array argument. The {@code offset} 5224 * argument is the index of the first {@code char} of the 5225 * subarray and the {@code count} argument specifies the 5226 * length of the subarray in {@code char}s. Unpaired 5227 * surrogates within the subarray count as one code point each. 5228 * 5229 * @param a the {@code char} array 5230 * @param offset the index of the first {@code char} in the 5231 * given {@code char} array 5232 * @param count the length of the subarray in {@code char}s 5233 * @return the number of Unicode code points in the specified subarray 5234 * @exception NullPointerException if {@code a} is null. 5235 * @exception IndexOutOfBoundsException if {@code offset} or 5236 * {@code count} is negative, or if {@code offset + 5237 * count} is larger than the length of the given array. 
5238 * @since 1.5 5239 */ 5240 public static int codePointCount(char[] a, int offset, int count) { 5241 if (count > a.length - offset || offset < 0 || count < 0) { 5242 throw new IndexOutOfBoundsException(); 5243 } 5244 return codePointCountImpl(a, offset, count); 5245 } 5246 5247 static int codePointCountImpl(char[] a, int offset, int count) { 5248 int endIndex = offset + count; 5249 int n = count; 5250 for (int i = offset; i < endIndex; ) { 5251 if (isHighSurrogate(a[i++]) && i < endIndex && 5252 isLowSurrogate(a[i])) { 5253 n--; 5254 i++; 5255 } 5256 } 5257 return n; 5258 } 5259 5260 /** 5261 * Returns the index within the given char sequence that is offset 5262 * from the given {@code index} by {@code codePointOffset} 5263 * code points. Unpaired surrogates within the text range given by 5264 * {@code index} and {@code codePointOffset} count as 5265 * one code point each. 5266 * 5267 * @param seq the char sequence 5268 * @param index the index to be offset 5269 * @param codePointOffset the offset in code points 5270 * @return the index within the char sequence 5271 * @exception NullPointerException if {@code seq} is null. 5272 * @exception IndexOutOfBoundsException if {@code index} 5273 * is negative or larger then the length of the char sequence, 5274 * or if {@code codePointOffset} is positive and the 5275 * subsequence starting with {@code index} has fewer than 5276 * {@code codePointOffset} code points, or if 5277 * {@code codePointOffset} is negative and the subsequence 5278 * before {@code index} has fewer than the absolute value 5279 * of {@code codePointOffset} code points. 
5280 * @since 1.5 5281 */ 5282 public static int offsetByCodePoints(CharSequence seq, int index, 5283 int codePointOffset) { 5284 int length = seq.length(); 5285 if (index < 0 || index > length) { 5286 throw new IndexOutOfBoundsException(); 5287 } 5288 5289 int x = index; 5290 if (codePointOffset >= 0) { 5291 int i; 5292 for (i = 0; x < length && i < codePointOffset; i++) { 5293 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5294 isLowSurrogate(seq.charAt(x))) { 5295 x++; 5296 } 5297 } 5298 if (i < codePointOffset) { 5299 throw new IndexOutOfBoundsException(); 5300 } 5301 } else { 5302 int i; 5303 for (i = codePointOffset; x > 0 && i < 0; i++) { 5304 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5305 isHighSurrogate(seq.charAt(x-1))) { 5306 x--; 5307 } 5308 } 5309 if (i < 0) { 5310 throw new IndexOutOfBoundsException(); 5311 } 5312 } 5313 return x; 5314 } 5315 5316 /** 5317 * Returns the index within the given {@code char} subarray 5318 * that is offset from the given {@code index} by 5319 * {@code codePointOffset} code points. The 5320 * {@code start} and {@code count} arguments specify a 5321 * subarray of the {@code char} array. Unpaired surrogates 5322 * within the text range given by {@code index} and 5323 * {@code codePointOffset} count as one code point each. 5324 * 5325 * @param a the {@code char} array 5326 * @param start the index of the first {@code char} of the 5327 * subarray 5328 * @param count the length of the subarray in {@code char}s 5329 * @param index the index to be offset 5330 * @param codePointOffset the offset in code points 5331 * @return the index within the subarray 5332 * @exception NullPointerException if {@code a} is null. 
5333 * @exception IndexOutOfBoundsException 5334 * if {@code start} or {@code count} is negative, 5335 * or if {@code start + count} is larger than the length of 5336 * the given array, 5337 * or if {@code index} is less than {@code start} or 5338 * larger then {@code start + count}, 5339 * or if {@code codePointOffset} is positive and the text range 5340 * starting with {@code index} and ending with {@code start + count - 1} 5341 * has fewer than {@code codePointOffset} code 5342 * points, 5343 * or if {@code codePointOffset} is negative and the text range 5344 * starting with {@code start} and ending with {@code index - 1} 5345 * has fewer than the absolute value of 5346 * {@code codePointOffset} code points. 5347 * @since 1.5 5348 */ 5349 public static int offsetByCodePoints(char[] a, int start, int count, 5350 int index, int codePointOffset) { 5351 if (count > a.length-start || start < 0 || count < 0 5352 || index < start || index > start+count) { 5353 throw new IndexOutOfBoundsException(); 5354 } 5355 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5356 } 5357 5358 static int offsetByCodePointsImpl(char[]a, int start, int count, 5359 int index, int codePointOffset) { 5360 int x = index; 5361 if (codePointOffset >= 0) { 5362 int limit = start + count; 5363 int i; 5364 for (i = 0; x < limit && i < codePointOffset; i++) { 5365 if (isHighSurrogate(a[x++]) && x < limit && 5366 isLowSurrogate(a[x])) { 5367 x++; 5368 } 5369 } 5370 if (i < codePointOffset) { 5371 throw new IndexOutOfBoundsException(); 5372 } 5373 } else { 5374 int i; 5375 for (i = codePointOffset; x > start && i < 0; i++) { 5376 if (isLowSurrogate(a[--x]) && x > start && 5377 isHighSurrogate(a[x-1])) { 5378 x--; 5379 } 5380 } 5381 if (i < 0) { 5382 throw new IndexOutOfBoundsException(); 5383 } 5384 } 5385 return x; 5386 } 5387 5388 /** 5389 * Determines if the specified character is a lowercase character. 
5390 * <p> 5391 * A character is lowercase if its general category type, provided 5392 * by {@code Character.getType(ch)}, is 5393 * {@code LOWERCASE_LETTER}, or it has contributory property 5394 * Other_Lowercase as defined by the Unicode Standard. 5395 * <p> 5396 * The following are examples of lowercase characters: 5397 * <blockquote><pre> 5398 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5399 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5400 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5401 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5402 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5403 * </pre></blockquote> 5404 * <p> Many other Unicode characters are lowercase too. 5405 * 5406 * <p><b>Note:</b> This method cannot handle <a 5407 * href="#supplementary"> supplementary characters</a>. To support 5408 * all Unicode characters, including supplementary characters, use 5409 * the {@link #isLowerCase(int)} method. 5410 * 5411 * @param ch the character to be tested. 5412 * @return {@code true} if the character is lowercase; 5413 * {@code false} otherwise. 5414 * @see Character#isLowerCase(char) 5415 * @see Character#isTitleCase(char) 5416 * @see Character#toLowerCase(char) 5417 * @see Character#getType(char) 5418 */ 5419 public static boolean isLowerCase(char ch) { 5420 return isLowerCase((int)ch); 5421 } 5422 5423 /** 5424 * Determines if the specified character (Unicode code point) is a 5425 * lowercase character. 5426 * <p> 5427 * A character is lowercase if its general category type, provided 5428 * by {@link Character#getType getType(codePoint)}, is 5429 * {@code LOWERCASE_LETTER}, or it has contributory property 5430 * Other_Lowercase as defined by the Unicode Standard. 
5431 * <p> 5432 * The following are examples of lowercase characters: 5433 * <blockquote><pre> 5434 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5435 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5436 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5437 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5438 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5439 * </pre></blockquote> 5440 * <p> Many other Unicode characters are lowercase too. 5441 * 5442 * @param codePoint the character (Unicode code point) to be tested. 5443 * @return {@code true} if the character is lowercase; 5444 * {@code false} otherwise. 5445 * @see Character#isLowerCase(int) 5446 * @see Character#isTitleCase(int) 5447 * @see Character#toLowerCase(int) 5448 * @see Character#getType(int) 5449 * @since 1.5 5450 */ 5451 public static boolean isLowerCase(int codePoint) { 5452 return getType(codePoint) == Character.LOWERCASE_LETTER || 5453 CharacterData.of(codePoint).isOtherLowercase(codePoint); 5454 } 5455 5456 /** 5457 * Determines if the specified character is an uppercase character. 5458 * <p> 5459 * A character is uppercase if its general category type, provided by 5460 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 5461 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5462 * <p> 5463 * The following are examples of uppercase characters: 5464 * <blockquote><pre> 5465 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5466 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5467 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5468 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5469 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5470 * </pre></blockquote> 5471 * <p> Many other Unicode characters are uppercase too. 
5472 * 5473 * <p><b>Note:</b> This method cannot handle <a 5474 * href="#supplementary"> supplementary characters</a>. To support 5475 * all Unicode characters, including supplementary characters, use 5476 * the {@link #isUpperCase(int)} method. 5477 * 5478 * @param ch the character to be tested. 5479 * @return {@code true} if the character is uppercase; 5480 * {@code false} otherwise. 5481 * @see Character#isLowerCase(char) 5482 * @see Character#isTitleCase(char) 5483 * @see Character#toUpperCase(char) 5484 * @see Character#getType(char) 5485 * @since 1.0 5486 */ 5487 public static boolean isUpperCase(char ch) { 5488 return isUpperCase((int)ch); 5489 } 5490 5491 /** 5492 * Determines if the specified character (Unicode code point) is an uppercase character. 5493 * <p> 5494 * A character is uppercase if its general category type, provided by 5495 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 5496 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5497 * <p> 5498 * The following are examples of uppercase characters: 5499 * <blockquote><pre> 5500 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5501 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5502 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5503 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5504 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5505 * </pre></blockquote> 5506 * <p> Many other Unicode characters are uppercase too. 5507 * 5508 * @param codePoint the character (Unicode code point) to be tested. 5509 * @return {@code true} if the character is uppercase; 5510 * {@code false} otherwise. 
5511 * @see Character#isLowerCase(int) 5512 * @see Character#isTitleCase(int) 5513 * @see Character#toUpperCase(int) 5514 * @see Character#getType(int) 5515 * @since 1.5 5516 */ 5517 public static boolean isUpperCase(int codePoint) { 5518 return getType(codePoint) == Character.UPPERCASE_LETTER || 5519 CharacterData.of(codePoint).isOtherUppercase(codePoint); 5520 } 5521 5522 /** 5523 * Determines if the specified character is a titlecase character. 5524 * <p> 5525 * A character is a titlecase character if its general 5526 * category type, provided by {@code Character.getType(ch)}, 5527 * is {@code TITLECASE_LETTER}. 5528 * <p> 5529 * Some characters look like pairs of Latin letters. For example, there 5530 * is an uppercase letter that looks like "LJ" and has a corresponding 5531 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5532 * is the appropriate form to use when rendering a word in lowercase 5533 * with initial capitals, as for a book title. 5534 * <p> 5535 * These are some of the Unicode characters for which this method returns 5536 * {@code true}: 5537 * <ul> 5538 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5539 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5540 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5541 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5542 * </ul> 5543 * <p> Many other Unicode characters are titlecase too. 5544 * 5545 * <p><b>Note:</b> This method cannot handle <a 5546 * href="#supplementary"> supplementary characters</a>. To support 5547 * all Unicode characters, including supplementary characters, use 5548 * the {@link #isTitleCase(int)} method. 5549 * 5550 * @param ch the character to be tested. 5551 * @return {@code true} if the character is titlecase; 5552 * {@code false} otherwise. 
5553 * @see Character#isLowerCase(char) 5554 * @see Character#isUpperCase(char) 5555 * @see Character#toTitleCase(char) 5556 * @see Character#getType(char) 5557 * @since 1.0.2 5558 */ 5559 public static boolean isTitleCase(char ch) { 5560 return isTitleCase((int)ch); 5561 } 5562 5563 /** 5564 * Determines if the specified character (Unicode code point) is a titlecase character. 5565 * <p> 5566 * A character is a titlecase character if its general 5567 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5568 * is {@code TITLECASE_LETTER}. 5569 * <p> 5570 * Some characters look like pairs of Latin letters. For example, there 5571 * is an uppercase letter that looks like "LJ" and has a corresponding 5572 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5573 * is the appropriate form to use when rendering a word in lowercase 5574 * with initial capitals, as for a book title. 5575 * <p> 5576 * These are some of the Unicode characters for which this method returns 5577 * {@code true}: 5578 * <ul> 5579 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5580 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5581 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5582 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5583 * </ul> 5584 * <p> Many other Unicode characters are titlecase too. 5585 * 5586 * @param codePoint the character (Unicode code point) to be tested. 5587 * @return {@code true} if the character is titlecase; 5588 * {@code false} otherwise. 5589 * @see Character#isLowerCase(int) 5590 * @see Character#isUpperCase(int) 5591 * @see Character#toTitleCase(int) 5592 * @see Character#getType(int) 5593 * @since 1.5 5594 */ 5595 public static boolean isTitleCase(int codePoint) { 5596 return getType(codePoint) == Character.TITLECASE_LETTER; 5597 } 5598 5599 /** 5600 * Determines if the specified character is a digit. 
5601 * <p> 5602 * A character is a digit if its general category type, provided 5603 * by {@code Character.getType(ch)}, is 5604 * {@code DECIMAL_DIGIT_NUMBER}. 5605 * <p> 5606 * Some Unicode character ranges that contain digits: 5607 * <ul> 5608 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5609 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5610 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5611 * Arabic-Indic digits 5612 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5613 * Extended Arabic-Indic digits 5614 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5615 * Devanagari digits 5616 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5617 * Fullwidth digits 5618 * </ul> 5619 * 5620 * Many other character ranges contain digits as well. 5621 * 5622 * <p><b>Note:</b> This method cannot handle <a 5623 * href="#supplementary"> supplementary characters</a>. To support 5624 * all Unicode characters, including supplementary characters, use 5625 * the {@link #isDigit(int)} method. 5626 * 5627 * @param ch the character to be tested. 5628 * @return {@code true} if the character is a digit; 5629 * {@code false} otherwise. 5630 * @see Character#digit(char, int) 5631 * @see Character#forDigit(int, int) 5632 * @see Character#getType(char) 5633 */ 5634 public static boolean isDigit(char ch) { 5635 return isDigit((int)ch); 5636 } 5637 5638 /** 5639 * Determines if the specified character (Unicode code point) is a digit. 5640 * <p> 5641 * A character is a digit if its general category type, provided 5642 * by {@link Character#getType(int) getType(codePoint)}, is 5643 * {@code DECIMAL_DIGIT_NUMBER}. 
5644 * <p> 5645 * Some Unicode character ranges that contain digits: 5646 * <ul> 5647 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5648 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5649 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5650 * Arabic-Indic digits 5651 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5652 * Extended Arabic-Indic digits 5653 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5654 * Devanagari digits 5655 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5656 * Fullwidth digits 5657 * </ul> 5658 * 5659 * Many other character ranges contain digits as well. 5660 * 5661 * @param codePoint the character (Unicode code point) to be tested. 5662 * @return {@code true} if the character is a digit; 5663 * {@code false} otherwise. 5664 * @see Character#forDigit(int, int) 5665 * @see Character#getType(int) 5666 * @since 1.5 5667 */ 5668 public static boolean isDigit(int codePoint) { 5669 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5670 } 5671 5672 /** 5673 * Determines if a character is defined in Unicode. 5674 * <p> 5675 * A character is defined if at least one of the following is true: 5676 * <ul> 5677 * <li>It has an entry in the UnicodeData file. 5678 * <li>It has a value in a range defined by the UnicodeData file. 5679 * </ul> 5680 * 5681 * <p><b>Note:</b> This method cannot handle <a 5682 * href="#supplementary"> supplementary characters</a>. To support 5683 * all Unicode characters, including supplementary characters, use 5684 * the {@link #isDefined(int)} method. 5685 * 5686 * @param ch the character to be tested 5687 * @return {@code true} if the character has a defined meaning 5688 * in Unicode; {@code false} otherwise. 
5689 * @see Character#isDigit(char) 5690 * @see Character#isLetter(char) 5691 * @see Character#isLetterOrDigit(char) 5692 * @see Character#isLowerCase(char) 5693 * @see Character#isTitleCase(char) 5694 * @see Character#isUpperCase(char) 5695 * @since 1.0.2 5696 */ 5697 public static boolean isDefined(char ch) { 5698 return isDefined((int)ch); 5699 } 5700 5701 /** 5702 * Determines if a character (Unicode code point) is defined in Unicode. 5703 * <p> 5704 * A character is defined if at least one of the following is true: 5705 * <ul> 5706 * <li>It has an entry in the UnicodeData file. 5707 * <li>It has a value in a range defined by the UnicodeData file. 5708 * </ul> 5709 * 5710 * @param codePoint the character (Unicode code point) to be tested. 5711 * @return {@code true} if the character has a defined meaning 5712 * in Unicode; {@code false} otherwise. 5713 * @see Character#isDigit(int) 5714 * @see Character#isLetter(int) 5715 * @see Character#isLetterOrDigit(int) 5716 * @see Character#isLowerCase(int) 5717 * @see Character#isTitleCase(int) 5718 * @see Character#isUpperCase(int) 5719 * @since 1.5 5720 */ 5721 public static boolean isDefined(int codePoint) { 5722 return getType(codePoint) != Character.UNASSIGNED; 5723 } 5724 5725 /** 5726 * Determines if the specified character is a letter. 5727 * <p> 5728 * A character is considered to be a letter if its general 5729 * category type, provided by {@code Character.getType(ch)}, 5730 * is any of the following: 5731 * <ul> 5732 * <li> {@code UPPERCASE_LETTER} 5733 * <li> {@code LOWERCASE_LETTER} 5734 * <li> {@code TITLECASE_LETTER} 5735 * <li> {@code MODIFIER_LETTER} 5736 * <li> {@code OTHER_LETTER} 5737 * </ul> 5738 * 5739 * Not all letters have case. Many characters are 5740 * letters but are neither uppercase nor lowercase nor titlecase. 5741 * 5742 * <p><b>Note:</b> This method cannot handle <a 5743 * href="#supplementary"> supplementary characters</a>. 
To support 5744 * all Unicode characters, including supplementary characters, use 5745 * the {@link #isLetter(int)} method. 5746 * 5747 * @param ch the character to be tested. 5748 * @return {@code true} if the character is a letter; 5749 * {@code false} otherwise. 5750 * @see Character#isDigit(char) 5751 * @see Character#isJavaIdentifierStart(char) 5752 * @see Character#isJavaLetter(char) 5753 * @see Character#isJavaLetterOrDigit(char) 5754 * @see Character#isLetterOrDigit(char) 5755 * @see Character#isLowerCase(char) 5756 * @see Character#isTitleCase(char) 5757 * @see Character#isUnicodeIdentifierStart(char) 5758 * @see Character#isUpperCase(char) 5759 */ 5760 public static boolean isLetter(char ch) { 5761 return isLetter((int)ch); 5762 } 5763 5764 /** 5765 * Determines if the specified character (Unicode code point) is a letter. 5766 * <p> 5767 * A character is considered to be a letter if its general 5768 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5769 * is any of the following: 5770 * <ul> 5771 * <li> {@code UPPERCASE_LETTER} 5772 * <li> {@code LOWERCASE_LETTER} 5773 * <li> {@code TITLECASE_LETTER} 5774 * <li> {@code MODIFIER_LETTER} 5775 * <li> {@code OTHER_LETTER} 5776 * </ul> 5777 * 5778 * Not all letters have case. Many characters are 5779 * letters but are neither uppercase nor lowercase nor titlecase. 5780 * 5781 * @param codePoint the character (Unicode code point) to be tested. 5782 * @return {@code true} if the character is a letter; 5783 * {@code false} otherwise. 
5784 * @see Character#isDigit(int) 5785 * @see Character#isJavaIdentifierStart(int) 5786 * @see Character#isLetterOrDigit(int) 5787 * @see Character#isLowerCase(int) 5788 * @see Character#isTitleCase(int) 5789 * @see Character#isUnicodeIdentifierStart(int) 5790 * @see Character#isUpperCase(int) 5791 * @since 1.5 5792 */ 5793 public static boolean isLetter(int codePoint) { 5794 return ((((1 << Character.UPPERCASE_LETTER) | 5795 (1 << Character.LOWERCASE_LETTER) | 5796 (1 << Character.TITLECASE_LETTER) | 5797 (1 << Character.MODIFIER_LETTER) | 5798 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5799 != 0; 5800 } 5801 5802 /** 5803 * Determines if the specified character is a letter or digit. 5804 * <p> 5805 * A character is considered to be a letter or digit if either 5806 * {@code Character.isLetter(char ch)} or 5807 * {@code Character.isDigit(char ch)} returns 5808 * {@code true} for the character. 5809 * 5810 * <p><b>Note:</b> This method cannot handle <a 5811 * href="#supplementary"> supplementary characters</a>. To support 5812 * all Unicode characters, including supplementary characters, use 5813 * the {@link #isLetterOrDigit(int)} method. 5814 * 5815 * @param ch the character to be tested. 5816 * @return {@code true} if the character is a letter or digit; 5817 * {@code false} otherwise. 5818 * @see Character#isDigit(char) 5819 * @see Character#isJavaIdentifierPart(char) 5820 * @see Character#isJavaLetter(char) 5821 * @see Character#isJavaLetterOrDigit(char) 5822 * @see Character#isLetter(char) 5823 * @see Character#isUnicodeIdentifierPart(char) 5824 * @since 1.0.2 5825 */ 5826 public static boolean isLetterOrDigit(char ch) { 5827 return isLetterOrDigit((int)ch); 5828 } 5829 5830 /** 5831 * Determines if the specified character (Unicode code point) is a letter or digit. 
5832 * <p> 5833 * A character is considered to be a letter or digit if either 5834 * {@link #isLetter(int) isLetter(codePoint)} or 5835 * {@link #isDigit(int) isDigit(codePoint)} returns 5836 * {@code true} for the character. 5837 * 5838 * @param codePoint the character (Unicode code point) to be tested. 5839 * @return {@code true} if the character is a letter or digit; 5840 * {@code false} otherwise. 5841 * @see Character#isDigit(int) 5842 * @see Character#isJavaIdentifierPart(int) 5843 * @see Character#isLetter(int) 5844 * @see Character#isUnicodeIdentifierPart(int) 5845 * @since 1.5 5846 */ 5847 public static boolean isLetterOrDigit(int codePoint) { 5848 return ((((1 << Character.UPPERCASE_LETTER) | 5849 (1 << Character.LOWERCASE_LETTER) | 5850 (1 << Character.TITLECASE_LETTER) | 5851 (1 << Character.MODIFIER_LETTER) | 5852 (1 << Character.OTHER_LETTER) | 5853 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5854 != 0; 5855 } 5856 5857 /** 5858 * Determines if the specified character is permissible as the first 5859 * character in a Java identifier. 5860 * <p> 5861 * A character may start a Java identifier if and only if 5862 * one of the following is true: 5863 * <ul> 5864 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5865 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5866 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5867 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5868 * </ul> 5869 * 5870 * @param ch the character to be tested. 5871 * @return {@code true} if the character may start a Java 5872 * identifier; {@code false} otherwise. 
5873 * @see Character#isJavaLetterOrDigit(char) 5874 * @see Character#isJavaIdentifierStart(char) 5875 * @see Character#isJavaIdentifierPart(char) 5876 * @see Character#isLetter(char) 5877 * @see Character#isLetterOrDigit(char) 5878 * @see Character#isUnicodeIdentifierStart(char) 5879 * @since 1.0.2 5880 * @deprecated Replaced by isJavaIdentifierStart(char). 5881 */ 5882 @Deprecated 5883 public static boolean isJavaLetter(char ch) { 5884 return isJavaIdentifierStart(ch); 5885 } 5886 5887 /** 5888 * Determines if the specified character may be part of a Java 5889 * identifier as other than the first character. 5890 * <p> 5891 * A character may be part of a Java identifier if and only if any 5892 * of the following are true: 5893 * <ul> 5894 * <li> it is a letter 5895 * <li> it is a currency symbol (such as {@code '$'}) 5896 * <li> it is a connecting punctuation character (such as {@code '_'}) 5897 * <li> it is a digit 5898 * <li> it is a numeric letter (such as a Roman numeral character) 5899 * <li> it is a combining mark 5900 * <li> it is a non-spacing mark 5901 * <li> {@code isIdentifierIgnorable} returns 5902 * {@code true} for the character. 5903 * </ul> 5904 * 5905 * @param ch the character to be tested. 5906 * @return {@code true} if the character may be part of a 5907 * Java identifier; {@code false} otherwise. 5908 * @see Character#isJavaLetter(char) 5909 * @see Character#isJavaIdentifierStart(char) 5910 * @see Character#isJavaIdentifierPart(char) 5911 * @see Character#isLetter(char) 5912 * @see Character#isLetterOrDigit(char) 5913 * @see Character#isUnicodeIdentifierPart(char) 5914 * @see Character#isIdentifierIgnorable(char) 5915 * @since 1.0.2 5916 * @deprecated Replaced by isJavaIdentifierPart(char). 5917 */ 5918 @Deprecated 5919 public static boolean isJavaLetterOrDigit(char ch) { 5920 return isJavaIdentifierPart(ch); 5921 } 5922 5923 /** 5924 * Determines if the specified character (Unicode code point) is an alphabet. 
5925 * <p> 5926 * A character is considered to be alphabetic if its general category type, 5927 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 5928 * the following: 5929 * <ul> 5930 * <li> <code>UPPERCASE_LETTER</code> 5931 * <li> <code>LOWERCASE_LETTER</code> 5932 * <li> <code>TITLECASE_LETTER</code> 5933 * <li> <code>MODIFIER_LETTER</code> 5934 * <li> <code>OTHER_LETTER</code> 5935 * <li> <code>LETTER_NUMBER</code> 5936 * </ul> 5937 * or it has contributory property Other_Alphabetic as defined by the 5938 * Unicode Standard. 5939 * 5940 * @param codePoint the character (Unicode code point) to be tested. 5941 * @return <code>true</code> if the character is a Unicode alphabet 5942 * character, <code>false</code> otherwise. 5943 * @since 1.7 5944 */ 5945 public static boolean isAlphabetic(int codePoint) { 5946 return (((((1 << Character.UPPERCASE_LETTER) | 5947 (1 << Character.LOWERCASE_LETTER) | 5948 (1 << Character.TITLECASE_LETTER) | 5949 (1 << Character.MODIFIER_LETTER) | 5950 (1 << Character.OTHER_LETTER) | 5951 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 5952 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 5953 } 5954 5955 /** 5956 * Determines if the specified character (Unicode code point) is a CJKV 5957 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 5958 * the Unicode Standard. 5959 * 5960 * @param codePoint the character (Unicode code point) to be tested. 5961 * @return <code>true</code> if the character is a Unicode ideograph 5962 * character, <code>false</code> otherwise. 5963 * @since 1.7 5964 */ 5965 public static boolean isIdeographic(int codePoint) { 5966 return CharacterData.of(codePoint).isIdeographic(codePoint); 5967 } 5968 5969 /** 5970 * Determines if the specified character is 5971 * permissible as the first character in a Java identifier. 
5972 * <p> 5973 * A character may start a Java identifier if and only if 5974 * one of the following conditions is true: 5975 * <ul> 5976 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5977 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5978 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5979 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5980 * </ul> 5981 * 5982 * <p><b>Note:</b> This method cannot handle <a 5983 * href="#supplementary"> supplementary characters</a>. To support 5984 * all Unicode characters, including supplementary characters, use 5985 * the {@link #isJavaIdentifierStart(int)} method. 5986 * 5987 * @param ch the character to be tested. 5988 * @return {@code true} if the character may start a Java identifier; 5989 * {@code false} otherwise. 5990 * @see Character#isJavaIdentifierPart(char) 5991 * @see Character#isLetter(char) 5992 * @see Character#isUnicodeIdentifierStart(char) 5993 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5994 * @since 1.1 5995 */ 5996 public static boolean isJavaIdentifierStart(char ch) { 5997 return isJavaIdentifierStart((int)ch); 5998 } 5999 6000 /** 6001 * Determines if the character (Unicode code point) is 6002 * permissible as the first character in a Java identifier. 6003 * <p> 6004 * A character may start a Java identifier if and only if 6005 * one of the following conditions is true: 6006 * <ul> 6007 * <li> {@link #isLetter(int) isLetter(codePoint)} 6008 * returns {@code true} 6009 * <li> {@link #getType(int) getType(codePoint)} 6010 * returns {@code LETTER_NUMBER} 6011 * <li> the referenced character is a currency symbol (such as {@code '$'}) 6012 * <li> the referenced character is a connecting punctuation character 6013 * (such as {@code '_'}). 6014 * </ul> 6015 * 6016 * @param codePoint the character (Unicode code point) to be tested. 
6017 * @return {@code true} if the character may start a Java identifier; 6018 * {@code false} otherwise. 6019 * @see Character#isJavaIdentifierPart(int) 6020 * @see Character#isLetter(int) 6021 * @see Character#isUnicodeIdentifierStart(int) 6022 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6023 * @since 1.5 6024 */ 6025 public static boolean isJavaIdentifierStart(int codePoint) { 6026 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 6027 } 6028 6029 /** 6030 * Determines if the specified character may be part of a Java 6031 * identifier as other than the first character. 6032 * <p> 6033 * A character may be part of a Java identifier if any of the following 6034 * are true: 6035 * <ul> 6036 * <li> it is a letter 6037 * <li> it is a currency symbol (such as {@code '$'}) 6038 * <li> it is a connecting punctuation character (such as {@code '_'}) 6039 * <li> it is a digit 6040 * <li> it is a numeric letter (such as a Roman numeral character) 6041 * <li> it is a combining mark 6042 * <li> it is a non-spacing mark 6043 * <li> {@code isIdentifierIgnorable} returns 6044 * {@code true} for the character 6045 * </ul> 6046 * 6047 * <p><b>Note:</b> This method cannot handle <a 6048 * href="#supplementary"> supplementary characters</a>. To support 6049 * all Unicode characters, including supplementary characters, use 6050 * the {@link #isJavaIdentifierPart(int)} method. 6051 * 6052 * @param ch the character to be tested. 6053 * @return {@code true} if the character may be part of a 6054 * Java identifier; {@code false} otherwise. 
6055 * @see Character#isIdentifierIgnorable(char) 6056 * @see Character#isJavaIdentifierStart(char) 6057 * @see Character#isLetterOrDigit(char) 6058 * @see Character#isUnicodeIdentifierPart(char) 6059 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6060 * @since 1.1 6061 */ 6062 public static boolean isJavaIdentifierPart(char ch) { 6063 return isJavaIdentifierPart((int)ch); 6064 } 6065 6066 /** 6067 * Determines if the character (Unicode code point) may be part of a Java 6068 * identifier as other than the first character. 6069 * <p> 6070 * A character may be part of a Java identifier if any of the following 6071 * are true: 6072 * <ul> 6073 * <li> it is a letter 6074 * <li> it is a currency symbol (such as {@code '$'}) 6075 * <li> it is a connecting punctuation character (such as {@code '_'}) 6076 * <li> it is a digit 6077 * <li> it is a numeric letter (such as a Roman numeral character) 6078 * <li> it is a combining mark 6079 * <li> it is a non-spacing mark 6080 * <li> {@link #isIdentifierIgnorable(int) 6081 * isIdentifierIgnorable(codePoint)} returns {@code true} for 6082 * the character 6083 * </ul> 6084 * 6085 * @param codePoint the character (Unicode code point) to be tested. 6086 * @return {@code true} if the character may be part of a 6087 * Java identifier; {@code false} otherwise. 6088 * @see Character#isIdentifierIgnorable(int) 6089 * @see Character#isJavaIdentifierStart(int) 6090 * @see Character#isLetterOrDigit(int) 6091 * @see Character#isUnicodeIdentifierPart(int) 6092 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6093 * @since 1.5 6094 */ 6095 public static boolean isJavaIdentifierPart(int codePoint) { 6096 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 6097 } 6098 6099 /** 6100 * Determines if the specified character is permissible as the 6101 * first character in a Unicode identifier. 
6102 * <p> 6103 * A character may start a Unicode identifier if and only if 6104 * one of the following conditions is true: 6105 * <ul> 6106 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 6107 * <li> {@link #getType(char) getType(ch)} returns 6108 * {@code LETTER_NUMBER}. 6109 * </ul> 6110 * 6111 * <p><b>Note:</b> This method cannot handle <a 6112 * href="#supplementary"> supplementary characters</a>. To support 6113 * all Unicode characters, including supplementary characters, use 6114 * the {@link #isUnicodeIdentifierStart(int)} method. 6115 * 6116 * @param ch the character to be tested. 6117 * @return {@code true} if the character may start a Unicode 6118 * identifier; {@code false} otherwise. 6119 * @see Character#isJavaIdentifierStart(char) 6120 * @see Character#isLetter(char) 6121 * @see Character#isUnicodeIdentifierPart(char) 6122 * @since 1.1 6123 */ 6124 public static boolean isUnicodeIdentifierStart(char ch) { 6125 return isUnicodeIdentifierStart((int)ch); 6126 } 6127 6128 /** 6129 * Determines if the specified character (Unicode code point) is permissible as the 6130 * first character in a Unicode identifier. 6131 * <p> 6132 * A character may start a Unicode identifier if and only if 6133 * one of the following conditions is true: 6134 * <ul> 6135 * <li> {@link #isLetter(int) isLetter(codePoint)} 6136 * returns {@code true} 6137 * <li> {@link #getType(int) getType(codePoint)} 6138 * returns {@code LETTER_NUMBER}. 6139 * </ul> 6140 * @param codePoint the character (Unicode code point) to be tested. 6141 * @return {@code true} if the character may start a Unicode 6142 * identifier; {@code false} otherwise. 
6143 * @see Character#isJavaIdentifierStart(int) 6144 * @see Character#isLetter(int) 6145 * @see Character#isUnicodeIdentifierPart(int) 6146 * @since 1.5 6147 */ 6148 public static boolean isUnicodeIdentifierStart(int codePoint) { 6149 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 6150 } 6151 6152 /** 6153 * Determines if the specified character may be part of a Unicode 6154 * identifier as other than the first character. 6155 * <p> 6156 * A character may be part of a Unicode identifier if and only if 6157 * one of the following statements is true: 6158 * <ul> 6159 * <li> it is a letter 6160 * <li> it is a connecting punctuation character (such as {@code '_'}) 6161 * <li> it is a digit 6162 * <li> it is a numeric letter (such as a Roman numeral character) 6163 * <li> it is a combining mark 6164 * <li> it is a non-spacing mark 6165 * <li> {@code isIdentifierIgnorable} returns 6166 * {@code true} for this character. 6167 * </ul> 6168 * 6169 * <p><b>Note:</b> This method cannot handle <a 6170 * href="#supplementary"> supplementary characters</a>. To support 6171 * all Unicode characters, including supplementary characters, use 6172 * the {@link #isUnicodeIdentifierPart(int)} method. 6173 * 6174 * @param ch the character to be tested. 6175 * @return {@code true} if the character may be part of a 6176 * Unicode identifier; {@code false} otherwise. 6177 * @see Character#isIdentifierIgnorable(char) 6178 * @see Character#isJavaIdentifierPart(char) 6179 * @see Character#isLetterOrDigit(char) 6180 * @see Character#isUnicodeIdentifierStart(char) 6181 * @since 1.1 6182 */ 6183 public static boolean isUnicodeIdentifierPart(char ch) { 6184 return isUnicodeIdentifierPart((int)ch); 6185 } 6186 6187 /** 6188 * Determines if the specified character (Unicode code point) may be part of a Unicode 6189 * identifier as other than the first character. 
6190 * <p> 6191 * A character may be part of a Unicode identifier if and only if 6192 * one of the following statements is true: 6193 * <ul> 6194 * <li> it is a letter 6195 * <li> it is a connecting punctuation character (such as {@code '_'}) 6196 * <li> it is a digit 6197 * <li> it is a numeric letter (such as a Roman numeral character) 6198 * <li> it is a combining mark 6199 * <li> it is a non-spacing mark 6200 * <li> {@code isIdentifierIgnorable} returns 6201 * {@code true} for this character. 6202 * </ul> 6203 * @param codePoint the character (Unicode code point) to be tested. 6204 * @return {@code true} if the character may be part of a 6205 * Unicode identifier; {@code false} otherwise. 6206 * @see Character#isIdentifierIgnorable(int) 6207 * @see Character#isJavaIdentifierPart(int) 6208 * @see Character#isLetterOrDigit(int) 6209 * @see Character#isUnicodeIdentifierStart(int) 6210 * @since 1.5 6211 */ 6212 public static boolean isUnicodeIdentifierPart(int codePoint) { 6213 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 6214 } 6215 6216 /** 6217 * Determines if the specified character should be regarded as 6218 * an ignorable character in a Java identifier or a Unicode identifier. 6219 * <p> 6220 * The following Unicode characters are ignorable in a Java identifier 6221 * or a Unicode identifier: 6222 * <ul> 6223 * <li>ISO control characters that are not whitespace 6224 * <ul> 6225 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6226 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6227 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6228 * </ul> 6229 * 6230 * <li>all characters that have the {@code FORMAT} general 6231 * category value 6232 * </ul> 6233 * 6234 * <p><b>Note:</b> This method cannot handle <a 6235 * href="#supplementary"> supplementary characters</a>. To support 6236 * all Unicode characters, including supplementary characters, use 6237 * the {@link #isIdentifierIgnorable(int)} method. 
6238 * 6239 * @param ch the character to be tested. 6240 * @return {@code true} if the character is an ignorable control 6241 * character that may be part of a Java or Unicode identifier; 6242 * {@code false} otherwise. 6243 * @see Character#isJavaIdentifierPart(char) 6244 * @see Character#isUnicodeIdentifierPart(char) 6245 * @since 1.1 6246 */ 6247 public static boolean isIdentifierIgnorable(char ch) { 6248 return isIdentifierIgnorable((int)ch); 6249 } 6250 6251 /** 6252 * Determines if the specified character (Unicode code point) should be regarded as 6253 * an ignorable character in a Java identifier or a Unicode identifier. 6254 * <p> 6255 * The following Unicode characters are ignorable in a Java identifier 6256 * or a Unicode identifier: 6257 * <ul> 6258 * <li>ISO control characters that are not whitespace 6259 * <ul> 6260 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6261 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6262 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6263 * </ul> 6264 * 6265 * <li>all characters that have the {@code FORMAT} general 6266 * category value 6267 * </ul> 6268 * 6269 * @param codePoint the character (Unicode code point) to be tested. 6270 * @return {@code true} if the character is an ignorable control 6271 * character that may be part of a Java or Unicode identifier; 6272 * {@code false} otherwise. 6273 * @see Character#isJavaIdentifierPart(int) 6274 * @see Character#isUnicodeIdentifierPart(int) 6275 * @since 1.5 6276 */ 6277 public static boolean isIdentifierIgnorable(int codePoint) { 6278 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6279 } 6280 6281 /** 6282 * Converts the character argument to lowercase using case 6283 * mapping information from the UnicodeData file. 
6284 * <p> 6285 * Note that 6286 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 6287 * does not always return {@code true} for some ranges of 6288 * characters, particularly those that are symbols or ideographs. 6289 * 6290 * <p>In general, {@link String#toLowerCase()} should be used to map 6291 * characters to lowercase. {@code String} case mapping methods 6292 * have several benefits over {@code Character} case mapping methods. 6293 * {@code String} case mapping methods can perform locale-sensitive 6294 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6295 * the {@code Character} case mapping methods cannot. 6296 * 6297 * <p><b>Note:</b> This method cannot handle <a 6298 * href="#supplementary"> supplementary characters</a>. To support 6299 * all Unicode characters, including supplementary characters, use 6300 * the {@link #toLowerCase(int)} method. 6301 * 6302 * @param ch the character to be converted. 6303 * @return the lowercase equivalent of the character, if any; 6304 * otherwise, the character itself. 6305 * @see Character#isLowerCase(char) 6306 * @see String#toLowerCase() 6307 */ 6308 public static char toLowerCase(char ch) { 6309 return (char)toLowerCase((int)ch); 6310 } 6311 6312 /** 6313 * Converts the character (Unicode code point) argument to 6314 * lowercase using case mapping information from the UnicodeData 6315 * file. 6316 * 6317 * <p> Note that 6318 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 6319 * does not always return {@code true} for some ranges of 6320 * characters, particularly those that are symbols or ideographs. 6321 * 6322 * <p>In general, {@link String#toLowerCase()} should be used to map 6323 * characters to lowercase. {@code String} case mapping methods 6324 * have several benefits over {@code Character} case mapping methods. 
6325 * {@code String} case mapping methods can perform locale-sensitive 6326 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6327 * the {@code Character} case mapping methods cannot. 6328 * 6329 * @param codePoint the character (Unicode code point) to be converted. 6330 * @return the lowercase equivalent of the character (Unicode code 6331 * point), if any; otherwise, the character itself. 6332 * @see Character#isLowerCase(int) 6333 * @see String#toLowerCase() 6334 * 6335 * @since 1.5 6336 */ 6337 public static int toLowerCase(int codePoint) { 6338 return CharacterData.of(codePoint).toLowerCase(codePoint); 6339 } 6340 6341 /** 6342 * Converts the character argument to uppercase using case mapping 6343 * information from the UnicodeData file. 6344 * <p> 6345 * Note that 6346 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 6347 * does not always return {@code true} for some ranges of 6348 * characters, particularly those that are symbols or ideographs. 6349 * 6350 * <p>In general, {@link String#toUpperCase()} should be used to map 6351 * characters to uppercase. {@code String} case mapping methods 6352 * have several benefits over {@code Character} case mapping methods. 6353 * {@code String} case mapping methods can perform locale-sensitive 6354 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6355 * the {@code Character} case mapping methods cannot. 6356 * 6357 * <p><b>Note:</b> This method cannot handle <a 6358 * href="#supplementary"> supplementary characters</a>. To support 6359 * all Unicode characters, including supplementary characters, use 6360 * the {@link #toUpperCase(int)} method. 6361 * 6362 * @param ch the character to be converted. 6363 * @return the uppercase equivalent of the character, if any; 6364 * otherwise, the character itself. 
6365 * @see Character#isUpperCase(char) 6366 * @see String#toUpperCase() 6367 */ 6368 public static char toUpperCase(char ch) { 6369 return (char)toUpperCase((int)ch); 6370 } 6371 6372 /** 6373 * Converts the character (Unicode code point) argument to 6374 * uppercase using case mapping information from the UnicodeData 6375 * file. 6376 * 6377 * <p>Note that 6378 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 6379 * does not always return {@code true} for some ranges of 6380 * characters, particularly those that are symbols or ideographs. 6381 * 6382 * <p>In general, {@link String#toUpperCase()} should be used to map 6383 * characters to uppercase. {@code String} case mapping methods 6384 * have several benefits over {@code Character} case mapping methods. 6385 * {@code String} case mapping methods can perform locale-sensitive 6386 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6387 * the {@code Character} case mapping methods cannot. 6388 * 6389 * @param codePoint the character (Unicode code point) to be converted. 6390 * @return the uppercase equivalent of the character, if any; 6391 * otherwise, the character itself. 6392 * @see Character#isUpperCase(int) 6393 * @see String#toUpperCase() 6394 * 6395 * @since 1.5 6396 */ 6397 public static int toUpperCase(int codePoint) { 6398 return CharacterData.of(codePoint).toUpperCase(codePoint); 6399 } 6400 6401 /** 6402 * Converts the character argument to titlecase using case mapping 6403 * information from the UnicodeData file. If a character has no 6404 * explicit titlecase mapping and is not itself a titlecase char 6405 * according to UnicodeData, then the uppercase mapping is 6406 * returned as an equivalent titlecase mapping. If the 6407 * {@code char} argument is already a titlecase 6408 * {@code char}, the same {@code char} value will be 6409 * returned. 
6410 * <p> 6411 * Note that 6412 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 6413 * does not always return {@code true} for some ranges of 6414 * characters. 6415 * 6416 * <p><b>Note:</b> This method cannot handle <a 6417 * href="#supplementary"> supplementary characters</a>. To support 6418 * all Unicode characters, including supplementary characters, use 6419 * the {@link #toTitleCase(int)} method. 6420 * 6421 * @param ch the character to be converted. 6422 * @return the titlecase equivalent of the character, if any; 6423 * otherwise, the character itself. 6424 * @see Character#isTitleCase(char) 6425 * @see Character#toLowerCase(char) 6426 * @see Character#toUpperCase(char) 6427 * @since 1.0.2 6428 */ 6429 public static char toTitleCase(char ch) { 6430 return (char)toTitleCase((int)ch); 6431 } 6432 6433 /** 6434 * Converts the character (Unicode code point) argument to titlecase using case mapping 6435 * information from the UnicodeData file. If a character has no 6436 * explicit titlecase mapping and is not itself a titlecase char 6437 * according to UnicodeData, then the uppercase mapping is 6438 * returned as an equivalent titlecase mapping. If the 6439 * character argument is already a titlecase 6440 * character, the same character value will be 6441 * returned. 6442 * 6443 * <p>Note that 6444 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 6445 * does not always return {@code true} for some ranges of 6446 * characters. 6447 * 6448 * @param codePoint the character (Unicode code point) to be converted. 6449 * @return the titlecase equivalent of the character, if any; 6450 * otherwise, the character itself. 
6451 * @see Character#isTitleCase(int) 6452 * @see Character#toLowerCase(int) 6453 * @see Character#toUpperCase(int) 6454 * @since 1.5 6455 */ 6456 public static int toTitleCase(int codePoint) { 6457 return CharacterData.of(codePoint).toTitleCase(codePoint); 6458 } 6459 6460 /** 6461 * Returns the numeric value of the character {@code ch} in the 6462 * specified radix. 6463 * <p> 6464 * If the radix is not in the range {@code MIN_RADIX} ≤ 6465 * {@code radix} ≤ {@code MAX_RADIX} or if the 6466 * value of {@code ch} is not a valid digit in the specified 6467 * radix, {@code -1} is returned. A character is a valid digit 6468 * if at least one of the following is true: 6469 * <ul> 6470 * <li>The method {@code isDigit} is {@code true} of the character 6471 * and the Unicode decimal digit value of the character (or its 6472 * single-character decomposition) is less than the specified radix. 6473 * In this case the decimal digit value is returned. 6474 * <li>The character is one of the uppercase Latin letters 6475 * {@code 'A'} through {@code 'Z'} and its code is less than 6476 * {@code radix + 'A' - 10}. 6477 * In this case, {@code ch - 'A' + 10} 6478 * is returned. 6479 * <li>The character is one of the lowercase Latin letters 6480 * {@code 'a'} through {@code 'z'} and its code is less than 6481 * {@code radix + 'a' - 10}. 6482 * In this case, {@code ch - 'a' + 10} 6483 * is returned. 6484 * <li>The character is one of the fullwidth uppercase Latin letters A 6485 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6486 * and its code is less than 6487 * {@code radix + '\u005CuFF21' - 10}. 6488 * In this case, {@code ch - '\u005CuFF21' + 10} 6489 * is returned. 6490 * <li>The character is one of the fullwidth lowercase Latin letters a 6491 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6492 * and its code is less than 6493 * {@code radix + '\u005CuFF41' - 10}. 6494 * In this case, {@code ch - '\u005CuFF41' + 10} 6495 * is returned. 
6496 * </ul> 6497 * 6498 * <p><b>Note:</b> This method cannot handle <a 6499 * href="#supplementary"> supplementary characters</a>. To support 6500 * all Unicode characters, including supplementary characters, use 6501 * the {@link #digit(int, int)} method. 6502 * 6503 * @param ch the character to be converted. 6504 * @param radix the radix. 6505 * @return the numeric value represented by the character in the 6506 * specified radix. 6507 * @see Character#forDigit(int, int) 6508 * @see Character#isDigit(char) 6509 */ 6510 public static int digit(char ch, int radix) { 6511 return digit((int)ch, radix); 6512 } 6513 6514 /** 6515 * Returns the numeric value of the specified character (Unicode 6516 * code point) in the specified radix. 6517 * 6518 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 6519 * {@code radix} ≤ {@code MAX_RADIX} or if the 6520 * character is not a valid digit in the specified 6521 * radix, {@code -1} is returned. A character is a valid digit 6522 * if at least one of the following is true: 6523 * <ul> 6524 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 6525 * and the Unicode decimal digit value of the character (or its 6526 * single-character decomposition) is less than the specified radix. 6527 * In this case the decimal digit value is returned. 6528 * <li>The character is one of the uppercase Latin letters 6529 * {@code 'A'} through {@code 'Z'} and its code is less than 6530 * {@code radix + 'A' - 10}. 6531 * In this case, {@code codePoint - 'A' + 10} 6532 * is returned. 6533 * <li>The character is one of the lowercase Latin letters 6534 * {@code 'a'} through {@code 'z'} and its code is less than 6535 * {@code radix + 'a' - 10}. 6536 * In this case, {@code codePoint - 'a' + 10} 6537 * is returned. 
6538 * <li>The character is one of the fullwidth uppercase Latin letters A 6539 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6540 * and its code is less than 6541 * {@code radix + '\u005CuFF21' - 10}. 6542 * In this case, 6543 * {@code codePoint - '\u005CuFF21' + 10} 6544 * is returned. 6545 * <li>The character is one of the fullwidth lowercase Latin letters a 6546 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6547 * and its code is less than 6548 * {@code radix + '\u005CuFF41'- 10}. 6549 * In this case, 6550 * {@code codePoint - '\u005CuFF41' + 10} 6551 * is returned. 6552 * </ul> 6553 * 6554 * @param codePoint the character (Unicode code point) to be converted. 6555 * @param radix the radix. 6556 * @return the numeric value represented by the character in the 6557 * specified radix. 6558 * @see Character#forDigit(int, int) 6559 * @see Character#isDigit(int) 6560 * @since 1.5 6561 */ 6562 public static int digit(int codePoint, int radix) { 6563 return CharacterData.of(codePoint).digit(codePoint, radix); 6564 } 6565 6566 /** 6567 * Returns the {@code int} value that the specified Unicode 6568 * character represents. For example, the character 6569 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 6570 * an int with a value of 50. 6571 * <p> 6572 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6573 * {@code '\u005Cu005A'}), lowercase 6574 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6575 * full width variant ({@code '\u005CuFF21'} through 6576 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6577 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6578 * through 35. This is independent of the Unicode specification, 6579 * which does not assign numeric values to these {@code char} 6580 * values. 6581 * <p> 6582 * If the character does not have a numeric value, then -1 is returned. 
6583 * If the character has a numeric value that cannot be represented as a 6584 * nonnegative integer (for example, a fractional value), then -2 6585 * is returned. 6586 * 6587 * <p><b>Note:</b> This method cannot handle <a 6588 * href="#supplementary"> supplementary characters</a>. To support 6589 * all Unicode characters, including supplementary characters, use 6590 * the {@link #getNumericValue(int)} method. 6591 * 6592 * @param ch the character to be converted. 6593 * @return the numeric value of the character, as a nonnegative {@code int} 6594 * value; -2 if the character has a numeric value that is not a 6595 * nonnegative integer; -1 if the character has no numeric value. 6596 * @see Character#forDigit(int, int) 6597 * @see Character#isDigit(char) 6598 * @since 1.1 6599 */ 6600 public static int getNumericValue(char ch) { 6601 return getNumericValue((int)ch); 6602 } 6603 6604 /** 6605 * Returns the {@code int} value that the specified 6606 * character (Unicode code point) represents. For example, the character 6607 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 6608 * an {@code int} with a value of 50. 6609 * <p> 6610 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6611 * {@code '\u005Cu005A'}), lowercase 6612 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6613 * full width variant ({@code '\u005CuFF21'} through 6614 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6615 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6616 * through 35. This is independent of the Unicode specification, 6617 * which does not assign numeric values to these {@code char} 6618 * values. 6619 * <p> 6620 * If the character does not have a numeric value, then -1 is returned. 6621 * If the character has a numeric value that cannot be represented as a 6622 * nonnegative integer (for example, a fractional value), then -2 6623 * is returned. 
6624 * 6625 * @param codePoint the character (Unicode code point) to be converted. 6626 * @return the numeric value of the character, as a nonnegative {@code int} 6627 * value; -2 if the character has a numeric value that is not a 6628 * nonnegative integer; -1 if the character has no numeric value. 6629 * @see Character#forDigit(int, int) 6630 * @see Character#isDigit(int) 6631 * @since 1.5 6632 */ 6633 public static int getNumericValue(int codePoint) { 6634 return CharacterData.of(codePoint).getNumericValue(codePoint); 6635 } 6636 6637 /** 6638 * Determines if the specified character is ISO-LATIN-1 white space. 6639 * This method returns {@code true} for the following five 6640 * characters only: 6641 * <table summary="truechars"> 6642 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 6643 * <td>{@code HORIZONTAL TABULATION}</td></tr> 6644 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 6645 * <td>{@code NEW LINE}</td></tr> 6646 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 6647 * <td>{@code FORM FEED}</td></tr> 6648 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 6649 * <td>{@code CARRIAGE RETURN}</td></tr> 6650 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 6651 * <td>{@code SPACE}</td></tr> 6652 * </table> 6653 * 6654 * @param ch the character to be tested. 6655 * @return {@code true} if the character is ISO-LATIN-1 white 6656 * space; {@code false} otherwise. 6657 * @see Character#isSpaceChar(char) 6658 * @see Character#isWhitespace(char) 6659 * @deprecated Replaced by isWhitespace(char). 6660 */ 6661 @Deprecated 6662 public static boolean isSpace(char ch) { 6663 return (ch <= 0x0020) && 6664 (((((1L << 0x0009) | 6665 (1L << 0x000A) | 6666 (1L << 0x000C) | 6667 (1L << 0x000D) | 6668 (1L << 0x0020)) >> ch) & 1L) != 0); 6669 } 6670 6671 6672 /** 6673 * Determines if the specified character is a Unicode space character. 
6674 * A character is considered to be a space character if and only if 6675 * it is specified to be a space character by the Unicode Standard. This 6676 * method returns true if the character's general category type is any of 6677 * the following: 6678 * <ul> 6679 * <li> {@code SPACE_SEPARATOR} 6680 * <li> {@code LINE_SEPARATOR} 6681 * <li> {@code PARAGRAPH_SEPARATOR} 6682 * </ul> 6683 * 6684 * <p><b>Note:</b> This method cannot handle <a 6685 * href="#supplementary"> supplementary characters</a>. To support 6686 * all Unicode characters, including supplementary characters, use 6687 * the {@link #isSpaceChar(int)} method. 6688 * 6689 * @param ch the character to be tested. 6690 * @return {@code true} if the character is a space character; 6691 * {@code false} otherwise. 6692 * @see Character#isWhitespace(char) 6693 * @since 1.1 6694 */ 6695 public static boolean isSpaceChar(char ch) { 6696 return isSpaceChar((int)ch); 6697 } 6698 6699 /** 6700 * Determines if the specified character (Unicode code point) is a 6701 * Unicode space character. A character is considered to be a 6702 * space character if and only if it is specified to be a space 6703 * character by the Unicode Standard. This method returns true if 6704 * the character's general category type is any of the following: 6705 * 6706 * <ul> 6707 * <li> {@link #SPACE_SEPARATOR} 6708 * <li> {@link #LINE_SEPARATOR} 6709 * <li> {@link #PARAGRAPH_SEPARATOR} 6710 * </ul> 6711 * 6712 * @param codePoint the character (Unicode code point) to be tested. 6713 * @return {@code true} if the character is a space character; 6714 * {@code false} otherwise. 
6715 * @see Character#isWhitespace(int) 6716 * @since 1.5 6717 */ 6718 public static boolean isSpaceChar(int codePoint) { 6719 return ((((1 << Character.SPACE_SEPARATOR) | 6720 (1 << Character.LINE_SEPARATOR) | 6721 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6722 != 0; 6723 } 6724 6725 /** 6726 * Determines if the specified character is white space according to Java. 6727 * A character is a Java whitespace character if and only if it satisfies 6728 * one of the following criteria: 6729 * <ul> 6730 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 6731 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 6732 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6733 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6734 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6735 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6736 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6737 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6738 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6739 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6740 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6741 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6742 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6743 * </ul> 6744 * 6745 * <p><b>Note:</b> This method cannot handle <a 6746 * href="#supplementary"> supplementary characters</a>. To support 6747 * all Unicode characters, including supplementary characters, use 6748 * the {@link #isWhitespace(int)} method. 6749 * 6750 * @param ch the character to be tested. 6751 * @return {@code true} if the character is a Java whitespace 6752 * character; {@code false} otherwise. 
6753 * @see Character#isSpaceChar(char) 6754 * @since 1.1 6755 */ 6756 public static boolean isWhitespace(char ch) { 6757 return isWhitespace((int)ch); 6758 } 6759 6760 /** 6761 * Determines if the specified character (Unicode code point) is 6762 * white space according to Java. A character is a Java 6763 * whitespace character if and only if it satisfies one of the 6764 * following criteria: 6765 * <ul> 6766 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6767 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6768 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6769 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6770 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6771 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6772 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6773 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6774 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6775 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6776 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6777 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6778 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6779 * </ul> 6780 * 6781 * @param codePoint the character (Unicode code point) to be tested. 6782 * @return {@code true} if the character is a Java whitespace 6783 * character; {@code false} otherwise. 6784 * @see Character#isSpaceChar(int) 6785 * @since 1.5 6786 */ 6787 public static boolean isWhitespace(int codePoint) { 6788 return CharacterData.of(codePoint).isWhitespace(codePoint); 6789 } 6790 6791 /** 6792 * Determines if the specified character is an ISO control 6793 * character. A character is considered to be an ISO control 6794 * character if its code is in the range {@code '\u005Cu0000'} 6795 * through {@code '\u005Cu001F'} or in the range 6796 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
6797 * 6798 * <p><b>Note:</b> This method cannot handle <a 6799 * href="#supplementary"> supplementary characters</a>. To support 6800 * all Unicode characters, including supplementary characters, use 6801 * the {@link #isISOControl(int)} method. 6802 * 6803 * @param ch the character to be tested. 6804 * @return {@code true} if the character is an ISO control character; 6805 * {@code false} otherwise. 6806 * 6807 * @see Character#isSpaceChar(char) 6808 * @see Character#isWhitespace(char) 6809 * @since 1.1 6810 */ 6811 public static boolean isISOControl(char ch) { 6812 return isISOControl((int)ch); 6813 } 6814 6815 /** 6816 * Determines if the referenced character (Unicode code point) is an ISO control 6817 * character. A character is considered to be an ISO control 6818 * character if its code is in the range {@code '\u005Cu0000'} 6819 * through {@code '\u005Cu001F'} or in the range 6820 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6821 * 6822 * @param codePoint the character (Unicode code point) to be tested. 6823 * @return {@code true} if the character is an ISO control character; 6824 * {@code false} otherwise. 6825 * @see Character#isSpaceChar(int) 6826 * @see Character#isWhitespace(int) 6827 * @since 1.5 6828 */ 6829 public static boolean isISOControl(int codePoint) { 6830 // Optimized form of: 6831 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6832 // (codePoint >= 0x7F && codePoint <= 0x9F); 6833 return codePoint <= 0x9F && 6834 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6835 } 6836 6837 /** 6838 * Returns a value indicating a character's general category. 6839 * 6840 * <p><b>Note:</b> This method cannot handle <a 6841 * href="#supplementary"> supplementary characters</a>. To support 6842 * all Unicode characters, including supplementary characters, use 6843 * the {@link #getType(int)} method. 6844 * 6845 * @param ch the character to be tested. 6846 * @return a value of type {@code int} representing the 6847 * character's general category. 
6848 * @see Character#COMBINING_SPACING_MARK 6849 * @see Character#CONNECTOR_PUNCTUATION 6850 * @see Character#CONTROL 6851 * @see Character#CURRENCY_SYMBOL 6852 * @see Character#DASH_PUNCTUATION 6853 * @see Character#DECIMAL_DIGIT_NUMBER 6854 * @see Character#ENCLOSING_MARK 6855 * @see Character#END_PUNCTUATION 6856 * @see Character#FINAL_QUOTE_PUNCTUATION 6857 * @see Character#FORMAT 6858 * @see Character#INITIAL_QUOTE_PUNCTUATION 6859 * @see Character#LETTER_NUMBER 6860 * @see Character#LINE_SEPARATOR 6861 * @see Character#LOWERCASE_LETTER 6862 * @see Character#MATH_SYMBOL 6863 * @see Character#MODIFIER_LETTER 6864 * @see Character#MODIFIER_SYMBOL 6865 * @see Character#NON_SPACING_MARK 6866 * @see Character#OTHER_LETTER 6867 * @see Character#OTHER_NUMBER 6868 * @see Character#OTHER_PUNCTUATION 6869 * @see Character#OTHER_SYMBOL 6870 * @see Character#PARAGRAPH_SEPARATOR 6871 * @see Character#PRIVATE_USE 6872 * @see Character#SPACE_SEPARATOR 6873 * @see Character#START_PUNCTUATION 6874 * @see Character#SURROGATE 6875 * @see Character#TITLECASE_LETTER 6876 * @see Character#UNASSIGNED 6877 * @see Character#UPPERCASE_LETTER 6878 * @since 1.1 6879 */ 6880 public static int getType(char ch) { 6881 return getType((int)ch); 6882 } 6883 6884 /** 6885 * Returns a value indicating a character's general category. 6886 * 6887 * @param codePoint the character (Unicode code point) to be tested. 6888 * @return a value of type {@code int} representing the 6889 * character's general category. 
6890 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6891 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6892 * @see Character#CONTROL CONTROL 6893 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6894 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6895 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6896 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6897 * @see Character#END_PUNCTUATION END_PUNCTUATION 6898 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6899 * @see Character#FORMAT FORMAT 6900 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6901 * @see Character#LETTER_NUMBER LETTER_NUMBER 6902 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6903 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6904 * @see Character#MATH_SYMBOL MATH_SYMBOL 6905 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6906 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6907 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6908 * @see Character#OTHER_LETTER OTHER_LETTER 6909 * @see Character#OTHER_NUMBER OTHER_NUMBER 6910 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6911 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6912 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6913 * @see Character#PRIVATE_USE PRIVATE_USE 6914 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6915 * @see Character#START_PUNCTUATION START_PUNCTUATION 6916 * @see Character#SURROGATE SURROGATE 6917 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6918 * @see Character#UNASSIGNED UNASSIGNED 6919 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6920 * @since 1.5 6921 */ 6922 public static int getType(int codePoint) { 6923 return CharacterData.of(codePoint).getType(codePoint); 6924 } 6925 6926 /** 6927 * Determines the character representation for a specific digit in 6928 * the specified radix. 
If the value of {@code radix} is not a 6929 * valid radix, or the value of {@code digit} is not a valid 6930 * digit in the specified radix, the null character 6931 * ({@code '\u005Cu0000'}) is returned. 6932 * <p> 6933 * The {@code radix} argument is valid if it is greater than or 6934 * equal to {@code MIN_RADIX} and less than or equal to 6935 * {@code MAX_RADIX}. The {@code digit} argument is valid if 6936 * {@code 0 <= digit < radix}. 6937 * <p> 6938 * If the digit is less than 10, then 6939 * {@code '0' + digit} is returned. Otherwise, the value 6940 * {@code 'a' + digit - 10} is returned. 6941 * 6942 * @param digit the number to convert to a character. 6943 * @param radix the radix. 6944 * @return the {@code char} representation of the specified digit 6945 * in the specified radix. 6946 * @see Character#MIN_RADIX 6947 * @see Character#MAX_RADIX 6948 * @see Character#digit(char, int) 6949 */ 6950 public static char forDigit(int digit, int radix) { 6951 if ((digit >= radix) || (digit < 0)) { 6952 return '\0'; 6953 } 6954 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6955 return '\0'; 6956 } 6957 if (digit < 10) { 6958 return (char)('0' + digit); 6959 } 6960 return (char)('a' - 10 + digit); 6961 } 6962 6963 /** 6964 * Returns the Unicode directionality property for the given 6965 * character. Character directionality is used to calculate the 6966 * visual ordering of text. The directionality value of undefined 6967 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 6968 * 6969 * <p><b>Note:</b> This method cannot handle <a 6970 * href="#supplementary"> supplementary characters</a>. To support 6971 * all Unicode characters, including supplementary characters, use 6972 * the {@link #getDirectionality(int)} method. 6973 * 6974 * @param ch {@code char} for which the directionality property 6975 * is requested. 6976 * @return the directionality property of the {@code char} value. 
6977 * 6978 * @see Character#DIRECTIONALITY_UNDEFINED 6979 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6980 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6981 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6982 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6983 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6984 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6985 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6986 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6987 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6988 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6989 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6990 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6991 * @see Character#DIRECTIONALITY_WHITESPACE 6992 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6993 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6994 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6995 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6996 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6997 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6998 * @since 1.4 6999 */ 7000 public static byte getDirectionality(char ch) { 7001 return getDirectionality((int)ch); 7002 } 7003 7004 /** 7005 * Returns the Unicode directionality property for the given 7006 * character (Unicode code point). Character directionality is 7007 * used to calculate the visual ordering of text. The 7008 * directionality value of undefined character is {@link 7009 * #DIRECTIONALITY_UNDEFINED}. 7010 * 7011 * @param codePoint the character (Unicode code point) for which 7012 * the directionality property is requested. 7013 * @return the directionality property of the character. 
7014 * 7015 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 7016 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 7017 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 7018 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 7019 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 7020 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 7021 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 7022 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 7023 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 7024 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 7025 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 7026 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 7027 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 7028 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 7029 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 7030 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 7031 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 7032 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 7033 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7034 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7035 * @since 1.5 7036 */ 7037 public static byte getDirectionality(int codePoint) { 7038 return CharacterData.of(codePoint).getDirectionality(codePoint); 7039 } 7040 7041 /** 7042 * 
Determines whether the character is mirrored according to the 7043 * Unicode specification. Mirrored characters should have their 7044 * glyphs horizontally mirrored when displayed in text that is 7045 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 7046 * PARENTHESIS is semantically defined to be an <i>opening 7047 * parenthesis</i>. This will appear as a "(" in text that is 7048 * left-to-right but as a ")" in text that is right-to-left. 7049 * 7050 * <p><b>Note:</b> This method cannot handle <a 7051 * href="#supplementary"> supplementary characters</a>. To support 7052 * all Unicode characters, including supplementary characters, use 7053 * the {@link #isMirrored(int)} method. 7054 * 7055 * @param ch {@code char} for which the mirrored property is requested 7056 * @return {@code true} if the char is mirrored, {@code false} 7057 * if the {@code char} is not mirrored or is not defined. 7058 * @since 1.4 7059 */ 7060 public static boolean isMirrored(char ch) { 7061 return isMirrored((int)ch); 7062 } 7063 7064 /** 7065 * Determines whether the specified character (Unicode code point) 7066 * is mirrored according to the Unicode specification. Mirrored 7067 * characters should have their glyphs horizontally mirrored when 7068 * displayed in text that is right-to-left. For example, 7069 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 7070 * defined to be an <i>opening parenthesis</i>. This will appear 7071 * as a "(" in text that is left-to-right but as a ")" in text 7072 * that is right-to-left. 7073 * 7074 * @param codePoint the character (Unicode code point) to be tested. 7075 * @return {@code true} if the character is mirrored, {@code false} 7076 * if the character is not mirrored or is not defined. 7077 * @since 1.5 7078 */ 7079 public static boolean isMirrored(int codePoint) { 7080 return CharacterData.of(codePoint).isMirrored(codePoint); 7081 } 7082 7083 /** 7084 * Compares two {@code Character} objects numerically. 
7085 * 7086 * @param anotherCharacter the {@code Character} to be compared. 7087 7088 * @return the value {@code 0} if the argument {@code Character} 7089 * is equal to this {@code Character}; a value less than 7090 * {@code 0} if this {@code Character} is numerically less 7091 * than the {@code Character} argument; and a value greater than 7092 * {@code 0} if this {@code Character} is numerically greater 7093 * than the {@code Character} argument (unsigned comparison). 7094 * Note that this is strictly a numerical comparison; it is not 7095 * locale-dependent. 7096 * @since 1.2 7097 */ 7098 public int compareTo(Character anotherCharacter) { 7099 return compare(this.value, anotherCharacter.value); 7100 } 7101 7102 /** 7103 * Compares two {@code char} values numerically. 7104 * The value returned is identical to what would be returned by: 7105 * <pre> 7106 * Character.valueOf(x).compareTo(Character.valueOf(y)) 7107 * </pre> 7108 * 7109 * @param x the first {@code char} to compare 7110 * @param y the second {@code char} to compare 7111 * @return the value {@code 0} if {@code x == y}; 7112 * a value less than {@code 0} if {@code x < y}; and 7113 * a value greater than {@code 0} if {@code x > y} 7114 * @since 1.7 7115 */ 7116 public static int compare(char x, char y) { 7117 return x - y; 7118 } 7119 7120 /** 7121 * Converts the character (Unicode code point) argument to uppercase using 7122 * information from the UnicodeData file. 7123 * 7124 * @param codePoint the character (Unicode code point) to be converted. 7125 * @return either the uppercase equivalent of the character, if 7126 * any, or an error flag ({@code Character.ERROR}) 7127 * that indicates that a 1:M {@code char} mapping exists. 
7128 * @see Character#isLowerCase(char) 7129 * @see Character#isUpperCase(char) 7130 * @see Character#toLowerCase(char) 7131 * @see Character#toTitleCase(char) 7132 * @since 1.4 7133 */ 7134 static int toUpperCaseEx(int codePoint) { 7135 assert isValidCodePoint(codePoint); 7136 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 7137 } 7138 7139 /** 7140 * Converts the character (Unicode code point) argument to uppercase using case 7141 * mapping information from the SpecialCasing file in the Unicode 7142 * specification. If a character has no explicit uppercase 7143 * mapping, then the {@code char} itself is returned in the 7144 * {@code char[]}. 7145 * 7146 * @param codePoint the character (Unicode code point) to be converted. 7147 * @return a {@code char[]} with the uppercased character. 7148 * @since 1.4 7149 */ 7150 static char[] toUpperCaseCharArray(int codePoint) { 7151 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 7152 assert isBmpCodePoint(codePoint); 7153 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 7154 } 7155 7156 /** 7157 * The number of bits used to represent a <tt>char</tt> value in unsigned 7158 * binary form, constant {@code 16}. 7159 * 7160 * @since 1.5 7161 */ 7162 public static final int SIZE = 16; 7163 7164 /** 7165 * The number of bytes used to represent a {@code char} value in unsigned 7166 * binary form. 7167 * 7168 * @since 1.8 7169 */ 7170 public static final int BYTES = SIZE / Byte.SIZE; 7171 7172 /** 7173 * Returns the value obtained by reversing the order of the bytes in the 7174 * specified <tt>char</tt> value. 7175 * 7176 * @param ch The {@code char} of which to reverse the byte order. 7177 * @return the value obtained by reversing (or, equivalently, swapping) 7178 * the bytes in the specified <tt>char</tt> value. 
* @since 1.5
     */
    public static char reverseBytes(char ch) {
        // The high byte moves down and the low byte moves up; the cast to char
        // discards the bits that ch << 8 pushes above bit 15.
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
    }

    /**
     * Returns the Unicode name of the specified character
     * {@code codePoint}, or null if the code point is
     * {@link #UNASSIGNED unassigned}.
     * <p>
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
     *
     * <blockquote>{@code
     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
     *     + " "
     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
     *
     * }</blockquote>
     *
     * @param  codePoint the character (Unicode code point)
     *
     * @return the Unicode name of the specified character, or null if
     *         the code point is unassigned.
     *
     * @throws IllegalArgumentException if the specified
     *            {@code codePoint} is not a valid Unicode
     *            code point. The exception message includes the
     *            rejected value in hexadecimal.
     *
     * @since 1.7
     */
    public static String getName(int codePoint) {
        if (!isValidCodePoint(codePoint)) {
            // Report the rejected value so the failure can be diagnosed by the caller.
            throw new IllegalArgumentException(
                    String.format("Not a valid Unicode code point: 0x%X", codePoint));
        }
        String name = CharacterName.get(codePoint);
        if (name != null) {
            return name;
        }
        if (getType(codePoint) == UNASSIGNED) {
            return null;
        }
        // No explicit name: fall back to "<block name> <HEX>", or just "<HEX>"
        // when no block is found either.
        final String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
        final UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (block != null) {
            return block.toString().replace('_', ' ') + " " + hex;
        }
        // should never come here
        return hex;
    }
}