1 /* 2 * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The {@code Character} class wraps a value of the primitive 35 * type {@code char} in an object. An object of type 36 * {@code Character} contains a single field whose type is 37 * {@code char}. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.2.0. 
44 * <p> 45 * The methods and data of class {@code Character} are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h3><a name="unicode">Unicode Character Representations</a></h3> 58 * 59 * <p>The {@code char} data type (and therefore the value that a 60 * {@code Character} object encapsulates) are based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode Standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * Standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in {@code char} arrays and 76 * in the {@code String} and {@code StringBuffer} classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of {@code char} values, the first from the <em>high-surrogates</em> 79 * range, (\uD800-\uDBFF), the second from the 80 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
81 * 82 * <p>A {@code char} value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * {@code int} value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of {@code int} are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate {@code char} values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a {@code char} value cannot support 95 * supplementary characters. They treat {@code char} values from the 96 * surrogate ranges as undefined characters. For example, 97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an {@code int} value support all 102 * Unicode characters, including supplementary characters. For 103 * example, {@code Character.isLetter(0x2F81A)} returns 104 * {@code true} because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * {@code char} values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * {@code digit} method, the {@code forDigit} method, and the 129 * {@code toString} method of class {@code Integer}. 130 * 131 * @see Character#digit(char, int) 132 * @see Character#forDigit(int, int) 133 * @see Integer#toString(int, int) 134 * @see Integer#valueOf(String) 135 */ 136 public static final int MIN_RADIX = 2; 137 138 /** 139 * The maximum radix available for conversion to and from strings. 140 * The constant value of this field is the largest value permitted 141 * for the radix argument in radix-conversion methods such as the 142 * {@code digit} method, the {@code forDigit} method, and the 143 * {@code toString} method of class {@code Integer}. 144 * 145 * @see Character#digit(char, int) 146 * @see Character#forDigit(int, int) 147 * @see Integer#toString(int, int) 148 * @see Integer#valueOf(String) 149 */ 150 public static final int MAX_RADIX = 36; 151 152 /** 153 * The constant value of this field is the smallest value of type 154 * {@code char}, {@code '\u005Cu0000'}. 155 * 156 * @since 1.0.2 157 */ 158 public static final char MIN_VALUE = '\u0000'; 159 160 /** 161 * The constant value of this field is the largest value of type 162 * {@code char}, {@code '\u005CuFFFF'}. 163 * 164 * @since 1.0.2 165 */ 166 public static final char MAX_VALUE = '\uFFFF'; 167 168 /** 169 * The {@code Class} instance representing the primitive type 170 * {@code char}. 
171 * 172 * @since 1.1 173 */ 174 @SuppressWarnings("unchecked") 175 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 176 177 /* 178 * Normative general types 179 */ 180 181 /* 182 * General character types 183 */ 184 185 /** 186 * General category "Cn" in the Unicode specification. 187 * @since 1.1 188 */ 189 public static final byte UNASSIGNED = 0; 190 191 /** 192 * General category "Lu" in the Unicode specification. 193 * @since 1.1 194 */ 195 public static final byte UPPERCASE_LETTER = 1; 196 197 /** 198 * General category "Ll" in the Unicode specification. 199 * @since 1.1 200 */ 201 public static final byte LOWERCASE_LETTER = 2; 202 203 /** 204 * General category "Lt" in the Unicode specification. 205 * @since 1.1 206 */ 207 public static final byte TITLECASE_LETTER = 3; 208 209 /** 210 * General category "Lm" in the Unicode specification. 211 * @since 1.1 212 */ 213 public static final byte MODIFIER_LETTER = 4; 214 215 /** 216 * General category "Lo" in the Unicode specification. 217 * @since 1.1 218 */ 219 public static final byte OTHER_LETTER = 5; 220 221 /** 222 * General category "Mn" in the Unicode specification. 223 * @since 1.1 224 */ 225 public static final byte NON_SPACING_MARK = 6; 226 227 /** 228 * General category "Me" in the Unicode specification. 229 * @since 1.1 230 */ 231 public static final byte ENCLOSING_MARK = 7; 232 233 /** 234 * General category "Mc" in the Unicode specification. 235 * @since 1.1 236 */ 237 public static final byte COMBINING_SPACING_MARK = 8; 238 239 /** 240 * General category "Nd" in the Unicode specification. 241 * @since 1.1 242 */ 243 public static final byte DECIMAL_DIGIT_NUMBER = 9; 244 245 /** 246 * General category "Nl" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte LETTER_NUMBER = 10; 250 251 /** 252 * General category "No" in the Unicode specification. 
253 * @since 1.1 254 */ 255 public static final byte OTHER_NUMBER = 11; 256 257 /** 258 * General category "Zs" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte SPACE_SEPARATOR = 12; 262 263 /** 264 * General category "Zl" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte LINE_SEPARATOR = 13; 268 269 /** 270 * General category "Zp" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte PARAGRAPH_SEPARATOR = 14; 274 275 /** 276 * General category "Cc" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte CONTROL = 15; 280 281 /** 282 * General category "Cf" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte FORMAT = 16; 286 287 /** 288 * General category "Co" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte PRIVATE_USE = 18; 292 293 /** 294 * General category "Cs" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte SURROGATE = 19; 298 299 /** 300 * General category "Pd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DASH_PUNCTUATION = 20; 304 305 /** 306 * General category "Ps" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte START_PUNCTUATION = 21; 310 311 /** 312 * General category "Pe" in the Unicode specification. 313 * @since 1.1 314 */ 315 public static final byte END_PUNCTUATION = 22; 316 317 /** 318 * General category "Pc" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte CONNECTOR_PUNCTUATION = 23; 322 323 /** 324 * General category "Po" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte OTHER_PUNCTUATION = 24; 328 329 /** 330 * General category "Sm" in the Unicode specification. 
331 * @since 1.1 332 */ 333 public static final byte MATH_SYMBOL = 25; 334 335 /** 336 * General category "Sc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CURRENCY_SYMBOL = 26; 340 341 /** 342 * General category "Sk" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte MODIFIER_SYMBOL = 27; 346 347 /** 348 * General category "So" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte OTHER_SYMBOL = 28; 352 353 /** 354 * General category "Pi" in the Unicode specification. 355 * @since 1.4 356 */ 357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 358 359 /** 360 * General category "Pf" in the Unicode specification. 361 * @since 1.4 362 */ 363 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 364 365 /** 366 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 367 */ 368 static final int ERROR = 0xFFFFFFFF; 369 370 371 /** 372 * Undefined bidirectional character type. Undefined {@code char} 373 * values have undefined directionality in the Unicode specification. 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_UNDEFINED = -1; 377 378 /** 379 * Strong bidirectional character type "L" in the Unicode specification. 380 * @since 1.4 381 */ 382 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 383 384 /** 385 * Strong bidirectional character type "R" in the Unicode specification. 386 * @since 1.4 387 */ 388 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 389 390 /** 391 * Strong bidirectional character type "AL" in the Unicode specification. 392 * @since 1.4 393 */ 394 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 395 396 /** 397 * Weak bidirectional character type "EN" in the Unicode specification. 398 * @since 1.4 399 */ 400 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 401 402 /** 403 * Weak bidirectional character type "ES" in the Unicode specification. 
404 * @since 1.4 405 */ 406 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 407 408 /** 409 * Weak bidirectional character type "ET" in the Unicode specification. 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 413 414 /** 415 * Weak bidirectional character type "AN" in the Unicode specification. 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 419 420 /** 421 * Weak bidirectional character type "CS" in the Unicode specification. 422 * @since 1.4 423 */ 424 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 425 426 /** 427 * Weak bidirectional character type "NSM" in the Unicode specification. 428 * @since 1.4 429 */ 430 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 431 432 /** 433 * Weak bidirectional character type "BN" in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 437 438 /** 439 * Neutral bidirectional character type "B" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 443 444 /** 445 * Neutral bidirectional character type "S" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 449 450 /** 451 * Neutral bidirectional character type "WS" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_WHITESPACE = 12; 455 456 /** 457 * Neutral bidirectional character type "ON" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 461 462 /** 463 * Strong bidirectional character type "LRE" in the Unicode specification. 464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 467 468 /** 469 * Strong bidirectional character type "LRO" in the Unicode specification. 
470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 473 474 /** 475 * Strong bidirectional character type "RLE" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 479 480 /** 481 * Strong bidirectional character type "RLO" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 485 486 /** 487 * Weak bidirectional character type "PDF" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 491 492 /** 493 * The minimum value of a 494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 495 * Unicode high-surrogate code unit</a> 496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 497 * A high-surrogate is also known as a <i>leading-surrogate</i>. 498 * 499 * @since 1.5 500 */ 501 public static final char MIN_HIGH_SURROGATE = '\uD800'; 502 503 /** 504 * The maximum value of a 505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 506 * Unicode high-surrogate code unit</a> 507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 508 * A high-surrogate is also known as a <i>leading-surrogate</i>. 509 * 510 * @since 1.5 511 */ 512 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 513 514 /** 515 * The minimum value of a 516 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 517 * Unicode low-surrogate code unit</a> 518 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 519 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
520 * 521 * @since 1.5 522 */ 523 public static final char MIN_LOW_SURROGATE = '\uDC00'; 524 525 /** 526 * The maximum value of a 527 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 528 * Unicode low-surrogate code unit</a> 529 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 530 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 531 * 532 * @since 1.5 533 */ 534 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 535 536 /** 537 * The minimum value of a Unicode surrogate code unit in the 538 * UTF-16 encoding, constant {@code '\u005CuD800'}. 539 * 540 * @since 1.5 541 */ 542 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 543 544 /** 545 * The maximum value of a Unicode surrogate code unit in the 546 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 547 * 548 * @since 1.5 549 */ 550 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 551 552 /** 553 * The minimum value of a 554 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 555 * Unicode supplementary code point</a>, constant {@code U+10000}. 556 * 557 * @since 1.5 558 */ 559 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 560 561 /** 562 * The minimum value of a 563 * <a href="http://www.unicode.org/glossary/#code_point"> 564 * Unicode code point</a>, constant {@code U+0000}. 565 * 566 * @since 1.5 567 */ 568 public static final int MIN_CODE_POINT = 0x000000; 569 570 /** 571 * The maximum value of a 572 * <a href="http://www.unicode.org/glossary/#code_point"> 573 * Unicode code point</a>, constant {@code U+10FFFF}. 574 * 575 * @since 1.5 576 */ 577 public static final int MAX_CODE_POINT = 0X10FFFF; 578 579 580 /** 581 * Instances of this class represent particular subsets of the Unicode 582 * character set. The only family of subsets defined in the 583 * {@code Character} class is {@link Character.UnicodeBlock}. 584 * Other portions of the Java API may define other subsets for their 585 * own purposes. 
586 * 587 * @since 1.2 588 */ 589 public static class Subset { 590 591 private String name; 592 593 /** 594 * Constructs a new {@code Subset} instance. 595 * 596 * @param name The name of this subset 597 * @exception NullPointerException if name is {@code null} 598 */ 599 protected Subset(String name) { 600 if (name == null) { 601 throw new NullPointerException("name"); 602 } 603 this.name = name; 604 } 605 606 /** 607 * Compares two {@code Subset} objects for equality. 608 * This method returns {@code true} if and only if 609 * {@code this} and the argument refer to the same 610 * object; since this method is {@code final}, this 611 * guarantee holds for all subclasses. 612 */ 613 public final boolean equals(Object obj) { 614 return (this == obj); 615 } 616 617 /** 618 * Returns the standard hash code as defined by the 619 * {@link Object#hashCode} method. This method 620 * is {@code final} in order to ensure that the 621 * {@code equals} and {@code hashCode} methods will 622 * be consistent in all subclasses. 623 */ 624 public final int hashCode() { 625 return super.hashCode(); 626 } 627 628 /** 629 * Returns the name of this subset. 630 */ 631 public final String toString() { 632 return name; 633 } 634 } 635 636 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 637 // for the latest specification of Unicode Blocks. 638 639 /** 640 * A family of character subsets representing the character blocks in the 641 * Unicode specification. Character blocks generally define characters 642 * used for a specific script or purpose. A character is contained by 643 * at most one Unicode block. 644 * 645 * @since 1.2 646 */ 647 public static final class UnicodeBlock extends Subset { 648 /** 649 * 510 - the expected number of enteties 650 * 0.75 - the default load factor of HashMap 651 */ 652 private static Map<String, UnicodeBlock> map = 653 new HashMap<>((int)(510 / 0.75f + 1.0f)); 654 655 /** 656 * Creates a UnicodeBlock with the given identifier name. 
657 * This name must be the same as the block identifier. 658 */ 659 private UnicodeBlock(String idName) { 660 super(idName); 661 map.put(idName, this); 662 } 663 664 /** 665 * Creates a UnicodeBlock with the given identifier name and 666 * alias name. 667 */ 668 private UnicodeBlock(String idName, String alias) { 669 this(idName); 670 map.put(alias, this); 671 } 672 673 /** 674 * Creates a UnicodeBlock with the given identifier name and 675 * alias names. 676 */ 677 private UnicodeBlock(String idName, String... aliases) { 678 this(idName); 679 for (String alias : aliases) 680 map.put(alias, this); 681 } 682 683 /** 684 * Constant for the "Basic Latin" Unicode character block. 685 * @since 1.2 686 */ 687 public static final UnicodeBlock BASIC_LATIN = 688 new UnicodeBlock("BASIC_LATIN", 689 "BASIC LATIN", 690 "BASICLATIN"); 691 692 /** 693 * Constant for the "Latin-1 Supplement" Unicode character block. 694 * @since 1.2 695 */ 696 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 697 new UnicodeBlock("LATIN_1_SUPPLEMENT", 698 "LATIN-1 SUPPLEMENT", 699 "LATIN-1SUPPLEMENT"); 700 701 /** 702 * Constant for the "Latin Extended-A" Unicode character block. 703 * @since 1.2 704 */ 705 public static final UnicodeBlock LATIN_EXTENDED_A = 706 new UnicodeBlock("LATIN_EXTENDED_A", 707 "LATIN EXTENDED-A", 708 "LATINEXTENDED-A"); 709 710 /** 711 * Constant for the "Latin Extended-B" Unicode character block. 712 * @since 1.2 713 */ 714 public static final UnicodeBlock LATIN_EXTENDED_B = 715 new UnicodeBlock("LATIN_EXTENDED_B", 716 "LATIN EXTENDED-B", 717 "LATINEXTENDED-B"); 718 719 /** 720 * Constant for the "IPA Extensions" Unicode character block. 721 * @since 1.2 722 */ 723 public static final UnicodeBlock IPA_EXTENSIONS = 724 new UnicodeBlock("IPA_EXTENSIONS", 725 "IPA EXTENSIONS", 726 "IPAEXTENSIONS"); 727 728 /** 729 * Constant for the "Spacing Modifier Letters" Unicode character block. 
730 * @since 1.2 731 */ 732 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 733 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 734 "SPACING MODIFIER LETTERS", 735 "SPACINGMODIFIERLETTERS"); 736 737 /** 738 * Constant for the "Combining Diacritical Marks" Unicode character block. 739 * @since 1.2 740 */ 741 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 742 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 743 "COMBINING DIACRITICAL MARKS", 744 "COMBININGDIACRITICALMARKS"); 745 746 /** 747 * Constant for the "Greek and Coptic" Unicode character block. 748 * <p> 749 * This block was previously known as the "Greek" block. 750 * 751 * @since 1.2 752 */ 753 public static final UnicodeBlock GREEK = 754 new UnicodeBlock("GREEK", 755 "GREEK AND COPTIC", 756 "GREEKANDCOPTIC"); 757 758 /** 759 * Constant for the "Cyrillic" Unicode character block. 760 * @since 1.2 761 */ 762 public static final UnicodeBlock CYRILLIC = 763 new UnicodeBlock("CYRILLIC"); 764 765 /** 766 * Constant for the "Armenian" Unicode character block. 767 * @since 1.2 768 */ 769 public static final UnicodeBlock ARMENIAN = 770 new UnicodeBlock("ARMENIAN"); 771 772 /** 773 * Constant for the "Hebrew" Unicode character block. 774 * @since 1.2 775 */ 776 public static final UnicodeBlock HEBREW = 777 new UnicodeBlock("HEBREW"); 778 779 /** 780 * Constant for the "Arabic" Unicode character block. 781 * @since 1.2 782 */ 783 public static final UnicodeBlock ARABIC = 784 new UnicodeBlock("ARABIC"); 785 786 /** 787 * Constant for the "Devanagari" Unicode character block. 788 * @since 1.2 789 */ 790 public static final UnicodeBlock DEVANAGARI = 791 new UnicodeBlock("DEVANAGARI"); 792 793 /** 794 * Constant for the "Bengali" Unicode character block. 795 * @since 1.2 796 */ 797 public static final UnicodeBlock BENGALI = 798 new UnicodeBlock("BENGALI"); 799 800 /** 801 * Constant for the "Gurmukhi" Unicode character block. 
802 * @since 1.2 803 */ 804 public static final UnicodeBlock GURMUKHI = 805 new UnicodeBlock("GURMUKHI"); 806 807 /** 808 * Constant for the "Gujarati" Unicode character block. 809 * @since 1.2 810 */ 811 public static final UnicodeBlock GUJARATI = 812 new UnicodeBlock("GUJARATI"); 813 814 /** 815 * Constant for the "Oriya" Unicode character block. 816 * @since 1.2 817 */ 818 public static final UnicodeBlock ORIYA = 819 new UnicodeBlock("ORIYA"); 820 821 /** 822 * Constant for the "Tamil" Unicode character block. 823 * @since 1.2 824 */ 825 public static final UnicodeBlock TAMIL = 826 new UnicodeBlock("TAMIL"); 827 828 /** 829 * Constant for the "Telugu" Unicode character block. 830 * @since 1.2 831 */ 832 public static final UnicodeBlock TELUGU = 833 new UnicodeBlock("TELUGU"); 834 835 /** 836 * Constant for the "Kannada" Unicode character block. 837 * @since 1.2 838 */ 839 public static final UnicodeBlock KANNADA = 840 new UnicodeBlock("KANNADA"); 841 842 /** 843 * Constant for the "Malayalam" Unicode character block. 844 * @since 1.2 845 */ 846 public static final UnicodeBlock MALAYALAM = 847 new UnicodeBlock("MALAYALAM"); 848 849 /** 850 * Constant for the "Thai" Unicode character block. 851 * @since 1.2 852 */ 853 public static final UnicodeBlock THAI = 854 new UnicodeBlock("THAI"); 855 856 /** 857 * Constant for the "Lao" Unicode character block. 858 * @since 1.2 859 */ 860 public static final UnicodeBlock LAO = 861 new UnicodeBlock("LAO"); 862 863 /** 864 * Constant for the "Tibetan" Unicode character block. 865 * @since 1.2 866 */ 867 public static final UnicodeBlock TIBETAN = 868 new UnicodeBlock("TIBETAN"); 869 870 /** 871 * Constant for the "Georgian" Unicode character block. 872 * @since 1.2 873 */ 874 public static final UnicodeBlock GEORGIAN = 875 new UnicodeBlock("GEORGIAN"); 876 877 /** 878 * Constant for the "Hangul Jamo" Unicode character block. 
879 * @since 1.2 880 */ 881 public static final UnicodeBlock HANGUL_JAMO = 882 new UnicodeBlock("HANGUL_JAMO", 883 "HANGUL JAMO", 884 "HANGULJAMO"); 885 886 /** 887 * Constant for the "Latin Extended Additional" Unicode character block. 888 * @since 1.2 889 */ 890 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 891 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 892 "LATIN EXTENDED ADDITIONAL", 893 "LATINEXTENDEDADDITIONAL"); 894 895 /** 896 * Constant for the "Greek Extended" Unicode character block. 897 * @since 1.2 898 */ 899 public static final UnicodeBlock GREEK_EXTENDED = 900 new UnicodeBlock("GREEK_EXTENDED", 901 "GREEK EXTENDED", 902 "GREEKEXTENDED"); 903 904 /** 905 * Constant for the "General Punctuation" Unicode character block. 906 * @since 1.2 907 */ 908 public static final UnicodeBlock GENERAL_PUNCTUATION = 909 new UnicodeBlock("GENERAL_PUNCTUATION", 910 "GENERAL PUNCTUATION", 911 "GENERALPUNCTUATION"); 912 913 /** 914 * Constant for the "Superscripts and Subscripts" Unicode character 915 * block. 916 * @since 1.2 917 */ 918 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 919 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 920 "SUPERSCRIPTS AND SUBSCRIPTS", 921 "SUPERSCRIPTSANDSUBSCRIPTS"); 922 923 /** 924 * Constant for the "Currency Symbols" Unicode character block. 925 * @since 1.2 926 */ 927 public static final UnicodeBlock CURRENCY_SYMBOLS = 928 new UnicodeBlock("CURRENCY_SYMBOLS", 929 "CURRENCY SYMBOLS", 930 "CURRENCYSYMBOLS"); 931 932 /** 933 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 934 * character block. 935 * <p> 936 * This block was previously known as "Combining Marks for Symbols". 
937 * @since 1.2 938 */ 939 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 940 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 941 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 942 "COMBININGDIACRITICALMARKSFORSYMBOLS", 943 "COMBINING MARKS FOR SYMBOLS", 944 "COMBININGMARKSFORSYMBOLS"); 945 946 /** 947 * Constant for the "Letterlike Symbols" Unicode character block. 948 * @since 1.2 949 */ 950 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 951 new UnicodeBlock("LETTERLIKE_SYMBOLS", 952 "LETTERLIKE SYMBOLS", 953 "LETTERLIKESYMBOLS"); 954 955 /** 956 * Constant for the "Number Forms" Unicode character block. 957 * @since 1.2 958 */ 959 public static final UnicodeBlock NUMBER_FORMS = 960 new UnicodeBlock("NUMBER_FORMS", 961 "NUMBER FORMS", 962 "NUMBERFORMS"); 963 964 /** 965 * Constant for the "Arrows" Unicode character block. 966 * @since 1.2 967 */ 968 public static final UnicodeBlock ARROWS = 969 new UnicodeBlock("ARROWS"); 970 971 /** 972 * Constant for the "Mathematical Operators" Unicode character block. 973 * @since 1.2 974 */ 975 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 976 new UnicodeBlock("MATHEMATICAL_OPERATORS", 977 "MATHEMATICAL OPERATORS", 978 "MATHEMATICALOPERATORS"); 979 980 /** 981 * Constant for the "Miscellaneous Technical" Unicode character block. 982 * @since 1.2 983 */ 984 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 985 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 986 "MISCELLANEOUS TECHNICAL", 987 "MISCELLANEOUSTECHNICAL"); 988 989 /** 990 * Constant for the "Control Pictures" Unicode character block. 991 * @since 1.2 992 */ 993 public static final UnicodeBlock CONTROL_PICTURES = 994 new UnicodeBlock("CONTROL_PICTURES", 995 "CONTROL PICTURES", 996 "CONTROLPICTURES"); 997 998 /** 999 * Constant for the "Optical Character Recognition" Unicode character block. 
1000 * @since 1.2 1001 */ 1002 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1003 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1004 "OPTICAL CHARACTER RECOGNITION", 1005 "OPTICALCHARACTERRECOGNITION"); 1006 1007 /** 1008 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1009 * @since 1.2 1010 */ 1011 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1012 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1013 "ENCLOSED ALPHANUMERICS", 1014 "ENCLOSEDALPHANUMERICS"); 1015 1016 /** 1017 * Constant for the "Box Drawing" Unicode character block. 1018 * @since 1.2 1019 */ 1020 public static final UnicodeBlock BOX_DRAWING = 1021 new UnicodeBlock("BOX_DRAWING", 1022 "BOX DRAWING", 1023 "BOXDRAWING"); 1024 1025 /** 1026 * Constant for the "Block Elements" Unicode character block. 1027 * @since 1.2 1028 */ 1029 public static final UnicodeBlock BLOCK_ELEMENTS = 1030 new UnicodeBlock("BLOCK_ELEMENTS", 1031 "BLOCK ELEMENTS", 1032 "BLOCKELEMENTS"); 1033 1034 /** 1035 * Constant for the "Geometric Shapes" Unicode character block. 1036 * @since 1.2 1037 */ 1038 public static final UnicodeBlock GEOMETRIC_SHAPES = 1039 new UnicodeBlock("GEOMETRIC_SHAPES", 1040 "GEOMETRIC SHAPES", 1041 "GEOMETRICSHAPES"); 1042 1043 /** 1044 * Constant for the "Miscellaneous Symbols" Unicode character block. 1045 * @since 1.2 1046 */ 1047 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1048 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1049 "MISCELLANEOUS SYMBOLS", 1050 "MISCELLANEOUSSYMBOLS"); 1051 1052 /** 1053 * Constant for the "Dingbats" Unicode character block. 1054 * @since 1.2 1055 */ 1056 public static final UnicodeBlock DINGBATS = 1057 new UnicodeBlock("DINGBATS"); 1058 1059 /** 1060 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1061 * @since 1.2 1062 */ 1063 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1064 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1065 "CJK SYMBOLS AND PUNCTUATION", 1066 "CJKSYMBOLSANDPUNCTUATION"); 1067 1068 /** 1069 * Constant for the "Hiragana" Unicode character block. 1070 * @since 1.2 1071 */ 1072 public static final UnicodeBlock HIRAGANA = 1073 new UnicodeBlock("HIRAGANA"); 1074 1075 /** 1076 * Constant for the "Katakana" Unicode character block. 1077 * @since 1.2 1078 */ 1079 public static final UnicodeBlock KATAKANA = 1080 new UnicodeBlock("KATAKANA"); 1081 1082 /** 1083 * Constant for the "Bopomofo" Unicode character block. 1084 * @since 1.2 1085 */ 1086 public static final UnicodeBlock BOPOMOFO = 1087 new UnicodeBlock("BOPOMOFO"); 1088 1089 /** 1090 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1091 * @since 1.2 1092 */ 1093 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1094 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1095 "HANGUL COMPATIBILITY JAMO", 1096 "HANGULCOMPATIBILITYJAMO"); 1097 1098 /** 1099 * Constant for the "Kanbun" Unicode character block. 1100 * @since 1.2 1101 */ 1102 public static final UnicodeBlock KANBUN = 1103 new UnicodeBlock("KANBUN"); 1104 1105 /** 1106 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1107 * @since 1.2 1108 */ 1109 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1110 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1111 "ENCLOSED CJK LETTERS AND MONTHS", 1112 "ENCLOSEDCJKLETTERSANDMONTHS"); 1113 1114 /** 1115 * Constant for the "CJK Compatibility" Unicode character block. 1116 * @since 1.2 1117 */ 1118 public static final UnicodeBlock CJK_COMPATIBILITY = 1119 new UnicodeBlock("CJK_COMPATIBILITY", 1120 "CJK COMPATIBILITY", 1121 "CJKCOMPATIBILITY"); 1122 1123 /** 1124 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1125 * @since 1.2 1126 */ 1127 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1128 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1129 "CJK UNIFIED IDEOGRAPHS", 1130 "CJKUNIFIEDIDEOGRAPHS"); 1131 1132 /** 1133 * Constant for the "Hangul Syllables" Unicode character block. 1134 * @since 1.2 1135 */ 1136 public static final UnicodeBlock HANGUL_SYLLABLES = 1137 new UnicodeBlock("HANGUL_SYLLABLES", 1138 "HANGUL SYLLABLES", 1139 "HANGULSYLLABLES"); 1140 1141 /** 1142 * Constant for the "Private Use Area" Unicode character block. 1143 * @since 1.2 1144 */ 1145 public static final UnicodeBlock PRIVATE_USE_AREA = 1146 new UnicodeBlock("PRIVATE_USE_AREA", 1147 "PRIVATE USE AREA", 1148 "PRIVATEUSEAREA"); 1149 1150 /** 1151 * Constant for the "CJK Compatibility Ideographs" Unicode character 1152 * block. 1153 * @since 1.2 1154 */ 1155 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1156 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1157 "CJK COMPATIBILITY IDEOGRAPHS", 1158 "CJKCOMPATIBILITYIDEOGRAPHS"); 1159 1160 /** 1161 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1162 * @since 1.2 1163 */ 1164 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1165 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1166 "ALPHABETIC PRESENTATION FORMS", 1167 "ALPHABETICPRESENTATIONFORMS"); 1168 1169 /** 1170 * Constant for the "Arabic Presentation Forms-A" Unicode character 1171 * block. 1172 * @since 1.2 1173 */ 1174 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1175 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1176 "ARABIC PRESENTATION FORMS-A", 1177 "ARABICPRESENTATIONFORMS-A"); 1178 1179 /** 1180 * Constant for the "Combining Half Marks" Unicode character block. 
1181 * @since 1.2 1182 */ 1183 public static final UnicodeBlock COMBINING_HALF_MARKS = 1184 new UnicodeBlock("COMBINING_HALF_MARKS", 1185 "COMBINING HALF MARKS", 1186 "COMBININGHALFMARKS"); 1187 1188 /** 1189 * Constant for the "CJK Compatibility Forms" Unicode character block. 1190 * @since 1.2 1191 */ 1192 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1193 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1194 "CJK COMPATIBILITY FORMS", 1195 "CJKCOMPATIBILITYFORMS"); 1196 1197 /** 1198 * Constant for the "Small Form Variants" Unicode character block. 1199 * @since 1.2 1200 */ 1201 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1202 new UnicodeBlock("SMALL_FORM_VARIANTS", 1203 "SMALL FORM VARIANTS", 1204 "SMALLFORMVARIANTS"); 1205 1206 /** 1207 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1208 * @since 1.2 1209 */ 1210 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1211 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1212 "ARABIC PRESENTATION FORMS-B", 1213 "ARABICPRESENTATIONFORMS-B"); 1214 1215 /** 1216 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1217 * block. 1218 * @since 1.2 1219 */ 1220 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1221 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1222 "HALFWIDTH AND FULLWIDTH FORMS", 1223 "HALFWIDTHANDFULLWIDTHFORMS"); 1224 1225 /** 1226 * Constant for the "Specials" Unicode character block. 1227 * @since 1.2 1228 */ 1229 public static final UnicodeBlock SPECIALS = 1230 new UnicodeBlock("SPECIALS"); 1231 1232 /** 1233 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1234 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1235 * {@link #LOW_SURROGATES}. These new constants match 1236 * the block definitions of the Unicode Standard. 1237 * The {@link #of(char)} and {@link #of(int)} methods 1238 * return the new constants, not SURROGATES_AREA. 
1239 */ 1240 @Deprecated 1241 public static final UnicodeBlock SURROGATES_AREA = 1242 new UnicodeBlock("SURROGATES_AREA"); 1243 1244 /** 1245 * Constant for the "Syriac" Unicode character block. 1246 * @since 1.4 1247 */ 1248 public static final UnicodeBlock SYRIAC = 1249 new UnicodeBlock("SYRIAC"); 1250 1251 /** 1252 * Constant for the "Thaana" Unicode character block. 1253 * @since 1.4 1254 */ 1255 public static final UnicodeBlock THAANA = 1256 new UnicodeBlock("THAANA"); 1257 1258 /** 1259 * Constant for the "Sinhala" Unicode character block. 1260 * @since 1.4 1261 */ 1262 public static final UnicodeBlock SINHALA = 1263 new UnicodeBlock("SINHALA"); 1264 1265 /** 1266 * Constant for the "Myanmar" Unicode character block. 1267 * @since 1.4 1268 */ 1269 public static final UnicodeBlock MYANMAR = 1270 new UnicodeBlock("MYANMAR"); 1271 1272 /** 1273 * Constant for the "Ethiopic" Unicode character block. 1274 * @since 1.4 1275 */ 1276 public static final UnicodeBlock ETHIOPIC = 1277 new UnicodeBlock("ETHIOPIC"); 1278 1279 /** 1280 * Constant for the "Cherokee" Unicode character block. 1281 * @since 1.4 1282 */ 1283 public static final UnicodeBlock CHEROKEE = 1284 new UnicodeBlock("CHEROKEE"); 1285 1286 /** 1287 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1288 * @since 1.4 1289 */ 1290 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1291 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1292 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1293 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1294 1295 /** 1296 * Constant for the "Ogham" Unicode character block. 1297 * @since 1.4 1298 */ 1299 public static final UnicodeBlock OGHAM = 1300 new UnicodeBlock("OGHAM"); 1301 1302 /** 1303 * Constant for the "Runic" Unicode character block. 1304 * @since 1.4 1305 */ 1306 public static final UnicodeBlock RUNIC = 1307 new UnicodeBlock("RUNIC"); 1308 1309 /** 1310 * Constant for the "Khmer" Unicode character block. 
1311 * @since 1.4 1312 */ 1313 public static final UnicodeBlock KHMER = 1314 new UnicodeBlock("KHMER"); 1315 1316 /** 1317 * Constant for the "Mongolian" Unicode character block. 1318 * @since 1.4 1319 */ 1320 public static final UnicodeBlock MONGOLIAN = 1321 new UnicodeBlock("MONGOLIAN"); 1322 1323 /** 1324 * Constant for the "Braille Patterns" Unicode character block. 1325 * @since 1.4 1326 */ 1327 public static final UnicodeBlock BRAILLE_PATTERNS = 1328 new UnicodeBlock("BRAILLE_PATTERNS", 1329 "BRAILLE PATTERNS", 1330 "BRAILLEPATTERNS"); 1331 1332 /** 1333 * Constant for the "CJK Radicals Supplement" Unicode character block. 1334 * @since 1.4 1335 */ 1336 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1337 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1338 "CJK RADICALS SUPPLEMENT", 1339 "CJKRADICALSSUPPLEMENT"); 1340 1341 /** 1342 * Constant for the "Kangxi Radicals" Unicode character block. 1343 * @since 1.4 1344 */ 1345 public static final UnicodeBlock KANGXI_RADICALS = 1346 new UnicodeBlock("KANGXI_RADICALS", 1347 "KANGXI RADICALS", 1348 "KANGXIRADICALS"); 1349 1350 /** 1351 * Constant for the "Ideographic Description Characters" Unicode character block. 1352 * @since 1.4 1353 */ 1354 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1355 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1356 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1357 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1358 1359 /** 1360 * Constant for the "Bopomofo Extended" Unicode character block. 1361 * @since 1.4 1362 */ 1363 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1364 new UnicodeBlock("BOPOMOFO_EXTENDED", 1365 "BOPOMOFO EXTENDED", 1366 "BOPOMOFOEXTENDED"); 1367 1368 /** 1369 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1370 * @since 1.4 1371 */ 1372 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1373 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1374 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1375 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1376 1377 /** 1378 * Constant for the "Yi Syllables" Unicode character block. 1379 * @since 1.4 1380 */ 1381 public static final UnicodeBlock YI_SYLLABLES = 1382 new UnicodeBlock("YI_SYLLABLES", 1383 "YI SYLLABLES", 1384 "YISYLLABLES"); 1385 1386 /** 1387 * Constant for the "Yi Radicals" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock YI_RADICALS = 1391 new UnicodeBlock("YI_RADICALS", 1392 "YI RADICALS", 1393 "YIRADICALS"); 1394 1395 /** 1396 * Constant for the "Cyrillic Supplementary" Unicode character block. 1397 * @since 1.5 1398 */ 1399 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1400 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1401 "CYRILLIC SUPPLEMENTARY", 1402 "CYRILLICSUPPLEMENTARY", 1403 "CYRILLIC SUPPLEMENT", 1404 "CYRILLICSUPPLEMENT"); 1405 1406 /** 1407 * Constant for the "Tagalog" Unicode character block. 1408 * @since 1.5 1409 */ 1410 public static final UnicodeBlock TAGALOG = 1411 new UnicodeBlock("TAGALOG"); 1412 1413 /** 1414 * Constant for the "Hanunoo" Unicode character block. 1415 * @since 1.5 1416 */ 1417 public static final UnicodeBlock HANUNOO = 1418 new UnicodeBlock("HANUNOO"); 1419 1420 /** 1421 * Constant for the "Buhid" Unicode character block. 1422 * @since 1.5 1423 */ 1424 public static final UnicodeBlock BUHID = 1425 new UnicodeBlock("BUHID"); 1426 1427 /** 1428 * Constant for the "Tagbanwa" Unicode character block. 1429 * @since 1.5 1430 */ 1431 public static final UnicodeBlock TAGBANWA = 1432 new UnicodeBlock("TAGBANWA"); 1433 1434 /** 1435 * Constant for the "Limbu" Unicode character block. 
1436 * @since 1.5 1437 */ 1438 public static final UnicodeBlock LIMBU = 1439 new UnicodeBlock("LIMBU"); 1440 1441 /** 1442 * Constant for the "Tai Le" Unicode character block. 1443 * @since 1.5 1444 */ 1445 public static final UnicodeBlock TAI_LE = 1446 new UnicodeBlock("TAI_LE", 1447 "TAI LE", 1448 "TAILE"); 1449 1450 /** 1451 * Constant for the "Khmer Symbols" Unicode character block. 1452 * @since 1.5 1453 */ 1454 public static final UnicodeBlock KHMER_SYMBOLS = 1455 new UnicodeBlock("KHMER_SYMBOLS", 1456 "KHMER SYMBOLS", 1457 "KHMERSYMBOLS"); 1458 1459 /** 1460 * Constant for the "Phonetic Extensions" Unicode character block. 1461 * @since 1.5 1462 */ 1463 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1464 new UnicodeBlock("PHONETIC_EXTENSIONS", 1465 "PHONETIC EXTENSIONS", 1466 "PHONETICEXTENSIONS"); 1467 1468 /** 1469 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1470 * @since 1.5 1471 */ 1472 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1473 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1474 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1475 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1476 1477 /** 1478 * Constant for the "Supplemental Arrows-A" Unicode character block. 1479 * @since 1.5 1480 */ 1481 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1482 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1483 "SUPPLEMENTAL ARROWS-A", 1484 "SUPPLEMENTALARROWS-A"); 1485 1486 /** 1487 * Constant for the "Supplemental Arrows-B" Unicode character block. 1488 * @since 1.5 1489 */ 1490 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1491 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1492 "SUPPLEMENTAL ARROWS-B", 1493 "SUPPLEMENTALARROWS-B"); 1494 1495 /** 1496 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1497 * character block. 
1498 * @since 1.5 1499 */ 1500 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1501 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1502 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1503 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1504 1505 /** 1506 * Constant for the "Supplemental Mathematical Operators" Unicode 1507 * character block. 1508 * @since 1.5 1509 */ 1510 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1511 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1512 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1513 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1514 1515 /** 1516 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1517 * block. 1518 * @since 1.5 1519 */ 1520 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1521 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1522 "MISCELLANEOUS SYMBOLS AND ARROWS", 1523 "MISCELLANEOUSSYMBOLSANDARROWS"); 1524 1525 /** 1526 * Constant for the "Katakana Phonetic Extensions" Unicode character 1527 * block. 1528 * @since 1.5 1529 */ 1530 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1531 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1532 "KATAKANA PHONETIC EXTENSIONS", 1533 "KATAKANAPHONETICEXTENSIONS"); 1534 1535 /** 1536 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1537 * @since 1.5 1538 */ 1539 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1540 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1541 "YIJING HEXAGRAM SYMBOLS", 1542 "YIJINGHEXAGRAMSYMBOLS"); 1543 1544 /** 1545 * Constant for the "Variation Selectors" Unicode character block. 1546 * @since 1.5 1547 */ 1548 public static final UnicodeBlock VARIATION_SELECTORS = 1549 new UnicodeBlock("VARIATION_SELECTORS", 1550 "VARIATION SELECTORS", 1551 "VARIATIONSELECTORS"); 1552 1553 /** 1554 * Constant for the "Linear B Syllabary" Unicode character block. 
1555 * @since 1.5 1556 */ 1557 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1558 new UnicodeBlock("LINEAR_B_SYLLABARY", 1559 "LINEAR B SYLLABARY", 1560 "LINEARBSYLLABARY"); 1561 1562 /** 1563 * Constant for the "Linear B Ideograms" Unicode character block. 1564 * @since 1.5 1565 */ 1566 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1567 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1568 "LINEAR B IDEOGRAMS", 1569 "LINEARBIDEOGRAMS"); 1570 1571 /** 1572 * Constant for the "Aegean Numbers" Unicode character block. 1573 * @since 1.5 1574 */ 1575 public static final UnicodeBlock AEGEAN_NUMBERS = 1576 new UnicodeBlock("AEGEAN_NUMBERS", 1577 "AEGEAN NUMBERS", 1578 "AEGEANNUMBERS"); 1579 1580 /** 1581 * Constant for the "Old Italic" Unicode character block. 1582 * @since 1.5 1583 */ 1584 public static final UnicodeBlock OLD_ITALIC = 1585 new UnicodeBlock("OLD_ITALIC", 1586 "OLD ITALIC", 1587 "OLDITALIC"); 1588 1589 /** 1590 * Constant for the "Gothic" Unicode character block. 1591 * @since 1.5 1592 */ 1593 public static final UnicodeBlock GOTHIC = 1594 new UnicodeBlock("GOTHIC"); 1595 1596 /** 1597 * Constant for the "Ugaritic" Unicode character block. 1598 * @since 1.5 1599 */ 1600 public static final UnicodeBlock UGARITIC = 1601 new UnicodeBlock("UGARITIC"); 1602 1603 /** 1604 * Constant for the "Deseret" Unicode character block. 1605 * @since 1.5 1606 */ 1607 public static final UnicodeBlock DESERET = 1608 new UnicodeBlock("DESERET"); 1609 1610 /** 1611 * Constant for the "Shavian" Unicode character block. 1612 * @since 1.5 1613 */ 1614 public static final UnicodeBlock SHAVIAN = 1615 new UnicodeBlock("SHAVIAN"); 1616 1617 /** 1618 * Constant for the "Osmanya" Unicode character block. 1619 * @since 1.5 1620 */ 1621 public static final UnicodeBlock OSMANYA = 1622 new UnicodeBlock("OSMANYA"); 1623 1624 /** 1625 * Constant for the "Cypriot Syllabary" Unicode character block. 
1626 * @since 1.5 1627 */ 1628 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1629 new UnicodeBlock("CYPRIOT_SYLLABARY", 1630 "CYPRIOT SYLLABARY", 1631 "CYPRIOTSYLLABARY"); 1632 1633 /** 1634 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1635 * @since 1.5 1636 */ 1637 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1638 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1639 "BYZANTINE MUSICAL SYMBOLS", 1640 "BYZANTINEMUSICALSYMBOLS"); 1641 1642 /** 1643 * Constant for the "Musical Symbols" Unicode character block. 1644 * @since 1.5 1645 */ 1646 public static final UnicodeBlock MUSICAL_SYMBOLS = 1647 new UnicodeBlock("MUSICAL_SYMBOLS", 1648 "MUSICAL SYMBOLS", 1649 "MUSICALSYMBOLS"); 1650 1651 /** 1652 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1653 * @since 1.5 1654 */ 1655 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1656 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1657 "TAI XUAN JING SYMBOLS", 1658 "TAIXUANJINGSYMBOLS"); 1659 1660 /** 1661 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1662 * character block. 1663 * @since 1.5 1664 */ 1665 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1666 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1667 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1668 "MATHEMATICALALPHANUMERICSYMBOLS"); 1669 1670 /** 1671 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1672 * character block. 1673 * @since 1.5 1674 */ 1675 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1676 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1677 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1678 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1679 1680 /** 1681 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1682 * @since 1.5 1683 */ 1684 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1685 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1686 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1687 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1688 1689 /** 1690 * Constant for the "Tags" Unicode character block. 1691 * @since 1.5 1692 */ 1693 public static final UnicodeBlock TAGS = 1694 new UnicodeBlock("TAGS"); 1695 1696 /** 1697 * Constant for the "Variation Selectors Supplement" Unicode character 1698 * block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1702 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1703 "VARIATION SELECTORS SUPPLEMENT", 1704 "VARIATIONSELECTORSSUPPLEMENT"); 1705 1706 /** 1707 * Constant for the "Supplementary Private Use Area-A" Unicode character 1708 * block. 1709 * @since 1.5 1710 */ 1711 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1712 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1713 "SUPPLEMENTARY PRIVATE USE AREA-A", 1714 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1715 1716 /** 1717 * Constant for the "Supplementary Private Use Area-B" Unicode character 1718 * block. 1719 * @since 1.5 1720 */ 1721 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1722 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1723 "SUPPLEMENTARY PRIVATE USE AREA-B", 1724 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1725 1726 /** 1727 * Constant for the "High Surrogates" Unicode character block. 1728 * This block represents codepoint values in the high surrogate 1729 * range: U+D800 through U+DB7F 1730 * 1731 * @since 1.5 1732 */ 1733 public static final UnicodeBlock HIGH_SURROGATES = 1734 new UnicodeBlock("HIGH_SURROGATES", 1735 "HIGH SURROGATES", 1736 "HIGHSURROGATES"); 1737 1738 /** 1739 * Constant for the "High Private Use Surrogates" Unicode character 1740 * block. 
1741 * This block represents codepoint values in the private use high 1742 * surrogate range: U+DB80 through U+DBFF 1743 * 1744 * @since 1.5 1745 */ 1746 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1747 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1748 "HIGH PRIVATE USE SURROGATES", 1749 "HIGHPRIVATEUSESURROGATES"); 1750 1751 /** 1752 * Constant for the "Low Surrogates" Unicode character block. 1753 * This block represents codepoint values in the low surrogate 1754 * range: U+DC00 through U+DFFF 1755 * 1756 * @since 1.5 1757 */ 1758 public static final UnicodeBlock LOW_SURROGATES = 1759 new UnicodeBlock("LOW_SURROGATES", 1760 "LOW SURROGATES", 1761 "LOWSURROGATES"); 1762 1763 /** 1764 * Constant for the "Arabic Supplement" Unicode character block. 1765 * @since 1.7 1766 */ 1767 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1768 new UnicodeBlock("ARABIC_SUPPLEMENT", 1769 "ARABIC SUPPLEMENT", 1770 "ARABICSUPPLEMENT"); 1771 1772 /** 1773 * Constant for the "NKo" Unicode character block. 1774 * @since 1.7 1775 */ 1776 public static final UnicodeBlock NKO = 1777 new UnicodeBlock("NKO"); 1778 1779 /** 1780 * Constant for the "Samaritan" Unicode character block. 1781 * @since 1.7 1782 */ 1783 public static final UnicodeBlock SAMARITAN = 1784 new UnicodeBlock("SAMARITAN"); 1785 1786 /** 1787 * Constant for the "Mandaic" Unicode character block. 1788 * @since 1.7 1789 */ 1790 public static final UnicodeBlock MANDAIC = 1791 new UnicodeBlock("MANDAIC"); 1792 1793 /** 1794 * Constant for the "Ethiopic Supplement" Unicode character block. 1795 * @since 1.7 1796 */ 1797 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1798 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1799 "ETHIOPIC SUPPLEMENT", 1800 "ETHIOPICSUPPLEMENT"); 1801 1802 /** 1803 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1804 * Unicode character block. 
1805 * @since 1.7 1806 */ 1807 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1808 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1809 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1810 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1811 1812 /** 1813 * Constant for the "New Tai Lue" Unicode character block. 1814 * @since 1.7 1815 */ 1816 public static final UnicodeBlock NEW_TAI_LUE = 1817 new UnicodeBlock("NEW_TAI_LUE", 1818 "NEW TAI LUE", 1819 "NEWTAILUE"); 1820 1821 /** 1822 * Constant for the "Buginese" Unicode character block. 1823 * @since 1.7 1824 */ 1825 public static final UnicodeBlock BUGINESE = 1826 new UnicodeBlock("BUGINESE"); 1827 1828 /** 1829 * Constant for the "Tai Tham" Unicode character block. 1830 * @since 1.7 1831 */ 1832 public static final UnicodeBlock TAI_THAM = 1833 new UnicodeBlock("TAI_THAM", 1834 "TAI THAM", 1835 "TAITHAM"); 1836 1837 /** 1838 * Constant for the "Balinese" Unicode character block. 1839 * @since 1.7 1840 */ 1841 public static final UnicodeBlock BALINESE = 1842 new UnicodeBlock("BALINESE"); 1843 1844 /** 1845 * Constant for the "Sundanese" Unicode character block. 1846 * @since 1.7 1847 */ 1848 public static final UnicodeBlock SUNDANESE = 1849 new UnicodeBlock("SUNDANESE"); 1850 1851 /** 1852 * Constant for the "Batak" Unicode character block. 1853 * @since 1.7 1854 */ 1855 public static final UnicodeBlock BATAK = 1856 new UnicodeBlock("BATAK"); 1857 1858 /** 1859 * Constant for the "Lepcha" Unicode character block. 1860 * @since 1.7 1861 */ 1862 public static final UnicodeBlock LEPCHA = 1863 new UnicodeBlock("LEPCHA"); 1864 1865 /** 1866 * Constant for the "Ol Chiki" Unicode character block. 1867 * @since 1.7 1868 */ 1869 public static final UnicodeBlock OL_CHIKI = 1870 new UnicodeBlock("OL_CHIKI", 1871 "OL CHIKI", 1872 "OLCHIKI"); 1873 1874 /** 1875 * Constant for the "Vedic Extensions" Unicode character block. 
1876 * @since 1.7 1877 */ 1878 public static final UnicodeBlock VEDIC_EXTENSIONS = 1879 new UnicodeBlock("VEDIC_EXTENSIONS", 1880 "VEDIC EXTENSIONS", 1881 "VEDICEXTENSIONS"); 1882 1883 /** 1884 * Constant for the "Phonetic Extensions Supplement" Unicode character 1885 * block. 1886 * @since 1.7 1887 */ 1888 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1889 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1890 "PHONETIC EXTENSIONS SUPPLEMENT", 1891 "PHONETICEXTENSIONSSUPPLEMENT"); 1892 1893 /** 1894 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1895 * character block. 1896 * @since 1.7 1897 */ 1898 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1899 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1900 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1901 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1902 1903 /** 1904 * Constant for the "Glagolitic" Unicode character block. 1905 * @since 1.7 1906 */ 1907 public static final UnicodeBlock GLAGOLITIC = 1908 new UnicodeBlock("GLAGOLITIC"); 1909 1910 /** 1911 * Constant for the "Latin Extended-C" Unicode character block. 1912 * @since 1.7 1913 */ 1914 public static final UnicodeBlock LATIN_EXTENDED_C = 1915 new UnicodeBlock("LATIN_EXTENDED_C", 1916 "LATIN EXTENDED-C", 1917 "LATINEXTENDED-C"); 1918 1919 /** 1920 * Constant for the "Coptic" Unicode character block. 1921 * @since 1.7 1922 */ 1923 public static final UnicodeBlock COPTIC = 1924 new UnicodeBlock("COPTIC"); 1925 1926 /** 1927 * Constant for the "Georgian Supplement" Unicode character block. 1928 * @since 1.7 1929 */ 1930 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1931 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1932 "GEORGIAN SUPPLEMENT", 1933 "GEORGIANSUPPLEMENT"); 1934 1935 /** 1936 * Constant for the "Tifinagh" Unicode character block. 
1937 * @since 1.7 1938 */ 1939 public static final UnicodeBlock TIFINAGH = 1940 new UnicodeBlock("TIFINAGH"); 1941 1942 /** 1943 * Constant for the "Ethiopic Extended" Unicode character block. 1944 * @since 1.7 1945 */ 1946 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1947 new UnicodeBlock("ETHIOPIC_EXTENDED", 1948 "ETHIOPIC EXTENDED", 1949 "ETHIOPICEXTENDED"); 1950 1951 /** 1952 * Constant for the "Cyrillic Extended-A" Unicode character block. 1953 * @since 1.7 1954 */ 1955 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1956 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1957 "CYRILLIC EXTENDED-A", 1958 "CYRILLICEXTENDED-A"); 1959 1960 /** 1961 * Constant for the "Supplemental Punctuation" Unicode character block. 1962 * @since 1.7 1963 */ 1964 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1965 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1966 "SUPPLEMENTAL PUNCTUATION", 1967 "SUPPLEMENTALPUNCTUATION"); 1968 1969 /** 1970 * Constant for the "CJK Strokes" Unicode character block. 1971 * @since 1.7 1972 */ 1973 public static final UnicodeBlock CJK_STROKES = 1974 new UnicodeBlock("CJK_STROKES", 1975 "CJK STROKES", 1976 "CJKSTROKES"); 1977 1978 /** 1979 * Constant for the "Lisu" Unicode character block. 1980 * @since 1.7 1981 */ 1982 public static final UnicodeBlock LISU = 1983 new UnicodeBlock("LISU"); 1984 1985 /** 1986 * Constant for the "Vai" Unicode character block. 1987 * @since 1.7 1988 */ 1989 public static final UnicodeBlock VAI = 1990 new UnicodeBlock("VAI"); 1991 1992 /** 1993 * Constant for the "Cyrillic Extended-B" Unicode character block. 1994 * @since 1.7 1995 */ 1996 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1997 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1998 "CYRILLIC EXTENDED-B", 1999 "CYRILLICEXTENDED-B"); 2000 2001 /** 2002 * Constant for the "Bamum" Unicode character block. 
2003 * @since 1.7 2004 */ 2005 public static final UnicodeBlock BAMUM = 2006 new UnicodeBlock("BAMUM"); 2007 2008 /** 2009 * Constant for the "Modifier Tone Letters" Unicode character block. 2010 * @since 1.7 2011 */ 2012 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2013 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2014 "MODIFIER TONE LETTERS", 2015 "MODIFIERTONELETTERS"); 2016 2017 /** 2018 * Constant for the "Latin Extended-D" Unicode character block. 2019 * @since 1.7 2020 */ 2021 public static final UnicodeBlock LATIN_EXTENDED_D = 2022 new UnicodeBlock("LATIN_EXTENDED_D", 2023 "LATIN EXTENDED-D", 2024 "LATINEXTENDED-D"); 2025 2026 /** 2027 * Constant for the "Syloti Nagri" Unicode character block. 2028 * @since 1.7 2029 */ 2030 public static final UnicodeBlock SYLOTI_NAGRI = 2031 new UnicodeBlock("SYLOTI_NAGRI", 2032 "SYLOTI NAGRI", 2033 "SYLOTINAGRI"); 2034 2035 /** 2036 * Constant for the "Common Indic Number Forms" Unicode character block. 2037 * @since 1.7 2038 */ 2039 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2040 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2041 "COMMON INDIC NUMBER FORMS", 2042 "COMMONINDICNUMBERFORMS"); 2043 2044 /** 2045 * Constant for the "Phags-pa" Unicode character block. 2046 * @since 1.7 2047 */ 2048 public static final UnicodeBlock PHAGS_PA = 2049 new UnicodeBlock("PHAGS_PA", 2050 "PHAGS-PA"); 2051 2052 /** 2053 * Constant for the "Saurashtra" Unicode character block. 2054 * @since 1.7 2055 */ 2056 public static final UnicodeBlock SAURASHTRA = 2057 new UnicodeBlock("SAURASHTRA"); 2058 2059 /** 2060 * Constant for the "Devanagari Extended" Unicode character block. 2061 * @since 1.7 2062 */ 2063 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2064 new UnicodeBlock("DEVANAGARI_EXTENDED", 2065 "DEVANAGARI EXTENDED", 2066 "DEVANAGARIEXTENDED"); 2067 2068 /** 2069 * Constant for the "Kayah Li" Unicode character block. 
2070 * @since 1.7 2071 */ 2072 public static final UnicodeBlock KAYAH_LI = 2073 new UnicodeBlock("KAYAH_LI", 2074 "KAYAH LI", 2075 "KAYAHLI"); 2076 2077 /** 2078 * Constant for the "Rejang" Unicode character block. 2079 * @since 1.7 2080 */ 2081 public static final UnicodeBlock REJANG = 2082 new UnicodeBlock("REJANG"); 2083 2084 /** 2085 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2086 * @since 1.7 2087 */ 2088 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2089 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2090 "HANGUL JAMO EXTENDED-A", 2091 "HANGULJAMOEXTENDED-A"); 2092 2093 /** 2094 * Constant for the "Javanese" Unicode character block. 2095 * @since 1.7 2096 */ 2097 public static final UnicodeBlock JAVANESE = 2098 new UnicodeBlock("JAVANESE"); 2099 2100 /** 2101 * Constant for the "Cham" Unicode character block. 2102 * @since 1.7 2103 */ 2104 public static final UnicodeBlock CHAM = 2105 new UnicodeBlock("CHAM"); 2106 2107 /** 2108 * Constant for the "Myanmar Extended-A" Unicode character block. 2109 * @since 1.7 2110 */ 2111 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2112 new UnicodeBlock("MYANMAR_EXTENDED_A", 2113 "MYANMAR EXTENDED-A", 2114 "MYANMAREXTENDED-A"); 2115 2116 /** 2117 * Constant for the "Tai Viet" Unicode character block. 2118 * @since 1.7 2119 */ 2120 public static final UnicodeBlock TAI_VIET = 2121 new UnicodeBlock("TAI_VIET", 2122 "TAI VIET", 2123 "TAIVIET"); 2124 2125 /** 2126 * Constant for the "Ethiopic Extended-A" Unicode character block. 2127 * @since 1.7 2128 */ 2129 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2130 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2131 "ETHIOPIC EXTENDED-A", 2132 "ETHIOPICEXTENDED-A"); 2133 2134 /** 2135 * Constant for the "Meetei Mayek" Unicode character block. 
* @since 1.7
         */
        public static final UnicodeBlock MEETEI_MAYEK =
            new UnicodeBlock("MEETEI_MAYEK",
                             "MEETEI MAYEK",
                             "MEETEIMAYEK");

        /**
         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
                             "HANGUL JAMO EXTENDED-B",
                             "HANGULJAMOEXTENDED-B");

        /**
         * Constant for the "Vertical Forms" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock VERTICAL_FORMS =
            new UnicodeBlock("VERTICAL_FORMS",
                             "VERTICAL FORMS",
                             "VERTICALFORMS");

        /**
         * Constant for the "Ancient Greek Numbers" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
                             "ANCIENT GREEK NUMBERS",
                             "ANCIENTGREEKNUMBERS");

        /**
         * Constant for the "Ancient Symbols" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
            new UnicodeBlock("ANCIENT_SYMBOLS",
                             "ANCIENT SYMBOLS",
                             "ANCIENTSYMBOLS");

        /**
         * Constant for the "Phaistos Disc" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock PHAISTOS_DISC =
            new UnicodeBlock("PHAISTOS_DISC",
                             "PHAISTOS DISC",
                             "PHAISTOSDISC");

        /**
         * Constant for the "Lycian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock LYCIAN =
            new UnicodeBlock("LYCIAN");

        /**
         * Constant for the "Carian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock CARIAN =
            new UnicodeBlock("CARIAN");

        /**
         * Constant for the "Old Persian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_PERSIAN =
            new UnicodeBlock("OLD_PERSIAN",
                             "OLD PERSIAN",
                             "OLDPERSIAN");

        /**
         * Constant for the "Imperial Aramaic" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
            new UnicodeBlock("IMPERIAL_ARAMAIC",
                             "IMPERIAL ARAMAIC",
                             "IMPERIALARAMAIC");

        /**
         * Constant for the "Phoenician" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock PHOENICIAN =
            new UnicodeBlock("PHOENICIAN");

        /**
         * Constant for the "Lydian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock LYDIAN =
            new UnicodeBlock("LYDIAN");

        /**
         * Constant for the "Kharoshthi" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock KHAROSHTHI =
            new UnicodeBlock("KHAROSHTHI");

        /**
         * Constant for the "Old South Arabian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
            new UnicodeBlock("OLD_SOUTH_ARABIAN",
                             "OLD SOUTH ARABIAN",
                             "OLDSOUTHARABIAN");

        /**
         * Constant for the "Avestan" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock AVESTAN =
            new UnicodeBlock("AVESTAN");

        /**
         * Constant for the "Inscriptional Parthian" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
                             "INSCRIPTIONAL PARTHIAN",
                             "INSCRIPTIONALPARTHIAN");

        /**
         * Constant for the "Inscriptional Pahlavi" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
                             "INSCRIPTIONAL PAHLAVI",
                             "INSCRIPTIONALPAHLAVI");

        /**
         * Constant for the "Old Turkic" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_TURKIC =
            new UnicodeBlock("OLD_TURKIC",
                             "OLD TURKIC",
                             "OLDTURKIC");

        /**
         * Constant for the "Rumi Numeral Symbols" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
                             "RUMI NUMERAL SYMBOLS",
                             "RUMINUMERALSYMBOLS");

        /**
         * Constant for the "Brahmi" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock BRAHMI =
            new UnicodeBlock("BRAHMI");

        /**
         * Constant for the "Kaithi" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock KAITHI =
            new UnicodeBlock("KAITHI");

        /**
         * Constant for the "Cuneiform" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM =
            new UnicodeBlock("CUNEIFORM");

        /**
         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
         * character block.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                             "CUNEIFORM NUMBERS AND PUNCTUATION",
                             "CUNEIFORMNUMBERSANDPUNCTUATION");

        /**
         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
                             "EGYPTIAN HIEROGLYPHS",
                             "EGYPTIANHIEROGLYPHS");

        /**
         * Constant for the "Bamum Supplement" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
            new UnicodeBlock("BAMUM_SUPPLEMENT",
                             "BAMUM SUPPLEMENT",
                             "BAMUMSUPPLEMENT");

        /**
         * Constant for the "Kana Supplement" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock KANA_SUPPLEMENT =
            new UnicodeBlock("KANA_SUPPLEMENT",
                             "KANA SUPPLEMENT",
                             "KANASUPPLEMENT");

        /**
         * Constant for the "Ancient Greek Musical Notation" Unicode character
         * block.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                             "ANCIENT GREEK MUSICAL NOTATION",
                             "ANCIENTGREEKMUSICALNOTATION");

        /**
         * Constant for the "Counting Rod Numerals" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
            new UnicodeBlock("COUNTING_ROD_NUMERALS",
                             "COUNTING ROD NUMERALS",
                             "COUNTINGRODNUMERALS");

        /**
         * Constant for the "Mahjong Tiles" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock MAHJONG_TILES =
            new UnicodeBlock("MAHJONG_TILES",
                             "MAHJONG TILES",
                             "MAHJONGTILES");

        /**
         * Constant for the "Domino Tiles" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock DOMINO_TILES =
            new UnicodeBlock("DOMINO_TILES",
                             "DOMINO TILES",
                             "DOMINOTILES");

        /**
         * Constant for the "Playing Cards" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock PLAYING_CARDS =
            new UnicodeBlock("PLAYING_CARDS",
                             "PLAYING CARDS",
                             "PLAYINGCARDS");

        /**
         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
         * block.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
                             "ENCLOSEDALPHANUMERICSUPPLEMENT");

        /**
         * Constant for the "Enclosed Ideographic Supplement" Unicode character
         * block.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");

        /**
         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
         * character block.
         * @since 1.7
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");

        /**
         * Constant for the "Emoticons" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock EMOTICONS =
            new UnicodeBlock("EMOTICONS");

        /**
         * Constant for the "Transport And Map Symbols" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
                             "TRANSPORT AND MAP SYMBOLS",
                             "TRANSPORTANDMAPSYMBOLS");

        /**
         * Constant for the "Alchemical Symbols" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
                             "ALCHEMICAL SYMBOLS",
                             "ALCHEMICALSYMBOLS");

        /**
         * Constant for the "CJK Unified Ideographs Extension C" Unicode
         * character block.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");

        /**
         * Constant for the "CJK Unified Ideographs Extension D" Unicode
         * character block.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");

        /**
         * Constant for the "Arabic Extended-A" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock ARABIC_EXTENDED_A =
            new UnicodeBlock("ARABIC_EXTENDED_A",
                             "ARABIC EXTENDED-A",
                             "ARABICEXTENDED-A");

        /**
         * Constant for the "Sundanese Supplement" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
                             "SUNDANESE SUPPLEMENT",
                             "SUNDANESESUPPLEMENT");

        /**
         * Constant for the "Meetei Mayek Extensions" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
                             "MEETEI MAYEK EXTENSIONS",
                             "MEETEIMAYEKEXTENSIONS");

        /**
         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
                             "MEROITIC HIEROGLYPHS",
                             "MEROITICHIEROGLYPHS");

        /**
         * Constant for the "Meroitic Cursive" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock MEROITIC_CURSIVE =
            new UnicodeBlock("MEROITIC_CURSIVE",
                             "MEROITIC CURSIVE",
                             "MEROITICCURSIVE");

        /**
         * Constant for the "Sora Sompeng" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock SORA_SOMPENG =
            new UnicodeBlock("SORA_SOMPENG",
                             "SORA SOMPENG",
                             "SORASOMPENG");

        /**
         * Constant for the "Chakma" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock CHAKMA =
            new UnicodeBlock("CHAKMA");

        /**
         * Constant for the "Sharada" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock SHARADA =
            new UnicodeBlock("SHARADA");

        /**
         * Constant for the "Takri" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock TAKRI =
            new UnicodeBlock("TAKRI");

        /**
         * Constant for the "Miao" Unicode character block.
         * @since 1.8
         */
        public static final UnicodeBlock MIAO =
            new UnicodeBlock("MIAO");

        /**
         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
         * character block.
* @since 1.8
         */
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
                             "ARABICMATHEMATICALALPHABETICSYMBOLS");

        /*
         * First code point of every range used by the block lookup, listed in
         * ascending order. Entry i pairs with blocks[i]: a code point belongs
         * to the last range whose start is not greater than it. Ranges
         * commented "unassigned" have no block and pair with a null entry in
         * blocks. of(int) binary-searches this table, so it must stay sorted
         * and must keep exactly one entry per entry of blocks.
         */
        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   //             unassigned
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   //             unassigned
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   //             unassigned
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   //             unassigned
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   //             unassigned
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  //               unassigned
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  //               unassigned
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  //               unassigned
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  //               unassigned
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  //               unassigned
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  //               unassigned
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  //               unassigned
            0x11680,  // 11680..116CF; Takri
            0x116D0,  //               unassigned
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  //               unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  //               unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  //               unassigned
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
        };

        /*
         * Block for each range in blockStarts, stored at the same index.
         * A null entry marks a range that is not assigned to any block;
         * of(int) returns that null unchanged to its caller.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            null,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            null,
            SHARADA,
            null,
            TAKRI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            null,
            MIAO,
            null,
3043 KANA_SUPPLEMENT, 3044 null, 3045 BYZANTINE_MUSICAL_SYMBOLS, 3046 MUSICAL_SYMBOLS, 3047 ANCIENT_GREEK_MUSICAL_NOTATION, 3048 null, 3049 TAI_XUAN_JING_SYMBOLS, 3050 COUNTING_ROD_NUMERALS, 3051 null, 3052 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3053 null, 3054 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3055 null, 3056 MAHJONG_TILES, 3057 DOMINO_TILES, 3058 PLAYING_CARDS, 3059 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3060 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3061 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3062 EMOTICONS, 3063 null, 3064 TRANSPORT_AND_MAP_SYMBOLS, 3065 ALCHEMICAL_SYMBOLS, 3066 null, 3067 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3068 null, 3069 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3070 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3071 null, 3072 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3073 null, 3074 TAGS, 3075 null, 3076 VARIATION_SELECTORS_SUPPLEMENT, 3077 null, 3078 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3079 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3080 }; 3081 3082 3083 /** 3084 * Returns the object representing the Unicode block containing the 3085 * given character, or {@code null} if the character is not a 3086 * member of a defined block. 3087 * 3088 * <p><b>Note:</b> This method cannot handle 3089 * <a href="Character.html#supplementary"> supplementary 3090 * characters</a>. To support all Unicode characters, including 3091 * supplementary characters, use the {@link #of(int)} method. 3092 * 3093 * @param c The character in question 3094 * @return The {@code UnicodeBlock} instance representing the 3095 * Unicode block of which this character is a member, or 3096 * {@code null} if the character is not a member of any 3097 * Unicode block 3098 */ 3099 public static UnicodeBlock of(char c) { 3100 return of((int)c); 3101 } 3102 3103 /** 3104 * Returns the object representing the Unicode block 3105 * containing the given character (Unicode code point), or 3106 * {@code null} if the character is not a member of a 3107 * defined block. 
3108 * 3109 * @param codePoint the character (Unicode code point) in question. 3110 * @return The {@code UnicodeBlock} instance representing the 3111 * Unicode block of which this character is a member, or 3112 * {@code null} if the character is not a member of any 3113 * Unicode block 3114 * @exception IllegalArgumentException if the specified 3115 * {@code codePoint} is an invalid Unicode code point. 3116 * @see Character#isValidCodePoint(int) 3117 * @since 1.5 3118 */ 3119 public static UnicodeBlock of(int codePoint) { 3120 if (!isValidCodePoint(codePoint)) { 3121 throw new IllegalArgumentException(); 3122 } 3123 3124 int top, bottom, current; 3125 bottom = 0; 3126 top = blockStarts.length; 3127 current = top/2; 3128 3129 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3130 while (top - bottom > 1) { 3131 if (codePoint >= blockStarts[current]) { 3132 bottom = current; 3133 } else { 3134 top = current; 3135 } 3136 current = (top + bottom) / 2; 3137 } 3138 return blocks[current]; 3139 } 3140 3141 /** 3142 * Returns the UnicodeBlock with the given name. Block 3143 * names are determined by The Unicode Standard. The file 3144 * Blocks-<version>.txt defines blocks for a particular 3145 * version of the standard. The {@link Character} class specifies 3146 * the version of the standard that it supports. 3147 * <p> 3148 * This method accepts block names in the following forms: 3149 * <ol> 3150 * <li> Canonical block names as defined by the Unicode Standard. 3151 * For example, the standard defines a "Basic Latin" block. Therefore, this 3152 * method accepts "Basic Latin" as a valid block name. The documentation of 3153 * each UnicodeBlock provides the canonical name. 3154 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3155 * is a valid block name for the "Basic Latin" block. 3156 * <li>The text representation of each constant UnicodeBlock identifier. 
3157 * For example, this method will return the {@link #BASIC_LATIN} block if 3158 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3159 * hyphens in the canonical name with underscores. 3160 * </ol> 3161 * Finally, character case is ignored for all of the valid block name forms. 3162 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3163 * The en_US locale's case mapping rules are used to provide case-insensitive 3164 * string comparisons for block name validation. 3165 * <p> 3166 * If the Unicode Standard changes block names, both the previous and 3167 * current names will be accepted. 3168 * 3169 * @param blockName A {@code UnicodeBlock} name. 3170 * @return The {@code UnicodeBlock} instance identified 3171 * by {@code blockName} 3172 * @throws IllegalArgumentException if {@code blockName} is an 3173 * invalid name 3174 * @throws NullPointerException if {@code blockName} is null 3175 * @since 1.5 3176 */ 3177 public static final UnicodeBlock forName(String blockName) { 3178 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3179 if (block == null) { 3180 throw new IllegalArgumentException(); 3181 } 3182 return block; 3183 } 3184 } 3185 3186 3187 /** 3188 * A family of character subsets representing the character scripts 3189 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3190 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3191 * character is assigned to a single Unicode script, either a specific 3192 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3193 * one of the following three special values, 3194 * {@link Character.UnicodeScript#INHERITED Inherited}, 3195 * {@link Character.UnicodeScript#COMMON Common} or 3196 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3197 * 3198 * @since 1.7 3199 */ 3200 public static enum UnicodeScript { 3201 /** 3202 * Unicode script "Common". 3203 */ 3204 COMMON, 3205 3206 /** 3207 * Unicode script "Latin". 
3208 */ 3209 LATIN, 3210 3211 /** 3212 * Unicode script "Greek". 3213 */ 3214 GREEK, 3215 3216 /** 3217 * Unicode script "Cyrillic". 3218 */ 3219 CYRILLIC, 3220 3221 /** 3222 * Unicode script "Armenian". 3223 */ 3224 ARMENIAN, 3225 3226 /** 3227 * Unicode script "Hebrew". 3228 */ 3229 HEBREW, 3230 3231 /** 3232 * Unicode script "Arabic". 3233 */ 3234 ARABIC, 3235 3236 /** 3237 * Unicode script "Syriac". 3238 */ 3239 SYRIAC, 3240 3241 /** 3242 * Unicode script "Thaana". 3243 */ 3244 THAANA, 3245 3246 /** 3247 * Unicode script "Devanagari". 3248 */ 3249 DEVANAGARI, 3250 3251 /** 3252 * Unicode script "Bengali". 3253 */ 3254 BENGALI, 3255 3256 /** 3257 * Unicode script "Gurmukhi". 3258 */ 3259 GURMUKHI, 3260 3261 /** 3262 * Unicode script "Gujarati". 3263 */ 3264 GUJARATI, 3265 3266 /** 3267 * Unicode script "Oriya". 3268 */ 3269 ORIYA, 3270 3271 /** 3272 * Unicode script "Tamil". 3273 */ 3274 TAMIL, 3275 3276 /** 3277 * Unicode script "Telugu". 3278 */ 3279 TELUGU, 3280 3281 /** 3282 * Unicode script "Kannada". 3283 */ 3284 KANNADA, 3285 3286 /** 3287 * Unicode script "Malayalam". 3288 */ 3289 MALAYALAM, 3290 3291 /** 3292 * Unicode script "Sinhala". 3293 */ 3294 SINHALA, 3295 3296 /** 3297 * Unicode script "Thai". 3298 */ 3299 THAI, 3300 3301 /** 3302 * Unicode script "Lao". 3303 */ 3304 LAO, 3305 3306 /** 3307 * Unicode script "Tibetan". 3308 */ 3309 TIBETAN, 3310 3311 /** 3312 * Unicode script "Myanmar". 3313 */ 3314 MYANMAR, 3315 3316 /** 3317 * Unicode script "Georgian". 3318 */ 3319 GEORGIAN, 3320 3321 /** 3322 * Unicode script "Hangul". 3323 */ 3324 HANGUL, 3325 3326 /** 3327 * Unicode script "Ethiopic". 3328 */ 3329 ETHIOPIC, 3330 3331 /** 3332 * Unicode script "Cherokee". 3333 */ 3334 CHEROKEE, 3335 3336 /** 3337 * Unicode script "Canadian_Aboriginal". 3338 */ 3339 CANADIAN_ABORIGINAL, 3340 3341 /** 3342 * Unicode script "Ogham". 3343 */ 3344 OGHAM, 3345 3346 /** 3347 * Unicode script "Runic". 3348 */ 3349 RUNIC, 3350 3351 /** 3352 * Unicode script "Khmer". 
3353 */ 3354 KHMER, 3355 3356 /** 3357 * Unicode script "Mongolian". 3358 */ 3359 MONGOLIAN, 3360 3361 /** 3362 * Unicode script "Hiragana". 3363 */ 3364 HIRAGANA, 3365 3366 /** 3367 * Unicode script "Katakana". 3368 */ 3369 KATAKANA, 3370 3371 /** 3372 * Unicode script "Bopomofo". 3373 */ 3374 BOPOMOFO, 3375 3376 /** 3377 * Unicode script "Han". 3378 */ 3379 HAN, 3380 3381 /** 3382 * Unicode script "Yi". 3383 */ 3384 YI, 3385 3386 /** 3387 * Unicode script "Old_Italic". 3388 */ 3389 OLD_ITALIC, 3390 3391 /** 3392 * Unicode script "Gothic". 3393 */ 3394 GOTHIC, 3395 3396 /** 3397 * Unicode script "Deseret". 3398 */ 3399 DESERET, 3400 3401 /** 3402 * Unicode script "Inherited". 3403 */ 3404 INHERITED, 3405 3406 /** 3407 * Unicode script "Tagalog". 3408 */ 3409 TAGALOG, 3410 3411 /** 3412 * Unicode script "Hanunoo". 3413 */ 3414 HANUNOO, 3415 3416 /** 3417 * Unicode script "Buhid". 3418 */ 3419 BUHID, 3420 3421 /** 3422 * Unicode script "Tagbanwa". 3423 */ 3424 TAGBANWA, 3425 3426 /** 3427 * Unicode script "Limbu". 3428 */ 3429 LIMBU, 3430 3431 /** 3432 * Unicode script "Tai_Le". 3433 */ 3434 TAI_LE, 3435 3436 /** 3437 * Unicode script "Linear_B". 3438 */ 3439 LINEAR_B, 3440 3441 /** 3442 * Unicode script "Ugaritic". 3443 */ 3444 UGARITIC, 3445 3446 /** 3447 * Unicode script "Shavian". 3448 */ 3449 SHAVIAN, 3450 3451 /** 3452 * Unicode script "Osmanya". 3453 */ 3454 OSMANYA, 3455 3456 /** 3457 * Unicode script "Cypriot". 3458 */ 3459 CYPRIOT, 3460 3461 /** 3462 * Unicode script "Braille". 3463 */ 3464 BRAILLE, 3465 3466 /** 3467 * Unicode script "Buginese". 3468 */ 3469 BUGINESE, 3470 3471 /** 3472 * Unicode script "Coptic". 3473 */ 3474 COPTIC, 3475 3476 /** 3477 * Unicode script "New_Tai_Lue". 3478 */ 3479 NEW_TAI_LUE, 3480 3481 /** 3482 * Unicode script "Glagolitic". 3483 */ 3484 GLAGOLITIC, 3485 3486 /** 3487 * Unicode script "Tifinagh". 3488 */ 3489 TIFINAGH, 3490 3491 /** 3492 * Unicode script "Syloti_Nagri". 
3493 */ 3494 SYLOTI_NAGRI, 3495 3496 /** 3497 * Unicode script "Old_Persian". 3498 */ 3499 OLD_PERSIAN, 3500 3501 /** 3502 * Unicode script "Kharoshthi". 3503 */ 3504 KHAROSHTHI, 3505 3506 /** 3507 * Unicode script "Balinese". 3508 */ 3509 BALINESE, 3510 3511 /** 3512 * Unicode script "Cuneiform". 3513 */ 3514 CUNEIFORM, 3515 3516 /** 3517 * Unicode script "Phoenician". 3518 */ 3519 PHOENICIAN, 3520 3521 /** 3522 * Unicode script "Phags_Pa". 3523 */ 3524 PHAGS_PA, 3525 3526 /** 3527 * Unicode script "Nko". 3528 */ 3529 NKO, 3530 3531 /** 3532 * Unicode script "Sundanese". 3533 */ 3534 SUNDANESE, 3535 3536 /** 3537 * Unicode script "Batak". 3538 */ 3539 BATAK, 3540 3541 /** 3542 * Unicode script "Lepcha". 3543 */ 3544 LEPCHA, 3545 3546 /** 3547 * Unicode script "Ol_Chiki". 3548 */ 3549 OL_CHIKI, 3550 3551 /** 3552 * Unicode script "Vai". 3553 */ 3554 VAI, 3555 3556 /** 3557 * Unicode script "Saurashtra". 3558 */ 3559 SAURASHTRA, 3560 3561 /** 3562 * Unicode script "Kayah_Li". 3563 */ 3564 KAYAH_LI, 3565 3566 /** 3567 * Unicode script "Rejang". 3568 */ 3569 REJANG, 3570 3571 /** 3572 * Unicode script "Lycian". 3573 */ 3574 LYCIAN, 3575 3576 /** 3577 * Unicode script "Carian". 3578 */ 3579 CARIAN, 3580 3581 /** 3582 * Unicode script "Lydian". 3583 */ 3584 LYDIAN, 3585 3586 /** 3587 * Unicode script "Cham". 3588 */ 3589 CHAM, 3590 3591 /** 3592 * Unicode script "Tai_Tham". 3593 */ 3594 TAI_THAM, 3595 3596 /** 3597 * Unicode script "Tai_Viet". 3598 */ 3599 TAI_VIET, 3600 3601 /** 3602 * Unicode script "Avestan". 3603 */ 3604 AVESTAN, 3605 3606 /** 3607 * Unicode script "Egyptian_Hieroglyphs". 3608 */ 3609 EGYPTIAN_HIEROGLYPHS, 3610 3611 /** 3612 * Unicode script "Samaritan". 3613 */ 3614 SAMARITAN, 3615 3616 /** 3617 * Unicode script "Mandaic". 3618 */ 3619 MANDAIC, 3620 3621 /** 3622 * Unicode script "Lisu". 3623 */ 3624 LISU, 3625 3626 /** 3627 * Unicode script "Bamum". 3628 */ 3629 BAMUM, 3630 3631 /** 3632 * Unicode script "Javanese". 
3633 */ 3634 JAVANESE, 3635 3636 /** 3637 * Unicode script "Meetei_Mayek". 3638 */ 3639 MEETEI_MAYEK, 3640 3641 /** 3642 * Unicode script "Imperial_Aramaic". 3643 */ 3644 IMPERIAL_ARAMAIC, 3645 3646 /** 3647 * Unicode script "Old_South_Arabian". 3648 */ 3649 OLD_SOUTH_ARABIAN, 3650 3651 /** 3652 * Unicode script "Inscriptional_Parthian". 3653 */ 3654 INSCRIPTIONAL_PARTHIAN, 3655 3656 /** 3657 * Unicode script "Inscriptional_Pahlavi". 3658 */ 3659 INSCRIPTIONAL_PAHLAVI, 3660 3661 /** 3662 * Unicode script "Old_Turkic". 3663 */ 3664 OLD_TURKIC, 3665 3666 /** 3667 * Unicode script "Brahmi". 3668 */ 3669 BRAHMI, 3670 3671 /** 3672 * Unicode script "Kaithi". 3673 */ 3674 KAITHI, 3675 3676 /** 3677 * Unicode script "Meroitic Hieroglyphs". 3678 */ 3679 MEROITIC_HIEROGLYPHS, 3680 3681 /** 3682 * Unicode script "Meroitic Cursive". 3683 */ 3684 MEROITIC_CURSIVE, 3685 3686 /** 3687 * Unicode script "Sora Sompeng". 3688 */ 3689 SORA_SOMPENG, 3690 3691 /** 3692 * Unicode script "Chakma". 3693 */ 3694 CHAKMA, 3695 3696 /** 3697 * Unicode script "Sharada". 3698 */ 3699 SHARADA, 3700 3701 /** 3702 * Unicode script "Takri". 3703 */ 3704 TAKRI, 3705 3706 /** 3707 * Unicode script "Miao". 3708 */ 3709 MIAO, 3710 3711 /** 3712 * Unicode script "Unknown". 
3713 */ 3714 UNKNOWN; 3715 3716 private static final int[] scriptStarts = { 3717 0x0000, // 0000..0040; COMMON 3718 0x0041, // 0041..005A; LATIN 3719 0x005B, // 005B..0060; COMMON 3720 0x0061, // 0061..007A; LATIN 3721 0x007B, // 007B..00A9; COMMON 3722 0x00AA, // 00AA..00AA; LATIN 3723 0x00AB, // 00AB..00B9; COMMON 3724 0x00BA, // 00BA..00BA; LATIN 3725 0x00BB, // 00BB..00BF; COMMON 3726 0x00C0, // 00C0..00D6; LATIN 3727 0x00D7, // 00D7..00D7; COMMON 3728 0x00D8, // 00D8..00F6; LATIN 3729 0x00F7, // 00F7..00F7; COMMON 3730 0x00F8, // 00F8..02B8; LATIN 3731 0x02B9, // 02B9..02DF; COMMON 3732 0x02E0, // 02E0..02E4; LATIN 3733 0x02E5, // 02E5..02E9; COMMON 3734 0x02EA, // 02EA..02EB; BOPOMOFO 3735 0x02EC, // 02EC..02FF; COMMON 3736 0x0300, // 0300..036F; INHERITED 3737 0x0370, // 0370..0373; GREEK 3738 0x0374, // 0374..0374; COMMON 3739 0x0375, // 0375..037D; GREEK 3740 0x037E, // 037E..0383; COMMON 3741 0x0384, // 0384..0384; GREEK 3742 0x0385, // 0385..0385; COMMON 3743 0x0386, // 0386..0386; GREEK 3744 0x0387, // 0387..0387; COMMON 3745 0x0388, // 0388..03E1; GREEK 3746 0x03E2, // 03E2..03EF; COPTIC 3747 0x03F0, // 03F0..03FF; GREEK 3748 0x0400, // 0400..0484; CYRILLIC 3749 0x0485, // 0485..0486; INHERITED 3750 0x0487, // 0487..0530; CYRILLIC 3751 0x0531, // 0531..0588; ARMENIAN 3752 0x0589, // 0589..0589; COMMON 3753 0x058A, // 058A..0590; ARMENIAN 3754 0x0591, // 0591..05FF; HEBREW 3755 0x0600, // 0600..060B; ARABIC 3756 0x060C, // 060C..060C; COMMON 3757 0x060D, // 060D..061A; ARABIC 3758 0x061B, // 061B..061D; COMMON 3759 0x061E, // 061E..061E; ARABIC 3760 0x061F, // 061F..061F; COMMON 3761 0x0620, // 0620..063F; ARABIC 3762 0x0640, // 0640..0640; COMMON 3763 0x0641, // 0641..064A; ARABIC 3764 0x064B, // 064B..0655; INHERITED 3765 0x0656, // 0656..065F; ARABIC 3766 0x0660, // 0660..0669; COMMON 3767 0x066A, // 066A..066F; ARABIC 3768 0x0670, // 0670..0670; INHERITED 3769 0x0671, // 0671..06DC; ARABIC 3770 0x06DD, // 06DD..06DD; COMMON 3771 0x06DE, // 
06DE..06FF; ARABIC 3772 0x0700, // 0700..074F; SYRIAC 3773 0x0750, // 0750..077F; ARABIC 3774 0x0780, // 0780..07BF; THAANA 3775 0x07C0, // 07C0..07FF; NKO 3776 0x0800, // 0800..083F; SAMARITAN 3777 0x0840, // 0840..089F; MANDAIC 3778 0x08A0, // 08A0..08FF; ARABIC 3779 0x0900, // 0900..0950; DEVANAGARI 3780 0x0951, // 0951..0952; INHERITED 3781 0x0953, // 0953..0963; DEVANAGARI 3782 0x0964, // 0964..0965; COMMON 3783 0x0966, // 0966..0980; DEVANAGARI 3784 0x0981, // 0981..0A00; BENGALI 3785 0x0A01, // 0A01..0A80; GURMUKHI 3786 0x0A81, // 0A81..0B00; GUJARATI 3787 0x0B01, // 0B01..0B81; ORIYA 3788 0x0B82, // 0B82..0C00; TAMIL 3789 0x0C01, // 0C01..0C81; TELUGU 3790 0x0C82, // 0C82..0CF0; KANNADA 3791 0x0D02, // 0D02..0D81; MALAYALAM 3792 0x0D82, // 0D82..0E00; SINHALA 3793 0x0E01, // 0E01..0E3E; THAI 3794 0x0E3F, // 0E3F..0E3F; COMMON 3795 0x0E40, // 0E40..0E80; THAI 3796 0x0E81, // 0E81..0EFF; LAO 3797 0x0F00, // 0F00..0FD4; TIBETAN 3798 0x0FD5, // 0FD5..0FD8; COMMON 3799 0x0FD9, // 0FD9..0FFF; TIBETAN 3800 0x1000, // 1000..109F; MYANMAR 3801 0x10A0, // 10A0..10FA; GEORGIAN 3802 0x10FB, // 10FB..10FB; COMMON 3803 0x10FC, // 10FC..10FF; GEORGIAN 3804 0x1100, // 1100..11FF; HANGUL 3805 0x1200, // 1200..139F; ETHIOPIC 3806 0x13A0, // 13A0..13FF; CHEROKEE 3807 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3808 0x1680, // 1680..169F; OGHAM 3809 0x16A0, // 16A0..16EA; RUNIC 3810 0x16EB, // 16EB..16ED; COMMON 3811 0x16EE, // 16EE..16FF; RUNIC 3812 0x1700, // 1700..171F; TAGALOG 3813 0x1720, // 1720..1734; HANUNOO 3814 0x1735, // 1735..173F; COMMON 3815 0x1740, // 1740..175F; BUHID 3816 0x1760, // 1760..177F; TAGBANWA 3817 0x1780, // 1780..17FF; KHMER 3818 0x1800, // 1800..1801; MONGOLIAN 3819 0x1802, // 1802..1803; COMMON 3820 0x1804, // 1804..1804; MONGOLIAN 3821 0x1805, // 1805..1805; COMMON 3822 0x1806, // 1806..18AF; MONGOLIAN 3823 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3824 0x1900, // 1900..194F; LIMBU 3825 0x1950, // 1950..197F; TAI_LE 3826 0x1980, // 1980..19DF; 
NEW_TAI_LUE 3827 0x19E0, // 19E0..19FF; KHMER 3828 0x1A00, // 1A00..1A1F; BUGINESE 3829 0x1A20, // 1A20..1AFF; TAI_THAM 3830 0x1B00, // 1B00..1B7F; BALINESE 3831 0x1B80, // 1B80..1BBF; SUNDANESE 3832 0x1BC0, // 1BC0..1BFF; BATAK 3833 0x1C00, // 1C00..1C4F; LEPCHA 3834 0x1C50, // 1C50..1CBF; OL_CHIKI 3835 0x1CC0, // 1CC0..1CCF; SUNDANESE 3836 0x1CD0, // 1CD0..1CD2; INHERITED 3837 0x1CD3, // 1CD3..1CD3; COMMON 3838 0x1CD4, // 1CD4..1CE0; INHERITED 3839 0x1CE1, // 1CE1..1CE1; COMMON 3840 0x1CE2, // 1CE2..1CE8; INHERITED 3841 0x1CE9, // 1CE9..1CEC; COMMON 3842 0x1CED, // 1CED..1CED; INHERITED 3843 0x1CEE, // 1CEE..1CF3; COMMON 3844 0x1CF4, // 1CF4..1CF4; INHERITED 3845 0x1CF5, // 1CF5..1CFF; COMMON 3846 0x1D00, // 1D00..1D25; LATIN 3847 0x1D26, // 1D26..1D2A; GREEK 3848 0x1D2B, // 1D2B..1D2B; CYRILLIC 3849 0x1D2C, // 1D2C..1D5C; LATIN 3850 0x1D5D, // 1D5D..1D61; GREEK 3851 0x1D62, // 1D62..1D65; LATIN 3852 0x1D66, // 1D66..1D6A; GREEK 3853 0x1D6B, // 1D6B..1D77; LATIN 3854 0x1D78, // 1D78..1D78; CYRILLIC 3855 0x1D79, // 1D79..1DBE; LATIN 3856 0x1DBF, // 1DBF..1DBF; GREEK 3857 0x1DC0, // 1DC0..1DFF; INHERITED 3858 0x1E00, // 1E00..1EFF; LATIN 3859 0x1F00, // 1F00..1FFF; GREEK 3860 0x2000, // 2000..200B; COMMON 3861 0x200C, // 200C..200D; INHERITED 3862 0x200E, // 200E..2070; COMMON 3863 0x2071, // 2071..2073; LATIN 3864 0x2074, // 2074..207E; COMMON 3865 0x207F, // 207F..207F; LATIN 3866 0x2080, // 2080..208F; COMMON 3867 0x2090, // 2090..209F; LATIN 3868 0x20A0, // 20A0..20CF; COMMON 3869 0x20D0, // 20D0..20FF; INHERITED 3870 0x2100, // 2100..2125; COMMON 3871 0x2126, // 2126..2126; GREEK 3872 0x2127, // 2127..2129; COMMON 3873 0x212A, // 212A..212B; LATIN 3874 0x212C, // 212C..2131; COMMON 3875 0x2132, // 2132..2132; LATIN 3876 0x2133, // 2133..214D; COMMON 3877 0x214E, // 214E..214E; LATIN 3878 0x214F, // 214F..215F; COMMON 3879 0x2160, // 2160..2188; LATIN 3880 0x2189, // 2189..27FF; COMMON 3881 0x2800, // 2800..28FF; BRAILLE 3882 0x2900, // 2900..2BFF; COMMON 3883 
0x2C00, // 2C00..2C5F; GLAGOLITIC 3884 0x2C60, // 2C60..2C7F; LATIN 3885 0x2C80, // 2C80..2CFF; COPTIC 3886 0x2D00, // 2D00..2D2F; GEORGIAN 3887 0x2D30, // 2D30..2D7F; TIFINAGH 3888 0x2D80, // 2D80..2DDF; ETHIOPIC 3889 0x2DE0, // 2DE0..2DFF; CYRILLIC 3890 0x2E00, // 2E00..2E7F; COMMON 3891 0x2E80, // 2E80..2FEF; HAN 3892 0x2FF0, // 2FF0..3004; COMMON 3893 0x3005, // 3005..3005; HAN 3894 0x3006, // 3006..3006; COMMON 3895 0x3007, // 3007..3007; HAN 3896 0x3008, // 3008..3020; COMMON 3897 0x3021, // 3021..3029; HAN 3898 0x302A, // 302A..302D; INHERITED 3899 0x302E, // 302E..302F; HANGUL 3900 0x3030, // 3030..3037; COMMON 3901 0x3038, // 3038..303B; HAN 3902 0x303C, // 303C..3040; COMMON 3903 0x3041, // 3041..3098; HIRAGANA 3904 0x3099, // 3099..309A; INHERITED 3905 0x309B, // 309B..309C; COMMON 3906 0x309D, // 309D..309F; HIRAGANA 3907 0x30A0, // 30A0..30A0; COMMON 3908 0x30A1, // 30A1..30FA; KATAKANA 3909 0x30FB, // 30FB..30FC; COMMON 3910 0x30FD, // 30FD..3104; KATAKANA 3911 0x3105, // 3105..3130; BOPOMOFO 3912 0x3131, // 3131..318F; HANGUL 3913 0x3190, // 3190..319F; COMMON 3914 0x31A0, // 31A0..31BF; BOPOMOFO 3915 0x31C0, // 31C0..31EF; COMMON 3916 0x31F0, // 31F0..31FF; KATAKANA 3917 0x3200, // 3200..321F; HANGUL 3918 0x3220, // 3220..325F; COMMON 3919 0x3260, // 3260..327E; HANGUL 3920 0x327F, // 327F..32CF; COMMON 3921 0x32D0, // 32D0..3357; KATAKANA 3922 0x3358, // 3358..33FF; COMMON 3923 0x3400, // 3400..4DBF; HAN 3924 0x4DC0, // 4DC0..4DFF; COMMON 3925 0x4E00, // 4E00..9FFF; HAN 3926 0xA000, // A000..A4CF; YI 3927 0xA4D0, // A4D0..A4FF; LISU 3928 0xA500, // A500..A63F; VAI 3929 0xA640, // A640..A69F; CYRILLIC 3930 0xA6A0, // A6A0..A6FF; BAMUM 3931 0xA700, // A700..A721; COMMON 3932 0xA722, // A722..A787; LATIN 3933 0xA788, // A788..A78A; COMMON 3934 0xA78B, // A78B..A7FF; LATIN 3935 0xA800, // A800..A82F; SYLOTI_NAGRI 3936 0xA830, // A830..A83F; COMMON 3937 0xA840, // A840..A87F; PHAGS_PA 3938 0xA880, // A880..A8DF; SAURASHTRA 3939 0xA8E0, // A8E0..A8FF; 
DEVANAGARI 3940 0xA900, // A900..A92F; KAYAH_LI 3941 0xA930, // A930..A95F; REJANG 3942 0xA960, // A960..A97F; HANGUL 3943 0xA980, // A980..A9FF; JAVANESE 3944 0xAA00, // AA00..AA5F; CHAM 3945 0xAA60, // AA60..AA7F; MYANMAR 3946 0xAA80, // AA80..AADF; TAI_VIET 3947 0xAAE0, // AAE0..AB00; MEETEI_MAYEK 3948 0xAB01, // AB01..ABBF; ETHIOPIC 3949 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3950 0xAC00, // AC00..D7FB; HANGUL 3951 0xD7FC, // D7FC..F8FF; UNKNOWN 3952 0xF900, // F900..FAFF; HAN 3953 0xFB00, // FB00..FB12; LATIN 3954 0xFB13, // FB13..FB1C; ARMENIAN 3955 0xFB1D, // FB1D..FB4F; HEBREW 3956 0xFB50, // FB50..FD3D; ARABIC 3957 0xFD3E, // FD3E..FD4F; COMMON 3958 0xFD50, // FD50..FDFC; ARABIC 3959 0xFDFD, // FDFD..FDFF; COMMON 3960 0xFE00, // FE00..FE0F; INHERITED 3961 0xFE10, // FE10..FE1F; COMMON 3962 0xFE20, // FE20..FE2F; INHERITED 3963 0xFE30, // FE30..FE6F; COMMON 3964 0xFE70, // FE70..FEFE; ARABIC 3965 0xFEFF, // FEFF..FF20; COMMON 3966 0xFF21, // FF21..FF3A; LATIN 3967 0xFF3B, // FF3B..FF40; COMMON 3968 0xFF41, // FF41..FF5A; LATIN 3969 0xFF5B, // FF5B..FF65; COMMON 3970 0xFF66, // FF66..FF6F; KATAKANA 3971 0xFF70, // FF70..FF70; COMMON 3972 0xFF71, // FF71..FF9D; KATAKANA 3973 0xFF9E, // FF9E..FF9F; COMMON 3974 0xFFA0, // FFA0..FFDF; HANGUL 3975 0xFFE0, // FFE0..FFFF; COMMON 3976 0x10000, // 10000..100FF; LINEAR_B 3977 0x10100, // 10100..1013F; COMMON 3978 0x10140, // 10140..1018F; GREEK 3979 0x10190, // 10190..101FC; COMMON 3980 0x101FD, // 101FD..1027F; INHERITED 3981 0x10280, // 10280..1029F; LYCIAN 3982 0x102A0, // 102A0..102FF; CARIAN 3983 0x10300, // 10300..1032F; OLD_ITALIC 3984 0x10330, // 10330..1037F; GOTHIC 3985 0x10380, // 10380..1039F; UGARITIC 3986 0x103A0, // 103A0..103FF; OLD_PERSIAN 3987 0x10400, // 10400..1044F; DESERET 3988 0x10450, // 10450..1047F; SHAVIAN 3989 0x10480, // 10480..107FF; OSMANYA 3990 0x10800, // 10800..1083F; CYPRIOT 3991 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3992 0x10900, // 10900..1091F; PHOENICIAN 3993 0x10920, // 
10920..1097F; LYDIAN 3994 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 3995 0x109A0, // 109A0..109FF; MEROITIC_CURSIVE 3996 0x10A00, // 10A00..10A5F; KHAROSHTHI 3997 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 3998 0x10B00, // 10B00..10B3F; AVESTAN 3999 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 4000 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 4001 0x10C00, // 10C00..10E5F; OLD_TURKIC 4002 0x10E60, // 10E60..10FFF; ARABIC 4003 0x11000, // 11000..1107F; BRAHMI 4004 0x11080, // 11080..110CF; KAITHI 4005 0x110D0, // 110D0..110FF; SORA_SOMPENG 4006 0x11100, // 11100..1117F; CHAKMA 4007 0x11180, // 11180..1167F; SHARADA 4008 0x11680, // 11680..116CF; TAKRI 4009 0x12000, // 12000..12FFF; CUNEIFORM 4010 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 4011 0x16800, // 16800..16A38; BAMUM 4012 0x16F00, // 16F00..16F9F; MIAO 4013 0x1B000, // 1B000..1B000; KATAKANA 4014 0x1B001, // 1B001..1CFFF; HIRAGANA 4015 0x1D000, // 1D000..1D166; COMMON 4016 0x1D167, // 1D167..1D169; INHERITED 4017 0x1D16A, // 1D16A..1D17A; COMMON 4018 0x1D17B, // 1D17B..1D182; INHERITED 4019 0x1D183, // 1D183..1D184; COMMON 4020 0x1D185, // 1D185..1D18B; INHERITED 4021 0x1D18C, // 1D18C..1D1A9; COMMON 4022 0x1D1AA, // 1D1AA..1D1AD; INHERITED 4023 0x1D1AE, // 1D1AE..1D1FF; COMMON 4024 0x1D200, // 1D200..1D2FF; GREEK 4025 0x1D300, // 1D300..1EDFF; COMMON 4026 0x1EE00, // 1EE00..1EFFF; ARABIC 4027 0x1F000, // 1F000..1F1FF; COMMON 4028 0x1F200, // 1F200..1F200; HIRAGANA 4029 0x1F201, // 1F201..1FFFF; COMMON 4030 0x20000, // 20000..E0000; HAN 4031 0xE0001, // E0001..E00FF; COMMON 4032 0xE0100, // E0100..E01EF; INHERITED 4033 0xE01F0 // E01F0..10FFFF; UNKNOWN 4034 4035 }; 4036 4037 private static final UnicodeScript[] scripts = { 4038 COMMON, 4039 LATIN, 4040 COMMON, 4041 LATIN, 4042 COMMON, 4043 LATIN, 4044 COMMON, 4045 LATIN, 4046 COMMON, 4047 LATIN, 4048 COMMON, 4049 LATIN, 4050 COMMON, 4051 LATIN, 4052 COMMON, 4053 LATIN, 4054 COMMON, 4055 BOPOMOFO, 4056 COMMON, 4057 INHERITED, 4058 GREEK, 4059
COMMON, 4060 GREEK, 4061 COMMON, 4062 GREEK, 4063 COMMON, 4064 GREEK, 4065 COMMON, 4066 GREEK, 4067 COPTIC, 4068 GREEK, 4069 CYRILLIC, 4070 INHERITED, 4071 CYRILLIC, 4072 ARMENIAN, 4073 COMMON, 4074 ARMENIAN, 4075 HEBREW, 4076 ARABIC, 4077 COMMON, 4078 ARABIC, 4079 COMMON, 4080 ARABIC, 4081 COMMON, 4082 ARABIC, 4083 COMMON, 4084 ARABIC, 4085 INHERITED, 4086 ARABIC, 4087 COMMON, 4088 ARABIC, 4089 INHERITED, 4090 ARABIC, 4091 COMMON, 4092 ARABIC, 4093 SYRIAC, 4094 ARABIC, 4095 THAANA, 4096 NKO, 4097 SAMARITAN, 4098 MANDAIC, 4099 ARABIC, 4100 DEVANAGARI, 4101 INHERITED, 4102 DEVANAGARI, 4103 COMMON, 4104 DEVANAGARI, 4105 BENGALI, 4106 GURMUKHI, 4107 GUJARATI, 4108 ORIYA, 4109 TAMIL, 4110 TELUGU, 4111 KANNADA, 4112 MALAYALAM, 4113 SINHALA, 4114 THAI, 4115 COMMON, 4116 THAI, 4117 LAO, 4118 TIBETAN, 4119 COMMON, 4120 TIBETAN, 4121 MYANMAR, 4122 GEORGIAN, 4123 COMMON, 4124 GEORGIAN, 4125 HANGUL, 4126 ETHIOPIC, 4127 CHEROKEE, 4128 CANADIAN_ABORIGINAL, 4129 OGHAM, 4130 RUNIC, 4131 COMMON, 4132 RUNIC, 4133 TAGALOG, 4134 HANUNOO, 4135 COMMON, 4136 BUHID, 4137 TAGBANWA, 4138 KHMER, 4139 MONGOLIAN, 4140 COMMON, 4141 MONGOLIAN, 4142 COMMON, 4143 MONGOLIAN, 4144 CANADIAN_ABORIGINAL, 4145 LIMBU, 4146 TAI_LE, 4147 NEW_TAI_LUE, 4148 KHMER, 4149 BUGINESE, 4150 TAI_THAM, 4151 BALINESE, 4152 SUNDANESE, 4153 BATAK, 4154 LEPCHA, 4155 OL_CHIKI, 4156 SUNDANESE, 4157 INHERITED, 4158 COMMON, 4159 INHERITED, 4160 COMMON, 4161 INHERITED, 4162 COMMON, 4163 INHERITED, 4164 COMMON, 4165 INHERITED, 4166 COMMON, 4167 LATIN, 4168 GREEK, 4169 CYRILLIC, 4170 LATIN, 4171 GREEK, 4172 LATIN, 4173 GREEK, 4174 LATIN, 4175 CYRILLIC, 4176 LATIN, 4177 GREEK, 4178 INHERITED, 4179 LATIN, 4180 GREEK, 4181 COMMON, 4182 INHERITED, 4183 COMMON, 4184 LATIN, 4185 COMMON, 4186 LATIN, 4187 COMMON, 4188 LATIN, 4189 COMMON, 4190 INHERITED, 4191 COMMON, 4192 GREEK, 4193 COMMON, 4194 LATIN, 4195 COMMON, 4196 LATIN, 4197 COMMON, 4198 LATIN, 4199 COMMON, 4200 LATIN, 4201 COMMON, 4202 BRAILLE, 4203 COMMON, 4204 GLAGOLITIC, 
4205 LATIN, 4206 COPTIC, 4207 GEORGIAN, 4208 TIFINAGH, 4209 ETHIOPIC, 4210 CYRILLIC, 4211 COMMON, 4212 HAN, 4213 COMMON, 4214 HAN, 4215 COMMON, 4216 HAN, 4217 COMMON, 4218 HAN, 4219 INHERITED, 4220 HANGUL, 4221 COMMON, 4222 HAN, 4223 COMMON, 4224 HIRAGANA, 4225 INHERITED, 4226 COMMON, 4227 HIRAGANA, 4228 COMMON, 4229 KATAKANA, 4230 COMMON, 4231 KATAKANA, 4232 BOPOMOFO, 4233 HANGUL, 4234 COMMON, 4235 BOPOMOFO, 4236 COMMON, 4237 KATAKANA, 4238 HANGUL, 4239 COMMON, 4240 HANGUL, 4241 COMMON, 4242 KATAKANA, 4243 COMMON, 4244 HAN, 4245 COMMON, 4246 HAN, 4247 YI, 4248 LISU, 4249 VAI, 4250 CYRILLIC, 4251 BAMUM, 4252 COMMON, 4253 LATIN, 4254 COMMON, 4255 LATIN, 4256 SYLOTI_NAGRI, 4257 COMMON, 4258 PHAGS_PA, 4259 SAURASHTRA, 4260 DEVANAGARI, 4261 KAYAH_LI, 4262 REJANG, 4263 HANGUL, 4264 JAVANESE, 4265 CHAM, 4266 MYANMAR, 4267 TAI_VIET, 4268 MEETEI_MAYEK, 4269 ETHIOPIC, 4270 MEETEI_MAYEK, 4271 HANGUL, 4272 UNKNOWN , 4273 HAN, 4274 LATIN, 4275 ARMENIAN, 4276 HEBREW, 4277 ARABIC, 4278 COMMON, 4279 ARABIC, 4280 COMMON, 4281 INHERITED, 4282 COMMON, 4283 INHERITED, 4284 COMMON, 4285 ARABIC, 4286 COMMON, 4287 LATIN, 4288 COMMON, 4289 LATIN, 4290 COMMON, 4291 KATAKANA, 4292 COMMON, 4293 KATAKANA, 4294 COMMON, 4295 HANGUL, 4296 COMMON, 4297 LINEAR_B, 4298 COMMON, 4299 GREEK, 4300 COMMON, 4301 INHERITED, 4302 LYCIAN, 4303 CARIAN, 4304 OLD_ITALIC, 4305 GOTHIC, 4306 UGARITIC, 4307 OLD_PERSIAN, 4308 DESERET, 4309 SHAVIAN, 4310 OSMANYA, 4311 CYPRIOT, 4312 IMPERIAL_ARAMAIC, 4313 PHOENICIAN, 4314 LYDIAN, 4315 MEROITIC_HIEROGLYPHS, 4316 MEROITIC_CURSIVE, 4317 KHAROSHTHI, 4318 OLD_SOUTH_ARABIAN, 4319 AVESTAN, 4320 INSCRIPTIONAL_PARTHIAN, 4321 INSCRIPTIONAL_PAHLAVI, 4322 OLD_TURKIC, 4323 ARABIC, 4324 BRAHMI, 4325 KAITHI, 4326 SORA_SOMPENG, 4327 CHAKMA, 4328 SHARADA, 4329 TAKRI, 4330 CUNEIFORM, 4331 EGYPTIAN_HIEROGLYPHS, 4332 BAMUM, 4333 MIAO, 4334 KATAKANA, 4335 HIRAGANA, 4336 COMMON, 4337 INHERITED, 4338 COMMON, 4339 INHERITED, 4340 COMMON, 4341 INHERITED, 4342 COMMON, 4343 INHERITED, 4344 
COMMON, 4345 GREEK, 4346 COMMON, 4347 ARABIC, 4348 COMMON, 4349 HIRAGANA, 4350 COMMON, 4351 HAN, 4352 COMMON, 4353 INHERITED, 4354 UNKNOWN 4355 }; 4356 4357 private static HashMap<String, Character.UnicodeScript> aliases; 4358 static { 4359 aliases = new HashMap<>(128); 4360 aliases.put("ARAB", ARABIC); 4361 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4362 aliases.put("ARMN", ARMENIAN); 4363 aliases.put("AVST", AVESTAN); 4364 aliases.put("BALI", BALINESE); 4365 aliases.put("BAMU", BAMUM); 4366 aliases.put("BATK", BATAK); 4367 aliases.put("BENG", BENGALI); 4368 aliases.put("BOPO", BOPOMOFO); 4369 aliases.put("BRAI", BRAILLE); 4370 aliases.put("BRAH", BRAHMI); 4371 aliases.put("BUGI", BUGINESE); 4372 aliases.put("BUHD", BUHID); 4373 aliases.put("CAKM", CHAKMA); 4374 aliases.put("CANS", CANADIAN_ABORIGINAL); 4375 aliases.put("CARI", CARIAN); 4376 aliases.put("CHAM", CHAM); 4377 aliases.put("CHER", CHEROKEE); 4378 aliases.put("COPT", COPTIC); 4379 aliases.put("CPRT", CYPRIOT); 4380 aliases.put("CYRL", CYRILLIC); 4381 aliases.put("DEVA", DEVANAGARI); 4382 aliases.put("DSRT", DESERET); 4383 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4384 aliases.put("ETHI", ETHIOPIC); 4385 aliases.put("GEOR", GEORGIAN); 4386 aliases.put("GLAG", GLAGOLITIC); 4387 aliases.put("GOTH", GOTHIC); 4388 aliases.put("GREK", GREEK); 4389 aliases.put("GUJR", GUJARATI); 4390 aliases.put("GURU", GURMUKHI); 4391 aliases.put("HANG", HANGUL); 4392 aliases.put("HANI", HAN); 4393 aliases.put("HANO", HANUNOO); 4394 aliases.put("HEBR", HEBREW); 4395 aliases.put("HIRA", HIRAGANA); 4396 // it appears we don't have the KATAKANA_OR_HIRAGANA 4397 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4398 aliases.put("ITAL", OLD_ITALIC); 4399 aliases.put("JAVA", JAVANESE); 4400 aliases.put("KALI", KAYAH_LI); 4401 aliases.put("KANA", KATAKANA); 4402 aliases.put("KHAR", KHAROSHTHI); 4403 aliases.put("KHMR", KHMER); 4404 aliases.put("KNDA", KANNADA); 4405 aliases.put("KTHI", KAITHI); 4406 aliases.put("LANA", TAI_THAM); 4407 
aliases.put("LAOO", LAO); 4408 aliases.put("LATN", LATIN); 4409 aliases.put("LEPC", LEPCHA); 4410 aliases.put("LIMB", LIMBU); 4411 aliases.put("LINB", LINEAR_B); 4412 aliases.put("LISU", LISU); 4413 aliases.put("LYCI", LYCIAN); 4414 aliases.put("LYDI", LYDIAN); 4415 aliases.put("MAND", MANDAIC); 4416 aliases.put("MERC", MEROITIC_CURSIVE); 4417 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 4418 aliases.put("MLYM", MALAYALAM); 4419 aliases.put("MONG", MONGOLIAN); 4420 aliases.put("MTEI", MEETEI_MAYEK); 4421 aliases.put("MYMR", MYANMAR); 4422 aliases.put("NKOO", NKO); 4423 aliases.put("OGAM", OGHAM); 4424 aliases.put("OLCK", OL_CHIKI); 4425 aliases.put("ORKH", OLD_TURKIC); 4426 aliases.put("ORYA", ORIYA); 4427 aliases.put("OSMA", OSMANYA); 4428 aliases.put("PHAG", PHAGS_PA); 4429 aliases.put("PLRD", MIAO); 4430 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4431 aliases.put("PHNX", PHOENICIAN); 4432 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4433 aliases.put("RJNG", REJANG); 4434 aliases.put("RUNR", RUNIC); 4435 aliases.put("SAMR", SAMARITAN); 4436 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4437 aliases.put("SAUR", SAURASHTRA); 4438 aliases.put("SHAW", SHAVIAN); 4439 aliases.put("SHRD", SHARADA); 4440 aliases.put("SINH", SINHALA); 4441 aliases.put("SORA", SORA_SOMPENG); 4442 aliases.put("SUND", SUNDANESE); 4443 aliases.put("SYLO", SYLOTI_NAGRI); 4444 aliases.put("SYRC", SYRIAC); 4445 aliases.put("TAGB", TAGBANWA); 4446 aliases.put("TALE", TAI_LE); 4447 aliases.put("TAKR", TAKRI); 4448 aliases.put("TALU", NEW_TAI_LUE); 4449 aliases.put("TAML", TAMIL); 4450 aliases.put("TAVT", TAI_VIET); 4451 aliases.put("TELU", TELUGU); 4452 aliases.put("TFNG", TIFINAGH); 4453 aliases.put("TGLG", TAGALOG); 4454 aliases.put("THAA", THAANA); 4455 aliases.put("THAI", THAI); 4456 aliases.put("TIBT", TIBETAN); 4457 aliases.put("UGAR", UGARITIC); 4458 aliases.put("VAII", VAI); 4459 aliases.put("XPEO", OLD_PERSIAN); 4460 aliases.put("XSUX", CUNEIFORM); 4461 aliases.put("YIII", YI); 4462 
aliases.put("ZINH", INHERITED); 4463 aliases.put("ZYYY", COMMON); 4464 aliases.put("ZZZZ", UNKNOWN); 4465 } 4466 4467 /** 4468 * Returns the enum constant representing the Unicode script of which 4469 * the given character (Unicode code point) is assigned to. 4470 * 4471 * @param codePoint the character (Unicode code point) in question. 4472 * @return The {@code UnicodeScript} constant representing the 4473 * Unicode script of which this character is assigned to. 4474 * 4475 * @exception IllegalArgumentException if the specified 4476 * {@code codePoint} is an invalid Unicode code point. 4477 * @see Character#isValidCodePoint(int) 4478 * 4479 */ 4480 public static UnicodeScript of(int codePoint) { 4481 if (!isValidCodePoint(codePoint)) 4482 throw new IllegalArgumentException(); 4483 int type = getType(codePoint); 4484 // leave SURROGATE and PRIVATE_USE for table lookup 4485 if (type == UNASSIGNED) 4486 return UNKNOWN; 4487 int index = Arrays.binarySearch(scriptStarts, codePoint); 4488 if (index < 0) 4489 index = -index - 2; 4490 return scripts[index]; 4491 } 4492 4493 /** 4494 * Returns the UnicodeScript constant with the given Unicode script 4495 * name or the script name alias. Script names and their aliases are 4496 * determined by The Unicode Standard. The files Scripts<version>.txt 4497 * and PropertyValueAliases<version>.txt define script names 4498 * and the script name aliases for a particular version of the 4499 * standard. The {@link Character} class specifies the version of 4500 * the standard that it supports. 4501 * <p> 4502 * Character case is ignored for all of the valid script names. 4503 * The en_US locale's case mapping rules are used to provide 4504 * case-insensitive string comparisons for script name validation. 4505 * 4506 * @param scriptName A {@code UnicodeScript} name. 
4507 * @return The {@code UnicodeScript} constant identified 4508 * by {@code scriptName} 4509 * @throws IllegalArgumentException if {@code scriptName} is an 4510 * invalid name 4511 * @throws NullPointerException if {@code scriptName} is null 4512 */ 4513 public static final UnicodeScript forName(String scriptName) { 4514 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4515 //.replace(' ', '_')); 4516 UnicodeScript sc = aliases.get(scriptName); 4517 if (sc != null) 4518 return sc; 4519 return valueOf(scriptName); 4520 } 4521 } 4522 4523 /** 4524 * The value of the {@code Character}. 4525 * 4526 * @serial 4527 */ 4528 private final char value; 4529 4530 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4531 private static final long serialVersionUID = 3786198910865385080L; 4532 4533 /** 4534 * Constructs a newly allocated {@code Character} object that 4535 * represents the specified {@code char} value. 4536 * 4537 * @param value the value to be represented by the 4538 * {@code Character} object. 4539 */ 4540 public Character(char value) { 4541 this.value = value; 4542 } 4543 4544 private static class CharacterCache { 4545 private CharacterCache(){} 4546 4547 static final Character cache[] = new Character[127 + 1]; 4548 4549 static { 4550 for (int i = 0; i < cache.length; i++) 4551 cache[i] = new Character((char)i); 4552 } 4553 } 4554 4555 /** 4556 * Returns a <tt>Character</tt> instance representing the specified 4557 * <tt>char</tt> value. 4558 * If a new <tt>Character</tt> instance is not required, this method 4559 * should generally be used in preference to the constructor 4560 * {@link #Character(char)}, as this method is likely to yield 4561 * significantly better space and time performance by caching 4562 * frequently requested values. 4563 * 4564 * This method will always cache values in the range {@code 4565 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 4566 * cache other values outside of this range. 
4567 * 4568 * @param c a char value. 4569 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4570 * @since 1.5 4571 */ 4572 public static Character valueOf(char c) { 4573 if (c <= 127) { // must cache 4574 return CharacterCache.cache[(int)c]; 4575 } 4576 return new Character(c); 4577 } 4578 4579 /** 4580 * Returns the value of this {@code Character} object. 4581 * @return the primitive {@code char} value represented by 4582 * this object. 4583 */ 4584 public char charValue() { 4585 return value; 4586 } 4587 4588 /** 4589 * Returns a hash code for this {@code Character}; equal to the result 4590 * of invoking {@code charValue()}. 4591 * 4592 * @return a hash code value for this {@code Character} 4593 */ 4594 @Override 4595 public int hashCode() { 4596 return Character.hashCode(value); 4597 } 4598 4599 /** 4600 * Returns a hash code for a {@code char} value; compatible with 4601 * {@code Character.hashCode()}. 4602 * 4603 * @since 1.8 4604 * 4605 * @param value The {@code char} for which to return a hash code. 4606 * @return a hash code value for a {@code char} value. 4607 */ 4608 public static int hashCode(char value) { 4609 return (int)value; 4610 } 4611 4612 /** 4613 * Compares this object against the specified object. 4614 * The result is {@code true} if and only if the argument is not 4615 * {@code null} and is a {@code Character} object that 4616 * represents the same {@code char} value as this object. 4617 * 4618 * @param obj the object to compare with. 4619 * @return {@code true} if the objects are the same; 4620 * {@code false} otherwise. 4621 */ 4622 public boolean equals(Object obj) { 4623 if (obj instanceof Character) { 4624 return value == ((Character)obj).charValue(); 4625 } 4626 return false; 4627 } 4628 4629 /** 4630 * Returns a {@code String} object representing this 4631 * {@code Character}'s value. 
The result is a string of 4632 * length 1 whose sole component is the primitive 4633 * {@code char} value represented by this 4634 * {@code Character} object. 4635 * 4636 * @return a string representation of this object. 4637 */ 4638 public String toString() { 4639 char buf[] = {value}; 4640 return String.valueOf(buf); 4641 } 4642 4643 /** 4644 * Returns a {@code String} object representing the 4645 * specified {@code char}. The result is a string of length 4646 * 1 consisting solely of the specified {@code char}. 4647 * 4648 * @param c the {@code char} to be converted 4649 * @return the string representation of the specified {@code char} 4650 * @since 1.4 4651 */ 4652 public static String toString(char c) { 4653 return String.valueOf(c); 4654 } 4655 4656 /** 4657 * Determines whether the specified code point is a valid 4658 * <a href="http://www.unicode.org/glossary/#code_point"> 4659 * Unicode code point value</a>. 4660 * 4661 * @param codePoint the Unicode code point to be tested 4662 * @return {@code true} if the specified code point value is between 4663 * {@link #MIN_CODE_POINT} and 4664 * {@link #MAX_CODE_POINT} inclusive; 4665 * {@code false} otherwise. 4666 * @since 1.5 4667 */ 4668 public static boolean isValidCodePoint(int codePoint) { 4669 // Optimized form of: 4670 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4671 int plane = codePoint >>> 16; 4672 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4673 } 4674 4675 /** 4676 * Determines whether the specified character (Unicode code point) 4677 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4678 * Such code points can be represented using a single {@code char}. 4679 * 4680 * @param codePoint the character (Unicode code point) to be tested 4681 * @return {@code true} if the specified code point is between 4682 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4683 * {@code false} otherwise. 
4684 * @since 1.7 4685 */ 4686 public static boolean isBmpCodePoint(int codePoint) { 4687 return codePoint >>> 16 == 0; 4688 // Optimized form of: 4689 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4690 // We consistently use logical shift (>>>) to facilitate 4691 // additional runtime optimizations. 4692 } 4693 4694 /** 4695 * Determines whether the specified character (Unicode code point) 4696 * is in the <a href="#supplementary">supplementary character</a> range. 4697 * 4698 * @param codePoint the character (Unicode code point) to be tested 4699 * @return {@code true} if the specified code point is between 4700 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4701 * {@link #MAX_CODE_POINT} inclusive; 4702 * {@code false} otherwise. 4703 * @since 1.5 4704 */ 4705 public static boolean isSupplementaryCodePoint(int codePoint) { 4706 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4707 && codePoint < MAX_CODE_POINT + 1; 4708 } 4709 4710 /** 4711 * Determines if the given {@code char} value is a 4712 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4713 * Unicode high-surrogate code unit</a> 4714 * (also known as <i>leading-surrogate code unit</i>). 4715 * 4716 * <p>Such values do not represent characters by themselves, 4717 * but are used in the representation of 4718 * <a href="#supplementary">supplementary characters</a> 4719 * in the UTF-16 encoding. 4720 * 4721 * @param ch the {@code char} value to be tested. 4722 * @return {@code true} if the {@code char} value is between 4723 * {@link #MIN_HIGH_SURROGATE} and 4724 * {@link #MAX_HIGH_SURROGATE} inclusive; 4725 * {@code false} otherwise. 
4726 * @see Character#isLowSurrogate(char) 4727 * @see Character.UnicodeBlock#of(int) 4728 * @since 1.5 4729 */ 4730 public static boolean isHighSurrogate(char ch) { 4731 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4732 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4733 } 4734 4735 /** 4736 * Determines if the given {@code char} value is a 4737 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4738 * Unicode low-surrogate code unit</a> 4739 * (also known as <i>trailing-surrogate code unit</i>). 4740 * 4741 * <p>Such values do not represent characters by themselves, 4742 * but are used in the representation of 4743 * <a href="#supplementary">supplementary characters</a> 4744 * in the UTF-16 encoding. 4745 * 4746 * @param ch the {@code char} value to be tested. 4747 * @return {@code true} if the {@code char} value is between 4748 * {@link #MIN_LOW_SURROGATE} and 4749 * {@link #MAX_LOW_SURROGATE} inclusive; 4750 * {@code false} otherwise. 4751 * @see Character#isHighSurrogate(char) 4752 * @since 1.5 4753 */ 4754 public static boolean isLowSurrogate(char ch) { 4755 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4756 } 4757 4758 /** 4759 * Determines if the given {@code char} value is a Unicode 4760 * <i>surrogate code unit</i>. 4761 * 4762 * <p>Such values do not represent characters by themselves, 4763 * but are used in the representation of 4764 * <a href="#supplementary">supplementary characters</a> 4765 * in the UTF-16 encoding. 4766 * 4767 * <p>A char value is a surrogate code unit if and only if it is either 4768 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4769 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4770 * 4771 * @param ch the {@code char} value to be tested. 4772 * @return {@code true} if the {@code char} value is between 4773 * {@link #MIN_SURROGATE} and 4774 * {@link #MAX_SURROGATE} inclusive; 4775 * {@code false} otherwise. 
4776 * @since 1.7 4777 */ 4778 public static boolean isSurrogate(char ch) { 4779 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4780 } 4781 4782 /** 4783 * Determines whether the specified pair of {@code char} 4784 * values is a valid 4785 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4786 * Unicode surrogate pair</a>. 4787 4788 * <p>This method is equivalent to the expression: 4789 * <blockquote><pre>{@code 4790 * isHighSurrogate(high) && isLowSurrogate(low) 4791 * }</pre></blockquote> 4792 * 4793 * @param high the high-surrogate code value to be tested 4794 * @param low the low-surrogate code value to be tested 4795 * @return {@code true} if the specified high and 4796 * low-surrogate code values represent a valid surrogate pair; 4797 * {@code false} otherwise. 4798 * @since 1.5 4799 */ 4800 public static boolean isSurrogatePair(char high, char low) { 4801 return isHighSurrogate(high) && isLowSurrogate(low); 4802 } 4803 4804 /** 4805 * Determines the number of {@code char} values needed to 4806 * represent the specified character (Unicode code point). If the 4807 * specified character is equal to or greater than 0x10000, then 4808 * the method returns 2. Otherwise, the method returns 1. 4809 * 4810 * <p>This method doesn't validate the specified character to be a 4811 * valid Unicode code point. The caller must validate the 4812 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4813 * if necessary. 4814 * 4815 * @param codePoint the character (Unicode code point) to be tested. 4816 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4817 * @see Character#isSupplementaryCodePoint(int) 4818 * @since 1.5 4819 */ 4820 public static int charCount(int codePoint) { 4821 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4822 } 4823 4824 /** 4825 * Converts the specified surrogate pair to its supplementary code 4826 * point value. 
This method does not validate the specified 4827 * surrogate pair. The caller must validate it using {@link 4828 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4829 * 4830 * @param high the high-surrogate code unit 4831 * @param low the low-surrogate code unit 4832 * @return the supplementary code point composed from the 4833 * specified surrogate pair. 4834 * @since 1.5 4835 */ 4836 public static int toCodePoint(char high, char low) { 4837 // Optimized form of: 4838 // return ((high - MIN_HIGH_SURROGATE) << 10) 4839 // + (low - MIN_LOW_SURROGATE) 4840 // + MIN_SUPPLEMENTARY_CODE_POINT; 4841 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4842 - (MIN_HIGH_SURROGATE << 10) 4843 - MIN_LOW_SURROGATE); 4844 } 4845 4846 /** 4847 * Returns the code point at the given index of the 4848 * {@code CharSequence}. If the {@code char} value at 4849 * the given index in the {@code CharSequence} is in the 4850 * high-surrogate range, the following index is less than the 4851 * length of the {@code CharSequence}, and the 4852 * {@code char} value at the following index is in the 4853 * low-surrogate range, then the supplementary code point 4854 * corresponding to this surrogate pair is returned. Otherwise, 4855 * the {@code char} value at the given index is returned. 4856 * 4857 * @param seq a sequence of {@code char} values (Unicode code 4858 * units) 4859 * @param index the index to the {@code char} values (Unicode 4860 * code units) in {@code seq} to be converted 4861 * @return the Unicode code point at the given index 4862 * @exception NullPointerException if {@code seq} is null. 4863 * @exception IndexOutOfBoundsException if the value 4864 * {@code index} is negative or not less than 4865 * {@link CharSequence#length() seq.length()}. 
4866 * @since 1.5 4867 */ 4868 public static int codePointAt(CharSequence seq, int index) { 4869 char c1 = seq.charAt(index); 4870 if (isHighSurrogate(c1) && ++index < seq.length()) { 4871 char c2 = seq.charAt(index); 4872 if (isLowSurrogate(c2)) { 4873 return toCodePoint(c1, c2); 4874 } 4875 } 4876 return c1; 4877 } 4878 4879 /** 4880 * Returns the code point at the given index of the 4881 * {@code char} array. If the {@code char} value at 4882 * the given index in the {@code char} array is in the 4883 * high-surrogate range, the following index is less than the 4884 * length of the {@code char} array, and the 4885 * {@code char} value at the following index is in the 4886 * low-surrogate range, then the supplementary code point 4887 * corresponding to this surrogate pair is returned. Otherwise, 4888 * the {@code char} value at the given index is returned. 4889 * 4890 * @param a the {@code char} array 4891 * @param index the index to the {@code char} values (Unicode 4892 * code units) in the {@code char} array to be converted 4893 * @return the Unicode code point at the given index 4894 * @exception NullPointerException if {@code a} is null. 4895 * @exception IndexOutOfBoundsException if the value 4896 * {@code index} is negative or not less than 4897 * the length of the {@code char} array. 4898 * @since 1.5 4899 */ 4900 public static int codePointAt(char[] a, int index) { 4901 return codePointAtImpl(a, index, a.length); 4902 } 4903 4904 /** 4905 * Returns the code point at the given index of the 4906 * {@code char} array, where only array elements with 4907 * {@code index} less than {@code limit} can be used. 
If 4908 * the {@code char} value at the given index in the 4909 * {@code char} array is in the high-surrogate range, the 4910 * following index is less than the {@code limit}, and the 4911 * {@code char} value at the following index is in the 4912 * low-surrogate range, then the supplementary code point 4913 * corresponding to this surrogate pair is returned. Otherwise, 4914 * the {@code char} value at the given index is returned. 4915 * 4916 * @param a the {@code char} array 4917 * @param index the index to the {@code char} values (Unicode 4918 * code units) in the {@code char} array to be converted 4919 * @param limit the index after the last array element that 4920 * can be used in the {@code char} array 4921 * @return the Unicode code point at the given index 4922 * @exception NullPointerException if {@code a} is null. 4923 * @exception IndexOutOfBoundsException if the {@code index} 4924 * argument is negative or not less than the {@code limit} 4925 * argument, or if the {@code limit} argument is negative or 4926 * greater than the length of the {@code char} array. 4927 * @since 1.5 4928 */ 4929 public static int codePointAt(char[] a, int index, int limit) { 4930 if (index >= limit || limit < 0 || limit > a.length) { 4931 throw new IndexOutOfBoundsException(); 4932 } 4933 return codePointAtImpl(a, index, limit); 4934 } 4935 4936 // throws ArrayIndexOutOfBoundsException if index out of bounds 4937 static int codePointAtImpl(char[] a, int index, int limit) { 4938 char c1 = a[index]; 4939 if (isHighSurrogate(c1) && ++index < limit) { 4940 char c2 = a[index]; 4941 if (isLowSurrogate(c2)) { 4942 return toCodePoint(c1, c2); 4943 } 4944 } 4945 return c1; 4946 } 4947 4948 /** 4949 * Returns the code point preceding the given index of the 4950 * {@code CharSequence}. 
If the {@code char} value at 4951 * {@code (index - 1)} in the {@code CharSequence} is in 4952 * the low-surrogate range, {@code (index - 2)} is not 4953 * negative, and the {@code char} value at {@code (index - 2)} 4954 * in the {@code CharSequence} is in the 4955 * high-surrogate range, then the supplementary code point 4956 * corresponding to this surrogate pair is returned. Otherwise, 4957 * the {@code char} value at {@code (index - 1)} is 4958 * returned. 4959 * 4960 * @param seq the {@code CharSequence} instance 4961 * @param index the index following the code point that should be returned 4962 * @return the Unicode code point value before the given index. 4963 * @exception NullPointerException if {@code seq} is null. 4964 * @exception IndexOutOfBoundsException if the {@code index} 4965 * argument is less than 1 or greater than {@link 4966 * CharSequence#length() seq.length()}. 4967 * @since 1.5 4968 */ 4969 public static int codePointBefore(CharSequence seq, int index) { 4970 char c2 = seq.charAt(--index); 4971 if (isLowSurrogate(c2) && index > 0) { 4972 char c1 = seq.charAt(--index); 4973 if (isHighSurrogate(c1)) { 4974 return toCodePoint(c1, c2); 4975 } 4976 } 4977 return c2; 4978 } 4979 4980 /** 4981 * Returns the code point preceding the given index of the 4982 * {@code char} array. If the {@code char} value at 4983 * {@code (index - 1)} in the {@code char} array is in 4984 * the low-surrogate range, {@code (index - 2)} is not 4985 * negative, and the {@code char} value at {@code (index - 2)} 4986 * in the {@code char} array is in the 4987 * high-surrogate range, then the supplementary code point 4988 * corresponding to this surrogate pair is returned. Otherwise, 4989 * the {@code char} value at {@code (index - 1)} is 4990 * returned. 4991 * 4992 * @param a the {@code char} array 4993 * @param index the index following the code point that should be returned 4994 * @return the Unicode code point value before the given index. 
4995 * @exception NullPointerException if {@code a} is null. 4996 * @exception IndexOutOfBoundsException if the {@code index} 4997 * argument is less than 1 or greater than the length of the 4998 * {@code char} array 4999 * @since 1.5 5000 */ 5001 public static int codePointBefore(char[] a, int index) { 5002 return codePointBeforeImpl(a, index, 0); 5003 } 5004 5005 /** 5006 * Returns the code point preceding the given index of the 5007 * {@code char} array, where only array elements with 5008 * {@code index} greater than or equal to {@code start} 5009 * can be used. If the {@code char} value at {@code (index - 1)} 5010 * in the {@code char} array is in the 5011 * low-surrogate range, {@code (index - 2)} is not less than 5012 * {@code start}, and the {@code char} value at 5013 * {@code (index - 2)} in the {@code char} array is in 5014 * the high-surrogate range, then the supplementary code point 5015 * corresponding to this surrogate pair is returned. Otherwise, 5016 * the {@code char} value at {@code (index - 1)} is 5017 * returned. 5018 * 5019 * @param a the {@code char} array 5020 * @param index the index following the code point that should be returned 5021 * @param start the index of the first array element in the 5022 * {@code char} array 5023 * @return the Unicode code point value before the given index. 5024 * @exception NullPointerException if {@code a} is null. 5025 * @exception IndexOutOfBoundsException if the {@code index} 5026 * argument is not greater than the {@code start} argument or 5027 * is greater than the length of the {@code char} array, or 5028 * if the {@code start} argument is negative or not less than 5029 * the length of the {@code char} array. 
5030 * @since 1.5 5031 */ 5032 public static int codePointBefore(char[] a, int index, int start) { 5033 if (index <= start || start < 0 || start >= a.length) { 5034 throw new IndexOutOfBoundsException(); 5035 } 5036 return codePointBeforeImpl(a, index, start); 5037 } 5038 5039 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 5040 static int codePointBeforeImpl(char[] a, int index, int start) { 5041 char c2 = a[--index]; 5042 if (isLowSurrogate(c2) && index > start) { 5043 char c1 = a[--index]; 5044 if (isHighSurrogate(c1)) { 5045 return toCodePoint(c1, c2); 5046 } 5047 } 5048 return c2; 5049 } 5050 5051 /** 5052 * Returns the leading surrogate (a 5053 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 5054 * high surrogate code unit</a>) of the 5055 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5056 * surrogate pair</a> 5057 * representing the specified supplementary character (Unicode 5058 * code point) in the UTF-16 encoding. If the specified character 5059 * is not a 5060 * <a href="Character.html#supplementary">supplementary character</a>, 5061 * an unspecified {@code char} is returned. 5062 * 5063 * <p>If 5064 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5065 * is {@code true}, then 5066 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 5067 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 5068 * are also always {@code true}. 
5069 * 5070 * @param codePoint a supplementary character (Unicode code point) 5071 * @return the leading surrogate code unit used to represent the 5072 * character in the UTF-16 encoding 5073 * @since 1.7 5074 */ 5075 public static char highSurrogate(int codePoint) { 5076 return (char) ((codePoint >>> 10) 5077 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 5078 } 5079 5080 /** 5081 * Returns the trailing surrogate (a 5082 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 5083 * low surrogate code unit</a>) of the 5084 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5085 * surrogate pair</a> 5086 * representing the specified supplementary character (Unicode 5087 * code point) in the UTF-16 encoding. If the specified character 5088 * is not a 5089 * <a href="Character.html#supplementary">supplementary character</a>, 5090 * an unspecified {@code char} is returned. 5091 * 5092 * <p>If 5093 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5094 * is {@code true}, then 5095 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 5096 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 5097 * are also always {@code true}. 5098 * 5099 * @param codePoint a supplementary character (Unicode code point) 5100 * @return the trailing surrogate code unit used to represent the 5101 * character in the UTF-16 encoding 5102 * @since 1.7 5103 */ 5104 public static char lowSurrogate(int codePoint) { 5105 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 5106 } 5107 5108 /** 5109 * Converts the specified character (Unicode code point) to its 5110 * UTF-16 representation. If the specified code point is a BMP 5111 * (Basic Multilingual Plane or Plane 0) value, the same value is 5112 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 5113 * specified code point is a supplementary character, its 5114 * surrogate values are stored in {@code dst[dstIndex]} 5115 * (high-surrogate) and {@code dst[dstIndex+1]} 5116 * (low-surrogate), and 2 is returned. 5117 * 5118 * @param codePoint the character (Unicode code point) to be converted. 5119 * @param dst an array of {@code char} in which the 5120 * {@code codePoint}'s UTF-16 value is stored. 5121 * @param dstIndex the start index into the {@code dst} 5122 * array where the converted value is stored. 5123 * @return 1 if the code point is a BMP code point, 2 if the 5124 * code point is a supplementary code point. 5125 * @exception IllegalArgumentException if the specified 5126 * {@code codePoint} is not a valid Unicode code point. 5127 * @exception NullPointerException if the specified {@code dst} is null. 5128 * @exception IndexOutOfBoundsException if {@code dstIndex} 5129 * is negative or not less than {@code dst.length}, or if 5130 * {@code dst} at {@code dstIndex} doesn't have enough 5131 * array element(s) to store the resulting {@code char} 5132 * value(s). (If {@code dstIndex} is equal to 5133 * {@code dst.length-1} and the specified 5134 * {@code codePoint} is a supplementary character, the 5135 * high-surrogate value is not stored in 5136 * {@code dst[dstIndex]}.) 5137 * @since 1.5 5138 */ 5139 public static int toChars(int codePoint, char[] dst, int dstIndex) { 5140 if (isBmpCodePoint(codePoint)) { 5141 dst[dstIndex] = (char) codePoint; 5142 return 1; 5143 } else if (isValidCodePoint(codePoint)) { 5144 toSurrogates(codePoint, dst, dstIndex); 5145 return 2; 5146 } else { 5147 throw new IllegalArgumentException(); 5148 } 5149 } 5150 5151 /** 5152 * Converts the specified character (Unicode code point) to its 5153 * UTF-16 representation stored in a {@code char} array. 
If 5154 * the specified code point is a BMP (Basic Multilingual Plane or 5155 * Plane 0) value, the resulting {@code char} array has 5156 * the same value as {@code codePoint}. If the specified code 5157 * point is a supplementary code point, the resulting 5158 * {@code char} array has the corresponding surrogate pair. 5159 * 5160 * @param codePoint a Unicode code point 5161 * @return a {@code char} array having 5162 * {@code codePoint}'s UTF-16 representation. 5163 * @exception IllegalArgumentException if the specified 5164 * {@code codePoint} is not a valid Unicode code point. 5165 * @since 1.5 5166 */ 5167 public static char[] toChars(int codePoint) { 5168 if (isBmpCodePoint(codePoint)) { 5169 return new char[] { (char) codePoint }; 5170 } else if (isValidCodePoint(codePoint)) { 5171 char[] result = new char[2]; 5172 toSurrogates(codePoint, result, 0); 5173 return result; 5174 } else { 5175 throw new IllegalArgumentException(); 5176 } 5177 } 5178 5179 static void toSurrogates(int codePoint, char[] dst, int index) { 5180 // We write elements "backwards" to guarantee all-or-nothing 5181 dst[index+1] = lowSurrogate(codePoint); 5182 dst[index] = highSurrogate(codePoint); 5183 } 5184 5185 /** 5186 * Returns the number of Unicode code points in the text range of 5187 * the specified char sequence. The text range begins at the 5188 * specified {@code beginIndex} and extends to the 5189 * {@code char} at index {@code endIndex - 1}. Thus the 5190 * length (in {@code char}s) of the text range is 5191 * {@code endIndex-beginIndex}. Unpaired surrogates within 5192 * the text range count as one code point each. 5193 * 5194 * @param seq the char sequence 5195 * @param beginIndex the index to the first {@code char} of 5196 * the text range. 5197 * @param endIndex the index after the last {@code char} of 5198 * the text range. 5199 * @return the number of Unicode code points in the specified text 5200 * range 5201 * @exception NullPointerException if {@code seq} is null. 
5202 * @exception IndexOutOfBoundsException if the 5203 * {@code beginIndex} is negative, or {@code endIndex} 5204 * is larger than the length of the given sequence, or 5205 * {@code beginIndex} is larger than {@code endIndex}. 5206 * @since 1.5 5207 */ 5208 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5209 int length = seq.length(); 5210 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5211 throw new IndexOutOfBoundsException(); 5212 } 5213 int n = endIndex - beginIndex; 5214 for (int i = beginIndex; i < endIndex; ) { 5215 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5216 isLowSurrogate(seq.charAt(i))) { 5217 n--; 5218 i++; 5219 } 5220 } 5221 return n; 5222 } 5223 5224 /** 5225 * Returns the number of Unicode code points in a subarray of the 5226 * {@code char} array argument. The {@code offset} 5227 * argument is the index of the first {@code char} of the 5228 * subarray and the {@code count} argument specifies the 5229 * length of the subarray in {@code char}s. Unpaired 5230 * surrogates within the subarray count as one code point each. 5231 * 5232 * @param a the {@code char} array 5233 * @param offset the index of the first {@code char} in the 5234 * given {@code char} array 5235 * @param count the length of the subarray in {@code char}s 5236 * @return the number of Unicode code points in the specified subarray 5237 * @exception NullPointerException if {@code a} is null. 5238 * @exception IndexOutOfBoundsException if {@code offset} or 5239 * {@code count} is negative, or if {@code offset + 5240 * count} is larger than the length of the given array. 
5241 * @since 1.5 5242 */ 5243 public static int codePointCount(char[] a, int offset, int count) { 5244 if (count > a.length - offset || offset < 0 || count < 0) { 5245 throw new IndexOutOfBoundsException(); 5246 } 5247 return codePointCountImpl(a, offset, count); 5248 } 5249 5250 static int codePointCountImpl(char[] a, int offset, int count) { 5251 int endIndex = offset + count; 5252 int n = count; 5253 for (int i = offset; i < endIndex; ) { 5254 if (isHighSurrogate(a[i++]) && i < endIndex && 5255 isLowSurrogate(a[i])) { 5256 n--; 5257 i++; 5258 } 5259 } 5260 return n; 5261 } 5262 5263 /** 5264 * Returns the index within the given char sequence that is offset 5265 * from the given {@code index} by {@code codePointOffset} 5266 * code points. Unpaired surrogates within the text range given by 5267 * {@code index} and {@code codePointOffset} count as 5268 * one code point each. 5269 * 5270 * @param seq the char sequence 5271 * @param index the index to be offset 5272 * @param codePointOffset the offset in code points 5273 * @return the index within the char sequence 5274 * @exception NullPointerException if {@code seq} is null. 5275 * @exception IndexOutOfBoundsException if {@code index} 5276 * is negative or larger then the length of the char sequence, 5277 * or if {@code codePointOffset} is positive and the 5278 * subsequence starting with {@code index} has fewer than 5279 * {@code codePointOffset} code points, or if 5280 * {@code codePointOffset} is negative and the subsequence 5281 * before {@code index} has fewer than the absolute value 5282 * of {@code codePointOffset} code points. 
5283 * @since 1.5 5284 */ 5285 public static int offsetByCodePoints(CharSequence seq, int index, 5286 int codePointOffset) { 5287 int length = seq.length(); 5288 if (index < 0 || index > length) { 5289 throw new IndexOutOfBoundsException(); 5290 } 5291 5292 int x = index; 5293 if (codePointOffset >= 0) { 5294 int i; 5295 for (i = 0; x < length && i < codePointOffset; i++) { 5296 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5297 isLowSurrogate(seq.charAt(x))) { 5298 x++; 5299 } 5300 } 5301 if (i < codePointOffset) { 5302 throw new IndexOutOfBoundsException(); 5303 } 5304 } else { 5305 int i; 5306 for (i = codePointOffset; x > 0 && i < 0; i++) { 5307 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5308 isHighSurrogate(seq.charAt(x-1))) { 5309 x--; 5310 } 5311 } 5312 if (i < 0) { 5313 throw new IndexOutOfBoundsException(); 5314 } 5315 } 5316 return x; 5317 } 5318 5319 /** 5320 * Returns the index within the given {@code char} subarray 5321 * that is offset from the given {@code index} by 5322 * {@code codePointOffset} code points. The 5323 * {@code start} and {@code count} arguments specify a 5324 * subarray of the {@code char} array. Unpaired surrogates 5325 * within the text range given by {@code index} and 5326 * {@code codePointOffset} count as one code point each. 5327 * 5328 * @param a the {@code char} array 5329 * @param start the index of the first {@code char} of the 5330 * subarray 5331 * @param count the length of the subarray in {@code char}s 5332 * @param index the index to be offset 5333 * @param codePointOffset the offset in code points 5334 * @return the index within the subarray 5335 * @exception NullPointerException if {@code a} is null. 
5336 * @exception IndexOutOfBoundsException 5337 * if {@code start} or {@code count} is negative, 5338 * or if {@code start + count} is larger than the length of 5339 * the given array, 5340 * or if {@code index} is less than {@code start} or 5341 * larger then {@code start + count}, 5342 * or if {@code codePointOffset} is positive and the text range 5343 * starting with {@code index} and ending with {@code start + count - 1} 5344 * has fewer than {@code codePointOffset} code 5345 * points, 5346 * or if {@code codePointOffset} is negative and the text range 5347 * starting with {@code start} and ending with {@code index - 1} 5348 * has fewer than the absolute value of 5349 * {@code codePointOffset} code points. 5350 * @since 1.5 5351 */ 5352 public static int offsetByCodePoints(char[] a, int start, int count, 5353 int index, int codePointOffset) { 5354 if (count > a.length-start || start < 0 || count < 0 5355 || index < start || index > start+count) { 5356 throw new IndexOutOfBoundsException(); 5357 } 5358 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5359 } 5360 5361 static int offsetByCodePointsImpl(char[]a, int start, int count, 5362 int index, int codePointOffset) { 5363 int x = index; 5364 if (codePointOffset >= 0) { 5365 int limit = start + count; 5366 int i; 5367 for (i = 0; x < limit && i < codePointOffset; i++) { 5368 if (isHighSurrogate(a[x++]) && x < limit && 5369 isLowSurrogate(a[x])) { 5370 x++; 5371 } 5372 } 5373 if (i < codePointOffset) { 5374 throw new IndexOutOfBoundsException(); 5375 } 5376 } else { 5377 int i; 5378 for (i = codePointOffset; x > start && i < 0; i++) { 5379 if (isLowSurrogate(a[--x]) && x > start && 5380 isHighSurrogate(a[x-1])) { 5381 x--; 5382 } 5383 } 5384 if (i < 0) { 5385 throw new IndexOutOfBoundsException(); 5386 } 5387 } 5388 return x; 5389 } 5390 5391 /** 5392 * Determines if the specified character is a lowercase character. 
5393 * <p> 5394 * A character is lowercase if its general category type, provided 5395 * by {@code Character.getType(ch)}, is 5396 * {@code LOWERCASE_LETTER}, or it has contributory property 5397 * Other_Lowercase as defined by the Unicode Standard. 5398 * <p> 5399 * The following are examples of lowercase characters: 5400 * <blockquote><pre> 5401 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5402 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5403 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5404 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5405 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5406 * </pre></blockquote> 5407 * <p> Many other Unicode characters are lowercase too. 5408 * 5409 * <p><b>Note:</b> This method cannot handle <a 5410 * href="#supplementary"> supplementary characters</a>. To support 5411 * all Unicode characters, including supplementary characters, use 5412 * the {@link #isLowerCase(int)} method. 5413 * 5414 * @param ch the character to be tested. 5415 * @return {@code true} if the character is lowercase; 5416 * {@code false} otherwise. 5417 * @see Character#isLowerCase(char) 5418 * @see Character#isTitleCase(char) 5419 * @see Character#toLowerCase(char) 5420 * @see Character#getType(char) 5421 */ 5422 public static boolean isLowerCase(char ch) { 5423 return isLowerCase((int)ch); 5424 } 5425 5426 /** 5427 * Determines if the specified character (Unicode code point) is a 5428 * lowercase character. 5429 * <p> 5430 * A character is lowercase if its general category type, provided 5431 * by {@link Character#getType getType(codePoint)}, is 5432 * {@code LOWERCASE_LETTER}, or it has contributory property 5433 * Other_Lowercase as defined by the Unicode Standard. 
5434 * <p> 5435 * The following are examples of lowercase characters: 5436 * <blockquote><pre> 5437 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5438 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5439 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5440 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5441 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5442 * </pre></blockquote> 5443 * <p> Many other Unicode characters are lowercase too. 5444 * 5445 * @param codePoint the character (Unicode code point) to be tested. 5446 * @return {@code true} if the character is lowercase; 5447 * {@code false} otherwise. 5448 * @see Character#isLowerCase(int) 5449 * @see Character#isTitleCase(int) 5450 * @see Character#toLowerCase(int) 5451 * @see Character#getType(int) 5452 * @since 1.5 5453 */ 5454 public static boolean isLowerCase(int codePoint) { 5455 return getType(codePoint) == Character.LOWERCASE_LETTER || 5456 CharacterData.of(codePoint).isOtherLowercase(codePoint); 5457 } 5458 5459 /** 5460 * Determines if the specified character is an uppercase character. 5461 * <p> 5462 * A character is uppercase if its general category type, provided by 5463 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 5464 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5465 * <p> 5466 * The following are examples of uppercase characters: 5467 * <blockquote><pre> 5468 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5469 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5470 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5471 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5472 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5473 * </pre></blockquote> 5474 * <p> Many other Unicode characters are uppercase too. 
5475 * 5476 * <p><b>Note:</b> This method cannot handle <a 5477 * href="#supplementary"> supplementary characters</a>. To support 5478 * all Unicode characters, including supplementary characters, use 5479 * the {@link #isUpperCase(int)} method. 5480 * 5481 * @param ch the character to be tested. 5482 * @return {@code true} if the character is uppercase; 5483 * {@code false} otherwise. 5484 * @see Character#isLowerCase(char) 5485 * @see Character#isTitleCase(char) 5486 * @see Character#toUpperCase(char) 5487 * @see Character#getType(char) 5488 * @since 1.0 5489 */ 5490 public static boolean isUpperCase(char ch) { 5491 return isUpperCase((int)ch); 5492 } 5493 5494 /** 5495 * Determines if the specified character (Unicode code point) is an uppercase character. 5496 * <p> 5497 * A character is uppercase if its general category type, provided by 5498 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 5499 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5500 * <p> 5501 * The following are examples of uppercase characters: 5502 * <blockquote><pre> 5503 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5504 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5505 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5506 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5507 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5508 * </pre></blockquote> 5509 * <p> Many other Unicode characters are uppercase too. 5510 * 5511 * @param codePoint the character (Unicode code point) to be tested. 5512 * @return {@code true} if the character is uppercase; 5513 * {@code false} otherwise. 
5514 * @see Character#isLowerCase(int) 5515 * @see Character#isTitleCase(int) 5516 * @see Character#toUpperCase(int) 5517 * @see Character#getType(int) 5518 * @since 1.5 5519 */ 5520 public static boolean isUpperCase(int codePoint) { 5521 return getType(codePoint) == Character.UPPERCASE_LETTER || 5522 CharacterData.of(codePoint).isOtherUppercase(codePoint); 5523 } 5524 5525 /** 5526 * Determines if the specified character is a titlecase character. 5527 * <p> 5528 * A character is a titlecase character if its general 5529 * category type, provided by {@code Character.getType(ch)}, 5530 * is {@code TITLECASE_LETTER}. 5531 * <p> 5532 * Some characters look like pairs of Latin letters. For example, there 5533 * is an uppercase letter that looks like "LJ" and has a corresponding 5534 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5535 * is the appropriate form to use when rendering a word in lowercase 5536 * with initial capitals, as for a book title. 5537 * <p> 5538 * These are some of the Unicode characters for which this method returns 5539 * {@code true}: 5540 * <ul> 5541 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5542 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5543 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5544 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5545 * </ul> 5546 * <p> Many other Unicode characters are titlecase too. 5547 * 5548 * <p><b>Note:</b> This method cannot handle <a 5549 * href="#supplementary"> supplementary characters</a>. To support 5550 * all Unicode characters, including supplementary characters, use 5551 * the {@link #isTitleCase(int)} method. 5552 * 5553 * @param ch the character to be tested. 5554 * @return {@code true} if the character is titlecase; 5555 * {@code false} otherwise. 
5556 * @see Character#isLowerCase(char) 5557 * @see Character#isUpperCase(char) 5558 * @see Character#toTitleCase(char) 5559 * @see Character#getType(char) 5560 * @since 1.0.2 5561 */ 5562 public static boolean isTitleCase(char ch) { 5563 return isTitleCase((int)ch); 5564 } 5565 5566 /** 5567 * Determines if the specified character (Unicode code point) is a titlecase character. 5568 * <p> 5569 * A character is a titlecase character if its general 5570 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5571 * is {@code TITLECASE_LETTER}. 5572 * <p> 5573 * Some characters look like pairs of Latin letters. For example, there 5574 * is an uppercase letter that looks like "LJ" and has a corresponding 5575 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5576 * is the appropriate form to use when rendering a word in lowercase 5577 * with initial capitals, as for a book title. 5578 * <p> 5579 * These are some of the Unicode characters for which this method returns 5580 * {@code true}: 5581 * <ul> 5582 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5583 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5584 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5585 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5586 * </ul> 5587 * <p> Many other Unicode characters are titlecase too. 5588 * 5589 * @param codePoint the character (Unicode code point) to be tested. 5590 * @return {@code true} if the character is titlecase; 5591 * {@code false} otherwise. 5592 * @see Character#isLowerCase(int) 5593 * @see Character#isUpperCase(int) 5594 * @see Character#toTitleCase(int) 5595 * @see Character#getType(int) 5596 * @since 1.5 5597 */ 5598 public static boolean isTitleCase(int codePoint) { 5599 return getType(codePoint) == Character.TITLECASE_LETTER; 5600 } 5601 5602 /** 5603 * Determines if the specified character is a digit. 
5604 * <p> 5605 * A character is a digit if its general category type, provided 5606 * by {@code Character.getType(ch)}, is 5607 * {@code DECIMAL_DIGIT_NUMBER}. 5608 * <p> 5609 * Some Unicode character ranges that contain digits: 5610 * <ul> 5611 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5612 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5613 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5614 * Arabic-Indic digits 5615 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5616 * Extended Arabic-Indic digits 5617 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5618 * Devanagari digits 5619 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5620 * Fullwidth digits 5621 * </ul> 5622 * 5623 * Many other character ranges contain digits as well. 5624 * 5625 * <p><b>Note:</b> This method cannot handle <a 5626 * href="#supplementary"> supplementary characters</a>. To support 5627 * all Unicode characters, including supplementary characters, use 5628 * the {@link #isDigit(int)} method. 5629 * 5630 * @param ch the character to be tested. 5631 * @return {@code true} if the character is a digit; 5632 * {@code false} otherwise. 5633 * @see Character#digit(char, int) 5634 * @see Character#forDigit(int, int) 5635 * @see Character#getType(char) 5636 */ 5637 public static boolean isDigit(char ch) { 5638 return isDigit((int)ch); 5639 } 5640 5641 /** 5642 * Determines if the specified character (Unicode code point) is a digit. 5643 * <p> 5644 * A character is a digit if its general category type, provided 5645 * by {@link Character#getType(int) getType(codePoint)}, is 5646 * {@code DECIMAL_DIGIT_NUMBER}. 
5647 * <p> 5648 * Some Unicode character ranges that contain digits: 5649 * <ul> 5650 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5651 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5652 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5653 * Arabic-Indic digits 5654 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5655 * Extended Arabic-Indic digits 5656 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5657 * Devanagari digits 5658 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5659 * Fullwidth digits 5660 * </ul> 5661 * 5662 * Many other character ranges contain digits as well. 5663 * 5664 * @param codePoint the character (Unicode code point) to be tested. 5665 * @return {@code true} if the character is a digit; 5666 * {@code false} otherwise. 5667 * @see Character#forDigit(int, int) 5668 * @see Character#getType(int) 5669 * @since 1.5 5670 */ 5671 public static boolean isDigit(int codePoint) { 5672 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5673 } 5674 5675 /** 5676 * Determines if a character is defined in Unicode. 5677 * <p> 5678 * A character is defined if at least one of the following is true: 5679 * <ul> 5680 * <li>It has an entry in the UnicodeData file. 5681 * <li>It has a value in a range defined by the UnicodeData file. 5682 * </ul> 5683 * 5684 * <p><b>Note:</b> This method cannot handle <a 5685 * href="#supplementary"> supplementary characters</a>. To support 5686 * all Unicode characters, including supplementary characters, use 5687 * the {@link #isDefined(int)} method. 5688 * 5689 * @param ch the character to be tested 5690 * @return {@code true} if the character has a defined meaning 5691 * in Unicode; {@code false} otherwise. 
5692 * @see Character#isDigit(char) 5693 * @see Character#isLetter(char) 5694 * @see Character#isLetterOrDigit(char) 5695 * @see Character#isLowerCase(char) 5696 * @see Character#isTitleCase(char) 5697 * @see Character#isUpperCase(char) 5698 * @since 1.0.2 5699 */ 5700 public static boolean isDefined(char ch) { 5701 return isDefined((int)ch); 5702 } 5703 5704 /** 5705 * Determines if a character (Unicode code point) is defined in Unicode. 5706 * <p> 5707 * A character is defined if at least one of the following is true: 5708 * <ul> 5709 * <li>It has an entry in the UnicodeData file. 5710 * <li>It has a value in a range defined by the UnicodeData file. 5711 * </ul> 5712 * 5713 * @param codePoint the character (Unicode code point) to be tested. 5714 * @return {@code true} if the character has a defined meaning 5715 * in Unicode; {@code false} otherwise. 5716 * @see Character#isDigit(int) 5717 * @see Character#isLetter(int) 5718 * @see Character#isLetterOrDigit(int) 5719 * @see Character#isLowerCase(int) 5720 * @see Character#isTitleCase(int) 5721 * @see Character#isUpperCase(int) 5722 * @since 1.5 5723 */ 5724 public static boolean isDefined(int codePoint) { 5725 return getType(codePoint) != Character.UNASSIGNED; 5726 } 5727 5728 /** 5729 * Determines if the specified character is a letter. 5730 * <p> 5731 * A character is considered to be a letter if its general 5732 * category type, provided by {@code Character.getType(ch)}, 5733 * is any of the following: 5734 * <ul> 5735 * <li> {@code UPPERCASE_LETTER} 5736 * <li> {@code LOWERCASE_LETTER} 5737 * <li> {@code TITLECASE_LETTER} 5738 * <li> {@code MODIFIER_LETTER} 5739 * <li> {@code OTHER_LETTER} 5740 * </ul> 5741 * 5742 * Not all letters have case. Many characters are 5743 * letters but are neither uppercase nor lowercase nor titlecase. 5744 * 5745 * <p><b>Note:</b> This method cannot handle <a 5746 * href="#supplementary"> supplementary characters</a>. 
To support 5747 * all Unicode characters, including supplementary characters, use 5748 * the {@link #isLetter(int)} method. 5749 * 5750 * @param ch the character to be tested. 5751 * @return {@code true} if the character is a letter; 5752 * {@code false} otherwise. 5753 * @see Character#isDigit(char) 5754 * @see Character#isJavaIdentifierStart(char) 5755 * @see Character#isJavaLetter(char) 5756 * @see Character#isJavaLetterOrDigit(char) 5757 * @see Character#isLetterOrDigit(char) 5758 * @see Character#isLowerCase(char) 5759 * @see Character#isTitleCase(char) 5760 * @see Character#isUnicodeIdentifierStart(char) 5761 * @see Character#isUpperCase(char) 5762 */ 5763 public static boolean isLetter(char ch) { 5764 return isLetter((int)ch); 5765 } 5766 5767 /** 5768 * Determines if the specified character (Unicode code point) is a letter. 5769 * <p> 5770 * A character is considered to be a letter if its general 5771 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5772 * is any of the following: 5773 * <ul> 5774 * <li> {@code UPPERCASE_LETTER} 5775 * <li> {@code LOWERCASE_LETTER} 5776 * <li> {@code TITLECASE_LETTER} 5777 * <li> {@code MODIFIER_LETTER} 5778 * <li> {@code OTHER_LETTER} 5779 * </ul> 5780 * 5781 * Not all letters have case. Many characters are 5782 * letters but are neither uppercase nor lowercase nor titlecase. 5783 * 5784 * @param codePoint the character (Unicode code point) to be tested. 5785 * @return {@code true} if the character is a letter; 5786 * {@code false} otherwise. 
5787 * @see Character#isDigit(int) 5788 * @see Character#isJavaIdentifierStart(int) 5789 * @see Character#isLetterOrDigit(int) 5790 * @see Character#isLowerCase(int) 5791 * @see Character#isTitleCase(int) 5792 * @see Character#isUnicodeIdentifierStart(int) 5793 * @see Character#isUpperCase(int) 5794 * @since 1.5 5795 */ 5796 public static boolean isLetter(int codePoint) { 5797 return ((((1 << Character.UPPERCASE_LETTER) | 5798 (1 << Character.LOWERCASE_LETTER) | 5799 (1 << Character.TITLECASE_LETTER) | 5800 (1 << Character.MODIFIER_LETTER) | 5801 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5802 != 0; 5803 } 5804 5805 /** 5806 * Determines if the specified character is a letter or digit. 5807 * <p> 5808 * A character is considered to be a letter or digit if either 5809 * {@code Character.isLetter(char ch)} or 5810 * {@code Character.isDigit(char ch)} returns 5811 * {@code true} for the character. 5812 * 5813 * <p><b>Note:</b> This method cannot handle <a 5814 * href="#supplementary"> supplementary characters</a>. To support 5815 * all Unicode characters, including supplementary characters, use 5816 * the {@link #isLetterOrDigit(int)} method. 5817 * 5818 * @param ch the character to be tested. 5819 * @return {@code true} if the character is a letter or digit; 5820 * {@code false} otherwise. 5821 * @see Character#isDigit(char) 5822 * @see Character#isJavaIdentifierPart(char) 5823 * @see Character#isJavaLetter(char) 5824 * @see Character#isJavaLetterOrDigit(char) 5825 * @see Character#isLetter(char) 5826 * @see Character#isUnicodeIdentifierPart(char) 5827 * @since 1.0.2 5828 */ 5829 public static boolean isLetterOrDigit(char ch) { 5830 return isLetterOrDigit((int)ch); 5831 } 5832 5833 /** 5834 * Determines if the specified character (Unicode code point) is a letter or digit. 
5835 * <p> 5836 * A character is considered to be a letter or digit if either 5837 * {@link #isLetter(int) isLetter(codePoint)} or 5838 * {@link #isDigit(int) isDigit(codePoint)} returns 5839 * {@code true} for the character. 5840 * 5841 * @param codePoint the character (Unicode code point) to be tested. 5842 * @return {@code true} if the character is a letter or digit; 5843 * {@code false} otherwise. 5844 * @see Character#isDigit(int) 5845 * @see Character#isJavaIdentifierPart(int) 5846 * @see Character#isLetter(int) 5847 * @see Character#isUnicodeIdentifierPart(int) 5848 * @since 1.5 5849 */ 5850 public static boolean isLetterOrDigit(int codePoint) { 5851 return ((((1 << Character.UPPERCASE_LETTER) | 5852 (1 << Character.LOWERCASE_LETTER) | 5853 (1 << Character.TITLECASE_LETTER) | 5854 (1 << Character.MODIFIER_LETTER) | 5855 (1 << Character.OTHER_LETTER) | 5856 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5857 != 0; 5858 } 5859 5860 /** 5861 * Determines if the specified character is permissible as the first 5862 * character in a Java identifier. 5863 * <p> 5864 * A character may start a Java identifier if and only if 5865 * one of the following is true: 5866 * <ul> 5867 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5868 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5869 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5870 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5871 * </ul> 5872 * 5873 * @param ch the character to be tested. 5874 * @return {@code true} if the character may start a Java 5875 * identifier; {@code false} otherwise. 
5876 * @see Character#isJavaLetterOrDigit(char) 5877 * @see Character#isJavaIdentifierStart(char) 5878 * @see Character#isJavaIdentifierPart(char) 5879 * @see Character#isLetter(char) 5880 * @see Character#isLetterOrDigit(char) 5881 * @see Character#isUnicodeIdentifierStart(char) 5882 * @since 1.0.2 5883 * @deprecated Replaced by isJavaIdentifierStart(char). 5884 */ 5885 @Deprecated 5886 public static boolean isJavaLetter(char ch) { 5887 return isJavaIdentifierStart(ch); 5888 } 5889 5890 /** 5891 * Determines if the specified character may be part of a Java 5892 * identifier as other than the first character. 5893 * <p> 5894 * A character may be part of a Java identifier if and only if any 5895 * of the following are true: 5896 * <ul> 5897 * <li> it is a letter 5898 * <li> it is a currency symbol (such as {@code '$'}) 5899 * <li> it is a connecting punctuation character (such as {@code '_'}) 5900 * <li> it is a digit 5901 * <li> it is a numeric letter (such as a Roman numeral character) 5902 * <li> it is a combining mark 5903 * <li> it is a non-spacing mark 5904 * <li> {@code isIdentifierIgnorable} returns 5905 * {@code true} for the character. 5906 * </ul> 5907 * 5908 * @param ch the character to be tested. 5909 * @return {@code true} if the character may be part of a 5910 * Java identifier; {@code false} otherwise. 5911 * @see Character#isJavaLetter(char) 5912 * @see Character#isJavaIdentifierStart(char) 5913 * @see Character#isJavaIdentifierPart(char) 5914 * @see Character#isLetter(char) 5915 * @see Character#isLetterOrDigit(char) 5916 * @see Character#isUnicodeIdentifierPart(char) 5917 * @see Character#isIdentifierIgnorable(char) 5918 * @since 1.0.2 5919 * @deprecated Replaced by isJavaIdentifierPart(char). 5920 */ 5921 @Deprecated 5922 public static boolean isJavaLetterOrDigit(char ch) { 5923 return isJavaIdentifierPart(ch); 5924 } 5925 5926 /** 5927 * Determines if the specified character (Unicode code point) is an alphabet. 
5928 * <p> 5929 * A character is considered to be alphabetic if its general category type, 5930 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 5931 * the following: 5932 * <ul> 5933 * <li> <code>UPPERCASE_LETTER</code> 5934 * <li> <code>LOWERCASE_LETTER</code> 5935 * <li> <code>TITLECASE_LETTER</code> 5936 * <li> <code>MODIFIER_LETTER</code> 5937 * <li> <code>OTHER_LETTER</code> 5938 * <li> <code>LETTER_NUMBER</code> 5939 * </ul> 5940 * or it has contributory property Other_Alphabetic as defined by the 5941 * Unicode Standard. 5942 * 5943 * @param codePoint the character (Unicode code point) to be tested. 5944 * @return <code>true</code> if the character is a Unicode alphabet 5945 * character, <code>false</code> otherwise. 5946 * @since 1.7 5947 */ 5948 public static boolean isAlphabetic(int codePoint) { 5949 return (((((1 << Character.UPPERCASE_LETTER) | 5950 (1 << Character.LOWERCASE_LETTER) | 5951 (1 << Character.TITLECASE_LETTER) | 5952 (1 << Character.MODIFIER_LETTER) | 5953 (1 << Character.OTHER_LETTER) | 5954 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 5955 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 5956 } 5957 5958 /** 5959 * Determines if the specified character (Unicode code point) is a CJKV 5960 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 5961 * the Unicode Standard. 5962 * 5963 * @param codePoint the character (Unicode code point) to be tested. 5964 * @return <code>true</code> if the character is a Unicode ideograph 5965 * character, <code>false</code> otherwise. 5966 * @since 1.7 5967 */ 5968 public static boolean isIdeographic(int codePoint) { 5969 return CharacterData.of(codePoint).isIdeographic(codePoint); 5970 } 5971 5972 /** 5973 * Determines if the specified character is 5974 * permissible as the first character in a Java identifier. 
5975 * <p> 5976 * A character may start a Java identifier if and only if 5977 * one of the following conditions is true: 5978 * <ul> 5979 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5980 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5981 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5982 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5983 * </ul> 5984 * 5985 * <p><b>Note:</b> This method cannot handle <a 5986 * href="#supplementary"> supplementary characters</a>. To support 5987 * all Unicode characters, including supplementary characters, use 5988 * the {@link #isJavaIdentifierStart(int)} method. 5989 * 5990 * @param ch the character to be tested. 5991 * @return {@code true} if the character may start a Java identifier; 5992 * {@code false} otherwise. 5993 * @see Character#isJavaIdentifierPart(char) 5994 * @see Character#isLetter(char) 5995 * @see Character#isUnicodeIdentifierStart(char) 5996 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5997 * @since 1.1 5998 */ 5999 public static boolean isJavaIdentifierStart(char ch) { 6000 return isJavaIdentifierStart((int)ch); 6001 } 6002 6003 /** 6004 * Determines if the character (Unicode code point) is 6005 * permissible as the first character in a Java identifier. 6006 * <p> 6007 * A character may start a Java identifier if and only if 6008 * one of the following conditions is true: 6009 * <ul> 6010 * <li> {@link #isLetter(int) isLetter(codePoint)} 6011 * returns {@code true} 6012 * <li> {@link #getType(int) getType(codePoint)} 6013 * returns {@code LETTER_NUMBER} 6014 * <li> the referenced character is a currency symbol (such as {@code '$'}) 6015 * <li> the referenced character is a connecting punctuation character 6016 * (such as {@code '_'}). 6017 * </ul> 6018 * 6019 * @param codePoint the character (Unicode code point) to be tested. 
6020 * @return {@code true} if the character may start a Java identifier; 6021 * {@code false} otherwise. 6022 * @see Character#isJavaIdentifierPart(int) 6023 * @see Character#isLetter(int) 6024 * @see Character#isUnicodeIdentifierStart(int) 6025 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6026 * @since 1.5 6027 */ 6028 public static boolean isJavaIdentifierStart(int codePoint) { 6029 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 6030 } 6031 6032 /** 6033 * Determines if the specified character may be part of a Java 6034 * identifier as other than the first character. 6035 * <p> 6036 * A character may be part of a Java identifier if any of the following 6037 * are true: 6038 * <ul> 6039 * <li> it is a letter 6040 * <li> it is a currency symbol (such as {@code '$'}) 6041 * <li> it is a connecting punctuation character (such as {@code '_'}) 6042 * <li> it is a digit 6043 * <li> it is a numeric letter (such as a Roman numeral character) 6044 * <li> it is a combining mark 6045 * <li> it is a non-spacing mark 6046 * <li> {@code isIdentifierIgnorable} returns 6047 * {@code true} for the character 6048 * </ul> 6049 * 6050 * <p><b>Note:</b> This method cannot handle <a 6051 * href="#supplementary"> supplementary characters</a>. To support 6052 * all Unicode characters, including supplementary characters, use 6053 * the {@link #isJavaIdentifierPart(int)} method. 6054 * 6055 * @param ch the character to be tested. 6056 * @return {@code true} if the character may be part of a 6057 * Java identifier; {@code false} otherwise. 
6058 * @see Character#isIdentifierIgnorable(char) 6059 * @see Character#isJavaIdentifierStart(char) 6060 * @see Character#isLetterOrDigit(char) 6061 * @see Character#isUnicodeIdentifierPart(char) 6062 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6063 * @since 1.1 6064 */ 6065 public static boolean isJavaIdentifierPart(char ch) { 6066 return isJavaIdentifierPart((int)ch); 6067 } 6068 6069 /** 6070 * Determines if the character (Unicode code point) may be part of a Java 6071 * identifier as other than the first character. 6072 * <p> 6073 * A character may be part of a Java identifier if any of the following 6074 * are true: 6075 * <ul> 6076 * <li> it is a letter 6077 * <li> it is a currency symbol (such as {@code '$'}) 6078 * <li> it is a connecting punctuation character (such as {@code '_'}) 6079 * <li> it is a digit 6080 * <li> it is a numeric letter (such as a Roman numeral character) 6081 * <li> it is a combining mark 6082 * <li> it is a non-spacing mark 6083 * <li> {@link #isIdentifierIgnorable(int) 6084 * isIdentifierIgnorable(codePoint)} returns {@code true} for 6085 * the character 6086 * </ul> 6087 * 6088 * @param codePoint the character (Unicode code point) to be tested. 6089 * @return {@code true} if the character may be part of a 6090 * Java identifier; {@code false} otherwise. 6091 * @see Character#isIdentifierIgnorable(int) 6092 * @see Character#isJavaIdentifierStart(int) 6093 * @see Character#isLetterOrDigit(int) 6094 * @see Character#isUnicodeIdentifierPart(int) 6095 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6096 * @since 1.5 6097 */ 6098 public static boolean isJavaIdentifierPart(int codePoint) { 6099 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 6100 } 6101 6102 /** 6103 * Determines if the specified character is permissible as the 6104 * first character in a Unicode identifier. 
6105 * <p> 6106 * A character may start a Unicode identifier if and only if 6107 * one of the following conditions is true: 6108 * <ul> 6109 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 6110 * <li> {@link #getType(char) getType(ch)} returns 6111 * {@code LETTER_NUMBER}. 6112 * </ul> 6113 * 6114 * <p><b>Note:</b> This method cannot handle <a 6115 * href="#supplementary"> supplementary characters</a>. To support 6116 * all Unicode characters, including supplementary characters, use 6117 * the {@link #isUnicodeIdentifierStart(int)} method. 6118 * 6119 * @param ch the character to be tested. 6120 * @return {@code true} if the character may start a Unicode 6121 * identifier; {@code false} otherwise. 6122 * @see Character#isJavaIdentifierStart(char) 6123 * @see Character#isLetter(char) 6124 * @see Character#isUnicodeIdentifierPart(char) 6125 * @since 1.1 6126 */ 6127 public static boolean isUnicodeIdentifierStart(char ch) { 6128 return isUnicodeIdentifierStart((int)ch); 6129 } 6130 6131 /** 6132 * Determines if the specified character (Unicode code point) is permissible as the 6133 * first character in a Unicode identifier. 6134 * <p> 6135 * A character may start a Unicode identifier if and only if 6136 * one of the following conditions is true: 6137 * <ul> 6138 * <li> {@link #isLetter(int) isLetter(codePoint)} 6139 * returns {@code true} 6140 * <li> {@link #getType(int) getType(codePoint)} 6141 * returns {@code LETTER_NUMBER}. 6142 * </ul> 6143 * @param codePoint the character (Unicode code point) to be tested. 6144 * @return {@code true} if the character may start a Unicode 6145 * identifier; {@code false} otherwise. 
6146 * @see Character#isJavaIdentifierStart(int) 6147 * @see Character#isLetter(int) 6148 * @see Character#isUnicodeIdentifierPart(int) 6149 * @since 1.5 6150 */ 6151 public static boolean isUnicodeIdentifierStart(int codePoint) { 6152 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 6153 } 6154 6155 /** 6156 * Determines if the specified character may be part of a Unicode 6157 * identifier as other than the first character. 6158 * <p> 6159 * A character may be part of a Unicode identifier if and only if 6160 * one of the following statements is true: 6161 * <ul> 6162 * <li> it is a letter 6163 * <li> it is a connecting punctuation character (such as {@code '_'}) 6164 * <li> it is a digit 6165 * <li> it is a numeric letter (such as a Roman numeral character) 6166 * <li> it is a combining mark 6167 * <li> it is a non-spacing mark 6168 * <li> {@code isIdentifierIgnorable} returns 6169 * {@code true} for this character. 6170 * </ul> 6171 * 6172 * <p><b>Note:</b> This method cannot handle <a 6173 * href="#supplementary"> supplementary characters</a>. To support 6174 * all Unicode characters, including supplementary characters, use 6175 * the {@link #isUnicodeIdentifierPart(int)} method. 6176 * 6177 * @param ch the character to be tested. 6178 * @return {@code true} if the character may be part of a 6179 * Unicode identifier; {@code false} otherwise. 6180 * @see Character#isIdentifierIgnorable(char) 6181 * @see Character#isJavaIdentifierPart(char) 6182 * @see Character#isLetterOrDigit(char) 6183 * @see Character#isUnicodeIdentifierStart(char) 6184 * @since 1.1 6185 */ 6186 public static boolean isUnicodeIdentifierPart(char ch) { 6187 return isUnicodeIdentifierPart((int)ch); 6188 } 6189 6190 /** 6191 * Determines if the specified character (Unicode code point) may be part of a Unicode 6192 * identifier as other than the first character. 
6193 * <p> 6194 * A character may be part of a Unicode identifier if and only if 6195 * one of the following statements is true: 6196 * <ul> 6197 * <li> it is a letter 6198 * <li> it is a connecting punctuation character (such as {@code '_'}) 6199 * <li> it is a digit 6200 * <li> it is a numeric letter (such as a Roman numeral character) 6201 * <li> it is a combining mark 6202 * <li> it is a non-spacing mark 6203 * <li> {@code isIdentifierIgnorable} returns 6204 * {@code true} for this character. 6205 * </ul> 6206 * @param codePoint the character (Unicode code point) to be tested. 6207 * @return {@code true} if the character may be part of a 6208 * Unicode identifier; {@code false} otherwise. 6209 * @see Character#isIdentifierIgnorable(int) 6210 * @see Character#isJavaIdentifierPart(int) 6211 * @see Character#isLetterOrDigit(int) 6212 * @see Character#isUnicodeIdentifierStart(int) 6213 * @since 1.5 6214 */ 6215 public static boolean isUnicodeIdentifierPart(int codePoint) { 6216 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 6217 } 6218 6219 /** 6220 * Determines if the specified character should be regarded as 6221 * an ignorable character in a Java identifier or a Unicode identifier. 6222 * <p> 6223 * The following Unicode characters are ignorable in a Java identifier 6224 * or a Unicode identifier: 6225 * <ul> 6226 * <li>ISO control characters that are not whitespace 6227 * <ul> 6228 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6229 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6230 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6231 * </ul> 6232 * 6233 * <li>all characters that have the {@code FORMAT} general 6234 * category value 6235 * </ul> 6236 * 6237 * <p><b>Note:</b> This method cannot handle <a 6238 * href="#supplementary"> supplementary characters</a>. To support 6239 * all Unicode characters, including supplementary characters, use 6240 * the {@link #isIdentifierIgnorable(int)} method. 
6241 * 6242 * @param ch the character to be tested. 6243 * @return {@code true} if the character is an ignorable control 6244 * character that may be part of a Java or Unicode identifier; 6245 * {@code false} otherwise. 6246 * @see Character#isJavaIdentifierPart(char) 6247 * @see Character#isUnicodeIdentifierPart(char) 6248 * @since 1.1 6249 */ 6250 public static boolean isIdentifierIgnorable(char ch) { 6251 return isIdentifierIgnorable((int)ch); 6252 } 6253 6254 /** 6255 * Determines if the specified character (Unicode code point) should be regarded as 6256 * an ignorable character in a Java identifier or a Unicode identifier. 6257 * <p> 6258 * The following Unicode characters are ignorable in a Java identifier 6259 * or a Unicode identifier: 6260 * <ul> 6261 * <li>ISO control characters that are not whitespace 6262 * <ul> 6263 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6264 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6265 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6266 * </ul> 6267 * 6268 * <li>all characters that have the {@code FORMAT} general 6269 * category value 6270 * </ul> 6271 * 6272 * @param codePoint the character (Unicode code point) to be tested. 6273 * @return {@code true} if the character is an ignorable control 6274 * character that may be part of a Java or Unicode identifier; 6275 * {@code false} otherwise. 6276 * @see Character#isJavaIdentifierPart(int) 6277 * @see Character#isUnicodeIdentifierPart(int) 6278 * @since 1.5 6279 */ 6280 public static boolean isIdentifierIgnorable(int codePoint) { 6281 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6282 } 6283 6284 /** 6285 * Converts the character argument to lowercase using case 6286 * mapping information from the UnicodeData file. 
6287 * <p> 6288 * Note that 6289 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 6290 * does not always return {@code true} for some ranges of 6291 * characters, particularly those that are symbols or ideographs. 6292 * 6293 * <p>In general, {@link String#toLowerCase()} should be used to map 6294 * characters to lowercase. {@code String} case mapping methods 6295 * have several benefits over {@code Character} case mapping methods. 6296 * {@code String} case mapping methods can perform locale-sensitive 6297 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6298 * the {@code Character} case mapping methods cannot. 6299 * 6300 * <p><b>Note:</b> This method cannot handle <a 6301 * href="#supplementary"> supplementary characters</a>. To support 6302 * all Unicode characters, including supplementary characters, use 6303 * the {@link #toLowerCase(int)} method. 6304 * 6305 * @param ch the character to be converted. 6306 * @return the lowercase equivalent of the character, if any; 6307 * otherwise, the character itself. 6308 * @see Character#isLowerCase(char) 6309 * @see String#toLowerCase() 6310 */ 6311 public static char toLowerCase(char ch) { 6312 return (char)toLowerCase((int)ch); 6313 } 6314 6315 /** 6316 * Converts the character (Unicode code point) argument to 6317 * lowercase using case mapping information from the UnicodeData 6318 * file. 6319 * 6320 * <p> Note that 6321 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 6322 * does not always return {@code true} for some ranges of 6323 * characters, particularly those that are symbols or ideographs. 6324 * 6325 * <p>In general, {@link String#toLowerCase()} should be used to map 6326 * characters to lowercase. {@code String} case mapping methods 6327 * have several benefits over {@code Character} case mapping methods. 
6328 * {@code String} case mapping methods can perform locale-sensitive 6329 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6330 * the {@code Character} case mapping methods cannot. 6331 * 6332 * @param codePoint the character (Unicode code point) to be converted. 6333 * @return the lowercase equivalent of the character (Unicode code 6334 * point), if any; otherwise, the character itself. 6335 * @see Character#isLowerCase(int) 6336 * @see String#toLowerCase() 6337 * 6338 * @since 1.5 6339 */ 6340 public static int toLowerCase(int codePoint) { 6341 return CharacterData.of(codePoint).toLowerCase(codePoint); 6342 } 6343 6344 /** 6345 * Converts the character argument to uppercase using case mapping 6346 * information from the UnicodeData file. 6347 * <p> 6348 * Note that 6349 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 6350 * does not always return {@code true} for some ranges of 6351 * characters, particularly those that are symbols or ideographs. 6352 * 6353 * <p>In general, {@link String#toUpperCase()} should be used to map 6354 * characters to uppercase. {@code String} case mapping methods 6355 * have several benefits over {@code Character} case mapping methods. 6356 * {@code String} case mapping methods can perform locale-sensitive 6357 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6358 * the {@code Character} case mapping methods cannot. 6359 * 6360 * <p><b>Note:</b> This method cannot handle <a 6361 * href="#supplementary"> supplementary characters</a>. To support 6362 * all Unicode characters, including supplementary characters, use 6363 * the {@link #toUpperCase(int)} method. 6364 * 6365 * @param ch the character to be converted. 6366 * @return the uppercase equivalent of the character, if any; 6367 * otherwise, the character itself. 
6368 * @see Character#isUpperCase(char) 6369 * @see String#toUpperCase() 6370 */ 6371 public static char toUpperCase(char ch) { 6372 return (char)toUpperCase((int)ch); 6373 } 6374 6375 /** 6376 * Converts the character (Unicode code point) argument to 6377 * uppercase using case mapping information from the UnicodeData 6378 * file. 6379 * 6380 * <p>Note that 6381 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 6382 * does not always return {@code true} for some ranges of 6383 * characters, particularly those that are symbols or ideographs. 6384 * 6385 * <p>In general, {@link String#toUpperCase()} should be used to map 6386 * characters to uppercase. {@code String} case mapping methods 6387 * have several benefits over {@code Character} case mapping methods. 6388 * {@code String} case mapping methods can perform locale-sensitive 6389 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6390 * the {@code Character} case mapping methods cannot. 6391 * 6392 * @param codePoint the character (Unicode code point) to be converted. 6393 * @return the uppercase equivalent of the character, if any; 6394 * otherwise, the character itself. 6395 * @see Character#isUpperCase(int) 6396 * @see String#toUpperCase() 6397 * 6398 * @since 1.5 6399 */ 6400 public static int toUpperCase(int codePoint) { 6401 return CharacterData.of(codePoint).toUpperCase(codePoint); 6402 } 6403 6404 /** 6405 * Converts the character argument to titlecase using case mapping 6406 * information from the UnicodeData file. If a character has no 6407 * explicit titlecase mapping and is not itself a titlecase char 6408 * according to UnicodeData, then the uppercase mapping is 6409 * returned as an equivalent titlecase mapping. If the 6410 * {@code char} argument is already a titlecase 6411 * {@code char}, the same {@code char} value will be 6412 * returned. 
6413 * <p> 6414 * Note that 6415 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 6416 * does not always return {@code true} for some ranges of 6417 * characters. 6418 * 6419 * <p><b>Note:</b> This method cannot handle <a 6420 * href="#supplementary"> supplementary characters</a>. To support 6421 * all Unicode characters, including supplementary characters, use 6422 * the {@link #toTitleCase(int)} method. 6423 * 6424 * @param ch the character to be converted. 6425 * @return the titlecase equivalent of the character, if any; 6426 * otherwise, the character itself. 6427 * @see Character#isTitleCase(char) 6428 * @see Character#toLowerCase(char) 6429 * @see Character#toUpperCase(char) 6430 * @since 1.0.2 6431 */ 6432 public static char toTitleCase(char ch) { 6433 return (char)toTitleCase((int)ch); 6434 } 6435 6436 /** 6437 * Converts the character (Unicode code point) argument to titlecase using case mapping 6438 * information from the UnicodeData file. If a character has no 6439 * explicit titlecase mapping and is not itself a titlecase char 6440 * according to UnicodeData, then the uppercase mapping is 6441 * returned as an equivalent titlecase mapping. If the 6442 * character argument is already a titlecase 6443 * character, the same character value will be 6444 * returned. 6445 * 6446 * <p>Note that 6447 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 6448 * does not always return {@code true} for some ranges of 6449 * characters. 6450 * 6451 * @param codePoint the character (Unicode code point) to be converted. 6452 * @return the titlecase equivalent of the character, if any; 6453 * otherwise, the character itself. 
6454 * @see Character#isTitleCase(int) 6455 * @see Character#toLowerCase(int) 6456 * @see Character#toUpperCase(int) 6457 * @since 1.5 6458 */ 6459 public static int toTitleCase(int codePoint) { 6460 return CharacterData.of(codePoint).toTitleCase(codePoint); 6461 } 6462 6463 /** 6464 * Returns the numeric value of the character {@code ch} in the 6465 * specified radix. 6466 * <p> 6467 * If the radix is not in the range {@code MIN_RADIX} ≤ 6468 * {@code radix} ≤ {@code MAX_RADIX} or if the 6469 * value of {@code ch} is not a valid digit in the specified 6470 * radix, {@code -1} is returned. A character is a valid digit 6471 * if at least one of the following is true: 6472 * <ul> 6473 * <li>The method {@code isDigit} is {@code true} of the character 6474 * and the Unicode decimal digit value of the character (or its 6475 * single-character decomposition) is less than the specified radix. 6476 * In this case the decimal digit value is returned. 6477 * <li>The character is one of the uppercase Latin letters 6478 * {@code 'A'} through {@code 'Z'} and its code is less than 6479 * {@code radix + 'A' - 10}. 6480 * In this case, {@code ch - 'A' + 10} 6481 * is returned. 6482 * <li>The character is one of the lowercase Latin letters 6483 * {@code 'a'} through {@code 'z'} and its code is less than 6484 * {@code radix + 'a' - 10}. 6485 * In this case, {@code ch - 'a' + 10} 6486 * is returned. 6487 * <li>The character is one of the fullwidth uppercase Latin letters A 6488 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6489 * and its code is less than 6490 * {@code radix + '\u005CuFF21' - 10}. 6491 * In this case, {@code ch - '\u005CuFF21' + 10} 6492 * is returned. 6493 * <li>The character is one of the fullwidth lowercase Latin letters a 6494 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6495 * and its code is less than 6496 * {@code radix + '\u005CuFF41' - 10}. 6497 * In this case, {@code ch - '\u005CuFF41' + 10} 6498 * is returned. 
6499 * </ul> 6500 * 6501 * <p><b>Note:</b> This method cannot handle <a 6502 * href="#supplementary"> supplementary characters</a>. To support 6503 * all Unicode characters, including supplementary characters, use 6504 * the {@link #digit(int, int)} method. 6505 * 6506 * @param ch the character to be converted. 6507 * @param radix the radix. 6508 * @return the numeric value represented by the character in the 6509 * specified radix. 6510 * @see Character#forDigit(int, int) 6511 * @see Character#isDigit(char) 6512 */ 6513 public static int digit(char ch, int radix) { 6514 return digit((int)ch, radix); 6515 } 6516 6517 /** 6518 * Returns the numeric value of the specified character (Unicode 6519 * code point) in the specified radix. 6520 * 6521 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 6522 * {@code radix} ≤ {@code MAX_RADIX} or if the 6523 * character is not a valid digit in the specified 6524 * radix, {@code -1} is returned. A character is a valid digit 6525 * if at least one of the following is true: 6526 * <ul> 6527 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 6528 * and the Unicode decimal digit value of the character (or its 6529 * single-character decomposition) is less than the specified radix. 6530 * In this case the decimal digit value is returned. 6531 * <li>The character is one of the uppercase Latin letters 6532 * {@code 'A'} through {@code 'Z'} and its code is less than 6533 * {@code radix + 'A' - 10}. 6534 * In this case, {@code codePoint - 'A' + 10} 6535 * is returned. 6536 * <li>The character is one of the lowercase Latin letters 6537 * {@code 'a'} through {@code 'z'} and its code is less than 6538 * {@code radix + 'a' - 10}. 6539 * In this case, {@code codePoint - 'a' + 10} 6540 * is returned. 
6541 * <li>The character is one of the fullwidth uppercase Latin letters A 6542 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6543 * and its code is less than 6544 * {@code radix + '\u005CuFF21' - 10}. 6545 * In this case, 6546 * {@code codePoint - '\u005CuFF21' + 10} 6547 * is returned. 6548 * <li>The character is one of the fullwidth lowercase Latin letters a 6549 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6550 * and its code is less than 6551 * {@code radix + '\u005CuFF41'- 10}. 6552 * In this case, 6553 * {@code codePoint - '\u005CuFF41' + 10} 6554 * is returned. 6555 * </ul> 6556 * 6557 * @param codePoint the character (Unicode code point) to be converted. 6558 * @param radix the radix. 6559 * @return the numeric value represented by the character in the 6560 * specified radix. 6561 * @see Character#forDigit(int, int) 6562 * @see Character#isDigit(int) 6563 * @since 1.5 6564 */ 6565 public static int digit(int codePoint, int radix) { 6566 return CharacterData.of(codePoint).digit(codePoint, radix); 6567 } 6568 6569 /** 6570 * Returns the {@code int} value that the specified Unicode 6571 * character represents. For example, the character 6572 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 6573 * an int with a value of 50. 6574 * <p> 6575 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6576 * {@code '\u005Cu005A'}), lowercase 6577 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6578 * full width variant ({@code '\u005CuFF21'} through 6579 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6580 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6581 * through 35. This is independent of the Unicode specification, 6582 * which does not assign numeric values to these {@code char} 6583 * values. 6584 * <p> 6585 * If the character does not have a numeric value, then -1 is returned. 
6586 * If the character has a numeric value that cannot be represented as a 6587 * nonnegative integer (for example, a fractional value), then -2 6588 * is returned. 6589 * 6590 * <p><b>Note:</b> This method cannot handle <a 6591 * href="#supplementary"> supplementary characters</a>. To support 6592 * all Unicode characters, including supplementary characters, use 6593 * the {@link #getNumericValue(int)} method. 6594 * 6595 * @param ch the character to be converted. 6596 * @return the numeric value of the character, as a nonnegative {@code int} 6597 * value; -2 if the character has a numeric value that is not a 6598 * nonnegative integer; -1 if the character has no numeric value. 6599 * @see Character#forDigit(int, int) 6600 * @see Character#isDigit(char) 6601 * @since 1.1 6602 */ 6603 public static int getNumericValue(char ch) { 6604 return getNumericValue((int)ch); 6605 } 6606 6607 /** 6608 * Returns the {@code int} value that the specified 6609 * character (Unicode code point) represents. For example, the character 6610 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 6611 * an {@code int} with a value of 50. 6612 * <p> 6613 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6614 * {@code '\u005Cu005A'}), lowercase 6615 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6616 * full width variant ({@code '\u005CuFF21'} through 6617 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6618 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6619 * through 35. This is independent of the Unicode specification, 6620 * which does not assign numeric values to these {@code char} 6621 * values. 6622 * <p> 6623 * If the character does not have a numeric value, then -1 is returned. 6624 * If the character has a numeric value that cannot be represented as a 6625 * nonnegative integer (for example, a fractional value), then -2 6626 * is returned. 
6627 * 6628 * @param codePoint the character (Unicode code point) to be converted. 6629 * @return the numeric value of the character, as a nonnegative {@code int} 6630 * value; -2 if the character has a numeric value that is not a 6631 * nonnegative integer; -1 if the character has no numeric value. 6632 * @see Character#forDigit(int, int) 6633 * @see Character#isDigit(int) 6634 * @since 1.5 6635 */ 6636 public static int getNumericValue(int codePoint) { 6637 return CharacterData.of(codePoint).getNumericValue(codePoint); 6638 } 6639 6640 /** 6641 * Determines if the specified character is ISO-LATIN-1 white space. 6642 * This method returns {@code true} for the following five 6643 * characters only: 6644 * <table summary="truechars"> 6645 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 6646 * <td>{@code HORIZONTAL TABULATION}</td></tr> 6647 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 6648 * <td>{@code NEW LINE}</td></tr> 6649 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 6650 * <td>{@code FORM FEED}</td></tr> 6651 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 6652 * <td>{@code CARRIAGE RETURN}</td></tr> 6653 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 6654 * <td>{@code SPACE}</td></tr> 6655 * </table> 6656 * 6657 * @param ch the character to be tested. 6658 * @return {@code true} if the character is ISO-LATIN-1 white 6659 * space; {@code false} otherwise. 6660 * @see Character#isSpaceChar(char) 6661 * @see Character#isWhitespace(char) 6662 * @deprecated Replaced by isWhitespace(char). 6663 */ 6664 @Deprecated 6665 public static boolean isSpace(char ch) { 6666 return (ch <= 0x0020) && 6667 (((((1L << 0x0009) | 6668 (1L << 0x000A) | 6669 (1L << 0x000C) | 6670 (1L << 0x000D) | 6671 (1L << 0x0020)) >> ch) & 1L) != 0); 6672 } 6673 6674 6675 /** 6676 * Determines if the specified character is a Unicode space character. 
6677 * A character is considered to be a space character if and only if 6678 * it is specified to be a space character by the Unicode Standard. This 6679 * method returns true if the character's general category type is any of 6680 * the following: 6681 * <ul> 6682 * <li> {@code SPACE_SEPARATOR} 6683 * <li> {@code LINE_SEPARATOR} 6684 * <li> {@code PARAGRAPH_SEPARATOR} 6685 * </ul> 6686 * 6687 * <p><b>Note:</b> This method cannot handle <a 6688 * href="#supplementary"> supplementary characters</a>. To support 6689 * all Unicode characters, including supplementary characters, use 6690 * the {@link #isSpaceChar(int)} method. 6691 * 6692 * @param ch the character to be tested. 6693 * @return {@code true} if the character is a space character; 6694 * {@code false} otherwise. 6695 * @see Character#isWhitespace(char) 6696 * @since 1.1 6697 */ 6698 public static boolean isSpaceChar(char ch) { 6699 return isSpaceChar((int)ch); 6700 } 6701 6702 /** 6703 * Determines if the specified character (Unicode code point) is a 6704 * Unicode space character. A character is considered to be a 6705 * space character if and only if it is specified to be a space 6706 * character by the Unicode Standard. This method returns true if 6707 * the character's general category type is any of the following: 6708 * 6709 * <ul> 6710 * <li> {@link #SPACE_SEPARATOR} 6711 * <li> {@link #LINE_SEPARATOR} 6712 * <li> {@link #PARAGRAPH_SEPARATOR} 6713 * </ul> 6714 * 6715 * @param codePoint the character (Unicode code point) to be tested. 6716 * @return {@code true} if the character is a space character; 6717 * {@code false} otherwise. 
6718 * @see Character#isWhitespace(int) 6719 * @since 1.5 6720 */ 6721 public static boolean isSpaceChar(int codePoint) { 6722 return ((((1 << Character.SPACE_SEPARATOR) | 6723 (1 << Character.LINE_SEPARATOR) | 6724 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6725 != 0; 6726 } 6727 6728 /** 6729 * Determines if the specified character is white space according to Java. 6730 * A character is a Java whitespace character if and only if it satisfies 6731 * one of the following criteria: 6732 * <ul> 6733 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 6734 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 6735 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6736 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6737 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6738 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6739 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6740 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6741 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6742 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6743 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6744 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6745 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6746 * </ul> 6747 * 6748 * <p><b>Note:</b> This method cannot handle <a 6749 * href="#supplementary"> supplementary characters</a>. To support 6750 * all Unicode characters, including supplementary characters, use 6751 * the {@link #isWhitespace(int)} method. 6752 * 6753 * @param ch the character to be tested. 6754 * @return {@code true} if the character is a Java whitespace 6755 * character; {@code false} otherwise. 
6756 * @see Character#isSpaceChar(char) 6757 * @since 1.1 6758 */ 6759 public static boolean isWhitespace(char ch) { 6760 return isWhitespace((int)ch); 6761 } 6762 6763 /** 6764 * Determines if the specified character (Unicode code point) is 6765 * white space according to Java. A character is a Java 6766 * whitespace character if and only if it satisfies one of the 6767 * following criteria: 6768 * <ul> 6769 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6770 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6771 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6772 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6773 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6774 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6775 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6776 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6777 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6778 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6779 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6780 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6781 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6782 * </ul> 6783 * 6784 * @param codePoint the character (Unicode code point) to be tested. 6785 * @return {@code true} if the character is a Java whitespace 6786 * character; {@code false} otherwise. 6787 * @see Character#isSpaceChar(int) 6788 * @since 1.5 6789 */ 6790 public static boolean isWhitespace(int codePoint) { 6791 return CharacterData.of(codePoint).isWhitespace(codePoint); 6792 } 6793 6794 /** 6795 * Determines if the specified character is an ISO control 6796 * character. A character is considered to be an ISO control 6797 * character if its code is in the range {@code '\u005Cu0000'} 6798 * through {@code '\u005Cu001F'} or in the range 6799 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
6800 * 6801 * <p><b>Note:</b> This method cannot handle <a 6802 * href="#supplementary"> supplementary characters</a>. To support 6803 * all Unicode characters, including supplementary characters, use 6804 * the {@link #isISOControl(int)} method. 6805 * 6806 * @param ch the character to be tested. 6807 * @return {@code true} if the character is an ISO control character; 6808 * {@code false} otherwise. 6809 * 6810 * @see Character#isSpaceChar(char) 6811 * @see Character#isWhitespace(char) 6812 * @since 1.1 6813 */ 6814 public static boolean isISOControl(char ch) { 6815 return isISOControl((int)ch); 6816 } 6817 6818 /** 6819 * Determines if the referenced character (Unicode code point) is an ISO control 6820 * character. A character is considered to be an ISO control 6821 * character if its code is in the range {@code '\u005Cu0000'} 6822 * through {@code '\u005Cu001F'} or in the range 6823 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6824 * 6825 * @param codePoint the character (Unicode code point) to be tested. 6826 * @return {@code true} if the character is an ISO control character; 6827 * {@code false} otherwise. 6828 * @see Character#isSpaceChar(int) 6829 * @see Character#isWhitespace(int) 6830 * @since 1.5 6831 */ 6832 public static boolean isISOControl(int codePoint) { 6833 // Optimized form of: 6834 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6835 // (codePoint >= 0x7F && codePoint <= 0x9F); 6836 return codePoint <= 0x9F && 6837 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6838 } 6839 6840 /** 6841 * Returns a value indicating a character's general category. 6842 * 6843 * <p><b>Note:</b> This method cannot handle <a 6844 * href="#supplementary"> supplementary characters</a>. To support 6845 * all Unicode characters, including supplementary characters, use 6846 * the {@link #getType(int)} method. 6847 * 6848 * @param ch the character to be tested. 6849 * @return a value of type {@code int} representing the 6850 * character's general category. 
6851 * @see Character#COMBINING_SPACING_MARK 6852 * @see Character#CONNECTOR_PUNCTUATION 6853 * @see Character#CONTROL 6854 * @see Character#CURRENCY_SYMBOL 6855 * @see Character#DASH_PUNCTUATION 6856 * @see Character#DECIMAL_DIGIT_NUMBER 6857 * @see Character#ENCLOSING_MARK 6858 * @see Character#END_PUNCTUATION 6859 * @see Character#FINAL_QUOTE_PUNCTUATION 6860 * @see Character#FORMAT 6861 * @see Character#INITIAL_QUOTE_PUNCTUATION 6862 * @see Character#LETTER_NUMBER 6863 * @see Character#LINE_SEPARATOR 6864 * @see Character#LOWERCASE_LETTER 6865 * @see Character#MATH_SYMBOL 6866 * @see Character#MODIFIER_LETTER 6867 * @see Character#MODIFIER_SYMBOL 6868 * @see Character#NON_SPACING_MARK 6869 * @see Character#OTHER_LETTER 6870 * @see Character#OTHER_NUMBER 6871 * @see Character#OTHER_PUNCTUATION 6872 * @see Character#OTHER_SYMBOL 6873 * @see Character#PARAGRAPH_SEPARATOR 6874 * @see Character#PRIVATE_USE 6875 * @see Character#SPACE_SEPARATOR 6876 * @see Character#START_PUNCTUATION 6877 * @see Character#SURROGATE 6878 * @see Character#TITLECASE_LETTER 6879 * @see Character#UNASSIGNED 6880 * @see Character#UPPERCASE_LETTER 6881 * @since 1.1 6882 */ 6883 public static int getType(char ch) { 6884 return getType((int)ch); 6885 } 6886 6887 /** 6888 * Returns a value indicating a character's general category. 6889 * 6890 * @param codePoint the character (Unicode code point) to be tested. 6891 * @return a value of type {@code int} representing the 6892 * character's general category. 
6893 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6894 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6895 * @see Character#CONTROL CONTROL 6896 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6897 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6898 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6899 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6900 * @see Character#END_PUNCTUATION END_PUNCTUATION 6901 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6902 * @see Character#FORMAT FORMAT 6903 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6904 * @see Character#LETTER_NUMBER LETTER_NUMBER 6905 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6906 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6907 * @see Character#MATH_SYMBOL MATH_SYMBOL 6908 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6909 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6910 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6911 * @see Character#OTHER_LETTER OTHER_LETTER 6912 * @see Character#OTHER_NUMBER OTHER_NUMBER 6913 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6914 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6915 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6916 * @see Character#PRIVATE_USE PRIVATE_USE 6917 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6918 * @see Character#START_PUNCTUATION START_PUNCTUATION 6919 * @see Character#SURROGATE SURROGATE 6920 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6921 * @see Character#UNASSIGNED UNASSIGNED 6922 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6923 * @since 1.5 6924 */ 6925 public static int getType(int codePoint) { 6926 return CharacterData.of(codePoint).getType(codePoint); 6927 } 6928 6929 /** 6930 * Determines the character representation for a specific digit in 6931 * the specified radix. 
If the value of {@code radix} is not a 6932 * valid radix, or the value of {@code digit} is not a valid 6933 * digit in the specified radix, the null character 6934 * ({@code '\u005Cu0000'}) is returned. 6935 * <p> 6936 * The {@code radix} argument is valid if it is greater than or 6937 * equal to {@code MIN_RADIX} and less than or equal to 6938 * {@code MAX_RADIX}. The {@code digit} argument is valid if 6939 * {@code 0 <= digit < radix}. 6940 * <p> 6941 * If the digit is less than 10, then 6942 * {@code '0' + digit} is returned. Otherwise, the value 6943 * {@code 'a' + digit - 10} is returned. 6944 * 6945 * @param digit the number to convert to a character. 6946 * @param radix the radix. 6947 * @return the {@code char} representation of the specified digit 6948 * in the specified radix. 6949 * @see Character#MIN_RADIX 6950 * @see Character#MAX_RADIX 6951 * @see Character#digit(char, int) 6952 */ 6953 public static char forDigit(int digit, int radix) { 6954 if ((digit >= radix) || (digit < 0)) { 6955 return '\0'; 6956 } 6957 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6958 return '\0'; 6959 } 6960 if (digit < 10) { 6961 return (char)('0' + digit); 6962 } 6963 return (char)('a' - 10 + digit); 6964 } 6965 6966 /** 6967 * Returns the Unicode directionality property for the given 6968 * character. Character directionality is used to calculate the 6969 * visual ordering of text. The directionality value of undefined 6970 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 6971 * 6972 * <p><b>Note:</b> This method cannot handle <a 6973 * href="#supplementary"> supplementary characters</a>. To support 6974 * all Unicode characters, including supplementary characters, use 6975 * the {@link #getDirectionality(int)} method. 6976 * 6977 * @param ch {@code char} for which the directionality property 6978 * is requested. 6979 * @return the directionality property of the {@code char} value. 
6980 * 6981 * @see Character#DIRECTIONALITY_UNDEFINED 6982 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6983 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6984 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6985 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6986 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6987 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6988 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6989 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6990 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6991 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6992 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6993 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6994 * @see Character#DIRECTIONALITY_WHITESPACE 6995 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6996 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6997 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6998 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6999 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7000 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7001 * @since 1.4 7002 */ 7003 public static byte getDirectionality(char ch) { 7004 return getDirectionality((int)ch); 7005 } 7006 7007 /** 7008 * Returns the Unicode directionality property for the given 7009 * character (Unicode code point). Character directionality is 7010 * used to calculate the visual ordering of text. The 7011 * directionality value of undefined character is {@link 7012 * #DIRECTIONALITY_UNDEFINED}. 7013 * 7014 * @param codePoint the character (Unicode code point) for which 7015 * the directionality property is requested. 7016 * @return the directionality property of the character. 
7017 * 7018 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 7019 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 7020 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 7021 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 7022 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 7023 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 7024 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 7025 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 7026 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 7027 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 7028 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 7029 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 7030 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 7031 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 7032 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 7033 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 7034 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 7035 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 7036 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7037 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7038 * @since 1.5 7039 */ 7040 public static byte getDirectionality(int codePoint) { 7041 return CharacterData.of(codePoint).getDirectionality(codePoint); 7042 } 7043 7044 /** 7045 * 
Determines whether the character is mirrored according to the 7046 * Unicode specification. Mirrored characters should have their 7047 * glyphs horizontally mirrored when displayed in text that is 7048 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 7049 * PARENTHESIS is semantically defined to be an <i>opening 7050 * parenthesis</i>. This will appear as a "(" in text that is 7051 * left-to-right but as a ")" in text that is right-to-left. 7052 * 7053 * <p><b>Note:</b> This method cannot handle <a 7054 * href="#supplementary"> supplementary characters</a>. To support 7055 * all Unicode characters, including supplementary characters, use 7056 * the {@link #isMirrored(int)} method. 7057 * 7058 * @param ch {@code char} for which the mirrored property is requested 7059 * @return {@code true} if the char is mirrored, {@code false} 7060 * if the {@code char} is not mirrored or is not defined. 7061 * @since 1.4 7062 */ 7063 public static boolean isMirrored(char ch) { 7064 return isMirrored((int)ch); 7065 } 7066 7067 /** 7068 * Determines whether the specified character (Unicode code point) 7069 * is mirrored according to the Unicode specification. Mirrored 7070 * characters should have their glyphs horizontally mirrored when 7071 * displayed in text that is right-to-left. For example, 7072 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 7073 * defined to be an <i>opening parenthesis</i>. This will appear 7074 * as a "(" in text that is left-to-right but as a ")" in text 7075 * that is right-to-left. 7076 * 7077 * @param codePoint the character (Unicode code point) to be tested. 7078 * @return {@code true} if the character is mirrored, {@code false} 7079 * if the character is not mirrored or is not defined. 7080 * @since 1.5 7081 */ 7082 public static boolean isMirrored(int codePoint) { 7083 return CharacterData.of(codePoint).isMirrored(codePoint); 7084 } 7085 7086 /** 7087 * Compares two {@code Character} objects numerically. 
7088 * 7089 * @param anotherCharacter the {@code Character} to be compared. 7090 7091 * @return the value {@code 0} if the argument {@code Character} 7092 * is equal to this {@code Character}; a value less than 7093 * {@code 0} if this {@code Character} is numerically less 7094 * than the {@code Character} argument; and a value greater than 7095 * {@code 0} if this {@code Character} is numerically greater 7096 * than the {@code Character} argument (unsigned comparison). 7097 * Note that this is strictly a numerical comparison; it is not 7098 * locale-dependent. 7099 * @since 1.2 7100 */ 7101 public int compareTo(Character anotherCharacter) { 7102 return compare(this.value, anotherCharacter.value); 7103 } 7104 7105 /** 7106 * Compares two {@code char} values numerically. 7107 * The value returned is identical to what would be returned by: 7108 * <pre> 7109 * Character.valueOf(x).compareTo(Character.valueOf(y)) 7110 * </pre> 7111 * 7112 * @param x the first {@code char} to compare 7113 * @param y the second {@code char} to compare 7114 * @return the value {@code 0} if {@code x == y}; 7115 * a value less than {@code 0} if {@code x < y}; and 7116 * a value greater than {@code 0} if {@code x > y} 7117 * @since 1.7 7118 */ 7119 public static int compare(char x, char y) { 7120 return x - y; 7121 } 7122 7123 /** 7124 * Converts the character (Unicode code point) argument to uppercase using 7125 * information from the UnicodeData file. 7126 * 7127 * @param codePoint the character (Unicode code point) to be converted. 7128 * @return either the uppercase equivalent of the character, if 7129 * any, or an error flag ({@code Character.ERROR}) 7130 * that indicates that a 1:M {@code char} mapping exists. 
7131 * @see Character#isLowerCase(char) 7132 * @see Character#isUpperCase(char) 7133 * @see Character#toLowerCase(char) 7134 * @see Character#toTitleCase(char) 7135 * @since 1.4 7136 */ 7137 static int toUpperCaseEx(int codePoint) { 7138 assert isValidCodePoint(codePoint); 7139 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 7140 } 7141 7142 /** 7143 * Converts the character (Unicode code point) argument to uppercase using case 7144 * mapping information from the SpecialCasing file in the Unicode 7145 * specification. If a character has no explicit uppercase 7146 * mapping, then the {@code char} itself is returned in the 7147 * {@code char[]}. 7148 * 7149 * @param codePoint the character (Unicode code point) to be converted. 7150 * @return a {@code char[]} with the uppercased character. 7151 * @since 1.4 7152 */ 7153 static char[] toUpperCaseCharArray(int codePoint) { 7154 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 7155 assert isBmpCodePoint(codePoint); 7156 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 7157 } 7158 7159 /** 7160 * The number of bits used to represent a <tt>char</tt> value in unsigned 7161 * binary form, constant {@code 16}. 7162 * 7163 * @since 1.5 7164 */ 7165 public static final int SIZE = 16; 7166 7167 /** 7168 * The number of bytes used to represent a {@code char} value in unsigned 7169 * binary form. 7170 * 7171 * @since 1.8 7172 */ 7173 public static final int BYTES = SIZE / Byte.SIZE; 7174 7175 /** 7176 * Returns the value obtained by reversing the order of the bytes in the 7177 * specified <tt>char</tt> value. 7178 * 7179 * @param ch The {@code char} of which to reverse the byte order. 7180 * @return the value obtained by reversing (or, equivalently, swapping) 7181 * the bytes in the specified <tt>char</tt> value. 
* @since 1.5
     */
    public static char reverseBytes(char ch) {
        // High byte moves down, low byte moves up; the cast to char
        // discards everything above bit 15.
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
    }

    /**
     * Returns the Unicode name of the specified character
     * {@code codePoint}, or null if the code point is
     * {@link #UNASSIGNED unassigned}.
     * <p>
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
     *
     * <blockquote>{@code
     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
     *     + " "
     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
     *
     * }</blockquote>
     *
     * @param  codePoint the character (Unicode code point)
     *
     * @return the Unicode name of the specified character, or null if
     *         the code point is unassigned.
     *
     * @exception IllegalArgumentException if the specified
     *            {@code codePoint} is not a valid Unicode
     *            code point.
     *
     * @since 1.7
     */
    public static String getName(int codePoint) {
        if (!isValidCodePoint(codePoint)) {
            // Report the rejected value so the caller can diagnose the failure.
            throw new IllegalArgumentException(
                String.format("Not a valid Unicode code point: 0x%X", codePoint));
        }
        String name = CharacterName.get(codePoint);
        if (name != null) {
            return name;
        }
        if (getType(codePoint) == UNASSIGNED) {
            return null;
        }
        // No name was found for an assigned code point: build the
        // fallback from the block name and the upper-case hex value.
        String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
        UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (block != null) {
            return block.toString().replace('_', ' ') + " " + hex;
        }
        // should never come here
        return hex;
    }
}