1 /* 2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The {@code Character} class wraps a value of the primitive 35 * type {@code char} in an object. An object of type 36 * {@code Character} contains a single field whose type is 37 * {@code char}. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.2.0. 
44 * <p> 45 * The methods and data of class {@code Character} are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h3><a name="unicode">Unicode Character Representations</a></h3> 58 * 59 * <p>The {@code char} data type (and therefore the value that a 60 * {@code Character} object encapsulates) are based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode Standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * Standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in {@code char} arrays and 76 * in the {@code String} and {@code StringBuffer} classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of {@code char} values, the first from the <em>high-surrogates</em> 79 * range, (\uD800-\uDBFF), the second from the 80 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
81 * 82 * <p>A {@code char} value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * {@code int} value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of {@code int} are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate {@code char} values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a {@code char} value cannot support 95 * supplementary characters. They treat {@code char} values from the 96 * surrogate ranges as undefined characters. For example, 97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an {@code int} value support all 102 * Unicode characters, including supplementary characters. For 103 * example, {@code Character.isLetter(0x2F81A)} returns 104 * {@code true} because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * {@code char} values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * {@code digit} method, the {@code forDigit} method, and the 129 * {@code toString} method of class {@code Integer}. 130 * 131 * @see Character#digit(char, int) 132 * @see Character#forDigit(int, int) 133 * @see Integer#toString(int, int) 134 * @see Integer#valueOf(String) 135 */ 136 public static final int MIN_RADIX = 2; 137 138 /** 139 * The maximum radix available for conversion to and from strings. 140 * The constant value of this field is the largest value permitted 141 * for the radix argument in radix-conversion methods such as the 142 * {@code digit} method, the {@code forDigit} method, and the 143 * {@code toString} method of class {@code Integer}. 144 * 145 * @see Character#digit(char, int) 146 * @see Character#forDigit(int, int) 147 * @see Integer#toString(int, int) 148 * @see Integer#valueOf(String) 149 */ 150 public static final int MAX_RADIX = 36; 151 152 /** 153 * The constant value of this field is the smallest value of type 154 * {@code char}, {@code '\u005Cu0000'}. 155 * 156 * @since 1.0.2 157 */ 158 public static final char MIN_VALUE = '\u0000'; 159 160 /** 161 * The constant value of this field is the largest value of type 162 * {@code char}, {@code '\u005CuFFFF'}. 163 * 164 * @since 1.0.2 165 */ 166 public static final char MAX_VALUE = '\uFFFF'; 167 168 /** 169 * The {@code Class} instance representing the primitive type 170 * {@code char}. 
171 * 172 * @since 1.1 173 */ 174 @SuppressWarnings("unchecked") 175 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 176 177 /* 178 * Normative general types 179 */ 180 181 /* 182 * General character types 183 */ 184 185 /** 186 * General category "Cn" in the Unicode specification. 187 * @since 1.1 188 */ 189 public static final byte UNASSIGNED = 0; 190 191 /** 192 * General category "Lu" in the Unicode specification. 193 * @since 1.1 194 */ 195 public static final byte UPPERCASE_LETTER = 1; 196 197 /** 198 * General category "Ll" in the Unicode specification. 199 * @since 1.1 200 */ 201 public static final byte LOWERCASE_LETTER = 2; 202 203 /** 204 * General category "Lt" in the Unicode specification. 205 * @since 1.1 206 */ 207 public static final byte TITLECASE_LETTER = 3; 208 209 /** 210 * General category "Lm" in the Unicode specification. 211 * @since 1.1 212 */ 213 public static final byte MODIFIER_LETTER = 4; 214 215 /** 216 * General category "Lo" in the Unicode specification. 217 * @since 1.1 218 */ 219 public static final byte OTHER_LETTER = 5; 220 221 /** 222 * General category "Mn" in the Unicode specification. 223 * @since 1.1 224 */ 225 public static final byte NON_SPACING_MARK = 6; 226 227 /** 228 * General category "Me" in the Unicode specification. 229 * @since 1.1 230 */ 231 public static final byte ENCLOSING_MARK = 7; 232 233 /** 234 * General category "Mc" in the Unicode specification. 235 * @since 1.1 236 */ 237 public static final byte COMBINING_SPACING_MARK = 8; 238 239 /** 240 * General category "Nd" in the Unicode specification. 241 * @since 1.1 242 */ 243 public static final byte DECIMAL_DIGIT_NUMBER = 9; 244 245 /** 246 * General category "Nl" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte LETTER_NUMBER = 10; 250 251 /** 252 * General category "No" in the Unicode specification. 
253 * @since 1.1 254 */ 255 public static final byte OTHER_NUMBER = 11; 256 257 /** 258 * General category "Zs" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte SPACE_SEPARATOR = 12; 262 263 /** 264 * General category "Zl" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte LINE_SEPARATOR = 13; 268 269 /** 270 * General category "Zp" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte PARAGRAPH_SEPARATOR = 14; 274 275 /** 276 * General category "Cc" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte CONTROL = 15; 280 281 /** 282 * General category "Cf" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte FORMAT = 16; 286 287 /** 288 * General category "Co" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte PRIVATE_USE = 18; 292 293 /** 294 * General category "Cs" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte SURROGATE = 19; 298 299 /** 300 * General category "Pd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DASH_PUNCTUATION = 20; 304 305 /** 306 * General category "Ps" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte START_PUNCTUATION = 21; 310 311 /** 312 * General category "Pe" in the Unicode specification. 313 * @since 1.1 314 */ 315 public static final byte END_PUNCTUATION = 22; 316 317 /** 318 * General category "Pc" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte CONNECTOR_PUNCTUATION = 23; 322 323 /** 324 * General category "Po" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte OTHER_PUNCTUATION = 24; 328 329 /** 330 * General category "Sm" in the Unicode specification. 
331 * @since 1.1 332 */ 333 public static final byte MATH_SYMBOL = 25; 334 335 /** 336 * General category "Sc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CURRENCY_SYMBOL = 26; 340 341 /** 342 * General category "Sk" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte MODIFIER_SYMBOL = 27; 346 347 /** 348 * General category "So" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte OTHER_SYMBOL = 28; 352 353 /** 354 * General category "Pi" in the Unicode specification. 355 * @since 1.4 356 */ 357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 358 359 /** 360 * General category "Pf" in the Unicode specification. 361 * @since 1.4 362 */ 363 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 364 365 /** 366 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 367 */ 368 static final int ERROR = 0xFFFFFFFF; 369 370 371 /** 372 * Undefined bidirectional character type. Undefined {@code char} 373 * values have undefined directionality in the Unicode specification. 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_UNDEFINED = -1; 377 378 /** 379 * Strong bidirectional character type "L" in the Unicode specification. 380 * @since 1.4 381 */ 382 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 383 384 /** 385 * Strong bidirectional character type "R" in the Unicode specification. 386 * @since 1.4 387 */ 388 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 389 390 /** 391 * Strong bidirectional character type "AL" in the Unicode specification. 392 * @since 1.4 393 */ 394 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 395 396 /** 397 * Weak bidirectional character type "EN" in the Unicode specification. 398 * @since 1.4 399 */ 400 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 401 402 /** 403 * Weak bidirectional character type "ES" in the Unicode specification. 
404 * @since 1.4 405 */ 406 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 407 408 /** 409 * Weak bidirectional character type "ET" in the Unicode specification. 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 413 414 /** 415 * Weak bidirectional character type "AN" in the Unicode specification. 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 419 420 /** 421 * Weak bidirectional character type "CS" in the Unicode specification. 422 * @since 1.4 423 */ 424 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 425 426 /** 427 * Weak bidirectional character type "NSM" in the Unicode specification. 428 * @since 1.4 429 */ 430 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 431 432 /** 433 * Weak bidirectional character type "BN" in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 437 438 /** 439 * Neutral bidirectional character type "B" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 443 444 /** 445 * Neutral bidirectional character type "S" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 449 450 /** 451 * Neutral bidirectional character type "WS" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_WHITESPACE = 12; 455 456 /** 457 * Neutral bidirectional character type "ON" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 461 462 /** 463 * Strong bidirectional character type "LRE" in the Unicode specification. 464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 467 468 /** 469 * Strong bidirectional character type "LRO" in the Unicode specification. 
470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 473 474 /** 475 * Strong bidirectional character type "RLE" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 479 480 /** 481 * Strong bidirectional character type "RLO" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 485 486 /** 487 * Weak bidirectional character type "PDF" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 491 492 /** 493 * The minimum value of a 494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 495 * Unicode high-surrogate code unit</a> 496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 497 * A high-surrogate is also known as a <i>leading-surrogate</i>. 498 * 499 * @since 1.5 500 */ 501 public static final char MIN_HIGH_SURROGATE = '\uD800'; 502 503 /** 504 * The maximum value of a 505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 506 * Unicode high-surrogate code unit</a> 507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 508 * A high-surrogate is also known as a <i>leading-surrogate</i>. 509 * 510 * @since 1.5 511 */ 512 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 513 514 /** 515 * The minimum value of a 516 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 517 * Unicode low-surrogate code unit</a> 518 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 519 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
520 * 521 * @since 1.5 522 */ 523 public static final char MIN_LOW_SURROGATE = '\uDC00'; 524 525 /** 526 * The maximum value of a 527 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 528 * Unicode low-surrogate code unit</a> 529 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 530 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 531 * 532 * @since 1.5 533 */ 534 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 535 536 /** 537 * The minimum value of a Unicode surrogate code unit in the 538 * UTF-16 encoding, constant {@code '\u005CuD800'}. 539 * 540 * @since 1.5 541 */ 542 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 543 544 /** 545 * The maximum value of a Unicode surrogate code unit in the 546 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 547 * 548 * @since 1.5 549 */ 550 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 551 552 /** 553 * The minimum value of a 554 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 555 * Unicode supplementary code point</a>, constant {@code U+10000}. 556 * 557 * @since 1.5 558 */ 559 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 560 561 /** 562 * The minimum value of a 563 * <a href="http://www.unicode.org/glossary/#code_point"> 564 * Unicode code point</a>, constant {@code U+0000}. 565 * 566 * @since 1.5 567 */ 568 public static final int MIN_CODE_POINT = 0x000000; 569 570 /** 571 * The maximum value of a 572 * <a href="http://www.unicode.org/glossary/#code_point"> 573 * Unicode code point</a>, constant {@code U+10FFFF}. 574 * 575 * @since 1.5 576 */ 577 public static final int MAX_CODE_POINT = 0X10FFFF; 578 579 580 /** 581 * Instances of this class represent particular subsets of the Unicode 582 * character set. The only family of subsets defined in the 583 * {@code Character} class is {@link Character.UnicodeBlock}. 584 * Other portions of the Java API may define other subsets for their 585 * own purposes. 
586 * 587 * @since 1.2 588 */ 589 public static class Subset { 590 591 private String name; 592 593 /** 594 * Constructs a new {@code Subset} instance. 595 * 596 * @param name The name of this subset 597 * @exception NullPointerException if name is {@code null} 598 */ 599 protected Subset(String name) { 600 if (name == null) { 601 throw new NullPointerException("name"); 602 } 603 this.name = name; 604 } 605 606 /** 607 * Compares two {@code Subset} objects for equality. 608 * This method returns {@code true} if and only if 609 * {@code this} and the argument refer to the same 610 * object; since this method is {@code final}, this 611 * guarantee holds for all subclasses. 612 */ 613 public final boolean equals(Object obj) { 614 return (this == obj); 615 } 616 617 /** 618 * Returns the standard hash code as defined by the 619 * {@link Object#hashCode} method. This method 620 * is {@code final} in order to ensure that the 621 * {@code equals} and {@code hashCode} methods will 622 * be consistent in all subclasses. 623 */ 624 public final int hashCode() { 625 return super.hashCode(); 626 } 627 628 /** 629 * Returns the name of this subset. 630 */ 631 public final String toString() { 632 return name; 633 } 634 } 635 636 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 637 // for the latest specification of Unicode Blocks. 638 639 /** 640 * A family of character subsets representing the character blocks in the 641 * Unicode specification. Character blocks generally define characters 642 * used for a specific script or purpose. A character is contained by 643 * at most one Unicode block. 644 * 645 * @since 1.2 646 */ 647 public static final class UnicodeBlock extends Subset { 648 649 private static Map<String, UnicodeBlock> map = new HashMap<>(256); 650 651 /** 652 * Creates a UnicodeBlock with the given identifier name. 653 * This name must be the same as the block identifier. 
654 */ 655 private UnicodeBlock(String idName) { 656 super(idName); 657 map.put(idName, this); 658 } 659 660 /** 661 * Creates a UnicodeBlock with the given identifier name and 662 * alias name. 663 */ 664 private UnicodeBlock(String idName, String alias) { 665 this(idName); 666 map.put(alias, this); 667 } 668 669 /** 670 * Creates a UnicodeBlock with the given identifier name and 671 * alias names. 672 */ 673 private UnicodeBlock(String idName, String... aliases) { 674 this(idName); 675 for (String alias : aliases) 676 map.put(alias, this); 677 } 678 679 /** 680 * Constant for the "Basic Latin" Unicode character block. 681 * @since 1.2 682 */ 683 public static final UnicodeBlock BASIC_LATIN = 684 new UnicodeBlock("BASIC_LATIN", 685 "BASIC LATIN", 686 "BASICLATIN"); 687 688 /** 689 * Constant for the "Latin-1 Supplement" Unicode character block. 690 * @since 1.2 691 */ 692 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 693 new UnicodeBlock("LATIN_1_SUPPLEMENT", 694 "LATIN-1 SUPPLEMENT", 695 "LATIN-1SUPPLEMENT"); 696 697 /** 698 * Constant for the "Latin Extended-A" Unicode character block. 699 * @since 1.2 700 */ 701 public static final UnicodeBlock LATIN_EXTENDED_A = 702 new UnicodeBlock("LATIN_EXTENDED_A", 703 "LATIN EXTENDED-A", 704 "LATINEXTENDED-A"); 705 706 /** 707 * Constant for the "Latin Extended-B" Unicode character block. 708 * @since 1.2 709 */ 710 public static final UnicodeBlock LATIN_EXTENDED_B = 711 new UnicodeBlock("LATIN_EXTENDED_B", 712 "LATIN EXTENDED-B", 713 "LATINEXTENDED-B"); 714 715 /** 716 * Constant for the "IPA Extensions" Unicode character block. 717 * @since 1.2 718 */ 719 public static final UnicodeBlock IPA_EXTENSIONS = 720 new UnicodeBlock("IPA_EXTENSIONS", 721 "IPA EXTENSIONS", 722 "IPAEXTENSIONS"); 723 724 /** 725 * Constant for the "Spacing Modifier Letters" Unicode character block. 
726 * @since 1.2 727 */ 728 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 729 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 730 "SPACING MODIFIER LETTERS", 731 "SPACINGMODIFIERLETTERS"); 732 733 /** 734 * Constant for the "Combining Diacritical Marks" Unicode character block. 735 * @since 1.2 736 */ 737 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 738 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 739 "COMBINING DIACRITICAL MARKS", 740 "COMBININGDIACRITICALMARKS"); 741 742 /** 743 * Constant for the "Greek and Coptic" Unicode character block. 744 * <p> 745 * This block was previously known as the "Greek" block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock GREEK = 750 new UnicodeBlock("GREEK", 751 "GREEK AND COPTIC", 752 "GREEKANDCOPTIC"); 753 754 /** 755 * Constant for the "Cyrillic" Unicode character block. 756 * @since 1.2 757 */ 758 public static final UnicodeBlock CYRILLIC = 759 new UnicodeBlock("CYRILLIC"); 760 761 /** 762 * Constant for the "Armenian" Unicode character block. 763 * @since 1.2 764 */ 765 public static final UnicodeBlock ARMENIAN = 766 new UnicodeBlock("ARMENIAN"); 767 768 /** 769 * Constant for the "Hebrew" Unicode character block. 770 * @since 1.2 771 */ 772 public static final UnicodeBlock HEBREW = 773 new UnicodeBlock("HEBREW"); 774 775 /** 776 * Constant for the "Arabic" Unicode character block. 777 * @since 1.2 778 */ 779 public static final UnicodeBlock ARABIC = 780 new UnicodeBlock("ARABIC"); 781 782 /** 783 * Constant for the "Devanagari" Unicode character block. 784 * @since 1.2 785 */ 786 public static final UnicodeBlock DEVANAGARI = 787 new UnicodeBlock("DEVANAGARI"); 788 789 /** 790 * Constant for the "Bengali" Unicode character block. 791 * @since 1.2 792 */ 793 public static final UnicodeBlock BENGALI = 794 new UnicodeBlock("BENGALI"); 795 796 /** 797 * Constant for the "Gurmukhi" Unicode character block. 
798 * @since 1.2 799 */ 800 public static final UnicodeBlock GURMUKHI = 801 new UnicodeBlock("GURMUKHI"); 802 803 /** 804 * Constant for the "Gujarati" Unicode character block. 805 * @since 1.2 806 */ 807 public static final UnicodeBlock GUJARATI = 808 new UnicodeBlock("GUJARATI"); 809 810 /** 811 * Constant for the "Oriya" Unicode character block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock ORIYA = 815 new UnicodeBlock("ORIYA"); 816 817 /** 818 * Constant for the "Tamil" Unicode character block. 819 * @since 1.2 820 */ 821 public static final UnicodeBlock TAMIL = 822 new UnicodeBlock("TAMIL"); 823 824 /** 825 * Constant for the "Telugu" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock TELUGU = 829 new UnicodeBlock("TELUGU"); 830 831 /** 832 * Constant for the "Kannada" Unicode character block. 833 * @since 1.2 834 */ 835 public static final UnicodeBlock KANNADA = 836 new UnicodeBlock("KANNADA"); 837 838 /** 839 * Constant for the "Malayalam" Unicode character block. 840 * @since 1.2 841 */ 842 public static final UnicodeBlock MALAYALAM = 843 new UnicodeBlock("MALAYALAM"); 844 845 /** 846 * Constant for the "Thai" Unicode character block. 847 * @since 1.2 848 */ 849 public static final UnicodeBlock THAI = 850 new UnicodeBlock("THAI"); 851 852 /** 853 * Constant for the "Lao" Unicode character block. 854 * @since 1.2 855 */ 856 public static final UnicodeBlock LAO = 857 new UnicodeBlock("LAO"); 858 859 /** 860 * Constant for the "Tibetan" Unicode character block. 861 * @since 1.2 862 */ 863 public static final UnicodeBlock TIBETAN = 864 new UnicodeBlock("TIBETAN"); 865 866 /** 867 * Constant for the "Georgian" Unicode character block. 868 * @since 1.2 869 */ 870 public static final UnicodeBlock GEORGIAN = 871 new UnicodeBlock("GEORGIAN"); 872 873 /** 874 * Constant for the "Hangul Jamo" Unicode character block. 
875 * @since 1.2 876 */ 877 public static final UnicodeBlock HANGUL_JAMO = 878 new UnicodeBlock("HANGUL_JAMO", 879 "HANGUL JAMO", 880 "HANGULJAMO"); 881 882 /** 883 * Constant for the "Latin Extended Additional" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 887 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 888 "LATIN EXTENDED ADDITIONAL", 889 "LATINEXTENDEDADDITIONAL"); 890 891 /** 892 * Constant for the "Greek Extended" Unicode character block. 893 * @since 1.2 894 */ 895 public static final UnicodeBlock GREEK_EXTENDED = 896 new UnicodeBlock("GREEK_EXTENDED", 897 "GREEK EXTENDED", 898 "GREEKEXTENDED"); 899 900 /** 901 * Constant for the "General Punctuation" Unicode character block. 902 * @since 1.2 903 */ 904 public static final UnicodeBlock GENERAL_PUNCTUATION = 905 new UnicodeBlock("GENERAL_PUNCTUATION", 906 "GENERAL PUNCTUATION", 907 "GENERALPUNCTUATION"); 908 909 /** 910 * Constant for the "Superscripts and Subscripts" Unicode character 911 * block. 912 * @since 1.2 913 */ 914 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 915 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 916 "SUPERSCRIPTS AND SUBSCRIPTS", 917 "SUPERSCRIPTSANDSUBSCRIPTS"); 918 919 /** 920 * Constant for the "Currency Symbols" Unicode character block. 921 * @since 1.2 922 */ 923 public static final UnicodeBlock CURRENCY_SYMBOLS = 924 new UnicodeBlock("CURRENCY_SYMBOLS", 925 "CURRENCY SYMBOLS", 926 "CURRENCYSYMBOLS"); 927 928 /** 929 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 930 * character block. 931 * <p> 932 * This block was previously known as "Combining Marks for Symbols". 
933 * @since 1.2 934 */ 935 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 936 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 937 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 938 "COMBININGDIACRITICALMARKSFORSYMBOLS", 939 "COMBINING MARKS FOR SYMBOLS", 940 "COMBININGMARKSFORSYMBOLS"); 941 942 /** 943 * Constant for the "Letterlike Symbols" Unicode character block. 944 * @since 1.2 945 */ 946 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 947 new UnicodeBlock("LETTERLIKE_SYMBOLS", 948 "LETTERLIKE SYMBOLS", 949 "LETTERLIKESYMBOLS"); 950 951 /** 952 * Constant for the "Number Forms" Unicode character block. 953 * @since 1.2 954 */ 955 public static final UnicodeBlock NUMBER_FORMS = 956 new UnicodeBlock("NUMBER_FORMS", 957 "NUMBER FORMS", 958 "NUMBERFORMS"); 959 960 /** 961 * Constant for the "Arrows" Unicode character block. 962 * @since 1.2 963 */ 964 public static final UnicodeBlock ARROWS = 965 new UnicodeBlock("ARROWS"); 966 967 /** 968 * Constant for the "Mathematical Operators" Unicode character block. 969 * @since 1.2 970 */ 971 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 972 new UnicodeBlock("MATHEMATICAL_OPERATORS", 973 "MATHEMATICAL OPERATORS", 974 "MATHEMATICALOPERATORS"); 975 976 /** 977 * Constant for the "Miscellaneous Technical" Unicode character block. 978 * @since 1.2 979 */ 980 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 981 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 982 "MISCELLANEOUS TECHNICAL", 983 "MISCELLANEOUSTECHNICAL"); 984 985 /** 986 * Constant for the "Control Pictures" Unicode character block. 987 * @since 1.2 988 */ 989 public static final UnicodeBlock CONTROL_PICTURES = 990 new UnicodeBlock("CONTROL_PICTURES", 991 "CONTROL PICTURES", 992 "CONTROLPICTURES"); 993 994 /** 995 * Constant for the "Optical Character Recognition" Unicode character block. 
996 * @since 1.2 997 */ 998 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 999 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1000 "OPTICAL CHARACTER RECOGNITION", 1001 "OPTICALCHARACTERRECOGNITION"); 1002 1003 /** 1004 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1005 * @since 1.2 1006 */ 1007 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1008 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1009 "ENCLOSED ALPHANUMERICS", 1010 "ENCLOSEDALPHANUMERICS"); 1011 1012 /** 1013 * Constant for the "Box Drawing" Unicode character block. 1014 * @since 1.2 1015 */ 1016 public static final UnicodeBlock BOX_DRAWING = 1017 new UnicodeBlock("BOX_DRAWING", 1018 "BOX DRAWING", 1019 "BOXDRAWING"); 1020 1021 /** 1022 * Constant for the "Block Elements" Unicode character block. 1023 * @since 1.2 1024 */ 1025 public static final UnicodeBlock BLOCK_ELEMENTS = 1026 new UnicodeBlock("BLOCK_ELEMENTS", 1027 "BLOCK ELEMENTS", 1028 "BLOCKELEMENTS"); 1029 1030 /** 1031 * Constant for the "Geometric Shapes" Unicode character block. 1032 * @since 1.2 1033 */ 1034 public static final UnicodeBlock GEOMETRIC_SHAPES = 1035 new UnicodeBlock("GEOMETRIC_SHAPES", 1036 "GEOMETRIC SHAPES", 1037 "GEOMETRICSHAPES"); 1038 1039 /** 1040 * Constant for the "Miscellaneous Symbols" Unicode character block. 1041 * @since 1.2 1042 */ 1043 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1044 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1045 "MISCELLANEOUS SYMBOLS", 1046 "MISCELLANEOUSSYMBOLS"); 1047 1048 /** 1049 * Constant for the "Dingbats" Unicode character block. 1050 * @since 1.2 1051 */ 1052 public static final UnicodeBlock DINGBATS = 1053 new UnicodeBlock("DINGBATS"); 1054 1055 /** 1056 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1057 * @since 1.2 1058 */ 1059 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1060 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1061 "CJK SYMBOLS AND PUNCTUATION", 1062 "CJKSYMBOLSANDPUNCTUATION"); 1063 1064 /** 1065 * Constant for the "Hiragana" Unicode character block. 1066 * @since 1.2 1067 */ 1068 public static final UnicodeBlock HIRAGANA = 1069 new UnicodeBlock("HIRAGANA"); 1070 1071 /** 1072 * Constant for the "Katakana" Unicode character block. 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock KATAKANA = 1076 new UnicodeBlock("KATAKANA"); 1077 1078 /** 1079 * Constant for the "Bopomofo" Unicode character block. 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock BOPOMOFO = 1083 new UnicodeBlock("BOPOMOFO"); 1084 1085 /** 1086 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1090 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1091 "HANGUL COMPATIBILITY JAMO", 1092 "HANGULCOMPATIBILITYJAMO"); 1093 1094 /** 1095 * Constant for the "Kanbun" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock KANBUN = 1099 new UnicodeBlock("KANBUN"); 1100 1101 /** 1102 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1106 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1107 "ENCLOSED CJK LETTERS AND MONTHS", 1108 "ENCLOSEDCJKLETTERSANDMONTHS"); 1109 1110 /** 1111 * Constant for the "CJK Compatibility" Unicode character block. 1112 * @since 1.2 1113 */ 1114 public static final UnicodeBlock CJK_COMPATIBILITY = 1115 new UnicodeBlock("CJK_COMPATIBILITY", 1116 "CJK COMPATIBILITY", 1117 "CJKCOMPATIBILITY"); 1118 1119 /** 1120 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1121 * @since 1.2 1122 */ 1123 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1124 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1125 "CJK UNIFIED IDEOGRAPHS", 1126 "CJKUNIFIEDIDEOGRAPHS"); 1127 1128 /** 1129 * Constant for the "Hangul Syllables" Unicode character block. 1130 * @since 1.2 1131 */ 1132 public static final UnicodeBlock HANGUL_SYLLABLES = 1133 new UnicodeBlock("HANGUL_SYLLABLES", 1134 "HANGUL SYLLABLES", 1135 "HANGULSYLLABLES"); 1136 1137 /** 1138 * Constant for the "Private Use Area" Unicode character block. * @see #SUPPLEMENTARY_PRIVATE_USE_AREA_A * @see #SUPPLEMENTARY_PRIVATE_USE_AREA_B 1139 * @since 1.2 1140 */ 1141 public static final UnicodeBlock PRIVATE_USE_AREA = 1142 new UnicodeBlock("PRIVATE_USE_AREA", 1143 "PRIVATE USE AREA", 1144 "PRIVATEUSEAREA"); 1145 1146 /** 1147 * Constant for the "CJK Compatibility Ideographs" Unicode character 1148 * block. * @see #CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1149 * @since 1.2 1150 */ 1151 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1152 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1153 "CJK COMPATIBILITY IDEOGRAPHS", 1154 "CJKCOMPATIBILITYIDEOGRAPHS"); 1155 1156 /** 1157 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1158 * @since 1.2 1159 */ 1160 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1161 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1162 "ALPHABETIC PRESENTATION FORMS", 1163 "ALPHABETICPRESENTATIONFORMS"); 1164 1165 /** 1166 * Constant for the "Arabic Presentation Forms-A" Unicode character 1167 * block. * @see #ARABIC_PRESENTATION_FORMS_B 1168 * @since 1.2 1169 */ 1170 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1171 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1172 "ARABIC PRESENTATION FORMS-A", 1173 "ARABICPRESENTATIONFORMS-A"); 1174 1175 /** 1176 * Constant for the "Combining Half Marks" Unicode character block.
1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock COMBINING_HALF_MARKS = 1180 new UnicodeBlock("COMBINING_HALF_MARKS", 1181 "COMBINING HALF MARKS", 1182 "COMBININGHALFMARKS"); 1183 1184 /** 1185 * Constant for the "CJK Compatibility Forms" Unicode character block. * @see #CJK_COMPATIBILITY 1186 * @since 1.2 1187 */ 1188 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1189 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1190 "CJK COMPATIBILITY FORMS", 1191 "CJKCOMPATIBILITYFORMS"); 1192 1193 /** 1194 * Constant for the "Small Form Variants" Unicode character block. 1195 * @since 1.2 1196 */ 1197 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1198 new UnicodeBlock("SMALL_FORM_VARIANTS", 1199 "SMALL FORM VARIANTS", 1200 "SMALLFORMVARIANTS"); 1201 1202 /** 1203 * Constant for the "Arabic Presentation Forms-B" Unicode character block. * @see #ARABIC_PRESENTATION_FORMS_A 1204 * @since 1.2 1205 */ 1206 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1207 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1208 "ARABIC PRESENTATION FORMS-B", 1209 "ARABICPRESENTATIONFORMS-B"); 1210 1211 /** 1212 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1213 * block. 1214 * @since 1.2 1215 */ 1216 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1217 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1218 "HALFWIDTH AND FULLWIDTH FORMS", 1219 "HALFWIDTHANDFULLWIDTHFORMS"); 1220 1221 /** 1222 * Constant for the "Specials" Unicode character block. 1223 * @since 1.2 1224 */ 1225 public static final UnicodeBlock SPECIALS = 1226 new UnicodeBlock("SPECIALS"); 1227 1228 /** 1229 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1230 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1231 * {@link #LOW_SURROGATES}. These new constants match 1232 * the block definitions of the Unicode Standard. 1233 * The {@link #of(char)} and {@link #of(int)} methods 1234 * return the new constants, not SURROGATES_AREA.
1235 */ 1236 @Deprecated 1237 public static final UnicodeBlock SURROGATES_AREA = 1238 new UnicodeBlock("SURROGATES_AREA"); 1239 1240 /** 1241 * Constant for the "Syriac" Unicode character block. 1242 * @since 1.4 1243 */ 1244 public static final UnicodeBlock SYRIAC = 1245 new UnicodeBlock("SYRIAC"); 1246 1247 /** 1248 * Constant for the "Thaana" Unicode character block. 1249 * @since 1.4 1250 */ 1251 public static final UnicodeBlock THAANA = 1252 new UnicodeBlock("THAANA"); 1253 1254 /** 1255 * Constant for the "Sinhala" Unicode character block. 1256 * @since 1.4 1257 */ 1258 public static final UnicodeBlock SINHALA = 1259 new UnicodeBlock("SINHALA"); 1260 1261 /** 1262 * Constant for the "Myanmar" Unicode character block. * @see #MYANMAR_EXTENDED_A 1263 * @since 1.4 1264 */ 1265 public static final UnicodeBlock MYANMAR = 1266 new UnicodeBlock("MYANMAR"); 1267 1268 /** 1269 * Constant for the "Ethiopic" Unicode character block. * @see #ETHIOPIC_SUPPLEMENT * @see #ETHIOPIC_EXTENDED * @see #ETHIOPIC_EXTENDED_A 1270 * @since 1.4 1271 */ 1272 public static final UnicodeBlock ETHIOPIC = 1273 new UnicodeBlock("ETHIOPIC"); 1274 1275 /** 1276 * Constant for the "Cherokee" Unicode character block. 1277 * @since 1.4 1278 */ 1279 public static final UnicodeBlock CHEROKEE = 1280 new UnicodeBlock("CHEROKEE"); 1281 1282 /** 1283 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. * @see #UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED 1284 * @since 1.4 1285 */ 1286 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1287 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1288 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1289 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1290 1291 /** 1292 * Constant for the "Ogham" Unicode character block. 1293 * @since 1.4 1294 */ 1295 public static final UnicodeBlock OGHAM = 1296 new UnicodeBlock("OGHAM"); 1297 1298 /** 1299 * Constant for the "Runic" Unicode character block. 1300 * @since 1.4 1301 */ 1302 public static final UnicodeBlock RUNIC = 1303 new UnicodeBlock("RUNIC"); 1304 1305 /** 1306 * Constant for the "Khmer" Unicode character block.
1307 * @since 1.4 1308 */ 1309 public static final UnicodeBlock KHMER = 1310 new UnicodeBlock("KHMER"); 1311 1312 /** 1313 * Constant for the "Mongolian" Unicode character block. 1314 * @since 1.4 1315 */ 1316 public static final UnicodeBlock MONGOLIAN = 1317 new UnicodeBlock("MONGOLIAN"); 1318 1319 /** 1320 * Constant for the "Braille Patterns" Unicode character block. 1321 * @since 1.4 1322 */ 1323 public static final UnicodeBlock BRAILLE_PATTERNS = 1324 new UnicodeBlock("BRAILLE_PATTERNS", 1325 "BRAILLE PATTERNS", 1326 "BRAILLEPATTERNS"); 1327 1328 /** 1329 * Constant for the "CJK Radicals Supplement" Unicode character block. * @see #KANGXI_RADICALS 1330 * @since 1.4 1331 */ 1332 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1333 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1334 "CJK RADICALS SUPPLEMENT", 1335 "CJKRADICALSSUPPLEMENT"); 1336 1337 /** 1338 * Constant for the "Kangxi Radicals" Unicode character block. * @see #CJK_RADICALS_SUPPLEMENT 1339 * @since 1.4 1340 */ 1341 public static final UnicodeBlock KANGXI_RADICALS = 1342 new UnicodeBlock("KANGXI_RADICALS", 1343 "KANGXI RADICALS", 1344 "KANGXIRADICALS"); 1345 1346 /** 1347 * Constant for the "Ideographic Description Characters" Unicode character block. 1348 * @since 1.4 1349 */ 1350 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1351 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1352 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1353 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1354 1355 /** 1356 * Constant for the "Bopomofo Extended" Unicode character block. * @see #BOPOMOFO 1357 * @since 1.4 1358 */ 1359 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1360 new UnicodeBlock("BOPOMOFO_EXTENDED", 1361 "BOPOMOFO EXTENDED", 1362 "BOPOMOFOEXTENDED"); 1363 1364 /** 1365 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1366 * @since 1.4 1367 */ 1368 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1369 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1370 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1371 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1372 1373 /** 1374 * Constant for the "Yi Syllables" Unicode character block. * @see #YI_RADICALS 1375 * @since 1.4 1376 */ 1377 public static final UnicodeBlock YI_SYLLABLES = 1378 new UnicodeBlock("YI_SYLLABLES", 1379 "YI SYLLABLES", 1380 "YISYLLABLES"); 1381 1382 /** 1383 * Constant for the "Yi Radicals" Unicode character block. * @see #YI_SYLLABLES 1384 * @since 1.4 1385 */ 1386 public static final UnicodeBlock YI_RADICALS = 1387 new UnicodeBlock("YI_RADICALS", 1388 "YI RADICALS", 1389 "YIRADICALS"); 1390 1391 /* NOTE(review): the declaration below passes five names instead of the usual one or three; the last two spell the block "Cyrillic Supplement". Presumably they are extra lookup aliases for an alternate block name - confirm against the UnicodeBlock constructor, which is outside this view. */ /** 1392 * Constant for the "Cyrillic Supplementary" Unicode character block. 1393 * @since 1.5 1394 */ 1395 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1396 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1397 "CYRILLIC SUPPLEMENTARY", 1398 "CYRILLICSUPPLEMENTARY", 1399 "CYRILLIC SUPPLEMENT", 1400 "CYRILLICSUPPLEMENT"); 1401 1402 /** 1403 * Constant for the "Tagalog" Unicode character block. 1404 * @since 1.5 1405 */ 1406 public static final UnicodeBlock TAGALOG = 1407 new UnicodeBlock("TAGALOG"); 1408 1409 /** 1410 * Constant for the "Hanunoo" Unicode character block. 1411 * @since 1.5 1412 */ 1413 public static final UnicodeBlock HANUNOO = 1414 new UnicodeBlock("HANUNOO"); 1415 1416 /** 1417 * Constant for the "Buhid" Unicode character block. 1418 * @since 1.5 1419 */ 1420 public static final UnicodeBlock BUHID = 1421 new UnicodeBlock("BUHID"); 1422 1423 /** 1424 * Constant for the "Tagbanwa" Unicode character block. 1425 * @since 1.5 1426 */ 1427 public static final UnicodeBlock TAGBANWA = 1428 new UnicodeBlock("TAGBANWA"); 1429 1430 /** 1431 * Constant for the "Limbu" Unicode character block.
1432 * @since 1.5 1433 */ 1434 public static final UnicodeBlock LIMBU = 1435 new UnicodeBlock("LIMBU"); 1436 1437 /** 1438 * Constant for the "Tai Le" Unicode character block. 1439 * @since 1.5 1440 */ 1441 public static final UnicodeBlock TAI_LE = 1442 new UnicodeBlock("TAI_LE", 1443 "TAI LE", 1444 "TAILE"); 1445 1446 /** 1447 * Constant for the "Khmer Symbols" Unicode character block. * @see #KHMER 1448 * @since 1.5 1449 */ 1450 public static final UnicodeBlock KHMER_SYMBOLS = 1451 new UnicodeBlock("KHMER_SYMBOLS", 1452 "KHMER SYMBOLS", 1453 "KHMERSYMBOLS"); 1454 1455 /** 1456 * Constant for the "Phonetic Extensions" Unicode character block. * @see #PHONETIC_EXTENSIONS_SUPPLEMENT 1457 * @since 1.5 1458 */ 1459 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1460 new UnicodeBlock("PHONETIC_EXTENSIONS", 1461 "PHONETIC EXTENSIONS", 1462 "PHONETICEXTENSIONS"); 1463 1464 /** 1465 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. * @see #MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1466 * @since 1.5 1467 */ 1468 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1469 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1470 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1471 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1472 1473 /** 1474 * Constant for the "Supplemental Arrows-A" Unicode character block. * @see #SUPPLEMENTAL_ARROWS_B 1475 * @since 1.5 1476 */ 1477 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1478 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1479 "SUPPLEMENTAL ARROWS-A", 1480 "SUPPLEMENTALARROWS-A"); 1481 1482 /** 1483 * Constant for the "Supplemental Arrows-B" Unicode character block. * @see #SUPPLEMENTAL_ARROWS_A 1484 * @since 1.5 1485 */ 1486 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1487 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1488 "SUPPLEMENTAL ARROWS-B", 1489 "SUPPLEMENTALARROWS-B"); 1490 1491 /** 1492 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1493 * character block.
1494 * @since 1.5 1495 */ 1496 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1497 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1498 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1499 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1500 1501 /** 1502 * Constant for the "Supplemental Mathematical Operators" Unicode 1503 * character block. 1504 * @since 1.5 1505 */ 1506 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1507 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1508 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1509 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1510 1511 /** 1512 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1513 * block. * @see #MISCELLANEOUS_SYMBOLS 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1517 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1518 "MISCELLANEOUS SYMBOLS AND ARROWS", 1519 "MISCELLANEOUSSYMBOLSANDARROWS"); 1520 1521 /** 1522 * Constant for the "Katakana Phonetic Extensions" Unicode character 1523 * block. * @see #KATAKANA 1524 * @since 1.5 1525 */ 1526 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1527 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1528 "KATAKANA PHONETIC EXTENSIONS", 1529 "KATAKANAPHONETICEXTENSIONS"); 1530 1531 /** 1532 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1533 * @since 1.5 1534 */ 1535 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1536 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1537 "YIJING HEXAGRAM SYMBOLS", 1538 "YIJINGHEXAGRAMSYMBOLS"); 1539 1540 /** 1541 * Constant for the "Variation Selectors" Unicode character block. * @see #VARIATION_SELECTORS_SUPPLEMENT 1542 * @since 1.5 1543 */ 1544 public static final UnicodeBlock VARIATION_SELECTORS = 1545 new UnicodeBlock("VARIATION_SELECTORS", 1546 "VARIATION SELECTORS", 1547 "VARIATIONSELECTORS"); 1548 1549 /** 1550 * Constant for the "Linear B Syllabary" Unicode character block.
1551 * @since 1.5 1552 */ 1553 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1554 new UnicodeBlock("LINEAR_B_SYLLABARY", 1555 "LINEAR B SYLLABARY", 1556 "LINEARBSYLLABARY"); 1557 1558 /** 1559 * Constant for the "Linear B Ideograms" Unicode character block. * @see #LINEAR_B_SYLLABARY 1560 * @since 1.5 1561 */ 1562 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1563 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1564 "LINEAR B IDEOGRAMS", 1565 "LINEARBIDEOGRAMS"); 1566 1567 /** 1568 * Constant for the "Aegean Numbers" Unicode character block. 1569 * @since 1.5 1570 */ 1571 public static final UnicodeBlock AEGEAN_NUMBERS = 1572 new UnicodeBlock("AEGEAN_NUMBERS", 1573 "AEGEAN NUMBERS", 1574 "AEGEANNUMBERS"); 1575 1576 /** 1577 * Constant for the "Old Italic" Unicode character block. 1578 * @since 1.5 1579 */ 1580 public static final UnicodeBlock OLD_ITALIC = 1581 new UnicodeBlock("OLD_ITALIC", 1582 "OLD ITALIC", 1583 "OLDITALIC"); 1584 1585 /** 1586 * Constant for the "Gothic" Unicode character block. 1587 * @since 1.5 1588 */ 1589 public static final UnicodeBlock GOTHIC = 1590 new UnicodeBlock("GOTHIC"); 1591 1592 /** 1593 * Constant for the "Ugaritic" Unicode character block. 1594 * @since 1.5 1595 */ 1596 public static final UnicodeBlock UGARITIC = 1597 new UnicodeBlock("UGARITIC"); 1598 1599 /** 1600 * Constant for the "Deseret" Unicode character block. 1601 * @since 1.5 1602 */ 1603 public static final UnicodeBlock DESERET = 1604 new UnicodeBlock("DESERET"); 1605 1606 /** 1607 * Constant for the "Shavian" Unicode character block. 1608 * @since 1.5 1609 */ 1610 public static final UnicodeBlock SHAVIAN = 1611 new UnicodeBlock("SHAVIAN"); 1612 1613 /** 1614 * Constant for the "Osmanya" Unicode character block. 1615 * @since 1.5 1616 */ 1617 public static final UnicodeBlock OSMANYA = 1618 new UnicodeBlock("OSMANYA"); 1619 1620 /** 1621 * Constant for the "Cypriot Syllabary" Unicode character block.
1622 * @since 1.5 1623 */ 1624 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1625 new UnicodeBlock("CYPRIOT_SYLLABARY", 1626 "CYPRIOT SYLLABARY", 1627 "CYPRIOTSYLLABARY"); 1628 1629 /** 1630 * Constant for the "Byzantine Musical Symbols" Unicode character block. * @see #MUSICAL_SYMBOLS 1631 * @since 1.5 1632 */ 1633 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1634 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1635 "BYZANTINE MUSICAL SYMBOLS", 1636 "BYZANTINEMUSICALSYMBOLS"); 1637 1638 /** 1639 * Constant for the "Musical Symbols" Unicode character block. * @see #BYZANTINE_MUSICAL_SYMBOLS 1640 * @since 1.5 1641 */ 1642 public static final UnicodeBlock MUSICAL_SYMBOLS = 1643 new UnicodeBlock("MUSICAL_SYMBOLS", 1644 "MUSICAL SYMBOLS", 1645 "MUSICALSYMBOLS"); 1646 1647 /** 1648 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1649 * @since 1.5 1650 */ 1651 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1652 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1653 "TAI XUAN JING SYMBOLS", 1654 "TAIXUANJINGSYMBOLS"); 1655 1656 /** 1657 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1658 * character block. 1659 * @since 1.5 1660 */ 1661 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1662 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1663 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1664 "MATHEMATICALALPHANUMERICSYMBOLS"); 1665 1666 /** 1667 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1668 * character block. * @see #CJK_UNIFIED_IDEOGRAPHS * @see #CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1669 * @since 1.5 1670 */ 1671 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1672 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1673 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1674 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1675 1676 /** 1677 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678 * @since 1.5 1679 */ 1680 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1681 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1682 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1683 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1684 1685 /** 1686 * Constant for the "Tags" Unicode character block. 1687 * @since 1.5 1688 */ 1689 public static final UnicodeBlock TAGS = 1690 new UnicodeBlock("TAGS"); 1691 1692 /** 1693 * Constant for the "Variation Selectors Supplement" Unicode character 1694 * block. * @see #VARIATION_SELECTORS 1695 * @since 1.5 1696 */ 1697 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1698 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1699 "VARIATION SELECTORS SUPPLEMENT", 1700 "VARIATIONSELECTORSSUPPLEMENT"); 1701 1702 /** 1703 * Constant for the "Supplementary Private Use Area-A" Unicode character 1704 * block. * @see #PRIVATE_USE_AREA * @see #SUPPLEMENTARY_PRIVATE_USE_AREA_B 1705 * @since 1.5 1706 */ 1707 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1708 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1709 "SUPPLEMENTARY PRIVATE USE AREA-A", 1710 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1711 1712 /** 1713 * Constant for the "Supplementary Private Use Area-B" Unicode character 1714 * block. * @see #PRIVATE_USE_AREA * @see #SUPPLEMENTARY_PRIVATE_USE_AREA_A 1715 * @since 1.5 1716 */ 1717 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1718 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1719 "SUPPLEMENTARY PRIVATE USE AREA-B", 1720 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1721 1722 /** 1723 * Constant for the "High Surrogates" Unicode character block. 1724 * This block represents codepoint values in the high surrogate 1725 * range: U+D800 through U+DB7F 1726 * * @see #HIGH_PRIVATE_USE_SURROGATES * @see #LOW_SURROGATES 1727 * @since 1.5 1728 */ 1729 public static final UnicodeBlock HIGH_SURROGATES = 1730 new UnicodeBlock("HIGH_SURROGATES", 1731 "HIGH SURROGATES", 1732 "HIGHSURROGATES"); 1733 1734 /** 1735 * Constant for the "High Private Use Surrogates" Unicode character 1736 * block.
1737 * This block represents codepoint values in the private use high 1738 * surrogate range: U+DB80 through U+DBFF 1739 * 1740 * @since 1.5 1741 */ 1742 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1743 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1744 "HIGH PRIVATE USE SURROGATES", 1745 "HIGHPRIVATEUSESURROGATES"); 1746 1747 /** 1748 * Constant for the "Low Surrogates" Unicode character block. 1749 * This block represents codepoint values in the low surrogate 1750 * range: U+DC00 through U+DFFF 1751 * * @see #HIGH_SURROGATES * @see #HIGH_PRIVATE_USE_SURROGATES 1752 * @since 1.5 1753 */ 1754 public static final UnicodeBlock LOW_SURROGATES = 1755 new UnicodeBlock("LOW_SURROGATES", 1756 "LOW SURROGATES", 1757 "LOWSURROGATES"); 1758 1759 /** 1760 * Constant for the "Arabic Supplement" Unicode character block. 1761 * @since 1.7 1762 */ 1763 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1764 new UnicodeBlock("ARABIC_SUPPLEMENT", 1765 "ARABIC SUPPLEMENT", 1766 "ARABICSUPPLEMENT"); 1767 1768 /** 1769 * Constant for the "NKo" Unicode character block. 1770 * @since 1.7 1771 */ 1772 public static final UnicodeBlock NKO = 1773 new UnicodeBlock("NKO"); 1774 1775 /** 1776 * Constant for the "Samaritan" Unicode character block. 1777 * @since 1.7 1778 */ 1779 public static final UnicodeBlock SAMARITAN = 1780 new UnicodeBlock("SAMARITAN"); 1781 1782 /** 1783 * Constant for the "Mandaic" Unicode character block. 1784 * @since 1.7 1785 */ 1786 public static final UnicodeBlock MANDAIC = 1787 new UnicodeBlock("MANDAIC"); 1788 1789 /** 1790 * Constant for the "Ethiopic Supplement" Unicode character block. * @see #ETHIOPIC 1791 * @since 1.7 1792 */ 1793 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1794 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1795 "ETHIOPIC SUPPLEMENT", 1796 "ETHIOPICSUPPLEMENT"); 1797 1798 /** 1799 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1800 * Unicode character block.
1801 * @since 1.7 1802 */ 1803 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1804 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1805 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1806 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1807 1808 /** 1809 * Constant for the "New Tai Lue" Unicode character block. * @see #TAI_LE 1810 * @since 1.7 1811 */ 1812 public static final UnicodeBlock NEW_TAI_LUE = 1813 new UnicodeBlock("NEW_TAI_LUE", 1814 "NEW TAI LUE", 1815 "NEWTAILUE"); 1816 1817 /** 1818 * Constant for the "Buginese" Unicode character block. 1819 * @since 1.7 1820 */ 1821 public static final UnicodeBlock BUGINESE = 1822 new UnicodeBlock("BUGINESE"); 1823 1824 /** 1825 * Constant for the "Tai Tham" Unicode character block. 1826 * @since 1.7 1827 */ 1828 public static final UnicodeBlock TAI_THAM = 1829 new UnicodeBlock("TAI_THAM", 1830 "TAI THAM", 1831 "TAITHAM"); 1832 1833 /** 1834 * Constant for the "Balinese" Unicode character block. 1835 * @since 1.7 1836 */ 1837 public static final UnicodeBlock BALINESE = 1838 new UnicodeBlock("BALINESE"); 1839 1840 /** 1841 * Constant for the "Sundanese" Unicode character block. 1842 * @since 1.7 1843 */ 1844 public static final UnicodeBlock SUNDANESE = 1845 new UnicodeBlock("SUNDANESE"); 1846 1847 /** 1848 * Constant for the "Batak" Unicode character block. 1849 * @since 1.7 1850 */ 1851 public static final UnicodeBlock BATAK = 1852 new UnicodeBlock("BATAK"); 1853 1854 /** 1855 * Constant for the "Lepcha" Unicode character block. 1856 * @since 1.7 1857 */ 1858 public static final UnicodeBlock LEPCHA = 1859 new UnicodeBlock("LEPCHA"); 1860 1861 /** 1862 * Constant for the "Ol Chiki" Unicode character block. 1863 * @since 1.7 1864 */ 1865 public static final UnicodeBlock OL_CHIKI = 1866 new UnicodeBlock("OL_CHIKI", 1867 "OL CHIKI", 1868 "OLCHIKI"); 1869 1870 /** 1871 * Constant for the "Vedic Extensions" Unicode character block.
1872 * @since 1.7 1873 */ 1874 public static final UnicodeBlock VEDIC_EXTENSIONS = 1875 new UnicodeBlock("VEDIC_EXTENSIONS", 1876 "VEDIC EXTENSIONS", 1877 "VEDICEXTENSIONS"); 1878 1879 /** 1880 * Constant for the "Phonetic Extensions Supplement" Unicode character 1881 * block. * @see #PHONETIC_EXTENSIONS 1882 * @since 1.7 1883 */ 1884 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1885 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1886 "PHONETIC EXTENSIONS SUPPLEMENT", 1887 "PHONETICEXTENSIONSSUPPLEMENT"); 1888 1889 /** 1890 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1891 * character block. 1892 * @since 1.7 1893 */ 1894 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1895 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1896 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1897 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1898 1899 /** 1900 * Constant for the "Glagolitic" Unicode character block. 1901 * @since 1.7 1902 */ 1903 public static final UnicodeBlock GLAGOLITIC = 1904 new UnicodeBlock("GLAGOLITIC"); 1905 1906 /** 1907 * Constant for the "Latin Extended-C" Unicode character block. * @see #LATIN_EXTENDED_D 1908 * @since 1.7 1909 */ 1910 public static final UnicodeBlock LATIN_EXTENDED_C = 1911 new UnicodeBlock("LATIN_EXTENDED_C", 1912 "LATIN EXTENDED-C", 1913 "LATINEXTENDED-C"); 1914 1915 /** 1916 * Constant for the "Coptic" Unicode character block. 1917 * @since 1.7 1918 */ 1919 public static final UnicodeBlock COPTIC = 1920 new UnicodeBlock("COPTIC"); 1921 1922 /** 1923 * Constant for the "Georgian Supplement" Unicode character block. 1924 * @since 1.7 1925 */ 1926 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1927 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1928 "GEORGIAN SUPPLEMENT", 1929 "GEORGIANSUPPLEMENT"); 1930 1931 /** 1932 * Constant for the "Tifinagh" Unicode character block.
1933 * @since 1.7 1934 */ 1935 public static final UnicodeBlock TIFINAGH = 1936 new UnicodeBlock("TIFINAGH"); 1937 1938 /** 1939 * Constant for the "Ethiopic Extended" Unicode character block. * @see #ETHIOPIC * @see #ETHIOPIC_EXTENDED_A 1940 * @since 1.7 1941 */ 1942 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1943 new UnicodeBlock("ETHIOPIC_EXTENDED", 1944 "ETHIOPIC EXTENDED", 1945 "ETHIOPICEXTENDED"); 1946 1947 /** 1948 * Constant for the "Cyrillic Extended-A" Unicode character block. * @see #CYRILLIC_EXTENDED_B 1949 * @since 1.7 1950 */ 1951 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1952 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1953 "CYRILLIC EXTENDED-A", 1954 "CYRILLICEXTENDED-A"); 1955 1956 /** 1957 * Constant for the "Supplemental Punctuation" Unicode character block. 1958 * @since 1.7 1959 */ 1960 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1961 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1962 "SUPPLEMENTAL PUNCTUATION", 1963 "SUPPLEMENTALPUNCTUATION"); 1964 1965 /** 1966 * Constant for the "CJK Strokes" Unicode character block. 1967 * @since 1.7 1968 */ 1969 public static final UnicodeBlock CJK_STROKES = 1970 new UnicodeBlock("CJK_STROKES", 1971 "CJK STROKES", 1972 "CJKSTROKES"); 1973 1974 /** 1975 * Constant for the "Lisu" Unicode character block. 1976 * @since 1.7 1977 */ 1978 public static final UnicodeBlock LISU = 1979 new UnicodeBlock("LISU"); 1980 1981 /** 1982 * Constant for the "Vai" Unicode character block. 1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock VAI = 1986 new UnicodeBlock("VAI"); 1987 1988 /** 1989 * Constant for the "Cyrillic Extended-B" Unicode character block. * @see #CYRILLIC_EXTENDED_A 1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1993 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1994 "CYRILLIC EXTENDED-B", 1995 "CYRILLICEXTENDED-B"); 1996 1997 /** 1998 * Constant for the "Bamum" Unicode character block.
1999 * @since 1.7 2000 */ 2001 public static final UnicodeBlock BAMUM = 2002 new UnicodeBlock("BAMUM"); 2003 2004 /** 2005 * Constant for the "Modifier Tone Letters" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2009 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2010 "MODIFIER TONE LETTERS", 2011 "MODIFIERTONELETTERS"); 2012 2013 /** 2014 * Constant for the "Latin Extended-D" Unicode character block. * @see #LATIN_EXTENDED_C 2015 * @since 1.7 2016 */ 2017 public static final UnicodeBlock LATIN_EXTENDED_D = 2018 new UnicodeBlock("LATIN_EXTENDED_D", 2019 "LATIN EXTENDED-D", 2020 "LATINEXTENDED-D"); 2021 2022 /** 2023 * Constant for the "Syloti Nagri" Unicode character block. 2024 * @since 1.7 2025 */ 2026 public static final UnicodeBlock SYLOTI_NAGRI = 2027 new UnicodeBlock("SYLOTI_NAGRI", 2028 "SYLOTI NAGRI", 2029 "SYLOTINAGRI"); 2030 2031 /** 2032 * Constant for the "Common Indic Number Forms" Unicode character block. 2033 * @since 1.7 2034 */ 2035 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2036 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2037 "COMMON INDIC NUMBER FORMS", 2038 "COMMONINDICNUMBERFORMS"); 2039 2040 /* NOTE(review): the declaration below passes only two names. "PHAGS-PA" contains no space, so a space-free spelling would be identical to it - presumably why no third name is given; confirm against the UnicodeBlock constructor, which is outside this view. */ /** 2041 * Constant for the "Phags-pa" Unicode character block. 2042 * @since 1.7 2043 */ 2044 public static final UnicodeBlock PHAGS_PA = 2045 new UnicodeBlock("PHAGS_PA", 2046 "PHAGS-PA"); 2047 2048 /** 2049 * Constant for the "Saurashtra" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock SAURASHTRA = 2053 new UnicodeBlock("SAURASHTRA"); 2054 2055 /** 2056 * Constant for the "Devanagari Extended" Unicode character block. 2057 * @since 1.7 2058 */ 2059 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2060 new UnicodeBlock("DEVANAGARI_EXTENDED", 2061 "DEVANAGARI EXTENDED", 2062 "DEVANAGARIEXTENDED"); 2063 2064 /** 2065 * Constant for the "Kayah Li" Unicode character block.
2066 * @since 1.7 2067 */ 2068 public static final UnicodeBlock KAYAH_LI = 2069 new UnicodeBlock("KAYAH_LI", 2070 "KAYAH LI", 2071 "KAYAHLI"); 2072 2073 /** 2074 * Constant for the "Rejang" Unicode character block. 2075 * @since 1.7 2076 */ 2077 public static final UnicodeBlock REJANG = 2078 new UnicodeBlock("REJANG"); 2079 2080 /** 2081 * Constant for the "Hangul Jamo Extended-A" Unicode character block. * @see #HANGUL_JAMO_EXTENDED_B 2082 * @since 1.7 2083 */ 2084 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2085 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2086 "HANGUL JAMO EXTENDED-A", 2087 "HANGULJAMOEXTENDED-A"); 2088 2089 /** 2090 * Constant for the "Javanese" Unicode character block. 2091 * @since 1.7 2092 */ 2093 public static final UnicodeBlock JAVANESE = 2094 new UnicodeBlock("JAVANESE"); 2095 2096 /** 2097 * Constant for the "Cham" Unicode character block. 2098 * @since 1.7 2099 */ 2100 public static final UnicodeBlock CHAM = 2101 new UnicodeBlock("CHAM"); 2102 2103 /** 2104 * Constant for the "Myanmar Extended-A" Unicode character block. * @see #MYANMAR 2105 * @since 1.7 2106 */ 2107 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2108 new UnicodeBlock("MYANMAR_EXTENDED_A", 2109 "MYANMAR EXTENDED-A", 2110 "MYANMAREXTENDED-A"); 2111 2112 /** 2113 * Constant for the "Tai Viet" Unicode character block. 2114 * @since 1.7 2115 */ 2116 public static final UnicodeBlock TAI_VIET = 2117 new UnicodeBlock("TAI_VIET", 2118 "TAI VIET", 2119 "TAIVIET"); 2120 2121 /** 2122 * Constant for the "Ethiopic Extended-A" Unicode character block. * @see #ETHIOPIC_EXTENDED 2123 * @since 1.7 2124 */ 2125 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2126 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2127 "ETHIOPIC EXTENDED-A", 2128 "ETHIOPICEXTENDED-A"); 2129 2130 /** 2131 * Constant for the "Meetei Mayek" Unicode character block.
2132 * @since 1.7 2133 */ 2134 public static final UnicodeBlock MEETEI_MAYEK = 2135 new UnicodeBlock("MEETEI_MAYEK", 2136 "MEETEI MAYEK", 2137 "MEETEIMAYEK"); 2138 2139 /** 2140 * Constant for the "Hangul Jamo Extended-B" Unicode character block. * @see #HANGUL_JAMO_EXTENDED_A 2141 * @since 1.7 2142 */ 2143 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2144 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2145 "HANGUL JAMO EXTENDED-B", 2146 "HANGULJAMOEXTENDED-B"); 2147 2148 /** 2149 * Constant for the "Vertical Forms" Unicode character block. 2150 * @since 1.7 2151 */ 2152 public static final UnicodeBlock VERTICAL_FORMS = 2153 new UnicodeBlock("VERTICAL_FORMS", 2154 "VERTICAL FORMS", 2155 "VERTICALFORMS"); 2156 2157 /** 2158 * Constant for the "Ancient Greek Numbers" Unicode character block. * @see #ANCIENT_GREEK_MUSICAL_NOTATION 2159 * @since 1.7 2160 */ 2161 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2162 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2163 "ANCIENT GREEK NUMBERS", 2164 "ANCIENTGREEKNUMBERS"); 2165 2166 /** 2167 * Constant for the "Ancient Symbols" Unicode character block. 2168 * @since 1.7 2169 */ 2170 public static final UnicodeBlock ANCIENT_SYMBOLS = 2171 new UnicodeBlock("ANCIENT_SYMBOLS", 2172 "ANCIENT SYMBOLS", 2173 "ANCIENTSYMBOLS"); 2174 2175 /** 2176 * Constant for the "Phaistos Disc" Unicode character block. 2177 * @since 1.7 2178 */ 2179 public static final UnicodeBlock PHAISTOS_DISC = 2180 new UnicodeBlock("PHAISTOS_DISC", 2181 "PHAISTOS DISC", 2182 "PHAISTOSDISC"); 2183 2184 /** 2185 * Constant for the "Lycian" Unicode character block. 2186 * @since 1.7 2187 */ 2188 public static final UnicodeBlock LYCIAN = 2189 new UnicodeBlock("LYCIAN"); 2190 2191 /** 2192 * Constant for the "Carian" Unicode character block. 2193 * @since 1.7 2194 */ 2195 public static final UnicodeBlock CARIAN = 2196 new UnicodeBlock("CARIAN"); 2197 2198 /** 2199 * Constant for the "Old Persian" Unicode character block.
2200 * @since 1.7 2201 */ 2202 public static final UnicodeBlock OLD_PERSIAN = 2203 new UnicodeBlock("OLD_PERSIAN", 2204 "OLD PERSIAN", 2205 "OLDPERSIAN"); 2206 2207 /** 2208 * Constant for the "Imperial Aramaic" Unicode character block. 2209 * @since 1.7 2210 */ 2211 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2212 new UnicodeBlock("IMPERIAL_ARAMAIC", 2213 "IMPERIAL ARAMAIC", 2214 "IMPERIALARAMAIC"); 2215 2216 /** 2217 * Constant for the "Phoenician" Unicode character block. 2218 * @since 1.7 2219 */ 2220 public static final UnicodeBlock PHOENICIAN = 2221 new UnicodeBlock("PHOENICIAN"); 2222 2223 /** 2224 * Constant for the "Lydian" Unicode character block. 2225 * @since 1.7 2226 */ 2227 public static final UnicodeBlock LYDIAN = 2228 new UnicodeBlock("LYDIAN"); 2229 2230 /** 2231 * Constant for the "Kharoshthi" Unicode character block. 2232 * @since 1.7 2233 */ 2234 public static final UnicodeBlock KHAROSHTHI = 2235 new UnicodeBlock("KHAROSHTHI"); 2236 2237 /** 2238 * Constant for the "Old South Arabian" Unicode character block. 2239 * @since 1.7 2240 */ 2241 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2242 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2243 "OLD SOUTH ARABIAN", 2244 "OLDSOUTHARABIAN"); 2245 2246 /** 2247 * Constant for the "Avestan" Unicode character block. 2248 * @since 1.7 2249 */ 2250 public static final UnicodeBlock AVESTAN = 2251 new UnicodeBlock("AVESTAN"); 2252 2253 /** 2254 * Constant for the "Inscriptional Parthian" Unicode character block. * @see #INSCRIPTIONAL_PAHLAVI 2255 * @since 1.7 2256 */ 2257 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2258 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2259 "INSCRIPTIONAL PARTHIAN", 2260 "INSCRIPTIONALPARTHIAN"); 2261 2262 /** 2263 * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264 * @since 1.7 2265 */ 2266 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2267 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2268 "INSCRIPTIONAL PAHLAVI", 2269 "INSCRIPTIONALPAHLAVI"); 2270 2271 /** 2272 * Constant for the "Old Turkic" Unicode character block. 2273 * @since 1.7 2274 */ 2275 public static final UnicodeBlock OLD_TURKIC = 2276 new UnicodeBlock("OLD_TURKIC", 2277 "OLD TURKIC", 2278 "OLDTURKIC"); 2279 2280 /** 2281 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2282 * @since 1.7 2283 */ 2284 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2285 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2286 "RUMI NUMERAL SYMBOLS", 2287 "RUMINUMERALSYMBOLS"); 2288 2289 /** 2290 * Constant for the "Brahmi" Unicode character block. 2291 * @since 1.7 2292 */ 2293 public static final UnicodeBlock BRAHMI = 2294 new UnicodeBlock("BRAHMI"); 2295 2296 /** 2297 * Constant for the "Kaithi" Unicode character block. 2298 * @since 1.7 2299 */ 2300 public static final UnicodeBlock KAITHI = 2301 new UnicodeBlock("KAITHI"); 2302 2303 /** 2304 * Constant for the "Cuneiform" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock CUNEIFORM = 2308 new UnicodeBlock("CUNEIFORM"); 2309 2310 /** 2311 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2312 * character block. 2313 * @since 1.7 2314 */ 2315 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2316 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2317 "CUNEIFORM NUMBERS AND PUNCTUATION", 2318 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2319 2320 /** 2321 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2322 * @since 1.7 2323 */ 2324 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2325 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2326 "EGYPTIAN HIEROGLYPHS", 2327 "EGYPTIANHIEROGLYPHS"); 2328 2329 /** 2330 * Constant for the "Bamum Supplement" Unicode character block. 
2331 * @since 1.7 2332 */ 2333 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2334 new UnicodeBlock("BAMUM_SUPPLEMENT", 2335 "BAMUM SUPPLEMENT", 2336 "BAMUMSUPPLEMENT"); 2337 2338 /** 2339 * Constant for the "Kana Supplement" Unicode character block. 2340 * @since 1.7 2341 */ 2342 public static final UnicodeBlock KANA_SUPPLEMENT = 2343 new UnicodeBlock("KANA_SUPPLEMENT", 2344 "KANA SUPPLEMENT", 2345 "KANASUPPLEMENT"); 2346 2347 /** 2348 * Constant for the "Ancient Greek Musical Notation" Unicode character 2349 * block. 2350 * @since 1.7 2351 */ 2352 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2353 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2354 "ANCIENT GREEK MUSICAL NOTATION", 2355 "ANCIENTGREEKMUSICALNOTATION"); 2356 2357 /** 2358 * Constant for the "Counting Rod Numerals" Unicode character block. 2359 * @since 1.7 2360 */ 2361 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2362 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2363 "COUNTING ROD NUMERALS", 2364 "COUNTINGRODNUMERALS"); 2365 2366 /** 2367 * Constant for the "Mahjong Tiles" Unicode character block. 2368 * @since 1.7 2369 */ 2370 public static final UnicodeBlock MAHJONG_TILES = 2371 new UnicodeBlock("MAHJONG_TILES", 2372 "MAHJONG TILES", 2373 "MAHJONGTILES"); 2374 2375 /** 2376 * Constant for the "Domino Tiles" Unicode character block. 2377 * @since 1.7 2378 */ 2379 public static final UnicodeBlock DOMINO_TILES = 2380 new UnicodeBlock("DOMINO_TILES", 2381 "DOMINO TILES", 2382 "DOMINOTILES"); 2383 2384 /** 2385 * Constant for the "Playing Cards" Unicode character block. 2386 * @since 1.7 2387 */ 2388 public static final UnicodeBlock PLAYING_CARDS = 2389 new UnicodeBlock("PLAYING_CARDS", 2390 "PLAYING CARDS", 2391 "PLAYINGCARDS"); 2392 2393 /** 2394 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2395 * block. 
2396 * @since 1.7 2397 */ 2398 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2399 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2400 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2401 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2402 2403 /** 2404 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2405 * block. 2406 * @since 1.7 2407 */ 2408 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2409 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2410 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2411 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2412 2413 /** 2414 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2415 * character block. 2416 * @since 1.7 2417 */ 2418 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2419 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2420 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2421 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2422 2423 /** 2424 * Constant for the "Emoticons" Unicode character block. 2425 * @since 1.7 2426 */ 2427 public static final UnicodeBlock EMOTICONS = 2428 new UnicodeBlock("EMOTICONS"); 2429 2430 /** 2431 * Constant for the "Transport And Map Symbols" Unicode character block. 2432 * @since 1.7 2433 */ 2434 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2435 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2436 "TRANSPORT AND MAP SYMBOLS", 2437 "TRANSPORTANDMAPSYMBOLS"); 2438 2439 /** 2440 * Constant for the "Alchemical Symbols" Unicode character block. 2441 * @since 1.7 2442 */ 2443 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2444 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2445 "ALCHEMICAL SYMBOLS", 2446 "ALCHEMICALSYMBOLS"); 2447 2448 /** 2449 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2450 * character block. 
2451 * @since 1.7 2452 */ 2453 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2454 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2455 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2456 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2457 2458 /** 2459 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2460 * character block. 2461 * @since 1.7 2462 */ 2463 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2464 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2465 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2466 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2467 2468 /** 2469 * Constant for the "Arabic Extended-A" Unicode character block. 2470 * @since 1.8 2471 */ 2472 public static final UnicodeBlock ARABIC_EXTENDED_A = 2473 new UnicodeBlock("ARABIC_EXTENDED_A", 2474 "ARABIC EXTENDED-A", 2475 "ARABICEXTENDED-A"); 2476 2477 /** 2478 * Constant for the "Sundanese Supplement" Unicode character block. 2479 * @since 1.8 2480 */ 2481 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2482 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2483 "SUNDANESE SUPPLEMENT", 2484 "SUNDANESESUPPLEMENT"); 2485 2486 /** 2487 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2488 * @since 1.8 2489 */ 2490 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2491 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2492 "MEETEI MAYEK EXTENSIONS", 2493 "MEETEIMAYEKEXTENSIONS"); 2494 2495 /** 2496 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2497 * @since 1.8 2498 */ 2499 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2500 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2501 "MEROITIC HIEROGLYPHS", 2502 "MEROITICHIEROGLYPHS"); 2503 2504 /** 2505 * Constant for the "Meroitic Cursive" Unicode character block. 
2506 * @since 1.8 2507 */ 2508 public static final UnicodeBlock MEROITIC_CURSIVE = 2509 new UnicodeBlock("MEROITIC_CURSIVE", 2510 "MEROITIC CURSIVE", 2511 "MEROITICCURSIVE"); 2512 2513 /** 2514 * Constant for the "Sora Sompeng" Unicode character block. 2515 * @since 1.8 2516 */ 2517 public static final UnicodeBlock SORA_SOMPENG = 2518 new UnicodeBlock("SORA_SOMPENG", 2519 "SORA SOMPENG", 2520 "SORASOMPENG"); 2521 2522 /** 2523 * Constant for the "Chakma" Unicode character block. 2524 * @since 1.8 2525 */ 2526 public static final UnicodeBlock CHAKMA = 2527 new UnicodeBlock("CHAKMA"); 2528 2529 /** 2530 * Constant for the "Sharada" Unicode character block. 2531 * @since 1.8 2532 */ 2533 public static final UnicodeBlock SHARADA = 2534 new UnicodeBlock("SHARADA"); 2535 2536 /** 2537 * Constant for the "Takri" Unicode character block. 2538 * @since 1.8 2539 */ 2540 public static final UnicodeBlock TAKRI = 2541 new UnicodeBlock("TAKRI"); 2542 2543 /** 2544 * Constant for the "Miao" Unicode character block. 2545 * @since 1.8 2546 */ 2547 public static final UnicodeBlock MIAO = 2548 new UnicodeBlock("MIAO"); 2549 2550 /** 2551 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2552 * character block. 
2553 * @since 1.8 2554 */ 2555 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2556 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2557 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2558 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2559 2560 private static final int blockStarts[] = { 2561 0x0000, // 0000..007F; Basic Latin 2562 0x0080, // 0080..00FF; Latin-1 Supplement 2563 0x0100, // 0100..017F; Latin Extended-A 2564 0x0180, // 0180..024F; Latin Extended-B 2565 0x0250, // 0250..02AF; IPA Extensions 2566 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2567 0x0300, // 0300..036F; Combining Diacritical Marks 2568 0x0370, // 0370..03FF; Greek and Coptic 2569 0x0400, // 0400..04FF; Cyrillic 2570 0x0500, // 0500..052F; Cyrillic Supplement 2571 0x0530, // 0530..058F; Armenian 2572 0x0590, // 0590..05FF; Hebrew 2573 0x0600, // 0600..06FF; Arabic 2574 0x0700, // 0700..074F; Syriac 2575 0x0750, // 0750..077F; Arabic Supplement 2576 0x0780, // 0780..07BF; Thaana 2577 0x07C0, // 07C0..07FF; NKo 2578 0x0800, // 0800..083F; Samaritan 2579 0x0840, // 0840..085F; Mandaic 2580 0x0860, // unassigned 2581 0x08A0, // 08A0..08FF; Arabic Extended-A 2582 0x0900, // 0900..097F; Devanagari 2583 0x0980, // 0980..09FF; Bengali 2584 0x0A00, // 0A00..0A7F; Gurmukhi 2585 0x0A80, // 0A80..0AFF; Gujarati 2586 0x0B00, // 0B00..0B7F; Oriya 2587 0x0B80, // 0B80..0BFF; Tamil 2588 0x0C00, // 0C00..0C7F; Telugu 2589 0x0C80, // 0C80..0CFF; Kannada 2590 0x0D00, // 0D00..0D7F; Malayalam 2591 0x0D80, // 0D80..0DFF; Sinhala 2592 0x0E00, // 0E00..0E7F; Thai 2593 0x0E80, // 0E80..0EFF; Lao 2594 0x0F00, // 0F00..0FFF; Tibetan 2595 0x1000, // 1000..109F; Myanmar 2596 0x10A0, // 10A0..10FF; Georgian 2597 0x1100, // 1100..11FF; Hangul Jamo 2598 0x1200, // 1200..137F; Ethiopic 2599 0x1380, // 1380..139F; Ethiopic Supplement 2600 0x13A0, // 13A0..13FF; Cherokee 2601 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2602 0x1680, // 1680..169F; Ogham 2603 0x16A0, // 
16A0..16FF; Runic 2604 0x1700, // 1700..171F; Tagalog 2605 0x1720, // 1720..173F; Hanunoo 2606 0x1740, // 1740..175F; Buhid 2607 0x1760, // 1760..177F; Tagbanwa 2608 0x1780, // 1780..17FF; Khmer 2609 0x1800, // 1800..18AF; Mongolian 2610 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2611 0x1900, // 1900..194F; Limbu 2612 0x1950, // 1950..197F; Tai Le 2613 0x1980, // 1980..19DF; New Tai Lue 2614 0x19E0, // 19E0..19FF; Khmer Symbols 2615 0x1A00, // 1A00..1A1F; Buginese 2616 0x1A20, // 1A20..1AAF; Tai Tham 2617 0x1AB0, // unassigned 2618 0x1B00, // 1B00..1B7F; Balinese 2619 0x1B80, // 1B80..1BBF; Sundanese 2620 0x1BC0, // 1BC0..1BFF; Batak 2621 0x1C00, // 1C00..1C4F; Lepcha 2622 0x1C50, // 1C50..1C7F; Ol Chiki 2623 0x1C80, // unassigned 2624 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 2625 0x1CD0, // 1CD0..1CFF; Vedic Extensions 2626 0x1D00, // 1D00..1D7F; Phonetic Extensions 2627 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2628 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2629 0x1E00, // 1E00..1EFF; Latin Extended Additional 2630 0x1F00, // 1F00..1FFF; Greek Extended 2631 0x2000, // 2000..206F; General Punctuation 2632 0x2070, // 2070..209F; Superscripts and Subscripts 2633 0x20A0, // 20A0..20CF; Currency Symbols 2634 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2635 0x2100, // 2100..214F; Letterlike Symbols 2636 0x2150, // 2150..218F; Number Forms 2637 0x2190, // 2190..21FF; Arrows 2638 0x2200, // 2200..22FF; Mathematical Operators 2639 0x2300, // 2300..23FF; Miscellaneous Technical 2640 0x2400, // 2400..243F; Control Pictures 2641 0x2440, // 2440..245F; Optical Character Recognition 2642 0x2460, // 2460..24FF; Enclosed Alphanumerics 2643 0x2500, // 2500..257F; Box Drawing 2644 0x2580, // 2580..259F; Block Elements 2645 0x25A0, // 25A0..25FF; Geometric Shapes 2646 0x2600, // 2600..26FF; Miscellaneous Symbols 2647 0x2700, // 2700..27BF; Dingbats 2648 0x27C0, // 27C0..27EF; Miscellaneous Mathematical 
Symbols-A 2649 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2650 0x2800, // 2800..28FF; Braille Patterns 2651 0x2900, // 2900..297F; Supplemental Arrows-B 2652 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2653 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2654 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2655 0x2C00, // 2C00..2C5F; Glagolitic 2656 0x2C60, // 2C60..2C7F; Latin Extended-C 2657 0x2C80, // 2C80..2CFF; Coptic 2658 0x2D00, // 2D00..2D2F; Georgian Supplement 2659 0x2D30, // 2D30..2D7F; Tifinagh 2660 0x2D80, // 2D80..2DDF; Ethiopic Extended 2661 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2662 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2663 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2664 0x2F00, // 2F00..2FDF; Kangxi Radicals 2665 0x2FE0, // unassigned 2666 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2667 0x3000, // 3000..303F; CJK Symbols and Punctuation 2668 0x3040, // 3040..309F; Hiragana 2669 0x30A0, // 30A0..30FF; Katakana 2670 0x3100, // 3100..312F; Bopomofo 2671 0x3130, // 3130..318F; Hangul Compatibility Jamo 2672 0x3190, // 3190..319F; Kanbun 2673 0x31A0, // 31A0..31BF; Bopomofo Extended 2674 0x31C0, // 31C0..31EF; CJK Strokes 2675 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2676 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2677 0x3300, // 3300..33FF; CJK Compatibility 2678 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2679 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2680 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2681 0xA000, // A000..A48F; Yi Syllables 2682 0xA490, // A490..A4CF; Yi Radicals 2683 0xA4D0, // A4D0..A4FF; Lisu 2684 0xA500, // A500..A63F; Vai 2685 0xA640, // A640..A69F; Cyrillic Extended-B 2686 0xA6A0, // A6A0..A6FF; Bamum 2687 0xA700, // A700..A71F; Modifier Tone Letters 2688 0xA720, // A720..A7FF; Latin Extended-D 2689 0xA800, // A800..A82F; Syloti Nagri 2690 0xA830, // A830..A83F; Common Indic Number Forms 2691 0xA840, // A840..A87F; 
Phags-pa 2692 0xA880, // A880..A8DF; Saurashtra 2693 0xA8E0, // A8E0..A8FF; Devanagari Extended 2694 0xA900, // A900..A92F; Kayah Li 2695 0xA930, // A930..A95F; Rejang 2696 0xA960, // A960..A97F; Hangul Jamo Extended-A 2697 0xA980, // A980..A9DF; Javanese 2698 0xA9E0, // unassigned 2699 0xAA00, // AA00..AA5F; Cham 2700 0xAA60, // AA60..AA7F; Myanmar Extended-A 2701 0xAA80, // AA80..AADF; Tai Viet 2702 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 2703 0xAB00, // AB00..AB2F; Ethiopic Extended-A 2704 0xAB30, // unassigned 2705 0xABC0, // ABC0..ABFF; Meetei Mayek 2706 0xAC00, // AC00..D7AF; Hangul Syllables 2707 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 2708 0xD800, // D800..DB7F; High Surrogates 2709 0xDB80, // DB80..DBFF; High Private Use Surrogates 2710 0xDC00, // DC00..DFFF; Low Surrogates 2711 0xE000, // E000..F8FF; Private Use Area 2712 0xF900, // F900..FAFF; CJK Compatibility Ideographs 2713 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 2714 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 2715 0xFE00, // FE00..FE0F; Variation Selectors 2716 0xFE10, // FE10..FE1F; Vertical Forms 2717 0xFE20, // FE20..FE2F; Combining Half Marks 2718 0xFE30, // FE30..FE4F; CJK Compatibility Forms 2719 0xFE50, // FE50..FE6F; Small Form Variants 2720 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 2721 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 2722 0xFFF0, // FFF0..FFFF; Specials 2723 0x10000, // 10000..1007F; Linear B Syllabary 2724 0x10080, // 10080..100FF; Linear B Ideograms 2725 0x10100, // 10100..1013F; Aegean Numbers 2726 0x10140, // 10140..1018F; Ancient Greek Numbers 2727 0x10190, // 10190..101CF; Ancient Symbols 2728 0x101D0, // 101D0..101FF; Phaistos Disc 2729 0x10200, // unassigned 2730 0x10280, // 10280..1029F; Lycian 2731 0x102A0, // 102A0..102DF; Carian 2732 0x102E0, // unassigned 2733 0x10300, // 10300..1032F; Old Italic 2734 0x10330, // 10330..1034F; Gothic 2735 0x10350, // unassigned 2736 0x10380, // 10380..1039F; Ugaritic 2737 0x103A0, 
// 103A0..103DF; Old Persian 2738 0x103E0, // unassigned 2739 0x10400, // 10400..1044F; Deseret 2740 0x10450, // 10450..1047F; Shavian 2741 0x10480, // 10480..104AF; Osmanya 2742 0x104B0, // unassigned 2743 0x10800, // 10800..1083F; Cypriot Syllabary 2744 0x10840, // 10840..1085F; Imperial Aramaic 2745 0x10860, // unassigned 2746 0x10900, // 10900..1091F; Phoenician 2747 0x10920, // 10920..1093F; Lydian 2748 0x10940, // unassigned 2749 0x10980, // 10980..1099F; Meroitic Hieroglyphs 2750 0x109A0, // 109A0..109FF; Meroitic Cursive 2751 0x10A00, // 10A00..10A5F; Kharoshthi 2752 0x10A60, // 10A60..10A7F; Old South Arabian 2753 0x10A80, // unassigned 2754 0x10B00, // 10B00..10B3F; Avestan 2755 0x10B40, // 10B40..10B5F; Inscriptional Parthian 2756 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 2757 0x10B80, // unassigned 2758 0x10C00, // 10C00..10C4F; Old Turkic 2759 0x10C50, // unassigned 2760 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 2761 0x10E80, // unassigned 2762 0x11000, // 11000..1107F; Brahmi 2763 0x11080, // 11080..110CF; Kaithi 2764 0x110D0, // 110D0..110FF; Sora Sompeng 2765 0x11100, // 11100..1114F; Chakma 2766 0x11150, // unassigned 2767 0x11180, // 11180..111DF; Sharada 2768 0x111E0, // unassigned 2769 0x11680, // 11680..116CF; Takri 2770 0x116D0, // unassigned 2771 0x12000, // 12000..123FF; Cuneiform 2772 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 2773 0x12480, // unassigned 2774 0x13000, // 13000..1342F; Egyptian Hieroglyphs 2775 0x13430, // unassigned 2776 0x16800, // 16800..16A3F; Bamum Supplement 2777 0x16A40, // unassigned 2778 0x16F00, // 16F00..16F9F; Miao 2779 0x16FA0, // unassigned 2780 0x1B000, // 1B000..1B0FF; Kana Supplement 2781 0x1B100, // unassigned 2782 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 2783 0x1D100, // 1D100..1D1FF; Musical Symbols 2784 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 2785 0x1D250, // unassigned 2786 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 2787 0x1D360, // 
1D360..1D37F; Counting Rod Numerals 2788 0x1D380, // unassigned 2789 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 2790 0x1D800, // unassigned 2791 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 2792 0x1EF00, // unassigned 2793 0x1F000, // 1F000..1F02F; Mahjong Tiles 2794 0x1F030, // 1F030..1F09F; Domino Tiles 2795 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 2796 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 2797 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 2798 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs 2799 0x1F600, // 1F600..1F64F; Emoticons 2800 0x1F650, // unassigned 2801 0x1F680, // 1F680..1F6FF; Transport And Map Symbols 2802 0x1F700, // 1F700..1F77F; Alchemical Symbols 2803 0x1F780, // unassigned 2804 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 2805 0x2A6E0, // unassigned 2806 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 2807 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 2808 0x2B820, // unassigned 2809 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2810 0x2FA20, // unassigned 2811 0xE0000, // E0000..E007F; Tags 2812 0xE0080, // unassigned 2813 0xE0100, // E0100..E01EF; Variation Selectors Supplement 2814 0xE01F0, // unassigned 2815 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 2816 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 2817 }; 2818 2819 private static final UnicodeBlock[] blocks = { 2820 BASIC_LATIN, 2821 LATIN_1_SUPPLEMENT, 2822 LATIN_EXTENDED_A, 2823 LATIN_EXTENDED_B, 2824 IPA_EXTENSIONS, 2825 SPACING_MODIFIER_LETTERS, 2826 COMBINING_DIACRITICAL_MARKS, 2827 GREEK, 2828 CYRILLIC, 2829 CYRILLIC_SUPPLEMENTARY, 2830 ARMENIAN, 2831 HEBREW, 2832 ARABIC, 2833 SYRIAC, 2834 ARABIC_SUPPLEMENT, 2835 THAANA, 2836 NKO, 2837 SAMARITAN, 2838 MANDAIC, 2839 null, 2840 ARABIC_EXTENDED_A, 2841 DEVANAGARI, 2842 BENGALI, 2843 GURMUKHI, 2844 GUJARATI, 2845 ORIYA, 2846 TAMIL, 2847 TELUGU, 2848 KANNADA, 
2849 MALAYALAM, 2850 SINHALA, 2851 THAI, 2852 LAO, 2853 TIBETAN, 2854 MYANMAR, 2855 GEORGIAN, 2856 HANGUL_JAMO, 2857 ETHIOPIC, 2858 ETHIOPIC_SUPPLEMENT, 2859 CHEROKEE, 2860 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 2861 OGHAM, 2862 RUNIC, 2863 TAGALOG, 2864 HANUNOO, 2865 BUHID, 2866 TAGBANWA, 2867 KHMER, 2868 MONGOLIAN, 2869 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 2870 LIMBU, 2871 TAI_LE, 2872 NEW_TAI_LUE, 2873 KHMER_SYMBOLS, 2874 BUGINESE, 2875 TAI_THAM, 2876 null, 2877 BALINESE, 2878 SUNDANESE, 2879 BATAK, 2880 LEPCHA, 2881 OL_CHIKI, 2882 null, 2883 SUNDANESE_SUPPLEMENT, 2884 VEDIC_EXTENSIONS, 2885 PHONETIC_EXTENSIONS, 2886 PHONETIC_EXTENSIONS_SUPPLEMENT, 2887 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 2888 LATIN_EXTENDED_ADDITIONAL, 2889 GREEK_EXTENDED, 2890 GENERAL_PUNCTUATION, 2891 SUPERSCRIPTS_AND_SUBSCRIPTS, 2892 CURRENCY_SYMBOLS, 2893 COMBINING_MARKS_FOR_SYMBOLS, 2894 LETTERLIKE_SYMBOLS, 2895 NUMBER_FORMS, 2896 ARROWS, 2897 MATHEMATICAL_OPERATORS, 2898 MISCELLANEOUS_TECHNICAL, 2899 CONTROL_PICTURES, 2900 OPTICAL_CHARACTER_RECOGNITION, 2901 ENCLOSED_ALPHANUMERICS, 2902 BOX_DRAWING, 2903 BLOCK_ELEMENTS, 2904 GEOMETRIC_SHAPES, 2905 MISCELLANEOUS_SYMBOLS, 2906 DINGBATS, 2907 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 2908 SUPPLEMENTAL_ARROWS_A, 2909 BRAILLE_PATTERNS, 2910 SUPPLEMENTAL_ARROWS_B, 2911 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 2912 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 2913 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 2914 GLAGOLITIC, 2915 LATIN_EXTENDED_C, 2916 COPTIC, 2917 GEORGIAN_SUPPLEMENT, 2918 TIFINAGH, 2919 ETHIOPIC_EXTENDED, 2920 CYRILLIC_EXTENDED_A, 2921 SUPPLEMENTAL_PUNCTUATION, 2922 CJK_RADICALS_SUPPLEMENT, 2923 KANGXI_RADICALS, 2924 null, 2925 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 2926 CJK_SYMBOLS_AND_PUNCTUATION, 2927 HIRAGANA, 2928 KATAKANA, 2929 BOPOMOFO, 2930 HANGUL_COMPATIBILITY_JAMO, 2931 KANBUN, 2932 BOPOMOFO_EXTENDED, 2933 CJK_STROKES, 2934 KATAKANA_PHONETIC_EXTENSIONS, 2935 ENCLOSED_CJK_LETTERS_AND_MONTHS, 2936 CJK_COMPATIBILITY, 2937 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 2938 YIJING_HEXAGRAM_SYMBOLS, 2939 CJK_UNIFIED_IDEOGRAPHS, 2940 YI_SYLLABLES, 2941 YI_RADICALS, 2942 LISU, 2943 VAI, 2944 CYRILLIC_EXTENDED_B, 2945 BAMUM, 2946 MODIFIER_TONE_LETTERS, 2947 LATIN_EXTENDED_D, 2948 SYLOTI_NAGRI, 2949 COMMON_INDIC_NUMBER_FORMS, 2950 PHAGS_PA, 2951 SAURASHTRA, 2952 DEVANAGARI_EXTENDED, 2953 KAYAH_LI, 2954 REJANG, 2955 HANGUL_JAMO_EXTENDED_A, 2956 JAVANESE, 2957 null, 2958 CHAM, 2959 MYANMAR_EXTENDED_A, 2960 TAI_VIET, 2961 MEETEI_MAYEK_EXTENSIONS, 2962 ETHIOPIC_EXTENDED_A, 2963 null, 2964 MEETEI_MAYEK, 2965 HANGUL_SYLLABLES, 2966 HANGUL_JAMO_EXTENDED_B, 2967 HIGH_SURROGATES, 2968 HIGH_PRIVATE_USE_SURROGATES, 2969 LOW_SURROGATES, 2970 PRIVATE_USE_AREA, 2971 CJK_COMPATIBILITY_IDEOGRAPHS, 2972 ALPHABETIC_PRESENTATION_FORMS, 2973 ARABIC_PRESENTATION_FORMS_A, 2974 VARIATION_SELECTORS, 2975 VERTICAL_FORMS, 2976 COMBINING_HALF_MARKS, 2977 CJK_COMPATIBILITY_FORMS, 2978 SMALL_FORM_VARIANTS, 2979 ARABIC_PRESENTATION_FORMS_B, 2980 HALFWIDTH_AND_FULLWIDTH_FORMS, 2981 SPECIALS, 2982 LINEAR_B_SYLLABARY, 2983 LINEAR_B_IDEOGRAMS, 2984 AEGEAN_NUMBERS, 2985 ANCIENT_GREEK_NUMBERS, 2986 ANCIENT_SYMBOLS, 2987 PHAISTOS_DISC, 2988 null, 2989 LYCIAN, 2990 CARIAN, 2991 null, 2992 OLD_ITALIC, 2993 GOTHIC, 2994 null, 2995 UGARITIC, 2996 OLD_PERSIAN, 2997 null, 2998 DESERET, 2999 SHAVIAN, 3000 OSMANYA, 3001 null, 3002 CYPRIOT_SYLLABARY, 3003 IMPERIAL_ARAMAIC, 3004 null, 3005 PHOENICIAN, 3006 LYDIAN, 3007 null, 3008 MEROITIC_HIEROGLYPHS, 3009 MEROITIC_CURSIVE, 3010 KHAROSHTHI, 3011 OLD_SOUTH_ARABIAN, 3012 null, 3013 AVESTAN, 3014 INSCRIPTIONAL_PARTHIAN, 3015 INSCRIPTIONAL_PAHLAVI, 3016 null, 3017 OLD_TURKIC, 3018 null, 3019 RUMI_NUMERAL_SYMBOLS, 3020 null, 3021 BRAHMI, 3022 KAITHI, 3023 SORA_SOMPENG, 3024 CHAKMA, 3025 null, 3026 SHARADA, 3027 null, 3028 TAKRI, 3029 null, 3030 CUNEIFORM, 3031 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 3032 null, 3033 EGYPTIAN_HIEROGLYPHS, 3034 null, 3035 BAMUM_SUPPLEMENT, 3036 null, 3037 MIAO, 3038 null, 
3039 KANA_SUPPLEMENT, 3040 null, 3041 BYZANTINE_MUSICAL_SYMBOLS, 3042 MUSICAL_SYMBOLS, 3043 ANCIENT_GREEK_MUSICAL_NOTATION, 3044 null, 3045 TAI_XUAN_JING_SYMBOLS, 3046 COUNTING_ROD_NUMERALS, 3047 null, 3048 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 3049 null, 3050 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 3051 null, 3052 MAHJONG_TILES, 3053 DOMINO_TILES, 3054 PLAYING_CARDS, 3055 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 3056 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 3057 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 3058 EMOTICONS, 3059 null, 3060 TRANSPORT_AND_MAP_SYMBOLS, 3061 ALCHEMICAL_SYMBOLS, 3062 null, 3063 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 3064 null, 3065 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 3066 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 3067 null, 3068 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 3069 null, 3070 TAGS, 3071 null, 3072 VARIATION_SELECTORS_SUPPLEMENT, 3073 null, 3074 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 3075 SUPPLEMENTARY_PRIVATE_USE_AREA_B 3076 }; 3077 3078 3079 /** 3080 * Returns the object representing the Unicode block containing the 3081 * given character, or {@code null} if the character is not a 3082 * member of a defined block. 3083 * 3084 * <p><b>Note:</b> This method cannot handle 3085 * <a href="Character.html#supplementary"> supplementary 3086 * characters</a>. To support all Unicode characters, including 3087 * supplementary characters, use the {@link #of(int)} method. 3088 * 3089 * @param c The character in question 3090 * @return The {@code UnicodeBlock} instance representing the 3091 * Unicode block of which this character is a member, or 3092 * {@code null} if the character is not a member of any 3093 * Unicode block 3094 */ 3095 public static UnicodeBlock of(char c) { 3096 return of((int)c); 3097 } 3098 3099 /** 3100 * Returns the object representing the Unicode block 3101 * containing the given character (Unicode code point), or 3102 * {@code null} if the character is not a member of a 3103 * defined block. 
3104 * 3105 * @param codePoint the character (Unicode code point) in question. 3106 * @return The {@code UnicodeBlock} instance representing the 3107 * Unicode block of which this character is a member, or 3108 * {@code null} if the character is not a member of any 3109 * Unicode block 3110 * @exception IllegalArgumentException if the specified 3111 * {@code codePoint} is an invalid Unicode code point. 3112 * @see Character#isValidCodePoint(int) 3113 * @since 1.5 3114 */ 3115 public static UnicodeBlock of(int codePoint) { 3116 if (!isValidCodePoint(codePoint)) { 3117 throw new IllegalArgumentException(); 3118 } 3119 3120 int top, bottom, current; 3121 bottom = 0; 3122 top = blockStarts.length; 3123 current = top/2; 3124 3125 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3126 while (top - bottom > 1) { 3127 if (codePoint >= blockStarts[current]) { 3128 bottom = current; 3129 } else { 3130 top = current; 3131 } 3132 current = (top + bottom) / 2; 3133 } 3134 return blocks[current]; 3135 } 3136 3137 /** 3138 * Returns the UnicodeBlock with the given name. Block 3139 * names are determined by The Unicode Standard. The file 3140 * Blocks-<version>.txt defines blocks for a particular 3141 * version of the standard. The {@link Character} class specifies 3142 * the version of the standard that it supports. 3143 * <p> 3144 * This method accepts block names in the following forms: 3145 * <ol> 3146 * <li> Canonical block names as defined by the Unicode Standard. 3147 * For example, the standard defines a "Basic Latin" block. Therefore, this 3148 * method accepts "Basic Latin" as a valid block name. The documentation of 3149 * each UnicodeBlock provides the canonical name. 3150 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3151 * is a valid block name for the "Basic Latin" block. 3152 * <li>The text representation of each constant UnicodeBlock identifier. 
3153 * For example, this method will return the {@link #BASIC_LATIN} block if 3154 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3155 * hyphens in the canonical name with underscores. 3156 * </ol> 3157 * Finally, character case is ignored for all of the valid block name forms. 3158 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 3159 * The en_US locale's case mapping rules are used to provide case-insensitive 3160 * string comparisons for block name validation. 3161 * <p> 3162 * If the Unicode Standard changes block names, both the previous and 3163 * current names will be accepted. 3164 * 3165 * @param blockName A {@code UnicodeBlock} name. 3166 * @return The {@code UnicodeBlock} instance identified 3167 * by {@code blockName} 3168 * @throws IllegalArgumentException if {@code blockName} is an 3169 * invalid name 3170 * @throws NullPointerException if {@code blockName} is null 3171 * @since 1.5 3172 */ 3173 public static final UnicodeBlock forName(String blockName) { 3174 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3175 if (block == null) { 3176 throw new IllegalArgumentException(); 3177 } 3178 return block; 3179 } 3180 } 3181 3182 3183 /** 3184 * A family of character subsets representing the character scripts 3185 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3186 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3187 * character is assigned to a single Unicode script, either a specific 3188 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3189 * one of the following three special values, 3190 * {@link Character.UnicodeScript#INHERITED Inherited}, 3191 * {@link Character.UnicodeScript#COMMON Common} or 3192 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3193 * 3194 * @since 1.7 3195 */ 3196 public static enum UnicodeScript { 3197 /** 3198 * Unicode script "Common". 3199 */ 3200 COMMON, 3201 3202 /** 3203 * Unicode script "Latin". 
3204 */ 3205 LATIN, 3206 3207 /** 3208 * Unicode script "Greek". 3209 */ 3210 GREEK, 3211 3212 /** 3213 * Unicode script "Cyrillic". 3214 */ 3215 CYRILLIC, 3216 3217 /** 3218 * Unicode script "Armenian". 3219 */ 3220 ARMENIAN, 3221 3222 /** 3223 * Unicode script "Hebrew". 3224 */ 3225 HEBREW, 3226 3227 /** 3228 * Unicode script "Arabic". 3229 */ 3230 ARABIC, 3231 3232 /** 3233 * Unicode script "Syriac". 3234 */ 3235 SYRIAC, 3236 3237 /** 3238 * Unicode script "Thaana". 3239 */ 3240 THAANA, 3241 3242 /** 3243 * Unicode script "Devanagari". 3244 */ 3245 DEVANAGARI, 3246 3247 /** 3248 * Unicode script "Bengali". 3249 */ 3250 BENGALI, 3251 3252 /** 3253 * Unicode script "Gurmukhi". 3254 */ 3255 GURMUKHI, 3256 3257 /** 3258 * Unicode script "Gujarati". 3259 */ 3260 GUJARATI, 3261 3262 /** 3263 * Unicode script "Oriya". 3264 */ 3265 ORIYA, 3266 3267 /** 3268 * Unicode script "Tamil". 3269 */ 3270 TAMIL, 3271 3272 /** 3273 * Unicode script "Telugu". 3274 */ 3275 TELUGU, 3276 3277 /** 3278 * Unicode script "Kannada". 3279 */ 3280 KANNADA, 3281 3282 /** 3283 * Unicode script "Malayalam". 3284 */ 3285 MALAYALAM, 3286 3287 /** 3288 * Unicode script "Sinhala". 3289 */ 3290 SINHALA, 3291 3292 /** 3293 * Unicode script "Thai". 3294 */ 3295 THAI, 3296 3297 /** 3298 * Unicode script "Lao". 3299 */ 3300 LAO, 3301 3302 /** 3303 * Unicode script "Tibetan". 3304 */ 3305 TIBETAN, 3306 3307 /** 3308 * Unicode script "Myanmar". 3309 */ 3310 MYANMAR, 3311 3312 /** 3313 * Unicode script "Georgian". 3314 */ 3315 GEORGIAN, 3316 3317 /** 3318 * Unicode script "Hangul". 3319 */ 3320 HANGUL, 3321 3322 /** 3323 * Unicode script "Ethiopic". 3324 */ 3325 ETHIOPIC, 3326 3327 /** 3328 * Unicode script "Cherokee". 3329 */ 3330 CHEROKEE, 3331 3332 /** 3333 * Unicode script "Canadian_Aboriginal". 3334 */ 3335 CANADIAN_ABORIGINAL, 3336 3337 /** 3338 * Unicode script "Ogham". 3339 */ 3340 OGHAM, 3341 3342 /** 3343 * Unicode script "Runic". 3344 */ 3345 RUNIC, 3346 3347 /** 3348 * Unicode script "Khmer". 
3349 */ 3350 KHMER, 3351 3352 /** 3353 * Unicode script "Mongolian". 3354 */ 3355 MONGOLIAN, 3356 3357 /** 3358 * Unicode script "Hiragana". 3359 */ 3360 HIRAGANA, 3361 3362 /** 3363 * Unicode script "Katakana". 3364 */ 3365 KATAKANA, 3366 3367 /** 3368 * Unicode script "Bopomofo". 3369 */ 3370 BOPOMOFO, 3371 3372 /** 3373 * Unicode script "Han". 3374 */ 3375 HAN, 3376 3377 /** 3378 * Unicode script "Yi". 3379 */ 3380 YI, 3381 3382 /** 3383 * Unicode script "Old_Italic". 3384 */ 3385 OLD_ITALIC, 3386 3387 /** 3388 * Unicode script "Gothic". 3389 */ 3390 GOTHIC, 3391 3392 /** 3393 * Unicode script "Deseret". 3394 */ 3395 DESERET, 3396 3397 /** 3398 * Unicode script "Inherited". 3399 */ 3400 INHERITED, 3401 3402 /** 3403 * Unicode script "Tagalog". 3404 */ 3405 TAGALOG, 3406 3407 /** 3408 * Unicode script "Hanunoo". 3409 */ 3410 HANUNOO, 3411 3412 /** 3413 * Unicode script "Buhid". 3414 */ 3415 BUHID, 3416 3417 /** 3418 * Unicode script "Tagbanwa". 3419 */ 3420 TAGBANWA, 3421 3422 /** 3423 * Unicode script "Limbu". 3424 */ 3425 LIMBU, 3426 3427 /** 3428 * Unicode script "Tai_Le". 3429 */ 3430 TAI_LE, 3431 3432 /** 3433 * Unicode script "Linear_B". 3434 */ 3435 LINEAR_B, 3436 3437 /** 3438 * Unicode script "Ugaritic". 3439 */ 3440 UGARITIC, 3441 3442 /** 3443 * Unicode script "Shavian". 3444 */ 3445 SHAVIAN, 3446 3447 /** 3448 * Unicode script "Osmanya". 3449 */ 3450 OSMANYA, 3451 3452 /** 3453 * Unicode script "Cypriot". 3454 */ 3455 CYPRIOT, 3456 3457 /** 3458 * Unicode script "Braille". 3459 */ 3460 BRAILLE, 3461 3462 /** 3463 * Unicode script "Buginese". 3464 */ 3465 BUGINESE, 3466 3467 /** 3468 * Unicode script "Coptic". 3469 */ 3470 COPTIC, 3471 3472 /** 3473 * Unicode script "New_Tai_Lue". 3474 */ 3475 NEW_TAI_LUE, 3476 3477 /** 3478 * Unicode script "Glagolitic". 3479 */ 3480 GLAGOLITIC, 3481 3482 /** 3483 * Unicode script "Tifinagh". 3484 */ 3485 TIFINAGH, 3486 3487 /** 3488 * Unicode script "Syloti_Nagri". 
3489 */ 3490 SYLOTI_NAGRI, 3491 3492 /** 3493 * Unicode script "Old_Persian". 3494 */ 3495 OLD_PERSIAN, 3496 3497 /** 3498 * Unicode script "Kharoshthi". 3499 */ 3500 KHAROSHTHI, 3501 3502 /** 3503 * Unicode script "Balinese". 3504 */ 3505 BALINESE, 3506 3507 /** 3508 * Unicode script "Cuneiform". 3509 */ 3510 CUNEIFORM, 3511 3512 /** 3513 * Unicode script "Phoenician". 3514 */ 3515 PHOENICIAN, 3516 3517 /** 3518 * Unicode script "Phags_Pa". 3519 */ 3520 PHAGS_PA, 3521 3522 /** 3523 * Unicode script "Nko". 3524 */ 3525 NKO, 3526 3527 /** 3528 * Unicode script "Sundanese". 3529 */ 3530 SUNDANESE, 3531 3532 /** 3533 * Unicode script "Batak". 3534 */ 3535 BATAK, 3536 3537 /** 3538 * Unicode script "Lepcha". 3539 */ 3540 LEPCHA, 3541 3542 /** 3543 * Unicode script "Ol_Chiki". 3544 */ 3545 OL_CHIKI, 3546 3547 /** 3548 * Unicode script "Vai". 3549 */ 3550 VAI, 3551 3552 /** 3553 * Unicode script "Saurashtra". 3554 */ 3555 SAURASHTRA, 3556 3557 /** 3558 * Unicode script "Kayah_Li". 3559 */ 3560 KAYAH_LI, 3561 3562 /** 3563 * Unicode script "Rejang". 3564 */ 3565 REJANG, 3566 3567 /** 3568 * Unicode script "Lycian". 3569 */ 3570 LYCIAN, 3571 3572 /** 3573 * Unicode script "Carian". 3574 */ 3575 CARIAN, 3576 3577 /** 3578 * Unicode script "Lydian". 3579 */ 3580 LYDIAN, 3581 3582 /** 3583 * Unicode script "Cham". 3584 */ 3585 CHAM, 3586 3587 /** 3588 * Unicode script "Tai_Tham". 3589 */ 3590 TAI_THAM, 3591 3592 /** 3593 * Unicode script "Tai_Viet". 3594 */ 3595 TAI_VIET, 3596 3597 /** 3598 * Unicode script "Avestan". 3599 */ 3600 AVESTAN, 3601 3602 /** 3603 * Unicode script "Egyptian_Hieroglyphs". 3604 */ 3605 EGYPTIAN_HIEROGLYPHS, 3606 3607 /** 3608 * Unicode script "Samaritan". 3609 */ 3610 SAMARITAN, 3611 3612 /** 3613 * Unicode script "Mandaic". 3614 */ 3615 MANDAIC, 3616 3617 /** 3618 * Unicode script "Lisu". 3619 */ 3620 LISU, 3621 3622 /** 3623 * Unicode script "Bamum". 3624 */ 3625 BAMUM, 3626 3627 /** 3628 * Unicode script "Javanese". 
3629 */ 3630 JAVANESE, 3631 3632 /** 3633 * Unicode script "Meetei_Mayek". 3634 */ 3635 MEETEI_MAYEK, 3636 3637 /** 3638 * Unicode script "Imperial_Aramaic". 3639 */ 3640 IMPERIAL_ARAMAIC, 3641 3642 /** 3643 * Unicode script "Old_South_Arabian". 3644 */ 3645 OLD_SOUTH_ARABIAN, 3646 3647 /** 3648 * Unicode script "Inscriptional_Parthian". 3649 */ 3650 INSCRIPTIONAL_PARTHIAN, 3651 3652 /** 3653 * Unicode script "Inscriptional_Pahlavi". 3654 */ 3655 INSCRIPTIONAL_PAHLAVI, 3656 3657 /** 3658 * Unicode script "Old_Turkic". 3659 */ 3660 OLD_TURKIC, 3661 3662 /** 3663 * Unicode script "Brahmi". 3664 */ 3665 BRAHMI, 3666 3667 /** 3668 * Unicode script "Kaithi". 3669 */ 3670 KAITHI, 3671 3672 /** 3673 * Unicode script "Meroitic_Hieroglyphs". 3674 */ 3675 MEROITIC_HIEROGLYPHS, 3676 3677 /** 3678 * Unicode script "Meroitic_Cursive". 3679 */ 3680 MEROITIC_CURSIVE, 3681 3682 /** 3683 * Unicode script "Sora_Sompeng". 3684 */ 3685 SORA_SOMPENG, 3686 3687 /** 3688 * Unicode script "Chakma". 3689 */ 3690 CHAKMA, 3691 3692 /** 3693 * Unicode script "Sharada". 3694 */ 3695 SHARADA, 3696 3697 /** 3698 * Unicode script "Takri". 3699 */ 3700 TAKRI, 3701 3702 /** 3703 * Unicode script "Miao". 3704 */ 3705 MIAO, 3706 3707 /** 3708 * Unicode script "Unknown". 
3709 */ 3710 UNKNOWN; 3711 3712 private static final int[] scriptStarts = { 3713 0x0000, // 0000..0040; COMMON 3714 0x0041, // 0041..005A; LATIN 3715 0x005B, // 005B..0060; COMMON 3716 0x0061, // 0061..007A; LATIN 3717 0x007B, // 007B..00A9; COMMON 3718 0x00AA, // 00AA..00AA; LATIN 3719 0x00AB, // 00AB..00B9; COMMON 3720 0x00BA, // 00BA..00BA; LATIN 3721 0x00BB, // 00BB..00BF; COMMON 3722 0x00C0, // 00C0..00D6; LATIN 3723 0x00D7, // 00D7..00D7; COMMON 3724 0x00D8, // 00D8..00F6; LATIN 3725 0x00F7, // 00F7..00F7; COMMON 3726 0x00F8, // 00F8..02B8; LATIN 3727 0x02B9, // 02B9..02DF; COMMON 3728 0x02E0, // 02E0..02E4; LATIN 3729 0x02E5, // 02E5..02E9; COMMON 3730 0x02EA, // 02EA..02EB; BOPOMOFO 3731 0x02EC, // 02EC..02FF; COMMON 3732 0x0300, // 0300..036F; INHERITED 3733 0x0370, // 0370..0373; GREEK 3734 0x0374, // 0374..0374; COMMON 3735 0x0375, // 0375..037D; GREEK 3736 0x037E, // 037E..0383; COMMON 3737 0x0384, // 0384..0384; GREEK 3738 0x0385, // 0385..0385; COMMON 3739 0x0386, // 0386..0386; GREEK 3740 0x0387, // 0387..0387; COMMON 3741 0x0388, // 0388..03E1; GREEK 3742 0x03E2, // 03E2..03EF; COPTIC 3743 0x03F0, // 03F0..03FF; GREEK 3744 0x0400, // 0400..0484; CYRILLIC 3745 0x0485, // 0485..0486; INHERITED 3746 0x0487, // 0487..0530; CYRILLIC 3747 0x0531, // 0531..0588; ARMENIAN 3748 0x0589, // 0589..0589; COMMON 3749 0x058A, // 058A..0590; ARMENIAN 3750 0x0591, // 0591..05FF; HEBREW 3751 0x0600, // 0600..060B; ARABIC 3752 0x060C, // 060C..060C; COMMON 3753 0x060D, // 060D..061A; ARABIC 3754 0x061B, // 061B..061D; COMMON 3755 0x061E, // 061E..061E; ARABIC 3756 0x061F, // 061F..061F; COMMON 3757 0x0620, // 0620..063F; ARABIC 3758 0x0640, // 0640..0640; COMMON 3759 0x0641, // 0641..064A; ARABIC 3760 0x064B, // 064B..0655; INHERITED 3761 0x0656, // 0656..065F; ARABIC 3762 0x0660, // 0660..0669; COMMON 3763 0x066A, // 066A..066F; ARABIC 3764 0x0670, // 0670..0670; INHERITED 3765 0x0671, // 0671..06DC; ARABIC 3766 0x06DD, // 06DD..06DD; COMMON 3767 0x06DE, // 
06DE..06FF; ARABIC 3768 0x0700, // 0700..074F; SYRIAC 3769 0x0750, // 0750..077F; ARABIC 3770 0x0780, // 0780..07BF; THAANA 3771 0x07C0, // 07C0..07FF; NKO 3772 0x0800, // 0800..083F; SAMARITAN 3773 0x0840, // 0840..089F; MANDAIC 3774 0x08A0, // 08A0..08FF; ARABIC 3775 0x0900, // 0900..0950; DEVANAGARI 3776 0x0951, // 0951..0952; INHERITED 3777 0x0953, // 0953..0963; DEVANAGARI 3778 0x0964, // 0964..0965; COMMON 3779 0x0966, // 0966..0980; DEVANAGARI 3780 0x0981, // 0981..0A00; BENGALI 3781 0x0A01, // 0A01..0A80; GURMUKHI 3782 0x0A81, // 0A81..0B00; GUJARATI 3783 0x0B01, // 0B01..0B81; ORIYA 3784 0x0B82, // 0B82..0C00; TAMIL 3785 0x0C01, // 0C01..0C81; TELUGU 3786 0x0C82, // 0C82..0CF0; KANNADA 3787 0x0D02, // 0D02..0D81; MALAYALAM 3788 0x0D82, // 0D82..0E00; SINHALA 3789 0x0E01, // 0E01..0E3E; THAI 3790 0x0E3F, // 0E3F..0E3F; COMMON 3791 0x0E40, // 0E40..0E80; THAI 3792 0x0E81, // 0E81..0EFF; LAO 3793 0x0F00, // 0F00..0FD4; TIBETAN 3794 0x0FD5, // 0FD5..0FD8; COMMON 3795 0x0FD9, // 0FD9..0FFF; TIBETAN 3796 0x1000, // 1000..109F; MYANMAR 3797 0x10A0, // 10A0..10FA; GEORGIAN 3798 0x10FB, // 10FB..10FB; COMMON 3799 0x10FC, // 10FC..10FF; GEORGIAN 3800 0x1100, // 1100..11FF; HANGUL 3801 0x1200, // 1200..139F; ETHIOPIC 3802 0x13A0, // 13A0..13FF; CHEROKEE 3803 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3804 0x1680, // 1680..169F; OGHAM 3805 0x16A0, // 16A0..16EA; RUNIC 3806 0x16EB, // 16EB..16ED; COMMON 3807 0x16EE, // 16EE..16FF; RUNIC 3808 0x1700, // 1700..171F; TAGALOG 3809 0x1720, // 1720..1734; HANUNOO 3810 0x1735, // 1735..173F; COMMON 3811 0x1740, // 1740..175F; BUHID 3812 0x1760, // 1760..177F; TAGBANWA 3813 0x1780, // 1780..17FF; KHMER 3814 0x1800, // 1800..1801; MONGOLIAN 3815 0x1802, // 1802..1803; COMMON 3816 0x1804, // 1804..1804; MONGOLIAN 3817 0x1805, // 1805..1805; COMMON 3818 0x1806, // 1806..18AF; MONGOLIAN 3819 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3820 0x1900, // 1900..194F; LIMBU 3821 0x1950, // 1950..197F; TAI_LE 3822 0x1980, // 1980..19DF; 
NEW_TAI_LUE 3823 0x19E0, // 19E0..19FF; KHMER 3824 0x1A00, // 1A00..1A1F; BUGINESE 3825 0x1A20, // 1A20..1AFF; TAI_THAM 3826 0x1B00, // 1B00..1B7F; BALINESE 3827 0x1B80, // 1B80..1BBF; SUNDANESE 3828 0x1BC0, // 1BC0..1BFF; BATAK 3829 0x1C00, // 1C00..1C4F; LEPCHA 3830 0x1C50, // 1C50..1CBF; OL_CHIKI 3831 0x1CC0, // 1CC0..1CCF; SUNDANESE 3832 0x1CD0, // 1CD0..1CD2; INHERITED 3833 0x1CD3, // 1CD3..1CD3; COMMON 3834 0x1CD4, // 1CD4..1CE0; INHERITED 3835 0x1CE1, // 1CE1..1CE1; COMMON 3836 0x1CE2, // 1CE2..1CE8; INHERITED 3837 0x1CE9, // 1CE9..1CEC; COMMON 3838 0x1CED, // 1CED..1CED; INHERITED 3839 0x1CEE, // 1CEE..1CF3; COMMON 3840 0x1CF4, // 1CF4..1CF4; INHERITED 3841 0x1CF5, // 1CF5..1CFF; COMMON 3842 0x1D00, // 1D00..1D25; LATIN 3843 0x1D26, // 1D26..1D2A; GREEK 3844 0x1D2B, // 1D2B..1D2B; CYRILLIC 3845 0x1D2C, // 1D2C..1D5C; LATIN 3846 0x1D5D, // 1D5D..1D61; GREEK 3847 0x1D62, // 1D62..1D65; LATIN 3848 0x1D66, // 1D66..1D6A; GREEK 3849 0x1D6B, // 1D6B..1D77; LATIN 3850 0x1D78, // 1D78..1D78; CYRILLIC 3851 0x1D79, // 1D79..1DBE; LATIN 3852 0x1DBF, // 1DBF..1DBF; GREEK 3853 0x1DC0, // 1DC0..1DFF; INHERITED 3854 0x1E00, // 1E00..1EFF; LATIN 3855 0x1F00, // 1F00..1FFF; GREEK 3856 0x2000, // 2000..200B; COMMON 3857 0x200C, // 200C..200D; INHERITED 3858 0x200E, // 200E..2070; COMMON 3859 0x2071, // 2071..2073; LATIN 3860 0x2074, // 2074..207E; COMMON 3861 0x207F, // 207F..207F; LATIN 3862 0x2080, // 2080..208F; COMMON 3863 0x2090, // 2090..209F; LATIN 3864 0x20A0, // 20A0..20CF; COMMON 3865 0x20D0, // 20D0..20FF; INHERITED 3866 0x2100, // 2100..2125; COMMON 3867 0x2126, // 2126..2126; GREEK 3868 0x2127, // 2127..2129; COMMON 3869 0x212A, // 212A..212B; LATIN 3870 0x212C, // 212C..2131; COMMON 3871 0x2132, // 2132..2132; LATIN 3872 0x2133, // 2133..214D; COMMON 3873 0x214E, // 214E..214E; LATIN 3874 0x214F, // 214F..215F; COMMON 3875 0x2160, // 2160..2188; LATIN 3876 0x2189, // 2189..27FF; COMMON 3877 0x2800, // 2800..28FF; BRAILLE 3878 0x2900, // 2900..2BFF; COMMON 3879 
0x2C00, // 2C00..2C5F; GLAGOLITIC 3880 0x2C60, // 2C60..2C7F; LATIN 3881 0x2C80, // 2C80..2CFF; COPTIC 3882 0x2D00, // 2D00..2D2F; GEORGIAN 3883 0x2D30, // 2D30..2D7F; TIFINAGH 3884 0x2D80, // 2D80..2DDF; ETHIOPIC 3885 0x2DE0, // 2DE0..2DFF; CYRILLIC 3886 0x2E00, // 2E00..2E7F; COMMON 3887 0x2E80, // 2E80..2FEF; HAN 3888 0x2FF0, // 2FF0..3004; COMMON 3889 0x3005, // 3005..3005; HAN 3890 0x3006, // 3006..3006; COMMON 3891 0x3007, // 3007..3007; HAN 3892 0x3008, // 3008..3020; COMMON 3893 0x3021, // 3021..3029; HAN 3894 0x302A, // 302A..302D; INHERITED 3895 0x302E, // 302E..302F; HANGUL 3896 0x3030, // 3030..3037; COMMON 3897 0x3038, // 3038..303B; HAN 3898 0x303C, // 303C..3040; COMMON 3899 0x3041, // 3041..3098; HIRAGANA 3900 0x3099, // 3099..309A; INHERITED 3901 0x309B, // 309B..309C; COMMON 3902 0x309D, // 309D..309F; HIRAGANA 3903 0x30A0, // 30A0..30A0; COMMON 3904 0x30A1, // 30A1..30FA; KATAKANA 3905 0x30FB, // 30FB..30FC; COMMON 3906 0x30FD, // 30FD..3104; KATAKANA 3907 0x3105, // 3105..3130; BOPOMOFO 3908 0x3131, // 3131..318F; HANGUL 3909 0x3190, // 3190..319F; COMMON 3910 0x31A0, // 31A0..31BF; BOPOMOFO 3911 0x31C0, // 31C0..31EF; COMMON 3912 0x31F0, // 31F0..31FF; KATAKANA 3913 0x3200, // 3200..321F; HANGUL 3914 0x3220, // 3220..325F; COMMON 3915 0x3260, // 3260..327E; HANGUL 3916 0x327F, // 327F..32CF; COMMON 3917 0x32D0, // 32D0..3357; KATAKANA 3918 0x3358, // 3358..33FF; COMMON 3919 0x3400, // 3400..4DBF; HAN 3920 0x4DC0, // 4DC0..4DFF; COMMON 3921 0x4E00, // 4E00..9FFF; HAN 3922 0xA000, // A000..A4CF; YI 3923 0xA4D0, // A4D0..A4FF; LISU 3924 0xA500, // A500..A63F; VAI 3925 0xA640, // A640..A69F; CYRILLIC 3926 0xA6A0, // A6A0..A6FF; BAMUM 3927 0xA700, // A700..A721; COMMON 3928 0xA722, // A722..A787; LATIN 3929 0xA788, // A788..A78A; COMMON 3930 0xA78B, // A78B..A7FF; LATIN 3931 0xA800, // A800..A82F; SYLOTI_NAGRI 3932 0xA830, // A830..A83F; COMMON 3933 0xA840, // A840..A87F; PHAGS_PA 3934 0xA880, // A880..A8DF; SAURASHTRA 3935 0xA8E0, // A8E0..A8FF; 
DEVANAGARI 3936 0xA900, // A900..A92F; KAYAH_LI 3937 0xA930, // A930..A95F; REJANG 3938 0xA960, // A960..A97F; HANGUL 3939 0xA980, // A980..A9FF; JAVANESE 3940 0xAA00, // AA00..AA5F; CHAM 3941 0xAA60, // AA60..AA7F; MYANMAR 3942 0xAA80, // AA80..AADF; TAI_VIET 3943 0xAAE0, // AAE0..AB00; MEETEI_MAYEK 3944 0xAB01, // AB01..ABBF; ETHIOPIC 3945 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3946 0xAC00, // AC00..D7FB; HANGUL 3947 0xD7FC, // D7FC..F8FF; UNKNOWN 3948 0xF900, // F900..FAFF; HAN 3949 0xFB00, // FB00..FB12; LATIN 3950 0xFB13, // FB13..FB1C; ARMENIAN 3951 0xFB1D, // FB1D..FB4F; HEBREW 3952 0xFB50, // FB50..FD3D; ARABIC 3953 0xFD3E, // FD3E..FD4F; COMMON 3954 0xFD50, // FD50..FDFC; ARABIC 3955 0xFDFD, // FDFD..FDFF; COMMON 3956 0xFE00, // FE00..FE0F; INHERITED 3957 0xFE10, // FE10..FE1F; COMMON 3958 0xFE20, // FE20..FE2F; INHERITED 3959 0xFE30, // FE30..FE6F; COMMON 3960 0xFE70, // FE70..FEFE; ARABIC 3961 0xFEFF, // FEFF..FF20; COMMON 3962 0xFF21, // FF21..FF3A; LATIN 3963 0xFF3B, // FF3B..FF40; COMMON 3964 0xFF41, // FF41..FF5A; LATIN 3965 0xFF5B, // FF5B..FF65; COMMON 3966 0xFF66, // FF66..FF6F; KATAKANA 3967 0xFF70, // FF70..FF70; COMMON 3968 0xFF71, // FF71..FF9D; KATAKANA 3969 0xFF9E, // FF9E..FF9F; COMMON 3970 0xFFA0, // FFA0..FFDF; HANGUL 3971 0xFFE0, // FFE0..FFFF; COMMON 3972 0x10000, // 10000..100FF; LINEAR_B 3973 0x10100, // 10100..1013F; COMMON 3974 0x10140, // 10140..1018F; GREEK 3975 0x10190, // 10190..101FC; COMMON 3976 0x101FD, // 101FD..1027F; INHERITED 3977 0x10280, // 10280..1029F; LYCIAN 3978 0x102A0, // 102A0..102FF; CARIAN 3979 0x10300, // 10300..1032F; OLD_ITALIC 3980 0x10330, // 10330..1037F; GOTHIC 3981 0x10380, // 10380..1039F; UGARITIC 3982 0x103A0, // 103A0..103FF; OLD_PERSIAN 3983 0x10400, // 10400..1044F; DESERET 3984 0x10450, // 10450..1047F; SHAVIAN 3985 0x10480, // 10480..107FF; OSMANYA 3986 0x10800, // 10800..1083F; CYPRIOT 3987 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3988 0x10900, // 10900..1091F; PHOENICIAN 3989 0x10920, // 
10920..1097F; LYDIAN 3990 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 3991 0x109A0, // 109A0..109FF; MEROITIC_CURSIVE 3992 0x10A00, // 10A00..10A5F; KHAROSHTHI 3993 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 3994 0x10B00, // 10B00..10B3F; AVESTAN 3995 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 3996 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 3997 0x10C00, // 10C00..10E5F; OLD_TURKIC 3998 0x10E60, // 10E60..10FFF; ARABIC 3999 0x11000, // 11000..1107F; BRAHMI 4000 0x11080, // 11080..110CF; KAITHI 4001 0x110D0, // 110D0..110FF; SORA_SOMPENG 4002 0x11100, // 11100..1117F; CHAKMA 4003 0x11180, // 11180..1167F; SHARADA 4004 0x11680, // 11680..116CF; TAKRI 4005 0x12000, // 12000..12FFF; CUNEIFORM 4006 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 4007 0x16800, // 16800..16A38; BAMUM 4008 0x16F00, // 16F00..16F9F; MIAO 4009 0x1B000, // 1B000..1B000; KATAKANA 4010 0x1B001, // 1B001..1CFFF; HIRAGANA 4011 0x1D000, // 1D000..1D166; COMMON 4012 0x1D167, // 1D167..1D169; INHERITED 4013 0x1D16A, // 1D16A..1D17A; COMMON 4014 0x1D17B, // 1D17B..1D182; INHERITED 4015 0x1D183, // 1D183..1D184; COMMON 4016 0x1D185, // 1D185..1D18B; INHERITED 4017 0x1D18C, // 1D18C..1D1A9; COMMON 4018 0x1D1AA, // 1D1AA..1D1AD; INHERITED 4019 0x1D1AE, // 1D1AE..1D1FF; COMMON 4020 0x1D200, // 1D200..1D2FF; GREEK 4021 0x1D300, // 1D300..1EDFF; COMMON 4022 0x1EE00, // 1EE00..1EFFF; ARABIC 4023 0x1F000, // 1F000..1F1FF; COMMON 4024 0x1F200, // 1F200..1F200; HIRAGANA 4025 0x1F201, // 1F201..1FFFF; COMMON 4026 0x20000, // 20000..E0000; HAN 4027 0xE0001, // E0001..E00FF; COMMON 4028 0xE0100, // E0100..E01EF; INHERITED 4029 0xE01F0 // E01F0..10FFFF; UNKNOWN 4030 4031 }; 4032 4033 private static final UnicodeScript[] scripts = { 4034 COMMON, 4035 LATIN, 4036 COMMON, 4037 LATIN, 4038 COMMON, 4039 LATIN, 4040 COMMON, 4041 LATIN, 4042 COMMON, 4043 LATIN, 4044 COMMON, 4045 LATIN, 4046 COMMON, 4047 LATIN, 4048 COMMON, 4049 LATIN, 4050 COMMON, 4051 BOPOMOFO, 4052 COMMON, 4053 INHERITED, 4054 GREEK, 4055 
COMMON, 4056 GREEK, 4057 COMMON, 4058 GREEK, 4059 COMMON, 4060 GREEK, 4061 COMMON, 4062 GREEK, 4063 COPTIC, 4064 GREEK, 4065 CYRILLIC, 4066 INHERITED, 4067 CYRILLIC, 4068 ARMENIAN, 4069 COMMON, 4070 ARMENIAN, 4071 HEBREW, 4072 ARABIC, 4073 COMMON, 4074 ARABIC, 4075 COMMON, 4076 ARABIC, 4077 COMMON, 4078 ARABIC, 4079 COMMON, 4080 ARABIC, 4081 INHERITED, 4082 ARABIC, 4083 COMMON, 4084 ARABIC, 4085 INHERITED, 4086 ARABIC, 4087 COMMON, 4088 ARABIC, 4089 SYRIAC, 4090 ARABIC, 4091 THAANA, 4092 NKO, 4093 SAMARITAN, 4094 MANDAIC, 4095 ARABIC, 4096 DEVANAGARI, 4097 INHERITED, 4098 DEVANAGARI, 4099 COMMON, 4100 DEVANAGARI, 4101 BENGALI, 4102 GURMUKHI, 4103 GUJARATI, 4104 ORIYA, 4105 TAMIL, 4106 TELUGU, 4107 KANNADA, 4108 MALAYALAM, 4109 SINHALA, 4110 THAI, 4111 COMMON, 4112 THAI, 4113 LAO, 4114 TIBETAN, 4115 COMMON, 4116 TIBETAN, 4117 MYANMAR, 4118 GEORGIAN, 4119 COMMON, 4120 GEORGIAN, 4121 HANGUL, 4122 ETHIOPIC, 4123 CHEROKEE, 4124 CANADIAN_ABORIGINAL, 4125 OGHAM, 4126 RUNIC, 4127 COMMON, 4128 RUNIC, 4129 TAGALOG, 4130 HANUNOO, 4131 COMMON, 4132 BUHID, 4133 TAGBANWA, 4134 KHMER, 4135 MONGOLIAN, 4136 COMMON, 4137 MONGOLIAN, 4138 COMMON, 4139 MONGOLIAN, 4140 CANADIAN_ABORIGINAL, 4141 LIMBU, 4142 TAI_LE, 4143 NEW_TAI_LUE, 4144 KHMER, 4145 BUGINESE, 4146 TAI_THAM, 4147 BALINESE, 4148 SUNDANESE, 4149 BATAK, 4150 LEPCHA, 4151 OL_CHIKI, 4152 SUNDANESE, 4153 INHERITED, 4154 COMMON, 4155 INHERITED, 4156 COMMON, 4157 INHERITED, 4158 COMMON, 4159 INHERITED, 4160 COMMON, 4161 INHERITED, 4162 COMMON, 4163 LATIN, 4164 GREEK, 4165 CYRILLIC, 4166 LATIN, 4167 GREEK, 4168 LATIN, 4169 GREEK, 4170 LATIN, 4171 CYRILLIC, 4172 LATIN, 4173 GREEK, 4174 INHERITED, 4175 LATIN, 4176 GREEK, 4177 COMMON, 4178 INHERITED, 4179 COMMON, 4180 LATIN, 4181 COMMON, 4182 LATIN, 4183 COMMON, 4184 LATIN, 4185 COMMON, 4186 INHERITED, 4187 COMMON, 4188 GREEK, 4189 COMMON, 4190 LATIN, 4191 COMMON, 4192 LATIN, 4193 COMMON, 4194 LATIN, 4195 COMMON, 4196 LATIN, 4197 COMMON, 4198 BRAILLE, 4199 COMMON, 4200 GLAGOLITIC, 
4201 LATIN, 4202 COPTIC, 4203 GEORGIAN, 4204 TIFINAGH, 4205 ETHIOPIC, 4206 CYRILLIC, 4207 COMMON, 4208 HAN, 4209 COMMON, 4210 HAN, 4211 COMMON, 4212 HAN, 4213 COMMON, 4214 HAN, 4215 INHERITED, 4216 HANGUL, 4217 COMMON, 4218 HAN, 4219 COMMON, 4220 HIRAGANA, 4221 INHERITED, 4222 COMMON, 4223 HIRAGANA, 4224 COMMON, 4225 KATAKANA, 4226 COMMON, 4227 KATAKANA, 4228 BOPOMOFO, 4229 HANGUL, 4230 COMMON, 4231 BOPOMOFO, 4232 COMMON, 4233 KATAKANA, 4234 HANGUL, 4235 COMMON, 4236 HANGUL, 4237 COMMON, 4238 KATAKANA, 4239 COMMON, 4240 HAN, 4241 COMMON, 4242 HAN, 4243 YI, 4244 LISU, 4245 VAI, 4246 CYRILLIC, 4247 BAMUM, 4248 COMMON, 4249 LATIN, 4250 COMMON, 4251 LATIN, 4252 SYLOTI_NAGRI, 4253 COMMON, 4254 PHAGS_PA, 4255 SAURASHTRA, 4256 DEVANAGARI, 4257 KAYAH_LI, 4258 REJANG, 4259 HANGUL, 4260 JAVANESE, 4261 CHAM, 4262 MYANMAR, 4263 TAI_VIET, 4264 MEETEI_MAYEK, 4265 ETHIOPIC, 4266 MEETEI_MAYEK, 4267 HANGUL, 4268 UNKNOWN , 4269 HAN, 4270 LATIN, 4271 ARMENIAN, 4272 HEBREW, 4273 ARABIC, 4274 COMMON, 4275 ARABIC, 4276 COMMON, 4277 INHERITED, 4278 COMMON, 4279 INHERITED, 4280 COMMON, 4281 ARABIC, 4282 COMMON, 4283 LATIN, 4284 COMMON, 4285 LATIN, 4286 COMMON, 4287 KATAKANA, 4288 COMMON, 4289 KATAKANA, 4290 COMMON, 4291 HANGUL, 4292 COMMON, 4293 LINEAR_B, 4294 COMMON, 4295 GREEK, 4296 COMMON, 4297 INHERITED, 4298 LYCIAN, 4299 CARIAN, 4300 OLD_ITALIC, 4301 GOTHIC, 4302 UGARITIC, 4303 OLD_PERSIAN, 4304 DESERET, 4305 SHAVIAN, 4306 OSMANYA, 4307 CYPRIOT, 4308 IMPERIAL_ARAMAIC, 4309 PHOENICIAN, 4310 LYDIAN, 4311 MEROITIC_HIEROGLYPHS, 4312 MEROITIC_CURSIVE, 4313 KHAROSHTHI, 4314 OLD_SOUTH_ARABIAN, 4315 AVESTAN, 4316 INSCRIPTIONAL_PARTHIAN, 4317 INSCRIPTIONAL_PAHLAVI, 4318 OLD_TURKIC, 4319 ARABIC, 4320 BRAHMI, 4321 KAITHI, 4322 SORA_SOMPENG, 4323 CHAKMA, 4324 SHARADA, 4325 TAKRI, 4326 CUNEIFORM, 4327 EGYPTIAN_HIEROGLYPHS, 4328 BAMUM, 4329 MIAO, 4330 KATAKANA, 4331 HIRAGANA, 4332 COMMON, 4333 INHERITED, 4334 COMMON, 4335 INHERITED, 4336 COMMON, 4337 INHERITED, 4338 COMMON, 4339 INHERITED, 4340 
COMMON, 4341 GREEK, 4342 COMMON, 4343 ARABIC, 4344 COMMON, 4345 HIRAGANA, 4346 COMMON, 4347 HAN, 4348 COMMON, 4349 INHERITED, 4350 UNKNOWN 4351 }; 4352 4353 private static HashMap<String, Character.UnicodeScript> aliases; 4354 static { 4355 aliases = new HashMap<>(128); 4356 aliases.put("ARAB", ARABIC); 4357 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4358 aliases.put("ARMN", ARMENIAN); 4359 aliases.put("AVST", AVESTAN); 4360 aliases.put("BALI", BALINESE); 4361 aliases.put("BAMU", BAMUM); 4362 aliases.put("BATK", BATAK); 4363 aliases.put("BENG", BENGALI); 4364 aliases.put("BOPO", BOPOMOFO); 4365 aliases.put("BRAI", BRAILLE); 4366 aliases.put("BRAH", BRAHMI); 4367 aliases.put("BUGI", BUGINESE); 4368 aliases.put("BUHD", BUHID); 4369 aliases.put("CAKM", CHAKMA); 4370 aliases.put("CANS", CANADIAN_ABORIGINAL); 4371 aliases.put("CARI", CARIAN); 4372 aliases.put("CHAM", CHAM); 4373 aliases.put("CHER", CHEROKEE); 4374 aliases.put("COPT", COPTIC); 4375 aliases.put("CPRT", CYPRIOT); 4376 aliases.put("CYRL", CYRILLIC); 4377 aliases.put("DEVA", DEVANAGARI); 4378 aliases.put("DSRT", DESERET); 4379 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4380 aliases.put("ETHI", ETHIOPIC); 4381 aliases.put("GEOR", GEORGIAN); 4382 aliases.put("GLAG", GLAGOLITIC); 4383 aliases.put("GOTH", GOTHIC); 4384 aliases.put("GREK", GREEK); 4385 aliases.put("GUJR", GUJARATI); 4386 aliases.put("GURU", GURMUKHI); 4387 aliases.put("HANG", HANGUL); 4388 aliases.put("HANI", HAN); 4389 aliases.put("HANO", HANUNOO); 4390 aliases.put("HEBR", HEBREW); 4391 aliases.put("HIRA", HIRAGANA); 4392 // it appears we don't have the KATAKANA_OR_HIRAGANA 4393 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4394 aliases.put("ITAL", OLD_ITALIC); 4395 aliases.put("JAVA", JAVANESE); 4396 aliases.put("KALI", KAYAH_LI); 4397 aliases.put("KANA", KATAKANA); 4398 aliases.put("KHAR", KHAROSHTHI); 4399 aliases.put("KHMR", KHMER); 4400 aliases.put("KNDA", KANNADA); 4401 aliases.put("KTHI", KAITHI); 4402 aliases.put("LANA", TAI_THAM); 4403 
aliases.put("LAOO", LAO); 4404 aliases.put("LATN", LATIN); 4405 aliases.put("LEPC", LEPCHA); 4406 aliases.put("LIMB", LIMBU); 4407 aliases.put("LINB", LINEAR_B); 4408 aliases.put("LISU", LISU); 4409 aliases.put("LYCI", LYCIAN); 4410 aliases.put("LYDI", LYDIAN); 4411 aliases.put("MAND", MANDAIC); 4412 aliases.put("MERC", MEROITIC_CURSIVE); 4413 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 4414 aliases.put("MLYM", MALAYALAM); 4415 aliases.put("MONG", MONGOLIAN); 4416 aliases.put("MTEI", MEETEI_MAYEK); 4417 aliases.put("MYMR", MYANMAR); 4418 aliases.put("NKOO", NKO); 4419 aliases.put("OGAM", OGHAM); 4420 aliases.put("OLCK", OL_CHIKI); 4421 aliases.put("ORKH", OLD_TURKIC); 4422 aliases.put("ORYA", ORIYA); 4423 aliases.put("OSMA", OSMANYA); 4424 aliases.put("PHAG", PHAGS_PA); 4425 aliases.put("PLRD", MIAO); 4426 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4427 aliases.put("PHNX", PHOENICIAN); 4428 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4429 aliases.put("RJNG", REJANG); 4430 aliases.put("RUNR", RUNIC); 4431 aliases.put("SAMR", SAMARITAN); 4432 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4433 aliases.put("SAUR", SAURASHTRA); 4434 aliases.put("SHAW", SHAVIAN); 4435 aliases.put("SHRD", SHARADA); 4436 aliases.put("SINH", SINHALA); 4437 aliases.put("SORA", SORA_SOMPENG); 4438 aliases.put("SUND", SUNDANESE); 4439 aliases.put("SYLO", SYLOTI_NAGRI); 4440 aliases.put("SYRC", SYRIAC); 4441 aliases.put("TAGB", TAGBANWA); 4442 aliases.put("TALE", TAI_LE); 4443 aliases.put("TAKR", TAKRI); 4444 aliases.put("TALU", NEW_TAI_LUE); 4445 aliases.put("TAML", TAMIL); 4446 aliases.put("TAVT", TAI_VIET); 4447 aliases.put("TELU", TELUGU); 4448 aliases.put("TFNG", TIFINAGH); 4449 aliases.put("TGLG", TAGALOG); 4450 aliases.put("THAA", THAANA); 4451 aliases.put("THAI", THAI); 4452 aliases.put("TIBT", TIBETAN); 4453 aliases.put("UGAR", UGARITIC); 4454 aliases.put("VAII", VAI); 4455 aliases.put("XPEO", OLD_PERSIAN); 4456 aliases.put("XSUX", CUNEIFORM); 4457 aliases.put("YIII", YI); 4458 
aliases.put("ZINH", INHERITED); 4459 aliases.put("ZYYY", COMMON); 4460 aliases.put("ZZZZ", UNKNOWN); 4461 } 4462 4463 /** 4464 * Returns the enum constant representing the Unicode script of which 4465 * the given character (Unicode code point) is assigned to. 4466 * 4467 * @param codePoint the character (Unicode code point) in question. 4468 * @return The {@code UnicodeScript} constant representing the 4469 * Unicode script of which this character is assigned to. 4470 * 4471 * @exception IllegalArgumentException if the specified 4472 * {@code codePoint} is an invalid Unicode code point. 4473 * @see Character#isValidCodePoint(int) 4474 * 4475 */ 4476 public static UnicodeScript of(int codePoint) { 4477 if (!isValidCodePoint(codePoint)) 4478 throw new IllegalArgumentException(); 4479 int type = getType(codePoint); 4480 // leave SURROGATE and PRIVATE_USE for table lookup 4481 if (type == UNASSIGNED) 4482 return UNKNOWN; 4483 int index = Arrays.binarySearch(scriptStarts, codePoint); 4484 if (index < 0) 4485 index = -index - 2; 4486 return scripts[index]; 4487 } 4488 4489 /** 4490 * Returns the UnicodeScript constant with the given Unicode script 4491 * name or the script name alias. Script names and their aliases are 4492 * determined by The Unicode Standard. The files Scripts<version>.txt 4493 * and PropertyValueAliases<version>.txt define script names 4494 * and the script name aliases for a particular version of the 4495 * standard. The {@link Character} class specifies the version of 4496 * the standard that it supports. 4497 * <p> 4498 * Character case is ignored for all of the valid script names. 4499 * The en_US locale's case mapping rules are used to provide 4500 * case-insensitive string comparisons for script name validation. 4501 * 4502 * @param scriptName A {@code UnicodeScript} name. 
4503 * @return The {@code UnicodeScript} constant identified 4504 * by {@code scriptName} 4505 * @throws IllegalArgumentException if {@code scriptName} is an 4506 * invalid name 4507 * @throws NullPointerException if {@code scriptName} is null 4508 */ 4509 public static final UnicodeScript forName(String scriptName) { 4510 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4511 //.replace(' ', '_')); 4512 UnicodeScript sc = aliases.get(scriptName); 4513 if (sc != null) 4514 return sc; 4515 return valueOf(scriptName); 4516 } 4517 } 4518 4519 /** 4520 * The value of the {@code Character}. 4521 * 4522 * @serial 4523 */ 4524 private final char value; 4525 4526 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4527 private static final long serialVersionUID = 3786198910865385080L; 4528 4529 /** 4530 * Constructs a newly allocated {@code Character} object that 4531 * represents the specified {@code char} value. 4532 * 4533 * @param value the value to be represented by the 4534 * {@code Character} object. 4535 */ 4536 public Character(char value) { 4537 this.value = value; 4538 } 4539 4540 private static class CharacterCache { 4541 private CharacterCache(){} 4542 4543 static final Character cache[] = new Character[127 + 1]; 4544 4545 static { 4546 for (int i = 0; i < cache.length; i++) 4547 cache[i] = new Character((char)i); 4548 } 4549 } 4550 4551 /** 4552 * Returns a <tt>Character</tt> instance representing the specified 4553 * <tt>char</tt> value. 4554 * If a new <tt>Character</tt> instance is not required, this method 4555 * should generally be used in preference to the constructor 4556 * {@link #Character(char)}, as this method is likely to yield 4557 * significantly better space and time performance by caching 4558 * frequently requested values. 4559 * 4560 * This method will always cache values in the range {@code 4561 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 4562 * cache other values outside of this range. 
4563 * 4564 * @param c a char value. 4565 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4566 * @since 1.5 4567 */ 4568 public static Character valueOf(char c) { 4569 if (c <= 127) { // must cache 4570 return CharacterCache.cache[(int)c]; 4571 } 4572 return new Character(c); 4573 } 4574 4575 /** 4576 * Returns the value of this {@code Character} object. 4577 * @return the primitive {@code char} value represented by 4578 * this object. 4579 */ 4580 public char charValue() { 4581 return value; 4582 } 4583 4584 /** 4585 * Returns a hash code for this {@code Character}; equal to the result 4586 * of invoking {@code charValue()}. 4587 * 4588 * @return a hash code value for this {@code Character} 4589 */ 4590 @Override 4591 public int hashCode() { 4592 return Character.hashCode(value); 4593 } 4594 4595 /** 4596 * Returns a hash code for a {@code char} value; compatible with 4597 * {@code Character.hashCode()}. 4598 * 4599 * @since 1.8 4600 * 4601 * @param value The {@code char} for which to return a hash code. 4602 * @return a hash code value for a {@code char} value. 4603 */ 4604 public static int hashCode(char value) { 4605 return (int)value; 4606 } 4607 4608 /** 4609 * Compares this object against the specified object. 4610 * The result is {@code true} if and only if the argument is not 4611 * {@code null} and is a {@code Character} object that 4612 * represents the same {@code char} value as this object. 4613 * 4614 * @param obj the object to compare with. 4615 * @return {@code true} if the objects are the same; 4616 * {@code false} otherwise. 4617 */ 4618 public boolean equals(Object obj) { 4619 if (obj instanceof Character) { 4620 return value == ((Character)obj).charValue(); 4621 } 4622 return false; 4623 } 4624 4625 /** 4626 * Returns a {@code String} object representing this 4627 * {@code Character}'s value. 
The result is a string of 4628 * length 1 whose sole component is the primitive 4629 * {@code char} value represented by this 4630 * {@code Character} object. 4631 * 4632 * @return a string representation of this object. 4633 */ 4634 public String toString() { 4635 char buf[] = {value}; 4636 return String.valueOf(buf); 4637 } 4638 4639 /** 4640 * Returns a {@code String} object representing the 4641 * specified {@code char}. The result is a string of length 4642 * 1 consisting solely of the specified {@code char}. 4643 * 4644 * @param c the {@code char} to be converted 4645 * @return the string representation of the specified {@code char} 4646 * @since 1.4 4647 */ 4648 public static String toString(char c) { 4649 return String.valueOf(c); 4650 } 4651 4652 /** 4653 * Determines whether the specified code point is a valid 4654 * <a href="http://www.unicode.org/glossary/#code_point"> 4655 * Unicode code point value</a>. 4656 * 4657 * @param codePoint the Unicode code point to be tested 4658 * @return {@code true} if the specified code point value is between 4659 * {@link #MIN_CODE_POINT} and 4660 * {@link #MAX_CODE_POINT} inclusive; 4661 * {@code false} otherwise. 4662 * @since 1.5 4663 */ 4664 public static boolean isValidCodePoint(int codePoint) { 4665 // Optimized form of: 4666 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4667 int plane = codePoint >>> 16; 4668 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4669 } 4670 4671 /** 4672 * Determines whether the specified character (Unicode code point) 4673 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4674 * Such code points can be represented using a single {@code char}. 4675 * 4676 * @param codePoint the character (Unicode code point) to be tested 4677 * @return {@code true} if the specified code point is between 4678 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4679 * {@code false} otherwise. 
4680 * @since 1.7 4681 */ 4682 public static boolean isBmpCodePoint(int codePoint) { 4683 return codePoint >>> 16 == 0; 4684 // Optimized form of: 4685 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4686 // We consistently use logical shift (>>>) to facilitate 4687 // additional runtime optimizations. 4688 } 4689 4690 /** 4691 * Determines whether the specified character (Unicode code point) 4692 * is in the <a href="#supplementary">supplementary character</a> range. 4693 * 4694 * @param codePoint the character (Unicode code point) to be tested 4695 * @return {@code true} if the specified code point is between 4696 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4697 * {@link #MAX_CODE_POINT} inclusive; 4698 * {@code false} otherwise. 4699 * @since 1.5 4700 */ 4701 public static boolean isSupplementaryCodePoint(int codePoint) { 4702 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4703 && codePoint < MAX_CODE_POINT + 1; 4704 } 4705 4706 /** 4707 * Determines if the given {@code char} value is a 4708 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4709 * Unicode high-surrogate code unit</a> 4710 * (also known as <i>leading-surrogate code unit</i>). 4711 * 4712 * <p>Such values do not represent characters by themselves, 4713 * but are used in the representation of 4714 * <a href="#supplementary">supplementary characters</a> 4715 * in the UTF-16 encoding. 4716 * 4717 * @param ch the {@code char} value to be tested. 4718 * @return {@code true} if the {@code char} value is between 4719 * {@link #MIN_HIGH_SURROGATE} and 4720 * {@link #MAX_HIGH_SURROGATE} inclusive; 4721 * {@code false} otherwise. 
4722 * @see Character#isLowSurrogate(char) 4723 * @see Character.UnicodeBlock#of(int) 4724 * @since 1.5 4725 */ 4726 public static boolean isHighSurrogate(char ch) { 4727 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4728 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4729 } 4730 4731 /** 4732 * Determines if the given {@code char} value is a 4733 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4734 * Unicode low-surrogate code unit</a> 4735 * (also known as <i>trailing-surrogate code unit</i>). 4736 * 4737 * <p>Such values do not represent characters by themselves, 4738 * but are used in the representation of 4739 * <a href="#supplementary">supplementary characters</a> 4740 * in the UTF-16 encoding. 4741 * 4742 * @param ch the {@code char} value to be tested. 4743 * @return {@code true} if the {@code char} value is between 4744 * {@link #MIN_LOW_SURROGATE} and 4745 * {@link #MAX_LOW_SURROGATE} inclusive; 4746 * {@code false} otherwise. 4747 * @see Character#isHighSurrogate(char) 4748 * @since 1.5 4749 */ 4750 public static boolean isLowSurrogate(char ch) { 4751 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4752 } 4753 4754 /** 4755 * Determines if the given {@code char} value is a Unicode 4756 * <i>surrogate code unit</i>. 4757 * 4758 * <p>Such values do not represent characters by themselves, 4759 * but are used in the representation of 4760 * <a href="#supplementary">supplementary characters</a> 4761 * in the UTF-16 encoding. 4762 * 4763 * <p>A char value is a surrogate code unit if and only if it is either 4764 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4765 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4766 * 4767 * @param ch the {@code char} value to be tested. 4768 * @return {@code true} if the {@code char} value is between 4769 * {@link #MIN_SURROGATE} and 4770 * {@link #MAX_SURROGATE} inclusive; 4771 * {@code false} otherwise. 
4772 * @since 1.7 4773 */ 4774 public static boolean isSurrogate(char ch) { 4775 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4776 } 4777 4778 /** 4779 * Determines whether the specified pair of {@code char} 4780 * values is a valid 4781 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4782 * Unicode surrogate pair</a>. 4783 4784 * <p>This method is equivalent to the expression: 4785 * <blockquote><pre>{@code 4786 * isHighSurrogate(high) && isLowSurrogate(low) 4787 * }</pre></blockquote> 4788 * 4789 * @param high the high-surrogate code value to be tested 4790 * @param low the low-surrogate code value to be tested 4791 * @return {@code true} if the specified high and 4792 * low-surrogate code values represent a valid surrogate pair; 4793 * {@code false} otherwise. 4794 * @since 1.5 4795 */ 4796 public static boolean isSurrogatePair(char high, char low) { 4797 return isHighSurrogate(high) && isLowSurrogate(low); 4798 } 4799 4800 /** 4801 * Determines the number of {@code char} values needed to 4802 * represent the specified character (Unicode code point). If the 4803 * specified character is equal to or greater than 0x10000, then 4804 * the method returns 2. Otherwise, the method returns 1. 4805 * 4806 * <p>This method doesn't validate the specified character to be a 4807 * valid Unicode code point. The caller must validate the 4808 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4809 * if necessary. 4810 * 4811 * @param codePoint the character (Unicode code point) to be tested. 4812 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4813 * @see Character#isSupplementaryCodePoint(int) 4814 * @since 1.5 4815 */ 4816 public static int charCount(int codePoint) { 4817 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4818 } 4819 4820 /** 4821 * Converts the specified surrogate pair to its supplementary code 4822 * point value. 
This method does not validate the specified 4823 * surrogate pair. The caller must validate it using {@link 4824 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4825 * 4826 * @param high the high-surrogate code unit 4827 * @param low the low-surrogate code unit 4828 * @return the supplementary code point composed from the 4829 * specified surrogate pair. 4830 * @since 1.5 4831 */ 4832 public static int toCodePoint(char high, char low) { 4833 // Optimized form of: 4834 // return ((high - MIN_HIGH_SURROGATE) << 10) 4835 // + (low - MIN_LOW_SURROGATE) 4836 // + MIN_SUPPLEMENTARY_CODE_POINT; 4837 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4838 - (MIN_HIGH_SURROGATE << 10) 4839 - MIN_LOW_SURROGATE); 4840 } 4841 4842 /** 4843 * Returns the code point at the given index of the 4844 * {@code CharSequence}. If the {@code char} value at 4845 * the given index in the {@code CharSequence} is in the 4846 * high-surrogate range, the following index is less than the 4847 * length of the {@code CharSequence}, and the 4848 * {@code char} value at the following index is in the 4849 * low-surrogate range, then the supplementary code point 4850 * corresponding to this surrogate pair is returned. Otherwise, 4851 * the {@code char} value at the given index is returned. 4852 * 4853 * @param seq a sequence of {@code char} values (Unicode code 4854 * units) 4855 * @param index the index to the {@code char} values (Unicode 4856 * code units) in {@code seq} to be converted 4857 * @return the Unicode code point at the given index 4858 * @exception NullPointerException if {@code seq} is null. 4859 * @exception IndexOutOfBoundsException if the value 4860 * {@code index} is negative or not less than 4861 * {@link CharSequence#length() seq.length()}. 
4862 * @since 1.5 4863 */ 4864 public static int codePointAt(CharSequence seq, int index) { 4865 char c1 = seq.charAt(index); 4866 if (isHighSurrogate(c1) && ++index < seq.length()) { 4867 char c2 = seq.charAt(index); 4868 if (isLowSurrogate(c2)) { 4869 return toCodePoint(c1, c2); 4870 } 4871 } 4872 return c1; 4873 } 4874 4875 /** 4876 * Returns the code point at the given index of the 4877 * {@code char} array. If the {@code char} value at 4878 * the given index in the {@code char} array is in the 4879 * high-surrogate range, the following index is less than the 4880 * length of the {@code char} array, and the 4881 * {@code char} value at the following index is in the 4882 * low-surrogate range, then the supplementary code point 4883 * corresponding to this surrogate pair is returned. Otherwise, 4884 * the {@code char} value at the given index is returned. 4885 * 4886 * @param a the {@code char} array 4887 * @param index the index to the {@code char} values (Unicode 4888 * code units) in the {@code char} array to be converted 4889 * @return the Unicode code point at the given index 4890 * @exception NullPointerException if {@code a} is null. 4891 * @exception IndexOutOfBoundsException if the value 4892 * {@code index} is negative or not less than 4893 * the length of the {@code char} array. 4894 * @since 1.5 4895 */ 4896 public static int codePointAt(char[] a, int index) { 4897 return codePointAtImpl(a, index, a.length); 4898 } 4899 4900 /** 4901 * Returns the code point at the given index of the 4902 * {@code char} array, where only array elements with 4903 * {@code index} less than {@code limit} can be used. 
If 4904 * the {@code char} value at the given index in the 4905 * {@code char} array is in the high-surrogate range, the 4906 * following index is less than the {@code limit}, and the 4907 * {@code char} value at the following index is in the 4908 * low-surrogate range, then the supplementary code point 4909 * corresponding to this surrogate pair is returned. Otherwise, 4910 * the {@code char} value at the given index is returned. 4911 * 4912 * @param a the {@code char} array 4913 * @param index the index to the {@code char} values (Unicode 4914 * code units) in the {@code char} array to be converted 4915 * @param limit the index after the last array element that 4916 * can be used in the {@code char} array 4917 * @return the Unicode code point at the given index 4918 * @exception NullPointerException if {@code a} is null. 4919 * @exception IndexOutOfBoundsException if the {@code index} 4920 * argument is negative or not less than the {@code limit} 4921 * argument, or if the {@code limit} argument is negative or 4922 * greater than the length of the {@code char} array. 4923 * @since 1.5 4924 */ 4925 public static int codePointAt(char[] a, int index, int limit) { 4926 if (index >= limit || limit < 0 || limit > a.length) { 4927 throw new IndexOutOfBoundsException(); 4928 } 4929 return codePointAtImpl(a, index, limit); 4930 } 4931 4932 // throws ArrayIndexOutOfBoundsException if index out of bounds 4933 static int codePointAtImpl(char[] a, int index, int limit) { 4934 char c1 = a[index]; 4935 if (isHighSurrogate(c1) && ++index < limit) { 4936 char c2 = a[index]; 4937 if (isLowSurrogate(c2)) { 4938 return toCodePoint(c1, c2); 4939 } 4940 } 4941 return c1; 4942 } 4943 4944 /** 4945 * Returns the code point preceding the given index of the 4946 * {@code CharSequence}. 
If the {@code char} value at 4947 * {@code (index - 1)} in the {@code CharSequence} is in 4948 * the low-surrogate range, {@code (index - 2)} is not 4949 * negative, and the {@code char} value at {@code (index - 2)} 4950 * in the {@code CharSequence} is in the 4951 * high-surrogate range, then the supplementary code point 4952 * corresponding to this surrogate pair is returned. Otherwise, 4953 * the {@code char} value at {@code (index - 1)} is 4954 * returned. 4955 * 4956 * @param seq the {@code CharSequence} instance 4957 * @param index the index following the code point that should be returned 4958 * @return the Unicode code point value before the given index. 4959 * @exception NullPointerException if {@code seq} is null. 4960 * @exception IndexOutOfBoundsException if the {@code index} 4961 * argument is less than 1 or greater than {@link 4962 * CharSequence#length() seq.length()}. 4963 * @since 1.5 4964 */ 4965 public static int codePointBefore(CharSequence seq, int index) { 4966 char c2 = seq.charAt(--index); 4967 if (isLowSurrogate(c2) && index > 0) { 4968 char c1 = seq.charAt(--index); 4969 if (isHighSurrogate(c1)) { 4970 return toCodePoint(c1, c2); 4971 } 4972 } 4973 return c2; 4974 } 4975 4976 /** 4977 * Returns the code point preceding the given index of the 4978 * {@code char} array. If the {@code char} value at 4979 * {@code (index - 1)} in the {@code char} array is in 4980 * the low-surrogate range, {@code (index - 2)} is not 4981 * negative, and the {@code char} value at {@code (index - 2)} 4982 * in the {@code char} array is in the 4983 * high-surrogate range, then the supplementary code point 4984 * corresponding to this surrogate pair is returned. Otherwise, 4985 * the {@code char} value at {@code (index - 1)} is 4986 * returned. 4987 * 4988 * @param a the {@code char} array 4989 * @param index the index following the code point that should be returned 4990 * @return the Unicode code point value before the given index. 
4991 * @exception NullPointerException if {@code a} is null. 4992 * @exception IndexOutOfBoundsException if the {@code index} 4993 * argument is less than 1 or greater than the length of the 4994 * {@code char} array 4995 * @since 1.5 4996 */ 4997 public static int codePointBefore(char[] a, int index) { 4998 return codePointBeforeImpl(a, index, 0); 4999 } 5000 5001 /** 5002 * Returns the code point preceding the given index of the 5003 * {@code char} array, where only array elements with 5004 * {@code index} greater than or equal to {@code start} 5005 * can be used. If the {@code char} value at {@code (index - 1)} 5006 * in the {@code char} array is in the 5007 * low-surrogate range, {@code (index - 2)} is not less than 5008 * {@code start}, and the {@code char} value at 5009 * {@code (index - 2)} in the {@code char} array is in 5010 * the high-surrogate range, then the supplementary code point 5011 * corresponding to this surrogate pair is returned. Otherwise, 5012 * the {@code char} value at {@code (index - 1)} is 5013 * returned. 5014 * 5015 * @param a the {@code char} array 5016 * @param index the index following the code point that should be returned 5017 * @param start the index of the first array element in the 5018 * {@code char} array 5019 * @return the Unicode code point value before the given index. 5020 * @exception NullPointerException if {@code a} is null. 5021 * @exception IndexOutOfBoundsException if the {@code index} 5022 * argument is not greater than the {@code start} argument or 5023 * is greater than the length of the {@code char} array, or 5024 * if the {@code start} argument is negative or not less than 5025 * the length of the {@code char} array. 
5026 * @since 1.5 5027 */ 5028 public static int codePointBefore(char[] a, int index, int start) { 5029 if (index <= start || start < 0 || start >= a.length) { 5030 throw new IndexOutOfBoundsException(); 5031 } 5032 return codePointBeforeImpl(a, index, start); 5033 } 5034 5035 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 5036 static int codePointBeforeImpl(char[] a, int index, int start) { 5037 char c2 = a[--index]; 5038 if (isLowSurrogate(c2) && index > start) { 5039 char c1 = a[--index]; 5040 if (isHighSurrogate(c1)) { 5041 return toCodePoint(c1, c2); 5042 } 5043 } 5044 return c2; 5045 } 5046 5047 /** 5048 * Returns the leading surrogate (a 5049 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 5050 * high surrogate code unit</a>) of the 5051 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5052 * surrogate pair</a> 5053 * representing the specified supplementary character (Unicode 5054 * code point) in the UTF-16 encoding. If the specified character 5055 * is not a 5056 * <a href="Character.html#supplementary">supplementary character</a>, 5057 * an unspecified {@code char} is returned. 5058 * 5059 * <p>If 5060 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5061 * is {@code true}, then 5062 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 5063 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 5064 * are also always {@code true}. 
5065 * 5066 * @param codePoint a supplementary character (Unicode code point) 5067 * @return the leading surrogate code unit used to represent the 5068 * character in the UTF-16 encoding 5069 * @since 1.7 5070 */ 5071 public static char highSurrogate(int codePoint) { 5072 return (char) ((codePoint >>> 10) 5073 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 5074 } 5075 5076 /** 5077 * Returns the trailing surrogate (a 5078 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 5079 * low surrogate code unit</a>) of the 5080 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 5081 * surrogate pair</a> 5082 * representing the specified supplementary character (Unicode 5083 * code point) in the UTF-16 encoding. If the specified character 5084 * is not a 5085 * <a href="Character.html#supplementary">supplementary character</a>, 5086 * an unspecified {@code char} is returned. 5087 * 5088 * <p>If 5089 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 5090 * is {@code true}, then 5091 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 5092 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 5093 * are also always {@code true}. 5094 * 5095 * @param codePoint a supplementary character (Unicode code point) 5096 * @return the trailing surrogate code unit used to represent the 5097 * character in the UTF-16 encoding 5098 * @since 1.7 5099 */ 5100 public static char lowSurrogate(int codePoint) { 5101 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 5102 } 5103 5104 /** 5105 * Converts the specified character (Unicode code point) to its 5106 * UTF-16 representation. If the specified code point is a BMP 5107 * (Basic Multilingual Plane or Plane 0) value, the same value is 5108 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 5109 * specified code point is a supplementary character, its 5110 * surrogate values are stored in {@code dst[dstIndex]} 5111 * (high-surrogate) and {@code dst[dstIndex+1]} 5112 * (low-surrogate), and 2 is returned. 5113 * 5114 * @param codePoint the character (Unicode code point) to be converted. 5115 * @param dst an array of {@code char} in which the 5116 * {@code codePoint}'s UTF-16 value is stored. 5117 * @param dstIndex the start index into the {@code dst} 5118 * array where the converted value is stored. 5119 * @return 1 if the code point is a BMP code point, 2 if the 5120 * code point is a supplementary code point. 5121 * @exception IllegalArgumentException if the specified 5122 * {@code codePoint} is not a valid Unicode code point. 5123 * @exception NullPointerException if the specified {@code dst} is null. 5124 * @exception IndexOutOfBoundsException if {@code dstIndex} 5125 * is negative or not less than {@code dst.length}, or if 5126 * {@code dst} at {@code dstIndex} doesn't have enough 5127 * array element(s) to store the resulting {@code char} 5128 * value(s). (If {@code dstIndex} is equal to 5129 * {@code dst.length-1} and the specified 5130 * {@code codePoint} is a supplementary character, the 5131 * high-surrogate value is not stored in 5132 * {@code dst[dstIndex]}.) 5133 * @since 1.5 5134 */ 5135 public static int toChars(int codePoint, char[] dst, int dstIndex) { 5136 if (isBmpCodePoint(codePoint)) { 5137 dst[dstIndex] = (char) codePoint; 5138 return 1; 5139 } else if (isValidCodePoint(codePoint)) { 5140 toSurrogates(codePoint, dst, dstIndex); 5141 return 2; 5142 } else { 5143 throw new IllegalArgumentException(); 5144 } 5145 } 5146 5147 /** 5148 * Converts the specified character (Unicode code point) to its 5149 * UTF-16 representation stored in a {@code char} array. 
If 5150 * the specified code point is a BMP (Basic Multilingual Plane or 5151 * Plane 0) value, the resulting {@code char} array has 5152 * the same value as {@code codePoint}. If the specified code 5153 * point is a supplementary code point, the resulting 5154 * {@code char} array has the corresponding surrogate pair. 5155 * 5156 * @param codePoint a Unicode code point 5157 * @return a {@code char} array having 5158 * {@code codePoint}'s UTF-16 representation. 5159 * @exception IllegalArgumentException if the specified 5160 * {@code codePoint} is not a valid Unicode code point. 5161 * @since 1.5 5162 */ 5163 public static char[] toChars(int codePoint) { 5164 if (isBmpCodePoint(codePoint)) { 5165 return new char[] { (char) codePoint }; 5166 } else if (isValidCodePoint(codePoint)) { 5167 char[] result = new char[2]; 5168 toSurrogates(codePoint, result, 0); 5169 return result; 5170 } else { 5171 throw new IllegalArgumentException(); 5172 } 5173 } 5174 5175 static void toSurrogates(int codePoint, char[] dst, int index) { 5176 // We write elements "backwards" to guarantee all-or-nothing 5177 dst[index+1] = lowSurrogate(codePoint); 5178 dst[index] = highSurrogate(codePoint); 5179 } 5180 5181 /** 5182 * Returns the number of Unicode code points in the text range of 5183 * the specified char sequence. The text range begins at the 5184 * specified {@code beginIndex} and extends to the 5185 * {@code char} at index {@code endIndex - 1}. Thus the 5186 * length (in {@code char}s) of the text range is 5187 * {@code endIndex-beginIndex}. Unpaired surrogates within 5188 * the text range count as one code point each. 5189 * 5190 * @param seq the char sequence 5191 * @param beginIndex the index to the first {@code char} of 5192 * the text range. 5193 * @param endIndex the index after the last {@code char} of 5194 * the text range. 5195 * @return the number of Unicode code points in the specified text 5196 * range 5197 * @exception NullPointerException if {@code seq} is null. 
5198 * @exception IndexOutOfBoundsException if the 5199 * {@code beginIndex} is negative, or {@code endIndex} 5200 * is larger than the length of the given sequence, or 5201 * {@code beginIndex} is larger than {@code endIndex}. 5202 * @since 1.5 5203 */ 5204 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5205 int length = seq.length(); 5206 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5207 throw new IndexOutOfBoundsException(); 5208 } 5209 int n = endIndex - beginIndex; 5210 for (int i = beginIndex; i < endIndex; ) { 5211 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5212 isLowSurrogate(seq.charAt(i))) { 5213 n--; 5214 i++; 5215 } 5216 } 5217 return n; 5218 } 5219 5220 /** 5221 * Returns the number of Unicode code points in a subarray of the 5222 * {@code char} array argument. The {@code offset} 5223 * argument is the index of the first {@code char} of the 5224 * subarray and the {@code count} argument specifies the 5225 * length of the subarray in {@code char}s. Unpaired 5226 * surrogates within the subarray count as one code point each. 5227 * 5228 * @param a the {@code char} array 5229 * @param offset the index of the first {@code char} in the 5230 * given {@code char} array 5231 * @param count the length of the subarray in {@code char}s 5232 * @return the number of Unicode code points in the specified subarray 5233 * @exception NullPointerException if {@code a} is null. 5234 * @exception IndexOutOfBoundsException if {@code offset} or 5235 * {@code count} is negative, or if {@code offset + 5236 * count} is larger than the length of the given array. 
5237 * @since 1.5 5238 */ 5239 public static int codePointCount(char[] a, int offset, int count) { 5240 if (count > a.length - offset || offset < 0 || count < 0) { 5241 throw new IndexOutOfBoundsException(); 5242 } 5243 return codePointCountImpl(a, offset, count); 5244 } 5245 5246 static int codePointCountImpl(char[] a, int offset, int count) { 5247 int endIndex = offset + count; 5248 int n = count; 5249 for (int i = offset; i < endIndex; ) { 5250 if (isHighSurrogate(a[i++]) && i < endIndex && 5251 isLowSurrogate(a[i])) { 5252 n--; 5253 i++; 5254 } 5255 } 5256 return n; 5257 } 5258 5259 /** 5260 * Returns the index within the given char sequence that is offset 5261 * from the given {@code index} by {@code codePointOffset} 5262 * code points. Unpaired surrogates within the text range given by 5263 * {@code index} and {@code codePointOffset} count as 5264 * one code point each. 5265 * 5266 * @param seq the char sequence 5267 * @param index the index to be offset 5268 * @param codePointOffset the offset in code points 5269 * @return the index within the char sequence 5270 * @exception NullPointerException if {@code seq} is null. 5271 * @exception IndexOutOfBoundsException if {@code index} 5272 * is negative or larger then the length of the char sequence, 5273 * or if {@code codePointOffset} is positive and the 5274 * subsequence starting with {@code index} has fewer than 5275 * {@code codePointOffset} code points, or if 5276 * {@code codePointOffset} is negative and the subsequence 5277 * before {@code index} has fewer than the absolute value 5278 * of {@code codePointOffset} code points. 
5279 * @since 1.5 5280 */ 5281 public static int offsetByCodePoints(CharSequence seq, int index, 5282 int codePointOffset) { 5283 int length = seq.length(); 5284 if (index < 0 || index > length) { 5285 throw new IndexOutOfBoundsException(); 5286 } 5287 5288 int x = index; 5289 if (codePointOffset >= 0) { 5290 int i; 5291 for (i = 0; x < length && i < codePointOffset; i++) { 5292 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5293 isLowSurrogate(seq.charAt(x))) { 5294 x++; 5295 } 5296 } 5297 if (i < codePointOffset) { 5298 throw new IndexOutOfBoundsException(); 5299 } 5300 } else { 5301 int i; 5302 for (i = codePointOffset; x > 0 && i < 0; i++) { 5303 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5304 isHighSurrogate(seq.charAt(x-1))) { 5305 x--; 5306 } 5307 } 5308 if (i < 0) { 5309 throw new IndexOutOfBoundsException(); 5310 } 5311 } 5312 return x; 5313 } 5314 5315 /** 5316 * Returns the index within the given {@code char} subarray 5317 * that is offset from the given {@code index} by 5318 * {@code codePointOffset} code points. The 5319 * {@code start} and {@code count} arguments specify a 5320 * subarray of the {@code char} array. Unpaired surrogates 5321 * within the text range given by {@code index} and 5322 * {@code codePointOffset} count as one code point each. 5323 * 5324 * @param a the {@code char} array 5325 * @param start the index of the first {@code char} of the 5326 * subarray 5327 * @param count the length of the subarray in {@code char}s 5328 * @param index the index to be offset 5329 * @param codePointOffset the offset in code points 5330 * @return the index within the subarray 5331 * @exception NullPointerException if {@code a} is null. 
5332 * @exception IndexOutOfBoundsException 5333 * if {@code start} or {@code count} is negative, 5334 * or if {@code start + count} is larger than the length of 5335 * the given array, 5336 * or if {@code index} is less than {@code start} or 5337 * larger then {@code start + count}, 5338 * or if {@code codePointOffset} is positive and the text range 5339 * starting with {@code index} and ending with {@code start + count - 1} 5340 * has fewer than {@code codePointOffset} code 5341 * points, 5342 * or if {@code codePointOffset} is negative and the text range 5343 * starting with {@code start} and ending with {@code index - 1} 5344 * has fewer than the absolute value of 5345 * {@code codePointOffset} code points. 5346 * @since 1.5 5347 */ 5348 public static int offsetByCodePoints(char[] a, int start, int count, 5349 int index, int codePointOffset) { 5350 if (count > a.length-start || start < 0 || count < 0 5351 || index < start || index > start+count) { 5352 throw new IndexOutOfBoundsException(); 5353 } 5354 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5355 } 5356 5357 static int offsetByCodePointsImpl(char[]a, int start, int count, 5358 int index, int codePointOffset) { 5359 int x = index; 5360 if (codePointOffset >= 0) { 5361 int limit = start + count; 5362 int i; 5363 for (i = 0; x < limit && i < codePointOffset; i++) { 5364 if (isHighSurrogate(a[x++]) && x < limit && 5365 isLowSurrogate(a[x])) { 5366 x++; 5367 } 5368 } 5369 if (i < codePointOffset) { 5370 throw new IndexOutOfBoundsException(); 5371 } 5372 } else { 5373 int i; 5374 for (i = codePointOffset; x > start && i < 0; i++) { 5375 if (isLowSurrogate(a[--x]) && x > start && 5376 isHighSurrogate(a[x-1])) { 5377 x--; 5378 } 5379 } 5380 if (i < 0) { 5381 throw new IndexOutOfBoundsException(); 5382 } 5383 } 5384 return x; 5385 } 5386 5387 /** 5388 * Determines if the specified character is a lowercase character. 
5389 * <p> 5390 * A character is lowercase if its general category type, provided 5391 * by {@code Character.getType(ch)}, is 5392 * {@code LOWERCASE_LETTER}, or it has contributory property 5393 * Other_Lowercase as defined by the Unicode Standard. 5394 * <p> 5395 * The following are examples of lowercase characters: 5396 * <blockquote><pre> 5397 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5398 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5399 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5400 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5401 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5402 * </pre></blockquote> 5403 * <p> Many other Unicode characters are lowercase too. 5404 * 5405 * <p><b>Note:</b> This method cannot handle <a 5406 * href="#supplementary"> supplementary characters</a>. To support 5407 * all Unicode characters, including supplementary characters, use 5408 * the {@link #isLowerCase(int)} method. 5409 * 5410 * @param ch the character to be tested. 5411 * @return {@code true} if the character is lowercase; 5412 * {@code false} otherwise. 5413 * @see Character#isLowerCase(char) 5414 * @see Character#isTitleCase(char) 5415 * @see Character#toLowerCase(char) 5416 * @see Character#getType(char) 5417 */ 5418 public static boolean isLowerCase(char ch) { 5419 return isLowerCase((int)ch); 5420 } 5421 5422 /** 5423 * Determines if the specified character (Unicode code point) is a 5424 * lowercase character. 5425 * <p> 5426 * A character is lowercase if its general category type, provided 5427 * by {@link Character#getType getType(codePoint)}, is 5428 * {@code LOWERCASE_LETTER}, or it has contributory property 5429 * Other_Lowercase as defined by the Unicode Standard. 
5430 * <p> 5431 * The following are examples of lowercase characters: 5432 * <blockquote><pre> 5433 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5434 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5435 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5436 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5437 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5438 * </pre></blockquote> 5439 * <p> Many other Unicode characters are lowercase too. 5440 * 5441 * @param codePoint the character (Unicode code point) to be tested. 5442 * @return {@code true} if the character is lowercase; 5443 * {@code false} otherwise. 5444 * @see Character#isLowerCase(int) 5445 * @see Character#isTitleCase(int) 5446 * @see Character#toLowerCase(int) 5447 * @see Character#getType(int) 5448 * @since 1.5 5449 */ 5450 public static boolean isLowerCase(int codePoint) { 5451 return getType(codePoint) == Character.LOWERCASE_LETTER || 5452 CharacterData.of(codePoint).isOtherLowercase(codePoint); 5453 } 5454 5455 /** 5456 * Determines if the specified character is an uppercase character. 5457 * <p> 5458 * A character is uppercase if its general category type, provided by 5459 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 5460 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5461 * <p> 5462 * The following are examples of uppercase characters: 5463 * <blockquote><pre> 5464 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5465 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5466 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5467 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5468 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5469 * </pre></blockquote> 5470 * <p> Many other Unicode characters are uppercase too. 
5471 * 5472 * <p><b>Note:</b> This method cannot handle <a 5473 * href="#supplementary"> supplementary characters</a>. To support 5474 * all Unicode characters, including supplementary characters, use 5475 * the {@link #isUpperCase(int)} method. 5476 * 5477 * @param ch the character to be tested. 5478 * @return {@code true} if the character is uppercase; 5479 * {@code false} otherwise. 5480 * @see Character#isLowerCase(char) 5481 * @see Character#isTitleCase(char) 5482 * @see Character#toUpperCase(char) 5483 * @see Character#getType(char) 5484 * @since 1.0 5485 */ 5486 public static boolean isUpperCase(char ch) { 5487 return isUpperCase((int)ch); 5488 } 5489 5490 /** 5491 * Determines if the specified character (Unicode code point) is an uppercase character. 5492 * <p> 5493 * A character is uppercase if its general category type, provided by 5494 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 5495 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 5496 * <p> 5497 * The following are examples of uppercase characters: 5498 * <blockquote><pre> 5499 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5500 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5501 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5502 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5503 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5504 * </pre></blockquote> 5505 * <p> Many other Unicode characters are uppercase too. 5506 * 5507 * @param codePoint the character (Unicode code point) to be tested. 5508 * @return {@code true} if the character is uppercase; 5509 * {@code false} otherwise. 
5510 * @see Character#isLowerCase(int) 5511 * @see Character#isTitleCase(int) 5512 * @see Character#toUpperCase(int) 5513 * @see Character#getType(int) 5514 * @since 1.5 5515 */ 5516 public static boolean isUpperCase(int codePoint) { 5517 return getType(codePoint) == Character.UPPERCASE_LETTER || 5518 CharacterData.of(codePoint).isOtherUppercase(codePoint); 5519 } 5520 5521 /** 5522 * Determines if the specified character is a titlecase character. 5523 * <p> 5524 * A character is a titlecase character if its general 5525 * category type, provided by {@code Character.getType(ch)}, 5526 * is {@code TITLECASE_LETTER}. 5527 * <p> 5528 * Some characters look like pairs of Latin letters. For example, there 5529 * is an uppercase letter that looks like "LJ" and has a corresponding 5530 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5531 * is the appropriate form to use when rendering a word in lowercase 5532 * with initial capitals, as for a book title. 5533 * <p> 5534 * These are some of the Unicode characters for which this method returns 5535 * {@code true}: 5536 * <ul> 5537 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5538 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5539 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5540 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5541 * </ul> 5542 * <p> Many other Unicode characters are titlecase too. 5543 * 5544 * <p><b>Note:</b> This method cannot handle <a 5545 * href="#supplementary"> supplementary characters</a>. To support 5546 * all Unicode characters, including supplementary characters, use 5547 * the {@link #isTitleCase(int)} method. 5548 * 5549 * @param ch the character to be tested. 5550 * @return {@code true} if the character is titlecase; 5551 * {@code false} otherwise. 
5552 * @see Character#isLowerCase(char) 5553 * @see Character#isUpperCase(char) 5554 * @see Character#toTitleCase(char) 5555 * @see Character#getType(char) 5556 * @since 1.0.2 5557 */ 5558 public static boolean isTitleCase(char ch) { 5559 return isTitleCase((int)ch); 5560 } 5561 5562 /** 5563 * Determines if the specified character (Unicode code point) is a titlecase character. 5564 * <p> 5565 * A character is a titlecase character if its general 5566 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5567 * is {@code TITLECASE_LETTER}. 5568 * <p> 5569 * Some characters look like pairs of Latin letters. For example, there 5570 * is an uppercase letter that looks like "LJ" and has a corresponding 5571 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5572 * is the appropriate form to use when rendering a word in lowercase 5573 * with initial capitals, as for a book title. 5574 * <p> 5575 * These are some of the Unicode characters for which this method returns 5576 * {@code true}: 5577 * <ul> 5578 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5579 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5580 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5581 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5582 * </ul> 5583 * <p> Many other Unicode characters are titlecase too. 5584 * 5585 * @param codePoint the character (Unicode code point) to be tested. 5586 * @return {@code true} if the character is titlecase; 5587 * {@code false} otherwise. 5588 * @see Character#isLowerCase(int) 5589 * @see Character#isUpperCase(int) 5590 * @see Character#toTitleCase(int) 5591 * @see Character#getType(int) 5592 * @since 1.5 5593 */ 5594 public static boolean isTitleCase(int codePoint) { 5595 return getType(codePoint) == Character.TITLECASE_LETTER; 5596 } 5597 5598 /** 5599 * Determines if the specified character is a digit. 
5600 * <p> 5601 * A character is a digit if its general category type, provided 5602 * by {@code Character.getType(ch)}, is 5603 * {@code DECIMAL_DIGIT_NUMBER}. 5604 * <p> 5605 * Some Unicode character ranges that contain digits: 5606 * <ul> 5607 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5608 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5609 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5610 * Arabic-Indic digits 5611 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5612 * Extended Arabic-Indic digits 5613 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5614 * Devanagari digits 5615 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5616 * Fullwidth digits 5617 * </ul> 5618 * 5619 * Many other character ranges contain digits as well. 5620 * 5621 * <p><b>Note:</b> This method cannot handle <a 5622 * href="#supplementary"> supplementary characters</a>. To support 5623 * all Unicode characters, including supplementary characters, use 5624 * the {@link #isDigit(int)} method. 5625 * 5626 * @param ch the character to be tested. 5627 * @return {@code true} if the character is a digit; 5628 * {@code false} otherwise. 5629 * @see Character#digit(char, int) 5630 * @see Character#forDigit(int, int) 5631 * @see Character#getType(char) 5632 */ 5633 public static boolean isDigit(char ch) { 5634 return isDigit((int)ch); 5635 } 5636 5637 /** 5638 * Determines if the specified character (Unicode code point) is a digit. 5639 * <p> 5640 * A character is a digit if its general category type, provided 5641 * by {@link Character#getType(int) getType(codePoint)}, is 5642 * {@code DECIMAL_DIGIT_NUMBER}. 
5643 * <p> 5644 * Some Unicode character ranges that contain digits: 5645 * <ul> 5646 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5647 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5648 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5649 * Arabic-Indic digits 5650 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5651 * Extended Arabic-Indic digits 5652 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5653 * Devanagari digits 5654 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5655 * Fullwidth digits 5656 * </ul> 5657 * 5658 * Many other character ranges contain digits as well. 5659 * 5660 * @param codePoint the character (Unicode code point) to be tested. 5661 * @return {@code true} if the character is a digit; 5662 * {@code false} otherwise. 5663 * @see Character#forDigit(int, int) 5664 * @see Character#getType(int) 5665 * @since 1.5 5666 */ 5667 public static boolean isDigit(int codePoint) { 5668 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5669 } 5670 5671 /** 5672 * Determines if a character is defined in Unicode. 5673 * <p> 5674 * A character is defined if at least one of the following is true: 5675 * <ul> 5676 * <li>It has an entry in the UnicodeData file. 5677 * <li>It has a value in a range defined by the UnicodeData file. 5678 * </ul> 5679 * 5680 * <p><b>Note:</b> This method cannot handle <a 5681 * href="#supplementary"> supplementary characters</a>. To support 5682 * all Unicode characters, including supplementary characters, use 5683 * the {@link #isDefined(int)} method. 5684 * 5685 * @param ch the character to be tested 5686 * @return {@code true} if the character has a defined meaning 5687 * in Unicode; {@code false} otherwise. 
5688 * @see Character#isDigit(char) 5689 * @see Character#isLetter(char) 5690 * @see Character#isLetterOrDigit(char) 5691 * @see Character#isLowerCase(char) 5692 * @see Character#isTitleCase(char) 5693 * @see Character#isUpperCase(char) 5694 * @since 1.0.2 5695 */ 5696 public static boolean isDefined(char ch) { 5697 return isDefined((int)ch); 5698 } 5699 5700 /** 5701 * Determines if a character (Unicode code point) is defined in Unicode. 5702 * <p> 5703 * A character is defined if at least one of the following is true: 5704 * <ul> 5705 * <li>It has an entry in the UnicodeData file. 5706 * <li>It has a value in a range defined by the UnicodeData file. 5707 * </ul> 5708 * 5709 * @param codePoint the character (Unicode code point) to be tested. 5710 * @return {@code true} if the character has a defined meaning 5711 * in Unicode; {@code false} otherwise. 5712 * @see Character#isDigit(int) 5713 * @see Character#isLetter(int) 5714 * @see Character#isLetterOrDigit(int) 5715 * @see Character#isLowerCase(int) 5716 * @see Character#isTitleCase(int) 5717 * @see Character#isUpperCase(int) 5718 * @since 1.5 5719 */ 5720 public static boolean isDefined(int codePoint) { 5721 return getType(codePoint) != Character.UNASSIGNED; 5722 } 5723 5724 /** 5725 * Determines if the specified character is a letter. 5726 * <p> 5727 * A character is considered to be a letter if its general 5728 * category type, provided by {@code Character.getType(ch)}, 5729 * is any of the following: 5730 * <ul> 5731 * <li> {@code UPPERCASE_LETTER} 5732 * <li> {@code LOWERCASE_LETTER} 5733 * <li> {@code TITLECASE_LETTER} 5734 * <li> {@code MODIFIER_LETTER} 5735 * <li> {@code OTHER_LETTER} 5736 * </ul> 5737 * 5738 * Not all letters have case. Many characters are 5739 * letters but are neither uppercase nor lowercase nor titlecase. 5740 * 5741 * <p><b>Note:</b> This method cannot handle <a 5742 * href="#supplementary"> supplementary characters</a>. 
To support 5743 * all Unicode characters, including supplementary characters, use 5744 * the {@link #isLetter(int)} method. 5745 * 5746 * @param ch the character to be tested. 5747 * @return {@code true} if the character is a letter; 5748 * {@code false} otherwise. 5749 * @see Character#isDigit(char) 5750 * @see Character#isJavaIdentifierStart(char) 5751 * @see Character#isJavaLetter(char) 5752 * @see Character#isJavaLetterOrDigit(char) 5753 * @see Character#isLetterOrDigit(char) 5754 * @see Character#isLowerCase(char) 5755 * @see Character#isTitleCase(char) 5756 * @see Character#isUnicodeIdentifierStart(char) 5757 * @see Character#isUpperCase(char) 5758 */ 5759 public static boolean isLetter(char ch) { 5760 return isLetter((int)ch); 5761 } 5762 5763 /** 5764 * Determines if the specified character (Unicode code point) is a letter. 5765 * <p> 5766 * A character is considered to be a letter if its general 5767 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5768 * is any of the following: 5769 * <ul> 5770 * <li> {@code UPPERCASE_LETTER} 5771 * <li> {@code LOWERCASE_LETTER} 5772 * <li> {@code TITLECASE_LETTER} 5773 * <li> {@code MODIFIER_LETTER} 5774 * <li> {@code OTHER_LETTER} 5775 * </ul> 5776 * 5777 * Not all letters have case. Many characters are 5778 * letters but are neither uppercase nor lowercase nor titlecase. 5779 * 5780 * @param codePoint the character (Unicode code point) to be tested. 5781 * @return {@code true} if the character is a letter; 5782 * {@code false} otherwise. 
5783 * @see Character#isDigit(int) 5784 * @see Character#isJavaIdentifierStart(int) 5785 * @see Character#isLetterOrDigit(int) 5786 * @see Character#isLowerCase(int) 5787 * @see Character#isTitleCase(int) 5788 * @see Character#isUnicodeIdentifierStart(int) 5789 * @see Character#isUpperCase(int) 5790 * @since 1.5 5791 */ 5792 public static boolean isLetter(int codePoint) { 5793 return ((((1 << Character.UPPERCASE_LETTER) | 5794 (1 << Character.LOWERCASE_LETTER) | 5795 (1 << Character.TITLECASE_LETTER) | 5796 (1 << Character.MODIFIER_LETTER) | 5797 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5798 != 0; 5799 } 5800 5801 /** 5802 * Determines if the specified character is a letter or digit. 5803 * <p> 5804 * A character is considered to be a letter or digit if either 5805 * {@code Character.isLetter(char ch)} or 5806 * {@code Character.isDigit(char ch)} returns 5807 * {@code true} for the character. 5808 * 5809 * <p><b>Note:</b> This method cannot handle <a 5810 * href="#supplementary"> supplementary characters</a>. To support 5811 * all Unicode characters, including supplementary characters, use 5812 * the {@link #isLetterOrDigit(int)} method. 5813 * 5814 * @param ch the character to be tested. 5815 * @return {@code true} if the character is a letter or digit; 5816 * {@code false} otherwise. 5817 * @see Character#isDigit(char) 5818 * @see Character#isJavaIdentifierPart(char) 5819 * @see Character#isJavaLetter(char) 5820 * @see Character#isJavaLetterOrDigit(char) 5821 * @see Character#isLetter(char) 5822 * @see Character#isUnicodeIdentifierPart(char) 5823 * @since 1.0.2 5824 */ 5825 public static boolean isLetterOrDigit(char ch) { 5826 return isLetterOrDigit((int)ch); 5827 } 5828 5829 /** 5830 * Determines if the specified character (Unicode code point) is a letter or digit. 
5831 * <p> 5832 * A character is considered to be a letter or digit if either 5833 * {@link #isLetter(int) isLetter(codePoint)} or 5834 * {@link #isDigit(int) isDigit(codePoint)} returns 5835 * {@code true} for the character. 5836 * 5837 * @param codePoint the character (Unicode code point) to be tested. 5838 * @return {@code true} if the character is a letter or digit; 5839 * {@code false} otherwise. 5840 * @see Character#isDigit(int) 5841 * @see Character#isJavaIdentifierPart(int) 5842 * @see Character#isLetter(int) 5843 * @see Character#isUnicodeIdentifierPart(int) 5844 * @since 1.5 5845 */ 5846 public static boolean isLetterOrDigit(int codePoint) { 5847 return ((((1 << Character.UPPERCASE_LETTER) | 5848 (1 << Character.LOWERCASE_LETTER) | 5849 (1 << Character.TITLECASE_LETTER) | 5850 (1 << Character.MODIFIER_LETTER) | 5851 (1 << Character.OTHER_LETTER) | 5852 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5853 != 0; 5854 } 5855 5856 /** 5857 * Determines if the specified character is permissible as the first 5858 * character in a Java identifier. 5859 * <p> 5860 * A character may start a Java identifier if and only if 5861 * one of the following is true: 5862 * <ul> 5863 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5864 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5865 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5866 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5867 * </ul> 5868 * 5869 * @param ch the character to be tested. 5870 * @return {@code true} if the character may start a Java 5871 * identifier; {@code false} otherwise. 
5872 * @see Character#isJavaLetterOrDigit(char) 5873 * @see Character#isJavaIdentifierStart(char) 5874 * @see Character#isJavaIdentifierPart(char) 5875 * @see Character#isLetter(char) 5876 * @see Character#isLetterOrDigit(char) 5877 * @see Character#isUnicodeIdentifierStart(char) 5878 * @since 1.0.2 5879 * @deprecated Replaced by isJavaIdentifierStart(char). 5880 */ 5881 @Deprecated 5882 public static boolean isJavaLetter(char ch) { 5883 return isJavaIdentifierStart(ch); 5884 } 5885 5886 /** 5887 * Determines if the specified character may be part of a Java 5888 * identifier as other than the first character. 5889 * <p> 5890 * A character may be part of a Java identifier if and only if any 5891 * of the following are true: 5892 * <ul> 5893 * <li> it is a letter 5894 * <li> it is a currency symbol (such as {@code '$'}) 5895 * <li> it is a connecting punctuation character (such as {@code '_'}) 5896 * <li> it is a digit 5897 * <li> it is a numeric letter (such as a Roman numeral character) 5898 * <li> it is a combining mark 5899 * <li> it is a non-spacing mark 5900 * <li> {@code isIdentifierIgnorable} returns 5901 * {@code true} for the character. 5902 * </ul> 5903 * 5904 * @param ch the character to be tested. 5905 * @return {@code true} if the character may be part of a 5906 * Java identifier; {@code false} otherwise. 5907 * @see Character#isJavaLetter(char) 5908 * @see Character#isJavaIdentifierStart(char) 5909 * @see Character#isJavaIdentifierPart(char) 5910 * @see Character#isLetter(char) 5911 * @see Character#isLetterOrDigit(char) 5912 * @see Character#isUnicodeIdentifierPart(char) 5913 * @see Character#isIdentifierIgnorable(char) 5914 * @since 1.0.2 5915 * @deprecated Replaced by isJavaIdentifierPart(char). 5916 */ 5917 @Deprecated 5918 public static boolean isJavaLetterOrDigit(char ch) { 5919 return isJavaIdentifierPart(ch); 5920 } 5921 5922 /** 5923 * Determines if the specified character (Unicode code point) is an alphabet. 
5924 * <p> 5925 * A character is considered to be alphabetic if its general category type, 5926 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 5927 * the following: 5928 * <ul> 5929 * <li> <code>UPPERCASE_LETTER</code> 5930 * <li> <code>LOWERCASE_LETTER</code> 5931 * <li> <code>TITLECASE_LETTER</code> 5932 * <li> <code>MODIFIER_LETTER</code> 5933 * <li> <code>OTHER_LETTER</code> 5934 * <li> <code>LETTER_NUMBER</code> 5935 * </ul> 5936 * or it has contributory property Other_Alphabetic as defined by the 5937 * Unicode Standard. 5938 * 5939 * @param codePoint the character (Unicode code point) to be tested. 5940 * @return <code>true</code> if the character is a Unicode alphabet 5941 * character, <code>false</code> otherwise. 5942 * @since 1.7 5943 */ 5944 public static boolean isAlphabetic(int codePoint) { 5945 return (((((1 << Character.UPPERCASE_LETTER) | 5946 (1 << Character.LOWERCASE_LETTER) | 5947 (1 << Character.TITLECASE_LETTER) | 5948 (1 << Character.MODIFIER_LETTER) | 5949 (1 << Character.OTHER_LETTER) | 5950 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 5951 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 5952 } 5953 5954 /** 5955 * Determines if the specified character (Unicode code point) is a CJKV 5956 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 5957 * the Unicode Standard. 5958 * 5959 * @param codePoint the character (Unicode code point) to be tested. 5960 * @return <code>true</code> if the character is a Unicode ideograph 5961 * character, <code>false</code> otherwise. 5962 * @since 1.7 5963 */ 5964 public static boolean isIdeographic(int codePoint) { 5965 return CharacterData.of(codePoint).isIdeographic(codePoint); 5966 } 5967 5968 /** 5969 * Determines if the specified character is 5970 * permissible as the first character in a Java identifier. 
5971 * <p> 5972 * A character may start a Java identifier if and only if 5973 * one of the following conditions is true: 5974 * <ul> 5975 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5976 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5977 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5978 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5979 * </ul> 5980 * 5981 * <p><b>Note:</b> This method cannot handle <a 5982 * href="#supplementary"> supplementary characters</a>. To support 5983 * all Unicode characters, including supplementary characters, use 5984 * the {@link #isJavaIdentifierStart(int)} method. 5985 * 5986 * @param ch the character to be tested. 5987 * @return {@code true} if the character may start a Java identifier; 5988 * {@code false} otherwise. 5989 * @see Character#isJavaIdentifierPart(char) 5990 * @see Character#isLetter(char) 5991 * @see Character#isUnicodeIdentifierStart(char) 5992 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5993 * @since 1.1 5994 */ 5995 public static boolean isJavaIdentifierStart(char ch) { 5996 return isJavaIdentifierStart((int)ch); 5997 } 5998 5999 /** 6000 * Determines if the character (Unicode code point) is 6001 * permissible as the first character in a Java identifier. 6002 * <p> 6003 * A character may start a Java identifier if and only if 6004 * one of the following conditions is true: 6005 * <ul> 6006 * <li> {@link #isLetter(int) isLetter(codePoint)} 6007 * returns {@code true} 6008 * <li> {@link #getType(int) getType(codePoint)} 6009 * returns {@code LETTER_NUMBER} 6010 * <li> the referenced character is a currency symbol (such as {@code '$'}) 6011 * <li> the referenced character is a connecting punctuation character 6012 * (such as {@code '_'}). 6013 * </ul> 6014 * 6015 * @param codePoint the character (Unicode code point) to be tested. 
6016 * @return {@code true} if the character may start a Java identifier; 6017 * {@code false} otherwise. 6018 * @see Character#isJavaIdentifierPart(int) 6019 * @see Character#isLetter(int) 6020 * @see Character#isUnicodeIdentifierStart(int) 6021 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6022 * @since 1.5 6023 */ 6024 public static boolean isJavaIdentifierStart(int codePoint) { 6025 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 6026 } 6027 6028 /** 6029 * Determines if the specified character may be part of a Java 6030 * identifier as other than the first character. 6031 * <p> 6032 * A character may be part of a Java identifier if any of the following 6033 * are true: 6034 * <ul> 6035 * <li> it is a letter 6036 * <li> it is a currency symbol (such as {@code '$'}) 6037 * <li> it is a connecting punctuation character (such as {@code '_'}) 6038 * <li> it is a digit 6039 * <li> it is a numeric letter (such as a Roman numeral character) 6040 * <li> it is a combining mark 6041 * <li> it is a non-spacing mark 6042 * <li> {@code isIdentifierIgnorable} returns 6043 * {@code true} for the character 6044 * </ul> 6045 * 6046 * <p><b>Note:</b> This method cannot handle <a 6047 * href="#supplementary"> supplementary characters</a>. To support 6048 * all Unicode characters, including supplementary characters, use 6049 * the {@link #isJavaIdentifierPart(int)} method. 6050 * 6051 * @param ch the character to be tested. 6052 * @return {@code true} if the character may be part of a 6053 * Java identifier; {@code false} otherwise. 
6054 * @see Character#isIdentifierIgnorable(char) 6055 * @see Character#isJavaIdentifierStart(char) 6056 * @see Character#isLetterOrDigit(char) 6057 * @see Character#isUnicodeIdentifierPart(char) 6058 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6059 * @since 1.1 6060 */ 6061 public static boolean isJavaIdentifierPart(char ch) { 6062 return isJavaIdentifierPart((int)ch); 6063 } 6064 6065 /** 6066 * Determines if the character (Unicode code point) may be part of a Java 6067 * identifier as other than the first character. 6068 * <p> 6069 * A character may be part of a Java identifier if any of the following 6070 * are true: 6071 * <ul> 6072 * <li> it is a letter 6073 * <li> it is a currency symbol (such as {@code '$'}) 6074 * <li> it is a connecting punctuation character (such as {@code '_'}) 6075 * <li> it is a digit 6076 * <li> it is a numeric letter (such as a Roman numeral character) 6077 * <li> it is a combining mark 6078 * <li> it is a non-spacing mark 6079 * <li> {@link #isIdentifierIgnorable(int) 6080 * isIdentifierIgnorable(codePoint)} returns {@code true} for 6081 * the character 6082 * </ul> 6083 * 6084 * @param codePoint the character (Unicode code point) to be tested. 6085 * @return {@code true} if the character may be part of a 6086 * Java identifier; {@code false} otherwise. 6087 * @see Character#isIdentifierIgnorable(int) 6088 * @see Character#isJavaIdentifierStart(int) 6089 * @see Character#isLetterOrDigit(int) 6090 * @see Character#isUnicodeIdentifierPart(int) 6091 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 6092 * @since 1.5 6093 */ 6094 public static boolean isJavaIdentifierPart(int codePoint) { 6095 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 6096 } 6097 6098 /** 6099 * Determines if the specified character is permissible as the 6100 * first character in a Unicode identifier. 
6101 * <p> 6102 * A character may start a Unicode identifier if and only if 6103 * one of the following conditions is true: 6104 * <ul> 6105 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 6106 * <li> {@link #getType(char) getType(ch)} returns 6107 * {@code LETTER_NUMBER}. 6108 * </ul> 6109 * 6110 * <p><b>Note:</b> This method cannot handle <a 6111 * href="#supplementary"> supplementary characters</a>. To support 6112 * all Unicode characters, including supplementary characters, use 6113 * the {@link #isUnicodeIdentifierStart(int)} method. 6114 * 6115 * @param ch the character to be tested. 6116 * @return {@code true} if the character may start a Unicode 6117 * identifier; {@code false} otherwise. 6118 * @see Character#isJavaIdentifierStart(char) 6119 * @see Character#isLetter(char) 6120 * @see Character#isUnicodeIdentifierPart(char) 6121 * @since 1.1 6122 */ 6123 public static boolean isUnicodeIdentifierStart(char ch) { 6124 return isUnicodeIdentifierStart((int)ch); 6125 } 6126 6127 /** 6128 * Determines if the specified character (Unicode code point) is permissible as the 6129 * first character in a Unicode identifier. 6130 * <p> 6131 * A character may start a Unicode identifier if and only if 6132 * one of the following conditions is true: 6133 * <ul> 6134 * <li> {@link #isLetter(int) isLetter(codePoint)} 6135 * returns {@code true} 6136 * <li> {@link #getType(int) getType(codePoint)} 6137 * returns {@code LETTER_NUMBER}. 6138 * </ul> 6139 * @param codePoint the character (Unicode code point) to be tested. 6140 * @return {@code true} if the character may start a Unicode 6141 * identifier; {@code false} otherwise. 
6142 * @see Character#isJavaIdentifierStart(int) 6143 * @see Character#isLetter(int) 6144 * @see Character#isUnicodeIdentifierPart(int) 6145 * @since 1.5 6146 */ 6147 public static boolean isUnicodeIdentifierStart(int codePoint) { 6148 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 6149 } 6150 6151 /** 6152 * Determines if the specified character may be part of a Unicode 6153 * identifier as other than the first character. 6154 * <p> 6155 * A character may be part of a Unicode identifier if and only if 6156 * one of the following statements is true: 6157 * <ul> 6158 * <li> it is a letter 6159 * <li> it is a connecting punctuation character (such as {@code '_'}) 6160 * <li> it is a digit 6161 * <li> it is a numeric letter (such as a Roman numeral character) 6162 * <li> it is a combining mark 6163 * <li> it is a non-spacing mark 6164 * <li> {@code isIdentifierIgnorable} returns 6165 * {@code true} for this character. 6166 * </ul> 6167 * 6168 * <p><b>Note:</b> This method cannot handle <a 6169 * href="#supplementary"> supplementary characters</a>. To support 6170 * all Unicode characters, including supplementary characters, use 6171 * the {@link #isUnicodeIdentifierPart(int)} method. 6172 * 6173 * @param ch the character to be tested. 6174 * @return {@code true} if the character may be part of a 6175 * Unicode identifier; {@code false} otherwise. 6176 * @see Character#isIdentifierIgnorable(char) 6177 * @see Character#isJavaIdentifierPart(char) 6178 * @see Character#isLetterOrDigit(char) 6179 * @see Character#isUnicodeIdentifierStart(char) 6180 * @since 1.1 6181 */ 6182 public static boolean isUnicodeIdentifierPart(char ch) { 6183 return isUnicodeIdentifierPart((int)ch); 6184 } 6185 6186 /** 6187 * Determines if the specified character (Unicode code point) may be part of a Unicode 6188 * identifier as other than the first character. 
6189 * <p> 6190 * A character may be part of a Unicode identifier if and only if 6191 * one of the following statements is true: 6192 * <ul> 6193 * <li> it is a letter 6194 * <li> it is a connecting punctuation character (such as {@code '_'}) 6195 * <li> it is a digit 6196 * <li> it is a numeric letter (such as a Roman numeral character) 6197 * <li> it is a combining mark 6198 * <li> it is a non-spacing mark 6199 * <li> {@code isIdentifierIgnorable} returns 6200 * {@code true} for this character. 6201 * </ul> 6202 * @param codePoint the character (Unicode code point) to be tested. 6203 * @return {@code true} if the character may be part of a 6204 * Unicode identifier; {@code false} otherwise. 6205 * @see Character#isIdentifierIgnorable(int) 6206 * @see Character#isJavaIdentifierPart(int) 6207 * @see Character#isLetterOrDigit(int) 6208 * @see Character#isUnicodeIdentifierStart(int) 6209 * @since 1.5 6210 */ 6211 public static boolean isUnicodeIdentifierPart(int codePoint) { 6212 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 6213 } 6214 6215 /** 6216 * Determines if the specified character should be regarded as 6217 * an ignorable character in a Java identifier or a Unicode identifier. 6218 * <p> 6219 * The following Unicode characters are ignorable in a Java identifier 6220 * or a Unicode identifier: 6221 * <ul> 6222 * <li>ISO control characters that are not whitespace 6223 * <ul> 6224 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6225 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6226 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6227 * </ul> 6228 * 6229 * <li>all characters that have the {@code FORMAT} general 6230 * category value 6231 * </ul> 6232 * 6233 * <p><b>Note:</b> This method cannot handle <a 6234 * href="#supplementary"> supplementary characters</a>. To support 6235 * all Unicode characters, including supplementary characters, use 6236 * the {@link #isIdentifierIgnorable(int)} method. 
6237 * 6238 * @param ch the character to be tested. 6239 * @return {@code true} if the character is an ignorable control 6240 * character that may be part of a Java or Unicode identifier; 6241 * {@code false} otherwise. 6242 * @see Character#isJavaIdentifierPart(char) 6243 * @see Character#isUnicodeIdentifierPart(char) 6244 * @since 1.1 6245 */ 6246 public static boolean isIdentifierIgnorable(char ch) { 6247 return isIdentifierIgnorable((int)ch); 6248 } 6249 6250 /** 6251 * Determines if the specified character (Unicode code point) should be regarded as 6252 * an ignorable character in a Java identifier or a Unicode identifier. 6253 * <p> 6254 * The following Unicode characters are ignorable in a Java identifier 6255 * or a Unicode identifier: 6256 * <ul> 6257 * <li>ISO control characters that are not whitespace 6258 * <ul> 6259 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6260 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6261 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6262 * </ul> 6263 * 6264 * <li>all characters that have the {@code FORMAT} general 6265 * category value 6266 * </ul> 6267 * 6268 * @param codePoint the character (Unicode code point) to be tested. 6269 * @return {@code true} if the character is an ignorable control 6270 * character that may be part of a Java or Unicode identifier; 6271 * {@code false} otherwise. 6272 * @see Character#isJavaIdentifierPart(int) 6273 * @see Character#isUnicodeIdentifierPart(int) 6274 * @since 1.5 6275 */ 6276 public static boolean isIdentifierIgnorable(int codePoint) { 6277 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6278 } 6279 6280 /** 6281 * Converts the character argument to lowercase using case 6282 * mapping information from the UnicodeData file. 
6283 * <p> 6284 * Note that 6285 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 6286 * does not always return {@code true} for some ranges of 6287 * characters, particularly those that are symbols or ideographs. 6288 * 6289 * <p>In general, {@link String#toLowerCase()} should be used to map 6290 * characters to lowercase. {@code String} case mapping methods 6291 * have several benefits over {@code Character} case mapping methods. 6292 * {@code String} case mapping methods can perform locale-sensitive 6293 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6294 * the {@code Character} case mapping methods cannot. 6295 * 6296 * <p><b>Note:</b> This method cannot handle <a 6297 * href="#supplementary"> supplementary characters</a>. To support 6298 * all Unicode characters, including supplementary characters, use 6299 * the {@link #toLowerCase(int)} method. 6300 * 6301 * @param ch the character to be converted. 6302 * @return the lowercase equivalent of the character, if any; 6303 * otherwise, the character itself. 6304 * @see Character#isLowerCase(char) 6305 * @see String#toLowerCase() 6306 */ 6307 public static char toLowerCase(char ch) { 6308 return (char)toLowerCase((int)ch); 6309 } 6310 6311 /** 6312 * Converts the character (Unicode code point) argument to 6313 * lowercase using case mapping information from the UnicodeData 6314 * file. 6315 * 6316 * <p> Note that 6317 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 6318 * does not always return {@code true} for some ranges of 6319 * characters, particularly those that are symbols or ideographs. 6320 * 6321 * <p>In general, {@link String#toLowerCase()} should be used to map 6322 * characters to lowercase. {@code String} case mapping methods 6323 * have several benefits over {@code Character} case mapping methods. 
6324 * {@code String} case mapping methods can perform locale-sensitive 6325 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6326 * the {@code Character} case mapping methods cannot. 6327 * 6328 * @param codePoint the character (Unicode code point) to be converted. 6329 * @return the lowercase equivalent of the character (Unicode code 6330 * point), if any; otherwise, the character itself. 6331 * @see Character#isLowerCase(int) 6332 * @see String#toLowerCase() 6333 * 6334 * @since 1.5 6335 */ 6336 public static int toLowerCase(int codePoint) { 6337 return CharacterData.of(codePoint).toLowerCase(codePoint); 6338 } 6339 6340 /** 6341 * Converts the character argument to uppercase using case mapping 6342 * information from the UnicodeData file. 6343 * <p> 6344 * Note that 6345 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 6346 * does not always return {@code true} for some ranges of 6347 * characters, particularly those that are symbols or ideographs. 6348 * 6349 * <p>In general, {@link String#toUpperCase()} should be used to map 6350 * characters to uppercase. {@code String} case mapping methods 6351 * have several benefits over {@code Character} case mapping methods. 6352 * {@code String} case mapping methods can perform locale-sensitive 6353 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6354 * the {@code Character} case mapping methods cannot. 6355 * 6356 * <p><b>Note:</b> This method cannot handle <a 6357 * href="#supplementary"> supplementary characters</a>. To support 6358 * all Unicode characters, including supplementary characters, use 6359 * the {@link #toUpperCase(int)} method. 6360 * 6361 * @param ch the character to be converted. 6362 * @return the uppercase equivalent of the character, if any; 6363 * otherwise, the character itself. 
6364 * @see Character#isUpperCase(char) 6365 * @see String#toUpperCase() 6366 */ 6367 public static char toUpperCase(char ch) { 6368 return (char)toUpperCase((int)ch); 6369 } 6370 6371 /** 6372 * Converts the character (Unicode code point) argument to 6373 * uppercase using case mapping information from the UnicodeData 6374 * file. 6375 * 6376 * <p>Note that 6377 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 6378 * does not always return {@code true} for some ranges of 6379 * characters, particularly those that are symbols or ideographs. 6380 * 6381 * <p>In general, {@link String#toUpperCase()} should be used to map 6382 * characters to uppercase. {@code String} case mapping methods 6383 * have several benefits over {@code Character} case mapping methods. 6384 * {@code String} case mapping methods can perform locale-sensitive 6385 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6386 * the {@code Character} case mapping methods cannot. 6387 * 6388 * @param codePoint the character (Unicode code point) to be converted. 6389 * @return the uppercase equivalent of the character, if any; 6390 * otherwise, the character itself. 6391 * @see Character#isUpperCase(int) 6392 * @see String#toUpperCase() 6393 * 6394 * @since 1.5 6395 */ 6396 public static int toUpperCase(int codePoint) { 6397 return CharacterData.of(codePoint).toUpperCase(codePoint); 6398 } 6399 6400 /** 6401 * Converts the character argument to titlecase using case mapping 6402 * information from the UnicodeData file. If a character has no 6403 * explicit titlecase mapping and is not itself a titlecase char 6404 * according to UnicodeData, then the uppercase mapping is 6405 * returned as an equivalent titlecase mapping. If the 6406 * {@code char} argument is already a titlecase 6407 * {@code char}, the same {@code char} value will be 6408 * returned. 
6409 * <p> 6410 * Note that 6411 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 6412 * does not always return {@code true} for some ranges of 6413 * characters. 6414 * 6415 * <p><b>Note:</b> This method cannot handle <a 6416 * href="#supplementary"> supplementary characters</a>. To support 6417 * all Unicode characters, including supplementary characters, use 6418 * the {@link #toTitleCase(int)} method. 6419 * 6420 * @param ch the character to be converted. 6421 * @return the titlecase equivalent of the character, if any; 6422 * otherwise, the character itself. 6423 * @see Character#isTitleCase(char) 6424 * @see Character#toLowerCase(char) 6425 * @see Character#toUpperCase(char) 6426 * @since 1.0.2 6427 */ 6428 public static char toTitleCase(char ch) { 6429 return (char)toTitleCase((int)ch); 6430 } 6431 6432 /** 6433 * Converts the character (Unicode code point) argument to titlecase using case mapping 6434 * information from the UnicodeData file. If a character has no 6435 * explicit titlecase mapping and is not itself a titlecase char 6436 * according to UnicodeData, then the uppercase mapping is 6437 * returned as an equivalent titlecase mapping. If the 6438 * character argument is already a titlecase 6439 * character, the same character value will be 6440 * returned. 6441 * 6442 * <p>Note that 6443 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 6444 * does not always return {@code true} for some ranges of 6445 * characters. 6446 * 6447 * @param codePoint the character (Unicode code point) to be converted. 6448 * @return the titlecase equivalent of the character, if any; 6449 * otherwise, the character itself. 
6450 * @see Character#isTitleCase(int) 6451 * @see Character#toLowerCase(int) 6452 * @see Character#toUpperCase(int) 6453 * @since 1.5 6454 */ 6455 public static int toTitleCase(int codePoint) { 6456 return CharacterData.of(codePoint).toTitleCase(codePoint); 6457 } 6458 6459 /** 6460 * Returns the numeric value of the character {@code ch} in the 6461 * specified radix. 6462 * <p> 6463 * If the radix is not in the range {@code MIN_RADIX} ≤ 6464 * {@code radix} ≤ {@code MAX_RADIX} or if the 6465 * value of {@code ch} is not a valid digit in the specified 6466 * radix, {@code -1} is returned. A character is a valid digit 6467 * if at least one of the following is true: 6468 * <ul> 6469 * <li>The method {@code isDigit} is {@code true} of the character 6470 * and the Unicode decimal digit value of the character (or its 6471 * single-character decomposition) is less than the specified radix. 6472 * In this case the decimal digit value is returned. 6473 * <li>The character is one of the uppercase Latin letters 6474 * {@code 'A'} through {@code 'Z'} and its code is less than 6475 * {@code radix + 'A' - 10}. 6476 * In this case, {@code ch - 'A' + 10} 6477 * is returned. 6478 * <li>The character is one of the lowercase Latin letters 6479 * {@code 'a'} through {@code 'z'} and its code is less than 6480 * {@code radix + 'a' - 10}. 6481 * In this case, {@code ch - 'a' + 10} 6482 * is returned. 6483 * <li>The character is one of the fullwidth uppercase Latin letters A 6484 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6485 * and its code is less than 6486 * {@code radix + '\u005CuFF21' - 10}. 6487 * In this case, {@code ch - '\u005CuFF21' + 10} 6488 * is returned. 6489 * <li>The character is one of the fullwidth lowercase Latin letters a 6490 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6491 * and its code is less than 6492 * {@code radix + '\u005CuFF41' - 10}. 6493 * In this case, {@code ch - '\u005CuFF41' + 10} 6494 * is returned. 
6495 * </ul> 6496 * 6497 * <p><b>Note:</b> This method cannot handle <a 6498 * href="#supplementary"> supplementary characters</a>. To support 6499 * all Unicode characters, including supplementary characters, use 6500 * the {@link #digit(int, int)} method. 6501 * 6502 * @param ch the character to be converted. 6503 * @param radix the radix. 6504 * @return the numeric value represented by the character in the 6505 * specified radix. 6506 * @see Character#forDigit(int, int) 6507 * @see Character#isDigit(char) 6508 */ 6509 public static int digit(char ch, int radix) { 6510 return digit((int)ch, radix); 6511 } 6512 6513 /** 6514 * Returns the numeric value of the specified character (Unicode 6515 * code point) in the specified radix. 6516 * 6517 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 6518 * {@code radix} ≤ {@code MAX_RADIX} or if the 6519 * character is not a valid digit in the specified 6520 * radix, {@code -1} is returned. A character is a valid digit 6521 * if at least one of the following is true: 6522 * <ul> 6523 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 6524 * and the Unicode decimal digit value of the character (or its 6525 * single-character decomposition) is less than the specified radix. 6526 * In this case the decimal digit value is returned. 6527 * <li>The character is one of the uppercase Latin letters 6528 * {@code 'A'} through {@code 'Z'} and its code is less than 6529 * {@code radix + 'A' - 10}. 6530 * In this case, {@code codePoint - 'A' + 10} 6531 * is returned. 6532 * <li>The character is one of the lowercase Latin letters 6533 * {@code 'a'} through {@code 'z'} and its code is less than 6534 * {@code radix + 'a' - 10}. 6535 * In this case, {@code codePoint - 'a' + 10} 6536 * is returned. 
6537 * <li>The character is one of the fullwidth uppercase Latin letters A 6538 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6539 * and its code is less than 6540 * {@code radix + '\u005CuFF21' - 10}. 6541 * In this case, 6542 * {@code codePoint - '\u005CuFF21' + 10} 6543 * is returned. 6544 * <li>The character is one of the fullwidth lowercase Latin letters a 6545 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6546 * and its code is less than 6547 * {@code radix + '\u005CuFF41'- 10}. 6548 * In this case, 6549 * {@code codePoint - '\u005CuFF41' + 10} 6550 * is returned. 6551 * </ul> 6552 * 6553 * @param codePoint the character (Unicode code point) to be converted. 6554 * @param radix the radix. 6555 * @return the numeric value represented by the character in the 6556 * specified radix. 6557 * @see Character#forDigit(int, int) 6558 * @see Character#isDigit(int) 6559 * @since 1.5 6560 */ 6561 public static int digit(int codePoint, int radix) { 6562 return CharacterData.of(codePoint).digit(codePoint, radix); 6563 } 6564 6565 /** 6566 * Returns the {@code int} value that the specified Unicode 6567 * character represents. For example, the character 6568 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 6569 * an int with a value of 50. 6570 * <p> 6571 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6572 * {@code '\u005Cu005A'}), lowercase 6573 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6574 * full width variant ({@code '\u005CuFF21'} through 6575 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6576 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6577 * through 35. This is independent of the Unicode specification, 6578 * which does not assign numeric values to these {@code char} 6579 * values. 6580 * <p> 6581 * If the character does not have a numeric value, then -1 is returned. 
6582 * If the character has a numeric value that cannot be represented as a 6583 * nonnegative integer (for example, a fractional value), then -2 6584 * is returned. 6585 * 6586 * <p><b>Note:</b> This method cannot handle <a 6587 * href="#supplementary"> supplementary characters</a>. To support 6588 * all Unicode characters, including supplementary characters, use 6589 * the {@link #getNumericValue(int)} method. 6590 * 6591 * @param ch the character to be converted. 6592 * @return the numeric value of the character, as a nonnegative {@code int} 6593 * value; -2 if the character has a numeric value that is not a 6594 * nonnegative integer; -1 if the character has no numeric value. 6595 * @see Character#forDigit(int, int) 6596 * @see Character#isDigit(char) 6597 * @since 1.1 6598 */ 6599 public static int getNumericValue(char ch) { 6600 return getNumericValue((int)ch); 6601 } 6602 6603 /** 6604 * Returns the {@code int} value that the specified 6605 * character (Unicode code point) represents. For example, the character 6606 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 6607 * an {@code int} with a value of 50. 6608 * <p> 6609 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6610 * {@code '\u005Cu005A'}), lowercase 6611 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6612 * full width variant ({@code '\u005CuFF21'} through 6613 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6614 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6615 * through 35. This is independent of the Unicode specification, 6616 * which does not assign numeric values to these {@code char} 6617 * values. 6618 * <p> 6619 * If the character does not have a numeric value, then -1 is returned. 6620 * If the character has a numeric value that cannot be represented as a 6621 * nonnegative integer (for example, a fractional value), then -2 6622 * is returned. 
6623 * 6624 * @param codePoint the character (Unicode code point) to be converted. 6625 * @return the numeric value of the character, as a nonnegative {@code int} 6626 * value; -2 if the character has a numeric value that is not a 6627 * nonnegative integer; -1 if the character has no numeric value. 6628 * @see Character#forDigit(int, int) 6629 * @see Character#isDigit(int) 6630 * @since 1.5 6631 */ 6632 public static int getNumericValue(int codePoint) { 6633 return CharacterData.of(codePoint).getNumericValue(codePoint); 6634 } 6635 6636 /** 6637 * Determines if the specified character is ISO-LATIN-1 white space. 6638 * This method returns {@code true} for the following five 6639 * characters only: 6640 * <table summary="truechars"> 6641 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 6642 * <td>{@code HORIZONTAL TABULATION}</td></tr> 6643 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 6644 * <td>{@code NEW LINE}</td></tr> 6645 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 6646 * <td>{@code FORM FEED}</td></tr> 6647 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 6648 * <td>{@code CARRIAGE RETURN}</td></tr> 6649 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 6650 * <td>{@code SPACE}</td></tr> 6651 * </table> 6652 * 6653 * @param ch the character to be tested. 6654 * @return {@code true} if the character is ISO-LATIN-1 white 6655 * space; {@code false} otherwise. 6656 * @see Character#isSpaceChar(char) 6657 * @see Character#isWhitespace(char) 6658 * @deprecated Replaced by isWhitespace(char). 6659 */ 6660 @Deprecated 6661 public static boolean isSpace(char ch) { 6662 return (ch <= 0x0020) && 6663 (((((1L << 0x0009) | 6664 (1L << 0x000A) | 6665 (1L << 0x000C) | 6666 (1L << 0x000D) | 6667 (1L << 0x0020)) >> ch) & 1L) != 0); 6668 } 6669 6670 6671 /** 6672 * Determines if the specified character is a Unicode space character. 
6673 * A character is considered to be a space character if and only if 6674 * it is specified to be a space character by the Unicode Standard. This 6675 * method returns true if the character's general category type is any of 6676 * the following: 6677 * <ul> 6678 * <li> {@code SPACE_SEPARATOR} 6679 * <li> {@code LINE_SEPARATOR} 6680 * <li> {@code PARAGRAPH_SEPARATOR} 6681 * </ul> 6682 * 6683 * <p><b>Note:</b> This method cannot handle <a 6684 * href="#supplementary"> supplementary characters</a>. To support 6685 * all Unicode characters, including supplementary characters, use 6686 * the {@link #isSpaceChar(int)} method. 6687 * 6688 * @param ch the character to be tested. 6689 * @return {@code true} if the character is a space character; 6690 * {@code false} otherwise. 6691 * @see Character#isWhitespace(char) 6692 * @since 1.1 6693 */ 6694 public static boolean isSpaceChar(char ch) { 6695 return isSpaceChar((int)ch); 6696 } 6697 6698 /** 6699 * Determines if the specified character (Unicode code point) is a 6700 * Unicode space character. A character is considered to be a 6701 * space character if and only if it is specified to be a space 6702 * character by the Unicode Standard. This method returns true if 6703 * the character's general category type is any of the following: 6704 * 6705 * <ul> 6706 * <li> {@link #SPACE_SEPARATOR} 6707 * <li> {@link #LINE_SEPARATOR} 6708 * <li> {@link #PARAGRAPH_SEPARATOR} 6709 * </ul> 6710 * 6711 * @param codePoint the character (Unicode code point) to be tested. 6712 * @return {@code true} if the character is a space character; 6713 * {@code false} otherwise. 
6714 * @see Character#isWhitespace(int) 6715 * @since 1.5 6716 */ 6717 public static boolean isSpaceChar(int codePoint) { 6718 return ((((1 << Character.SPACE_SEPARATOR) | 6719 (1 << Character.LINE_SEPARATOR) | 6720 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6721 != 0; 6722 } 6723 6724 /** 6725 * Determines if the specified character is white space according to Java. 6726 * A character is a Java whitespace character if and only if it satisfies 6727 * one of the following criteria: 6728 * <ul> 6729 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 6730 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 6731 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6732 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6733 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6734 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6735 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6736 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6737 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6738 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6739 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6740 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6741 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6742 * </ul> 6743 * 6744 * <p><b>Note:</b> This method cannot handle <a 6745 * href="#supplementary"> supplementary characters</a>. To support 6746 * all Unicode characters, including supplementary characters, use 6747 * the {@link #isWhitespace(int)} method. 6748 * 6749 * @param ch the character to be tested. 6750 * @return {@code true} if the character is a Java whitespace 6751 * character; {@code false} otherwise. 
6752 * @see Character#isSpaceChar(char) 6753 * @since 1.1 6754 */ 6755 public static boolean isWhitespace(char ch) { 6756 return isWhitespace((int)ch); 6757 } 6758 6759 /** 6760 * Determines if the specified character (Unicode code point) is 6761 * white space according to Java. A character is a Java 6762 * whitespace character if and only if it satisfies one of the 6763 * following criteria: 6764 * <ul> 6765 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6766 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6767 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6768 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6769 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6770 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6771 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6772 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6773 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6774 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6775 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6776 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6777 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6778 * </ul> 6779 * 6780 * @param codePoint the character (Unicode code point) to be tested. 6781 * @return {@code true} if the character is a Java whitespace 6782 * character; {@code false} otherwise. 6783 * @see Character#isSpaceChar(int) 6784 * @since 1.5 6785 */ 6786 public static boolean isWhitespace(int codePoint) { 6787 return CharacterData.of(codePoint).isWhitespace(codePoint); 6788 } 6789 6790 /** 6791 * Determines if the specified character is an ISO control 6792 * character. A character is considered to be an ISO control 6793 * character if its code is in the range {@code '\u005Cu0000'} 6794 * through {@code '\u005Cu001F'} or in the range 6795 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 
6796 * 6797 * <p><b>Note:</b> This method cannot handle <a 6798 * href="#supplementary"> supplementary characters</a>. To support 6799 * all Unicode characters, including supplementary characters, use 6800 * the {@link #isISOControl(int)} method. 6801 * 6802 * @param ch the character to be tested. 6803 * @return {@code true} if the character is an ISO control character; 6804 * {@code false} otherwise. 6805 * 6806 * @see Character#isSpaceChar(char) 6807 * @see Character#isWhitespace(char) 6808 * @since 1.1 6809 */ 6810 public static boolean isISOControl(char ch) { 6811 return isISOControl((int)ch); 6812 } 6813 6814 /** 6815 * Determines if the referenced character (Unicode code point) is an ISO control 6816 * character. A character is considered to be an ISO control 6817 * character if its code is in the range {@code '\u005Cu0000'} 6818 * through {@code '\u005Cu001F'} or in the range 6819 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6820 * 6821 * @param codePoint the character (Unicode code point) to be tested. 6822 * @return {@code true} if the character is an ISO control character; 6823 * {@code false} otherwise. 6824 * @see Character#isSpaceChar(int) 6825 * @see Character#isWhitespace(int) 6826 * @since 1.5 6827 */ 6828 public static boolean isISOControl(int codePoint) { 6829 // Optimized form of: 6830 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6831 // (codePoint >= 0x7F && codePoint <= 0x9F); 6832 return codePoint <= 0x9F && 6833 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6834 } 6835 6836 /** 6837 * Returns a value indicating a character's general category. 6838 * 6839 * <p><b>Note:</b> This method cannot handle <a 6840 * href="#supplementary"> supplementary characters</a>. To support 6841 * all Unicode characters, including supplementary characters, use 6842 * the {@link #getType(int)} method. 6843 * 6844 * @param ch the character to be tested. 6845 * @return a value of type {@code int} representing the 6846 * character's general category. 
6847 * @see Character#COMBINING_SPACING_MARK 6848 * @see Character#CONNECTOR_PUNCTUATION 6849 * @see Character#CONTROL 6850 * @see Character#CURRENCY_SYMBOL 6851 * @see Character#DASH_PUNCTUATION 6852 * @see Character#DECIMAL_DIGIT_NUMBER 6853 * @see Character#ENCLOSING_MARK 6854 * @see Character#END_PUNCTUATION 6855 * @see Character#FINAL_QUOTE_PUNCTUATION 6856 * @see Character#FORMAT 6857 * @see Character#INITIAL_QUOTE_PUNCTUATION 6858 * @see Character#LETTER_NUMBER 6859 * @see Character#LINE_SEPARATOR 6860 * @see Character#LOWERCASE_LETTER 6861 * @see Character#MATH_SYMBOL 6862 * @see Character#MODIFIER_LETTER 6863 * @see Character#MODIFIER_SYMBOL 6864 * @see Character#NON_SPACING_MARK 6865 * @see Character#OTHER_LETTER 6866 * @see Character#OTHER_NUMBER 6867 * @see Character#OTHER_PUNCTUATION 6868 * @see Character#OTHER_SYMBOL 6869 * @see Character#PARAGRAPH_SEPARATOR 6870 * @see Character#PRIVATE_USE 6871 * @see Character#SPACE_SEPARATOR 6872 * @see Character#START_PUNCTUATION 6873 * @see Character#SURROGATE 6874 * @see Character#TITLECASE_LETTER 6875 * @see Character#UNASSIGNED 6876 * @see Character#UPPERCASE_LETTER 6877 * @since 1.1 6878 */ 6879 public static int getType(char ch) { 6880 return getType((int)ch); 6881 } 6882 6883 /** 6884 * Returns a value indicating a character's general category. 6885 * 6886 * @param codePoint the character (Unicode code point) to be tested. 6887 * @return a value of type {@code int} representing the 6888 * character's general category. 
6889 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6890 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6891 * @see Character#CONTROL CONTROL 6892 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6893 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6894 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6895 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6896 * @see Character#END_PUNCTUATION END_PUNCTUATION 6897 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6898 * @see Character#FORMAT FORMAT 6899 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6900 * @see Character#LETTER_NUMBER LETTER_NUMBER 6901 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6902 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6903 * @see Character#MATH_SYMBOL MATH_SYMBOL 6904 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6905 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6906 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6907 * @see Character#OTHER_LETTER OTHER_LETTER 6908 * @see Character#OTHER_NUMBER OTHER_NUMBER 6909 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6910 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6911 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6912 * @see Character#PRIVATE_USE PRIVATE_USE 6913 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6914 * @see Character#START_PUNCTUATION START_PUNCTUATION 6915 * @see Character#SURROGATE SURROGATE 6916 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6917 * @see Character#UNASSIGNED UNASSIGNED 6918 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6919 * @since 1.5 6920 */ 6921 public static int getType(int codePoint) { 6922 return CharacterData.of(codePoint).getType(codePoint); 6923 } 6924 6925 /** 6926 * Determines the character representation for a specific digit in 6927 * the specified radix. 
If the value of {@code radix} is not a 6928 * valid radix, or the value of {@code digit} is not a valid 6929 * digit in the specified radix, the null character 6930 * ({@code '\u005Cu0000'}) is returned. 6931 * <p> 6932 * The {@code radix} argument is valid if it is greater than or 6933 * equal to {@code MIN_RADIX} and less than or equal to 6934 * {@code MAX_RADIX}. The {@code digit} argument is valid if 6935 * {@code 0 <= digit < radix}. 6936 * <p> 6937 * If the digit is less than 10, then 6938 * {@code '0' + digit} is returned. Otherwise, the value 6939 * {@code 'a' + digit - 10} is returned. 6940 * 6941 * @param digit the number to convert to a character. 6942 * @param radix the radix. 6943 * @return the {@code char} representation of the specified digit 6944 * in the specified radix. 6945 * @see Character#MIN_RADIX 6946 * @see Character#MAX_RADIX 6947 * @see Character#digit(char, int) 6948 */ 6949 public static char forDigit(int digit, int radix) { 6950 if ((digit >= radix) || (digit < 0)) { 6951 return '\0'; 6952 } 6953 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6954 return '\0'; 6955 } 6956 if (digit < 10) { 6957 return (char)('0' + digit); 6958 } 6959 return (char)('a' - 10 + digit); 6960 } 6961 6962 /** 6963 * Returns the Unicode directionality property for the given 6964 * character. Character directionality is used to calculate the 6965 * visual ordering of text. The directionality value of undefined 6966 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 6967 * 6968 * <p><b>Note:</b> This method cannot handle <a 6969 * href="#supplementary"> supplementary characters</a>. To support 6970 * all Unicode characters, including supplementary characters, use 6971 * the {@link #getDirectionality(int)} method. 6972 * 6973 * @param ch {@code char} for which the directionality property 6974 * is requested. 6975 * @return the directionality property of the {@code char} value. 
6976 * 6977 * @see Character#DIRECTIONALITY_UNDEFINED 6978 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6979 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6980 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6981 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6982 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6983 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6984 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6985 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6986 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6987 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6988 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6989 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6990 * @see Character#DIRECTIONALITY_WHITESPACE 6991 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6992 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6993 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6994 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6995 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6996 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6997 * @since 1.4 6998 */ 6999 public static byte getDirectionality(char ch) { 7000 return getDirectionality((int)ch); 7001 } 7002 7003 /** 7004 * Returns the Unicode directionality property for the given 7005 * character (Unicode code point). Character directionality is 7006 * used to calculate the visual ordering of text. The 7007 * directionality value of undefined character is {@link 7008 * #DIRECTIONALITY_UNDEFINED}. 7009 * 7010 * @param codePoint the character (Unicode code point) for which 7011 * the directionality property is requested. 7012 * @return the directionality property of the character. 
7013 * 7014 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 7015 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 7016 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 7017 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 7018 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 7019 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 7020 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 7021 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 7022 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 7023 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 7024 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 7025 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 7026 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 7027 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 7028 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 7029 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 7030 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 7031 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 7032 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 7033 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 7034 * @since 1.5 7035 */ 7036 public static byte getDirectionality(int codePoint) { 7037 return CharacterData.of(codePoint).getDirectionality(codePoint); 7038 } 7039 7040 /** 7041 * 
Determines whether the character is mirrored according to the 7042 * Unicode specification. Mirrored characters should have their 7043 * glyphs horizontally mirrored when displayed in text that is 7044 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 7045 * PARENTHESIS is semantically defined to be an <i>opening 7046 * parenthesis</i>. This will appear as a "(" in text that is 7047 * left-to-right but as a ")" in text that is right-to-left. 7048 * 7049 * <p><b>Note:</b> This method cannot handle <a 7050 * href="#supplementary"> supplementary characters</a>. To support 7051 * all Unicode characters, including supplementary characters, use 7052 * the {@link #isMirrored(int)} method. 7053 * 7054 * @param ch {@code char} for which the mirrored property is requested 7055 * @return {@code true} if the char is mirrored, {@code false} 7056 * if the {@code char} is not mirrored or is not defined. 7057 * @since 1.4 7058 */ 7059 public static boolean isMirrored(char ch) { 7060 return isMirrored((int)ch); 7061 } 7062 7063 /** 7064 * Determines whether the specified character (Unicode code point) 7065 * is mirrored according to the Unicode specification. Mirrored 7066 * characters should have their glyphs horizontally mirrored when 7067 * displayed in text that is right-to-left. For example, 7068 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 7069 * defined to be an <i>opening parenthesis</i>. This will appear 7070 * as a "(" in text that is left-to-right but as a ")" in text 7071 * that is right-to-left. 7072 * 7073 * @param codePoint the character (Unicode code point) to be tested. 7074 * @return {@code true} if the character is mirrored, {@code false} 7075 * if the character is not mirrored or is not defined. 7076 * @since 1.5 7077 */ 7078 public static boolean isMirrored(int codePoint) { 7079 return CharacterData.of(codePoint).isMirrored(codePoint); 7080 } 7081 7082 /** 7083 * Compares two {@code Character} objects numerically. 
7084 * 7085 * @param anotherCharacter the {@code Character} to be compared. 7086 7087 * @return the value {@code 0} if the argument {@code Character} 7088 * is equal to this {@code Character}; a value less than 7089 * {@code 0} if this {@code Character} is numerically less 7090 * than the {@code Character} argument; and a value greater than 7091 * {@code 0} if this {@code Character} is numerically greater 7092 * than the {@code Character} argument (unsigned comparison). 7093 * Note that this is strictly a numerical comparison; it is not 7094 * locale-dependent. 7095 * @since 1.2 7096 */ 7097 public int compareTo(Character anotherCharacter) { 7098 return compare(this.value, anotherCharacter.value); 7099 } 7100 7101 /** 7102 * Compares two {@code char} values numerically. 7103 * The value returned is identical to what would be returned by: 7104 * <pre> 7105 * Character.valueOf(x).compareTo(Character.valueOf(y)) 7106 * </pre> 7107 * 7108 * @param x the first {@code char} to compare 7109 * @param y the second {@code char} to compare 7110 * @return the value {@code 0} if {@code x == y}; 7111 * a value less than {@code 0} if {@code x < y}; and 7112 * a value greater than {@code 0} if {@code x > y} 7113 * @since 1.7 7114 */ 7115 public static int compare(char x, char y) { 7116 return x - y; 7117 } 7118 7119 /** 7120 * Converts the character (Unicode code point) argument to uppercase using 7121 * information from the UnicodeData file. 7122 * 7123 * @param codePoint the character (Unicode code point) to be converted. 7124 * @return either the uppercase equivalent of the character, if 7125 * any, or an error flag ({@code Character.ERROR}) 7126 * that indicates that a 1:M {@code char} mapping exists. 
7127 * @see Character#isLowerCase(char) 7128 * @see Character#isUpperCase(char) 7129 * @see Character#toLowerCase(char) 7130 * @see Character#toTitleCase(char) 7131 * @since 1.4 7132 */ 7133 static int toUpperCaseEx(int codePoint) { 7134 assert isValidCodePoint(codePoint); 7135 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 7136 } 7137 7138 /** 7139 * Converts the character (Unicode code point) argument to uppercase using case 7140 * mapping information from the SpecialCasing file in the Unicode 7141 * specification. If a character has no explicit uppercase 7142 * mapping, then the {@code char} itself is returned in the 7143 * {@code char[]}. 7144 * 7145 * @param codePoint the character (Unicode code point) to be converted. 7146 * @return a {@code char[]} with the uppercased character. 7147 * @since 1.4 7148 */ 7149 static char[] toUpperCaseCharArray(int codePoint) { 7150 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 7151 assert isBmpCodePoint(codePoint); 7152 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 7153 } 7154 7155 /** 7156 * The number of bits used to represent a <tt>char</tt> value in unsigned 7157 * binary form, constant {@code 16}. 7158 * 7159 * @since 1.5 7160 */ 7161 public static final int SIZE = 16; 7162 7163 /** 7164 * The number of bytes used to represent a {@code char} value in unsigned 7165 * binary form. 7166 * 7167 * @since 1.8 7168 */ 7169 public static final int BYTES = SIZE / Byte.SIZE; 7170 7171 /** 7172 * Returns the value obtained by reversing the order of the bytes in the 7173 * specified <tt>char</tt> value. 7174 * 7175 * @param ch The {@code char} of which to reverse the byte order. 7176 * @return the value obtained by reversing (or, equivalently, swapping) 7177 * the bytes in the specified <tt>char</tt> value. 
* @since 1.5
     */
    public static char reverseBytes(char ch) {
        // The high byte moves down and the low byte moves up; the cast to
        // char discards the bits shifted above bit 15.
        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
    }

    /**
     * Returns the Unicode name of the specified character
     * {@code codePoint}, or null if the code point is
     * {@link #UNASSIGNED unassigned}.
     * <p>
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
     *
     * <blockquote>{@code
     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
     *     + " "
     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
     *
     * }</blockquote>
     *
     * @param  codePoint the character (Unicode code point)
     *
     * @return the Unicode name of the specified character, or null if
     *         the code point is unassigned.
     *
     * @exception IllegalArgumentException if the specified
     *            {@code codePoint} is not a valid Unicode
     *            code point.
     *
     * @since 1.7
     */
    public static String getName(int codePoint) {
        if (!isValidCodePoint(codePoint)) {
            // Report the rejected value so the caller can see what was passed.
            throw new IllegalArgumentException(
                "Not a valid Unicode code point: 0x"
                + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH));
        }
        String name = CharacterName.get(codePoint);
        if (name != null) {
            return name;
        }
        if (getType(codePoint) == UNASSIGNED) {
            return null;
        }
        // No name was found: fall back to "<BLOCK NAME> <HEX CODE POINT>".
        String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
        UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (block != null) {
            return block.toString().replace('_', ' ') + " " + hex;
        }
        // should never come here
        return hex;
    }
}