1 /* 2 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The {@code Character} class wraps a value of the primitive 35 * type {@code char} in an object. An object of type 36 * {@code Character} contains a single field whose type is 37 * {@code char}. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.0.0. 
44 * <p> 45 * The methods and data of class {@code Character} are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h4><a name="unicode">Unicode Character Representations</a></h4> 58 * 59 * <p>The {@code char} data type (and therefore the value that a 60 * {@code Character} object encapsulates) is based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in {@code char} arrays and 76 * in the {@code String} and {@code StringBuffer} classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of {@code char} values, the first from the <em>high-surrogates</em> 79 * range, (\uD800-\uDBFF), the second from the 80 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
81 * 82 * <p>A {@code char} value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * {@code int} value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of {@code int} are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate {@code char} values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a {@code char} value cannot support 95 * supplementary characters. They treat {@code char} values from the 96 * surrogate ranges as undefined characters. For example, 97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an {@code int} value support all 102 * Unicode characters, including supplementary characters. For 103 * example, {@code Character.isLetter(0x2F81A)} returns 104 * {@code true} because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * {@code char} values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * {@code digit} method, the {@code forDigit} method, and the 129 * {@code toString} method of class {@code Integer}. 130 * 131 * @see Character#digit(char, int) 132 * @see Character#forDigit(int, int) 133 * @see Integer#toString(int, int) 134 * @see Integer#valueOf(String) 135 */ 136 public static final int MIN_RADIX = 2; 137 138 /** 139 * The maximum radix available for conversion to and from strings. 140 * The constant value of this field is the largest value permitted 141 * for the radix argument in radix-conversion methods such as the 142 * {@code digit} method, the {@code forDigit} method, and the 143 * {@code toString} method of class {@code Integer}. 144 * 145 * @see Character#digit(char, int) 146 * @see Character#forDigit(int, int) 147 * @see Integer#toString(int, int) 148 * @see Integer#valueOf(String) 149 */ 150 public static final int MAX_RADIX = 36; 151 152 /** 153 * The constant value of this field is the smallest value of type 154 * {@code char}, {@code '\u005Cu0000'}. 155 * 156 * @since 1.0.2 157 */ 158 public static final char MIN_VALUE = '\u0000'; 159 160 /** 161 * The constant value of this field is the largest value of type 162 * {@code char}, {@code '\u005CuFFFF'}. 163 * 164 * @since 1.0.2 165 */ 166 public static final char MAX_VALUE = '\uFFFF'; 167 168 /** 169 * The {@code Class} instance representing the primitive type 170 * {@code char}. 
171 * 172 * @since 1.1 173 */ 174 @SuppressWarnings("unchecked") 175 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 176 177 /* 178 * Normative general types 179 */ 180 181 /* 182 * General character types 183 */ 184 185 /** 186 * General category "Cn" in the Unicode specification. 187 * @since 1.1 188 */ 189 public static final byte UNASSIGNED = 0; 190 191 /** 192 * General category "Lu" in the Unicode specification. 193 * @since 1.1 194 */ 195 public static final byte UPPERCASE_LETTER = 1; 196 197 /** 198 * General category "Ll" in the Unicode specification. 199 * @since 1.1 200 */ 201 public static final byte LOWERCASE_LETTER = 2; 202 203 /** 204 * General category "Lt" in the Unicode specification. 205 * @since 1.1 206 */ 207 public static final byte TITLECASE_LETTER = 3; 208 209 /** 210 * General category "Lm" in the Unicode specification. 211 * @since 1.1 212 */ 213 public static final byte MODIFIER_LETTER = 4; 214 215 /** 216 * General category "Lo" in the Unicode specification. 217 * @since 1.1 218 */ 219 public static final byte OTHER_LETTER = 5; 220 221 /** 222 * General category "Mn" in the Unicode specification. 223 * @since 1.1 224 */ 225 public static final byte NON_SPACING_MARK = 6; 226 227 /** 228 * General category "Me" in the Unicode specification. 229 * @since 1.1 230 */ 231 public static final byte ENCLOSING_MARK = 7; 232 233 /** 234 * General category "Mc" in the Unicode specification. 235 * @since 1.1 236 */ 237 public static final byte COMBINING_SPACING_MARK = 8; 238 239 /** 240 * General category "Nd" in the Unicode specification. 241 * @since 1.1 242 */ 243 public static final byte DECIMAL_DIGIT_NUMBER = 9; 244 245 /** 246 * General category "Nl" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte LETTER_NUMBER = 10; 250 251 /** 252 * General category "No" in the Unicode specification. 
253 * @since 1.1 254 */ 255 public static final byte OTHER_NUMBER = 11; 256 257 /** 258 * General category "Zs" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte SPACE_SEPARATOR = 12; 262 263 /** 264 * General category "Zl" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte LINE_SEPARATOR = 13; 268 269 /** 270 * General category "Zp" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte PARAGRAPH_SEPARATOR = 14; 274 275 /** 276 * General category "Cc" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte CONTROL = 15; 280 281 /** 282 * General category "Cf" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte FORMAT = 16; 286 287 /** 288 * General category "Co" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte PRIVATE_USE = 18; 292 293 /** 294 * General category "Cs" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte SURROGATE = 19; 298 299 /** 300 * General category "Pd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DASH_PUNCTUATION = 20; 304 305 /** 306 * General category "Ps" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte START_PUNCTUATION = 21; 310 311 /** 312 * General category "Pe" in the Unicode specification. 313 * @since 1.1 314 */ 315 public static final byte END_PUNCTUATION = 22; 316 317 /** 318 * General category "Pc" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte CONNECTOR_PUNCTUATION = 23; 322 323 /** 324 * General category "Po" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte OTHER_PUNCTUATION = 24; 328 329 /** 330 * General category "Sm" in the Unicode specification. 
331 * @since 1.1 332 */ 333 public static final byte MATH_SYMBOL = 25; 334 335 /** 336 * General category "Sc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CURRENCY_SYMBOL = 26; 340 341 /** 342 * General category "Sk" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte MODIFIER_SYMBOL = 27; 346 347 /** 348 * General category "So" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte OTHER_SYMBOL = 28; 352 353 /** 354 * General category "Pi" in the Unicode specification. 355 * @since 1.4 356 */ 357 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 358 359 /** 360 * General category "Pf" in the Unicode specification. 361 * @since 1.4 362 */ 363 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 364 365 /** 366 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 367 */ 368 static final int ERROR = 0xFFFFFFFF; 369 370 371 /** 372 * Undefined bidirectional character type. Undefined {@code char} 373 * values have undefined directionality in the Unicode specification. 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_UNDEFINED = -1; 377 378 /** 379 * Strong bidirectional character type "L" in the Unicode specification. 380 * @since 1.4 381 */ 382 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 383 384 /** 385 * Strong bidirectional character type "R" in the Unicode specification. 386 * @since 1.4 387 */ 388 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 389 390 /** 391 * Strong bidirectional character type "AL" in the Unicode specification. 392 * @since 1.4 393 */ 394 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 395 396 /** 397 * Weak bidirectional character type "EN" in the Unicode specification. 398 * @since 1.4 399 */ 400 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 401 402 /** 403 * Weak bidirectional character type "ES" in the Unicode specification. 
404 * @since 1.4 405 */ 406 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 407 408 /** 409 * Weak bidirectional character type "ET" in the Unicode specification. 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 413 414 /** 415 * Weak bidirectional character type "AN" in the Unicode specification. 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 419 420 /** 421 * Weak bidirectional character type "CS" in the Unicode specification. 422 * @since 1.4 423 */ 424 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 425 426 /** 427 * Weak bidirectional character type "NSM" in the Unicode specification. 428 * @since 1.4 429 */ 430 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 431 432 /** 433 * Weak bidirectional character type "BN" in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 437 438 /** 439 * Neutral bidirectional character type "B" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 443 444 /** 445 * Neutral bidirectional character type "S" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 449 450 /** 451 * Neutral bidirectional character type "WS" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_WHITESPACE = 12; 455 456 /** 457 * Neutral bidirectional character type "ON" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 461 462 /** 463 * Strong bidirectional character type "LRE" in the Unicode specification. 464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 467 468 /** 469 * Strong bidirectional character type "LRO" in the Unicode specification. 
470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 473 474 /** 475 * Strong bidirectional character type "RLE" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 479 480 /** 481 * Strong bidirectional character type "RLO" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 485 486 /** 487 * Weak bidirectional character type "PDF" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 491 492 /** 493 * The minimum value of a 494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 495 * Unicode high-surrogate code unit</a> 496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 497 * A high-surrogate is also known as a <i>leading-surrogate</i>. 498 * 499 * @since 1.5 500 */ 501 public static final char MIN_HIGH_SURROGATE = '\uD800'; 502 503 /** 504 * The maximum value of a 505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 506 * Unicode high-surrogate code unit</a> 507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 508 * A high-surrogate is also known as a <i>leading-surrogate</i>. 509 * 510 * @since 1.5 511 */ 512 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 513 514 /** 515 * The minimum value of a 516 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 517 * Unicode low-surrogate code unit</a> 518 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 519 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
520 * 521 * @since 1.5 522 */ 523 public static final char MIN_LOW_SURROGATE = '\uDC00'; 524 525 /** 526 * The maximum value of a 527 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 528 * Unicode low-surrogate code unit</a> 529 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 530 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 531 * 532 * @since 1.5 533 */ 534 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 535 536 /** 537 * The minimum value of a Unicode surrogate code unit in the 538 * UTF-16 encoding, constant {@code '\u005CuD800'}. 539 * 540 * @since 1.5 541 */ 542 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 543 544 /** 545 * The maximum value of a Unicode surrogate code unit in the 546 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 547 * 548 * @since 1.5 549 */ 550 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 551 552 /** 553 * The minimum value of a 554 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 555 * Unicode supplementary code point</a>, constant {@code U+10000}. 556 * 557 * @since 1.5 558 */ 559 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 560 561 /** 562 * The minimum value of a 563 * <a href="http://www.unicode.org/glossary/#code_point"> 564 * Unicode code point</a>, constant {@code U+0000}. 565 * 566 * @since 1.5 567 */ 568 public static final int MIN_CODE_POINT = 0x000000; 569 570 /** 571 * The maximum value of a 572 * <a href="http://www.unicode.org/glossary/#code_point"> 573 * Unicode code point</a>, constant {@code U+10FFFF}. 574 * 575 * @since 1.5 576 */ 577 public static final int MAX_CODE_POINT = 0X10FFFF; 578 579 580 /** 581 * Instances of this class represent particular subsets of the Unicode 582 * character set. The only family of subsets defined in the 583 * {@code Character} class is {@link Character.UnicodeBlock}. 584 * Other portions of the Java API may define other subsets for their 585 * own purposes. 
586 * 587 * @since 1.2 588 */ 589 public static class Subset { 590 591 private String name; 592 593 /** 594 * Constructs a new {@code Subset} instance. 595 * 596 * @param name The name of this subset 597 * @exception NullPointerException if name is {@code null} 598 */ 599 protected Subset(String name) { 600 if (name == null) { 601 throw new NullPointerException("name"); 602 } 603 this.name = name; 604 } 605 606 /** 607 * Compares two {@code Subset} objects for equality. 608 * This method returns {@code true} if and only if 609 * {@code this} and the argument refer to the same 610 * object; since this method is {@code final}, this 611 * guarantee holds for all subclasses. 612 */ 613 public final boolean equals(Object obj) { 614 return (this == obj); 615 } 616 617 /** 618 * Returns the standard hash code as defined by the 619 * {@link Object#hashCode} method. This method 620 * is {@code final} in order to ensure that the 621 * {@code equals} and {@code hashCode} methods will 622 * be consistent in all subclasses. 623 */ 624 public final int hashCode() { 625 return super.hashCode(); 626 } 627 628 /** 629 * Returns the name of this subset. 630 */ 631 public final String toString() { 632 return name; 633 } 634 } 635 636 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 637 // for the latest specification of Unicode Blocks. 638 639 /** 640 * A family of character subsets representing the character blocks in the 641 * Unicode specification. Character blocks generally define characters 642 * used for a specific script or purpose. A character is contained by 643 * at most one Unicode block. 644 * 645 * @since 1.2 646 */ 647 public static final class UnicodeBlock extends Subset { 648 649 private static Map<String, UnicodeBlock> map = new HashMap<>(256); 650 651 /** 652 * Creates a UnicodeBlock with the given identifier name. 653 * This name must be the same as the block identifier. 
654 */ 655 private UnicodeBlock(String idName) { 656 super(idName); 657 map.put(idName, this); 658 } 659 660 /** 661 * Creates a UnicodeBlock with the given identifier name and 662 * alias name. 663 */ 664 private UnicodeBlock(String idName, String alias) { 665 this(idName); 666 map.put(alias, this); 667 } 668 669 /** 670 * Creates a UnicodeBlock with the given identifier name and 671 * alias names. 672 */ 673 private UnicodeBlock(String idName, String... aliases) { 674 this(idName); 675 for (String alias : aliases) 676 map.put(alias, this); 677 } 678 679 /** 680 * Constant for the "Basic Latin" Unicode character block. 681 * @since 1.2 682 */ 683 public static final UnicodeBlock BASIC_LATIN = 684 new UnicodeBlock("BASIC_LATIN", 685 "BASIC LATIN", 686 "BASICLATIN"); 687 688 /** 689 * Constant for the "Latin-1 Supplement" Unicode character block. 690 * @since 1.2 691 */ 692 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 693 new UnicodeBlock("LATIN_1_SUPPLEMENT", 694 "LATIN-1 SUPPLEMENT", 695 "LATIN-1SUPPLEMENT"); 696 697 /** 698 * Constant for the "Latin Extended-A" Unicode character block. 699 * @since 1.2 700 */ 701 public static final UnicodeBlock LATIN_EXTENDED_A = 702 new UnicodeBlock("LATIN_EXTENDED_A", 703 "LATIN EXTENDED-A", 704 "LATINEXTENDED-A"); 705 706 /** 707 * Constant for the "Latin Extended-B" Unicode character block. 708 * @since 1.2 709 */ 710 public static final UnicodeBlock LATIN_EXTENDED_B = 711 new UnicodeBlock("LATIN_EXTENDED_B", 712 "LATIN EXTENDED-B", 713 "LATINEXTENDED-B"); 714 715 /** 716 * Constant for the "IPA Extensions" Unicode character block. 717 * @since 1.2 718 */ 719 public static final UnicodeBlock IPA_EXTENSIONS = 720 new UnicodeBlock("IPA_EXTENSIONS", 721 "IPA EXTENSIONS", 722 "IPAEXTENSIONS"); 723 724 /** 725 * Constant for the "Spacing Modifier Letters" Unicode character block. 
726 * @since 1.2 727 */ 728 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 729 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 730 "SPACING MODIFIER LETTERS", 731 "SPACINGMODIFIERLETTERS"); 732 733 /** 734 * Constant for the "Combining Diacritical Marks" Unicode character block. 735 * @since 1.2 736 */ 737 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 738 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 739 "COMBINING DIACRITICAL MARKS", 740 "COMBININGDIACRITICALMARKS"); 741 742 /** 743 * Constant for the "Greek and Coptic" Unicode character block. 744 * <p> 745 * This block was previously known as the "Greek" block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock GREEK = 750 new UnicodeBlock("GREEK", 751 "GREEK AND COPTIC", 752 "GREEKANDCOPTIC"); 753 754 /** 755 * Constant for the "Cyrillic" Unicode character block. 756 * @since 1.2 757 */ 758 public static final UnicodeBlock CYRILLIC = 759 new UnicodeBlock("CYRILLIC"); 760 761 /** 762 * Constant for the "Armenian" Unicode character block. 763 * @since 1.2 764 */ 765 public static final UnicodeBlock ARMENIAN = 766 new UnicodeBlock("ARMENIAN"); 767 768 /** 769 * Constant for the "Hebrew" Unicode character block. 770 * @since 1.2 771 */ 772 public static final UnicodeBlock HEBREW = 773 new UnicodeBlock("HEBREW"); 774 775 /** 776 * Constant for the "Arabic" Unicode character block. 777 * @since 1.2 778 */ 779 public static final UnicodeBlock ARABIC = 780 new UnicodeBlock("ARABIC"); 781 782 /** 783 * Constant for the "Devanagari" Unicode character block. 784 * @since 1.2 785 */ 786 public static final UnicodeBlock DEVANAGARI = 787 new UnicodeBlock("DEVANAGARI"); 788 789 /** 790 * Constant for the "Bengali" Unicode character block. 791 * @since 1.2 792 */ 793 public static final UnicodeBlock BENGALI = 794 new UnicodeBlock("BENGALI"); 795 796 /** 797 * Constant for the "Gurmukhi" Unicode character block. 
798 * @since 1.2 799 */ 800 public static final UnicodeBlock GURMUKHI = 801 new UnicodeBlock("GURMUKHI"); 802 803 /** 804 * Constant for the "Gujarati" Unicode character block. 805 * @since 1.2 806 */ 807 public static final UnicodeBlock GUJARATI = 808 new UnicodeBlock("GUJARATI"); 809 810 /** 811 * Constant for the "Oriya" Unicode character block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock ORIYA = 815 new UnicodeBlock("ORIYA"); 816 817 /** 818 * Constant for the "Tamil" Unicode character block. 819 * @since 1.2 820 */ 821 public static final UnicodeBlock TAMIL = 822 new UnicodeBlock("TAMIL"); 823 824 /** 825 * Constant for the "Telugu" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock TELUGU = 829 new UnicodeBlock("TELUGU"); 830 831 /** 832 * Constant for the "Kannada" Unicode character block. 833 * @since 1.2 834 */ 835 public static final UnicodeBlock KANNADA = 836 new UnicodeBlock("KANNADA"); 837 838 /** 839 * Constant for the "Malayalam" Unicode character block. 840 * @since 1.2 841 */ 842 public static final UnicodeBlock MALAYALAM = 843 new UnicodeBlock("MALAYALAM"); 844 845 /** 846 * Constant for the "Thai" Unicode character block. 847 * @since 1.2 848 */ 849 public static final UnicodeBlock THAI = 850 new UnicodeBlock("THAI"); 851 852 /** 853 * Constant for the "Lao" Unicode character block. 854 * @since 1.2 855 */ 856 public static final UnicodeBlock LAO = 857 new UnicodeBlock("LAO"); 858 859 /** 860 * Constant for the "Tibetan" Unicode character block. 861 * @since 1.2 862 */ 863 public static final UnicodeBlock TIBETAN = 864 new UnicodeBlock("TIBETAN"); 865 866 /** 867 * Constant for the "Georgian" Unicode character block. 868 * @since 1.2 869 */ 870 public static final UnicodeBlock GEORGIAN = 871 new UnicodeBlock("GEORGIAN"); 872 873 /** 874 * Constant for the "Hangul Jamo" Unicode character block. 
875 * @since 1.2 876 */ 877 public static final UnicodeBlock HANGUL_JAMO = 878 new UnicodeBlock("HANGUL_JAMO", 879 "HANGUL JAMO", 880 "HANGULJAMO"); 881 882 /** 883 * Constant for the "Latin Extended Additional" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 887 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 888 "LATIN EXTENDED ADDITIONAL", 889 "LATINEXTENDEDADDITIONAL"); 890 891 /** 892 * Constant for the "Greek Extended" Unicode character block. 893 * @since 1.2 894 */ 895 public static final UnicodeBlock GREEK_EXTENDED = 896 new UnicodeBlock("GREEK_EXTENDED", 897 "GREEK EXTENDED", 898 "GREEKEXTENDED"); 899 900 /** 901 * Constant for the "General Punctuation" Unicode character block. 902 * @since 1.2 903 */ 904 public static final UnicodeBlock GENERAL_PUNCTUATION = 905 new UnicodeBlock("GENERAL_PUNCTUATION", 906 "GENERAL PUNCTUATION", 907 "GENERALPUNCTUATION"); 908 909 /** 910 * Constant for the "Superscripts and Subscripts" Unicode character 911 * block. 912 * @since 1.2 913 */ 914 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 915 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 916 "SUPERSCRIPTS AND SUBSCRIPTS", 917 "SUPERSCRIPTSANDSUBSCRIPTS"); 918 919 /** 920 * Constant for the "Currency Symbols" Unicode character block. 921 * @since 1.2 922 */ 923 public static final UnicodeBlock CURRENCY_SYMBOLS = 924 new UnicodeBlock("CURRENCY_SYMBOLS", 925 "CURRENCY SYMBOLS", 926 "CURRENCYSYMBOLS"); 927 928 /** 929 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 930 * character block. 931 * <p> 932 * This block was previously known as "Combining Marks for Symbols". 
933 * @since 1.2 934 */ 935 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 936 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 937 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 938 "COMBININGDIACRITICALMARKSFORSYMBOLS", 939 "COMBINING MARKS FOR SYMBOLS", 940 "COMBININGMARKSFORSYMBOLS"); 941 942 /** 943 * Constant for the "Letterlike Symbols" Unicode character block. 944 * @since 1.2 945 */ 946 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 947 new UnicodeBlock("LETTERLIKE_SYMBOLS", 948 "LETTERLIKE SYMBOLS", 949 "LETTERLIKESYMBOLS"); 950 951 /** 952 * Constant for the "Number Forms" Unicode character block. 953 * @since 1.2 954 */ 955 public static final UnicodeBlock NUMBER_FORMS = 956 new UnicodeBlock("NUMBER_FORMS", 957 "NUMBER FORMS", 958 "NUMBERFORMS"); 959 960 /** 961 * Constant for the "Arrows" Unicode character block. 962 * @since 1.2 963 */ 964 public static final UnicodeBlock ARROWS = 965 new UnicodeBlock("ARROWS"); 966 967 /** 968 * Constant for the "Mathematical Operators" Unicode character block. 969 * @since 1.2 970 */ 971 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 972 new UnicodeBlock("MATHEMATICAL_OPERATORS", 973 "MATHEMATICAL OPERATORS", 974 "MATHEMATICALOPERATORS"); 975 976 /** 977 * Constant for the "Miscellaneous Technical" Unicode character block. 978 * @since 1.2 979 */ 980 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 981 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 982 "MISCELLANEOUS TECHNICAL", 983 "MISCELLANEOUSTECHNICAL"); 984 985 /** 986 * Constant for the "Control Pictures" Unicode character block. 987 * @since 1.2 988 */ 989 public static final UnicodeBlock CONTROL_PICTURES = 990 new UnicodeBlock("CONTROL_PICTURES", 991 "CONTROL PICTURES", 992 "CONTROLPICTURES"); 993 994 /** 995 * Constant for the "Optical Character Recognition" Unicode character block. 
996 * @since 1.2 997 */ 998 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 999 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1000 "OPTICAL CHARACTER RECOGNITION", 1001 "OPTICALCHARACTERRECOGNITION"); 1002 1003 /** 1004 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1005 * @since 1.2 1006 */ 1007 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1008 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1009 "ENCLOSED ALPHANUMERICS", 1010 "ENCLOSEDALPHANUMERICS"); 1011 1012 /** 1013 * Constant for the "Box Drawing" Unicode character block. 1014 * @since 1.2 1015 */ 1016 public static final UnicodeBlock BOX_DRAWING = 1017 new UnicodeBlock("BOX_DRAWING", 1018 "BOX DRAWING", 1019 "BOXDRAWING"); 1020 1021 /** 1022 * Constant for the "Block Elements" Unicode character block. 1023 * @since 1.2 1024 */ 1025 public static final UnicodeBlock BLOCK_ELEMENTS = 1026 new UnicodeBlock("BLOCK_ELEMENTS", 1027 "BLOCK ELEMENTS", 1028 "BLOCKELEMENTS"); 1029 1030 /** 1031 * Constant for the "Geometric Shapes" Unicode character block. 1032 * @since 1.2 1033 */ 1034 public static final UnicodeBlock GEOMETRIC_SHAPES = 1035 new UnicodeBlock("GEOMETRIC_SHAPES", 1036 "GEOMETRIC SHAPES", 1037 "GEOMETRICSHAPES"); 1038 1039 /** 1040 * Constant for the "Miscellaneous Symbols" Unicode character block. 1041 * @since 1.2 1042 */ 1043 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1044 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1045 "MISCELLANEOUS SYMBOLS", 1046 "MISCELLANEOUSSYMBOLS"); 1047 1048 /** 1049 * Constant for the "Dingbats" Unicode character block. 1050 * @since 1.2 1051 */ 1052 public static final UnicodeBlock DINGBATS = 1053 new UnicodeBlock("DINGBATS"); 1054 1055 /** 1056 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1057 * @since 1.2 1058 */ 1059 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1060 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1061 "CJK SYMBOLS AND PUNCTUATION", 1062 "CJKSYMBOLSANDPUNCTUATION"); 1063 1064 /** 1065 * Constant for the "Hiragana" Unicode character block. 1066 * @since 1.2 1067 */ 1068 public static final UnicodeBlock HIRAGANA = 1069 new UnicodeBlock("HIRAGANA"); 1070 1071 /** 1072 * Constant for the "Katakana" Unicode character block. 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock KATAKANA = 1076 new UnicodeBlock("KATAKANA"); 1077 1078 /** 1079 * Constant for the "Bopomofo" Unicode character block. 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock BOPOMOFO = 1083 new UnicodeBlock("BOPOMOFO"); 1084 1085 /** 1086 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1090 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1091 "HANGUL COMPATIBILITY JAMO", 1092 "HANGULCOMPATIBILITYJAMO"); 1093 1094 /** 1095 * Constant for the "Kanbun" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock KANBUN = 1099 new UnicodeBlock("KANBUN"); 1100 1101 /** 1102 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1106 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1107 "ENCLOSED CJK LETTERS AND MONTHS", 1108 "ENCLOSEDCJKLETTERSANDMONTHS"); 1109 1110 /** 1111 * Constant for the "CJK Compatibility" Unicode character block. 1112 * @since 1.2 1113 */ 1114 public static final UnicodeBlock CJK_COMPATIBILITY = 1115 new UnicodeBlock("CJK_COMPATIBILITY", 1116 "CJK COMPATIBILITY", 1117 "CJKCOMPATIBILITY"); 1118 1119 /** 1120 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1121 * @since 1.2 1122 */ 1123 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1124 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1125 "CJK UNIFIED IDEOGRAPHS", 1126 "CJKUNIFIEDIDEOGRAPHS"); 1127 1128 /** 1129 * Constant for the "Hangul Syllables" Unicode character block. 1130 * @since 1.2 1131 */ 1132 public static final UnicodeBlock HANGUL_SYLLABLES = 1133 new UnicodeBlock("HANGUL_SYLLABLES", 1134 "HANGUL SYLLABLES", 1135 "HANGULSYLLABLES"); 1136 1137 /** 1138 * Constant for the "Private Use Area" Unicode character block. 1139 * @since 1.2 1140 */ 1141 public static final UnicodeBlock PRIVATE_USE_AREA = 1142 new UnicodeBlock("PRIVATE_USE_AREA", 1143 "PRIVATE USE AREA", 1144 "PRIVATEUSEAREA"); 1145 1146 /** 1147 * Constant for the "CJK Compatibility Ideographs" Unicode character 1148 * block. 1149 * @since 1.2 1150 */ 1151 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1152 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1153 "CJK COMPATIBILITY IDEOGRAPHS", 1154 "CJKCOMPATIBILITYIDEOGRAPHS"); 1155 1156 /** 1157 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1158 * @since 1.2 1159 */ 1160 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1161 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1162 "ALPHABETIC PRESENTATION FORMS", 1163 "ALPHABETICPRESENTATIONFORMS"); 1164 1165 /** 1166 * Constant for the "Arabic Presentation Forms-A" Unicode character 1167 * block. 1168 * @since 1.2 1169 */ 1170 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1171 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1172 "ARABIC PRESENTATION FORMS-A", 1173 "ARABICPRESENTATIONFORMS-A"); 1174 1175 /** 1176 * Constant for the "Combining Half Marks" Unicode character block. 
1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock COMBINING_HALF_MARKS = 1180 new UnicodeBlock("COMBINING_HALF_MARKS", 1181 "COMBINING HALF MARKS", 1182 "COMBININGHALFMARKS"); 1183 1184 /** 1185 * Constant for the "CJK Compatibility Forms" Unicode character block. 1186 * @since 1.2 1187 */ 1188 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1189 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1190 "CJK COMPATIBILITY FORMS", 1191 "CJKCOMPATIBILITYFORMS"); 1192 1193 /** 1194 * Constant for the "Small Form Variants" Unicode character block. 1195 * @since 1.2 1196 */ 1197 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1198 new UnicodeBlock("SMALL_FORM_VARIANTS", 1199 "SMALL FORM VARIANTS", 1200 "SMALLFORMVARIANTS"); 1201 1202 /** 1203 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1204 * @since 1.2 1205 */ 1206 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1207 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1208 "ARABIC PRESENTATION FORMS-B", 1209 "ARABICPRESENTATIONFORMS-B"); 1210 1211 /** 1212 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1213 * block. 1214 * @since 1.2 1215 */ 1216 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1217 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1218 "HALFWIDTH AND FULLWIDTH FORMS", 1219 "HALFWIDTHANDFULLWIDTHFORMS"); 1220 1221 /** 1222 * Constant for the "Specials" Unicode character block. 1223 * @since 1.2 1224 */ 1225 public static final UnicodeBlock SPECIALS = 1226 new UnicodeBlock("SPECIALS"); 1227 1228 /** 1229 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1230 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1231 * {@link #LOW_SURROGATES}. These new constants match 1232 * the block definitions of the Unicode Standard. 1233 * The {@link #of(char)} and {@link #of(int)} methods 1234 * return the new constants, not SURROGATES_AREA. 
1235 */ 1236 @Deprecated 1237 public static final UnicodeBlock SURROGATES_AREA = 1238 new UnicodeBlock("SURROGATES_AREA"); 1239 1240 /** 1241 * Constant for the "Syriac" Unicode character block. 1242 * @since 1.4 1243 */ 1244 public static final UnicodeBlock SYRIAC = 1245 new UnicodeBlock("SYRIAC"); 1246 1247 /** 1248 * Constant for the "Thaana" Unicode character block. 1249 * @since 1.4 1250 */ 1251 public static final UnicodeBlock THAANA = 1252 new UnicodeBlock("THAANA"); 1253 1254 /** 1255 * Constant for the "Sinhala" Unicode character block. 1256 * @since 1.4 1257 */ 1258 public static final UnicodeBlock SINHALA = 1259 new UnicodeBlock("SINHALA"); 1260 1261 /** 1262 * Constant for the "Myanmar" Unicode character block. 1263 * @since 1.4 1264 */ 1265 public static final UnicodeBlock MYANMAR = 1266 new UnicodeBlock("MYANMAR"); 1267 1268 /** 1269 * Constant for the "Ethiopic" Unicode character block. 1270 * @since 1.4 1271 */ 1272 public static final UnicodeBlock ETHIOPIC = 1273 new UnicodeBlock("ETHIOPIC"); 1274 1275 /** 1276 * Constant for the "Cherokee" Unicode character block. 1277 * @since 1.4 1278 */ 1279 public static final UnicodeBlock CHEROKEE = 1280 new UnicodeBlock("CHEROKEE"); 1281 1282 /** 1283 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1284 * @since 1.4 1285 */ 1286 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1287 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1288 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1289 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1290 1291 /** 1292 * Constant for the "Ogham" Unicode character block. 1293 * @since 1.4 1294 */ 1295 public static final UnicodeBlock OGHAM = 1296 new UnicodeBlock("OGHAM"); 1297 1298 /** 1299 * Constant for the "Runic" Unicode character block. 1300 * @since 1.4 1301 */ 1302 public static final UnicodeBlock RUNIC = 1303 new UnicodeBlock("RUNIC"); 1304 1305 /** 1306 * Constant for the "Khmer" Unicode character block. 
1307 * @since 1.4 1308 */ 1309 public static final UnicodeBlock KHMER = 1310 new UnicodeBlock("KHMER"); 1311 1312 /** 1313 * Constant for the "Mongolian" Unicode character block. 1314 * @since 1.4 1315 */ 1316 public static final UnicodeBlock MONGOLIAN = 1317 new UnicodeBlock("MONGOLIAN"); 1318 1319 /** 1320 * Constant for the "Braille Patterns" Unicode character block. 1321 * @since 1.4 1322 */ 1323 public static final UnicodeBlock BRAILLE_PATTERNS = 1324 new UnicodeBlock("BRAILLE_PATTERNS", 1325 "BRAILLE PATTERNS", 1326 "BRAILLEPATTERNS"); 1327 1328 /** 1329 * Constant for the "CJK Radicals Supplement" Unicode character block. 1330 * @since 1.4 1331 */ 1332 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1333 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1334 "CJK RADICALS SUPPLEMENT", 1335 "CJKRADICALSSUPPLEMENT"); 1336 1337 /** 1338 * Constant for the "Kangxi Radicals" Unicode character block. 1339 * @since 1.4 1340 */ 1341 public static final UnicodeBlock KANGXI_RADICALS = 1342 new UnicodeBlock("KANGXI_RADICALS", 1343 "KANGXI RADICALS", 1344 "KANGXIRADICALS"); 1345 1346 /** 1347 * Constant for the "Ideographic Description Characters" Unicode character block. 1348 * @since 1.4 1349 */ 1350 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1351 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1352 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1353 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1354 1355 /** 1356 * Constant for the "Bopomofo Extended" Unicode character block. 1357 * @since 1.4 1358 */ 1359 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1360 new UnicodeBlock("BOPOMOFO_EXTENDED", 1361 "BOPOMOFO EXTENDED", 1362 "BOPOMOFOEXTENDED"); 1363 1364 /** 1365 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1366 * @since 1.4 1367 */ 1368 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1369 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1370 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1371 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1372 1373 /** 1374 * Constant for the "Yi Syllables" Unicode character block. 1375 * @since 1.4 1376 */ 1377 public static final UnicodeBlock YI_SYLLABLES = 1378 new UnicodeBlock("YI_SYLLABLES", 1379 "YI SYLLABLES", 1380 "YISYLLABLES"); 1381 1382 /** 1383 * Constant for the "Yi Radicals" Unicode character block. 1384 * @since 1.4 1385 */ 1386 public static final UnicodeBlock YI_RADICALS = 1387 new UnicodeBlock("YI_RADICALS", 1388 "YI RADICALS", 1389 "YIRADICALS"); 1390 1391 /** 1392 * Constant for the "Cyrillic Supplementary" Unicode character block. 1393 * @since 1.5 1394 */ 1395 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1396 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1397 "CYRILLIC SUPPLEMENTARY", 1398 "CYRILLICSUPPLEMENTARY", 1399 "CYRILLIC SUPPLEMENT", 1400 "CYRILLICSUPPLEMENT"); 1401 1402 /** 1403 * Constant for the "Tagalog" Unicode character block. 1404 * @since 1.5 1405 */ 1406 public static final UnicodeBlock TAGALOG = 1407 new UnicodeBlock("TAGALOG"); 1408 1409 /** 1410 * Constant for the "Hanunoo" Unicode character block. 1411 * @since 1.5 1412 */ 1413 public static final UnicodeBlock HANUNOO = 1414 new UnicodeBlock("HANUNOO"); 1415 1416 /** 1417 * Constant for the "Buhid" Unicode character block. 1418 * @since 1.5 1419 */ 1420 public static final UnicodeBlock BUHID = 1421 new UnicodeBlock("BUHID"); 1422 1423 /** 1424 * Constant for the "Tagbanwa" Unicode character block. 1425 * @since 1.5 1426 */ 1427 public static final UnicodeBlock TAGBANWA = 1428 new UnicodeBlock("TAGBANWA"); 1429 1430 /** 1431 * Constant for the "Limbu" Unicode character block. 
1432 * @since 1.5 1433 */ 1434 public static final UnicodeBlock LIMBU = 1435 new UnicodeBlock("LIMBU"); 1436 1437 /** 1438 * Constant for the "Tai Le" Unicode character block. 1439 * @since 1.5 1440 */ 1441 public static final UnicodeBlock TAI_LE = 1442 new UnicodeBlock("TAI_LE", 1443 "TAI LE", 1444 "TAILE"); 1445 1446 /** 1447 * Constant for the "Khmer Symbols" Unicode character block. 1448 * @since 1.5 1449 */ 1450 public static final UnicodeBlock KHMER_SYMBOLS = 1451 new UnicodeBlock("KHMER_SYMBOLS", 1452 "KHMER SYMBOLS", 1453 "KHMERSYMBOLS"); 1454 1455 /** 1456 * Constant for the "Phonetic Extensions" Unicode character block. 1457 * @since 1.5 1458 */ 1459 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1460 new UnicodeBlock("PHONETIC_EXTENSIONS", 1461 "PHONETIC EXTENSIONS", 1462 "PHONETICEXTENSIONS"); 1463 1464 /** 1465 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1466 * @since 1.5 1467 */ 1468 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1469 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1470 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1471 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1472 1473 /** 1474 * Constant for the "Supplemental Arrows-A" Unicode character block. 1475 * @since 1.5 1476 */ 1477 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1478 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1479 "SUPPLEMENTAL ARROWS-A", 1480 "SUPPLEMENTALARROWS-A"); 1481 1482 /** 1483 * Constant for the "Supplemental Arrows-B" Unicode character block. 1484 * @since 1.5 1485 */ 1486 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1487 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1488 "SUPPLEMENTAL ARROWS-B", 1489 "SUPPLEMENTALARROWS-B"); 1490 1491 /** 1492 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1493 * character block. 
1494 * @since 1.5 1495 */ 1496 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1497 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1498 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1499 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1500 1501 /** 1502 * Constant for the "Supplemental Mathematical Operators" Unicode 1503 * character block. 1504 * @since 1.5 1505 */ 1506 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1507 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1508 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1509 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1510 1511 /** 1512 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1513 * block. 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1517 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1518 "MISCELLANEOUS SYMBOLS AND ARROWS", 1519 "MISCELLANEOUSSYMBOLSANDARROWS"); 1520 1521 /** 1522 * Constant for the "Katakana Phonetic Extensions" Unicode character 1523 * block. 1524 * @since 1.5 1525 */ 1526 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1527 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1528 "KATAKANA PHONETIC EXTENSIONS", 1529 "KATAKANAPHONETICEXTENSIONS"); 1530 1531 /** 1532 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1533 * @since 1.5 1534 */ 1535 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1536 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1537 "YIJING HEXAGRAM SYMBOLS", 1538 "YIJINGHEXAGRAMSYMBOLS"); 1539 1540 /** 1541 * Constant for the "Variation Selectors" Unicode character block. 1542 * @since 1.5 1543 */ 1544 public static final UnicodeBlock VARIATION_SELECTORS = 1545 new UnicodeBlock("VARIATION_SELECTORS", 1546 "VARIATION SELECTORS", 1547 "VARIATIONSELECTORS"); 1548 1549 /** 1550 * Constant for the "Linear B Syllabary" Unicode character block. 
1551 * @since 1.5 1552 */ 1553 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1554 new UnicodeBlock("LINEAR_B_SYLLABARY", 1555 "LINEAR B SYLLABARY", 1556 "LINEARBSYLLABARY"); 1557 1558 /** 1559 * Constant for the "Linear B Ideograms" Unicode character block. 1560 * @since 1.5 1561 */ 1562 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1563 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1564 "LINEAR B IDEOGRAMS", 1565 "LINEARBIDEOGRAMS"); 1566 1567 /** 1568 * Constant for the "Aegean Numbers" Unicode character block. 1569 * @since 1.5 1570 */ 1571 public static final UnicodeBlock AEGEAN_NUMBERS = 1572 new UnicodeBlock("AEGEAN_NUMBERS", 1573 "AEGEAN NUMBERS", 1574 "AEGEANNUMBERS"); 1575 1576 /** 1577 * Constant for the "Old Italic" Unicode character block. 1578 * @since 1.5 1579 */ 1580 public static final UnicodeBlock OLD_ITALIC = 1581 new UnicodeBlock("OLD_ITALIC", 1582 "OLD ITALIC", 1583 "OLDITALIC"); 1584 1585 /** 1586 * Constant for the "Gothic" Unicode character block. 1587 * @since 1.5 1588 */ 1589 public static final UnicodeBlock GOTHIC = 1590 new UnicodeBlock("GOTHIC"); 1591 1592 /** 1593 * Constant for the "Ugaritic" Unicode character block. 1594 * @since 1.5 1595 */ 1596 public static final UnicodeBlock UGARITIC = 1597 new UnicodeBlock("UGARITIC"); 1598 1599 /** 1600 * Constant for the "Deseret" Unicode character block. 1601 * @since 1.5 1602 */ 1603 public static final UnicodeBlock DESERET = 1604 new UnicodeBlock("DESERET"); 1605 1606 /** 1607 * Constant for the "Shavian" Unicode character block. 1608 * @since 1.5 1609 */ 1610 public static final UnicodeBlock SHAVIAN = 1611 new UnicodeBlock("SHAVIAN"); 1612 1613 /** 1614 * Constant for the "Osmanya" Unicode character block. 1615 * @since 1.5 1616 */ 1617 public static final UnicodeBlock OSMANYA = 1618 new UnicodeBlock("OSMANYA"); 1619 1620 /** 1621 * Constant for the "Cypriot Syllabary" Unicode character block. 
1622 * @since 1.5 1623 */ 1624 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1625 new UnicodeBlock("CYPRIOT_SYLLABARY", 1626 "CYPRIOT SYLLABARY", 1627 "CYPRIOTSYLLABARY"); 1628 1629 /** 1630 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1631 * @since 1.5 1632 */ 1633 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1634 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1635 "BYZANTINE MUSICAL SYMBOLS", 1636 "BYZANTINEMUSICALSYMBOLS"); 1637 1638 /** 1639 * Constant for the "Musical Symbols" Unicode character block. 1640 * @since 1.5 1641 */ 1642 public static final UnicodeBlock MUSICAL_SYMBOLS = 1643 new UnicodeBlock("MUSICAL_SYMBOLS", 1644 "MUSICAL SYMBOLS", 1645 "MUSICALSYMBOLS"); 1646 1647 /** 1648 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1649 * @since 1.5 1650 */ 1651 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1652 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1653 "TAI XUAN JING SYMBOLS", 1654 "TAIXUANJINGSYMBOLS"); 1655 1656 /** 1657 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1658 * character block. 1659 * @since 1.5 1660 */ 1661 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1662 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1663 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1664 "MATHEMATICALALPHANUMERICSYMBOLS"); 1665 1666 /** 1667 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1668 * character block. 1669 * @since 1.5 1670 */ 1671 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1672 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1673 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1674 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1675 1676 /** 1677 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1678 * @since 1.5 1679 */ 1680 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1681 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1682 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1683 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1684 1685 /** 1686 * Constant for the "Tags" Unicode character block. 1687 * @since 1.5 1688 */ 1689 public static final UnicodeBlock TAGS = 1690 new UnicodeBlock("TAGS"); 1691 1692 /** 1693 * Constant for the "Variation Selectors Supplement" Unicode character 1694 * block. 1695 * @since 1.5 1696 */ 1697 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1698 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1699 "VARIATION SELECTORS SUPPLEMENT", 1700 "VARIATIONSELECTORSSUPPLEMENT"); 1701 1702 /** 1703 * Constant for the "Supplementary Private Use Area-A" Unicode character 1704 * block. 1705 * @since 1.5 1706 */ 1707 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1708 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1709 "SUPPLEMENTARY PRIVATE USE AREA-A", 1710 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1711 1712 /** 1713 * Constant for the "Supplementary Private Use Area-B" Unicode character 1714 * block. 1715 * @since 1.5 1716 */ 1717 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1718 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1719 "SUPPLEMENTARY PRIVATE USE AREA-B", 1720 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1721 1722 /** 1723 * Constant for the "High Surrogates" Unicode character block. 1724 * This block represents codepoint values in the high surrogate 1725 * range: U+D800 through U+DB7F 1726 * 1727 * @since 1.5 1728 */ 1729 public static final UnicodeBlock HIGH_SURROGATES = 1730 new UnicodeBlock("HIGH_SURROGATES", 1731 "HIGH SURROGATES", 1732 "HIGHSURROGATES"); 1733 1734 /** 1735 * Constant for the "High Private Use Surrogates" Unicode character 1736 * block. 
1737 * This block represents codepoint values in the private use high 1738 * surrogate range: U+DB80 through U+DBFF 1739 * 1740 * @since 1.5 1741 */ 1742 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1743 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1744 "HIGH PRIVATE USE SURROGATES", 1745 "HIGHPRIVATEUSESURROGATES"); 1746 1747 /** 1748 * Constant for the "Low Surrogates" Unicode character block. 1749 * This block represents codepoint values in the low surrogate 1750 * range: U+DC00 through U+DFFF 1751 * 1752 * @since 1.5 1753 */ 1754 public static final UnicodeBlock LOW_SURROGATES = 1755 new UnicodeBlock("LOW_SURROGATES", 1756 "LOW SURROGATES", 1757 "LOWSURROGATES"); 1758 1759 /** 1760 * Constant for the "Arabic Supplement" Unicode character block. 1761 * @since 1.7 1762 */ 1763 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1764 new UnicodeBlock("ARABIC_SUPPLEMENT", 1765 "ARABIC SUPPLEMENT", 1766 "ARABICSUPPLEMENT"); 1767 1768 /** 1769 * Constant for the "NKo" Unicode character block. 1770 * @since 1.7 1771 */ 1772 public static final UnicodeBlock NKO = 1773 new UnicodeBlock("NKO"); 1774 1775 /** 1776 * Constant for the "Samaritan" Unicode character block. 1777 * @since 1.7 1778 */ 1779 public static final UnicodeBlock SAMARITAN = 1780 new UnicodeBlock("SAMARITAN"); 1781 1782 /** 1783 * Constant for the "Mandaic" Unicode character block. 1784 * @since 1.7 1785 */ 1786 public static final UnicodeBlock MANDAIC = 1787 new UnicodeBlock("MANDAIC"); 1788 1789 /** 1790 * Constant for the "Ethiopic Supplement" Unicode character block. 1791 * @since 1.7 1792 */ 1793 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1794 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1795 "ETHIOPIC SUPPLEMENT", 1796 "ETHIOPICSUPPLEMENT"); 1797 1798 /** 1799 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1800 * Unicode character block. 
1801 * @since 1.7 1802 */ 1803 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1804 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1805 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1806 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1807 1808 /** 1809 * Constant for the "New Tai Lue" Unicode character block. 1810 * @since 1.7 1811 */ 1812 public static final UnicodeBlock NEW_TAI_LUE = 1813 new UnicodeBlock("NEW_TAI_LUE", 1814 "NEW TAI LUE", 1815 "NEWTAILUE"); 1816 1817 /** 1818 * Constant for the "Buginese" Unicode character block. 1819 * @since 1.7 1820 */ 1821 public static final UnicodeBlock BUGINESE = 1822 new UnicodeBlock("BUGINESE"); 1823 1824 /** 1825 * Constant for the "Tai Tham" Unicode character block. 1826 * @since 1.7 1827 */ 1828 public static final UnicodeBlock TAI_THAM = 1829 new UnicodeBlock("TAI_THAM", 1830 "TAI THAM", 1831 "TAITHAM"); 1832 1833 /** 1834 * Constant for the "Balinese" Unicode character block. 1835 * @since 1.7 1836 */ 1837 public static final UnicodeBlock BALINESE = 1838 new UnicodeBlock("BALINESE"); 1839 1840 /** 1841 * Constant for the "Sundanese" Unicode character block. 1842 * @since 1.7 1843 */ 1844 public static final UnicodeBlock SUNDANESE = 1845 new UnicodeBlock("SUNDANESE"); 1846 1847 /** 1848 * Constant for the "Batak" Unicode character block. 1849 * @since 1.7 1850 */ 1851 public static final UnicodeBlock BATAK = 1852 new UnicodeBlock("BATAK"); 1853 1854 /** 1855 * Constant for the "Lepcha" Unicode character block. 1856 * @since 1.7 1857 */ 1858 public static final UnicodeBlock LEPCHA = 1859 new UnicodeBlock("LEPCHA"); 1860 1861 /** 1862 * Constant for the "Ol Chiki" Unicode character block. 1863 * @since 1.7 1864 */ 1865 public static final UnicodeBlock OL_CHIKI = 1866 new UnicodeBlock("OL_CHIKI", 1867 "OL CHIKI", 1868 "OLCHIKI"); 1869 1870 /** 1871 * Constant for the "Vedic Extensions" Unicode character block. 
1872 * @since 1.7 1873 */ 1874 public static final UnicodeBlock VEDIC_EXTENSIONS = 1875 new UnicodeBlock("VEDIC_EXTENSIONS", 1876 "VEDIC EXTENSIONS", 1877 "VEDICEXTENSIONS"); 1878 1879 /** 1880 * Constant for the "Phonetic Extensions Supplement" Unicode character 1881 * block. 1882 * @since 1.7 1883 */ 1884 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1885 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1886 "PHONETIC EXTENSIONS SUPPLEMENT", 1887 "PHONETICEXTENSIONSSUPPLEMENT"); 1888 1889 /** 1890 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1891 * character block. 1892 * @since 1.7 1893 */ 1894 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1895 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1896 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1897 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1898 1899 /** 1900 * Constant for the "Glagolitic" Unicode character block. 1901 * @since 1.7 1902 */ 1903 public static final UnicodeBlock GLAGOLITIC = 1904 new UnicodeBlock("GLAGOLITIC"); 1905 1906 /** 1907 * Constant for the "Latin Extended-C" Unicode character block. 1908 * @since 1.7 1909 */ 1910 public static final UnicodeBlock LATIN_EXTENDED_C = 1911 new UnicodeBlock("LATIN_EXTENDED_C", 1912 "LATIN EXTENDED-C", 1913 "LATINEXTENDED-C"); 1914 1915 /** 1916 * Constant for the "Coptic" Unicode character block. 1917 * @since 1.7 1918 */ 1919 public static final UnicodeBlock COPTIC = 1920 new UnicodeBlock("COPTIC"); 1921 1922 /** 1923 * Constant for the "Georgian Supplement" Unicode character block. 1924 * @since 1.7 1925 */ 1926 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1927 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1928 "GEORGIAN SUPPLEMENT", 1929 "GEORGIANSUPPLEMENT"); 1930 1931 /** 1932 * Constant for the "Tifinagh" Unicode character block. 
1933 * @since 1.7 1934 */ 1935 public static final UnicodeBlock TIFINAGH = 1936 new UnicodeBlock("TIFINAGH"); 1937 1938 /** 1939 * Constant for the "Ethiopic Extended" Unicode character block. 1940 * @since 1.7 1941 */ 1942 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1943 new UnicodeBlock("ETHIOPIC_EXTENDED", 1944 "ETHIOPIC EXTENDED", 1945 "ETHIOPICEXTENDED"); 1946 1947 /** 1948 * Constant for the "Cyrillic Extended-A" Unicode character block. 1949 * @since 1.7 1950 */ 1951 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1952 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1953 "CYRILLIC EXTENDED-A", 1954 "CYRILLICEXTENDED-A"); 1955 1956 /** 1957 * Constant for the "Supplemental Punctuation" Unicode character block. 1958 * @since 1.7 1959 */ 1960 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1961 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1962 "SUPPLEMENTAL PUNCTUATION", 1963 "SUPPLEMENTALPUNCTUATION"); 1964 1965 /** 1966 * Constant for the "CJK Strokes" Unicode character block. 1967 * @since 1.7 1968 */ 1969 public static final UnicodeBlock CJK_STROKES = 1970 new UnicodeBlock("CJK_STROKES", 1971 "CJK STROKES", 1972 "CJKSTROKES"); 1973 1974 /** 1975 * Constant for the "Lisu" Unicode character block. 1976 * @since 1.7 1977 */ 1978 public static final UnicodeBlock LISU = 1979 new UnicodeBlock("LISU"); 1980 1981 /** 1982 * Constant for the "Vai" Unicode character block. 1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock VAI = 1986 new UnicodeBlock("VAI"); 1987 1988 /** 1989 * Constant for the "Cyrillic Extended-B" Unicode character block. 1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1993 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1994 "CYRILLIC EXTENDED-B", 1995 "CYRILLICEXTENDED-B"); 1996 1997 /** 1998 * Constant for the "Bamum" Unicode character block. 
1999 * @since 1.7 2000 */ 2001 public static final UnicodeBlock BAMUM = 2002 new UnicodeBlock("BAMUM"); 2003 2004 /** 2005 * Constant for the "Modifier Tone Letters" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2009 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2010 "MODIFIER TONE LETTERS", 2011 "MODIFIERTONELETTERS"); 2012 2013 /** 2014 * Constant for the "Latin Extended-D" Unicode character block. 2015 * @since 1.7 2016 */ 2017 public static final UnicodeBlock LATIN_EXTENDED_D = 2018 new UnicodeBlock("LATIN_EXTENDED_D", 2019 "LATIN EXTENDED-D", 2020 "LATINEXTENDED-D"); 2021 2022 /** 2023 * Constant for the "Syloti Nagri" Unicode character block. 2024 * @since 1.7 2025 */ 2026 public static final UnicodeBlock SYLOTI_NAGRI = 2027 new UnicodeBlock("SYLOTI_NAGRI", 2028 "SYLOTI NAGRI", 2029 "SYLOTINAGRI"); 2030 2031 /** 2032 * Constant for the "Common Indic Number Forms" Unicode character block. 2033 * @since 1.7 2034 */ 2035 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2036 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2037 "COMMON INDIC NUMBER FORMS", 2038 "COMMONINDICNUMBERFORMS"); 2039 2040 /** 2041 * Constant for the "Phags-pa" Unicode character block. 2042 * @since 1.7 2043 */ 2044 public static final UnicodeBlock PHAGS_PA = 2045 new UnicodeBlock("PHAGS_PA", 2046 "PHAGS-PA"); 2047 2048 /** 2049 * Constant for the "Saurashtra" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock SAURASHTRA = 2053 new UnicodeBlock("SAURASHTRA"); 2054 2055 /** 2056 * Constant for the "Devanagari Extended" Unicode character block. 2057 * @since 1.7 2058 */ 2059 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2060 new UnicodeBlock("DEVANAGARI_EXTENDED", 2061 "DEVANAGARI EXTENDED", 2062 "DEVANAGARIEXTENDED"); 2063 2064 /** 2065 * Constant for the "Kayah Li" Unicode character block. 
2066 * @since 1.7 2067 */ 2068 public static final UnicodeBlock KAYAH_LI = 2069 new UnicodeBlock("KAYAH_LI", 2070 "KAYAH LI", 2071 "KAYAHLI"); 2072 2073 /** 2074 * Constant for the "Rejang" Unicode character block. 2075 * @since 1.7 2076 */ 2077 public static final UnicodeBlock REJANG = 2078 new UnicodeBlock("REJANG"); 2079 2080 /** 2081 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2082 * @since 1.7 2083 */ 2084 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2085 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2086 "HANGUL JAMO EXTENDED-A", 2087 "HANGULJAMOEXTENDED-A"); 2088 2089 /** 2090 * Constant for the "Javanese" Unicode character block. 2091 * @since 1.7 2092 */ 2093 public static final UnicodeBlock JAVANESE = 2094 new UnicodeBlock("JAVANESE"); 2095 2096 /** 2097 * Constant for the "Cham" Unicode character block. 2098 * @since 1.7 2099 */ 2100 public static final UnicodeBlock CHAM = 2101 new UnicodeBlock("CHAM"); 2102 2103 /** 2104 * Constant for the "Myanmar Extended-A" Unicode character block. 2105 * @since 1.7 2106 */ 2107 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2108 new UnicodeBlock("MYANMAR_EXTENDED_A", 2109 "MYANMAR EXTENDED-A", 2110 "MYANMAREXTENDED-A"); 2111 2112 /** 2113 * Constant for the "Tai Viet" Unicode character block. 2114 * @since 1.7 2115 */ 2116 public static final UnicodeBlock TAI_VIET = 2117 new UnicodeBlock("TAI_VIET", 2118 "TAI VIET", 2119 "TAIVIET"); 2120 2121 /** 2122 * Constant for the "Ethiopic Extended-A" Unicode character block. 2123 * @since 1.7 2124 */ 2125 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2126 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2127 "ETHIOPIC EXTENDED-A", 2128 "ETHIOPICEXTENDED-A"); 2129 2130 /** 2131 * Constant for the "Meetei Mayek" Unicode character block. 
* @since 1.7
         */
        public static final UnicodeBlock MEETEI_MAYEK =
            new UnicodeBlock("MEETEI_MAYEK",
                             "MEETEI MAYEK",
                             "MEETEIMAYEK");

        /**
         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
         * The block spans code points U+D7B0 through U+D7FF.
         * @since 1.7
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
                             "HANGUL JAMO EXTENDED-B",
                             "HANGULJAMOEXTENDED-B");

        /**
         * Constant for the "Vertical Forms" Unicode character block.
         * The block spans code points U+FE10 through U+FE1F.
         * @since 1.7
         */
        public static final UnicodeBlock VERTICAL_FORMS =
            new UnicodeBlock("VERTICAL_FORMS",
                             "VERTICAL FORMS",
                             "VERTICALFORMS");

        /**
         * Constant for the "Ancient Greek Numbers" Unicode character block.
         * The block spans code points U+10140 through U+1018F.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
                             "ANCIENT GREEK NUMBERS",
                             "ANCIENTGREEKNUMBERS");

        /**
         * Constant for the "Ancient Symbols" Unicode character block.
         * The block spans code points U+10190 through U+101CF.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
            new UnicodeBlock("ANCIENT_SYMBOLS",
                             "ANCIENT SYMBOLS",
                             "ANCIENTSYMBOLS");

        /**
         * Constant for the "Phaistos Disc" Unicode character block.
         * The block spans code points U+101D0 through U+101FF.
         * @since 1.7
         */
        public static final UnicodeBlock PHAISTOS_DISC =
            new UnicodeBlock("PHAISTOS_DISC",
                             "PHAISTOS DISC",
                             "PHAISTOSDISC");

        /**
         * Constant for the "Lycian" Unicode character block.
         * The block spans code points U+10280 through U+1029F.
         * @since 1.7
         */
        public static final UnicodeBlock LYCIAN =
            new UnicodeBlock("LYCIAN");

        /**
         * Constant for the "Carian" Unicode character block.
         * The block spans code points U+102A0 through U+102DF.
         * @since 1.7
         */
        public static final UnicodeBlock CARIAN =
            new UnicodeBlock("CARIAN");

        /**
         * Constant for the "Old Persian" Unicode character block.
         * The block spans code points U+103A0 through U+103DF.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_PERSIAN =
            new UnicodeBlock("OLD_PERSIAN",
                             "OLD PERSIAN",
                             "OLDPERSIAN");

        /**
         * Constant for the "Imperial Aramaic" Unicode character block.
         * The block spans code points U+10840 through U+1085F.
         * @since 1.7
         */
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
            new UnicodeBlock("IMPERIAL_ARAMAIC",
                             "IMPERIAL ARAMAIC",
                             "IMPERIALARAMAIC");

        /**
         * Constant for the "Phoenician" Unicode character block.
         * The block spans code points U+10900 through U+1091F.
         * @since 1.7
         */
        public static final UnicodeBlock PHOENICIAN =
            new UnicodeBlock("PHOENICIAN");

        /**
         * Constant for the "Lydian" Unicode character block.
         * The block spans code points U+10920 through U+1093F.
         * @since 1.7
         */
        public static final UnicodeBlock LYDIAN =
            new UnicodeBlock("LYDIAN");

        /**
         * Constant for the "Kharoshthi" Unicode character block.
         * The block spans code points U+10A00 through U+10A5F.
         * @since 1.7
         */
        public static final UnicodeBlock KHAROSHTHI =
            new UnicodeBlock("KHAROSHTHI");

        /**
         * Constant for the "Old South Arabian" Unicode character block.
         * The block spans code points U+10A60 through U+10A7F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
            new UnicodeBlock("OLD_SOUTH_ARABIAN",
                             "OLD SOUTH ARABIAN",
                             "OLDSOUTHARABIAN");

        /**
         * Constant for the "Avestan" Unicode character block.
         * The block spans code points U+10B00 through U+10B3F.
         * @since 1.7
         */
        public static final UnicodeBlock AVESTAN =
            new UnicodeBlock("AVESTAN");

        /**
         * Constant for the "Inscriptional Parthian" Unicode character block.
         * The block spans code points U+10B40 through U+10B5F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
                             "INSCRIPTIONAL PARTHIAN",
                             "INSCRIPTIONALPARTHIAN");

        /**
         * Constant for the "Inscriptional Pahlavi" Unicode character block.
         * The block spans code points U+10B60 through U+10B7F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
                             "INSCRIPTIONAL PAHLAVI",
                             "INSCRIPTIONALPAHLAVI");

        /**
         * Constant for the "Old Turkic" Unicode character block.
         * The block spans code points U+10C00 through U+10C4F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_TURKIC =
            new UnicodeBlock("OLD_TURKIC",
                             "OLD TURKIC",
                             "OLDTURKIC");

        /**
         * Constant for the "Rumi Numeral Symbols" Unicode character block.
         * The block spans code points U+10E60 through U+10E7F.
         * @since 1.7
         */
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
                             "RUMI NUMERAL SYMBOLS",
                             "RUMINUMERALSYMBOLS");

        /**
         * Constant for the "Brahmi" Unicode character block.
         * The block spans code points U+11000 through U+1107F.
         * @since 1.7
         */
        public static final UnicodeBlock BRAHMI =
            new UnicodeBlock("BRAHMI");

        /**
         * Constant for the "Kaithi" Unicode character block.
         * The block spans code points U+11080 through U+110CF.
         * @since 1.7
         */
        public static final UnicodeBlock KAITHI =
            new UnicodeBlock("KAITHI");

        /**
         * Constant for the "Cuneiform" Unicode character block.
         * The block spans code points U+12000 through U+123FF.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM =
            new UnicodeBlock("CUNEIFORM");

        /**
         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
         * character block.
         * The block spans code points U+12400 through U+1247F.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                             "CUNEIFORM NUMBERS AND PUNCTUATION",
                             "CUNEIFORMNUMBERSANDPUNCTUATION");

        /**
         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
         * The block spans code points U+13000 through U+1342F.
         * @since 1.7
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
                             "EGYPTIAN HIEROGLYPHS",
                             "EGYPTIANHIEROGLYPHS");

        /**
         * Constant for the "Bamum Supplement" Unicode character block.
         * The block spans code points U+16800 through U+16A3F.
         * @since 1.7
         */
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
            new UnicodeBlock("BAMUM_SUPPLEMENT",
                             "BAMUM SUPPLEMENT",
                             "BAMUMSUPPLEMENT");

        /**
         * Constant for the "Kana Supplement" Unicode character block.
         * The block spans code points U+1B000 through U+1B0FF.
         * @since 1.7
         */
        public static final UnicodeBlock KANA_SUPPLEMENT =
            new UnicodeBlock("KANA_SUPPLEMENT",
                             "KANA SUPPLEMENT",
                             "KANASUPPLEMENT");

        /**
         * Constant for the "Ancient Greek Musical Notation" Unicode character
         * block.
         * The block spans code points U+1D200 through U+1D24F.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                             "ANCIENT GREEK MUSICAL NOTATION",
                             "ANCIENTGREEKMUSICALNOTATION");

        /**
         * Constant for the "Counting Rod Numerals" Unicode character block.
         * The block spans code points U+1D360 through U+1D37F.
         * @since 1.7
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
            new UnicodeBlock("COUNTING_ROD_NUMERALS",
                             "COUNTING ROD NUMERALS",
                             "COUNTINGRODNUMERALS");

        /**
         * Constant for the "Mahjong Tiles" Unicode character block.
         * The block spans code points U+1F000 through U+1F02F.
         * @since 1.7
         */
        public static final UnicodeBlock MAHJONG_TILES =
            new UnicodeBlock("MAHJONG_TILES",
                             "MAHJONG TILES",
                             "MAHJONGTILES");

        /**
         * Constant for the "Domino Tiles" Unicode character block.
         * The block spans code points U+1F030 through U+1F09F.
         * @since 1.7
         */
        public static final UnicodeBlock DOMINO_TILES =
            new UnicodeBlock("DOMINO_TILES",
                             "DOMINO TILES",
                             "DOMINOTILES");

        /**
         * Constant for the "Playing Cards" Unicode character block.
         * The block spans code points U+1F0A0 through U+1F0FF.
         * @since 1.7
         */
        public static final UnicodeBlock PLAYING_CARDS =
            new UnicodeBlock("PLAYING_CARDS",
                             "PLAYING CARDS",
                             "PLAYINGCARDS");

        /**
         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
         * block.
         * The block spans code points U+1F100 through U+1F1FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
                             "ENCLOSEDALPHANUMERICSUPPLEMENT");

        /**
         * Constant for the "Enclosed Ideographic Supplement" Unicode character
         * block.
         * The block spans code points U+1F200 through U+1F2FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");

        /**
         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
         * character block.
         * The block spans code points U+1F300 through U+1F5FF.
         * @since 1.7
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");

        /**
         * Constant for the "Emoticons" Unicode character block.
         * The block spans code points U+1F600 through U+1F64F.
         * @since 1.7
         */
        public static final UnicodeBlock EMOTICONS =
            new UnicodeBlock("EMOTICONS");

        /**
         * Constant for the "Transport And Map Symbols" Unicode character block.
         * The block spans code points U+1F680 through U+1F6FF.
         * @since 1.7
         */
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
                             "TRANSPORT AND MAP SYMBOLS",
                             "TRANSPORTANDMAPSYMBOLS");

        /**
         * Constant for the "Alchemical Symbols" Unicode character block.
         * The block spans code points U+1F700 through U+1F77F.
         * @since 1.7
         */
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
                             "ALCHEMICAL SYMBOLS",
                             "ALCHEMICALSYMBOLS");

        /**
         * Constant for the "CJK Unified Ideographs Extension C" Unicode
         * character block.
* @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");

        /**
         * Constant for the "CJK Unified Ideographs Extension D" Unicode
         * character block.
         * The block spans code points U+2B740 through U+2B81F.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");

        /**
         * Start code points, in ascending order, of consecutive code point
         * ranges. The trailing comment on each entry gives the range and the
         * name of the block occupying it, or "unassigned" for a gap between
         * blocks.
         * <p>
         * Entry {@code i} pairs with {@code blocks[i]}: {@link #of(int)}
         * binary-searches this array and uses the resulting index to read
         * {@code blocks}, so the two arrays must keep the same length and
         * the same order.
         */
        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   //             unassigned
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   //             unassigned
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   //             unassigned
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   //             unassigned
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   //             unassigned
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   //             unassigned
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  //               unassigned
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  //               unassigned
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  //               unassigned
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  //               unassigned
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  //               unassigned
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  //               unassigned
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  //               unassigned
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
        };

        /**
         * The block for each range listed in {@code blockStarts}, stored at
         * the same index. A {@code null} entry lines up with a range marked
         * "unassigned" in {@code blockStarts}; {@link #of(int)} returns that
         * {@code null} for code points falling in such a range.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,   // unassigned (from 0x0860)
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,   // unassigned (from 0x1AB0)
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,   // unassigned (from 0x1C80)
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,   // unassigned (from 0x2FE0)
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,   // unassigned (from 0xA9E0)
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            null,   // unassigned (from 0xAAE0)
            ETHIOPIC_EXTENDED_A,
            null,   // unassigned (from 0xAB30)
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,   // unassigned (from 0x10200)
            LYCIAN,
            CARIAN,
            null,   // unassigned (from 0x102E0)
            OLD_ITALIC,
            GOTHIC,
            null,   // unassigned (from 0x10350)
            UGARITIC,
            OLD_PERSIAN,
            null,   // unassigned (from 0x103E0)
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,   // unassigned (from 0x104B0)
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,   // unassigned (from 0x10860)
            PHOENICIAN,
            LYDIAN,
            null,   // unassigned (from 0x10940)
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,   // unassigned (from 0x10A80)
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,   // unassigned (from 0x10B80)
            OLD_TURKIC,
            null,   // unassigned (from 0x10C50)
            RUMI_NUMERAL_SYMBOLS,
            null,   // unassigned (from 0x10E80)
            BRAHMI,
            KAITHI,
            null,   // unassigned (from 0x110D0)
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,   // unassigned (from 0x12480)
            EGYPTIAN_HIEROGLYPHS,
            null,   // unassigned (from 0x13430)
            BAMUM_SUPPLEMENT,
            null,   // unassigned (from 0x16A40)
            KANA_SUPPLEMENT,
            null,   // unassigned (from 0x1B100)
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,   // unassigned (from 0x1D250)
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,   // unassigned (from 0x1D380)
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,   // unassigned (from 0x1D800)
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,   // unassigned (from 0x1F650)
            TRANSPORT_AND_MAP_SYMBOLS,
ALCHEMICAL_SYMBOLS, 2942 null, 2943 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 2944 null, 2945 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 2946 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 2947 null, 2948 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 2949 null, 2950 TAGS, 2951 null, 2952 VARIATION_SELECTORS_SUPPLEMENT, 2953 null, 2954 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 2955 SUPPLEMENTARY_PRIVATE_USE_AREA_B 2956 }; 2957 2958 2959 /** 2960 * Returns the object representing the Unicode block containing the 2961 * given character, or {@code null} if the character is not a 2962 * member of a defined block. 2963 * 2964 * <p><b>Note:</b> This method cannot handle 2965 * <a href="Character.html#supplementary"> supplementary 2966 * characters</a>. To support all Unicode characters, including 2967 * supplementary characters, use the {@link #of(int)} method. 2968 * 2969 * @param c The character in question 2970 * @return The {@code UnicodeBlock} instance representing the 2971 * Unicode block of which this character is a member, or 2972 * {@code null} if the character is not a member of any 2973 * Unicode block 2974 */ 2975 public static UnicodeBlock of(char c) { 2976 return of((int)c); 2977 } 2978 2979 /** 2980 * Returns the object representing the Unicode block 2981 * containing the given character (Unicode code point), or 2982 * {@code null} if the character is not a member of a 2983 * defined block. 2984 * 2985 * @param codePoint the character (Unicode code point) in question. 2986 * @return The {@code UnicodeBlock} instance representing the 2987 * Unicode block of which this character is a member, or 2988 * {@code null} if the character is not a member of any 2989 * Unicode block 2990 * @exception IllegalArgumentException if the specified 2991 * {@code codePoint} is an invalid Unicode code point. 
2992 * @see Character#isValidCodePoint(int) 2993 * @since 1.5 2994 */ 2995 public static UnicodeBlock of(int codePoint) { 2996 if (!isValidCodePoint(codePoint)) { 2997 throw new IllegalArgumentException(); 2998 } 2999 3000 int top, bottom, current; 3001 bottom = 0; 3002 top = blockStarts.length; 3003 current = top/2; 3004 3005 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3006 while (top - bottom > 1) { 3007 if (codePoint >= blockStarts[current]) { 3008 bottom = current; 3009 } else { 3010 top = current; 3011 } 3012 current = (top + bottom) / 2; 3013 } 3014 return blocks[current]; 3015 } 3016 3017 /** 3018 * Returns the UnicodeBlock with the given name. Block 3019 * names are determined by The Unicode Standard. The file 3020 * Blocks-<version>.txt defines blocks for a particular 3021 * version of the standard. The {@link Character} class specifies 3022 * the version of the standard that it supports. 3023 * <p> 3024 * This method accepts block names in the following forms: 3025 * <ol> 3026 * <li> Canonical block names as defined by the Unicode Standard. 3027 * For example, the standard defines a "Basic Latin" block. Therefore, this 3028 * method accepts "Basic Latin" as a valid block name. The documentation of 3029 * each UnicodeBlock provides the canonical name. 3030 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3031 * is a valid block name for the "Basic Latin" block. 3032 * <li>The text representation of each constant UnicodeBlock identifier. 3033 * For example, this method will return the {@link #BASIC_LATIN} block if 3034 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3035 * hyphens in the canonical name with underscores. 3036 * </ol> 3037 * Finally, character case is ignored for all of the valid block name forms. 3038 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 
3039 * The en_US locale's case mapping rules are used to provide case-insensitive 3040 * string comparisons for block name validation. 3041 * <p> 3042 * If the Unicode Standard changes block names, both the previous and 3043 * current names will be accepted. 3044 * 3045 * @param blockName A {@code UnicodeBlock} name. 3046 * @return The {@code UnicodeBlock} instance identified 3047 * by {@code blockName} 3048 * @throws IllegalArgumentException if {@code blockName} is an 3049 * invalid name 3050 * @throws NullPointerException if {@code blockName} is null 3051 * @since 1.5 3052 */ 3053 public static final UnicodeBlock forName(String blockName) { 3054 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3055 if (block == null) { 3056 throw new IllegalArgumentException(); 3057 } 3058 return block; 3059 } 3060 } 3061 3062 3063 /** 3064 * A family of character subsets representing the character scripts 3065 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3066 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3067 * character is assigned to a single Unicode script, either a specific 3068 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3069 * one of the following three special values, 3070 * {@link Character.UnicodeScript#INHERITED Inherited}, 3071 * {@link Character.UnicodeScript#COMMON Common} or 3072 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3073 * 3074 * @since 1.7 3075 */ 3076 public static enum UnicodeScript { 3077 /** 3078 * Unicode script "Common". 3079 */ 3080 COMMON, 3081 3082 /** 3083 * Unicode script "Latin". 3084 */ 3085 LATIN, 3086 3087 /** 3088 * Unicode script "Greek". 3089 */ 3090 GREEK, 3091 3092 /** 3093 * Unicode script "Cyrillic". 3094 */ 3095 CYRILLIC, 3096 3097 /** 3098 * Unicode script "Armenian". 3099 */ 3100 ARMENIAN, 3101 3102 /** 3103 * Unicode script "Hebrew". 3104 */ 3105 HEBREW, 3106 3107 /** 3108 * Unicode script "Arabic". 
3109 */ 3110 ARABIC, 3111 3112 /** 3113 * Unicode script "Syriac". 3114 */ 3115 SYRIAC, 3116 3117 /** 3118 * Unicode script "Thaana". 3119 */ 3120 THAANA, 3121 3122 /** 3123 * Unicode script "Devanagari". 3124 */ 3125 DEVANAGARI, 3126 3127 /** 3128 * Unicode script "Bengali". 3129 */ 3130 BENGALI, 3131 3132 /** 3133 * Unicode script "Gurmukhi". 3134 */ 3135 GURMUKHI, 3136 3137 /** 3138 * Unicode script "Gujarati". 3139 */ 3140 GUJARATI, 3141 3142 /** 3143 * Unicode script "Oriya". 3144 */ 3145 ORIYA, 3146 3147 /** 3148 * Unicode script "Tamil". 3149 */ 3150 TAMIL, 3151 3152 /** 3153 * Unicode script "Telugu". 3154 */ 3155 TELUGU, 3156 3157 /** 3158 * Unicode script "Kannada". 3159 */ 3160 KANNADA, 3161 3162 /** 3163 * Unicode script "Malayalam". 3164 */ 3165 MALAYALAM, 3166 3167 /** 3168 * Unicode script "Sinhala". 3169 */ 3170 SINHALA, 3171 3172 /** 3173 * Unicode script "Thai". 3174 */ 3175 THAI, 3176 3177 /** 3178 * Unicode script "Lao". 3179 */ 3180 LAO, 3181 3182 /** 3183 * Unicode script "Tibetan". 3184 */ 3185 TIBETAN, 3186 3187 /** 3188 * Unicode script "Myanmar". 3189 */ 3190 MYANMAR, 3191 3192 /** 3193 * Unicode script "Georgian". 3194 */ 3195 GEORGIAN, 3196 3197 /** 3198 * Unicode script "Hangul". 3199 */ 3200 HANGUL, 3201 3202 /** 3203 * Unicode script "Ethiopic". 3204 */ 3205 ETHIOPIC, 3206 3207 /** 3208 * Unicode script "Cherokee". 3209 */ 3210 CHEROKEE, 3211 3212 /** 3213 * Unicode script "Canadian_Aboriginal". 3214 */ 3215 CANADIAN_ABORIGINAL, 3216 3217 /** 3218 * Unicode script "Ogham". 3219 */ 3220 OGHAM, 3221 3222 /** 3223 * Unicode script "Runic". 3224 */ 3225 RUNIC, 3226 3227 /** 3228 * Unicode script "Khmer". 3229 */ 3230 KHMER, 3231 3232 /** 3233 * Unicode script "Mongolian". 3234 */ 3235 MONGOLIAN, 3236 3237 /** 3238 * Unicode script "Hiragana". 3239 */ 3240 HIRAGANA, 3241 3242 /** 3243 * Unicode script "Katakana". 3244 */ 3245 KATAKANA, 3246 3247 /** 3248 * Unicode script "Bopomofo". 
3249 */ 3250 BOPOMOFO, 3251 3252 /** 3253 * Unicode script "Han". 3254 */ 3255 HAN, 3256 3257 /** 3258 * Unicode script "Yi". 3259 */ 3260 YI, 3261 3262 /** 3263 * Unicode script "Old_Italic". 3264 */ 3265 OLD_ITALIC, 3266 3267 /** 3268 * Unicode script "Gothic". 3269 */ 3270 GOTHIC, 3271 3272 /** 3273 * Unicode script "Deseret". 3274 */ 3275 DESERET, 3276 3277 /** 3278 * Unicode script "Inherited". 3279 */ 3280 INHERITED, 3281 3282 /** 3283 * Unicode script "Tagalog". 3284 */ 3285 TAGALOG, 3286 3287 /** 3288 * Unicode script "Hanunoo". 3289 */ 3290 HANUNOO, 3291 3292 /** 3293 * Unicode script "Buhid". 3294 */ 3295 BUHID, 3296 3297 /** 3298 * Unicode script "Tagbanwa". 3299 */ 3300 TAGBANWA, 3301 3302 /** 3303 * Unicode script "Limbu". 3304 */ 3305 LIMBU, 3306 3307 /** 3308 * Unicode script "Tai_Le". 3309 */ 3310 TAI_LE, 3311 3312 /** 3313 * Unicode script "Linear_B". 3314 */ 3315 LINEAR_B, 3316 3317 /** 3318 * Unicode script "Ugaritic". 3319 */ 3320 UGARITIC, 3321 3322 /** 3323 * Unicode script "Shavian". 3324 */ 3325 SHAVIAN, 3326 3327 /** 3328 * Unicode script "Osmanya". 3329 */ 3330 OSMANYA, 3331 3332 /** 3333 * Unicode script "Cypriot". 3334 */ 3335 CYPRIOT, 3336 3337 /** 3338 * Unicode script "Braille". 3339 */ 3340 BRAILLE, 3341 3342 /** 3343 * Unicode script "Buginese". 3344 */ 3345 BUGINESE, 3346 3347 /** 3348 * Unicode script "Coptic". 3349 */ 3350 COPTIC, 3351 3352 /** 3353 * Unicode script "New_Tai_Lue". 3354 */ 3355 NEW_TAI_LUE, 3356 3357 /** 3358 * Unicode script "Glagolitic". 3359 */ 3360 GLAGOLITIC, 3361 3362 /** 3363 * Unicode script "Tifinagh". 3364 */ 3365 TIFINAGH, 3366 3367 /** 3368 * Unicode script "Syloti_Nagri". 3369 */ 3370 SYLOTI_NAGRI, 3371 3372 /** 3373 * Unicode script "Old_Persian". 3374 */ 3375 OLD_PERSIAN, 3376 3377 /** 3378 * Unicode script "Kharoshthi". 3379 */ 3380 KHAROSHTHI, 3381 3382 /** 3383 * Unicode script "Balinese". 3384 */ 3385 BALINESE, 3386 3387 /** 3388 * Unicode script "Cuneiform". 
3389 */ 3390 CUNEIFORM, 3391 3392 /** 3393 * Unicode script "Phoenician". 3394 */ 3395 PHOENICIAN, 3396 3397 /** 3398 * Unicode script "Phags_Pa". 3399 */ 3400 PHAGS_PA, 3401 3402 /** 3403 * Unicode script "Nko". 3404 */ 3405 NKO, 3406 3407 /** 3408 * Unicode script "Sundanese". 3409 */ 3410 SUNDANESE, 3411 3412 /** 3413 * Unicode script "Batak". 3414 */ 3415 BATAK, 3416 3417 /** 3418 * Unicode script "Lepcha". 3419 */ 3420 LEPCHA, 3421 3422 /** 3423 * Unicode script "Ol_Chiki". 3424 */ 3425 OL_CHIKI, 3426 3427 /** 3428 * Unicode script "Vai". 3429 */ 3430 VAI, 3431 3432 /** 3433 * Unicode script "Saurashtra". 3434 */ 3435 SAURASHTRA, 3436 3437 /** 3438 * Unicode script "Kayah_Li". 3439 */ 3440 KAYAH_LI, 3441 3442 /** 3443 * Unicode script "Rejang". 3444 */ 3445 REJANG, 3446 3447 /** 3448 * Unicode script "Lycian". 3449 */ 3450 LYCIAN, 3451 3452 /** 3453 * Unicode script "Carian". 3454 */ 3455 CARIAN, 3456 3457 /** 3458 * Unicode script "Lydian". 3459 */ 3460 LYDIAN, 3461 3462 /** 3463 * Unicode script "Cham". 3464 */ 3465 CHAM, 3466 3467 /** 3468 * Unicode script "Tai_Tham". 3469 */ 3470 TAI_THAM, 3471 3472 /** 3473 * Unicode script "Tai_Viet". 3474 */ 3475 TAI_VIET, 3476 3477 /** 3478 * Unicode script "Avestan". 3479 */ 3480 AVESTAN, 3481 3482 /** 3483 * Unicode script "Egyptian_Hieroglyphs". 3484 */ 3485 EGYPTIAN_HIEROGLYPHS, 3486 3487 /** 3488 * Unicode script "Samaritan". 3489 */ 3490 SAMARITAN, 3491 3492 /** 3493 * Unicode script "Mandaic". 3494 */ 3495 MANDAIC, 3496 3497 /** 3498 * Unicode script "Lisu". 3499 */ 3500 LISU, 3501 3502 /** 3503 * Unicode script "Bamum". 3504 */ 3505 BAMUM, 3506 3507 /** 3508 * Unicode script "Javanese". 3509 */ 3510 JAVANESE, 3511 3512 /** 3513 * Unicode script "Meetei_Mayek". 3514 */ 3515 MEETEI_MAYEK, 3516 3517 /** 3518 * Unicode script "Imperial_Aramaic". 3519 */ 3520 IMPERIAL_ARAMAIC, 3521 3522 /** 3523 * Unicode script "Old_South_Arabian". 
3524 */ 3525 OLD_SOUTH_ARABIAN, 3526 3527 /** 3528 * Unicode script "Inscriptional_Parthian". 3529 */ 3530 INSCRIPTIONAL_PARTHIAN, 3531 3532 /** 3533 * Unicode script "Inscriptional_Pahlavi". 3534 */ 3535 INSCRIPTIONAL_PAHLAVI, 3536 3537 /** 3538 * Unicode script "Old_Turkic". 3539 */ 3540 OLD_TURKIC, 3541 3542 /** 3543 * Unicode script "Brahmi". 3544 */ 3545 BRAHMI, 3546 3547 /** 3548 * Unicode script "Kaithi". 3549 */ 3550 KAITHI, 3551 3552 /** 3553 * Unicode script "Unknown". 3554 */ 3555 UNKNOWN; 3556 3557 private static final int[] scriptStarts = { 3558 0x0000, // 0000..0040; COMMON 3559 0x0041, // 0041..005A; LATIN 3560 0x005B, // 005B..0060; COMMON 3561 0x0061, // 0061..007A; LATIN 3562 0x007B, // 007B..00A9; COMMON 3563 0x00AA, // 00AA..00AA; LATIN 3564 0x00AB, // 00AB..00B9; COMMON 3565 0x00BA, // 00BA..00BA; LATIN 3566 0x00BB, // 00BB..00BF; COMMON 3567 0x00C0, // 00C0..00D6; LATIN 3568 0x00D7, // 00D7..00D7; COMMON 3569 0x00D8, // 00D8..00F6; LATIN 3570 0x00F7, // 00F7..00F7; COMMON 3571 0x00F8, // 00F8..02B8; LATIN 3572 0x02B9, // 02B9..02DF; COMMON 3573 0x02E0, // 02E0..02E4; LATIN 3574 0x02E5, // 02E5..02E9; COMMON 3575 0x02EA, // 02EA..02EB; BOPOMOFO 3576 0x02EC, // 02EC..02FF; COMMON 3577 0x0300, // 0300..036F; INHERITED 3578 0x0370, // 0370..0373; GREEK 3579 0x0374, // 0374..0374; COMMON 3580 0x0375, // 0375..037D; GREEK 3581 0x037E, // 037E..0383; COMMON 3582 0x0384, // 0384..0384; GREEK 3583 0x0385, // 0385..0385; COMMON 3584 0x0386, // 0386..0386; GREEK 3585 0x0387, // 0387..0387; COMMON 3586 0x0388, // 0388..03E1; GREEK 3587 0x03E2, // 03E2..03EF; COPTIC 3588 0x03F0, // 03F0..03FF; GREEK 3589 0x0400, // 0400..0484; CYRILLIC 3590 0x0485, // 0485..0486; INHERITED 3591 0x0487, // 0487..0530; CYRILLIC 3592 0x0531, // 0531..0588; ARMENIAN 3593 0x0589, // 0589..0589; COMMON 3594 0x058A, // 058A..0590; ARMENIAN 3595 0x0591, // 0591..05FF; HEBREW 3596 0x0600, // 0600..060B; ARABIC 3597 0x060C, // 060C..060C; COMMON 3598 0x060D, // 060D..061A; ARABIC 
3599 0x061B, // 061B..061D; COMMON 3600 0x061E, // 061E..061E; ARABIC 3601 0x061F, // 061F..061F; COMMON 3602 0x0620, // 0620..063F; ARABIC 3603 0x0640, // 0640..0640; COMMON 3604 0x0641, // 0641..064A; ARABIC 3605 0x064B, // 064B..0655; INHERITED 3606 0x0656, // 0656..065E; ARABIC 3607 0x065F, // 065F..065F; INHERITED 3608 0x0660, // 0660..0669; COMMON 3609 0x066A, // 066A..066F; ARABIC 3610 0x0670, // 0670..0670; INHERITED 3611 0x0671, // 0671..06DC; ARABIC 3612 0x06DD, // 06DD..06DD; COMMON 3613 0x06DE, // 06DE..06FF; ARABIC 3614 0x0700, // 0700..074F; SYRIAC 3615 0x0750, // 0750..077F; ARABIC 3616 0x0780, // 0780..07BF; THAANA 3617 0x07C0, // 07C0..07FF; NKO 3618 0x0800, // 0800..083F; SAMARITAN 3619 0x0840, // 0840..08FF; MANDAIC 3620 0x0900, // 0900..0950; DEVANAGARI 3621 0x0951, // 0951..0952; INHERITED 3622 0x0953, // 0953..0963; DEVANAGARI 3623 0x0964, // 0964..0965; COMMON 3624 0x0966, // 0966..096F; DEVANAGARI 3625 0x0970, // 0970..0970; COMMON 3626 0x0971, // 0971..0980; DEVANAGARI 3627 0x0981, // 0981..0A00; BENGALI 3628 0x0A01, // 0A01..0A80; GURMUKHI 3629 0x0A81, // 0A81..0B00; GUJARATI 3630 0x0B01, // 0B01..0B81; ORIYA 3631 0x0B82, // 0B82..0C00; TAMIL 3632 0x0C01, // 0C01..0C81; TELUGU 3633 0x0C82, // 0C82..0CF0; KANNADA 3634 0x0D02, // 0D02..0D81; MALAYALAM 3635 0x0D82, // 0D82..0E00; SINHALA 3636 0x0E01, // 0E01..0E3E; THAI 3637 0x0E3F, // 0E3F..0E3F; COMMON 3638 0x0E40, // 0E40..0E80; THAI 3639 0x0E81, // 0E81..0EFF; LAO 3640 0x0F00, // 0F00..0FD4; TIBETAN 3641 0x0FD5, // 0FD5..0FD8; COMMON 3642 0x0FD9, // 0FD9..0FFF; TIBETAN 3643 0x1000, // 1000..109F; MYANMAR 3644 0x10A0, // 10A0..10FA; GEORGIAN 3645 0x10FB, // 10FB..10FB; COMMON 3646 0x10FC, // 10FC..10FF; GEORGIAN 3647 0x1100, // 1100..11FF; HANGUL 3648 0x1200, // 1200..139F; ETHIOPIC 3649 0x13A0, // 13A0..13FF; CHEROKEE 3650 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3651 0x1680, // 1680..169F; OGHAM 3652 0x16A0, // 16A0..16EA; RUNIC 3653 0x16EB, // 16EB..16ED; COMMON 3654 0x16EE, // 
16EE..16FF; RUNIC 3655 0x1700, // 1700..171F; TAGALOG 3656 0x1720, // 1720..1734; HANUNOO 3657 0x1735, // 1735..173F; COMMON 3658 0x1740, // 1740..175F; BUHID 3659 0x1760, // 1760..177F; TAGBANWA 3660 0x1780, // 1780..17FF; KHMER 3661 0x1800, // 1800..1801; MONGOLIAN 3662 0x1802, // 1802..1803; COMMON 3663 0x1804, // 1804..1804; MONGOLIAN 3664 0x1805, // 1805..1805; COMMON 3665 0x1806, // 1806..18AF; MONGOLIAN 3666 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3667 0x1900, // 1900..194F; LIMBU 3668 0x1950, // 1950..197F; TAI_LE 3669 0x1980, // 1980..19DF; NEW_TAI_LUE 3670 0x19E0, // 19E0..19FF; KHMER 3671 0x1A00, // 1A00..1A1F; BUGINESE 3672 0x1A20, // 1A20..1AFF; TAI_THAM 3673 0x1B00, // 1B00..1B7F; BALINESE 3674 0x1B80, // 1B80..1BBF; SUNDANESE 3675 0x1BC0, // 1BC0..1BFF; BATAK 3676 0x1C00, // 1C00..1C4F; LEPCHA 3677 0x1C50, // 1C50..1CCF; OL_CHIKI 3678 0x1CD0, // 1CD0..1CD2; INHERITED 3679 0x1CD3, // 1CD3..1CD3; COMMON 3680 0x1CD4, // 1CD4..1CE0; INHERITED 3681 0x1CE1, // 1CE1..1CE1; COMMON 3682 0x1CE2, // 1CE2..1CE8; INHERITED 3683 0x1CE9, // 1CE9..1CEC; COMMON 3684 0x1CED, // 1CED..1CED; INHERITED 3685 0x1CEE, // 1CEE..1CFF; COMMON 3686 0x1D00, // 1D00..1D25; LATIN 3687 0x1D26, // 1D26..1D2A; GREEK 3688 0x1D2B, // 1D2B..1D2B; CYRILLIC 3689 0x1D2C, // 1D2C..1D5C; LATIN 3690 0x1D5D, // 1D5D..1D61; GREEK 3691 0x1D62, // 1D62..1D65; LATIN 3692 0x1D66, // 1D66..1D6A; GREEK 3693 0x1D6B, // 1D6B..1D77; LATIN 3694 0x1D78, // 1D78..1D78; CYRILLIC 3695 0x1D79, // 1D79..1DBE; LATIN 3696 0x1DBF, // 1DBF..1DBF; GREEK 3697 0x1DC0, // 1DC0..1DFF; INHERITED 3698 0x1E00, // 1E00..1EFF; LATIN 3699 0x1F00, // 1F00..1FFF; GREEK 3700 0x2000, // 2000..200B; COMMON 3701 0x200C, // 200C..200D; INHERITED 3702 0x200E, // 200E..2070; COMMON 3703 0x2071, // 2071..2073; LATIN 3704 0x2074, // 2074..207E; COMMON 3705 0x207F, // 207F..207F; LATIN 3706 0x2080, // 2080..208F; COMMON 3707 0x2090, // 2090..209F; LATIN 3708 0x20A0, // 20A0..20CF; COMMON 3709 0x20D0, // 20D0..20FF; INHERITED 3710 
0x2100, // 2100..2125; COMMON 3711 0x2126, // 2126..2126; GREEK 3712 0x2127, // 2127..2129; COMMON 3713 0x212A, // 212A..212B; LATIN 3714 0x212C, // 212C..2131; COMMON 3715 0x2132, // 2132..2132; LATIN 3716 0x2133, // 2133..214D; COMMON 3717 0x214E, // 214E..214E; LATIN 3718 0x214F, // 214F..215F; COMMON 3719 0x2160, // 2160..2188; LATIN 3720 0x2189, // 2189..27FF; COMMON 3721 0x2800, // 2800..28FF; BRAILLE 3722 0x2900, // 2900..2BFF; COMMON 3723 0x2C00, // 2C00..2C5F; GLAGOLITIC 3724 0x2C60, // 2C60..2C7F; LATIN 3725 0x2C80, // 2C80..2CFF; COPTIC 3726 0x2D00, // 2D00..2D2F; GEORGIAN 3727 0x2D30, // 2D30..2D7F; TIFINAGH 3728 0x2D80, // 2D80..2DDF; ETHIOPIC 3729 0x2DE0, // 2DE0..2DFF; CYRILLIC 3730 0x2E00, // 2E00..2E7F; COMMON 3731 0x2E80, // 2E80..2FEF; HAN 3732 0x2FF0, // 2FF0..3004; COMMON 3733 0x3005, // 3005..3005; HAN 3734 0x3006, // 3006..3006; COMMON 3735 0x3007, // 3007..3007; HAN 3736 0x3008, // 3008..3020; COMMON 3737 0x3021, // 3021..3029; HAN 3738 0x302A, // 302A..302D; INHERITED 3739 0x302E, // 302E..302F; HANGUL 3740 0x3030, // 3030..3037; COMMON 3741 0x3038, // 3038..303B; HAN 3742 0x303C, // 303C..3040; COMMON 3743 0x3041, // 3041..3098; HIRAGANA 3744 0x3099, // 3099..309A; INHERITED 3745 0x309B, // 309B..309C; COMMON 3746 0x309D, // 309D..309F; HIRAGANA 3747 0x30A0, // 30A0..30A0; COMMON 3748 0x30A1, // 30A1..30FA; KATAKANA 3749 0x30FB, // 30FB..30FC; COMMON 3750 0x30FD, // 30FD..3104; KATAKANA 3751 0x3105, // 3105..3130; BOPOMOFO 3752 0x3131, // 3131..318F; HANGUL 3753 0x3190, // 3190..319F; COMMON 3754 0x31A0, // 31A0..31BF; BOPOMOFO 3755 0x31C0, // 31C0..31EF; COMMON 3756 0x31F0, // 31F0..31FF; KATAKANA 3757 0x3200, // 3200..321F; HANGUL 3758 0x3220, // 3220..325F; COMMON 3759 0x3260, // 3260..327E; HANGUL 3760 0x327F, // 327F..32CF; COMMON 3761 0x32D0, // 32D0..3357; KATAKANA 3762 0x3358, // 3358..33FF; COMMON 3763 0x3400, // 3400..4DBF; HAN 3764 0x4DC0, // 4DC0..4DFF; COMMON 3765 0x4E00, // 4E00..9FFF; HAN 3766 0xA000, // A000..A4CF; YI 3767 
0xA4D0, // A4D0..A4FF; LISU 3768 0xA500, // A500..A63F; VAI 3769 0xA640, // A640..A69F; CYRILLIC 3770 0xA6A0, // A6A0..A6FF; BAMUM 3771 0xA700, // A700..A721; COMMON 3772 0xA722, // A722..A787; LATIN 3773 0xA788, // A788..A78A; COMMON 3774 0xA78B, // A78B..A7FF; LATIN 3775 0xA800, // A800..A82F; SYLOTI_NAGRI 3776 0xA830, // A830..A83F; COMMON 3777 0xA840, // A840..A87F; PHAGS_PA 3778 0xA880, // A880..A8DF; SAURASHTRA 3779 0xA8E0, // A8E0..A8FF; DEVANAGARI 3780 0xA900, // A900..A92F; KAYAH_LI 3781 0xA930, // A930..A95F; REJANG 3782 0xA960, // A960..A97F; HANGUL 3783 0xA980, // A980..A9FF; JAVANESE 3784 0xAA00, // AA00..AA5F; CHAM 3785 0xAA60, // AA60..AA7F; MYANMAR 3786 0xAA80, // AA80..AB00; TAI_VIET 3787 0xAB01, // AB01..ABBF; ETHIOPIC 3788 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3789 0xAC00, // AC00..D7FB; HANGUL 3790 0xD7FC, // D7FC..F8FF; UNKNOWN 3791 0xF900, // F900..FAFF; HAN 3792 0xFB00, // FB00..FB12; LATIN 3793 0xFB13, // FB13..FB1C; ARMENIAN 3794 0xFB1D, // FB1D..FB4F; HEBREW 3795 0xFB50, // FB50..FD3D; ARABIC 3796 0xFD3E, // FD3E..FD4F; COMMON 3797 0xFD50, // FD50..FDFC; ARABIC 3798 0xFDFD, // FDFD..FDFF; COMMON 3799 0xFE00, // FE00..FE0F; INHERITED 3800 0xFE10, // FE10..FE1F; COMMON 3801 0xFE20, // FE20..FE2F; INHERITED 3802 0xFE30, // FE30..FE6F; COMMON 3803 0xFE70, // FE70..FEFE; ARABIC 3804 0xFEFF, // FEFF..FF20; COMMON 3805 0xFF21, // FF21..FF3A; LATIN 3806 0xFF3B, // FF3B..FF40; COMMON 3807 0xFF41, // FF41..FF5A; LATIN 3808 0xFF5B, // FF5B..FF65; COMMON 3809 0xFF66, // FF66..FF6F; KATAKANA 3810 0xFF70, // FF70..FF70; COMMON 3811 0xFF71, // FF71..FF9D; KATAKANA 3812 0xFF9E, // FF9E..FF9F; COMMON 3813 0xFFA0, // FFA0..FFDF; HANGUL 3814 0xFFE0, // FFE0..FFFF; COMMON 3815 0x10000, // 10000..100FF; LINEAR_B 3816 0x10100, // 10100..1013F; COMMON 3817 0x10140, // 10140..1018F; GREEK 3818 0x10190, // 10190..101FC; COMMON 3819 0x101FD, // 101FD..1027F; INHERITED 3820 0x10280, // 10280..1029F; LYCIAN 3821 0x102A0, // 102A0..102FF; CARIAN 3822 0x10300, // 
10300..1032F; OLD_ITALIC 3823 0x10330, // 10330..1037F; GOTHIC 3824 0x10380, // 10380..1039F; UGARITIC 3825 0x103A0, // 103A0..103FF; OLD_PERSIAN 3826 0x10400, // 10400..1044F; DESERET 3827 0x10450, // 10450..1047F; SHAVIAN 3828 0x10480, // 10480..107FF; OSMANYA 3829 0x10800, // 10800..1083F; CYPRIOT 3830 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3831 0x10900, // 10900..1091F; PHOENICIAN 3832 0x10920, // 10920..109FF; LYDIAN 3833 0x10A00, // 10A00..10A5F; KHAROSHTHI 3834 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 3835 0x10B00, // 10B00..10B3F; AVESTAN 3836 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 3837 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 3838 0x10C00, // 10C00..10E5F; OLD_TURKIC 3839 0x10E60, // 10E60..10FFF; ARABIC 3840 0x11000, // 11000..1107F; BRAHMI 3841 0x11080, // 11080..11FFF; KAITHI 3842 0x12000, // 12000..12FFF; CUNEIFORM 3843 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 3844 0x16800, // 16800..16A38; BAMUM 3845 0x1B000, // 1B000..1B000; KATAKANA 3846 0x1B001, // 1B001..1CFFF; HIRAGANA 3847 0x1D000, // 1D000..1D166; COMMON 3848 0x1D167, // 1D167..1D169; INHERITED 3849 0x1D16A, // 1D16A..1D17A; COMMON 3850 0x1D17B, // 1D17B..1D182; INHERITED 3851 0x1D183, // 1D183..1D184; COMMON 3852 0x1D185, // 1D185..1D18B; INHERITED 3853 0x1D18C, // 1D18C..1D1A9; COMMON 3854 0x1D1AA, // 1D1AA..1D1AD; INHERITED 3855 0x1D1AE, // 1D1AE..1D1FF; COMMON 3856 0x1D200, // 1D200..1D2FF; GREEK 3857 0x1D300, // 1D300..1F1FF; COMMON 3858 0x1F200, // 1F200..1F200; HIRAGANA 3859 0x1F201, // 1F210..1FFFF; COMMON 3860 0x20000, // 20000..E0000; HAN 3861 0xE0001, // E0001..E00FF; COMMON 3862 0xE0100, // E0100..E01EF; INHERITED 3863 0xE01F0 // E01F0..10FFFF; UNKNOWN 3864 3865 }; 3866 3867 private static final UnicodeScript[] scripts = { 3868 COMMON, 3869 LATIN, 3870 COMMON, 3871 LATIN, 3872 COMMON, 3873 LATIN, 3874 COMMON, 3875 LATIN, 3876 COMMON, 3877 LATIN, 3878 COMMON, 3879 LATIN, 3880 COMMON, 3881 LATIN, 3882 COMMON, 3883 LATIN, 3884 COMMON, 3885 BOPOMOFO, 3886 
COMMON, 3887 INHERITED, 3888 GREEK, 3889 COMMON, 3890 GREEK, 3891 COMMON, 3892 GREEK, 3893 COMMON, 3894 GREEK, 3895 COMMON, 3896 GREEK, 3897 COPTIC, 3898 GREEK, 3899 CYRILLIC, 3900 INHERITED, 3901 CYRILLIC, 3902 ARMENIAN, 3903 COMMON, 3904 ARMENIAN, 3905 HEBREW, 3906 ARABIC, 3907 COMMON, 3908 ARABIC, 3909 COMMON, 3910 ARABIC, 3911 COMMON, 3912 ARABIC, 3913 COMMON, 3914 ARABIC, 3915 INHERITED, 3916 ARABIC, 3917 INHERITED, 3918 COMMON, 3919 ARABIC, 3920 INHERITED, 3921 ARABIC, 3922 COMMON, 3923 ARABIC, 3924 SYRIAC, 3925 ARABIC, 3926 THAANA, 3927 NKO, 3928 SAMARITAN, 3929 MANDAIC, 3930 DEVANAGARI, 3931 INHERITED, 3932 DEVANAGARI, 3933 COMMON, 3934 DEVANAGARI, 3935 COMMON, 3936 DEVANAGARI, 3937 BENGALI, 3938 GURMUKHI, 3939 GUJARATI, 3940 ORIYA, 3941 TAMIL, 3942 TELUGU, 3943 KANNADA, 3944 MALAYALAM, 3945 SINHALA, 3946 THAI, 3947 COMMON, 3948 THAI, 3949 LAO, 3950 TIBETAN, 3951 COMMON, 3952 TIBETAN, 3953 MYANMAR, 3954 GEORGIAN, 3955 COMMON, 3956 GEORGIAN, 3957 HANGUL, 3958 ETHIOPIC, 3959 CHEROKEE, 3960 CANADIAN_ABORIGINAL, 3961 OGHAM, 3962 RUNIC, 3963 COMMON, 3964 RUNIC, 3965 TAGALOG, 3966 HANUNOO, 3967 COMMON, 3968 BUHID, 3969 TAGBANWA, 3970 KHMER, 3971 MONGOLIAN, 3972 COMMON, 3973 MONGOLIAN, 3974 COMMON, 3975 MONGOLIAN, 3976 CANADIAN_ABORIGINAL, 3977 LIMBU, 3978 TAI_LE, 3979 NEW_TAI_LUE, 3980 KHMER, 3981 BUGINESE, 3982 TAI_THAM, 3983 BALINESE, 3984 SUNDANESE, 3985 BATAK, 3986 LEPCHA, 3987 OL_CHIKI, 3988 INHERITED, 3989 COMMON, 3990 INHERITED, 3991 COMMON, 3992 INHERITED, 3993 COMMON, 3994 INHERITED, 3995 COMMON, 3996 LATIN, 3997 GREEK, 3998 CYRILLIC, 3999 LATIN, 4000 GREEK, 4001 LATIN, 4002 GREEK, 4003 LATIN, 4004 CYRILLIC, 4005 LATIN, 4006 GREEK, 4007 INHERITED, 4008 LATIN, 4009 GREEK, 4010 COMMON, 4011 INHERITED, 4012 COMMON, 4013 LATIN, 4014 COMMON, 4015 LATIN, 4016 COMMON, 4017 LATIN, 4018 COMMON, 4019 INHERITED, 4020 COMMON, 4021 GREEK, 4022 COMMON, 4023 LATIN, 4024 COMMON, 4025 LATIN, 4026 COMMON, 4027 LATIN, 4028 COMMON, 4029 LATIN, 4030 COMMON, 4031 BRAILLE, 
4032 COMMON, 4033 GLAGOLITIC, 4034 LATIN, 4035 COPTIC, 4036 GEORGIAN, 4037 TIFINAGH, 4038 ETHIOPIC, 4039 CYRILLIC, 4040 COMMON, 4041 HAN, 4042 COMMON, 4043 HAN, 4044 COMMON, 4045 HAN, 4046 COMMON, 4047 HAN, 4048 INHERITED, 4049 HANGUL, 4050 COMMON, 4051 HAN, 4052 COMMON, 4053 HIRAGANA, 4054 INHERITED, 4055 COMMON, 4056 HIRAGANA, 4057 COMMON, 4058 KATAKANA, 4059 COMMON, 4060 KATAKANA, 4061 BOPOMOFO, 4062 HANGUL, 4063 COMMON, 4064 BOPOMOFO, 4065 COMMON, 4066 KATAKANA, 4067 HANGUL, 4068 COMMON, 4069 HANGUL, 4070 COMMON, 4071 KATAKANA, 4072 COMMON, 4073 HAN, 4074 COMMON, 4075 HAN, 4076 YI, 4077 LISU, 4078 VAI, 4079 CYRILLIC, 4080 BAMUM, 4081 COMMON, 4082 LATIN, 4083 COMMON, 4084 LATIN, 4085 SYLOTI_NAGRI, 4086 COMMON, 4087 PHAGS_PA, 4088 SAURASHTRA, 4089 DEVANAGARI, 4090 KAYAH_LI, 4091 REJANG, 4092 HANGUL, 4093 JAVANESE, 4094 CHAM, 4095 MYANMAR, 4096 TAI_VIET, 4097 ETHIOPIC, 4098 MEETEI_MAYEK, 4099 HANGUL, 4100 UNKNOWN, 4101 HAN, 4102 LATIN, 4103 ARMENIAN, 4104 HEBREW, 4105 ARABIC, 4106 COMMON, 4107 ARABIC, 4108 COMMON, 4109 INHERITED, 4110 COMMON, 4111 INHERITED, 4112 COMMON, 4113 ARABIC, 4114 COMMON, 4115 LATIN, 4116 COMMON, 4117 LATIN, 4118 COMMON, 4119 KATAKANA, 4120 COMMON, 4121 KATAKANA, 4122 COMMON, 4123 HANGUL, 4124 COMMON, 4125 LINEAR_B, 4126 COMMON, 4127 GREEK, 4128 COMMON, 4129 INHERITED, 4130 LYCIAN, 4131 CARIAN, 4132 OLD_ITALIC, 4133 GOTHIC, 4134 UGARITIC, 4135 OLD_PERSIAN, 4136 DESERET, 4137 SHAVIAN, 4138 OSMANYA, 4139 CYPRIOT, 4140 IMPERIAL_ARAMAIC, 4141 PHOENICIAN, 4142 LYDIAN, 4143 KHAROSHTHI, 4144 OLD_SOUTH_ARABIAN, 4145 AVESTAN, 4146 INSCRIPTIONAL_PARTHIAN, 4147 INSCRIPTIONAL_PAHLAVI, 4148 OLD_TURKIC, 4149 ARABIC, 4150 BRAHMI, 4151 KAITHI, 4152 CUNEIFORM, 4153 EGYPTIAN_HIEROGLYPHS, 4154 BAMUM, 4155 KATAKANA, 4156 HIRAGANA, 4157 COMMON, 4158 INHERITED, 4159 COMMON, 4160 INHERITED, 4161 COMMON, 4162 INHERITED, 4163 COMMON, 4164 INHERITED, 4165 COMMON, 4166 GREEK, 4167 COMMON, 4168 HIRAGANA, 4169 COMMON, 4170 HAN, 4171 COMMON, 4172 INHERITED, 4173 
UNKNOWN 4174 }; 4175 4176 private static HashMap<String, Character.UnicodeScript> aliases; 4177 static { 4178 aliases = new HashMap<>(128); 4179 aliases.put("ARAB", ARABIC); 4180 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4181 aliases.put("ARMN", ARMENIAN); 4182 aliases.put("AVST", AVESTAN); 4183 aliases.put("BALI", BALINESE); 4184 aliases.put("BAMU", BAMUM); 4185 aliases.put("BATK", BATAK); 4186 aliases.put("BENG", BENGALI); 4187 aliases.put("BOPO", BOPOMOFO); 4188 aliases.put("BRAI", BRAILLE); 4189 aliases.put("BRAH", BRAHMI); 4190 aliases.put("BUGI", BUGINESE); 4191 aliases.put("BUHD", BUHID); 4192 aliases.put("CANS", CANADIAN_ABORIGINAL); 4193 aliases.put("CARI", CARIAN); 4194 aliases.put("CHAM", CHAM); 4195 aliases.put("CHER", CHEROKEE); 4196 aliases.put("COPT", COPTIC); 4197 aliases.put("CPRT", CYPRIOT); 4198 aliases.put("CYRL", CYRILLIC); 4199 aliases.put("DEVA", DEVANAGARI); 4200 aliases.put("DSRT", DESERET); 4201 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4202 aliases.put("ETHI", ETHIOPIC); 4203 aliases.put("GEOR", GEORGIAN); 4204 aliases.put("GLAG", GLAGOLITIC); 4205 aliases.put("GOTH", GOTHIC); 4206 aliases.put("GREK", GREEK); 4207 aliases.put("GUJR", GUJARATI); 4208 aliases.put("GURU", GURMUKHI); 4209 aliases.put("HANG", HANGUL); 4210 aliases.put("HANI", HAN); 4211 aliases.put("HANO", HANUNOO); 4212 aliases.put("HEBR", HEBREW); 4213 aliases.put("HIRA", HIRAGANA); 4214 // it appears we don't have the KATAKANA_OR_HIRAGANA 4215 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4216 aliases.put("ITAL", OLD_ITALIC); 4217 aliases.put("JAVA", JAVANESE); 4218 aliases.put("KALI", KAYAH_LI); 4219 aliases.put("KANA", KATAKANA); 4220 aliases.put("KHAR", KHAROSHTHI); 4221 aliases.put("KHMR", KHMER); 4222 aliases.put("KNDA", KANNADA); 4223 aliases.put("KTHI", KAITHI); 4224 aliases.put("LANA", TAI_THAM); 4225 aliases.put("LAOO", LAO); 4226 aliases.put("LATN", LATIN); 4227 aliases.put("LEPC", LEPCHA); 4228 aliases.put("LIMB", LIMBU); 4229 aliases.put("LINB", LINEAR_B); 4230 
aliases.put("LISU", LISU); 4231 aliases.put("LYCI", LYCIAN); 4232 aliases.put("LYDI", LYDIAN); 4233 aliases.put("MAND", MANDAIC); 4234 aliases.put("MLYM", MALAYALAM); 4235 aliases.put("MONG", MONGOLIAN); 4236 aliases.put("MTEI", MEETEI_MAYEK); 4237 aliases.put("MYMR", MYANMAR); 4238 aliases.put("NKOO", NKO); 4239 aliases.put("OGAM", OGHAM); 4240 aliases.put("OLCK", OL_CHIKI); 4241 aliases.put("ORKH", OLD_TURKIC); 4242 aliases.put("ORYA", ORIYA); 4243 aliases.put("OSMA", OSMANYA); 4244 aliases.put("PHAG", PHAGS_PA); 4245 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4246 aliases.put("PHNX", PHOENICIAN); 4247 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4248 aliases.put("RJNG", REJANG); 4249 aliases.put("RUNR", RUNIC); 4250 aliases.put("SAMR", SAMARITAN); 4251 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4252 aliases.put("SAUR", SAURASHTRA); 4253 aliases.put("SHAW", SHAVIAN); 4254 aliases.put("SINH", SINHALA); 4255 aliases.put("SUND", SUNDANESE); 4256 aliases.put("SYLO", SYLOTI_NAGRI); 4257 aliases.put("SYRC", SYRIAC); 4258 aliases.put("TAGB", TAGBANWA); 4259 aliases.put("TALE", TAI_LE); 4260 aliases.put("TALU", NEW_TAI_LUE); 4261 aliases.put("TAML", TAMIL); 4262 aliases.put("TAVT", TAI_VIET); 4263 aliases.put("TELU", TELUGU); 4264 aliases.put("TFNG", TIFINAGH); 4265 aliases.put("TGLG", TAGALOG); 4266 aliases.put("THAA", THAANA); 4267 aliases.put("THAI", THAI); 4268 aliases.put("TIBT", TIBETAN); 4269 aliases.put("UGAR", UGARITIC); 4270 aliases.put("VAII", VAI); 4271 aliases.put("XPEO", OLD_PERSIAN); 4272 aliases.put("XSUX", CUNEIFORM); 4273 aliases.put("YIII", YI); 4274 aliases.put("ZINH", INHERITED); 4275 aliases.put("ZYYY", COMMON); 4276 aliases.put("ZZZZ", UNKNOWN); 4277 } 4278 4279 /** 4280 * Returns the enum constant representing the Unicode script of which 4281 * the given character (Unicode code point) is assigned to. 4282 * 4283 * @param codePoint the character (Unicode code point) in question. 
4284 * @return The {@code UnicodeScript} constant representing the 4285 * Unicode script of which this character is assigned to. 4286 * 4287 * @exception IllegalArgumentException if the specified 4288 * {@code codePoint} is an invalid Unicode code point. 4289 * @see Character#isValidCodePoint(int) 4290 * 4291 */ 4292 public static UnicodeScript of(int codePoint) { 4293 if (!isValidCodePoint(codePoint)) 4294 throw new IllegalArgumentException(); 4295 int type = getType(codePoint); 4296 // leave SURROGATE and PRIVATE_USE for table lookup 4297 if (type == UNASSIGNED) 4298 return UNKNOWN; 4299 int index = Arrays.binarySearch(scriptStarts, codePoint); 4300 if (index < 0) 4301 index = -index - 2; 4302 return scripts[index]; 4303 } 4304 4305 /** 4306 * Returns the UnicodeScript constant with the given Unicode script 4307 * name or the script name alias. Script names and their aliases are 4308 * determined by The Unicode Standard. The files Scripts<version>.txt 4309 * and PropertyValueAliases<version>.txt define script names 4310 * and the script name aliases for a particular version of the 4311 * standard. The {@link Character} class specifies the version of 4312 * the standard that it supports. 4313 * <p> 4314 * Character case is ignored for all of the valid script names. 4315 * The en_US locale's case mapping rules are used to provide 4316 * case-insensitive string comparisons for script name validation. 4317 * <p> 4318 * 4319 * @param scriptName A {@code UnicodeScript} name. 
4320 * @return The {@code UnicodeScript} constant identified 4321 * by {@code scriptName} 4322 * @throws IllegalArgumentException if {@code scriptName} is an 4323 * invalid name 4324 * @throws NullPointerException if {@code scriptName} is null 4325 */ 4326 public static final UnicodeScript forName(String scriptName) { 4327 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4328 //.replace(' ', '_')); 4329 UnicodeScript sc = aliases.get(scriptName); 4330 if (sc != null) 4331 return sc; 4332 return valueOf(scriptName); 4333 } 4334 } 4335 4336 /** 4337 * The value of the {@code Character}. 4338 * 4339 * @serial 4340 */ 4341 private final char value; 4342 4343 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4344 private static final long serialVersionUID = 3786198910865385080L; 4345 4346 /** 4347 * Constructs a newly allocated {@code Character} object that 4348 * represents the specified {@code char} value. 4349 * 4350 * @param value the value to be represented by the 4351 * {@code Character} object. 4352 */ 4353 public Character(char value) { 4354 this.value = value; 4355 } 4356 4357 private static class CharacterCache { 4358 private CharacterCache(){} 4359 4360 static final Character cache[] = new Character[127 + 1]; 4361 4362 static { 4363 for (int i = 0; i < cache.length; i++) 4364 cache[i] = new Character((char)i); 4365 } 4366 } 4367 4368 /** 4369 * Returns a <tt>Character</tt> instance representing the specified 4370 * <tt>char</tt> value. 4371 * If a new <tt>Character</tt> instance is not required, this method 4372 * should generally be used in preference to the constructor 4373 * {@link #Character(char)}, as this method is likely to yield 4374 * significantly better space and time performance by caching 4375 * frequently requested values. 4376 * 4377 * This method will always cache values in the range {@code 4378 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 4379 * cache other values outside of this range. 
4380 * 4381 * @param c a char value. 4382 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4383 * @since 1.5 4384 */ 4385 public static Character valueOf(char c) { 4386 if (c <= 127) { // must cache 4387 return CharacterCache.cache[(int)c]; 4388 } 4389 return new Character(c); 4390 } 4391 4392 /** 4393 * Returns the value of this {@code Character} object. 4394 * @return the primitive {@code char} value represented by 4395 * this object. 4396 */ 4397 public char charValue() { 4398 return value; 4399 } 4400 4401 /** 4402 * Returns a hash code for this {@code Character}; equal to the result 4403 * of invoking {@code charValue()}. 4404 * 4405 * @return a hash code value for this {@code Character} 4406 */ 4407 public int hashCode() { 4408 return (int)value; 4409 } 4410 4411 /** 4412 * Compares this object against the specified object. 4413 * The result is {@code true} if and only if the argument is not 4414 * {@code null} and is a {@code Character} object that 4415 * represents the same {@code char} value as this object. 4416 * 4417 * @param obj the object to compare with. 4418 * @return {@code true} if the objects are the same; 4419 * {@code false} otherwise. 4420 */ 4421 public boolean equals(Object obj) { 4422 if (obj instanceof Character) { 4423 return value == ((Character)obj).charValue(); 4424 } 4425 return false; 4426 } 4427 4428 /** 4429 * Returns a {@code String} object representing this 4430 * {@code Character}'s value. The result is a string of 4431 * length 1 whose sole component is the primitive 4432 * {@code char} value represented by this 4433 * {@code Character} object. 4434 * 4435 * @return a string representation of this object. 4436 */ 4437 public String toString() { 4438 char buf[] = {value}; 4439 return String.valueOf(buf); 4440 } 4441 4442 /** 4443 * Returns a {@code String} object representing the 4444 * specified {@code char}. The result is a string of length 4445 * 1 consisting solely of the specified {@code char}. 
4446 * 4447 * @param c the {@code char} to be converted 4448 * @return the string representation of the specified {@code char} 4449 * @since 1.4 4450 */ 4451 public static String toString(char c) { 4452 return String.valueOf(c); 4453 } 4454 4455 /** 4456 * Determines whether the specified code point is a valid 4457 * <a href="http://www.unicode.org/glossary/#code_point"> 4458 * Unicode code point value</a>. 4459 * 4460 * @param codePoint the Unicode code point to be tested 4461 * @return {@code true} if the specified code point value is between 4462 * {@link #MIN_CODE_POINT} and 4463 * {@link #MAX_CODE_POINT} inclusive; 4464 * {@code false} otherwise. 4465 * @since 1.5 4466 */ 4467 public static boolean isValidCodePoint(int codePoint) { 4468 // Optimized form of: 4469 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4470 int plane = codePoint >>> 16; 4471 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4472 } 4473 4474 /** 4475 * Determines whether the specified character (Unicode code point) 4476 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4477 * Such code points can be represented using a single {@code char}. 4478 * 4479 * @param codePoint the character (Unicode code point) to be tested 4480 * @return {@code true} if the specified code point is between 4481 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4482 * {@code false} otherwise. 4483 * @since 1.7 4484 */ 4485 public static boolean isBmpCodePoint(int codePoint) { 4486 return codePoint >>> 16 == 0; 4487 // Optimized form of: 4488 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4489 // We consistently use logical shift (>>>) to facilitate 4490 // additional runtime optimizations. 4491 } 4492 4493 /** 4494 * Determines whether the specified character (Unicode code point) 4495 * is in the <a href="#supplementary">supplementary character</a> range. 
4496 * 4497 * @param codePoint the character (Unicode code point) to be tested 4498 * @return {@code true} if the specified code point is between 4499 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4500 * {@link #MAX_CODE_POINT} inclusive; 4501 * {@code false} otherwise. 4502 * @since 1.5 4503 */ 4504 public static boolean isSupplementaryCodePoint(int codePoint) { 4505 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4506 && codePoint < MAX_CODE_POINT + 1; 4507 } 4508 4509 /** 4510 * Determines if the given {@code char} value is a 4511 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4512 * Unicode high-surrogate code unit</a> 4513 * (also known as <i>leading-surrogate code unit</i>). 4514 * 4515 * <p>Such values do not represent characters by themselves, 4516 * but are used in the representation of 4517 * <a href="#supplementary">supplementary characters</a> 4518 * in the UTF-16 encoding. 4519 * 4520 * @param ch the {@code char} value to be tested. 4521 * @return {@code true} if the {@code char} value is between 4522 * {@link #MIN_HIGH_SURROGATE} and 4523 * {@link #MAX_HIGH_SURROGATE} inclusive; 4524 * {@code false} otherwise. 4525 * @see Character#isLowSurrogate(char) 4526 * @see Character.UnicodeBlock#of(int) 4527 * @since 1.5 4528 */ 4529 public static boolean isHighSurrogate(char ch) { 4530 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4531 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4532 } 4533 4534 /** 4535 * Determines if the given {@code char} value is a 4536 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4537 * Unicode low-surrogate code unit</a> 4538 * (also known as <i>trailing-surrogate code unit</i>). 4539 * 4540 * <p>Such values do not represent characters by themselves, 4541 * but are used in the representation of 4542 * <a href="#supplementary">supplementary characters</a> 4543 * in the UTF-16 encoding. 
4544 * 4545 * @param ch the {@code char} value to be tested. 4546 * @return {@code true} if the {@code char} value is between 4547 * {@link #MIN_LOW_SURROGATE} and 4548 * {@link #MAX_LOW_SURROGATE} inclusive; 4549 * {@code false} otherwise. 4550 * @see Character#isHighSurrogate(char) 4551 * @since 1.5 4552 */ 4553 public static boolean isLowSurrogate(char ch) { 4554 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4555 } 4556 4557 /** 4558 * Determines if the given {@code char} value is a Unicode 4559 * <i>surrogate code unit</i>. 4560 * 4561 * <p>Such values do not represent characters by themselves, 4562 * but are used in the representation of 4563 * <a href="#supplementary">supplementary characters</a> 4564 * in the UTF-16 encoding. 4565 * 4566 * <p>A char value is a surrogate code unit if and only if it is either 4567 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4568 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4569 * 4570 * @param ch the {@code char} value to be tested. 4571 * @return {@code true} if the {@code char} value is between 4572 * {@link #MIN_SURROGATE} and 4573 * {@link #MAX_SURROGATE} inclusive; 4574 * {@code false} otherwise. 4575 * @since 1.7 4576 */ 4577 public static boolean isSurrogate(char ch) { 4578 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4579 } 4580 4581 /** 4582 * Determines whether the specified pair of {@code char} 4583 * values is a valid 4584 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4585 * Unicode surrogate pair</a>. 
4586 4587 * <p>This method is equivalent to the expression: 4588 * <blockquote><pre> 4589 * isHighSurrogate(high) && isLowSurrogate(low) 4590 * </pre></blockquote> 4591 * 4592 * @param high the high-surrogate code value to be tested 4593 * @param low the low-surrogate code value to be tested 4594 * @return {@code true} if the specified high and 4595 * low-surrogate code values represent a valid surrogate pair; 4596 * {@code false} otherwise. 4597 * @since 1.5 4598 */ 4599 public static boolean isSurrogatePair(char high, char low) { 4600 return isHighSurrogate(high) && isLowSurrogate(low); 4601 } 4602 4603 /** 4604 * Determines the number of {@code char} values needed to 4605 * represent the specified character (Unicode code point). If the 4606 * specified character is equal to or greater than 0x10000, then 4607 * the method returns 2. Otherwise, the method returns 1. 4608 * 4609 * <p>This method doesn't validate the specified character to be a 4610 * valid Unicode code point. The caller must validate the 4611 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4612 * if necessary. 4613 * 4614 * @param codePoint the character (Unicode code point) to be tested. 4615 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4616 * @see Character#isSupplementaryCodePoint(int) 4617 * @since 1.5 4618 */ 4619 public static int charCount(int codePoint) { 4620 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4621 } 4622 4623 /** 4624 * Converts the specified surrogate pair to its supplementary code 4625 * point value. This method does not validate the specified 4626 * surrogate pair. The caller must validate it using {@link 4627 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4628 * 4629 * @param high the high-surrogate code unit 4630 * @param low the low-surrogate code unit 4631 * @return the supplementary code point composed from the 4632 * specified surrogate pair. 
4633 * @since 1.5 4634 */ 4635 public static int toCodePoint(char high, char low) { 4636 // Optimized form of: 4637 // return ((high - MIN_HIGH_SURROGATE) << 10) 4638 // + (low - MIN_LOW_SURROGATE) 4639 // + MIN_SUPPLEMENTARY_CODE_POINT; 4640 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4641 - (MIN_HIGH_SURROGATE << 10) 4642 - MIN_LOW_SURROGATE); 4643 } 4644 4645 /** 4646 * Returns the code point at the given index of the 4647 * {@code CharSequence}. If the {@code char} value at 4648 * the given index in the {@code CharSequence} is in the 4649 * high-surrogate range, the following index is less than the 4650 * length of the {@code CharSequence}, and the 4651 * {@code char} value at the following index is in the 4652 * low-surrogate range, then the supplementary code point 4653 * corresponding to this surrogate pair is returned. Otherwise, 4654 * the {@code char} value at the given index is returned. 4655 * 4656 * @param seq a sequence of {@code char} values (Unicode code 4657 * units) 4658 * @param index the index to the {@code char} values (Unicode 4659 * code units) in {@code seq} to be converted 4660 * @return the Unicode code point at the given index 4661 * @exception NullPointerException if {@code seq} is null. 4662 * @exception IndexOutOfBoundsException if the value 4663 * {@code index} is negative or not less than 4664 * {@link CharSequence#length() seq.length()}. 4665 * @since 1.5 4666 */ 4667 public static int codePointAt(CharSequence seq, int index) { 4668 char c1 = seq.charAt(index++); 4669 if (isHighSurrogate(c1)) { 4670 if (index < seq.length()) { 4671 char c2 = seq.charAt(index); 4672 if (isLowSurrogate(c2)) { 4673 return toCodePoint(c1, c2); 4674 } 4675 } 4676 } 4677 return c1; 4678 } 4679 4680 /** 4681 * Returns the code point at the given index of the 4682 * {@code char} array. 
If the {@code char} value at 4683 * the given index in the {@code char} array is in the 4684 * high-surrogate range, the following index is less than the 4685 * length of the {@code char} array, and the 4686 * {@code char} value at the following index is in the 4687 * low-surrogate range, then the supplementary code point 4688 * corresponding to this surrogate pair is returned. Otherwise, 4689 * the {@code char} value at the given index is returned. 4690 * 4691 * @param a the {@code char} array 4692 * @param index the index to the {@code char} values (Unicode 4693 * code units) in the {@code char} array to be converted 4694 * @return the Unicode code point at the given index 4695 * @exception NullPointerException if {@code a} is null. 4696 * @exception IndexOutOfBoundsException if the value 4697 * {@code index} is negative or not less than 4698 * the length of the {@code char} array. 4699 * @since 1.5 4700 */ 4701 public static int codePointAt(char[] a, int index) { 4702 return codePointAtImpl(a, index, a.length); 4703 } 4704 4705 /** 4706 * Returns the code point at the given index of the 4707 * {@code char} array, where only array elements with 4708 * {@code index} less than {@code limit} can be used. If 4709 * the {@code char} value at the given index in the 4710 * {@code char} array is in the high-surrogate range, the 4711 * following index is less than the {@code limit}, and the 4712 * {@code char} value at the following index is in the 4713 * low-surrogate range, then the supplementary code point 4714 * corresponding to this surrogate pair is returned. Otherwise, 4715 * the {@code char} value at the given index is returned. 
4716 * 4717 * @param a the {@code char} array 4718 * @param index the index to the {@code char} values (Unicode 4719 * code units) in the {@code char} array to be converted 4720 * @param limit the index after the last array element that 4721 * can be used in the {@code char} array 4722 * @return the Unicode code point at the given index 4723 * @exception NullPointerException if {@code a} is null. 4724 * @exception IndexOutOfBoundsException if the {@code index} 4725 * argument is negative or not less than the {@code limit} 4726 * argument, or if the {@code limit} argument is negative or 4727 * greater than the length of the {@code char} array. 4728 * @since 1.5 4729 */ 4730 public static int codePointAt(char[] a, int index, int limit) { 4731 if (index >= limit || limit < 0 || limit > a.length) { 4732 throw new IndexOutOfBoundsException(); 4733 } 4734 return codePointAtImpl(a, index, limit); 4735 } 4736 4737 // throws ArrayIndexOutofBoundsException if index out of bounds 4738 static int codePointAtImpl(char[] a, int index, int limit) { 4739 char c1 = a[index++]; 4740 if (isHighSurrogate(c1)) { 4741 if (index < limit) { 4742 char c2 = a[index]; 4743 if (isLowSurrogate(c2)) { 4744 return toCodePoint(c1, c2); 4745 } 4746 } 4747 } 4748 return c1; 4749 } 4750 4751 /** 4752 * Returns the code point preceding the given index of the 4753 * {@code CharSequence}. If the {@code char} value at 4754 * {@code (index - 1)} in the {@code CharSequence} is in 4755 * the low-surrogate range, {@code (index - 2)} is not 4756 * negative, and the {@code char} value at {@code (index - 2)} 4757 * in the {@code CharSequence} is in the 4758 * high-surrogate range, then the supplementary code point 4759 * corresponding to this surrogate pair is returned. Otherwise, 4760 * the {@code char} value at {@code (index - 1)} is 4761 * returned. 
4762 * 4763 * @param seq the {@code CharSequence} instance 4764 * @param index the index following the code point that should be returned 4765 * @return the Unicode code point value before the given index. 4766 * @exception NullPointerException if {@code seq} is null. 4767 * @exception IndexOutOfBoundsException if the {@code index} 4768 * argument is less than 1 or greater than {@link 4769 * CharSequence#length() seq.length()}. 4770 * @since 1.5 4771 */ 4772 public static int codePointBefore(CharSequence seq, int index) { 4773 char c2 = seq.charAt(--index); 4774 if (isLowSurrogate(c2)) { 4775 if (index > 0) { 4776 char c1 = seq.charAt(--index); 4777 if (isHighSurrogate(c1)) { 4778 return toCodePoint(c1, c2); 4779 } 4780 } 4781 } 4782 return c2; 4783 } 4784 4785 /** 4786 * Returns the code point preceding the given index of the 4787 * {@code char} array. If the {@code char} value at 4788 * {@code (index - 1)} in the {@code char} array is in 4789 * the low-surrogate range, {@code (index - 2)} is not 4790 * negative, and the {@code char} value at {@code (index - 2)} 4791 * in the {@code char} array is in the 4792 * high-surrogate range, then the supplementary code point 4793 * corresponding to this surrogate pair is returned. Otherwise, 4794 * the {@code char} value at {@code (index - 1)} is 4795 * returned. 4796 * 4797 * @param a the {@code char} array 4798 * @param index the index following the code point that should be returned 4799 * @return the Unicode code point value before the given index. 4800 * @exception NullPointerException if {@code a} is null. 
4801 * @exception IndexOutOfBoundsException if the {@code index} 4802 * argument is less than 1 or greater than the length of the 4803 * {@code char} array 4804 * @since 1.5 4805 */ 4806 public static int codePointBefore(char[] a, int index) { 4807 return codePointBeforeImpl(a, index, 0); 4808 } 4809 4810 /** 4811 * Returns the code point preceding the given index of the 4812 * {@code char} array, where only array elements with 4813 * {@code index} greater than or equal to {@code start} 4814 * can be used. If the {@code char} value at {@code (index - 1)} 4815 * in the {@code char} array is in the 4816 * low-surrogate range, {@code (index - 2)} is not less than 4817 * {@code start}, and the {@code char} value at 4818 * {@code (index - 2)} in the {@code char} array is in 4819 * the high-surrogate range, then the supplementary code point 4820 * corresponding to this surrogate pair is returned. Otherwise, 4821 * the {@code char} value at {@code (index - 1)} is 4822 * returned. 4823 * 4824 * @param a the {@code char} array 4825 * @param index the index following the code point that should be returned 4826 * @param start the index of the first array element in the 4827 * {@code char} array 4828 * @return the Unicode code point value before the given index. 4829 * @exception NullPointerException if {@code a} is null. 4830 * @exception IndexOutOfBoundsException if the {@code index} 4831 * argument is not greater than the {@code start} argument or 4832 * is greater than the length of the {@code char} array, or 4833 * if the {@code start} argument is negative or not less than 4834 * the length of the {@code char} array. 
4835 * @since 1.5 4836 */ 4837 public static int codePointBefore(char[] a, int index, int start) { 4838 if (index <= start || start < 0 || start >= a.length) { 4839 throw new IndexOutOfBoundsException(); 4840 } 4841 return codePointBeforeImpl(a, index, start); 4842 } 4843 4844 // throws ArrayIndexOutofBoundsException if index-1 out of bounds 4845 static int codePointBeforeImpl(char[] a, int index, int start) { 4846 char c2 = a[--index]; 4847 if (isLowSurrogate(c2)) { 4848 if (index > start) { 4849 char c1 = a[--index]; 4850 if (isHighSurrogate(c1)) { 4851 return toCodePoint(c1, c2); 4852 } 4853 } 4854 } 4855 return c2; 4856 } 4857 4858 /** 4859 * Returns the leading surrogate (a 4860 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4861 * high surrogate code unit</a>) of the 4862 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4863 * surrogate pair</a> 4864 * representing the specified supplementary character (Unicode 4865 * code point) in the UTF-16 encoding. If the specified character 4866 * is not a 4867 * <a href="Character.html#supplementary">supplementary character</a>, 4868 * an unspecified {@code char} is returned. 4869 * 4870 * <p>If 4871 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4872 * is {@code true}, then 4873 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 4874 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 4875 * are also always {@code true}. 
4876 * 4877 * @param codePoint a supplementary character (Unicode code point) 4878 * @return the leading surrogate code unit used to represent the 4879 * character in the UTF-16 encoding 4880 * @since 1.7 4881 */ 4882 public static char highSurrogate(int codePoint) { 4883 return (char) ((codePoint >>> 10) 4884 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 4885 } 4886 4887 /** 4888 * Returns the trailing surrogate (a 4889 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4890 * low surrogate code unit</a>) of the 4891 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4892 * surrogate pair</a> 4893 * representing the specified supplementary character (Unicode 4894 * code point) in the UTF-16 encoding. If the specified character 4895 * is not a 4896 * <a href="Character.html#supplementary">supplementary character</a>, 4897 * an unspecified {@code char} is returned. 4898 * 4899 * <p>If 4900 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4901 * is {@code true}, then 4902 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 4903 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 4904 * are also always {@code true}. 4905 * 4906 * @param codePoint a supplementary character (Unicode code point) 4907 * @return the trailing surrogate code unit used to represent the 4908 * character in the UTF-16 encoding 4909 * @since 1.7 4910 */ 4911 public static char lowSurrogate(int codePoint) { 4912 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 4913 } 4914 4915 /** 4916 * Converts the specified character (Unicode code point) to its 4917 * UTF-16 representation. If the specified code point is a BMP 4918 * (Basic Multilingual Plane or Plane 0) value, the same value is 4919 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 4920 * specified code point is a supplementary character, its 4921 * surrogate values are stored in {@code dst[dstIndex]} 4922 * (high-surrogate) and {@code dst[dstIndex+1]} 4923 * (low-surrogate), and 2 is returned. 4924 * 4925 * @param codePoint the character (Unicode code point) to be converted. 4926 * @param dst an array of {@code char} in which the 4927 * {@code codePoint}'s UTF-16 value is stored. 4928 * @param dstIndex the start index into the {@code dst} 4929 * array where the converted value is stored. 4930 * @return 1 if the code point is a BMP code point, 2 if the 4931 * code point is a supplementary code point. 4932 * @exception IllegalArgumentException if the specified 4933 * {@code codePoint} is not a valid Unicode code point. 4934 * @exception NullPointerException if the specified {@code dst} is null. 4935 * @exception IndexOutOfBoundsException if {@code dstIndex} 4936 * is negative or not less than {@code dst.length}, or if 4937 * {@code dst} at {@code dstIndex} doesn't have enough 4938 * array element(s) to store the resulting {@code char} 4939 * value(s). (If {@code dstIndex} is equal to 4940 * {@code dst.length-1} and the specified 4941 * {@code codePoint} is a supplementary character, the 4942 * high-surrogate value is not stored in 4943 * {@code dst[dstIndex]}.) 4944 * @since 1.5 4945 */ 4946 public static int toChars(int codePoint, char[] dst, int dstIndex) { 4947 if (isBmpCodePoint(codePoint)) { 4948 dst[dstIndex] = (char) codePoint; 4949 return 1; 4950 } else if (isValidCodePoint(codePoint)) { 4951 toSurrogates(codePoint, dst, dstIndex); 4952 return 2; 4953 } else { 4954 throw new IllegalArgumentException(); 4955 } 4956 } 4957 4958 /** 4959 * Converts the specified character (Unicode code point) to its 4960 * UTF-16 representation stored in a {@code char} array. 
If 4961 * the specified code point is a BMP (Basic Multilingual Plane or 4962 * Plane 0) value, the resulting {@code char} array has 4963 * the same value as {@code codePoint}. If the specified code 4964 * point is a supplementary code point, the resulting 4965 * {@code char} array has the corresponding surrogate pair. 4966 * 4967 * @param codePoint a Unicode code point 4968 * @return a {@code char} array having 4969 * {@code codePoint}'s UTF-16 representation. 4970 * @exception IllegalArgumentException if the specified 4971 * {@code codePoint} is not a valid Unicode code point. 4972 * @since 1.5 4973 */ 4974 public static char[] toChars(int codePoint) { 4975 if (isBmpCodePoint(codePoint)) { 4976 return new char[] { (char) codePoint }; 4977 } else if (isValidCodePoint(codePoint)) { 4978 char[] result = new char[2]; 4979 toSurrogates(codePoint, result, 0); 4980 return result; 4981 } else { 4982 throw new IllegalArgumentException(); 4983 } 4984 } 4985 4986 static void toSurrogates(int codePoint, char[] dst, int index) { 4987 // We write elements "backwards" to guarantee all-or-nothing 4988 dst[index+1] = lowSurrogate(codePoint); 4989 dst[index] = highSurrogate(codePoint); 4990 } 4991 4992 /** 4993 * Returns the number of Unicode code points in the text range of 4994 * the specified char sequence. The text range begins at the 4995 * specified {@code beginIndex} and extends to the 4996 * {@code char} at index {@code endIndex - 1}. Thus the 4997 * length (in {@code char}s) of the text range is 4998 * {@code endIndex-beginIndex}. Unpaired surrogates within 4999 * the text range count as one code point each. 5000 * 5001 * @param seq the char sequence 5002 * @param beginIndex the index to the first {@code char} of 5003 * the text range. 5004 * @param endIndex the index after the last {@code char} of 5005 * the text range. 5006 * @return the number of Unicode code points in the specified text 5007 * range 5008 * @exception NullPointerException if {@code seq} is null. 
5009 * @exception IndexOutOfBoundsException if the 5010 * {@code beginIndex} is negative, or {@code endIndex} 5011 * is larger than the length of the given sequence, or 5012 * {@code beginIndex} is larger than {@code endIndex}. 5013 * @since 1.5 5014 */ 5015 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5016 int length = seq.length(); 5017 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5018 throw new IndexOutOfBoundsException(); 5019 } 5020 int n = endIndex - beginIndex; 5021 for (int i = beginIndex; i < endIndex; ) { 5022 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5023 isLowSurrogate(seq.charAt(i))) { 5024 n--; 5025 i++; 5026 } 5027 } 5028 return n; 5029 } 5030 5031 /** 5032 * Returns the number of Unicode code points in a subarray of the 5033 * {@code char} array argument. The {@code offset} 5034 * argument is the index of the first {@code char} of the 5035 * subarray and the {@code count} argument specifies the 5036 * length of the subarray in {@code char}s. Unpaired 5037 * surrogates within the subarray count as one code point each. 5038 * 5039 * @param a the {@code char} array 5040 * @param offset the index of the first {@code char} in the 5041 * given {@code char} array 5042 * @param count the length of the subarray in {@code char}s 5043 * @return the number of Unicode code points in the specified subarray 5044 * @exception NullPointerException if {@code a} is null. 5045 * @exception IndexOutOfBoundsException if {@code offset} or 5046 * {@code count} is negative, or if {@code offset + 5047 * count} is larger than the length of the given array. 
5048 * @since 1.5 5049 */ 5050 public static int codePointCount(char[] a, int offset, int count) { 5051 if (count > a.length - offset || offset < 0 || count < 0) { 5052 throw new IndexOutOfBoundsException(); 5053 } 5054 return codePointCountImpl(a, offset, count); 5055 } 5056 5057 static int codePointCountImpl(char[] a, int offset, int count) { 5058 int endIndex = offset + count; 5059 int n = count; 5060 for (int i = offset; i < endIndex; ) { 5061 if (isHighSurrogate(a[i++]) && i < endIndex && 5062 isLowSurrogate(a[i])) { 5063 n--; 5064 i++; 5065 } 5066 } 5067 return n; 5068 } 5069 5070 /** 5071 * Returns the index within the given char sequence that is offset 5072 * from the given {@code index} by {@code codePointOffset} 5073 * code points. Unpaired surrogates within the text range given by 5074 * {@code index} and {@code codePointOffset} count as 5075 * one code point each. 5076 * 5077 * @param seq the char sequence 5078 * @param index the index to be offset 5079 * @param codePointOffset the offset in code points 5080 * @return the index within the char sequence 5081 * @exception NullPointerException if {@code seq} is null. 5082 * @exception IndexOutOfBoundsException if {@code index} 5083 * is negative or larger then the length of the char sequence, 5084 * or if {@code codePointOffset} is positive and the 5085 * subsequence starting with {@code index} has fewer than 5086 * {@code codePointOffset} code points, or if 5087 * {@code codePointOffset} is negative and the subsequence 5088 * before {@code index} has fewer than the absolute value 5089 * of {@code codePointOffset} code points. 
5090 * @since 1.5 5091 */ 5092 public static int offsetByCodePoints(CharSequence seq, int index, 5093 int codePointOffset) { 5094 int length = seq.length(); 5095 if (index < 0 || index > length) { 5096 throw new IndexOutOfBoundsException(); 5097 } 5098 5099 int x = index; 5100 if (codePointOffset >= 0) { 5101 int i; 5102 for (i = 0; x < length && i < codePointOffset; i++) { 5103 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5104 isLowSurrogate(seq.charAt(x))) { 5105 x++; 5106 } 5107 } 5108 if (i < codePointOffset) { 5109 throw new IndexOutOfBoundsException(); 5110 } 5111 } else { 5112 int i; 5113 for (i = codePointOffset; x > 0 && i < 0; i++) { 5114 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5115 isHighSurrogate(seq.charAt(x-1))) { 5116 x--; 5117 } 5118 } 5119 if (i < 0) { 5120 throw new IndexOutOfBoundsException(); 5121 } 5122 } 5123 return x; 5124 } 5125 5126 /** 5127 * Returns the index within the given {@code char} subarray 5128 * that is offset from the given {@code index} by 5129 * {@code codePointOffset} code points. The 5130 * {@code start} and {@code count} arguments specify a 5131 * subarray of the {@code char} array. Unpaired surrogates 5132 * within the text range given by {@code index} and 5133 * {@code codePointOffset} count as one code point each. 5134 * 5135 * @param a the {@code char} array 5136 * @param start the index of the first {@code char} of the 5137 * subarray 5138 * @param count the length of the subarray in {@code char}s 5139 * @param index the index to be offset 5140 * @param codePointOffset the offset in code points 5141 * @return the index within the subarray 5142 * @exception NullPointerException if {@code a} is null. 
5143 * @exception IndexOutOfBoundsException 5144 * if {@code start} or {@code count} is negative, 5145 * or if {@code start + count} is larger than the length of 5146 * the given array, 5147 * or if {@code index} is less than {@code start} or 5148 * larger then {@code start + count}, 5149 * or if {@code codePointOffset} is positive and the text range 5150 * starting with {@code index} and ending with {@code start + count - 1} 5151 * has fewer than {@code codePointOffset} code 5152 * points, 5153 * or if {@code codePointOffset} is negative and the text range 5154 * starting with {@code start} and ending with {@code index - 1} 5155 * has fewer than the absolute value of 5156 * {@code codePointOffset} code points. 5157 * @since 1.5 5158 */ 5159 public static int offsetByCodePoints(char[] a, int start, int count, 5160 int index, int codePointOffset) { 5161 if (count > a.length-start || start < 0 || count < 0 5162 || index < start || index > start+count) { 5163 throw new IndexOutOfBoundsException(); 5164 } 5165 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5166 } 5167 5168 static int offsetByCodePointsImpl(char[]a, int start, int count, 5169 int index, int codePointOffset) { 5170 int x = index; 5171 if (codePointOffset >= 0) { 5172 int limit = start + count; 5173 int i; 5174 for (i = 0; x < limit && i < codePointOffset; i++) { 5175 if (isHighSurrogate(a[x++]) && x < limit && 5176 isLowSurrogate(a[x])) { 5177 x++; 5178 } 5179 } 5180 if (i < codePointOffset) { 5181 throw new IndexOutOfBoundsException(); 5182 } 5183 } else { 5184 int i; 5185 for (i = codePointOffset; x > start && i < 0; i++) { 5186 if (isLowSurrogate(a[--x]) && x > start && 5187 isHighSurrogate(a[x-1])) { 5188 x--; 5189 } 5190 } 5191 if (i < 0) { 5192 throw new IndexOutOfBoundsException(); 5193 } 5194 } 5195 return x; 5196 } 5197 5198 /** 5199 * Determines if the specified character is a lowercase character. 
5200 * <p> 5201 * A character is lowercase if its general category type, provided 5202 * by {@code Character.getType(ch)}, is 5203 * {@code LOWERCASE_LETTER}. 5204 * <p> 5205 * The following are examples of lowercase characters: 5206 * <p><blockquote><pre> 5207 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5208 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5209 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5210 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5211 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5212 * </pre></blockquote> 5213 * <p> Many other Unicode characters are lowercase too. 5214 * 5215 * <p><b>Note:</b> This method cannot handle <a 5216 * href="#supplementary"> supplementary characters</a>. To support 5217 * all Unicode characters, including supplementary characters, use 5218 * the {@link #isLowerCase(int)} method. 5219 * 5220 * @param ch the character to be tested. 5221 * @return {@code true} if the character is lowercase; 5222 * {@code false} otherwise. 5223 * @see Character#isLowerCase(char) 5224 * @see Character#isTitleCase(char) 5225 * @see Character#toLowerCase(char) 5226 * @see Character#getType(char) 5227 */ 5228 public static boolean isLowerCase(char ch) { 5229 return isLowerCase((int)ch); 5230 } 5231 5232 /** 5233 * Determines if the specified character (Unicode code point) is a 5234 * lowercase character. 5235 * <p> 5236 * A character is lowercase if its general category type, provided 5237 * by {@link Character#getType getType(codePoint)}, is 5238 * {@code LOWERCASE_LETTER}. 
5239 * <p> 5240 * The following are examples of lowercase characters: 5241 * <p><blockquote><pre> 5242 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5243 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5244 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5245 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5246 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5247 * </pre></blockquote> 5248 * <p> Many other Unicode characters are lowercase too. 5249 * 5250 * @param codePoint the character (Unicode code point) to be tested. 5251 * @return {@code true} if the character is lowercase; 5252 * {@code false} otherwise. 5253 * @see Character#isLowerCase(int) 5254 * @see Character#isTitleCase(int) 5255 * @see Character#toLowerCase(int) 5256 * @see Character#getType(int) 5257 * @since 1.5 5258 */ 5259 public static boolean isLowerCase(int codePoint) { 5260 return getType(codePoint) == Character.LOWERCASE_LETTER; 5261 } 5262 5263 /** 5264 * Determines if the specified character is an uppercase character. 5265 * <p> 5266 * A character is uppercase if its general category type, provided by 5267 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 5268 * <p> 5269 * The following are examples of uppercase characters: 5270 * <p><blockquote><pre> 5271 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5272 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5273 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5274 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5275 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5276 * </pre></blockquote> 5277 * <p> Many other Unicode characters are uppercase too.<p> 5278 * 5279 * <p><b>Note:</b> This method cannot handle <a 5280 * href="#supplementary"> supplementary characters</a>. 
To support 5281 * all Unicode characters, including supplementary characters, use 5282 * the {@link #isUpperCase(int)} method. 5283 * 5284 * @param ch the character to be tested. 5285 * @return {@code true} if the character is uppercase; 5286 * {@code false} otherwise. 5287 * @see Character#isLowerCase(char) 5288 * @see Character#isTitleCase(char) 5289 * @see Character#toUpperCase(char) 5290 * @see Character#getType(char) 5291 * @since 1.0 5292 */ 5293 public static boolean isUpperCase(char ch) { 5294 return isUpperCase((int)ch); 5295 } 5296 5297 /** 5298 * Determines if the specified character (Unicode code point) is an uppercase character. 5299 * <p> 5300 * A character is uppercase if its general category type, provided by 5301 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}. 5302 * <p> 5303 * The following are examples of uppercase characters: 5304 * <p><blockquote><pre> 5305 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5306 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5307 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5308 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5309 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5310 * </pre></blockquote> 5311 * <p> Many other Unicode characters are uppercase too.<p> 5312 * 5313 * @param codePoint the character (Unicode code point) to be tested. 5314 * @return {@code true} if the character is uppercase; 5315 * {@code false} otherwise. 5316 * @see Character#isLowerCase(int) 5317 * @see Character#isTitleCase(int) 5318 * @see Character#toUpperCase(int) 5319 * @see Character#getType(int) 5320 * @since 1.5 5321 */ 5322 public static boolean isUpperCase(int codePoint) { 5323 return getType(codePoint) == Character.UPPERCASE_LETTER; 5324 } 5325 5326 /** 5327 * Determines if the specified character is a titlecase character. 
5328 * <p> 5329 * A character is a titlecase character if its general 5330 * category type, provided by {@code Character.getType(ch)}, 5331 * is {@code TITLECASE_LETTER}. 5332 * <p> 5333 * Some characters look like pairs of Latin letters. For example, there 5334 * is an uppercase letter that looks like "LJ" and has a corresponding 5335 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5336 * is the appropriate form to use when rendering a word in lowercase 5337 * with initial capitals, as for a book title. 5338 * <p> 5339 * These are some of the Unicode characters for which this method returns 5340 * {@code true}: 5341 * <ul> 5342 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5343 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5344 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5345 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5346 * </ul> 5347 * <p> Many other Unicode characters are titlecase too.<p> 5348 * 5349 * <p><b>Note:</b> This method cannot handle <a 5350 * href="#supplementary"> supplementary characters</a>. To support 5351 * all Unicode characters, including supplementary characters, use 5352 * the {@link #isTitleCase(int)} method. 5353 * 5354 * @param ch the character to be tested. 5355 * @return {@code true} if the character is titlecase; 5356 * {@code false} otherwise. 5357 * @see Character#isLowerCase(char) 5358 * @see Character#isUpperCase(char) 5359 * @see Character#toTitleCase(char) 5360 * @see Character#getType(char) 5361 * @since 1.0.2 5362 */ 5363 public static boolean isTitleCase(char ch) { 5364 return isTitleCase((int)ch); 5365 } 5366 5367 /** 5368 * Determines if the specified character (Unicode code point) is a titlecase character. 5369 * <p> 5370 * A character is a titlecase character if its general 5371 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5372 * is {@code TITLECASE_LETTER}. 
5373 * <p> 5374 * Some characters look like pairs of Latin letters. For example, there 5375 * is an uppercase letter that looks like "LJ" and has a corresponding 5376 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5377 * is the appropriate form to use when rendering a word in lowercase 5378 * with initial capitals, as for a book title. 5379 * <p> 5380 * These are some of the Unicode characters for which this method returns 5381 * {@code true}: 5382 * <ul> 5383 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 5384 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 5385 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 5386 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 5387 * </ul> 5388 * <p> Many other Unicode characters are titlecase too.<p> 5389 * 5390 * @param codePoint the character (Unicode code point) to be tested. 5391 * @return {@code true} if the character is titlecase; 5392 * {@code false} otherwise. 5393 * @see Character#isLowerCase(int) 5394 * @see Character#isUpperCase(int) 5395 * @see Character#toTitleCase(int) 5396 * @see Character#getType(int) 5397 * @since 1.5 5398 */ 5399 public static boolean isTitleCase(int codePoint) { 5400 return getType(codePoint) == Character.TITLECASE_LETTER; 5401 } 5402 5403 /** 5404 * Determines if the specified character is a digit. 5405 * <p> 5406 * A character is a digit if its general category type, provided 5407 * by {@code Character.getType(ch)}, is 5408 * {@code DECIMAL_DIGIT_NUMBER}. 
5409 * <p> 5410 * Some Unicode character ranges that contain digits: 5411 * <ul> 5412 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5413 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5414 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5415 * Arabic-Indic digits 5416 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5417 * Extended Arabic-Indic digits 5418 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5419 * Devanagari digits 5420 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5421 * Fullwidth digits 5422 * </ul> 5423 * 5424 * Many other character ranges contain digits as well. 5425 * 5426 * <p><b>Note:</b> This method cannot handle <a 5427 * href="#supplementary"> supplementary characters</a>. To support 5428 * all Unicode characters, including supplementary characters, use 5429 * the {@link #isDigit(int)} method. 5430 * 5431 * @param ch the character to be tested. 5432 * @return {@code true} if the character is a digit; 5433 * {@code false} otherwise. 5434 * @see Character#digit(char, int) 5435 * @see Character#forDigit(int, int) 5436 * @see Character#getType(char) 5437 */ 5438 public static boolean isDigit(char ch) { 5439 return isDigit((int)ch); 5440 } 5441 5442 /** 5443 * Determines if the specified character (Unicode code point) is a digit. 5444 * <p> 5445 * A character is a digit if its general category type, provided 5446 * by {@link Character#getType(int) getType(codePoint)}, is 5447 * {@code DECIMAL_DIGIT_NUMBER}. 
5448 * <p> 5449 * Some Unicode character ranges that contain digits: 5450 * <ul> 5451 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 5452 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 5453 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 5454 * Arabic-Indic digits 5455 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 5456 * Extended Arabic-Indic digits 5457 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 5458 * Devanagari digits 5459 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 5460 * Fullwidth digits 5461 * </ul> 5462 * 5463 * Many other character ranges contain digits as well. 5464 * 5465 * @param codePoint the character (Unicode code point) to be tested. 5466 * @return {@code true} if the character is a digit; 5467 * {@code false} otherwise. 5468 * @see Character#forDigit(int, int) 5469 * @see Character#getType(int) 5470 * @since 1.5 5471 */ 5472 public static boolean isDigit(int codePoint) { 5473 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5474 } 5475 5476 /** 5477 * Determines if a character is defined in Unicode. 5478 * <p> 5479 * A character is defined if at least one of the following is true: 5480 * <ul> 5481 * <li>It has an entry in the UnicodeData file. 5482 * <li>It has a value in a range defined by the UnicodeData file. 5483 * </ul> 5484 * 5485 * <p><b>Note:</b> This method cannot handle <a 5486 * href="#supplementary"> supplementary characters</a>. To support 5487 * all Unicode characters, including supplementary characters, use 5488 * the {@link #isDefined(int)} method. 5489 * 5490 * @param ch the character to be tested 5491 * @return {@code true} if the character has a defined meaning 5492 * in Unicode; {@code false} otherwise. 
5493 * @see Character#isDigit(char) 5494 * @see Character#isLetter(char) 5495 * @see Character#isLetterOrDigit(char) 5496 * @see Character#isLowerCase(char) 5497 * @see Character#isTitleCase(char) 5498 * @see Character#isUpperCase(char) 5499 * @since 1.0.2 5500 */ 5501 public static boolean isDefined(char ch) { 5502 return isDefined((int)ch); 5503 } 5504 5505 /** 5506 * Determines if a character (Unicode code point) is defined in Unicode. 5507 * <p> 5508 * A character is defined if at least one of the following is true: 5509 * <ul> 5510 * <li>It has an entry in the UnicodeData file. 5511 * <li>It has a value in a range defined by the UnicodeData file. 5512 * </ul> 5513 * 5514 * @param codePoint the character (Unicode code point) to be tested. 5515 * @return {@code true} if the character has a defined meaning 5516 * in Unicode; {@code false} otherwise. 5517 * @see Character#isDigit(int) 5518 * @see Character#isLetter(int) 5519 * @see Character#isLetterOrDigit(int) 5520 * @see Character#isLowerCase(int) 5521 * @see Character#isTitleCase(int) 5522 * @see Character#isUpperCase(int) 5523 * @since 1.5 5524 */ 5525 public static boolean isDefined(int codePoint) { 5526 return getType(codePoint) != Character.UNASSIGNED; 5527 } 5528 5529 /** 5530 * Determines if the specified character is a letter. 5531 * <p> 5532 * A character is considered to be a letter if its general 5533 * category type, provided by {@code Character.getType(ch)}, 5534 * is any of the following: 5535 * <ul> 5536 * <li> {@code UPPERCASE_LETTER} 5537 * <li> {@code LOWERCASE_LETTER} 5538 * <li> {@code TITLECASE_LETTER} 5539 * <li> {@code MODIFIER_LETTER} 5540 * <li> {@code OTHER_LETTER} 5541 * </ul> 5542 * 5543 * Not all letters have case. Many characters are 5544 * letters but are neither uppercase nor lowercase nor titlecase. 5545 * 5546 * <p><b>Note:</b> This method cannot handle <a 5547 * href="#supplementary"> supplementary characters</a>. 
To support 5548 * all Unicode characters, including supplementary characters, use 5549 * the {@link #isLetter(int)} method. 5550 * 5551 * @param ch the character to be tested. 5552 * @return {@code true} if the character is a letter; 5553 * {@code false} otherwise. 5554 * @see Character#isDigit(char) 5555 * @see Character#isJavaIdentifierStart(char) 5556 * @see Character#isJavaLetter(char) 5557 * @see Character#isJavaLetterOrDigit(char) 5558 * @see Character#isLetterOrDigit(char) 5559 * @see Character#isLowerCase(char) 5560 * @see Character#isTitleCase(char) 5561 * @see Character#isUnicodeIdentifierStart(char) 5562 * @see Character#isUpperCase(char) 5563 */ 5564 public static boolean isLetter(char ch) { 5565 return isLetter((int)ch); 5566 } 5567 5568 /** 5569 * Determines if the specified character (Unicode code point) is a letter. 5570 * <p> 5571 * A character is considered to be a letter if its general 5572 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5573 * is any of the following: 5574 * <ul> 5575 * <li> {@code UPPERCASE_LETTER} 5576 * <li> {@code LOWERCASE_LETTER} 5577 * <li> {@code TITLECASE_LETTER} 5578 * <li> {@code MODIFIER_LETTER} 5579 * <li> {@code OTHER_LETTER} 5580 * </ul> 5581 * 5582 * Not all letters have case. Many characters are 5583 * letters but are neither uppercase nor lowercase nor titlecase. 5584 * 5585 * @param codePoint the character (Unicode code point) to be tested. 5586 * @return {@code true} if the character is a letter; 5587 * {@code false} otherwise. 
5588 * @see Character#isDigit(int) 5589 * @see Character#isJavaIdentifierStart(int) 5590 * @see Character#isLetterOrDigit(int) 5591 * @see Character#isLowerCase(int) 5592 * @see Character#isTitleCase(int) 5593 * @see Character#isUnicodeIdentifierStart(int) 5594 * @see Character#isUpperCase(int) 5595 * @since 1.5 5596 */ 5597 public static boolean isLetter(int codePoint) { 5598 return ((((1 << Character.UPPERCASE_LETTER) | 5599 (1 << Character.LOWERCASE_LETTER) | 5600 (1 << Character.TITLECASE_LETTER) | 5601 (1 << Character.MODIFIER_LETTER) | 5602 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5603 != 0; 5604 } 5605 5606 /** 5607 * Determines if the specified character is a letter or digit. 5608 * <p> 5609 * A character is considered to be a letter or digit if either 5610 * {@code Character.isLetter(char ch)} or 5611 * {@code Character.isDigit(char ch)} returns 5612 * {@code true} for the character. 5613 * 5614 * <p><b>Note:</b> This method cannot handle <a 5615 * href="#supplementary"> supplementary characters</a>. To support 5616 * all Unicode characters, including supplementary characters, use 5617 * the {@link #isLetterOrDigit(int)} method. 5618 * 5619 * @param ch the character to be tested. 5620 * @return {@code true} if the character is a letter or digit; 5621 * {@code false} otherwise. 5622 * @see Character#isDigit(char) 5623 * @see Character#isJavaIdentifierPart(char) 5624 * @see Character#isJavaLetter(char) 5625 * @see Character#isJavaLetterOrDigit(char) 5626 * @see Character#isLetter(char) 5627 * @see Character#isUnicodeIdentifierPart(char) 5628 * @since 1.0.2 5629 */ 5630 public static boolean isLetterOrDigit(char ch) { 5631 return isLetterOrDigit((int)ch); 5632 } 5633 5634 /** 5635 * Determines if the specified character (Unicode code point) is a letter or digit. 
5636 * <p> 5637 * A character is considered to be a letter or digit if either 5638 * {@link #isLetter(int) isLetter(codePoint)} or 5639 * {@link #isDigit(int) isDigit(codePoint)} returns 5640 * {@code true} for the character. 5641 * 5642 * @param codePoint the character (Unicode code point) to be tested. 5643 * @return {@code true} if the character is a letter or digit; 5644 * {@code false} otherwise. 5645 * @see Character#isDigit(int) 5646 * @see Character#isJavaIdentifierPart(int) 5647 * @see Character#isLetter(int) 5648 * @see Character#isUnicodeIdentifierPart(int) 5649 * @since 1.5 5650 */ 5651 public static boolean isLetterOrDigit(int codePoint) { 5652 return ((((1 << Character.UPPERCASE_LETTER) | 5653 (1 << Character.LOWERCASE_LETTER) | 5654 (1 << Character.TITLECASE_LETTER) | 5655 (1 << Character.MODIFIER_LETTER) | 5656 (1 << Character.OTHER_LETTER) | 5657 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5658 != 0; 5659 } 5660 5661 /** 5662 * Determines if the specified character is permissible as the first 5663 * character in a Java identifier. 5664 * <p> 5665 * A character may start a Java identifier if and only if 5666 * one of the following is true: 5667 * <ul> 5668 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5669 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5670 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5671 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5672 * </ul> 5673 * 5674 * @param ch the character to be tested. 5675 * @return {@code true} if the character may start a Java 5676 * identifier; {@code false} otherwise. 
5677 * @see Character#isJavaLetterOrDigit(char) 5678 * @see Character#isJavaIdentifierStart(char) 5679 * @see Character#isJavaIdentifierPart(char) 5680 * @see Character#isLetter(char) 5681 * @see Character#isLetterOrDigit(char) 5682 * @see Character#isUnicodeIdentifierStart(char) 5683 * @since 1.02 5684 * @deprecated Replaced by isJavaIdentifierStart(char). 5685 */ 5686 @Deprecated 5687 public static boolean isJavaLetter(char ch) { 5688 return isJavaIdentifierStart(ch); 5689 } 5690 5691 /** 5692 * Determines if the specified character may be part of a Java 5693 * identifier as other than the first character. 5694 * <p> 5695 * A character may be part of a Java identifier if and only if any 5696 * of the following are true: 5697 * <ul> 5698 * <li> it is a letter 5699 * <li> it is a currency symbol (such as {@code '$'}) 5700 * <li> it is a connecting punctuation character (such as {@code '_'}) 5701 * <li> it is a digit 5702 * <li> it is a numeric letter (such as a Roman numeral character) 5703 * <li> it is a combining mark 5704 * <li> it is a non-spacing mark 5705 * <li> {@code isIdentifierIgnorable} returns 5706 * {@code true} for the character. 5707 * </ul> 5708 * 5709 * @param ch the character to be tested. 5710 * @return {@code true} if the character may be part of a 5711 * Java identifier; {@code false} otherwise. 5712 * @see Character#isJavaLetter(char) 5713 * @see Character#isJavaIdentifierStart(char) 5714 * @see Character#isJavaIdentifierPart(char) 5715 * @see Character#isLetter(char) 5716 * @see Character#isLetterOrDigit(char) 5717 * @see Character#isUnicodeIdentifierPart(char) 5718 * @see Character#isIdentifierIgnorable(char) 5719 * @since 1.02 5720 * @deprecated Replaced by isJavaIdentifierPart(char). 5721 */ 5722 @Deprecated 5723 public static boolean isJavaLetterOrDigit(char ch) { 5724 return isJavaIdentifierPart(ch); 5725 } 5726 5727 /** 5728 * Determines if the specified character is 5729 * permissible as the first character in a Java identifier. 
5730 * <p> 5731 * A character may start a Java identifier if and only if 5732 * one of the following conditions is true: 5733 * <ul> 5734 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5735 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 5736 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 5737 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 5738 * </ul> 5739 * 5740 * <p><b>Note:</b> This method cannot handle <a 5741 * href="#supplementary"> supplementary characters</a>. To support 5742 * all Unicode characters, including supplementary characters, use 5743 * the {@link #isJavaIdentifierStart(int)} method. 5744 * 5745 * @param ch the character to be tested. 5746 * @return {@code true} if the character may start a Java identifier; 5747 * {@code false} otherwise. 5748 * @see Character#isJavaIdentifierPart(char) 5749 * @see Character#isLetter(char) 5750 * @see Character#isUnicodeIdentifierStart(char) 5751 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5752 * @since 1.1 5753 */ 5754 public static boolean isJavaIdentifierStart(char ch) { 5755 return isJavaIdentifierStart((int)ch); 5756 } 5757 5758 /** 5759 * Determines if the character (Unicode code point) is 5760 * permissible as the first character in a Java identifier. 5761 * <p> 5762 * A character may start a Java identifier if and only if 5763 * one of the following conditions is true: 5764 * <ul> 5765 * <li> {@link #isLetter(int) isLetter(codePoint)} 5766 * returns {@code true} 5767 * <li> {@link #getType(int) getType(codePoint)} 5768 * returns {@code LETTER_NUMBER} 5769 * <li> the referenced character is a currency symbol (such as {@code '$'}) 5770 * <li> the referenced character is a connecting punctuation character 5771 * (such as {@code '_'}). 5772 * </ul> 5773 * 5774 * @param codePoint the character (Unicode code point) to be tested. 
5775 * @return {@code true} if the character may start a Java identifier; 5776 * {@code false} otherwise. 5777 * @see Character#isJavaIdentifierPart(int) 5778 * @see Character#isLetter(int) 5779 * @see Character#isUnicodeIdentifierStart(int) 5780 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5781 * @since 1.5 5782 */ 5783 public static boolean isJavaIdentifierStart(int codePoint) { 5784 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 5785 } 5786 5787 /** 5788 * Determines if the specified character may be part of a Java 5789 * identifier as other than the first character. 5790 * <p> 5791 * A character may be part of a Java identifier if any of the following 5792 * are true: 5793 * <ul> 5794 * <li> it is a letter 5795 * <li> it is a currency symbol (such as {@code '$'}) 5796 * <li> it is a connecting punctuation character (such as {@code '_'}) 5797 * <li> it is a digit 5798 * <li> it is a numeric letter (such as a Roman numeral character) 5799 * <li> it is a combining mark 5800 * <li> it is a non-spacing mark 5801 * <li> {@code isIdentifierIgnorable} returns 5802 * {@code true} for the character 5803 * </ul> 5804 * 5805 * <p><b>Note:</b> This method cannot handle <a 5806 * href="#supplementary"> supplementary characters</a>. To support 5807 * all Unicode characters, including supplementary characters, use 5808 * the {@link #isJavaIdentifierPart(int)} method. 5809 * 5810 * @param ch the character to be tested. 5811 * @return {@code true} if the character may be part of a 5812 * Java identifier; {@code false} otherwise. 
5813 * @see Character#isIdentifierIgnorable(char) 5814 * @see Character#isJavaIdentifierStart(char) 5815 * @see Character#isLetterOrDigit(char) 5816 * @see Character#isUnicodeIdentifierPart(char) 5817 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5818 * @since 1.1 5819 */ 5820 public static boolean isJavaIdentifierPart(char ch) { 5821 return isJavaIdentifierPart((int)ch); 5822 } 5823 5824 /** 5825 * Determines if the character (Unicode code point) may be part of a Java 5826 * identifier as other than the first character. 5827 * <p> 5828 * A character may be part of a Java identifier if any of the following 5829 * are true: 5830 * <ul> 5831 * <li> it is a letter 5832 * <li> it is a currency symbol (such as {@code '$'}) 5833 * <li> it is a connecting punctuation character (such as {@code '_'}) 5834 * <li> it is a digit 5835 * <li> it is a numeric letter (such as a Roman numeral character) 5836 * <li> it is a combining mark 5837 * <li> it is a non-spacing mark 5838 * <li> {@link #isIdentifierIgnorable(int) 5839 * isIdentifierIgnorable(codePoint)} returns {@code true} for 5840 * the character 5841 * </ul> 5842 * 5843 * @param codePoint the character (Unicode code point) to be tested. 5844 * @return {@code true} if the character may be part of a 5845 * Java identifier; {@code false} otherwise. 5846 * @see Character#isIdentifierIgnorable(int) 5847 * @see Character#isJavaIdentifierStart(int) 5848 * @see Character#isLetterOrDigit(int) 5849 * @see Character#isUnicodeIdentifierPart(int) 5850 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5851 * @since 1.5 5852 */ 5853 public static boolean isJavaIdentifierPart(int codePoint) { 5854 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 5855 } 5856 5857 /** 5858 * Determines if the specified character is permissible as the 5859 * first character in a Unicode identifier. 
5860 * <p> 5861 * A character may start a Unicode identifier if and only if 5862 * one of the following conditions is true: 5863 * <ul> 5864 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 5865 * <li> {@link #getType(char) getType(ch)} returns 5866 * {@code LETTER_NUMBER}. 5867 * </ul> 5868 * 5869 * <p><b>Note:</b> This method cannot handle <a 5870 * href="#supplementary"> supplementary characters</a>. To support 5871 * all Unicode characters, including supplementary characters, use 5872 * the {@link #isUnicodeIdentifierStart(int)} method. 5873 * 5874 * @param ch the character to be tested. 5875 * @return {@code true} if the character may start a Unicode 5876 * identifier; {@code false} otherwise. 5877 * @see Character#isJavaIdentifierStart(char) 5878 * @see Character#isLetter(char) 5879 * @see Character#isUnicodeIdentifierPart(char) 5880 * @since 1.1 5881 */ 5882 public static boolean isUnicodeIdentifierStart(char ch) { 5883 return isUnicodeIdentifierStart((int)ch); 5884 } 5885 5886 /** 5887 * Determines if the specified character (Unicode code point) is permissible as the 5888 * first character in a Unicode identifier. 5889 * <p> 5890 * A character may start a Unicode identifier if and only if 5891 * one of the following conditions is true: 5892 * <ul> 5893 * <li> {@link #isLetter(int) isLetter(codePoint)} 5894 * returns {@code true} 5895 * <li> {@link #getType(int) getType(codePoint)} 5896 * returns {@code LETTER_NUMBER}. 5897 * </ul> 5898 * @param codePoint the character (Unicode code point) to be tested. 5899 * @return {@code true} if the character may start a Unicode 5900 * identifier; {@code false} otherwise. 
5901 * @see Character#isJavaIdentifierStart(int) 5902 * @see Character#isLetter(int) 5903 * @see Character#isUnicodeIdentifierPart(int) 5904 * @since 1.5 5905 */ 5906 public static boolean isUnicodeIdentifierStart(int codePoint) { 5907 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 5908 } 5909 5910 /** 5911 * Determines if the specified character may be part of a Unicode 5912 * identifier as other than the first character. 5913 * <p> 5914 * A character may be part of a Unicode identifier if and only if 5915 * one of the following statements is true: 5916 * <ul> 5917 * <li> it is a letter 5918 * <li> it is a connecting punctuation character (such as {@code '_'}) 5919 * <li> it is a digit 5920 * <li> it is a numeric letter (such as a Roman numeral character) 5921 * <li> it is a combining mark 5922 * <li> it is a non-spacing mark 5923 * <li> {@code isIdentifierIgnorable} returns 5924 * {@code true} for this character. 5925 * </ul> 5926 * 5927 * <p><b>Note:</b> This method cannot handle <a 5928 * href="#supplementary"> supplementary characters</a>. To support 5929 * all Unicode characters, including supplementary characters, use 5930 * the {@link #isUnicodeIdentifierPart(int)} method. 5931 * 5932 * @param ch the character to be tested. 5933 * @return {@code true} if the character may be part of a 5934 * Unicode identifier; {@code false} otherwise. 5935 * @see Character#isIdentifierIgnorable(char) 5936 * @see Character#isJavaIdentifierPart(char) 5937 * @see Character#isLetterOrDigit(char) 5938 * @see Character#isUnicodeIdentifierStart(char) 5939 * @since 1.1 5940 */ 5941 public static boolean isUnicodeIdentifierPart(char ch) { 5942 return isUnicodeIdentifierPart((int)ch); 5943 } 5944 5945 /** 5946 * Determines if the specified character (Unicode code point) may be part of a Unicode 5947 * identifier as other than the first character. 
5948 * <p> 5949 * A character may be part of a Unicode identifier if and only if 5950 * one of the following statements is true: 5951 * <ul> 5952 * <li> it is a letter 5953 * <li> it is a connecting punctuation character (such as {@code '_'}) 5954 * <li> it is a digit 5955 * <li> it is a numeric letter (such as a Roman numeral character) 5956 * <li> it is a combining mark 5957 * <li> it is a non-spacing mark 5958 * <li> {@code isIdentifierIgnorable} returns 5959 * {@code true} for this character. 5960 * </ul> 5961 * @param codePoint the character (Unicode code point) to be tested. 5962 * @return {@code true} if the character may be part of a 5963 * Unicode identifier; {@code false} otherwise. 5964 * @see Character#isIdentifierIgnorable(int) 5965 * @see Character#isJavaIdentifierPart(int) 5966 * @see Character#isLetterOrDigit(int) 5967 * @see Character#isUnicodeIdentifierStart(int) 5968 * @since 1.5 5969 */ 5970 public static boolean isUnicodeIdentifierPart(int codePoint) { 5971 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 5972 } 5973 5974 /** 5975 * Determines if the specified character should be regarded as 5976 * an ignorable character in a Java identifier or a Unicode identifier. 5977 * <p> 5978 * The following Unicode characters are ignorable in a Java identifier 5979 * or a Unicode identifier: 5980 * <ul> 5981 * <li>ISO control characters that are not whitespace 5982 * <ul> 5983 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 5984 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 5985 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 5986 * </ul> 5987 * 5988 * <li>all characters that have the {@code FORMAT} general 5989 * category value 5990 * </ul> 5991 * 5992 * <p><b>Note:</b> This method cannot handle <a 5993 * href="#supplementary"> supplementary characters</a>. To support 5994 * all Unicode characters, including supplementary characters, use 5995 * the {@link #isIdentifierIgnorable(int)} method. 
5996 * 5997 * @param ch the character to be tested. 5998 * @return {@code true} if the character is an ignorable control 5999 * character that may be part of a Java or Unicode identifier; 6000 * {@code false} otherwise. 6001 * @see Character#isJavaIdentifierPart(char) 6002 * @see Character#isUnicodeIdentifierPart(char) 6003 * @since 1.1 6004 */ 6005 public static boolean isIdentifierIgnorable(char ch) { 6006 return isIdentifierIgnorable((int)ch); 6007 } 6008 6009 /** 6010 * Determines if the specified character (Unicode code point) should be regarded as 6011 * an ignorable character in a Java identifier or a Unicode identifier. 6012 * <p> 6013 * The following Unicode characters are ignorable in a Java identifier 6014 * or a Unicode identifier: 6015 * <ul> 6016 * <li>ISO control characters that are not whitespace 6017 * <ul> 6018 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 6019 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 6020 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 6021 * </ul> 6022 * 6023 * <li>all characters that have the {@code FORMAT} general 6024 * category value 6025 * </ul> 6026 * 6027 * @param codePoint the character (Unicode code point) to be tested. 6028 * @return {@code true} if the character is an ignorable control 6029 * character that may be part of a Java or Unicode identifier; 6030 * {@code false} otherwise. 6031 * @see Character#isJavaIdentifierPart(int) 6032 * @see Character#isUnicodeIdentifierPart(int) 6033 * @since 1.5 6034 */ 6035 public static boolean isIdentifierIgnorable(int codePoint) { 6036 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6037 } 6038 6039 /** 6040 * Converts the character argument to lowercase using case 6041 * mapping information from the UnicodeData file. 
6042 * <p> 6043 * Note that 6044 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 6045 * does not always return {@code true} for some ranges of 6046 * characters, particularly those that are symbols or ideographs. 6047 * 6048 * <p>In general, {@link String#toLowerCase()} should be used to map 6049 * characters to lowercase. {@code String} case mapping methods 6050 * have several benefits over {@code Character} case mapping methods. 6051 * {@code String} case mapping methods can perform locale-sensitive 6052 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6053 * the {@code Character} case mapping methods cannot. 6054 * 6055 * <p><b>Note:</b> This method cannot handle <a 6056 * href="#supplementary"> supplementary characters</a>. To support 6057 * all Unicode characters, including supplementary characters, use 6058 * the {@link #toLowerCase(int)} method. 6059 * 6060 * @param ch the character to be converted. 6061 * @return the lowercase equivalent of the character, if any; 6062 * otherwise, the character itself. 6063 * @see Character#isLowerCase(char) 6064 * @see String#toLowerCase() 6065 */ 6066 public static char toLowerCase(char ch) { 6067 return (char)toLowerCase((int)ch); 6068 } 6069 6070 /** 6071 * Converts the character (Unicode code point) argument to 6072 * lowercase using case mapping information from the UnicodeData 6073 * file. 6074 * 6075 * <p> Note that 6076 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 6077 * does not always return {@code true} for some ranges of 6078 * characters, particularly those that are symbols or ideographs. 6079 * 6080 * <p>In general, {@link String#toLowerCase()} should be used to map 6081 * characters to lowercase. {@code String} case mapping methods 6082 * have several benefits over {@code Character} case mapping methods. 
6083 * {@code String} case mapping methods can perform locale-sensitive 6084 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6085 * the {@code Character} case mapping methods cannot. 6086 * 6087 * @param codePoint the character (Unicode code point) to be converted. 6088 * @return the lowercase equivalent of the character (Unicode code 6089 * point), if any; otherwise, the character itself. 6090 * @see Character#isLowerCase(int) 6091 * @see String#toLowerCase() 6092 * 6093 * @since 1.5 6094 */ 6095 public static int toLowerCase(int codePoint) { 6096 return CharacterData.of(codePoint).toLowerCase(codePoint); 6097 } 6098 6099 /** 6100 * Converts the character argument to uppercase using case mapping 6101 * information from the UnicodeData file. 6102 * <p> 6103 * Note that 6104 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 6105 * does not always return {@code true} for some ranges of 6106 * characters, particularly those that are symbols or ideographs. 6107 * 6108 * <p>In general, {@link String#toUpperCase()} should be used to map 6109 * characters to uppercase. {@code String} case mapping methods 6110 * have several benefits over {@code Character} case mapping methods. 6111 * {@code String} case mapping methods can perform locale-sensitive 6112 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6113 * the {@code Character} case mapping methods cannot. 6114 * 6115 * <p><b>Note:</b> This method cannot handle <a 6116 * href="#supplementary"> supplementary characters</a>. To support 6117 * all Unicode characters, including supplementary characters, use 6118 * the {@link #toUpperCase(int)} method. 6119 * 6120 * @param ch the character to be converted. 6121 * @return the uppercase equivalent of the character, if any; 6122 * otherwise, the character itself. 
6123 * @see Character#isUpperCase(char) 6124 * @see String#toUpperCase() 6125 */ 6126 public static char toUpperCase(char ch) { 6127 return (char)toUpperCase((int)ch); 6128 } 6129 6130 /** 6131 * Converts the character (Unicode code point) argument to 6132 * uppercase using case mapping information from the UnicodeData 6133 * file. 6134 * 6135 * <p>Note that 6136 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 6137 * does not always return {@code true} for some ranges of 6138 * characters, particularly those that are symbols or ideographs. 6139 * 6140 * <p>In general, {@link String#toUpperCase()} should be used to map 6141 * characters to uppercase. {@code String} case mapping methods 6142 * have several benefits over {@code Character} case mapping methods. 6143 * {@code String} case mapping methods can perform locale-sensitive 6144 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6145 * the {@code Character} case mapping methods cannot. 6146 * 6147 * @param codePoint the character (Unicode code point) to be converted. 6148 * @return the uppercase equivalent of the character, if any; 6149 * otherwise, the character itself. 6150 * @see Character#isUpperCase(int) 6151 * @see String#toUpperCase() 6152 * 6153 * @since 1.5 6154 */ 6155 public static int toUpperCase(int codePoint) { 6156 return CharacterData.of(codePoint).toUpperCase(codePoint); 6157 } 6158 6159 /** 6160 * Converts the character argument to titlecase using case mapping 6161 * information from the UnicodeData file. If a character has no 6162 * explicit titlecase mapping and is not itself a titlecase char 6163 * according to UnicodeData, then the uppercase mapping is 6164 * returned as an equivalent titlecase mapping. If the 6165 * {@code char} argument is already a titlecase 6166 * {@code char}, the same {@code char} value will be 6167 * returned. 
6168 * <p> 6169 * Note that 6170 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 6171 * does not always return {@code true} for some ranges of 6172 * characters. 6173 * 6174 * <p><b>Note:</b> This method cannot handle <a 6175 * href="#supplementary"> supplementary characters</a>. To support 6176 * all Unicode characters, including supplementary characters, use 6177 * the {@link #toTitleCase(int)} method. 6178 * 6179 * @param ch the character to be converted. 6180 * @return the titlecase equivalent of the character, if any; 6181 * otherwise, the character itself. 6182 * @see Character#isTitleCase(char) 6183 * @see Character#toLowerCase(char) 6184 * @see Character#toUpperCase(char) 6185 * @since 1.0.2 6186 */ 6187 public static char toTitleCase(char ch) { 6188 return (char)toTitleCase((int)ch); 6189 } 6190 6191 /** 6192 * Converts the character (Unicode code point) argument to titlecase using case mapping 6193 * information from the UnicodeData file. If a character has no 6194 * explicit titlecase mapping and is not itself a titlecase char 6195 * according to UnicodeData, then the uppercase mapping is 6196 * returned as an equivalent titlecase mapping. If the 6197 * character argument is already a titlecase 6198 * character, the same character value will be 6199 * returned. 6200 * 6201 * <p>Note that 6202 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 6203 * does not always return {@code true} for some ranges of 6204 * characters. 6205 * 6206 * @param codePoint the character (Unicode code point) to be converted. 6207 * @return the titlecase equivalent of the character, if any; 6208 * otherwise, the character itself. 
6209 * @see Character#isTitleCase(int) 6210 * @see Character#toLowerCase(int) 6211 * @see Character#toUpperCase(int) 6212 * @since 1.5 6213 */ 6214 public static int toTitleCase(int codePoint) { 6215 return CharacterData.of(codePoint).toTitleCase(codePoint); 6216 } 6217 6218 /** 6219 * Returns the numeric value of the character {@code ch} in the 6220 * specified radix. 6221 * <p> 6222 * If the radix is not in the range {@code MIN_RADIX} ≤ 6223 * {@code radix} ≤ {@code MAX_RADIX} or if the 6224 * value of {@code ch} is not a valid digit in the specified 6225 * radix, {@code -1} is returned. A character is a valid digit 6226 * if at least one of the following is true: 6227 * <ul> 6228 * <li>The method {@code isDigit} is {@code true} of the character 6229 * and the Unicode decimal digit value of the character (or its 6230 * single-character decomposition) is less than the specified radix. 6231 * In this case the decimal digit value is returned. 6232 * <li>The character is one of the uppercase Latin letters 6233 * {@code 'A'} through {@code 'Z'} and its code is less than 6234 * {@code radix + 'A' - 10}. 6235 * In this case, {@code ch - 'A' + 10} 6236 * is returned. 6237 * <li>The character is one of the lowercase Latin letters 6238 * {@code 'a'} through {@code 'z'} and its code is less than 6239 * {@code radix + 'a' - 10}. 6240 * In this case, {@code ch - 'a' + 10} 6241 * is returned. 6242 * <li>The character is one of the fullwidth uppercase Latin letters A 6243 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6244 * and its code is less than 6245 * {@code radix + '\u005CuFF21' - 10}. 6246 * In this case, {@code ch - '\u005CuFF21' + 10} 6247 * is returned. 6248 * <li>The character is one of the fullwidth lowercase Latin letters a 6249 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6250 * and its code is less than 6251 * {@code radix + '\u005CuFF41' - 10}. 6252 * In this case, {@code ch - '\u005CuFF41' + 10} 6253 * is returned. 
6254 * </ul> 6255 * 6256 * <p><b>Note:</b> This method cannot handle <a 6257 * href="#supplementary"> supplementary characters</a>. To support 6258 * all Unicode characters, including supplementary characters, use 6259 * the {@link #digit(int, int)} method. 6260 * 6261 * @param ch the character to be converted. 6262 * @param radix the radix. 6263 * @return the numeric value represented by the character in the 6264 * specified radix. 6265 * @see Character#forDigit(int, int) 6266 * @see Character#isDigit(char) 6267 */ 6268 public static int digit(char ch, int radix) { 6269 return digit((int)ch, radix); 6270 } 6271 6272 /** 6273 * Returns the numeric value of the specified character (Unicode 6274 * code point) in the specified radix. 6275 * 6276 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 6277 * {@code radix} ≤ {@code MAX_RADIX} or if the 6278 * character is not a valid digit in the specified 6279 * radix, {@code -1} is returned. A character is a valid digit 6280 * if at least one of the following is true: 6281 * <ul> 6282 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 6283 * and the Unicode decimal digit value of the character (or its 6284 * single-character decomposition) is less than the specified radix. 6285 * In this case the decimal digit value is returned. 6286 * <li>The character is one of the uppercase Latin letters 6287 * {@code 'A'} through {@code 'Z'} and its code is less than 6288 * {@code radix + 'A' - 10}. 6289 * In this case, {@code codePoint - 'A' + 10} 6290 * is returned. 6291 * <li>The character is one of the lowercase Latin letters 6292 * {@code 'a'} through {@code 'z'} and its code is less than 6293 * {@code radix + 'a' - 10}. 6294 * In this case, {@code codePoint - 'a' + 10} 6295 * is returned. 
6296 * <li>The character is one of the fullwidth uppercase Latin letters A 6297 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 6298 * and its code is less than 6299 * {@code radix + '\u005CuFF21' - 10}. 6300 * In this case, 6301 * {@code codePoint - '\u005CuFF21' + 10} 6302 * is returned. 6303 * <li>The character is one of the fullwidth lowercase Latin letters a 6304 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 6305 * and its code is less than 6306 * {@code radix + '\u005CuFF41'- 10}. 6307 * In this case, 6308 * {@code codePoint - '\u005CuFF41' + 10} 6309 * is returned. 6310 * </ul> 6311 * 6312 * @param codePoint the character (Unicode code point) to be converted. 6313 * @param radix the radix. 6314 * @return the numeric value represented by the character in the 6315 * specified radix. 6316 * @see Character#forDigit(int, int) 6317 * @see Character#isDigit(int) 6318 * @since 1.5 6319 */ 6320 public static int digit(int codePoint, int radix) { 6321 return CharacterData.of(codePoint).digit(codePoint, radix); 6322 } 6323 6324 /** 6325 * Returns the {@code int} value that the specified Unicode 6326 * character represents. For example, the character 6327 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 6328 * an int with a value of 50. 6329 * <p> 6330 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6331 * {@code '\u005Cu005A'}), lowercase 6332 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6333 * full width variant ({@code '\u005CuFF21'} through 6334 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6335 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6336 * through 35. This is independent of the Unicode specification, 6337 * which does not assign numeric values to these {@code char} 6338 * values. 6339 * <p> 6340 * If the character does not have a numeric value, then -1 is returned. 
6341 * If the character has a numeric value that cannot be represented as a 6342 * nonnegative integer (for example, a fractional value), then -2 6343 * is returned. 6344 * 6345 * <p><b>Note:</b> This method cannot handle <a 6346 * href="#supplementary"> supplementary characters</a>. To support 6347 * all Unicode characters, including supplementary characters, use 6348 * the {@link #getNumericValue(int)} method. 6349 * 6350 * @param ch the character to be converted. 6351 * @return the numeric value of the character, as a nonnegative {@code int} 6352 * value; -2 if the character has a numeric value that is not a 6353 * nonnegative integer; -1 if the character has no numeric value. 6354 * @see Character#forDigit(int, int) 6355 * @see Character#isDigit(char) 6356 * @since 1.1 6357 */ 6358 public static int getNumericValue(char ch) { 6359 return getNumericValue((int)ch); 6360 } 6361 6362 /** 6363 * Returns the {@code int} value that the specified 6364 * character (Unicode code point) represents. For example, the character 6365 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 6366 * an {@code int} with a value of 50. 6367 * <p> 6368 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 6369 * {@code '\u005Cu005A'}), lowercase 6370 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 6371 * full width variant ({@code '\u005CuFF21'} through 6372 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 6373 * {@code '\u005CuFF5A'}) forms have numeric values from 10 6374 * through 35. This is independent of the Unicode specification, 6375 * which does not assign numeric values to these {@code char} 6376 * values. 6377 * <p> 6378 * If the character does not have a numeric value, then -1 is returned. 6379 * If the character has a numeric value that cannot be represented as a 6380 * nonnegative integer (for example, a fractional value), then -2 6381 * is returned. 
6382 * 6383 * @param codePoint the character (Unicode code point) to be converted. 6384 * @return the numeric value of the character, as a nonnegative {@code int} 6385 * value; -2 if the character has a numeric value that is not a 6386 * nonnegative integer; -1 if the character has no numeric value. 6387 * @see Character#forDigit(int, int) 6388 * @see Character#isDigit(int) 6389 * @since 1.5 6390 */ 6391 public static int getNumericValue(int codePoint) { 6392 return CharacterData.of(codePoint).getNumericValue(codePoint); 6393 } 6394 6395 /** 6396 * Determines if the specified character is ISO-LATIN-1 white space. 6397 * This method returns {@code true} for the following five 6398 * characters only: 6399 * <table> 6400 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td> 6401 * <td>{@code HORIZONTAL TABULATION}</td></tr> 6402 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td> 6403 * <td>{@code NEW LINE}</td></tr> 6404 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td> 6405 * <td>{@code FORM FEED}</td></tr> 6406 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td> 6407 * <td>{@code CARRIAGE RETURN}</td></tr> 6408 * <tr><td>{@code ' '}</td> <td>{@code U+0020}</td> 6409 * <td>{@code SPACE}</td></tr> 6410 * </table> 6411 * 6412 * @param ch the character to be tested. 6413 * @return {@code true} if the character is ISO-LATIN-1 white 6414 * space; {@code false} otherwise. 6415 * @see Character#isSpaceChar(char) 6416 * @see Character#isWhitespace(char) 6417 * @deprecated Replaced by isWhitespace(char). 6418 */ 6419 @Deprecated 6420 public static boolean isSpace(char ch) { 6421 return (ch <= 0x0020) && 6422 (((((1L << 0x0009) | 6423 (1L << 0x000A) | 6424 (1L << 0x000C) | 6425 (1L << 0x000D) | 6426 (1L << 0x0020)) >> ch) & 1L) != 0); 6427 } 6428 6429 6430 /** 6431 * Determines if the specified character is a Unicode space character. 6432 * A character is considered to be a space character if and only if 6433 * it is specified to be a space character by the Unicode standard. 
This 6434 * method returns true if the character's general category type is any of 6435 * the following: 6436 * <ul> 6437 * <li> {@code SPACE_SEPARATOR} 6438 * <li> {@code LINE_SEPARATOR} 6439 * <li> {@code PARAGRAPH_SEPARATOR} 6440 * </ul> 6441 * 6442 * <p><b>Note:</b> This method cannot handle <a 6443 * href="#supplementary"> supplementary characters</a>. To support 6444 * all Unicode characters, including supplementary characters, use 6445 * the {@link #isSpaceChar(int)} method. 6446 * 6447 * @param ch the character to be tested. 6448 * @return {@code true} if the character is a space character; 6449 * {@code false} otherwise. 6450 * @see Character#isWhitespace(char) 6451 * @since 1.1 6452 */ 6453 public static boolean isSpaceChar(char ch) { 6454 return isSpaceChar((int)ch); 6455 } 6456 6457 /** 6458 * Determines if the specified character (Unicode code point) is a 6459 * Unicode space character. A character is considered to be a 6460 * space character if and only if it is specified to be a space 6461 * character by the Unicode standard. This method returns true if 6462 * the character's general category type is any of the following: 6463 * 6464 * <ul> 6465 * <li> {@link #SPACE_SEPARATOR} 6466 * <li> {@link #LINE_SEPARATOR} 6467 * <li> {@link #PARAGRAPH_SEPARATOR} 6468 * </ul> 6469 * 6470 * @param codePoint the character (Unicode code point) to be tested. 6471 * @return {@code true} if the character is a space character; 6472 * {@code false} otherwise. 6473 * @see Character#isWhitespace(int) 6474 * @since 1.5 6475 */ 6476 public static boolean isSpaceChar(int codePoint) { 6477 return ((((1 << Character.SPACE_SEPARATOR) | 6478 (1 << Character.LINE_SEPARATOR) | 6479 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6480 != 0; 6481 } 6482 6483 /** 6484 * Determines if the specified character is white space according to Java. 
6485 * A character is a Java whitespace character if and only if it satisfies 6486 * one of the following criteria: 6487 * <ul> 6488 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 6489 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 6490 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6491 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6492 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6493 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6494 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6495 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6496 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6497 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6498 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6499 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6500 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6501 * </ul> 6502 * 6503 * <p><b>Note:</b> This method cannot handle <a 6504 * href="#supplementary"> supplementary characters</a>. To support 6505 * all Unicode characters, including supplementary characters, use 6506 * the {@link #isWhitespace(int)} method. 6507 * 6508 * @param ch the character to be tested. 6509 * @return {@code true} if the character is a Java whitespace 6510 * character; {@code false} otherwise. 6511 * @see Character#isSpaceChar(char) 6512 * @since 1.1 6513 */ 6514 public static boolean isWhitespace(char ch) { 6515 return isWhitespace((int)ch); 6516 } 6517 6518 /** 6519 * Determines if the specified character (Unicode code point) is 6520 * white space according to Java. 
A character is a Java 6521 * whitespace character if and only if it satisfies one of the 6522 * following criteria: 6523 * <ul> 6524 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6525 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6526 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 6527 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 6528 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 6529 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 6530 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 6531 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 6532 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 6533 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 6534 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 6535 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 6536 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 6537 * </ul> 6538 * <p> 6539 * 6540 * @param codePoint the character (Unicode code point) to be tested. 6541 * @return {@code true} if the character is a Java whitespace 6542 * character; {@code false} otherwise. 6543 * @see Character#isSpaceChar(int) 6544 * @since 1.5 6545 */ 6546 public static boolean isWhitespace(int codePoint) { 6547 return CharacterData.of(codePoint).isWhitespace(codePoint); 6548 } 6549 6550 /** 6551 * Determines if the specified character is an ISO control 6552 * character. A character is considered to be an ISO control 6553 * character if its code is in the range {@code '\u005Cu0000'} 6554 * through {@code '\u005Cu001F'} or in the range 6555 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6556 * 6557 * <p><b>Note:</b> This method cannot handle <a 6558 * href="#supplementary"> supplementary characters</a>. To support 6559 * all Unicode characters, including supplementary characters, use 6560 * the {@link #isISOControl(int)} method. 6561 * 6562 * @param ch the character to be tested. 
6563 * @return {@code true} if the character is an ISO control character; 6564 * {@code false} otherwise. 6565 * 6566 * @see Character#isSpaceChar(char) 6567 * @see Character#isWhitespace(char) 6568 * @since 1.1 6569 */ 6570 public static boolean isISOControl(char ch) { 6571 return isISOControl((int)ch); 6572 } 6573 6574 /** 6575 * Determines if the referenced character (Unicode code point) is an ISO control 6576 * character. A character is considered to be an ISO control 6577 * character if its code is in the range {@code '\u005Cu0000'} 6578 * through {@code '\u005Cu001F'} or in the range 6579 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 6580 * 6581 * @param codePoint the character (Unicode code point) to be tested. 6582 * @return {@code true} if the character is an ISO control character; 6583 * {@code false} otherwise. 6584 * @see Character#isSpaceChar(int) 6585 * @see Character#isWhitespace(int) 6586 * @since 1.5 6587 */ 6588 public static boolean isISOControl(int codePoint) { 6589 // Optimized form of: 6590 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6591 // (codePoint >= 0x7F && codePoint <= 0x9F); 6592 return codePoint <= 0x9F && 6593 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6594 } 6595 6596 /** 6597 * Returns a value indicating a character's general category. 6598 * 6599 * <p><b>Note:</b> This method cannot handle <a 6600 * href="#supplementary"> supplementary characters</a>. To support 6601 * all Unicode characters, including supplementary characters, use 6602 * the {@link #getType(int)} method. 6603 * 6604 * @param ch the character to be tested. 6605 * @return a value of type {@code int} representing the 6606 * character's general category. 
6607 * @see Character#COMBINING_SPACING_MARK 6608 * @see Character#CONNECTOR_PUNCTUATION 6609 * @see Character#CONTROL 6610 * @see Character#CURRENCY_SYMBOL 6611 * @see Character#DASH_PUNCTUATION 6612 * @see Character#DECIMAL_DIGIT_NUMBER 6613 * @see Character#ENCLOSING_MARK 6614 * @see Character#END_PUNCTUATION 6615 * @see Character#FINAL_QUOTE_PUNCTUATION 6616 * @see Character#FORMAT 6617 * @see Character#INITIAL_QUOTE_PUNCTUATION 6618 * @see Character#LETTER_NUMBER 6619 * @see Character#LINE_SEPARATOR 6620 * @see Character#LOWERCASE_LETTER 6621 * @see Character#MATH_SYMBOL 6622 * @see Character#MODIFIER_LETTER 6623 * @see Character#MODIFIER_SYMBOL 6624 * @see Character#NON_SPACING_MARK 6625 * @see Character#OTHER_LETTER 6626 * @see Character#OTHER_NUMBER 6627 * @see Character#OTHER_PUNCTUATION 6628 * @see Character#OTHER_SYMBOL 6629 * @see Character#PARAGRAPH_SEPARATOR 6630 * @see Character#PRIVATE_USE 6631 * @see Character#SPACE_SEPARATOR 6632 * @see Character#START_PUNCTUATION 6633 * @see Character#SURROGATE 6634 * @see Character#TITLECASE_LETTER 6635 * @see Character#UNASSIGNED 6636 * @see Character#UPPERCASE_LETTER 6637 * @since 1.1 6638 */ 6639 public static int getType(char ch) { 6640 return getType((int)ch); 6641 } 6642 6643 /** 6644 * Returns a value indicating a character's general category. 6645 * 6646 * @param codePoint the character (Unicode code point) to be tested. 6647 * @return a value of type {@code int} representing the 6648 * character's general category. 
6649 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6650 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6651 * @see Character#CONTROL CONTROL 6652 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6653 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6654 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6655 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6656 * @see Character#END_PUNCTUATION END_PUNCTUATION 6657 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6658 * @see Character#FORMAT FORMAT 6659 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6660 * @see Character#LETTER_NUMBER LETTER_NUMBER 6661 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6662 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6663 * @see Character#MATH_SYMBOL MATH_SYMBOL 6664 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6665 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6666 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6667 * @see Character#OTHER_LETTER OTHER_LETTER 6668 * @see Character#OTHER_NUMBER OTHER_NUMBER 6669 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6670 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6671 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6672 * @see Character#PRIVATE_USE PRIVATE_USE 6673 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6674 * @see Character#START_PUNCTUATION START_PUNCTUATION 6675 * @see Character#SURROGATE SURROGATE 6676 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6677 * @see Character#UNASSIGNED UNASSIGNED 6678 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6679 * @since 1.5 6680 */ 6681 public static int getType(int codePoint) { 6682 return CharacterData.of(codePoint).getType(codePoint); 6683 } 6684 6685 /** 6686 * Determines the character representation for a specific digit in 6687 * the specified radix. 
If the value of {@code radix} is not a 6688 * valid radix, or the value of {@code digit} is not a valid 6689 * digit in the specified radix, the null character 6690 * ({@code '\u005Cu0000'}) is returned. 6691 * <p> 6692 * The {@code radix} argument is valid if it is greater than or 6693 * equal to {@code MIN_RADIX} and less than or equal to 6694 * {@code MAX_RADIX}. The {@code digit} argument is valid if 6695 * {@code 0 <= digit < radix}. 6696 * <p> 6697 * If the digit is less than 10, then 6698 * {@code '0' + digit} is returned. Otherwise, the value 6699 * {@code 'a' + digit - 10} is returned. 6700 * 6701 * @param digit the number to convert to a character. 6702 * @param radix the radix. 6703 * @return the {@code char} representation of the specified digit 6704 * in the specified radix. 6705 * @see Character#MIN_RADIX 6706 * @see Character#MAX_RADIX 6707 * @see Character#digit(char, int) 6708 */ 6709 public static char forDigit(int digit, int radix) { 6710 if ((digit >= radix) || (digit < 0)) { 6711 return '\0'; 6712 } 6713 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6714 return '\0'; 6715 } 6716 if (digit < 10) { 6717 return (char)('0' + digit); 6718 } 6719 return (char)('a' - 10 + digit); 6720 } 6721 6722 /** 6723 * Returns the Unicode directionality property for the given 6724 * character. Character directionality is used to calculate the 6725 * visual ordering of text. The directionality value of undefined 6726 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 6727 * 6728 * <p><b>Note:</b> This method cannot handle <a 6729 * href="#supplementary"> supplementary characters</a>. To support 6730 * all Unicode characters, including supplementary characters, use 6731 * the {@link #getDirectionality(int)} method. 6732 * 6733 * @param ch {@code char} for which the directionality property 6734 * is requested. 6735 * @return the directionality property of the {@code char} value. 
6736 * 6737 * @see Character#DIRECTIONALITY_UNDEFINED 6738 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6739 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6740 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6741 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6742 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6743 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6744 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6745 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6746 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6747 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6748 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6749 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6750 * @see Character#DIRECTIONALITY_WHITESPACE 6751 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6752 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6753 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6754 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6755 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6756 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6757 * @since 1.4 6758 */ 6759 public static byte getDirectionality(char ch) { 6760 return getDirectionality((int)ch); 6761 } 6762 6763 /** 6764 * Returns the Unicode directionality property for the given 6765 * character (Unicode code point). Character directionality is 6766 * used to calculate the visual ordering of text. The 6767 * directionality value of undefined character is {@link 6768 * #DIRECTIONALITY_UNDEFINED}. 6769 * 6770 * @param codePoint the character (Unicode code point) for which 6771 * the directionality property is requested. 6772 * @return the directionality property of the character. 
6773 * 6774 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 6775 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 6776 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 6777 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6778 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 6779 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6780 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6781 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 6782 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6783 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 6784 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 6785 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 6786 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 6787 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 6788 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 6789 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6790 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6791 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6792 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6793 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6794 * @since 1.5 6795 */ 6796 public static byte getDirectionality(int codePoint) { 6797 return CharacterData.of(codePoint).getDirectionality(codePoint); 6798 } 6799 6800 /** 6801 * 
Determines whether the character is mirrored according to the 6802 * Unicode specification. Mirrored characters should have their 6803 * glyphs horizontally mirrored when displayed in text that is 6804 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 6805 * PARENTHESIS is semantically defined to be an <i>opening 6806 * parenthesis</i>. This will appear as a "(" in text that is 6807 * left-to-right but as a ")" in text that is right-to-left. 6808 * 6809 * <p><b>Note:</b> This method cannot handle <a 6810 * href="#supplementary"> supplementary characters</a>. To support 6811 * all Unicode characters, including supplementary characters, use 6812 * the {@link #isMirrored(int)} method. 6813 * 6814 * @param ch {@code char} for which the mirrored property is requested 6815 * @return {@code true} if the char is mirrored, {@code false} 6816 * if the {@code char} is not mirrored or is not defined. 6817 * @since 1.4 6818 */ 6819 public static boolean isMirrored(char ch) { 6820 return isMirrored((int)ch); 6821 } 6822 6823 /** 6824 * Determines whether the specified character (Unicode code point) 6825 * is mirrored according to the Unicode specification. Mirrored 6826 * characters should have their glyphs horizontally mirrored when 6827 * displayed in text that is right-to-left. For example, 6828 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 6829 * defined to be an <i>opening parenthesis</i>. This will appear 6830 * as a "(" in text that is left-to-right but as a ")" in text 6831 * that is right-to-left. 6832 * 6833 * @param codePoint the character (Unicode code point) to be tested. 6834 * @return {@code true} if the character is mirrored, {@code false} 6835 * if the character is not mirrored or is not defined. 6836 * @since 1.5 6837 */ 6838 public static boolean isMirrored(int codePoint) { 6839 return CharacterData.of(codePoint).isMirrored(codePoint); 6840 } 6841 6842 /** 6843 * Compares two {@code Character} objects numerically. 
6844 * 6845 * @param anotherCharacter the {@code Character} to be compared. 6846 6847 * @return the value {@code 0} if the argument {@code Character} 6848 * is equal to this {@code Character}; a value less than 6849 * {@code 0} if this {@code Character} is numerically less 6850 * than the {@code Character} argument; and a value greater than 6851 * {@code 0} if this {@code Character} is numerically greater 6852 * than the {@code Character} argument (unsigned comparison). 6853 * Note that this is strictly a numerical comparison; it is not 6854 * locale-dependent. 6855 * @since 1.2 6856 */ 6857 public int compareTo(Character anotherCharacter) { 6858 return compare(this.value, anotherCharacter.value); 6859 } 6860 6861 /** 6862 * Compares two {@code char} values numerically. 6863 * The value returned is identical to what would be returned by: 6864 * <pre> 6865 * Character.valueOf(x).compareTo(Character.valueOf(y)) 6866 * </pre> 6867 * 6868 * @param x the first {@code char} to compare 6869 * @param y the second {@code char} to compare 6870 * @return the value {@code 0} if {@code x == y}; 6871 * a value less than {@code 0} if {@code x < y}; and 6872 * a value greater than {@code 0} if {@code x > y} 6873 * @since 1.7 6874 */ 6875 public static int compare(char x, char y) { 6876 return x - y; 6877 } 6878 6879 /** 6880 * Converts the character (Unicode code point) argument to uppercase using 6881 * information from the UnicodeData file. 6882 * <p> 6883 * 6884 * @param codePoint the character (Unicode code point) to be converted. 6885 * @return either the uppercase equivalent of the character, if 6886 * any, or an error flag ({@code Character.ERROR}) 6887 * that indicates that a 1:M {@code char} mapping exists. 
6888 * @see Character#isLowerCase(char) 6889 * @see Character#isUpperCase(char) 6890 * @see Character#toLowerCase(char) 6891 * @see Character#toTitleCase(char) 6892 * @since 1.4 6893 */ 6894 static int toUpperCaseEx(int codePoint) { 6895 assert isValidCodePoint(codePoint); 6896 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 6897 } 6898 6899 /** 6900 * Converts the character (Unicode code point) argument to uppercase using case 6901 * mapping information from the SpecialCasing file in the Unicode 6902 * specification. If a character has no explicit uppercase 6903 * mapping, then the {@code char} itself is returned in the 6904 * {@code char[]}. 6905 * 6906 * @param codePoint the character (Unicode code point) to be converted. 6907 * @return a {@code char[]} with the uppercased character. 6908 * @since 1.4 6909 */ 6910 static char[] toUpperCaseCharArray(int codePoint) { 6911 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP. 6912 assert isBmpCodePoint(codePoint); 6913 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 6914 } 6915 6916 /** 6917 * The number of bits used to represent a <tt>char</tt> value in unsigned 6918 * binary form, constant {@code 16}. 6919 * 6920 * @since 1.5 6921 */ 6922 public static final int SIZE = 16; 6923 6924 /** 6925 * Returns the value obtained by reversing the order of the bytes in the 6926 * specified <tt>char</tt> value. 6927 * 6928 * @return the value obtained by reversing (or, equivalently, swapping) 6929 * the bytes in the specified <tt>char</tt> value. 6930 * @since 1.5 6931 */ 6932 public static char reverseBytes(char ch) { 6933 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 6934 } 6935 6936 /** 6937 * Returns the Unicode name of the specified character 6938 * {@code codePoint}, or null if the code point is 6939 * {@link #UNASSIGNED unassigned}. 
6940 * <p> 6941 * Note: if the specified character is not assigned a name by 6942 * the <i>UnicodeData</i> file (part of the Unicode Character 6943 * Database maintained by the Unicode Consortium), the returned 6944 * name is the same as the result of expression 6945 * 6946 * <blockquote>{@code 6947 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 6948 * + " " 6949 * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 6950 * 6951 * }</blockquote> 6952 * 6953 * @param codePoint the character (Unicode code point) 6954 * 6955 * @return the Unicode name of the specified character, or null if 6956 * the code point is unassigned. 6957 * 6958 * @exception IllegalArgumentException if the specified 6959 * {@code codePoint} is not a valid Unicode 6960 * code point. 6961 * 6962 * @since 1.7 6963 */ 6964 public static String getName(int codePoint) { 6965 if (!isValidCodePoint(codePoint)) { 6966 throw new IllegalArgumentException(); 6967 } 6968 String name = CharacterName.get(codePoint); 6969 if (name != null) 6970 return name; 6971 if (getType(codePoint) == UNASSIGNED) 6972 return null; 6973 UnicodeBlock block = UnicodeBlock.of(codePoint); 6974 if (block != null) 6975 return block.toString().replace('_', ' ') + " " 6976 + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 6977 // should never come here 6978 return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH); 6979 } 6980 }