1 /* 2 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The <code>Character</code> class wraps a value of the primitive 35 * type <code>char</code> in an object. An object of type 36 * <code>Character</code> contains a single field whose type is 37 * <code>char</code>. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.0.0. 
44 * <p> 45 * The methods and data of class <code>Character</code> are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h4><a name="unicode">Unicode Character Representations</a></h4> 58 * 59 * <p>The <code>char</code> data type (and therefore the value that a 60 * <code>Character</code> object encapsulates) are based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in <code>char</code> arrays and 76 * in the <code>String</code> and <code>StringBuffer</code> classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of <code>char</code> values, the first from the <em>high-surrogates</em> 79 * range, (\uD800-\uDBFF), the second from the 80 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 
81 * 82 * <p>A <code>char</code> value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * <code>int</code> value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of <code>int</code> are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate <code>char</code> values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a <code>char</code> value cannot support 95 * supplementary characters. They treat <code>char</code> values from the 96 * surrogate ranges as undefined characters. For example, 97 * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an <code>int</code> value support all 102 * Unicode characters, including supplementary characters. For 103 * example, <code>Character.isLetter(0x2F81A)</code> returns 104 * <code>true</code> because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * <code>char</code> values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * <code>digit</code> method, the <code>forDigit</code> 129 * method, and the <code>toString</code> method of class 130 * <code>Integer</code>. 131 * 132 * @see Character#digit(char, int) 133 * @see Character#forDigit(int, int) 134 * @see Integer#toString(int, int) 135 * @see Integer#valueOf(String) 136 */ 137 public static final int MIN_RADIX = 2; 138 139 /** 140 * The maximum radix available for conversion to and from strings. 141 * The constant value of this field is the largest value permitted 142 * for the radix argument in radix-conversion methods such as the 143 * <code>digit</code> method, the <code>forDigit</code> 144 * method, and the <code>toString</code> method of class 145 * <code>Integer</code>. 146 * 147 * @see Character#digit(char, int) 148 * @see Character#forDigit(int, int) 149 * @see Integer#toString(int, int) 150 * @see Integer#valueOf(String) 151 */ 152 public static final int MAX_RADIX = 36; 153 154 /** 155 * The constant value of this field is the smallest value of type 156 * <code>char</code>, <code>'\u0000'</code>. 157 * 158 * @since 1.0.2 159 */ 160 public static final char MIN_VALUE = '\u0000'; 161 162 /** 163 * The constant value of this field is the largest value of type 164 * <code>char</code>, <code>'\uFFFF'</code>. 165 * 166 * @since 1.0.2 167 */ 168 public static final char MAX_VALUE = '\uFFFF'; 169 170 /** 171 * The <code>Class</code> instance representing the primitive type 172 * <code>char</code>. 
173 * 174 * @since 1.1 175 */ 176 @SuppressWarnings("unchecked") 177 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 178 179 /* 180 * Normative general types 181 */ 182 183 /* 184 * General character types 185 */ 186 187 /** 188 * General category "Cn" in the Unicode specification. 189 * @since 1.1 190 */ 191 public static final byte UNASSIGNED = 0; 192 193 /** 194 * General category "Lu" in the Unicode specification. 195 * @since 1.1 196 */ 197 public static final byte UPPERCASE_LETTER = 1; 198 199 /** 200 * General category "Ll" in the Unicode specification. 201 * @since 1.1 202 */ 203 public static final byte LOWERCASE_LETTER = 2; 204 205 /** 206 * General category "Lt" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte MODIFIER_LETTER = 4; 216 217 /** 218 * General category "Lo" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte OTHER_LETTER = 5; 222 223 /** 224 * General category "Mn" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte NON_SPACING_MARK = 6; 228 229 /** 230 * General category "Me" in the Unicode specification. 231 * @since 1.1 232 */ 233 public static final byte ENCLOSING_MARK = 7; 234 235 /** 236 * General category "Mc" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte COMBINING_SPACING_MARK = 8; 240 241 /** 242 * General category "Nd" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte DECIMAL_DIGIT_NUMBER = 9; 246 247 /** 248 * General category "Nl" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte LETTER_NUMBER = 10; 252 253 /** 254 * General category "No" in the Unicode specification. 
255 * @since 1.1 256 */ 257 public static final byte OTHER_NUMBER = 11; 258 259 /** 260 * General category "Zs" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte SPACE_SEPARATOR = 12; 264 265 /** 266 * General category "Zl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LINE_SEPARATOR = 13; 270 271 /** 272 * General category "Zp" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte PARAGRAPH_SEPARATOR = 14; 276 277 /** 278 * General category "Cc" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte CONTROL = 15; 282 283 /** 284 * General category "Cf" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte FORMAT = 16; 288 289 /** 290 * General category "Co" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PRIVATE_USE = 18; 294 295 /** 296 * General category "Cs" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte SURROGATE = 19; 300 301 /** 302 * General category "Pd" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte DASH_PUNCTUATION = 20; 306 307 /** 308 * General category "Ps" in the Unicode specification. 309 * @since 1.1 310 */ 311 public static final byte START_PUNCTUATION = 21; 312 313 /** 314 * General category "Pe" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte END_PUNCTUATION = 22; 318 319 /** 320 * General category "Pc" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte CONNECTOR_PUNCTUATION = 23; 324 325 /** 326 * General category "Po" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte OTHER_PUNCTUATION = 24; 330 331 /** 332 * General category "Sm" in the Unicode specification. 
333 * @since 1.1 334 */ 335 public static final byte MATH_SYMBOL = 25; 336 337 /** 338 * General category "Sc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CURRENCY_SYMBOL = 26; 342 343 /** 344 * General category "Sk" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte MODIFIER_SYMBOL = 27; 348 349 /** 350 * General category "So" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte OTHER_SYMBOL = 28; 354 355 /** 356 * General category "Pi" in the Unicode specification. 357 * @since 1.4 358 */ 359 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 360 361 /** 362 * General category "Pf" in the Unicode specification. 363 * @since 1.4 364 */ 365 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 366 367 /** 368 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 369 */ 370 static final int ERROR = 0xFFFFFFFF; 371 372 373 /** 374 * Undefined bidirectional character type. Undefined <code>char</code> 375 * values have undefined directionality in the Unicode specification. 376 * @since 1.4 377 */ 378 public static final byte DIRECTIONALITY_UNDEFINED = -1; 379 380 /** 381 * Strong bidirectional character type "L" in the Unicode specification. 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 385 386 /** 387 * Strong bidirectional character type "R" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 391 392 /** 393 * Strong bidirectional character type "AL" in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 397 398 /** 399 * Weak bidirectional character type "EN" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 403 404 /** 405 * Weak bidirectional character type "ES" in the Unicode specification. 
406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 409 410 /** 411 * Weak bidirectional character type "ET" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 415 416 /** 417 * Weak bidirectional character type "AN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 421 422 /** 423 * Weak bidirectional character type "CS" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 427 428 /** 429 * Weak bidirectional character type "NSM" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 433 434 /** 435 * Weak bidirectional character type "BN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 439 440 /** 441 * Neutral bidirectional character type "B" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 445 446 /** 447 * Neutral bidirectional character type "S" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 451 452 /** 453 * Neutral bidirectional character type "WS" in the Unicode specification. 454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_WHITESPACE = 12; 457 458 /** 459 * Neutral bidirectional character type "ON" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 463 464 /** 465 * Strong bidirectional character type "LRE" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 469 470 /** 471 * Strong bidirectional character type "LRO" in the Unicode specification. 
472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 475 476 /** 477 * Strong bidirectional character type "RLE" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 481 482 /** 483 * Strong bidirectional character type "RLO" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 487 488 /** 489 * Weak bidirectional character type "PDF" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 493 494 /** 495 * The minimum value of a 496 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 497 * Unicode high-surrogate code unit</a> 498 * in the UTF-16 encoding, constant <code>'\uD800'</code>. 499 * A high-surrogate is also known as a <i>leading-surrogate</i>. 500 * 501 * @since 1.5 502 */ 503 public static final char MIN_HIGH_SURROGATE = '\uD800'; 504 505 /** 506 * The maximum value of a 507 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 508 * Unicode high-surrogate code unit</a> 509 * in the UTF-16 encoding, constant <code>'\uDBFF'</code>. 510 * A high-surrogate is also known as a <i>leading-surrogate</i>. 511 * 512 * @since 1.5 513 */ 514 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 515 516 /** 517 * The minimum value of a 518 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 519 * Unicode low-surrogate code unit</a> 520 * in the UTF-16 encoding, constant <code>'\uDC00'</code>. 521 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
522 * 523 * @since 1.5 524 */ 525 public static final char MIN_LOW_SURROGATE = '\uDC00'; 526 527 /** 528 * The maximum value of a 529 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 530 * Unicode low-surrogate code unit</a> 531 * in the UTF-16 encoding, constant <code>'\uDFFF'</code>. 532 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 533 * 534 * @since 1.5 535 */ 536 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 537 538 /** 539 * The minimum value of a Unicode surrogate code unit in the 540 * UTF-16 encoding, constant <code>'\uD800'</code>. 541 * 542 * @since 1.5 543 */ 544 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 545 546 /** 547 * The maximum value of a Unicode surrogate code unit in the 548 * UTF-16 encoding, constant <code>'\uDFFF'</code>. 549 * 550 * @since 1.5 551 */ 552 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 553 554 /** 555 * The minimum value of a 556 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 557 * Unicode supplementary code point</a>, constant {@code U+10000}. 558 * 559 * @since 1.5 560 */ 561 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 562 563 /** 564 * The minimum value of a 565 * <a href="http://www.unicode.org/glossary/#code_point"> 566 * Unicode code point</a>, constant {@code U+0000}. 567 * 568 * @since 1.5 569 */ 570 public static final int MIN_CODE_POINT = 0x000000; 571 572 /** 573 * The maximum value of a 574 * <a href="http://www.unicode.org/glossary/#code_point"> 575 * Unicode code point</a>, constant {@code U+10FFFF}. 576 * 577 * @since 1.5 578 */ 579 public static final int MAX_CODE_POINT = 0X10FFFF; 580 581 582 /** 583 * Instances of this class represent particular subsets of the Unicode 584 * character set. The only family of subsets defined in the 585 * <code>Character</code> class is {@link Character.UnicodeBlock}. 
586 * Other portions of the Java API may define other subsets for their 587 * own purposes. 588 * 589 * @since 1.2 590 */ 591 public static class Subset { 592 593 private String name; 594 595 /** 596 * Constructs a new <code>Subset</code> instance. 597 * 598 * @param name The name of this subset 599 * @exception NullPointerException if name is <code>null</code> 600 */ 601 protected Subset(String name) { 602 if (name == null) { 603 throw new NullPointerException("name"); 604 } 605 this.name = name; 606 } 607 608 /** 609 * Compares two <code>Subset</code> objects for equality. 610 * This method returns <code>true</code> if and only if 611 * <code>this</code> and the argument refer to the same 612 * object; since this method is <code>final</code>, this 613 * guarantee holds for all subclasses. 614 */ 615 public final boolean equals(Object obj) { 616 return (this == obj); 617 } 618 619 /** 620 * Returns the standard hash code as defined by the 621 * <code>{@link Object#hashCode}</code> method. This method 622 * is <code>final</code> in order to ensure that the 623 * <code>equals</code> and <code>hashCode</code> methods will 624 * be consistent in all subclasses. 625 */ 626 public final int hashCode() { 627 return super.hashCode(); 628 } 629 630 /** 631 * Returns the name of this subset. 632 */ 633 public final String toString() { 634 return name; 635 } 636 } 637 638 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 639 // for the latest specification of Unicode Blocks. 640 641 /** 642 * A family of character subsets representing the character blocks in the 643 * Unicode specification. Character blocks generally define characters 644 * used for a specific script or purpose. A character is contained by 645 * at most one Unicode block. 
646 * 647 * @since 1.2 648 */ 649 public static final class UnicodeBlock extends Subset { 650 651 private static Map<String, UnicodeBlock> map 652 = new HashMap<>(256); 653 654 /** 655 * Creates a UnicodeBlock with the given identifier name. 656 * This name must be the same as the block identifier. 657 */ 658 private UnicodeBlock(String idName) { 659 super(idName); 660 map.put(idName, this); 661 } 662 663 /** 664 * Creates a UnicodeBlock with the given identifier name and 665 * alias name. 666 */ 667 private UnicodeBlock(String idName, String alias) { 668 this(idName); 669 map.put(alias, this); 670 } 671 672 /** 673 * Creates a UnicodeBlock with the given identifier name and 674 * alias names. 675 */ 676 private UnicodeBlock(String idName, String... aliases) { 677 this(idName); 678 for (String alias : aliases) 679 map.put(alias, this); 680 } 681 682 /** 683 * Constant for the "Basic Latin" Unicode character block. 684 * @since 1.2 685 */ 686 public static final UnicodeBlock BASIC_LATIN = 687 new UnicodeBlock("BASIC_LATIN", 688 "BASIC LATIN", 689 "BASICLATIN"); 690 691 /** 692 * Constant for the "Latin-1 Supplement" Unicode character block. 693 * @since 1.2 694 */ 695 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 696 new UnicodeBlock("LATIN_1_SUPPLEMENT", 697 "LATIN-1 SUPPLEMENT", 698 "LATIN-1SUPPLEMENT"); 699 700 /** 701 * Constant for the "Latin Extended-A" Unicode character block. 702 * @since 1.2 703 */ 704 public static final UnicodeBlock LATIN_EXTENDED_A = 705 new UnicodeBlock("LATIN_EXTENDED_A", 706 "LATIN EXTENDED-A", 707 "LATINEXTENDED-A"); 708 709 /** 710 * Constant for the "Latin Extended-B" Unicode character block. 711 * @since 1.2 712 */ 713 public static final UnicodeBlock LATIN_EXTENDED_B = 714 new UnicodeBlock("LATIN_EXTENDED_B", 715 "LATIN EXTENDED-B", 716 "LATINEXTENDED-B"); 717 718 /** 719 * Constant for the "IPA Extensions" Unicode character block. 
720 * @since 1.2 721 */ 722 public static final UnicodeBlock IPA_EXTENSIONS = 723 new UnicodeBlock("IPA_EXTENSIONS", 724 "IPA EXTENSIONS", 725 "IPAEXTENSIONS"); 726 727 /** 728 * Constant for the "Spacing Modifier Letters" Unicode character block. 729 * @since 1.2 730 */ 731 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 732 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 733 "SPACING MODIFIER LETTERS", 734 "SPACINGMODIFIERLETTERS"); 735 736 /** 737 * Constant for the "Combining Diacritical Marks" Unicode character block. 738 * @since 1.2 739 */ 740 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 741 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 742 "COMBINING DIACRITICAL MARKS", 743 "COMBININGDIACRITICALMARKS"); 744 745 /** 746 * Constant for the "Greek and Coptic" Unicode character block. 747 * <p> 748 * This block was previously known as the "Greek" block. 749 * 750 * @since 1.2 751 */ 752 public static final UnicodeBlock GREEK = 753 new UnicodeBlock("GREEK", 754 "GREEK AND COPTIC", 755 "GREEKANDCOPTIC"); 756 757 /** 758 * Constant for the "Cyrillic" Unicode character block. 759 * @since 1.2 760 */ 761 public static final UnicodeBlock CYRILLIC = 762 new UnicodeBlock("CYRILLIC"); 763 764 /** 765 * Constant for the "Armenian" Unicode character block. 766 * @since 1.2 767 */ 768 public static final UnicodeBlock ARMENIAN = 769 new UnicodeBlock("ARMENIAN"); 770 771 /** 772 * Constant for the "Hebrew" Unicode character block. 773 * @since 1.2 774 */ 775 public static final UnicodeBlock HEBREW = 776 new UnicodeBlock("HEBREW"); 777 778 /** 779 * Constant for the "Arabic" Unicode character block. 780 * @since 1.2 781 */ 782 public static final UnicodeBlock ARABIC = 783 new UnicodeBlock("ARABIC"); 784 785 /** 786 * Constant for the "Devanagari" Unicode character block. 787 * @since 1.2 788 */ 789 public static final UnicodeBlock DEVANAGARI = 790 new UnicodeBlock("DEVANAGARI"); 791 792 /** 793 * Constant for the "Bengali" Unicode character block. 
794 * @since 1.2 795 */ 796 public static final UnicodeBlock BENGALI = 797 new UnicodeBlock("BENGALI"); 798 799 /** 800 * Constant for the "Gurmukhi" Unicode character block. 801 * @since 1.2 802 */ 803 public static final UnicodeBlock GURMUKHI = 804 new UnicodeBlock("GURMUKHI"); 805 806 /** 807 * Constant for the "Gujarati" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock GUJARATI = 811 new UnicodeBlock("GUJARATI"); 812 813 /** 814 * Constant for the "Oriya" Unicode character block. 815 * @since 1.2 816 */ 817 public static final UnicodeBlock ORIYA = 818 new UnicodeBlock("ORIYA"); 819 820 /** 821 * Constant for the "Tamil" Unicode character block. 822 * @since 1.2 823 */ 824 public static final UnicodeBlock TAMIL = 825 new UnicodeBlock("TAMIL"); 826 827 /** 828 * Constant for the "Telugu" Unicode character block. 829 * @since 1.2 830 */ 831 public static final UnicodeBlock TELUGU = 832 new UnicodeBlock("TELUGU"); 833 834 /** 835 * Constant for the "Kannada" Unicode character block. 836 * @since 1.2 837 */ 838 public static final UnicodeBlock KANNADA = 839 new UnicodeBlock("KANNADA"); 840 841 /** 842 * Constant for the "Malayalam" Unicode character block. 843 * @since 1.2 844 */ 845 public static final UnicodeBlock MALAYALAM = 846 new UnicodeBlock("MALAYALAM"); 847 848 /** 849 * Constant for the "Thai" Unicode character block. 850 * @since 1.2 851 */ 852 public static final UnicodeBlock THAI = 853 new UnicodeBlock("THAI"); 854 855 /** 856 * Constant for the "Lao" Unicode character block. 857 * @since 1.2 858 */ 859 public static final UnicodeBlock LAO = 860 new UnicodeBlock("LAO"); 861 862 /** 863 * Constant for the "Tibetan" Unicode character block. 864 * @since 1.2 865 */ 866 public static final UnicodeBlock TIBETAN = 867 new UnicodeBlock("TIBETAN"); 868 869 /** 870 * Constant for the "Georgian" Unicode character block. 
871 * @since 1.2 872 */ 873 public static final UnicodeBlock GEORGIAN = 874 new UnicodeBlock("GEORGIAN"); 875 876 /** 877 * Constant for the "Hangul Jamo" Unicode character block. 878 * @since 1.2 879 */ 880 public static final UnicodeBlock HANGUL_JAMO = 881 new UnicodeBlock("HANGUL_JAMO", 882 "HANGUL JAMO", 883 "HANGULJAMO"); 884 885 /** 886 * Constant for the "Latin Extended Additional" Unicode character block. 887 * @since 1.2 888 */ 889 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 890 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 891 "LATIN EXTENDED ADDITIONAL", 892 "LATINEXTENDEDADDITIONAL"); 893 894 /** 895 * Constant for the "Greek Extended" Unicode character block. 896 * @since 1.2 897 */ 898 public static final UnicodeBlock GREEK_EXTENDED = 899 new UnicodeBlock("GREEK_EXTENDED", 900 "GREEK EXTENDED", 901 "GREEKEXTENDED"); 902 903 /** 904 * Constant for the "General Punctuation" Unicode character block. 905 * @since 1.2 906 */ 907 public static final UnicodeBlock GENERAL_PUNCTUATION = 908 new UnicodeBlock("GENERAL_PUNCTUATION", 909 "GENERAL PUNCTUATION", 910 "GENERALPUNCTUATION"); 911 912 /** 913 * Constant for the "Superscripts and Subscripts" Unicode character 914 * block. 915 * @since 1.2 916 */ 917 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 918 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 919 "SUPERSCRIPTS AND SUBSCRIPTS", 920 "SUPERSCRIPTSANDSUBSCRIPTS"); 921 922 /** 923 * Constant for the "Currency Symbols" Unicode character block. 924 * @since 1.2 925 */ 926 public static final UnicodeBlock CURRENCY_SYMBOLS = 927 new UnicodeBlock("CURRENCY_SYMBOLS", 928 "CURRENCY SYMBOLS", 929 "CURRENCYSYMBOLS"); 930 931 /** 932 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 933 * character block. 934 * <p> 935 * This block was previously known as "Combining Marks for Symbols". 
936 * @since 1.2 937 */ 938 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 939 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 940 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 941 "COMBININGDIACRITICALMARKSFORSYMBOLS", 942 "COMBINING MARKS FOR SYMBOLS", 943 "COMBININGMARKSFORSYMBOLS"); 944 945 /** 946 * Constant for the "Letterlike Symbols" Unicode character block. 947 * @since 1.2 948 */ 949 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 950 new UnicodeBlock("LETTERLIKE_SYMBOLS", 951 "LETTERLIKE SYMBOLS", 952 "LETTERLIKESYMBOLS"); 953 954 /** 955 * Constant for the "Number Forms" Unicode character block. 956 * @since 1.2 957 */ 958 public static final UnicodeBlock NUMBER_FORMS = 959 new UnicodeBlock("NUMBER_FORMS", 960 "NUMBER FORMS", 961 "NUMBERFORMS"); 962 963 /** 964 * Constant for the "Arrows" Unicode character block. 965 * @since 1.2 966 */ 967 public static final UnicodeBlock ARROWS = 968 new UnicodeBlock("ARROWS"); 969 970 /** 971 * Constant for the "Mathematical Operators" Unicode character block. 972 * @since 1.2 973 */ 974 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 975 new UnicodeBlock("MATHEMATICAL_OPERATORS", 976 "MATHEMATICAL OPERATORS", 977 "MATHEMATICALOPERATORS"); 978 979 /** 980 * Constant for the "Miscellaneous Technical" Unicode character block. 981 * @since 1.2 982 */ 983 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 984 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 985 "MISCELLANEOUS TECHNICAL", 986 "MISCELLANEOUSTECHNICAL"); 987 988 /** 989 * Constant for the "Control Pictures" Unicode character block. 990 * @since 1.2 991 */ 992 public static final UnicodeBlock CONTROL_PICTURES = 993 new UnicodeBlock("CONTROL_PICTURES", 994 "CONTROL PICTURES", 995 "CONTROLPICTURES"); 996 997 /** 998 * Constant for the "Optical Character Recognition" Unicode character block. 
999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1002 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1003 "OPTICAL CHARACTER RECOGNITION", 1004 "OPTICALCHARACTERRECOGNITION"); 1005 1006 /** 1007 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1008 * @since 1.2 1009 */ 1010 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1011 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1012 "ENCLOSED ALPHANUMERICS", 1013 "ENCLOSEDALPHANUMERICS"); 1014 1015 /** 1016 * Constant for the "Box Drawing" Unicode character block. 1017 * @since 1.2 1018 */ 1019 public static final UnicodeBlock BOX_DRAWING = 1020 new UnicodeBlock("BOX_DRAWING", 1021 "BOX DRAWING", 1022 "BOXDRAWING"); 1023 1024 /** 1025 * Constant for the "Block Elements" Unicode character block. 1026 * @since 1.2 1027 */ 1028 public static final UnicodeBlock BLOCK_ELEMENTS = 1029 new UnicodeBlock("BLOCK_ELEMENTS", 1030 "BLOCK ELEMENTS", 1031 "BLOCKELEMENTS"); 1032 1033 /** 1034 * Constant for the "Geometric Shapes" Unicode character block. 1035 * @since 1.2 1036 */ 1037 public static final UnicodeBlock GEOMETRIC_SHAPES = 1038 new UnicodeBlock("GEOMETRIC_SHAPES", 1039 "GEOMETRIC SHAPES", 1040 "GEOMETRICSHAPES"); 1041 1042 /** 1043 * Constant for the "Miscellaneous Symbols" Unicode character block. 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1047 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1048 "MISCELLANEOUS SYMBOLS", 1049 "MISCELLANEOUSSYMBOLS"); 1050 1051 /** 1052 * Constant for the "Dingbats" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock DINGBATS = 1056 new UnicodeBlock("DINGBATS"); 1057 1058 /** 1059 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1060 * @since 1.2 1061 */ 1062 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1063 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1064 "CJK SYMBOLS AND PUNCTUATION", 1065 "CJKSYMBOLSANDPUNCTUATION"); 1066 1067 /** 1068 * Constant for the "Hiragana" Unicode character block. 1069 * @since 1.2 1070 */ 1071 public static final UnicodeBlock HIRAGANA = 1072 new UnicodeBlock("HIRAGANA"); 1073 1074 /** 1075 * Constant for the "Katakana" Unicode character block. 1076 * @since 1.2 1077 */ 1078 public static final UnicodeBlock KATAKANA = 1079 new UnicodeBlock("KATAKANA"); 1080 1081 /** 1082 * Constant for the "Bopomofo" Unicode character block. 1083 * @since 1.2 1084 */ 1085 public static final UnicodeBlock BOPOMOFO = 1086 new UnicodeBlock("BOPOMOFO"); 1087 1088 /** 1089 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1090 * @since 1.2 1091 */ 1092 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1093 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1094 "HANGUL COMPATIBILITY JAMO", 1095 "HANGULCOMPATIBILITYJAMO"); 1096 1097 /** 1098 * Constant for the "Kanbun" Unicode character block. 1099 * @since 1.2 1100 */ 1101 public static final UnicodeBlock KANBUN = 1102 new UnicodeBlock("KANBUN"); 1103 1104 /** 1105 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1106 * @since 1.2 1107 */ 1108 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1109 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1110 "ENCLOSED CJK LETTERS AND MONTHS", 1111 "ENCLOSEDCJKLETTERSANDMONTHS"); 1112 1113 /** 1114 * Constant for the "CJK Compatibility" Unicode character block. 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock CJK_COMPATIBILITY = 1118 new UnicodeBlock("CJK_COMPATIBILITY", 1119 "CJK COMPATIBILITY", 1120 "CJKCOMPATIBILITY"); 1121 1122 /** 1123 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1124 * @since 1.2 1125 */ 1126 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1127 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1128 "CJK UNIFIED IDEOGRAPHS", 1129 "CJKUNIFIEDIDEOGRAPHS"); 1130 1131 /** 1132 * Constant for the "Hangul Syllables" Unicode character block. 1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock HANGUL_SYLLABLES = 1136 new UnicodeBlock("HANGUL_SYLLABLES", 1137 "HANGUL SYLLABLES", 1138 "HANGULSYLLABLES"); 1139 1140 /** 1141 * Constant for the "Private Use Area" Unicode character block. 1142 * @since 1.2 1143 */ 1144 public static final UnicodeBlock PRIVATE_USE_AREA = 1145 new UnicodeBlock("PRIVATE_USE_AREA", 1146 "PRIVATE USE AREA", 1147 "PRIVATEUSEAREA"); 1148 1149 /** 1150 * Constant for the "CJK Compatibility Ideographs" Unicode character 1151 * block. 1152 * @since 1.2 1153 */ 1154 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1155 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1156 "CJK COMPATIBILITY IDEOGRAPHS", 1157 "CJKCOMPATIBILITYIDEOGRAPHS"); 1158 1159 /** 1160 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1164 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1165 "ALPHABETIC PRESENTATION FORMS", 1166 "ALPHABETICPRESENTATIONFORMS"); 1167 1168 /** 1169 * Constant for the "Arabic Presentation Forms-A" Unicode character 1170 * block. 1171 * @since 1.2 1172 */ 1173 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1174 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1175 "ARABIC PRESENTATION FORMS-A", 1176 "ARABICPRESENTATIONFORMS-A"); 1177 1178 /** 1179 * Constant for the "Combining Half Marks" Unicode character block. 
1180 * @since 1.2 1181 */ 1182 public static final UnicodeBlock COMBINING_HALF_MARKS = 1183 new UnicodeBlock("COMBINING_HALF_MARKS", 1184 "COMBINING HALF MARKS", 1185 "COMBININGHALFMARKS"); 1186 1187 /** 1188 * Constant for the "CJK Compatibility Forms" Unicode character block. 1189 * @since 1.2 1190 */ 1191 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1192 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1193 "CJK COMPATIBILITY FORMS", 1194 "CJKCOMPATIBILITYFORMS"); 1195 1196 /** 1197 * Constant for the "Small Form Variants" Unicode character block. 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1201 new UnicodeBlock("SMALL_FORM_VARIANTS", 1202 "SMALL FORM VARIANTS", 1203 "SMALLFORMVARIANTS"); 1204 1205 /** 1206 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1210 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1211 "ARABIC PRESENTATION FORMS-B", 1212 "ARABICPRESENTATIONFORMS-B"); 1213 1214 /** 1215 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1216 * block. 1217 * @since 1.2 1218 */ 1219 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1220 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1221 "HALFWIDTH AND FULLWIDTH FORMS", 1222 "HALFWIDTHANDFULLWIDTHFORMS"); 1223 1224 /** 1225 * Constant for the "Specials" Unicode character block. 1226 * @since 1.2 1227 */ 1228 public static final UnicodeBlock SPECIALS = 1229 new UnicodeBlock("SPECIALS"); 1230 1231 /** 1232 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1233 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1234 * {@link #LOW_SURROGATES}. These new constants match 1235 * the block definitions of the Unicode Standard. 1236 * The {@link #of(char)} and {@link #of(int)} methods 1237 * return the new constants, not SURROGATES_AREA. 
1238 */ 1239 @Deprecated 1240 public static final UnicodeBlock SURROGATES_AREA = 1241 new UnicodeBlock("SURROGATES_AREA"); 1242 1243 /** 1244 * Constant for the "Syriac" Unicode character block. 1245 * @since 1.4 1246 */ 1247 public static final UnicodeBlock SYRIAC = 1248 new UnicodeBlock("SYRIAC"); 1249 1250 /** 1251 * Constant for the "Thaana" Unicode character block. 1252 * @since 1.4 1253 */ 1254 public static final UnicodeBlock THAANA = 1255 new UnicodeBlock("THAANA"); 1256 1257 /** 1258 * Constant for the "Sinhala" Unicode character block. 1259 * @since 1.4 1260 */ 1261 public static final UnicodeBlock SINHALA = 1262 new UnicodeBlock("SINHALA"); 1263 1264 /** 1265 * Constant for the "Myanmar" Unicode character block. 1266 * @since 1.4 1267 */ 1268 public static final UnicodeBlock MYANMAR = 1269 new UnicodeBlock("MYANMAR"); 1270 1271 /** 1272 * Constant for the "Ethiopic" Unicode character block. 1273 * @since 1.4 1274 */ 1275 public static final UnicodeBlock ETHIOPIC = 1276 new UnicodeBlock("ETHIOPIC"); 1277 1278 /** 1279 * Constant for the "Cherokee" Unicode character block. 1280 * @since 1.4 1281 */ 1282 public static final UnicodeBlock CHEROKEE = 1283 new UnicodeBlock("CHEROKEE"); 1284 1285 /** 1286 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1287 * @since 1.4 1288 */ 1289 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1290 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1291 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1292 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1293 1294 /** 1295 * Constant for the "Ogham" Unicode character block. 1296 * @since 1.4 1297 */ 1298 public static final UnicodeBlock OGHAM = 1299 new UnicodeBlock("OGHAM"); 1300 1301 /** 1302 * Constant for the "Runic" Unicode character block. 1303 * @since 1.4 1304 */ 1305 public static final UnicodeBlock RUNIC = 1306 new UnicodeBlock("RUNIC"); 1307 1308 /** 1309 * Constant for the "Khmer" Unicode character block. 
1310 * @since 1.4 1311 */ 1312 public static final UnicodeBlock KHMER = 1313 new UnicodeBlock("KHMER"); 1314 1315 /** 1316 * Constant for the "Mongolian" Unicode character block. 1317 * @since 1.4 1318 */ 1319 public static final UnicodeBlock MONGOLIAN = 1320 new UnicodeBlock("MONGOLIAN"); 1321 1322 /** 1323 * Constant for the "Braille Patterns" Unicode character block. 1324 * @since 1.4 1325 */ 1326 public static final UnicodeBlock BRAILLE_PATTERNS = 1327 new UnicodeBlock("BRAILLE_PATTERNS", 1328 "BRAILLE PATTERNS", 1329 "BRAILLEPATTERNS"); 1330 1331 /** 1332 * Constant for the "CJK Radicals Supplement" Unicode character block. 1333 * @since 1.4 1334 */ 1335 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1336 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1337 "CJK RADICALS SUPPLEMENT", 1338 "CJKRADICALSSUPPLEMENT"); 1339 1340 /** 1341 * Constant for the "Kangxi Radicals" Unicode character block. 1342 * @since 1.4 1343 */ 1344 public static final UnicodeBlock KANGXI_RADICALS = 1345 new UnicodeBlock("KANGXI_RADICALS", 1346 "KANGXI RADICALS", 1347 "KANGXIRADICALS"); 1348 1349 /** 1350 * Constant for the "Ideographic Description Characters" Unicode character block. 1351 * @since 1.4 1352 */ 1353 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1354 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1355 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1356 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1357 1358 /** 1359 * Constant for the "Bopomofo Extended" Unicode character block. 1360 * @since 1.4 1361 */ 1362 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1363 new UnicodeBlock("BOPOMOFO_EXTENDED", 1364 "BOPOMOFO EXTENDED", 1365 "BOPOMOFOEXTENDED"); 1366 1367 /** 1368 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1369 * @since 1.4 1370 */ 1371 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1372 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1373 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1374 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1375 1376 /** 1377 * Constant for the "Yi Syllables" Unicode character block. 1378 * @since 1.4 1379 */ 1380 public static final UnicodeBlock YI_SYLLABLES = 1381 new UnicodeBlock("YI_SYLLABLES", 1382 "YI SYLLABLES", 1383 "YISYLLABLES"); 1384 1385 /** 1386 * Constant for the "Yi Radicals" Unicode character block. 1387 * @since 1.4 1388 */ 1389 public static final UnicodeBlock YI_RADICALS = 1390 new UnicodeBlock("YI_RADICALS", 1391 "YI RADICALS", 1392 "YIRADICALS"); 1393 1394 /** 1395 * Constant for the "Cyrillic Supplementary" Unicode character block. 1396 * @since 1.5 1397 */ 1398 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1399 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1400 "CYRILLIC SUPPLEMENTARY", 1401 "CYRILLICSUPPLEMENTARY", 1402 "CYRILLIC SUPPLEMENT", 1403 "CYRILLICSUPPLEMENT"); 1404 1405 /** 1406 * Constant for the "Tagalog" Unicode character block. 1407 * @since 1.5 1408 */ 1409 public static final UnicodeBlock TAGALOG = 1410 new UnicodeBlock("TAGALOG"); 1411 1412 /** 1413 * Constant for the "Hanunoo" Unicode character block. 1414 * @since 1.5 1415 */ 1416 public static final UnicodeBlock HANUNOO = 1417 new UnicodeBlock("HANUNOO"); 1418 1419 /** 1420 * Constant for the "Buhid" Unicode character block. 1421 * @since 1.5 1422 */ 1423 public static final UnicodeBlock BUHID = 1424 new UnicodeBlock("BUHID"); 1425 1426 /** 1427 * Constant for the "Tagbanwa" Unicode character block. 1428 * @since 1.5 1429 */ 1430 public static final UnicodeBlock TAGBANWA = 1431 new UnicodeBlock("TAGBANWA"); 1432 1433 /** 1434 * Constant for the "Limbu" Unicode character block. 
1435 * @since 1.5 1436 */ 1437 public static final UnicodeBlock LIMBU = 1438 new UnicodeBlock("LIMBU"); 1439 1440 /** 1441 * Constant for the "Tai Le" Unicode character block. 1442 * @since 1.5 1443 */ 1444 public static final UnicodeBlock TAI_LE = 1445 new UnicodeBlock("TAI_LE", 1446 "TAI LE", 1447 "TAILE"); 1448 1449 /** 1450 * Constant for the "Khmer Symbols" Unicode character block. 1451 * @since 1.5 1452 */ 1453 public static final UnicodeBlock KHMER_SYMBOLS = 1454 new UnicodeBlock("KHMER_SYMBOLS", 1455 "KHMER SYMBOLS", 1456 "KHMERSYMBOLS"); 1457 1458 /** 1459 * Constant for the "Phonetic Extensions" Unicode character block. 1460 * @since 1.5 1461 */ 1462 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1463 new UnicodeBlock("PHONETIC_EXTENSIONS", 1464 "PHONETIC EXTENSIONS", 1465 "PHONETICEXTENSIONS"); 1466 1467 /** 1468 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1469 * @since 1.5 1470 */ 1471 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1472 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1473 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1474 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1475 1476 /** 1477 * Constant for the "Supplemental Arrows-A" Unicode character block. 1478 * @since 1.5 1479 */ 1480 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1481 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1482 "SUPPLEMENTAL ARROWS-A", 1483 "SUPPLEMENTALARROWS-A"); 1484 1485 /** 1486 * Constant for the "Supplemental Arrows-B" Unicode character block. 1487 * @since 1.5 1488 */ 1489 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1490 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1491 "SUPPLEMENTAL ARROWS-B", 1492 "SUPPLEMENTALARROWS-B"); 1493 1494 /** 1495 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1496 * character block. 
1497 * @since 1.5 1498 */ 1499 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1500 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1501 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1502 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1503 1504 /** 1505 * Constant for the "Supplemental Mathematical Operators" Unicode 1506 * character block. 1507 * @since 1.5 1508 */ 1509 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1510 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1511 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1512 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1513 1514 /** 1515 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1516 * block. 1517 * @since 1.5 1518 */ 1519 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1520 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1521 "MISCELLANEOUS SYMBOLS AND ARROWS", 1522 "MISCELLANEOUSSYMBOLSANDARROWS"); 1523 1524 /** 1525 * Constant for the "Katakana Phonetic Extensions" Unicode character 1526 * block. 1527 * @since 1.5 1528 */ 1529 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1530 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1531 "KATAKANA PHONETIC EXTENSIONS", 1532 "KATAKANAPHONETICEXTENSIONS"); 1533 1534 /** 1535 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1536 * @since 1.5 1537 */ 1538 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1539 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1540 "YIJING HEXAGRAM SYMBOLS", 1541 "YIJINGHEXAGRAMSYMBOLS"); 1542 1543 /** 1544 * Constant for the "Variation Selectors" Unicode character block. 1545 * @since 1.5 1546 */ 1547 public static final UnicodeBlock VARIATION_SELECTORS = 1548 new UnicodeBlock("VARIATION_SELECTORS", 1549 "VARIATION SELECTORS", 1550 "VARIATIONSELECTORS"); 1551 1552 /** 1553 * Constant for the "Linear B Syllabary" Unicode character block. 
1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1557 new UnicodeBlock("LINEAR_B_SYLLABARY", 1558 "LINEAR B SYLLABARY", 1559 "LINEARBSYLLABARY"); 1560 1561 /** 1562 * Constant for the "Linear B Ideograms" Unicode character block. 1563 * @since 1.5 1564 */ 1565 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1566 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1567 "LINEAR B IDEOGRAMS", 1568 "LINEARBIDEOGRAMS"); 1569 1570 /** 1571 * Constant for the "Aegean Numbers" Unicode character block. 1572 * @since 1.5 1573 */ 1574 public static final UnicodeBlock AEGEAN_NUMBERS = 1575 new UnicodeBlock("AEGEAN_NUMBERS", 1576 "AEGEAN NUMBERS", 1577 "AEGEANNUMBERS"); 1578 1579 /** 1580 * Constant for the "Old Italic" Unicode character block. 1581 * @since 1.5 1582 */ 1583 public static final UnicodeBlock OLD_ITALIC = 1584 new UnicodeBlock("OLD_ITALIC", 1585 "OLD ITALIC", 1586 "OLDITALIC"); 1587 1588 /** 1589 * Constant for the "Gothic" Unicode character block. 1590 * @since 1.5 1591 */ 1592 public static final UnicodeBlock GOTHIC = 1593 new UnicodeBlock("GOTHIC"); 1594 1595 /** 1596 * Constant for the "Ugaritic" Unicode character block. 1597 * @since 1.5 1598 */ 1599 public static final UnicodeBlock UGARITIC = 1600 new UnicodeBlock("UGARITIC"); 1601 1602 /** 1603 * Constant for the "Deseret" Unicode character block. 1604 * @since 1.5 1605 */ 1606 public static final UnicodeBlock DESERET = 1607 new UnicodeBlock("DESERET"); 1608 1609 /** 1610 * Constant for the "Shavian" Unicode character block. 1611 * @since 1.5 1612 */ 1613 public static final UnicodeBlock SHAVIAN = 1614 new UnicodeBlock("SHAVIAN"); 1615 1616 /** 1617 * Constant for the "Osmanya" Unicode character block. 1618 * @since 1.5 1619 */ 1620 public static final UnicodeBlock OSMANYA = 1621 new UnicodeBlock("OSMANYA"); 1622 1623 /** 1624 * Constant for the "Cypriot Syllabary" Unicode character block. 
1625 * @since 1.5 1626 */ 1627 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1628 new UnicodeBlock("CYPRIOT_SYLLABARY", 1629 "CYPRIOT SYLLABARY", 1630 "CYPRIOTSYLLABARY"); 1631 1632 /** 1633 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1634 * @since 1.5 1635 */ 1636 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1637 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1638 "BYZANTINE MUSICAL SYMBOLS", 1639 "BYZANTINEMUSICALSYMBOLS"); 1640 1641 /** 1642 * Constant for the "Musical Symbols" Unicode character block. 1643 * @since 1.5 1644 */ 1645 public static final UnicodeBlock MUSICAL_SYMBOLS = 1646 new UnicodeBlock("MUSICAL_SYMBOLS", 1647 "MUSICAL SYMBOLS", 1648 "MUSICALSYMBOLS"); 1649 1650 /** 1651 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1655 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1656 "TAI XUAN JING SYMBOLS", 1657 "TAIXUANJINGSYMBOLS"); 1658 1659 /** 1660 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1661 * character block. 1662 * @since 1.5 1663 */ 1664 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1665 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1666 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1667 "MATHEMATICALALPHANUMERICSYMBOLS"); 1668 1669 /** 1670 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1671 * character block. 1672 * @since 1.5 1673 */ 1674 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1675 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1676 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1677 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1678 1679 /** 1680 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1681 * @since 1.5 1682 */ 1683 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1684 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1685 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1686 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1687 1688 /** 1689 * Constant for the "Tags" Unicode character block. 1690 * @since 1.5 1691 */ 1692 public static final UnicodeBlock TAGS = 1693 new UnicodeBlock("TAGS"); 1694 1695 /** 1696 * Constant for the "Variation Selectors Supplement" Unicode character 1697 * block. 1698 * @since 1.5 1699 */ 1700 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1701 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1702 "VARIATION SELECTORS SUPPLEMENT", 1703 "VARIATIONSELECTORSSUPPLEMENT"); 1704 1705 /** 1706 * Constant for the "Supplementary Private Use Area-A" Unicode character 1707 * block. 1708 * @since 1.5 1709 */ 1710 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1711 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1712 "SUPPLEMENTARY PRIVATE USE AREA-A", 1713 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1714 1715 /** 1716 * Constant for the "Supplementary Private Use Area-B" Unicode character 1717 * block. 1718 * @since 1.5 1719 */ 1720 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1721 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1722 "SUPPLEMENTARY PRIVATE USE AREA-B", 1723 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1724 1725 /** 1726 * Constant for the "High Surrogates" Unicode character block. 1727 * This block represents codepoint values in the high surrogate 1728 * range: U+D800 through U+DB7F 1729 * 1730 * @since 1.5 1731 */ 1732 public static final UnicodeBlock HIGH_SURROGATES = 1733 new UnicodeBlock("HIGH_SURROGATES", 1734 "HIGH SURROGATES", 1735 "HIGHSURROGATES"); 1736 1737 /** 1738 * Constant for the "High Private Use Surrogates" Unicode character 1739 * block. 
1740 * This block represents codepoint values in the private use high 1741 * surrogate range: U+DB80 through U+DBFF 1742 * 1743 * @since 1.5 1744 */ 1745 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1746 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1747 "HIGH PRIVATE USE SURROGATES", 1748 "HIGHPRIVATEUSESURROGATES"); 1749 1750 /** 1751 * Constant for the "Low Surrogates" Unicode character block. 1752 * This block represents codepoint values in the low surrogate 1753 * range: U+DC00 through U+DFFF 1754 * 1755 * @since 1.5 1756 */ 1757 public static final UnicodeBlock LOW_SURROGATES = 1758 new UnicodeBlock("LOW_SURROGATES", 1759 "LOW SURROGATES", 1760 "LOWSURROGATES"); 1761 1762 /** 1763 * Constant for the "Arabic Supplement" Unicode character block. 1764 * @since 1.7 1765 */ 1766 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1767 new UnicodeBlock("ARABIC_SUPPLEMENT", 1768 "ARABIC SUPPLEMENT", 1769 "ARABICSUPPLEMENT"); 1770 1771 /** 1772 * Constant for the "NKo" Unicode character block. 1773 * @since 1.7 1774 */ 1775 public static final UnicodeBlock NKO = 1776 new UnicodeBlock("NKO"); 1777 1778 /** 1779 * Constant for the "Samaritan" Unicode character block. 1780 * @since 1.7 1781 */ 1782 public static final UnicodeBlock SAMARITAN = 1783 new UnicodeBlock("SAMARITAN"); 1784 1785 /** 1786 * Constant for the "Mandaic" Unicode character block. 1787 * @since 1.7 1788 */ 1789 public static final UnicodeBlock MANDAIC = 1790 new UnicodeBlock("MANDAIC"); 1791 1792 /** 1793 * Constant for the "Ethiopic Supplement" Unicode character block. 1794 * @since 1.7 1795 */ 1796 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1797 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1798 "ETHIOPIC SUPPLEMENT", 1799 "ETHIOPICSUPPLEMENT"); 1800 1801 /** 1802 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1803 * Unicode character block. 
1804 * @since 1.7 1805 */ 1806 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1807 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1808 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1809 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1810 1811 /** 1812 * Constant for the "New Tai Lue" Unicode character block. 1813 * @since 1.7 1814 */ 1815 public static final UnicodeBlock NEW_TAI_LUE = 1816 new UnicodeBlock("NEW_TAI_LUE", 1817 "NEW TAI LUE", 1818 "NEWTAILUE"); 1819 1820 /** 1821 * Constant for the "Buginese" Unicode character block. 1822 * @since 1.7 1823 */ 1824 public static final UnicodeBlock BUGINESE = 1825 new UnicodeBlock("BUGINESE"); 1826 1827 /** 1828 * Constant for the "Tai Tham" Unicode character block. 1829 * @since 1.7 1830 */ 1831 public static final UnicodeBlock TAI_THAM = 1832 new UnicodeBlock("TAI_THAM", 1833 "TAI THAM", 1834 "TAITHAM"); 1835 1836 /** 1837 * Constant for the "Balinese" Unicode character block. 1838 * @since 1.7 1839 */ 1840 public static final UnicodeBlock BALINESE = 1841 new UnicodeBlock("BALINESE"); 1842 1843 /** 1844 * Constant for the "Sundanese" Unicode character block. 1845 * @since 1.7 1846 */ 1847 public static final UnicodeBlock SUNDANESE = 1848 new UnicodeBlock("SUNDANESE"); 1849 1850 /** 1851 * Constant for the "Batak" Unicode character block. 1852 * @since 1.7 1853 */ 1854 public static final UnicodeBlock BATAK = 1855 new UnicodeBlock("BATAK"); 1856 1857 /** 1858 * Constant for the "Lepcha" Unicode character block. 1859 * @since 1.7 1860 */ 1861 public static final UnicodeBlock LEPCHA = 1862 new UnicodeBlock("LEPCHA"); 1863 1864 /** 1865 * Constant for the "Ol Chiki" Unicode character block. 1866 * @since 1.7 1867 */ 1868 public static final UnicodeBlock OL_CHIKI = 1869 new UnicodeBlock("OL_CHIKI", 1870 "OL CHIKI", 1871 "OLCHIKI"); 1872 1873 /** 1874 * Constant for the "Vedic Extensions" Unicode character block. 
1875 * @since 1.7 1876 */ 1877 public static final UnicodeBlock VEDIC_EXTENSIONS = 1878 new UnicodeBlock("VEDIC_EXTENSIONS", 1879 "VEDIC EXTENSIONS", 1880 "VEDICEXTENSIONS"); 1881 1882 /** 1883 * Constant for the "Phonetic Extensions Supplement" Unicode character 1884 * block. 1885 * @since 1.7 1886 */ 1887 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1888 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1889 "PHONETIC EXTENSIONS SUPPLEMENT", 1890 "PHONETICEXTENSIONSSUPPLEMENT"); 1891 1892 /** 1893 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1894 * character block. 1895 * @since 1.7 1896 */ 1897 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1898 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1899 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1900 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1901 1902 /** 1903 * Constant for the "Glagolitic" Unicode character block. 1904 * @since 1.7 1905 */ 1906 public static final UnicodeBlock GLAGOLITIC = 1907 new UnicodeBlock("GLAGOLITIC"); 1908 1909 /** 1910 * Constant for the "Latin Extended-C" Unicode character block. 1911 * @since 1.7 1912 */ 1913 public static final UnicodeBlock LATIN_EXTENDED_C = 1914 new UnicodeBlock("LATIN_EXTENDED_C", 1915 "LATIN EXTENDED-C", 1916 "LATINEXTENDED-C"); 1917 1918 /** 1919 * Constant for the "Coptic" Unicode character block. 1920 * @since 1.7 1921 */ 1922 public static final UnicodeBlock COPTIC = 1923 new UnicodeBlock("COPTIC"); 1924 1925 /** 1926 * Constant for the "Georgian Supplement" Unicode character block. 1927 * @since 1.7 1928 */ 1929 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1930 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1931 "GEORGIAN SUPPLEMENT", 1932 "GEORGIANSUPPLEMENT"); 1933 1934 /** 1935 * Constant for the "Tifinagh" Unicode character block. 
1936 * @since 1.7 1937 */ 1938 public static final UnicodeBlock TIFINAGH = 1939 new UnicodeBlock("TIFINAGH"); 1940 1941 /** 1942 * Constant for the "Ethiopic Extended" Unicode character block. 1943 * @since 1.7 1944 */ 1945 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1946 new UnicodeBlock("ETHIOPIC_EXTENDED", 1947 "ETHIOPIC EXTENDED", 1948 "ETHIOPICEXTENDED"); 1949 1950 /** 1951 * Constant for the "Cyrillic Extended-A" Unicode character block. 1952 * @since 1.7 1953 */ 1954 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1955 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1956 "CYRILLIC EXTENDED-A", 1957 "CYRILLICEXTENDED-A"); 1958 1959 /** 1960 * Constant for the "Supplemental Punctuation" Unicode character block. 1961 * @since 1.7 1962 */ 1963 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1964 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1965 "SUPPLEMENTAL PUNCTUATION", 1966 "SUPPLEMENTALPUNCTUATION"); 1967 1968 /** 1969 * Constant for the "CJK Strokes" Unicode character block. 1970 * @since 1.7 1971 */ 1972 public static final UnicodeBlock CJK_STROKES = 1973 new UnicodeBlock("CJK_STROKES", 1974 "CJK STROKES", 1975 "CJKSTROKES"); 1976 1977 /** 1978 * Constant for the "Lisu" Unicode character block. 1979 * @since 1.7 1980 */ 1981 public static final UnicodeBlock LISU = 1982 new UnicodeBlock("LISU"); 1983 1984 /** 1985 * Constant for the "Vai" Unicode character block. 1986 * @since 1.7 1987 */ 1988 public static final UnicodeBlock VAI = 1989 new UnicodeBlock("VAI"); 1990 1991 /** 1992 * Constant for the "Cyrillic Extended-B" Unicode character block. 1993 * @since 1.7 1994 */ 1995 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1996 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1997 "CYRILLIC EXTENDED-B", 1998 "CYRILLICEXTENDED-B"); 1999 2000 /** 2001 * Constant for the "Bamum" Unicode character block. 
2002 * @since 1.7 2003 */ 2004 public static final UnicodeBlock BAMUM = 2005 new UnicodeBlock("BAMUM"); 2006 2007 /** 2008 * Constant for the "Modifier Tone Letters" Unicode character block. 2009 * @since 1.7 2010 */ 2011 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2012 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2013 "MODIFIER TONE LETTERS", 2014 "MODIFIERTONELETTERS"); 2015 2016 /** 2017 * Constant for the "Latin Extended-D" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock LATIN_EXTENDED_D = 2021 new UnicodeBlock("LATIN_EXTENDED_D", 2022 "LATIN EXTENDED-D", 2023 "LATINEXTENDED-D"); 2024 2025 /** 2026 * Constant for the "Syloti Nagri" Unicode character block. 2027 * @since 1.7 2028 */ 2029 public static final UnicodeBlock SYLOTI_NAGRI = 2030 new UnicodeBlock("SYLOTI_NAGRI", 2031 "SYLOTI NAGRI", 2032 "SYLOTINAGRI"); 2033 2034 /** 2035 * Constant for the "Common Indic Number Forms" Unicode character block. 2036 * @since 1.7 2037 */ 2038 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2039 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2040 "COMMON INDIC NUMBER FORMS", 2041 "COMMONINDICNUMBERFORMS"); 2042 2043 /** 2044 * Constant for the "Phags-pa" Unicode character block. 2045 * @since 1.7 2046 */ 2047 public static final UnicodeBlock PHAGS_PA = 2048 new UnicodeBlock("PHAGS_PA", 2049 "PHAGS-PA"); 2050 2051 /** 2052 * Constant for the "Saurashtra" Unicode character block. 2053 * @since 1.7 2054 */ 2055 public static final UnicodeBlock SAURASHTRA = 2056 new UnicodeBlock("SAURASHTRA"); 2057 2058 /** 2059 * Constant for the "Devanagari Extended" Unicode character block. 2060 * @since 1.7 2061 */ 2062 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2063 new UnicodeBlock("DEVANAGARI_EXTENDED", 2064 "DEVANAGARI EXTENDED", 2065 "DEVANAGARIEXTENDED"); 2066 2067 /** 2068 * Constant for the "Kayah Li" Unicode character block. 
* @since 1.7
         */
        public static final UnicodeBlock KAYAH_LI =
            new UnicodeBlock("KAYAH_LI",
                             "KAYAH LI",
                             "KAYAHLI");

        /**
         * Constant for the "Rejang" Unicode character block.
         * The block spans code points U+A930..U+A95F.
         * @since 1.7
         */
        public static final UnicodeBlock REJANG =
            new UnicodeBlock("REJANG");

        /**
         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
         * The block spans code points U+A960..U+A97F.
         * @since 1.7
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
                             "HANGUL JAMO EXTENDED-A",
                             "HANGULJAMOEXTENDED-A");

        /**
         * Constant for the "Javanese" Unicode character block.
         * The block spans code points U+A980..U+A9DF.
         * @since 1.7
         */
        public static final UnicodeBlock JAVANESE =
            new UnicodeBlock("JAVANESE");

        /**
         * Constant for the "Cham" Unicode character block.
         * The block spans code points U+AA00..U+AA5F.
         * @since 1.7
         */
        public static final UnicodeBlock CHAM =
            new UnicodeBlock("CHAM");

        /**
         * Constant for the "Myanmar Extended-A" Unicode character block.
         * The block spans code points U+AA60..U+AA7F.
         * @since 1.7
         */
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
            new UnicodeBlock("MYANMAR_EXTENDED_A",
                             "MYANMAR EXTENDED-A",
                             "MYANMAREXTENDED-A");

        /**
         * Constant for the "Tai Viet" Unicode character block.
         * The block spans code points U+AA80..U+AADF.
         * @since 1.7
         */
        public static final UnicodeBlock TAI_VIET =
            new UnicodeBlock("TAI_VIET",
                             "TAI VIET",
                             "TAIVIET");

        /**
         * Constant for the "Ethiopic Extended-A" Unicode character block.
         * The block spans code points U+AB00..U+AB2F.
         * @since 1.7
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
                             "ETHIOPIC EXTENDED-A",
                             "ETHIOPICEXTENDED-A");

        /**
         * Constant for the "Meetei Mayek" Unicode character block.
         * The block spans code points U+ABC0..U+ABFF.
         * @since 1.7
         */
        public static final UnicodeBlock MEETEI_MAYEK =
            new UnicodeBlock("MEETEI_MAYEK",
                             "MEETEI MAYEK",
                             "MEETEIMAYEK");

        /**
         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
         * The block spans code points U+D7B0..U+D7FF.
         * @since 1.7
         */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
                             "HANGUL JAMO EXTENDED-B",
                             "HANGULJAMOEXTENDED-B");

        /**
         * Constant for the "Vertical Forms" Unicode character block.
         * The block spans code points U+FE10..U+FE1F.
         * @since 1.7
         */
        public static final UnicodeBlock VERTICAL_FORMS =
            new UnicodeBlock("VERTICAL_FORMS",
                             "VERTICAL FORMS",
                             "VERTICALFORMS");

        /**
         * Constant for the "Ancient Greek Numbers" Unicode character block.
         * The block spans code points U+10140..U+1018F.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
                             "ANCIENT GREEK NUMBERS",
                             "ANCIENTGREEKNUMBERS");

        /**
         * Constant for the "Ancient Symbols" Unicode character block.
         * The block spans code points U+10190..U+101CF.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
            new UnicodeBlock("ANCIENT_SYMBOLS",
                             "ANCIENT SYMBOLS",
                             "ANCIENTSYMBOLS");

        /**
         * Constant for the "Phaistos Disc" Unicode character block.
         * The block spans code points U+101D0..U+101FF.
         * @since 1.7
         */
        public static final UnicodeBlock PHAISTOS_DISC =
            new UnicodeBlock("PHAISTOS_DISC",
                             "PHAISTOS DISC",
                             "PHAISTOSDISC");

        /**
         * Constant for the "Lycian" Unicode character block.
         * The block spans code points U+10280..U+1029F.
         * @since 1.7
         */
        public static final UnicodeBlock LYCIAN =
            new UnicodeBlock("LYCIAN");

        /**
         * Constant for the "Carian" Unicode character block.
         * The block spans code points U+102A0..U+102DF.
         * @since 1.7
         */
        public static final UnicodeBlock CARIAN =
            new UnicodeBlock("CARIAN");

        /**
         * Constant for the "Old Persian" Unicode character block.
         * The block spans code points U+103A0..U+103DF.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_PERSIAN =
            new UnicodeBlock("OLD_PERSIAN",
                             "OLD PERSIAN",
                             "OLDPERSIAN");

        /**
         * Constant for the "Imperial Aramaic" Unicode character block.
         * The block spans code points U+10840..U+1085F.
         * @since 1.7
         */
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
            new UnicodeBlock("IMPERIAL_ARAMAIC",
                             "IMPERIAL ARAMAIC",
                             "IMPERIALARAMAIC");

        /**
         * Constant for the "Phoenician" Unicode character block.
         * The block spans code points U+10900..U+1091F.
         * @since 1.7
         */
        public static final UnicodeBlock PHOENICIAN =
            new UnicodeBlock("PHOENICIAN");

        /**
         * Constant for the "Lydian" Unicode character block.
         * The block spans code points U+10920..U+1093F.
         * @since 1.7
         */
        public static final UnicodeBlock LYDIAN =
            new UnicodeBlock("LYDIAN");

        /**
         * Constant for the "Kharoshthi" Unicode character block.
         * The block spans code points U+10A00..U+10A5F.
         * @since 1.7
         */
        public static final UnicodeBlock KHAROSHTHI =
            new UnicodeBlock("KHAROSHTHI");

        /**
         * Constant for the "Old South Arabian" Unicode character block.
         * The block spans code points U+10A60..U+10A7F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
            new UnicodeBlock("OLD_SOUTH_ARABIAN",
                             "OLD SOUTH ARABIAN",
                             "OLDSOUTHARABIAN");

        /**
         * Constant for the "Avestan" Unicode character block.
         * The block spans code points U+10B00..U+10B3F.
         * @since 1.7
         */
        public static final UnicodeBlock AVESTAN =
            new UnicodeBlock("AVESTAN");

        /**
         * Constant for the "Inscriptional Parthian" Unicode character block.
         * The block spans code points U+10B40..U+10B5F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
                             "INSCRIPTIONAL PARTHIAN",
                             "INSCRIPTIONALPARTHIAN");

        /**
         * Constant for the "Inscriptional Pahlavi" Unicode character block.
         * The block spans code points U+10B60..U+10B7F.
         * @since 1.7
         */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
                             "INSCRIPTIONAL PAHLAVI",
                             "INSCRIPTIONALPAHLAVI");

        /**
         * Constant for the "Old Turkic" Unicode character block.
         * The block spans code points U+10C00..U+10C4F.
         * @since 1.7
         */
        public static final UnicodeBlock OLD_TURKIC =
            new UnicodeBlock("OLD_TURKIC",
                             "OLD TURKIC",
                             "OLDTURKIC");

        /**
         * Constant for the "Rumi Numeral Symbols" Unicode character block.
         * The block spans code points U+10E60..U+10E7F.
         * @since 1.7
         */
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
                             "RUMI NUMERAL SYMBOLS",
                             "RUMINUMERALSYMBOLS");

        /**
         * Constant for the "Brahmi" Unicode character block.
         * The block spans code points U+11000..U+1107F.
         * @since 1.7
         */
        public static final UnicodeBlock BRAHMI =
            new UnicodeBlock("BRAHMI");

        /**
         * Constant for the "Kaithi" Unicode character block.
         * The block spans code points U+11080..U+110CF.
         * @since 1.7
         */
        public static final UnicodeBlock KAITHI =
            new UnicodeBlock("KAITHI");

        /**
         * Constant for the "Cuneiform" Unicode character block.
         * The block spans code points U+12000..U+123FF.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM =
            new UnicodeBlock("CUNEIFORM");

        /**
         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
         * character block.
         * The block spans code points U+12400..U+1247F.
         * @since 1.7
         */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                             "CUNEIFORM NUMBERS AND PUNCTUATION",
                             "CUNEIFORMNUMBERSANDPUNCTUATION");

        /**
         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
         * The block spans code points U+13000..U+1342F.
         * @since 1.7
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
                             "EGYPTIAN HIEROGLYPHS",
                             "EGYPTIANHIEROGLYPHS");

        /**
         * Constant for the "Bamum Supplement" Unicode character block.
         * The block spans code points U+16800..U+16A3F.
         * @since 1.7
         */
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
            new UnicodeBlock("BAMUM_SUPPLEMENT",
                             "BAMUM SUPPLEMENT",
                             "BAMUMSUPPLEMENT");

        /**
         * Constant for the "Kana Supplement" Unicode character block.
         * The block spans code points U+1B000..U+1B0FF.
         * @since 1.7
         */
        public static final UnicodeBlock KANA_SUPPLEMENT =
            new UnicodeBlock("KANA_SUPPLEMENT",
                             "KANA SUPPLEMENT",
                             "KANASUPPLEMENT");

        /**
         * Constant for the "Ancient Greek Musical Notation" Unicode character
         * block.
         * The block spans code points U+1D200..U+1D24F.
         * @since 1.7
         */
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                             "ANCIENT GREEK MUSICAL NOTATION",
                             "ANCIENTGREEKMUSICALNOTATION");

        /**
         * Constant for the "Counting Rod Numerals" Unicode character block.
         * The block spans code points U+1D360..U+1D37F.
         * @since 1.7
         */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
            new UnicodeBlock("COUNTING_ROD_NUMERALS",
                             "COUNTING ROD NUMERALS",
                             "COUNTINGRODNUMERALS");

        /**
         * Constant for the "Mahjong Tiles" Unicode character block.
         * The block spans code points U+1F000..U+1F02F.
         * @since 1.7
         */
        public static final UnicodeBlock MAHJONG_TILES =
            new UnicodeBlock("MAHJONG_TILES",
                             "MAHJONG TILES",
                             "MAHJONGTILES");

        /**
         * Constant for the "Domino Tiles" Unicode character block.
         * The block spans code points U+1F030..U+1F09F.
         * @since 1.7
         */
        public static final UnicodeBlock DOMINO_TILES =
            new UnicodeBlock("DOMINO_TILES",
                             "DOMINO TILES",
                             "DOMINOTILES");

        /**
         * Constant for the "Playing Cards" Unicode character block.
         * The block spans code points U+1F0A0..U+1F0FF.
         * @since 1.7
         */
        public static final UnicodeBlock PLAYING_CARDS =
            new UnicodeBlock("PLAYING_CARDS",
                             "PLAYING CARDS",
                             "PLAYINGCARDS");

        /**
         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
         * block.
         * The block spans code points U+1F100..U+1F1FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
                             "ENCLOSEDALPHANUMERICSUPPLEMENT");

        /**
         * Constant for the "Enclosed Ideographic Supplement" Unicode character
         * block.
         * The block spans code points U+1F200..U+1F2FF.
         * @since 1.7
         */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");

        /**
         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
         * character block.
         * The block spans code points U+1F300..U+1F5FF.
         * @since 1.7
         */
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");

        /**
         * Constant for the "Emoticons" Unicode character block.
         * The block spans code points U+1F600..U+1F64F.
         * @since 1.7
         */
        public static final UnicodeBlock EMOTICONS =
            new UnicodeBlock("EMOTICONS");

        /**
         * Constant for the "Transport And Map Symbols" Unicode character block.
         * The block spans code points U+1F680..U+1F6FF.
         * @since 1.7
         */
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
                             "TRANSPORT AND MAP SYMBOLS",
                             "TRANSPORTANDMAPSYMBOLS");

        /**
         * Constant for the "Alchemical Symbols" Unicode character block.
         * The block spans code points U+1F700..U+1F77F.
         * @since 1.7
         */
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
                             "ALCHEMICAL SYMBOLS",
                             "ALCHEMICALSYMBOLS");

        /**
         * Constant for the "CJK Unified Ideographs Extension C" Unicode
         * character block.
         * The block spans code points U+2A700..U+2B73F.
* @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");

        /**
         * Constant for the "CJK Unified Ideographs Extension D" Unicode
         * character block.
         * The block spans code points U+2B740..U+2B81F.
         * @since 1.7
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");

        // First code point of every contiguous range, in ascending order.
        // Entry i pairs with blocks[i]: of(int) binary-searches this table for
        // the last start that is <= the code point and returns the block stored
        // at the same index, so the two arrays must stay the same length and in
        // the same order. Ranges commented "unassigned" are gaps between blocks;
        // their slot in blocks holds null.
        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   //             unassigned
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   //             unassigned
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   //             unassigned
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   //             unassigned
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   //             unassigned
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   //             unassigned
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  //               unassigned
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  //               unassigned
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  //               unassigned
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  //               unassigned
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  //               unassigned
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  //               unassigned
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  //               unassigned
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
        };

        // Block for each range listed in blockStarts, index for index. A null
        // entry marks a range that belongs to no block, which is what of(int)
        // returns for a code point falling inside it.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            null,
            ETHIOPIC_EXTENDED_A,
            null,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            null,
            KANA_SUPPLEMENT,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };


        /**
         * Returns the object representing the Unicode block containing the
         * given character, or <code>null</code> if the character is not a
         * member of a defined block.
         *
         * <p><b>Note:</b> This method cannot handle
         * <a href="Character.html#supplementary"> supplementary
         * characters</a>. To support all Unicode characters, including
         * supplementary characters, use the {@link #of(int)} method.
         *
         * @param   c  The character in question
         * @return  The <code>UnicodeBlock</code> instance representing the
         *          Unicode block of which this character is a member, or
         *          <code>null</code> if the character is not a member of any
         *          Unicode block
         */
        public static UnicodeBlock of(char c) {
            // The explicit widening selects the of(int) overload; without it
            // the call would resolve back to this method.
            return of((int)c);
        }

        /**
         * Returns the object representing the Unicode block
         * containing the given character (Unicode code point), or
         * <code>null</code> if the character is not a member of a
         * defined block.
         *
         * @param   codePoint the character (Unicode code point) in question.
         * @return  The <code>UnicodeBlock</code> instance representing the
         *          Unicode block of which this character is a member, or
         *          <code>null</code> if the character is not a member of any
         *          Unicode block
         * @exception IllegalArgumentException if the specified
         * <code>codePoint</code> is an invalid Unicode code point.
2995 * @see Character#isValidCodePoint(int) 2996 * @since 1.5 2997 */ 2998 public static UnicodeBlock of(int codePoint) { 2999 if (!isValidCodePoint(codePoint)) { 3000 throw new IllegalArgumentException(); 3001 } 3002 3003 int top, bottom, current; 3004 bottom = 0; 3005 top = blockStarts.length; 3006 current = top/2; 3007 3008 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3009 while (top - bottom > 1) { 3010 if (codePoint >= blockStarts[current]) { 3011 bottom = current; 3012 } else { 3013 top = current; 3014 } 3015 current = (top + bottom) / 2; 3016 } 3017 return blocks[current]; 3018 } 3019 3020 /** 3021 * Returns the UnicodeBlock with the given name. Block 3022 * names are determined by The Unicode Standard. The file 3023 * Blocks-<version>.txt defines blocks for a particular 3024 * version of the standard. The {@link Character} class specifies 3025 * the version of the standard that it supports. 3026 * <p> 3027 * This method accepts block names in the following forms: 3028 * <ol> 3029 * <li> Canonical block names as defined by the Unicode Standard. 3030 * For example, the standard defines a "Basic Latin" block. Therefore, this 3031 * method accepts "Basic Latin" as a valid block name. The documentation of 3032 * each UnicodeBlock provides the canonical name. 3033 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3034 * is a valid block name for the "Basic Latin" block. 3035 * <li>The text representation of each constant UnicodeBlock identifier. 3036 * For example, this method will return the {@link #BASIC_LATIN} block if 3037 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3038 * hyphens in the canonical name with underscores. 3039 * </ol> 3040 * Finally, character case is ignored for all of the valid block name forms. 3041 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 
3042 * The en_US locale's case mapping rules are used to provide case-insensitive 3043 * string comparisons for block name validation. 3044 * <p> 3045 * If the Unicode Standard changes block names, both the previous and 3046 * current names will be accepted. 3047 * 3048 * @param blockName A <code>UnicodeBlock</code> name. 3049 * @return The <code>UnicodeBlock</code> instance identified 3050 * by <code>blockName</code> 3051 * @throws IllegalArgumentException if <code>blockName</code> is an 3052 * invalid name 3053 * @throws NullPointerException if <code>blockName</code> is null 3054 * @since 1.5 3055 */ 3056 public static final UnicodeBlock forName(String blockName) { 3057 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3058 if (block == null) { 3059 throw new IllegalArgumentException(); 3060 } 3061 return block; 3062 } 3063 } 3064 3065 3066 /** 3067 * A family of character subsets representing the character scripts 3068 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3069 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3070 * character is assigned to a single Unicode script, either a specific 3071 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3072 * one of the following three special values, 3073 * {@link Character.UnicodeScript#INHERITED Inherited}, 3074 * {@link Character.UnicodeScript#COMMON Common} or 3075 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3076 * 3077 * @since 1.7 3078 */ 3079 public static enum UnicodeScript { 3080 /** 3081 * Unicode script "Common". 3082 */ 3083 COMMON, 3084 3085 /** 3086 * Unicode script "Latin". 3087 */ 3088 LATIN, 3089 3090 /** 3091 * Unicode script "Greek". 3092 */ 3093 GREEK, 3094 3095 /** 3096 * Unicode script "Cyrillic". 3097 */ 3098 CYRILLIC, 3099 3100 /** 3101 * Unicode script "Armenian". 3102 */ 3103 ARMENIAN, 3104 3105 /** 3106 * Unicode script "Hebrew". 3107 */ 3108 HEBREW, 3109 3110 /** 3111 * Unicode script "Arabic". 
3112 */ 3113 ARABIC, 3114 3115 /** 3116 * Unicode script "Syriac". 3117 */ 3118 SYRIAC, 3119 3120 /** 3121 * Unicode script "Thaana". 3122 */ 3123 THAANA, 3124 3125 /** 3126 * Unicode script "Devanagari". 3127 */ 3128 DEVANAGARI, 3129 3130 /** 3131 * Unicode script "Bengali". 3132 */ 3133 BENGALI, 3134 3135 /** 3136 * Unicode script "Gurmukhi". 3137 */ 3138 GURMUKHI, 3139 3140 /** 3141 * Unicode script "Gujarati". 3142 */ 3143 GUJARATI, 3144 3145 /** 3146 * Unicode script "Oriya". 3147 */ 3148 ORIYA, 3149 3150 /** 3151 * Unicode script "Tamil". 3152 */ 3153 TAMIL, 3154 3155 /** 3156 * Unicode script "Telugu". 3157 */ 3158 TELUGU, 3159 3160 /** 3161 * Unicode script "Kannada". 3162 */ 3163 KANNADA, 3164 3165 /** 3166 * Unicode script "Malayalam". 3167 */ 3168 MALAYALAM, 3169 3170 /** 3171 * Unicode script "Sinhala". 3172 */ 3173 SINHALA, 3174 3175 /** 3176 * Unicode script "Thai". 3177 */ 3178 THAI, 3179 3180 /** 3181 * Unicode script "Lao". 3182 */ 3183 LAO, 3184 3185 /** 3186 * Unicode script "Tibetan". 3187 */ 3188 TIBETAN, 3189 3190 /** 3191 * Unicode script "Myanmar". 3192 */ 3193 MYANMAR, 3194 3195 /** 3196 * Unicode script "Georgian". 3197 */ 3198 GEORGIAN, 3199 3200 /** 3201 * Unicode script "Hangul". 3202 */ 3203 HANGUL, 3204 3205 /** 3206 * Unicode script "Ethiopic". 3207 */ 3208 ETHIOPIC, 3209 3210 /** 3211 * Unicode script "Cherokee". 3212 */ 3213 CHEROKEE, 3214 3215 /** 3216 * Unicode script "Canadian_Aboriginal". 3217 */ 3218 CANADIAN_ABORIGINAL, 3219 3220 /** 3221 * Unicode script "Ogham". 3222 */ 3223 OGHAM, 3224 3225 /** 3226 * Unicode script "Runic". 3227 */ 3228 RUNIC, 3229 3230 /** 3231 * Unicode script "Khmer". 3232 */ 3233 KHMER, 3234 3235 /** 3236 * Unicode script "Mongolian". 3237 */ 3238 MONGOLIAN, 3239 3240 /** 3241 * Unicode script "Hiragana". 3242 */ 3243 HIRAGANA, 3244 3245 /** 3246 * Unicode script "Katakana". 3247 */ 3248 KATAKANA, 3249 3250 /** 3251 * Unicode script "Bopomofo". 
3252 */ 3253 BOPOMOFO, 3254 3255 /** 3256 * Unicode script "Han". 3257 */ 3258 HAN, 3259 3260 /** 3261 * Unicode script "Yi". 3262 */ 3263 YI, 3264 3265 /** 3266 * Unicode script "Old_Italic". 3267 */ 3268 OLD_ITALIC, 3269 3270 /** 3271 * Unicode script "Gothic". 3272 */ 3273 GOTHIC, 3274 3275 /** 3276 * Unicode script "Deseret". 3277 */ 3278 DESERET, 3279 3280 /** 3281 * Unicode script "Inherited". 3282 */ 3283 INHERITED, 3284 3285 /** 3286 * Unicode script "Tagalog". 3287 */ 3288 TAGALOG, 3289 3290 /** 3291 * Unicode script "Hanunoo". 3292 */ 3293 HANUNOO, 3294 3295 /** 3296 * Unicode script "Buhid". 3297 */ 3298 BUHID, 3299 3300 /** 3301 * Unicode script "Tagbanwa". 3302 */ 3303 TAGBANWA, 3304 3305 /** 3306 * Unicode script "Limbu". 3307 */ 3308 LIMBU, 3309 3310 /** 3311 * Unicode script "Tai_Le". 3312 */ 3313 TAI_LE, 3314 3315 /** 3316 * Unicode script "Linear_B". 3317 */ 3318 LINEAR_B, 3319 3320 /** 3321 * Unicode script "Ugaritic". 3322 */ 3323 UGARITIC, 3324 3325 /** 3326 * Unicode script "Shavian". 3327 */ 3328 SHAVIAN, 3329 3330 /** 3331 * Unicode script "Osmanya". 3332 */ 3333 OSMANYA, 3334 3335 /** 3336 * Unicode script "Cypriot". 3337 */ 3338 CYPRIOT, 3339 3340 /** 3341 * Unicode script "Braille". 3342 */ 3343 BRAILLE, 3344 3345 /** 3346 * Unicode script "Buginese". 3347 */ 3348 BUGINESE, 3349 3350 /** 3351 * Unicode script "Coptic". 3352 */ 3353 COPTIC, 3354 3355 /** 3356 * Unicode script "New_Tai_Lue". 3357 */ 3358 NEW_TAI_LUE, 3359 3360 /** 3361 * Unicode script "Glagolitic". 3362 */ 3363 GLAGOLITIC, 3364 3365 /** 3366 * Unicode script "Tifinagh". 3367 */ 3368 TIFINAGH, 3369 3370 /** 3371 * Unicode script "Syloti_Nagri". 3372 */ 3373 SYLOTI_NAGRI, 3374 3375 /** 3376 * Unicode script "Old_Persian". 3377 */ 3378 OLD_PERSIAN, 3379 3380 /** 3381 * Unicode script "Kharoshthi". 3382 */ 3383 KHAROSHTHI, 3384 3385 /** 3386 * Unicode script "Balinese". 3387 */ 3388 BALINESE, 3389 3390 /** 3391 * Unicode script "Cuneiform". 
3392 */ 3393 CUNEIFORM, 3394 3395 /** 3396 * Unicode script "Phoenician". 3397 */ 3398 PHOENICIAN, 3399 3400 /** 3401 * Unicode script "Phags_Pa". 3402 */ 3403 PHAGS_PA, 3404 3405 /** 3406 * Unicode script "Nko". 3407 */ 3408 NKO, 3409 3410 /** 3411 * Unicode script "Sundanese". 3412 */ 3413 SUNDANESE, 3414 3415 /** 3416 * Unicode script "Batak". 3417 */ 3418 BATAK, 3419 3420 /** 3421 * Unicode script "Lepcha". 3422 */ 3423 LEPCHA, 3424 3425 /** 3426 * Unicode script "Ol_Chiki". 3427 */ 3428 OL_CHIKI, 3429 3430 /** 3431 * Unicode script "Vai". 3432 */ 3433 VAI, 3434 3435 /** 3436 * Unicode script "Saurashtra". 3437 */ 3438 SAURASHTRA, 3439 3440 /** 3441 * Unicode script "Kayah_Li". 3442 */ 3443 KAYAH_LI, 3444 3445 /** 3446 * Unicode script "Rejang". 3447 */ 3448 REJANG, 3449 3450 /** 3451 * Unicode script "Lycian". 3452 */ 3453 LYCIAN, 3454 3455 /** 3456 * Unicode script "Carian". 3457 */ 3458 CARIAN, 3459 3460 /** 3461 * Unicode script "Lydian". 3462 */ 3463 LYDIAN, 3464 3465 /** 3466 * Unicode script "Cham". 3467 */ 3468 CHAM, 3469 3470 /** 3471 * Unicode script "Tai_Tham". 3472 */ 3473 TAI_THAM, 3474 3475 /** 3476 * Unicode script "Tai_Viet". 3477 */ 3478 TAI_VIET, 3479 3480 /** 3481 * Unicode script "Avestan". 3482 */ 3483 AVESTAN, 3484 3485 /** 3486 * Unicode script "Egyptian_Hieroglyphs". 3487 */ 3488 EGYPTIAN_HIEROGLYPHS, 3489 3490 /** 3491 * Unicode script "Samaritan". 3492 */ 3493 SAMARITAN, 3494 3495 /** 3496 * Unicode script "Mandaic". 3497 */ 3498 MANDAIC, 3499 3500 /** 3501 * Unicode script "Lisu". 3502 */ 3503 LISU, 3504 3505 /** 3506 * Unicode script "Bamum". 3507 */ 3508 BAMUM, 3509 3510 /** 3511 * Unicode script "Javanese". 3512 */ 3513 JAVANESE, 3514 3515 /** 3516 * Unicode script "Meetei_Mayek". 3517 */ 3518 MEETEI_MAYEK, 3519 3520 /** 3521 * Unicode script "Imperial_Aramaic". 3522 */ 3523 IMPERIAL_ARAMAIC, 3524 3525 /** 3526 * Unicode script "Old_South_Arabian". 
3527 */ 3528 OLD_SOUTH_ARABIAN, 3529 3530 /** 3531 * Unicode script "Inscriptional_Parthian". 3532 */ 3533 INSCRIPTIONAL_PARTHIAN, 3534 3535 /** 3536 * Unicode script "Inscriptional_Pahlavi". 3537 */ 3538 INSCRIPTIONAL_PAHLAVI, 3539 3540 /** 3541 * Unicode script "Old_Turkic". 3542 */ 3543 OLD_TURKIC, 3544 3545 /** 3546 * Unicode script "Brahmi". 3547 */ 3548 BRAHMI, 3549 3550 /** 3551 * Unicode script "Kaithi". 3552 */ 3553 KAITHI, 3554 3555 /** 3556 * Unicode script "Unknown". 3557 */ 3558 UNKNOWN; 3559 3560 private static final int[] scriptStarts = { 3561 0x0000, // 0000..0040; COMMON 3562 0x0041, // 0041..005A; LATIN 3563 0x005B, // 005B..0060; COMMON 3564 0x0061, // 0061..007A; LATIN 3565 0x007B, // 007B..00A9; COMMON 3566 0x00AA, // 00AA..00AA; LATIN 3567 0x00AB, // 00AB..00B9; COMMON 3568 0x00BA, // 00BA..00BA; LATIN 3569 0x00BB, // 00BB..00BF; COMMON 3570 0x00C0, // 00C0..00D6; LATIN 3571 0x00D7, // 00D7..00D7; COMMON 3572 0x00D8, // 00D8..00F6; LATIN 3573 0x00F7, // 00F7..00F7; COMMON 3574 0x00F8, // 00F8..02B8; LATIN 3575 0x02B9, // 02B9..02DF; COMMON 3576 0x02E0, // 02E0..02E4; LATIN 3577 0x02E5, // 02E5..02E9; COMMON 3578 0x02EA, // 02EA..02EB; BOPOMOFO 3579 0x02EC, // 02EC..02FF; COMMON 3580 0x0300, // 0300..036F; INHERITED 3581 0x0370, // 0370..0373; GREEK 3582 0x0374, // 0374..0374; COMMON 3583 0x0375, // 0375..037D; GREEK 3584 0x037E, // 037E..0383; COMMON 3585 0x0384, // 0384..0384; GREEK 3586 0x0385, // 0385..0385; COMMON 3587 0x0386, // 0386..0386; GREEK 3588 0x0387, // 0387..0387; COMMON 3589 0x0388, // 0388..03E1; GREEK 3590 0x03E2, // 03E2..03EF; COPTIC 3591 0x03F0, // 03F0..03FF; GREEK 3592 0x0400, // 0400..0484; CYRILLIC 3593 0x0485, // 0485..0486; INHERITED 3594 0x0487, // 0487..0530; CYRILLIC 3595 0x0531, // 0531..0588; ARMENIAN 3596 0x0589, // 0589..0589; COMMON 3597 0x058A, // 058A..0590; ARMENIAN 3598 0x0591, // 0591..05FF; HEBREW 3599 0x0600, // 0600..060B; ARABIC 3600 0x060C, // 060C..060C; COMMON 3601 0x060D, // 060D..061A; ARABIC 
3602 0x061B, // 061B..061D; COMMON 3603 0x061E, // 061E..061E; ARABIC 3604 0x061F, // 061F..061F; COMMON 3605 0x0620, // 0620..063F; ARABIC 3606 0x0640, // 0640..0640; COMMON 3607 0x0641, // 0641..064A; ARABIC 3608 0x064B, // 064B..0655; INHERITED 3609 0x0656, // 0656..065E; ARABIC 3610 0x065F, // 065F..065F; INHERITED 3611 0x0660, // 0660..0669; COMMON 3612 0x066A, // 066A..066F; ARABIC 3613 0x0670, // 0670..0670; INHERITED 3614 0x0671, // 0671..06DC; ARABIC 3615 0x06DD, // 06DD..06DD; COMMON 3616 0x06DE, // 06DE..06FF; ARABIC 3617 0x0700, // 0700..074F; SYRIAC 3618 0x0750, // 0750..077F; ARABIC 3619 0x0780, // 0780..07BF; THAANA 3620 0x07C0, // 07C0..07FF; NKO 3621 0x0800, // 0800..083F; SAMARITAN 3622 0x0840, // 0840..08FF; MANDAIC 3623 0x0900, // 0900..0950; DEVANAGARI 3624 0x0951, // 0951..0952; INHERITED 3625 0x0953, // 0953..0963; DEVANAGARI 3626 0x0964, // 0964..0965; COMMON 3627 0x0966, // 0966..096F; DEVANAGARI 3628 0x0970, // 0970..0970; COMMON 3629 0x0971, // 0971..0980; DEVANAGARI 3630 0x0981, // 0981..0A00; BENGALI 3631 0x0A01, // 0A01..0A80; GURMUKHI 3632 0x0A81, // 0A81..0B00; GUJARATI 3633 0x0B01, // 0B01..0B81; ORIYA 3634 0x0B82, // 0B82..0C00; TAMIL 3635 0x0C01, // 0C01..0C81; TELUGU 3636 0x0C82, // 0C82..0CF0; KANNADA 3637 0x0D02, // 0D02..0D81; MALAYALAM 3638 0x0D82, // 0D82..0E00; SINHALA 3639 0x0E01, // 0E01..0E3E; THAI 3640 0x0E3F, // 0E3F..0E3F; COMMON 3641 0x0E40, // 0E40..0E80; THAI 3642 0x0E81, // 0E81..0EFF; LAO 3643 0x0F00, // 0F00..0FD4; TIBETAN 3644 0x0FD5, // 0FD5..0FD8; COMMON 3645 0x0FD9, // 0FD9..0FFF; TIBETAN 3646 0x1000, // 1000..109F; MYANMAR 3647 0x10A0, // 10A0..10FA; GEORGIAN 3648 0x10FB, // 10FB..10FB; COMMON 3649 0x10FC, // 10FC..10FF; GEORGIAN 3650 0x1100, // 1100..11FF; HANGUL 3651 0x1200, // 1200..139F; ETHIOPIC 3652 0x13A0, // 13A0..13FF; CHEROKEE 3653 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 3654 0x1680, // 1680..169F; OGHAM 3655 0x16A0, // 16A0..16EA; RUNIC 3656 0x16EB, // 16EB..16ED; COMMON 3657 0x16EE, // 
16EE..16FF; RUNIC 3658 0x1700, // 1700..171F; TAGALOG 3659 0x1720, // 1720..1734; HANUNOO 3660 0x1735, // 1735..173F; COMMON 3661 0x1740, // 1740..175F; BUHID 3662 0x1760, // 1760..177F; TAGBANWA 3663 0x1780, // 1780..17FF; KHMER 3664 0x1800, // 1800..1801; MONGOLIAN 3665 0x1802, // 1802..1803; COMMON 3666 0x1804, // 1804..1804; MONGOLIAN 3667 0x1805, // 1805..1805; COMMON 3668 0x1806, // 1806..18AF; MONGOLIAN 3669 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL 3670 0x1900, // 1900..194F; LIMBU 3671 0x1950, // 1950..197F; TAI_LE 3672 0x1980, // 1980..19DF; NEW_TAI_LUE 3673 0x19E0, // 19E0..19FF; KHMER 3674 0x1A00, // 1A00..1A1F; BUGINESE 3675 0x1A20, // 1A20..1AFF; TAI_THAM 3676 0x1B00, // 1B00..1B7F; BALINESE 3677 0x1B80, // 1B80..1BBF; SUNDANESE 3678 0x1BC0, // 1BC0..1BFF; BATAK 3679 0x1C00, // 1C00..1C4F; LEPCHA 3680 0x1C50, // 1C50..1CCF; OL_CHIKI 3681 0x1CD0, // 1CD0..1CD2; INHERITED 3682 0x1CD3, // 1CD3..1CD3; COMMON 3683 0x1CD4, // 1CD4..1CE0; INHERITED 3684 0x1CE1, // 1CE1..1CE1; COMMON 3685 0x1CE2, // 1CE2..1CE8; INHERITED 3686 0x1CE9, // 1CE9..1CEC; COMMON 3687 0x1CED, // 1CED..1CED; INHERITED 3688 0x1CEE, // 1CEE..1CFF; COMMON 3689 0x1D00, // 1D00..1D25; LATIN 3690 0x1D26, // 1D26..1D2A; GREEK 3691 0x1D2B, // 1D2B..1D2B; CYRILLIC 3692 0x1D2C, // 1D2C..1D5C; LATIN 3693 0x1D5D, // 1D5D..1D61; GREEK 3694 0x1D62, // 1D62..1D65; LATIN 3695 0x1D66, // 1D66..1D6A; GREEK 3696 0x1D6B, // 1D6B..1D77; LATIN 3697 0x1D78, // 1D78..1D78; CYRILLIC 3698 0x1D79, // 1D79..1DBE; LATIN 3699 0x1DBF, // 1DBF..1DBF; GREEK 3700 0x1DC0, // 1DC0..1DFF; INHERITED 3701 0x1E00, // 1E00..1EFF; LATIN 3702 0x1F00, // 1F00..1FFF; GREEK 3703 0x2000, // 2000..200B; COMMON 3704 0x200C, // 200C..200D; INHERITED 3705 0x200E, // 200E..2070; COMMON 3706 0x2071, // 2071..2073; LATIN 3707 0x2074, // 2074..207E; COMMON 3708 0x207F, // 207F..207F; LATIN 3709 0x2080, // 2080..208F; COMMON 3710 0x2090, // 2090..209F; LATIN 3711 0x20A0, // 20A0..20CF; COMMON 3712 0x20D0, // 20D0..20FF; INHERITED 3713 
0x2100, // 2100..2125; COMMON 3714 0x2126, // 2126..2126; GREEK 3715 0x2127, // 2127..2129; COMMON 3716 0x212A, // 212A..212B; LATIN 3717 0x212C, // 212C..2131; COMMON 3718 0x2132, // 2132..2132; LATIN 3719 0x2133, // 2133..214D; COMMON 3720 0x214E, // 214E..214E; LATIN 3721 0x214F, // 214F..215F; COMMON 3722 0x2160, // 2160..2188; LATIN 3723 0x2189, // 2189..27FF; COMMON 3724 0x2800, // 2800..28FF; BRAILLE 3725 0x2900, // 2900..2BFF; COMMON 3726 0x2C00, // 2C00..2C5F; GLAGOLITIC 3727 0x2C60, // 2C60..2C7F; LATIN 3728 0x2C80, // 2C80..2CFF; COPTIC 3729 0x2D00, // 2D00..2D2F; GEORGIAN 3730 0x2D30, // 2D30..2D7F; TIFINAGH 3731 0x2D80, // 2D80..2DDF; ETHIOPIC 3732 0x2DE0, // 2DE0..2DFF; CYRILLIC 3733 0x2E00, // 2E00..2E7F; COMMON 3734 0x2E80, // 2E80..2FEF; HAN 3735 0x2FF0, // 2FF0..3004; COMMON 3736 0x3005, // 3005..3005; HAN 3737 0x3006, // 3006..3006; COMMON 3738 0x3007, // 3007..3007; HAN 3739 0x3008, // 3008..3020; COMMON 3740 0x3021, // 3021..3029; HAN 3741 0x302A, // 302A..302D; INHERITED 3742 0x302E, // 302E..302F; HANGUL 3743 0x3030, // 3030..3037; COMMON 3744 0x3038, // 3038..303B; HAN 3745 0x303C, // 303C..3040; COMMON 3746 0x3041, // 3041..3098; HIRAGANA 3747 0x3099, // 3099..309A; INHERITED 3748 0x309B, // 309B..309C; COMMON 3749 0x309D, // 309D..309F; HIRAGANA 3750 0x30A0, // 30A0..30A0; COMMON 3751 0x30A1, // 30A1..30FA; KATAKANA 3752 0x30FB, // 30FB..30FC; COMMON 3753 0x30FD, // 30FD..3104; KATAKANA 3754 0x3105, // 3105..3130; BOPOMOFO 3755 0x3131, // 3131..318F; HANGUL 3756 0x3190, // 3190..319F; COMMON 3757 0x31A0, // 31A0..31BF; BOPOMOFO 3758 0x31C0, // 31C0..31EF; COMMON 3759 0x31F0, // 31F0..31FF; KATAKANA 3760 0x3200, // 3200..321F; HANGUL 3761 0x3220, // 3220..325F; COMMON 3762 0x3260, // 3260..327E; HANGUL 3763 0x327F, // 327F..32CF; COMMON 3764 0x32D0, // 32D0..3357; KATAKANA 3765 0x3358, // 3358..33FF; COMMON 3766 0x3400, // 3400..4DBF; HAN 3767 0x4DC0, // 4DC0..4DFF; COMMON 3768 0x4E00, // 4E00..9FFF; HAN 3769 0xA000, // A000..A4CF; YI 3770 
0xA4D0, // A4D0..A4FF; LISU 3771 0xA500, // A500..A63F; VAI 3772 0xA640, // A640..A69F; CYRILLIC 3773 0xA6A0, // A6A0..A6FF; BAMUM 3774 0xA700, // A700..A721; COMMON 3775 0xA722, // A722..A787; LATIN 3776 0xA788, // A788..A78A; COMMON 3777 0xA78B, // A78B..A7FF; LATIN 3778 0xA800, // A800..A82F; SYLOTI_NAGRI 3779 0xA830, // A830..A83F; COMMON 3780 0xA840, // A840..A87F; PHAGS_PA 3781 0xA880, // A880..A8DF; SAURASHTRA 3782 0xA8E0, // A8E0..A8FF; DEVANAGARI 3783 0xA900, // A900..A92F; KAYAH_LI 3784 0xA930, // A930..A95F; REJANG 3785 0xA960, // A960..A97F; HANGUL 3786 0xA980, // A980..A9FF; JAVANESE 3787 0xAA00, // AA00..AA5F; CHAM 3788 0xAA60, // AA60..AA7F; MYANMAR 3789 0xAA80, // AA80..AB00; TAI_VIET 3790 0xAB01, // AB01..ABBF; ETHIOPIC 3791 0xABC0, // ABC0..ABFF; MEETEI_MAYEK 3792 0xAC00, // AC00..D7FB; HANGUL 3793 0xD7FC, // D7FC..F8FF; UNKNOWN 3794 0xF900, // F900..FAFF; HAN 3795 0xFB00, // FB00..FB12; LATIN 3796 0xFB13, // FB13..FB1C; ARMENIAN 3797 0xFB1D, // FB1D..FB4F; HEBREW 3798 0xFB50, // FB50..FD3D; ARABIC 3799 0xFD3E, // FD3E..FD4F; COMMON 3800 0xFD50, // FD50..FDFC; ARABIC 3801 0xFDFD, // FDFD..FDFF; COMMON 3802 0xFE00, // FE00..FE0F; INHERITED 3803 0xFE10, // FE10..FE1F; COMMON 3804 0xFE20, // FE20..FE2F; INHERITED 3805 0xFE30, // FE30..FE6F; COMMON 3806 0xFE70, // FE70..FEFE; ARABIC 3807 0xFEFF, // FEFF..FF20; COMMON 3808 0xFF21, // FF21..FF3A; LATIN 3809 0xFF3B, // FF3B..FF40; COMMON 3810 0xFF41, // FF41..FF5A; LATIN 3811 0xFF5B, // FF5B..FF65; COMMON 3812 0xFF66, // FF66..FF6F; KATAKANA 3813 0xFF70, // FF70..FF70; COMMON 3814 0xFF71, // FF71..FF9D; KATAKANA 3815 0xFF9E, // FF9E..FF9F; COMMON 3816 0xFFA0, // FFA0..FFDF; HANGUL 3817 0xFFE0, // FFE0..FFFF; COMMON 3818 0x10000, // 10000..100FF; LINEAR_B 3819 0x10100, // 10100..1013F; COMMON 3820 0x10140, // 10140..1018F; GREEK 3821 0x10190, // 10190..101FC; COMMON 3822 0x101FD, // 101FD..1027F; INHERITED 3823 0x10280, // 10280..1029F; LYCIAN 3824 0x102A0, // 102A0..102FF; CARIAN 3825 0x10300, // 
10300..1032F; OLD_ITALIC 3826 0x10330, // 10330..1037F; GOTHIC 3827 0x10380, // 10380..1039F; UGARITIC 3828 0x103A0, // 103A0..103FF; OLD_PERSIAN 3829 0x10400, // 10400..1044F; DESERET 3830 0x10450, // 10450..1047F; SHAVIAN 3831 0x10480, // 10480..107FF; OSMANYA 3832 0x10800, // 10800..1083F; CYPRIOT 3833 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC 3834 0x10900, // 10900..1091F; PHOENICIAN 3835 0x10920, // 10920..109FF; LYDIAN 3836 0x10A00, // 10A00..10A5F; KHAROSHTHI 3837 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN 3838 0x10B00, // 10B00..10B3F; AVESTAN 3839 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN 3840 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI 3841 0x10C00, // 10C00..10E5F; OLD_TURKIC 3842 0x10E60, // 10E60..10FFF; ARABIC 3843 0x11000, // 11000..1107F; BRAHMI 3844 0x11080, // 11080..11FFF; KAITHI 3845 0x12000, // 12000..12FFF; CUNEIFORM 3846 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS 3847 0x16800, // 16800..16A38; BAMUM 3848 0x1B000, // 1B000..1B000; KATAKANA 3849 0x1B001, // 1B001..1CFFF; HIRAGANA 3850 0x1D000, // 1D000..1D166; COMMON 3851 0x1D167, // 1D167..1D169; INHERITED 3852 0x1D16A, // 1D16A..1D17A; COMMON 3853 0x1D17B, // 1D17B..1D182; INHERITED 3854 0x1D183, // 1D183..1D184; COMMON 3855 0x1D185, // 1D185..1D18B; INHERITED 3856 0x1D18C, // 1D18C..1D1A9; COMMON 3857 0x1D1AA, // 1D1AA..1D1AD; INHERITED 3858 0x1D1AE, // 1D1AE..1D1FF; COMMON 3859 0x1D200, // 1D200..1D2FF; GREEK 3860 0x1D300, // 1D300..1F1FF; COMMON 3861 0x1F200, // 1F200..1F200; HIRAGANA 3862 0x1F201, // 1F210..1FFFF; COMMON 3863 0x20000, // 20000..E0000; HAN 3864 0xE0001, // E0001..E00FF; COMMON 3865 0xE0100, // E0100..E01EF; INHERITED 3866 0xE01F0 // E01F0..10FFFF; UNKNOWN 3867 3868 }; 3869 3870 private static final UnicodeScript[] scripts = { 3871 COMMON, 3872 LATIN, 3873 COMMON, 3874 LATIN, 3875 COMMON, 3876 LATIN, 3877 COMMON, 3878 LATIN, 3879 COMMON, 3880 LATIN, 3881 COMMON, 3882 LATIN, 3883 COMMON, 3884 LATIN, 3885 COMMON, 3886 LATIN, 3887 COMMON, 3888 BOPOMOFO, 3889 
COMMON, 3890 INHERITED, 3891 GREEK, 3892 COMMON, 3893 GREEK, 3894 COMMON, 3895 GREEK, 3896 COMMON, 3897 GREEK, 3898 COMMON, 3899 GREEK, 3900 COPTIC, 3901 GREEK, 3902 CYRILLIC, 3903 INHERITED, 3904 CYRILLIC, 3905 ARMENIAN, 3906 COMMON, 3907 ARMENIAN, 3908 HEBREW, 3909 ARABIC, 3910 COMMON, 3911 ARABIC, 3912 COMMON, 3913 ARABIC, 3914 COMMON, 3915 ARABIC, 3916 COMMON, 3917 ARABIC, 3918 INHERITED, 3919 ARABIC, 3920 INHERITED, 3921 COMMON, 3922 ARABIC, 3923 INHERITED, 3924 ARABIC, 3925 COMMON, 3926 ARABIC, 3927 SYRIAC, 3928 ARABIC, 3929 THAANA, 3930 NKO, 3931 SAMARITAN, 3932 MANDAIC, 3933 DEVANAGARI, 3934 INHERITED, 3935 DEVANAGARI, 3936 COMMON, 3937 DEVANAGARI, 3938 COMMON, 3939 DEVANAGARI, 3940 BENGALI, 3941 GURMUKHI, 3942 GUJARATI, 3943 ORIYA, 3944 TAMIL, 3945 TELUGU, 3946 KANNADA, 3947 MALAYALAM, 3948 SINHALA, 3949 THAI, 3950 COMMON, 3951 THAI, 3952 LAO, 3953 TIBETAN, 3954 COMMON, 3955 TIBETAN, 3956 MYANMAR, 3957 GEORGIAN, 3958 COMMON, 3959 GEORGIAN, 3960 HANGUL, 3961 ETHIOPIC, 3962 CHEROKEE, 3963 CANADIAN_ABORIGINAL, 3964 OGHAM, 3965 RUNIC, 3966 COMMON, 3967 RUNIC, 3968 TAGALOG, 3969 HANUNOO, 3970 COMMON, 3971 BUHID, 3972 TAGBANWA, 3973 KHMER, 3974 MONGOLIAN, 3975 COMMON, 3976 MONGOLIAN, 3977 COMMON, 3978 MONGOLIAN, 3979 CANADIAN_ABORIGINAL, 3980 LIMBU, 3981 TAI_LE, 3982 NEW_TAI_LUE, 3983 KHMER, 3984 BUGINESE, 3985 TAI_THAM, 3986 BALINESE, 3987 SUNDANESE, 3988 BATAK, 3989 LEPCHA, 3990 OL_CHIKI, 3991 INHERITED, 3992 COMMON, 3993 INHERITED, 3994 COMMON, 3995 INHERITED, 3996 COMMON, 3997 INHERITED, 3998 COMMON, 3999 LATIN, 4000 GREEK, 4001 CYRILLIC, 4002 LATIN, 4003 GREEK, 4004 LATIN, 4005 GREEK, 4006 LATIN, 4007 CYRILLIC, 4008 LATIN, 4009 GREEK, 4010 INHERITED, 4011 LATIN, 4012 GREEK, 4013 COMMON, 4014 INHERITED, 4015 COMMON, 4016 LATIN, 4017 COMMON, 4018 LATIN, 4019 COMMON, 4020 LATIN, 4021 COMMON, 4022 INHERITED, 4023 COMMON, 4024 GREEK, 4025 COMMON, 4026 LATIN, 4027 COMMON, 4028 LATIN, 4029 COMMON, 4030 LATIN, 4031 COMMON, 4032 LATIN, 4033 COMMON, 4034 BRAILLE, 
4035 COMMON, 4036 GLAGOLITIC, 4037 LATIN, 4038 COPTIC, 4039 GEORGIAN, 4040 TIFINAGH, 4041 ETHIOPIC, 4042 CYRILLIC, 4043 COMMON, 4044 HAN, 4045 COMMON, 4046 HAN, 4047 COMMON, 4048 HAN, 4049 COMMON, 4050 HAN, 4051 INHERITED, 4052 HANGUL, 4053 COMMON, 4054 HAN, 4055 COMMON, 4056 HIRAGANA, 4057 INHERITED, 4058 COMMON, 4059 HIRAGANA, 4060 COMMON, 4061 KATAKANA, 4062 COMMON, 4063 KATAKANA, 4064 BOPOMOFO, 4065 HANGUL, 4066 COMMON, 4067 BOPOMOFO, 4068 COMMON, 4069 KATAKANA, 4070 HANGUL, 4071 COMMON, 4072 HANGUL, 4073 COMMON, 4074 KATAKANA, 4075 COMMON, 4076 HAN, 4077 COMMON, 4078 HAN, 4079 YI, 4080 LISU, 4081 VAI, 4082 CYRILLIC, 4083 BAMUM, 4084 COMMON, 4085 LATIN, 4086 COMMON, 4087 LATIN, 4088 SYLOTI_NAGRI, 4089 COMMON, 4090 PHAGS_PA, 4091 SAURASHTRA, 4092 DEVANAGARI, 4093 KAYAH_LI, 4094 REJANG, 4095 HANGUL, 4096 JAVANESE, 4097 CHAM, 4098 MYANMAR, 4099 TAI_VIET, 4100 ETHIOPIC, 4101 MEETEI_MAYEK, 4102 HANGUL, 4103 UNKNOWN, 4104 HAN, 4105 LATIN, 4106 ARMENIAN, 4107 HEBREW, 4108 ARABIC, 4109 COMMON, 4110 ARABIC, 4111 COMMON, 4112 INHERITED, 4113 COMMON, 4114 INHERITED, 4115 COMMON, 4116 ARABIC, 4117 COMMON, 4118 LATIN, 4119 COMMON, 4120 LATIN, 4121 COMMON, 4122 KATAKANA, 4123 COMMON, 4124 KATAKANA, 4125 COMMON, 4126 HANGUL, 4127 COMMON, 4128 LINEAR_B, 4129 COMMON, 4130 GREEK, 4131 COMMON, 4132 INHERITED, 4133 LYCIAN, 4134 CARIAN, 4135 OLD_ITALIC, 4136 GOTHIC, 4137 UGARITIC, 4138 OLD_PERSIAN, 4139 DESERET, 4140 SHAVIAN, 4141 OSMANYA, 4142 CYPRIOT, 4143 IMPERIAL_ARAMAIC, 4144 PHOENICIAN, 4145 LYDIAN, 4146 KHAROSHTHI, 4147 OLD_SOUTH_ARABIAN, 4148 AVESTAN, 4149 INSCRIPTIONAL_PARTHIAN, 4150 INSCRIPTIONAL_PAHLAVI, 4151 OLD_TURKIC, 4152 ARABIC, 4153 BRAHMI, 4154 KAITHI, 4155 CUNEIFORM, 4156 EGYPTIAN_HIEROGLYPHS, 4157 BAMUM, 4158 KATAKANA, 4159 HIRAGANA, 4160 COMMON, 4161 INHERITED, 4162 COMMON, 4163 INHERITED, 4164 COMMON, 4165 INHERITED, 4166 COMMON, 4167 INHERITED, 4168 COMMON, 4169 GREEK, 4170 COMMON, 4171 HIRAGANA, 4172 COMMON, 4173 HAN, 4174 COMMON, 4175 INHERITED, 4176 
UNKNOWN 4177 }; 4178 4179 private static HashMap<String, Character.UnicodeScript> aliases; 4180 static { 4181 aliases = new HashMap<>(128); 4182 aliases.put("ARAB", ARABIC); 4183 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4184 aliases.put("ARMN", ARMENIAN); 4185 aliases.put("AVST", AVESTAN); 4186 aliases.put("BALI", BALINESE); 4187 aliases.put("BAMU", BAMUM); 4188 aliases.put("BENG", BENGALI); 4189 aliases.put("BOPO", BOPOMOFO); 4190 aliases.put("BRAI", BRAILLE); 4191 aliases.put("BUGI", BUGINESE); 4192 aliases.put("BUHD", BUHID); 4193 aliases.put("CANS", CANADIAN_ABORIGINAL); 4194 aliases.put("CARI", CARIAN); 4195 aliases.put("CHAM", CHAM); 4196 aliases.put("CHER", CHEROKEE); 4197 aliases.put("COPT", COPTIC); 4198 aliases.put("CPRT", CYPRIOT); 4199 aliases.put("CYRL", CYRILLIC); 4200 aliases.put("DEVA", DEVANAGARI); 4201 aliases.put("DSRT", DESERET); 4202 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4203 aliases.put("ETHI", ETHIOPIC); 4204 aliases.put("GEOR", GEORGIAN); 4205 aliases.put("GLAG", GLAGOLITIC); 4206 aliases.put("GOTH", GOTHIC); 4207 aliases.put("GREK", GREEK); 4208 aliases.put("GUJR", GUJARATI); 4209 aliases.put("GURU", GURMUKHI); 4210 aliases.put("HANG", HANGUL); 4211 aliases.put("HANI", HAN); 4212 aliases.put("HANO", HANUNOO); 4213 aliases.put("HEBR", HEBREW); 4214 aliases.put("HIRA", HIRAGANA); 4215 // it appears we don't have the KATAKANA_OR_HIRAGANA 4216 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4217 aliases.put("ITAL", OLD_ITALIC); 4218 aliases.put("JAVA", JAVANESE); 4219 aliases.put("KALI", KAYAH_LI); 4220 aliases.put("KANA", KATAKANA); 4221 aliases.put("KHAR", KHAROSHTHI); 4222 aliases.put("KHMR", KHMER); 4223 aliases.put("KNDA", KANNADA); 4224 aliases.put("KTHI", KAITHI); 4225 aliases.put("LANA", TAI_THAM); 4226 aliases.put("LAOO", LAO); 4227 aliases.put("LATN", LATIN); 4228 aliases.put("LEPC", LEPCHA); 4229 aliases.put("LIMB", LIMBU); 4230 aliases.put("LINB", LINEAR_B); 4231 aliases.put("LISU", LISU); 4232 aliases.put("LYCI", LYCIAN); 4233 
aliases.put("LYDI", LYDIAN); 4234 aliases.put("MLYM", MALAYALAM); 4235 aliases.put("MONG", MONGOLIAN); 4236 aliases.put("MTEI", MEETEI_MAYEK); 4237 aliases.put("MYMR", MYANMAR); 4238 aliases.put("NKOO", NKO); 4239 aliases.put("OGAM", OGHAM); 4240 aliases.put("OLCK", OL_CHIKI); 4241 aliases.put("ORKH", OLD_TURKIC); 4242 aliases.put("ORYA", ORIYA); 4243 aliases.put("OSMA", OSMANYA); 4244 aliases.put("PHAG", PHAGS_PA); 4245 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4246 aliases.put("PHNX", PHOENICIAN); 4247 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4248 aliases.put("RJNG", REJANG); 4249 aliases.put("RUNR", RUNIC); 4250 aliases.put("SAMR", SAMARITAN); 4251 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4252 aliases.put("SAUR", SAURASHTRA); 4253 aliases.put("SHAW", SHAVIAN); 4254 aliases.put("SINH", SINHALA); 4255 aliases.put("SUND", SUNDANESE); 4256 aliases.put("SYLO", SYLOTI_NAGRI); 4257 aliases.put("SYRC", SYRIAC); 4258 aliases.put("TAGB", TAGBANWA); 4259 aliases.put("TALE", TAI_LE); 4260 aliases.put("TALU", NEW_TAI_LUE); 4261 aliases.put("TAML", TAMIL); 4262 aliases.put("TAVT", TAI_VIET); 4263 aliases.put("TELU", TELUGU); 4264 aliases.put("TFNG", TIFINAGH); 4265 aliases.put("TGLG", TAGALOG); 4266 aliases.put("THAA", THAANA); 4267 aliases.put("THAI", THAI); 4268 aliases.put("TIBT", TIBETAN); 4269 aliases.put("UGAR", UGARITIC); 4270 aliases.put("VAII", VAI); 4271 aliases.put("XPEO", OLD_PERSIAN); 4272 aliases.put("XSUX", CUNEIFORM); 4273 aliases.put("YIII", YI); 4274 aliases.put("ZINH", INHERITED); 4275 aliases.put("ZYYY", COMMON); 4276 aliases.put("ZZZZ", UNKNOWN); 4277 } 4278 4279 /** 4280 * Returns the enum constant representing the Unicode script of which 4281 * the given character (Unicode code point) is assigned to. 4282 * 4283 * @param codePoint the character (Unicode code point) in question. 4284 * @return The <code>UnicodeScript</code> constant representing the 4285 * Unicode script of which this character is assigned to. 
4286 * 4287 * @exception IllegalArgumentException if the specified 4288 * <code>codePoint</code> is an invalid Unicode code point. 4289 * @see Character#isValidCodePoint(int) 4290 * 4291 */ 4292 public static UnicodeScript of(int codePoint) { 4293 if (!isValidCodePoint(codePoint)) 4294 throw new IllegalArgumentException(); 4295 int type = getType(codePoint); 4296 // leave SURROGATE and PRIVATE_USE for table lookup 4297 if (type == UNASSIGNED) 4298 return UNKNOWN; 4299 int index = Arrays.binarySearch(scriptStarts, codePoint); 4300 if (index < 0) 4301 index = -index - 2; 4302 return scripts[index]; 4303 } 4304 4305 /** 4306 * Returns the UnicodeScript constant with the given Unicode script 4307 * name or the script name alias. Script names and their aliases are 4308 * determined by The Unicode Standard. The files Scripts<version>.txt 4309 * and PropertyValueAliases<version>.txt define script names 4310 * and the script name aliases for a particular version of the 4311 * standard. The {@link Character} class specifies the version of 4312 * the standard that it supports. 4313 * <p> 4314 * Character case is ignored for all of the valid script names. 4315 * The en_US locale's case mapping rules are used to provide 4316 * case-insensitive string comparisons for script name validation. 4317 * <p> 4318 * 4319 * @param scriptName A <code>UnicodeScript</code> name. 4320 * @return The <code>UnicodeScript</code> constant identified 4321 * by <code>scriptName</code> 4322 * @throws IllegalArgumentException if <code>scriptName</code> is an 4323 * invalid name 4324 * @throws NullPointerException if <code>scriptName</code> is null 4325 */ 4326 public static final UnicodeScript forName(String scriptName) { 4327 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4328 //.replace(' ', '_')); 4329 UnicodeScript sc = aliases.get(scriptName); 4330 if (sc != null) 4331 return sc; 4332 return valueOf(scriptName); 4333 } 4334 } 4335 4336 /** 4337 * The value of the <code>Character</code>. 
4338 * 4339 * @serial 4340 */ 4341 private final char value; 4342 4343 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4344 private static final long serialVersionUID = 3786198910865385080L; 4345 4346 /** 4347 * Constructs a newly allocated <code>Character</code> object that 4348 * represents the specified <code>char</code> value. 4349 * 4350 * @param value the value to be represented by the 4351 * <code>Character</code> object. 4352 */ 4353 public Character(char value) { 4354 this.value = value; 4355 } 4356 4357 private static class CharacterCache { 4358 private CharacterCache(){} 4359 4360 static final Character cache[] = new Character[127 + 1]; 4361 4362 static { 4363 for (int i = 0; i < cache.length; i++) 4364 cache[i] = new Character((char)i); 4365 } 4366 } 4367 4368 /** 4369 * Returns a <tt>Character</tt> instance representing the specified 4370 * <tt>char</tt> value. 4371 * If a new <tt>Character</tt> instance is not required, this method 4372 * should generally be used in preference to the constructor 4373 * {@link #Character(char)}, as this method is likely to yield 4374 * significantly better space and time performance by caching 4375 * frequently requested values. 4376 * 4377 * This method will always cache values in the range {@code 4378 * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may 4379 * cache other values outside of this range. 4380 * 4381 * @param c a char value. 4382 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4383 * @since 1.5 4384 */ 4385 public static Character valueOf(char c) { 4386 if (c <= 127) { // must cache 4387 return CharacterCache.cache[(int)c]; 4388 } 4389 return new Character(c); 4390 } 4391 4392 /** 4393 * Returns the value of this <code>Character</code> object. 4394 * @return the primitive <code>char</code> value represented by 4395 * this object. 
4396 */ 4397 public char charValue() { 4398 return value; 4399 } 4400 4401 /** 4402 * Returns a hash code for this {@code Character}; equal to the result 4403 * of invoking {@code charValue()}. 4404 * 4405 * @return a hash code value for this {@code Character} 4406 */ 4407 public int hashCode() { 4408 return (int)value; 4409 } 4410 4411 /** 4412 * Compares this object against the specified object. 4413 * The result is <code>true</code> if and only if the argument is not 4414 * <code>null</code> and is a <code>Character</code> object that 4415 * represents the same <code>char</code> value as this object. 4416 * 4417 * @param obj the object to compare with. 4418 * @return <code>true</code> if the objects are the same; 4419 * <code>false</code> otherwise. 4420 */ 4421 public boolean equals(Object obj) { 4422 if (obj instanceof Character) { 4423 return value == ((Character)obj).charValue(); 4424 } 4425 return false; 4426 } 4427 4428 /** 4429 * Returns a <code>String</code> object representing this 4430 * <code>Character</code>'s value. The result is a string of 4431 * length 1 whose sole component is the primitive 4432 * <code>char</code> value represented by this 4433 * <code>Character</code> object. 4434 * 4435 * @return a string representation of this object. 4436 */ 4437 public String toString() { 4438 char buf[] = {value}; 4439 return String.valueOf(buf); 4440 } 4441 4442 /** 4443 * Returns a <code>String</code> object representing the 4444 * specified <code>char</code>. The result is a string of length 4445 * 1 consisting solely of the specified <code>char</code>. 
4446 * 4447 * @param c the <code>char</code> to be converted 4448 * @return the string representation of the specified <code>char</code> 4449 * @since 1.4 4450 */ 4451 public static String toString(char c) { 4452 return String.valueOf(c); 4453 } 4454 4455 /** 4456 * Determines whether the specified code point is a valid 4457 * <a href="http://www.unicode.org/glossary/#code_point"> 4458 * Unicode code point value</a>. 4459 * 4460 * @param codePoint the Unicode code point to be tested 4461 * @return {@code true} if the specified code point value is between 4462 * {@link #MIN_CODE_POINT} and 4463 * {@link #MAX_CODE_POINT} inclusive; 4464 * {@code false} otherwise. 4465 * @since 1.5 4466 */ 4467 public static boolean isValidCodePoint(int codePoint) { 4468 // Optimized form of: 4469 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4470 int plane = codePoint >>> 16; 4471 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4472 } 4473 4474 /** 4475 * Determines whether the specified character (Unicode code point) 4476 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4477 * Such code points can be represented using a single {@code char}. 4478 * 4479 * @param codePoint the character (Unicode code point) to be tested 4480 * @return {@code true} if the specified code point is between 4481 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4482 * {@code false} otherwise. 4483 * @since 1.7 4484 */ 4485 public static boolean isBmpCodePoint(int codePoint) { 4486 return codePoint >>> 16 == 0; 4487 // Optimized form of: 4488 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4489 // We consistently use logical shift (>>>) to facilitate 4490 // additional runtime optimizations. 4491 } 4492 4493 /** 4494 * Determines whether the specified character (Unicode code point) 4495 * is in the <a href="#supplementary">supplementary character</a> range. 
4496 * 4497 * @param codePoint the character (Unicode code point) to be tested 4498 * @return {@code true} if the specified code point is between 4499 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4500 * {@link #MAX_CODE_POINT} inclusive; 4501 * {@code false} otherwise. 4502 * @since 1.5 4503 */ 4504 public static boolean isSupplementaryCodePoint(int codePoint) { 4505 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4506 && codePoint < MAX_CODE_POINT + 1; 4507 } 4508 4509 /** 4510 * Determines if the given {@code char} value is a 4511 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4512 * Unicode high-surrogate code unit</a> 4513 * (also known as <i>leading-surrogate code unit</i>). 4514 * 4515 * <p>Such values do not represent characters by themselves, 4516 * but are used in the representation of 4517 * <a href="#supplementary">supplementary characters</a> 4518 * in the UTF-16 encoding. 4519 * 4520 * @param ch the {@code char} value to be tested. 4521 * @return {@code true} if the {@code char} value is between 4522 * {@link #MIN_HIGH_SURROGATE} and 4523 * {@link #MAX_HIGH_SURROGATE} inclusive; 4524 * {@code false} otherwise. 4525 * @see Character#isLowSurrogate(char) 4526 * @see Character.UnicodeBlock#of(int) 4527 * @since 1.5 4528 */ 4529 public static boolean isHighSurrogate(char ch) { 4530 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4531 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4532 } 4533 4534 /** 4535 * Determines if the given {@code char} value is a 4536 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4537 * Unicode low-surrogate code unit</a> 4538 * (also known as <i>trailing-surrogate code unit</i>). 4539 * 4540 * <p>Such values do not represent characters by themselves, 4541 * but are used in the representation of 4542 * <a href="#supplementary">supplementary characters</a> 4543 * in the UTF-16 encoding. 
4544 * 4545 * @param ch the {@code char} value to be tested. 4546 * @return {@code true} if the {@code char} value is between 4547 * {@link #MIN_LOW_SURROGATE} and 4548 * {@link #MAX_LOW_SURROGATE} inclusive; 4549 * {@code false} otherwise. 4550 * @see Character#isHighSurrogate(char) 4551 * @since 1.5 4552 */ 4553 public static boolean isLowSurrogate(char ch) { 4554 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4555 } 4556 4557 /** 4558 * Determines if the given {@code char} value is a Unicode 4559 * <i>surrogate code unit</i>. 4560 * 4561 * <p>Such values do not represent characters by themselves, 4562 * but are used in the representation of 4563 * <a href="#supplementary">supplementary characters</a> 4564 * in the UTF-16 encoding. 4565 * 4566 * <p>A char value is a surrogate code unit if and only if it is either 4567 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4568 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4569 * 4570 * @param ch the {@code char} value to be tested. 4571 * @return {@code true} if the {@code char} value is between 4572 * {@link #MIN_SURROGATE} and 4573 * {@link #MAX_SURROGATE} inclusive; 4574 * {@code false} otherwise. 4575 * @since 1.7 4576 */ 4577 public static boolean isSurrogate(char ch) { 4578 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4579 } 4580 4581 /** 4582 * Determines whether the specified pair of <code>char</code> 4583 * values is a valid 4584 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4585 * Unicode surrogate pair</a>. 
4586 4587 * <p>This method is equivalent to the expression: 4588 * <blockquote><pre> 4589 * isHighSurrogate(high) && isLowSurrogate(low) 4590 * </pre></blockquote> 4591 * 4592 * @param high the high-surrogate code value to be tested 4593 * @param low the low-surrogate code value to be tested 4594 * @return <code>true</code> if the specified high and 4595 * low-surrogate code values represent a valid surrogate pair; 4596 * <code>false</code> otherwise. 4597 * @since 1.5 4598 */ 4599 public static boolean isSurrogatePair(char high, char low) { 4600 return isHighSurrogate(high) && isLowSurrogate(low); 4601 } 4602 4603 /** 4604 * Determines the number of <code>char</code> values needed to 4605 * represent the specified character (Unicode code point). If the 4606 * specified character is equal to or greater than 0x10000, then 4607 * the method returns 2. Otherwise, the method returns 1. 4608 * 4609 * <p>This method doesn't validate the specified character to be a 4610 * valid Unicode code point. The caller must validate the 4611 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4612 * if necessary. 4613 * 4614 * @param codePoint the character (Unicode code point) to be tested. 4615 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4616 * @see Character#isSupplementaryCodePoint(int) 4617 * @since 1.5 4618 */ 4619 public static int charCount(int codePoint) { 4620 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4621 } 4622 4623 /** 4624 * Converts the specified surrogate pair to its supplementary code 4625 * point value. This method does not validate the specified 4626 * surrogate pair. The caller must validate it using {@link 4627 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4628 * 4629 * @param high the high-surrogate code unit 4630 * @param low the low-surrogate code unit 4631 * @return the supplementary code point composed from the 4632 * specified surrogate pair. 
4633 * @since 1.5 4634 */ 4635 public static int toCodePoint(char high, char low) { 4636 // Optimized form of: 4637 // return ((high - MIN_HIGH_SURROGATE) << 10) 4638 // + (low - MIN_LOW_SURROGATE) 4639 // + MIN_SUPPLEMENTARY_CODE_POINT; 4640 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4641 - (MIN_HIGH_SURROGATE << 10) 4642 - MIN_LOW_SURROGATE); 4643 } 4644 4645 /** 4646 * Returns the code point at the given index of the 4647 * <code>CharSequence</code>. If the <code>char</code> value at 4648 * the given index in the <code>CharSequence</code> is in the 4649 * high-surrogate range, the following index is less than the 4650 * length of the <code>CharSequence</code>, and the 4651 * <code>char</code> value at the following index is in the 4652 * low-surrogate range, then the supplementary code point 4653 * corresponding to this surrogate pair is returned. Otherwise, 4654 * the <code>char</code> value at the given index is returned. 4655 * 4656 * @param seq a sequence of <code>char</code> values (Unicode code 4657 * units) 4658 * @param index the index to the <code>char</code> values (Unicode 4659 * code units) in <code>seq</code> to be converted 4660 * @return the Unicode code point at the given index 4661 * @exception NullPointerException if <code>seq</code> is null. 4662 * @exception IndexOutOfBoundsException if the value 4663 * <code>index</code> is negative or not less than 4664 * {@link CharSequence#length() seq.length()}. 4665 * @since 1.5 4666 */ 4667 public static int codePointAt(CharSequence seq, int index) { 4668 char c1 = seq.charAt(index++); 4669 if (isHighSurrogate(c1)) { 4670 if (index < seq.length()) { 4671 char c2 = seq.charAt(index); 4672 if (isLowSurrogate(c2)) { 4673 return toCodePoint(c1, c2); 4674 } 4675 } 4676 } 4677 return c1; 4678 } 4679 4680 /** 4681 * Returns the code point at the given index of the 4682 * <code>char</code> array. 
If the <code>char</code> value at 4683 * the given index in the <code>char</code> array is in the 4684 * high-surrogate range, the following index is less than the 4685 * length of the <code>char</code> array, and the 4686 * <code>char</code> value at the following index is in the 4687 * low-surrogate range, then the supplementary code point 4688 * corresponding to this surrogate pair is returned. Otherwise, 4689 * the <code>char</code> value at the given index is returned. 4690 * 4691 * @param a the <code>char</code> array 4692 * @param index the index to the <code>char</code> values (Unicode 4693 * code units) in the <code>char</code> array to be converted 4694 * @return the Unicode code point at the given index 4695 * @exception NullPointerException if <code>a</code> is null. 4696 * @exception IndexOutOfBoundsException if the value 4697 * <code>index</code> is negative or not less than 4698 * the length of the <code>char</code> array. 4699 * @since 1.5 4700 */ 4701 public static int codePointAt(char[] a, int index) { 4702 return codePointAtImpl(a, index, a.length); 4703 } 4704 4705 /** 4706 * Returns the code point at the given index of the 4707 * <code>char</code> array, where only array elements with 4708 * <code>index</code> less than <code>limit</code> can be used. If 4709 * the <code>char</code> value at the given index in the 4710 * <code>char</code> array is in the high-surrogate range, the 4711 * following index is less than the <code>limit</code>, and the 4712 * <code>char</code> value at the following index is in the 4713 * low-surrogate range, then the supplementary code point 4714 * corresponding to this surrogate pair is returned. Otherwise, 4715 * the <code>char</code> value at the given index is returned. 
4716 * 4717 * @param a the <code>char</code> array 4718 * @param index the index to the <code>char</code> values (Unicode 4719 * code units) in the <code>char</code> array to be converted 4720 * @param limit the index after the last array element that can be used in the 4721 * <code>char</code> array 4722 * @return the Unicode code point at the given index 4723 * @exception NullPointerException if <code>a</code> is null. 4724 * @exception IndexOutOfBoundsException if the <code>index</code> 4725 * argument is negative or not less than the <code>limit</code> 4726 * argument, or if the <code>limit</code> argument is negative or 4727 * greater than the length of the <code>char</code> array. 4728 * @since 1.5 4729 */ 4730 public static int codePointAt(char[] a, int index, int limit) { 4731 if (index >= limit || limit < 0 || limit > a.length) { 4732 throw new IndexOutOfBoundsException(); 4733 } 4734 return codePointAtImpl(a, index, limit); 4735 } 4736 4737 // throws ArrayIndexOutofBoundsException if index out of bounds 4738 static int codePointAtImpl(char[] a, int index, int limit) { 4739 char c1 = a[index++]; 4740 if (isHighSurrogate(c1)) { 4741 if (index < limit) { 4742 char c2 = a[index]; 4743 if (isLowSurrogate(c2)) { 4744 return toCodePoint(c1, c2); 4745 } 4746 } 4747 } 4748 return c1; 4749 } 4750 4751 /** 4752 * Returns the code point preceding the given index of the 4753 * <code>CharSequence</code>. If the <code>char</code> value at 4754 * <code>(index - 1)</code> in the <code>CharSequence</code> is in 4755 * the low-surrogate range, <code>(index - 2)</code> is not 4756 * negative, and the <code>char</code> value at <code>(index - 4757 * 2)</code> in the <code>CharSequence</code> is in the 4758 * high-surrogate range, then the supplementary code point 4759 * corresponding to this surrogate pair is returned. Otherwise, 4760 * the <code>char</code> value at <code>(index - 1)</code> is 4761 * returned. 
4762 * 4763 * @param seq the <code>CharSequence</code> instance 4764 * @param index the index following the code point that should be returned 4765 * @return the Unicode code point value before the given index. 4766 * @exception NullPointerException if <code>seq</code> is null. 4767 * @exception IndexOutOfBoundsException if the <code>index</code> 4768 * argument is less than 1 or greater than {@link 4769 * CharSequence#length() seq.length()}. 4770 * @since 1.5 4771 */ 4772 public static int codePointBefore(CharSequence seq, int index) { 4773 char c2 = seq.charAt(--index); 4774 if (isLowSurrogate(c2)) { 4775 if (index > 0) { 4776 char c1 = seq.charAt(--index); 4777 if (isHighSurrogate(c1)) { 4778 return toCodePoint(c1, c2); 4779 } 4780 } 4781 } 4782 return c2; 4783 } 4784 4785 /** 4786 * Returns the code point preceding the given index of the 4787 * <code>char</code> array. If the <code>char</code> value at 4788 * <code>(index - 1)</code> in the <code>char</code> array is in 4789 * the low-surrogate range, <code>(index - 2)</code> is not 4790 * negative, and the <code>char</code> value at <code>(index - 4791 * 2)</code> in the <code>char</code> array is in the 4792 * high-surrogate range, then the supplementary code point 4793 * corresponding to this surrogate pair is returned. Otherwise, 4794 * the <code>char</code> value at <code>(index - 1)</code> is 4795 * returned. 4796 * 4797 * @param a the <code>char</code> array 4798 * @param index the index following the code point that should be returned 4799 * @return the Unicode code point value before the given index. 4800 * @exception NullPointerException if <code>a</code> is null. 
4801 * @exception IndexOutOfBoundsException if the <code>index</code> 4802 * argument is less than 1 or greater than the length of the 4803 * <code>char</code> array 4804 * @since 1.5 4805 */ 4806 public static int codePointBefore(char[] a, int index) { 4807 return codePointBeforeImpl(a, index, 0); 4808 } 4809 4810 /** 4811 * Returns the code point preceding the given index of the 4812 * <code>char</code> array, where only array elements with 4813 * <code>index</code> greater than or equal to <code>start</code> 4814 * can be used. If the <code>char</code> value at <code>(index - 4815 * 1)</code> in the <code>char</code> array is in the 4816 * low-surrogate range, <code>(index - 2)</code> is not less than 4817 * <code>start</code>, and the <code>char</code> value at 4818 * <code>(index - 2)</code> in the <code>char</code> array is in 4819 * the high-surrogate range, then the supplementary code point 4820 * corresponding to this surrogate pair is returned. Otherwise, 4821 * the <code>char</code> value at <code>(index - 1)</code> is 4822 * returned. 4823 * 4824 * @param a the <code>char</code> array 4825 * @param index the index following the code point that should be returned 4826 * @param start the index of the first array element in the 4827 * <code>char</code> array 4828 * @return the Unicode code point value before the given index. 4829 * @exception NullPointerException if <code>a</code> is null. 4830 * @exception IndexOutOfBoundsException if the <code>index</code> 4831 * argument is not greater than the <code>start</code> argument or 4832 * is greater than the length of the <code>char</code> array, or 4833 * if the <code>start</code> argument is negative or not less than 4834 * the length of the <code>char</code> array. 
4835 * @since 1.5 4836 */ 4837 public static int codePointBefore(char[] a, int index, int start) { 4838 if (index <= start || start < 0 || start >= a.length) { 4839 throw new IndexOutOfBoundsException(); 4840 } 4841 return codePointBeforeImpl(a, index, start); 4842 } 4843 4844 // throws ArrayIndexOutofBoundsException if index-1 out of bounds 4845 static int codePointBeforeImpl(char[] a, int index, int start) { 4846 char c2 = a[--index]; 4847 if (isLowSurrogate(c2)) { 4848 if (index > start) { 4849 char c1 = a[--index]; 4850 if (isHighSurrogate(c1)) { 4851 return toCodePoint(c1, c2); 4852 } 4853 } 4854 } 4855 return c2; 4856 } 4857 4858 /** 4859 * Returns the leading surrogate (a 4860 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4861 * high surrogate code unit</a>) of the 4862 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4863 * surrogate pair</a> 4864 * representing the specified supplementary character (Unicode 4865 * code point) in the UTF-16 encoding. If the specified character 4866 * is not a 4867 * <a href="Character.html#supplementary">supplementary character</a>, 4868 * an unspecified {@code char} is returned. 4869 * 4870 * <p>If 4871 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4872 * is {@code true}, then 4873 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 4874 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 4875 * are also always {@code true}. 
4876 * 4877 * @param codePoint a supplementary character (Unicode code point) 4878 * @return the leading surrogate code unit used to represent the 4879 * character in the UTF-16 encoding 4880 * @since 1.7 4881 */ 4882 public static char highSurrogate(int codePoint) { 4883 return (char) ((codePoint >>> 10) 4884 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 4885 } 4886 4887 /** 4888 * Returns the trailing surrogate (a 4889 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4890 * low surrogate code unit</a>) of the 4891 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4892 * surrogate pair</a> 4893 * representing the specified supplementary character (Unicode 4894 * code point) in the UTF-16 encoding. If the specified character 4895 * is not a 4896 * <a href="Character.html#supplementary">supplementary character</a>, 4897 * an unspecified {@code char} is returned. 4898 * 4899 * <p>If 4900 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4901 * is {@code true}, then 4902 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 4903 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 4904 * are also always {@code true}. 4905 * 4906 * @param codePoint a supplementary character (Unicode code point) 4907 * @return the trailing surrogate code unit used to represent the 4908 * character in the UTF-16 encoding 4909 * @since 1.7 4910 */ 4911 public static char lowSurrogate(int codePoint) { 4912 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 4913 } 4914 4915 /** 4916 * Converts the specified character (Unicode code point) to its 4917 * UTF-16 representation. If the specified code point is a BMP 4918 * (Basic Multilingual Plane or Plane 0) value, the same value is 4919 * stored in <code>dst[dstIndex]</code>, and 1 is returned. 
If the 4920 * specified code point is a supplementary character, its 4921 * surrogate values are stored in <code>dst[dstIndex]</code> 4922 * (high-surrogate) and <code>dst[dstIndex+1]</code> 4923 * (low-surrogate), and 2 is returned. 4924 * 4925 * @param codePoint the character (Unicode code point) to be converted. 4926 * @param dst an array of <code>char</code> in which the 4927 * <code>codePoint</code>'s UTF-16 value is stored. 4928 * @param dstIndex the start index into the <code>dst</code> 4929 * array where the converted value is stored. 4930 * @return 1 if the code point is a BMP code point, 2 if the 4931 * code point is a supplementary code point. 4932 * @exception IllegalArgumentException if the specified 4933 * <code>codePoint</code> is not a valid Unicode code point. 4934 * @exception NullPointerException if the specified <code>dst</code> is null. 4935 * @exception IndexOutOfBoundsException if <code>dstIndex</code> 4936 * is negative or not less than <code>dst.length</code>, or if 4937 * <code>dst</code> at <code>dstIndex</code> doesn't have enough 4938 * array element(s) to store the resulting <code>char</code> 4939 * value(s). (If <code>dstIndex</code> is equal to 4940 * <code>dst.length-1</code> and the specified 4941 * <code>codePoint</code> is a supplementary character, the 4942 * high-surrogate value is not stored in 4943 * <code>dst[dstIndex]</code>.) 4944 * @since 1.5 4945 */ 4946 public static int toChars(int codePoint, char[] dst, int dstIndex) { 4947 if (isBmpCodePoint(codePoint)) { 4948 dst[dstIndex] = (char) codePoint; 4949 return 1; 4950 } else if (isValidCodePoint(codePoint)) { 4951 toSurrogates(codePoint, dst, dstIndex); 4952 return 2; 4953 } else { 4954 throw new IllegalArgumentException(); 4955 } 4956 } 4957 4958 /** 4959 * Converts the specified character (Unicode code point) to its 4960 * UTF-16 representation stored in a <code>char</code> array. 
If 4961 * the specified code point is a BMP (Basic Multilingual Plane or 4962 * Plane 0) value, the resulting <code>char</code> array has 4963 * the same value as <code>codePoint</code>. If the specified code 4964 * point is a supplementary code point, the resulting 4965 * <code>char</code> array has the corresponding surrogate pair. 4966 * 4967 * @param codePoint a Unicode code point 4968 * @return a <code>char</code> array having 4969 * <code>codePoint</code>'s UTF-16 representation. 4970 * @exception IllegalArgumentException if the specified 4971 * <code>codePoint</code> is not a valid Unicode code point. 4972 * @since 1.5 4973 */ 4974 public static char[] toChars(int codePoint) { 4975 if (isBmpCodePoint(codePoint)) { 4976 return new char[] { (char) codePoint }; 4977 } else if (isValidCodePoint(codePoint)) { 4978 char[] result = new char[2]; 4979 toSurrogates(codePoint, result, 0); 4980 return result; 4981 } else { 4982 throw new IllegalArgumentException(); 4983 } 4984 } 4985 4986 static void toSurrogates(int codePoint, char[] dst, int index) { 4987 // We write elements "backwards" to guarantee all-or-nothing 4988 dst[index+1] = lowSurrogate(codePoint); 4989 dst[index] = highSurrogate(codePoint); 4990 } 4991 4992 /** 4993 * Returns the number of Unicode code points in the text range of 4994 * the specified char sequence. The text range begins at the 4995 * specified <code>beginIndex</code> and extends to the 4996 * <code>char</code> at index <code>endIndex - 1</code>. Thus the 4997 * length (in <code>char</code>s) of the text range is 4998 * <code>endIndex-beginIndex</code>. Unpaired surrogates within 4999 * the text range count as one code point each. 5000 * 5001 * @param seq the char sequence 5002 * @param beginIndex the index to the first <code>char</code> of 5003 * the text range. 5004 * @param endIndex the index after the last <code>char</code> of 5005 * the text range. 
5006 * @return the number of Unicode code points in the specified text 5007 * range 5008 * @exception NullPointerException if <code>seq</code> is null. 5009 * @exception IndexOutOfBoundsException if the 5010 * <code>beginIndex</code> is negative, or <code>endIndex</code> 5011 * is larger than the length of the given sequence, or 5012 * <code>beginIndex</code> is larger than <code>endIndex</code>. 5013 * @since 1.5 5014 */ 5015 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5016 int length = seq.length(); 5017 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5018 throw new IndexOutOfBoundsException(); 5019 } 5020 int n = endIndex - beginIndex; 5021 for (int i = beginIndex; i < endIndex; ) { 5022 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5023 isLowSurrogate(seq.charAt(i))) { 5024 n--; 5025 i++; 5026 } 5027 } 5028 return n; 5029 } 5030 5031 /** 5032 * Returns the number of Unicode code points in a subarray of the 5033 * <code>char</code> array argument. The <code>offset</code> 5034 * argument is the index of the first <code>char</code> of the 5035 * subarray and the <code>count</code> argument specifies the 5036 * length of the subarray in <code>char</code>s. Unpaired 5037 * surrogates within the subarray count as one code point each. 5038 * 5039 * @param a the <code>char</code> array 5040 * @param offset the index of the first <code>char</code> in the 5041 * given <code>char</code> array 5042 * @param count the length of the subarray in <code>char</code>s 5043 * @return the number of Unicode code points in the specified subarray 5044 * @exception NullPointerException if <code>a</code> is null. 5045 * @exception IndexOutOfBoundsException if <code>offset</code> or 5046 * <code>count</code> is negative, or if <code>offset + 5047 * count</code> is larger than the length of the given array. 
5048 * @since 1.5 5049 */ 5050 public static int codePointCount(char[] a, int offset, int count) { 5051 if (count > a.length - offset || offset < 0 || count < 0) { 5052 throw new IndexOutOfBoundsException(); 5053 } 5054 return codePointCountImpl(a, offset, count); 5055 } 5056 5057 static int codePointCountImpl(char[] a, int offset, int count) { 5058 int endIndex = offset + count; 5059 int n = count; 5060 for (int i = offset; i < endIndex; ) { 5061 if (isHighSurrogate(a[i++]) && i < endIndex && 5062 isLowSurrogate(a[i])) { 5063 n--; 5064 i++; 5065 } 5066 } 5067 return n; 5068 } 5069 5070 /** 5071 * Returns the index within the given char sequence that is offset 5072 * from the given <code>index</code> by <code>codePointOffset</code> 5073 * code points. Unpaired surrogates within the text range given by 5074 * <code>index</code> and <code>codePointOffset</code> count as 5075 * one code point each. 5076 * 5077 * @param seq the char sequence 5078 * @param index the index to be offset 5079 * @param codePointOffset the offset in code points 5080 * @return the index within the char sequence 5081 * @exception NullPointerException if <code>seq</code> is null. 5082 * @exception IndexOutOfBoundsException if <code>index</code> 5083 * is negative or larger then the length of the char sequence, 5084 * or if <code>codePointOffset</code> is positive and the 5085 * subsequence starting with <code>index</code> has fewer than 5086 * <code>codePointOffset</code> code points, or if 5087 * <code>codePointOffset</code> is negative and the subsequence 5088 * before <code>index</code> has fewer than the absolute value 5089 * of <code>codePointOffset</code> code points. 
5090 * @since 1.5 5091 */ 5092 public static int offsetByCodePoints(CharSequence seq, int index, 5093 int codePointOffset) { 5094 int length = seq.length(); 5095 if (index < 0 || index > length) { 5096 throw new IndexOutOfBoundsException(); 5097 } 5098 5099 int x = index; 5100 if (codePointOffset >= 0) { 5101 int i; 5102 for (i = 0; x < length && i < codePointOffset; i++) { 5103 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5104 isLowSurrogate(seq.charAt(x))) { 5105 x++; 5106 } 5107 } 5108 if (i < codePointOffset) { 5109 throw new IndexOutOfBoundsException(); 5110 } 5111 } else { 5112 int i; 5113 for (i = codePointOffset; x > 0 && i < 0; i++) { 5114 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5115 isHighSurrogate(seq.charAt(x-1))) { 5116 x--; 5117 } 5118 } 5119 if (i < 0) { 5120 throw new IndexOutOfBoundsException(); 5121 } 5122 } 5123 return x; 5124 } 5125 5126 /** 5127 * Returns the index within the given <code>char</code> subarray 5128 * that is offset from the given <code>index</code> by 5129 * <code>codePointOffset</code> code points. The 5130 * <code>start</code> and <code>count</code> arguments specify a 5131 * subarray of the <code>char</code> array. Unpaired surrogates 5132 * within the text range given by <code>index</code> and 5133 * <code>codePointOffset</code> count as one code point each. 5134 * 5135 * @param a the <code>char</code> array 5136 * @param start the index of the first <code>char</code> of the 5137 * subarray 5138 * @param count the length of the subarray in <code>char</code>s 5139 * @param index the index to be offset 5140 * @param codePointOffset the offset in code points 5141 * @return the index within the subarray 5142 * @exception NullPointerException if <code>a</code> is null. 
5143 * @exception IndexOutOfBoundsException 5144 * if <code>start</code> or <code>count</code> is negative, 5145 * or if <code>start + count</code> is larger than the length of 5146 * the given array, 5147 * or if <code>index</code> is less than <code>start</code> or 5148 * larger then <code>start + count</code>, 5149 * or if <code>codePointOffset</code> is positive and the text range 5150 * starting with <code>index</code> and ending with <code>start 5151 * + count - 1</code> has fewer than <code>codePointOffset</code> code 5152 * points, 5153 * or if <code>codePointOffset</code> is negative and the text range 5154 * starting with <code>start</code> and ending with <code>index 5155 * - 1</code> has fewer than the absolute value of 5156 * <code>codePointOffset</code> code points. 5157 * @since 1.5 5158 */ 5159 public static int offsetByCodePoints(char[] a, int start, int count, 5160 int index, int codePointOffset) { 5161 if (count > a.length-start || start < 0 || count < 0 5162 || index < start || index > start+count) { 5163 throw new IndexOutOfBoundsException(); 5164 } 5165 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5166 } 5167 5168 static int offsetByCodePointsImpl(char[]a, int start, int count, 5169 int index, int codePointOffset) { 5170 int x = index; 5171 if (codePointOffset >= 0) { 5172 int limit = start + count; 5173 int i; 5174 for (i = 0; x < limit && i < codePointOffset; i++) { 5175 if (isHighSurrogate(a[x++]) && x < limit && 5176 isLowSurrogate(a[x])) { 5177 x++; 5178 } 5179 } 5180 if (i < codePointOffset) { 5181 throw new IndexOutOfBoundsException(); 5182 } 5183 } else { 5184 int i; 5185 for (i = codePointOffset; x > start && i < 0; i++) { 5186 if (isLowSurrogate(a[--x]) && x > start && 5187 isHighSurrogate(a[x-1])) { 5188 x--; 5189 } 5190 } 5191 if (i < 0) { 5192 throw new IndexOutOfBoundsException(); 5193 } 5194 } 5195 return x; 5196 } 5197 5198 /** 5199 * Determines if the specified character is a lowercase character. 
5200 * <p> 5201 * A character is lowercase if its general category type, provided 5202 * by <code>Character.getType(ch)</code>, is 5203 * <code>LOWERCASE_LETTER</code>. 5204 * <p> 5205 * The following are examples of lowercase characters: 5206 * <p><blockquote><pre> 5207 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5208 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5209 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5210 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5211 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5212 * </pre></blockquote> 5213 * <p> Many other Unicode characters are lowercase too. 5214 * 5215 * <p><b>Note:</b> This method cannot handle <a 5216 * href="#supplementary"> supplementary characters</a>. To support 5217 * all Unicode characters, including supplementary characters, use 5218 * the {@link #isLowerCase(int)} method. 5219 * 5220 * @param ch the character to be tested. 5221 * @return <code>true</code> if the character is lowercase; 5222 * <code>false</code> otherwise. 5223 * @see Character#isLowerCase(char) 5224 * @see Character#isTitleCase(char) 5225 * @see Character#toLowerCase(char) 5226 * @see Character#getType(char) 5227 */ 5228 public static boolean isLowerCase(char ch) { 5229 return isLowerCase((int)ch); 5230 } 5231 5232 /** 5233 * Determines if the specified character (Unicode code point) is a 5234 * lowercase character. 5235 * <p> 5236 * A character is lowercase if its general category type, provided 5237 * by {@link Character#getType getType(codePoint)}, is 5238 * <code>LOWERCASE_LETTER</code>. 
5239 * <p> 5240 * The following are examples of lowercase characters: 5241 * <p><blockquote><pre> 5242 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5243 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5244 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5245 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5246 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5247 * </pre></blockquote> 5248 * <p> Many other Unicode characters are lowercase too. 5249 * 5250 * @param codePoint the character (Unicode code point) to be tested. 5251 * @return <code>true</code> if the character is lowercase; 5252 * <code>false</code> otherwise. 5253 * @see Character#isLowerCase(int) 5254 * @see Character#isTitleCase(int) 5255 * @see Character#toLowerCase(int) 5256 * @see Character#getType(int) 5257 * @since 1.5 5258 */ 5259 public static boolean isLowerCase(int codePoint) { 5260 return getType(codePoint) == Character.LOWERCASE_LETTER; 5261 } 5262 5263 /** 5264 * Determines if the specified character is an uppercase character. 5265 * <p> 5266 * A character is uppercase if its general category type, provided by 5267 * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>. 5268 * <p> 5269 * The following are examples of uppercase characters: 5270 * <p><blockquote><pre> 5271 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5272 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5273 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5274 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5275 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5276 * </pre></blockquote> 5277 * <p> Many other Unicode characters are uppercase too.<p> 5278 * 5279 * <p><b>Note:</b> This method cannot handle <a 5280 * href="#supplementary"> supplementary characters</a>. 
To support 5281 * all Unicode characters, including supplementary characters, use 5282 * the {@link #isUpperCase(int)} method. 5283 * 5284 * @param ch the character to be tested. 5285 * @return <code>true</code> if the character is uppercase; 5286 * <code>false</code> otherwise. 5287 * @see Character#isLowerCase(char) 5288 * @see Character#isTitleCase(char) 5289 * @see Character#toUpperCase(char) 5290 * @see Character#getType(char) 5291 * @since 1.0 5292 */ 5293 public static boolean isUpperCase(char ch) { 5294 return isUpperCase((int)ch); 5295 } 5296 5297 /** 5298 * Determines if the specified character (Unicode code point) is an uppercase character. 5299 * <p> 5300 * A character is uppercase if its general category type, provided by 5301 * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>. 5302 * <p> 5303 * The following are examples of uppercase characters: 5304 * <p><blockquote><pre> 5305 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5306 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5307 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5308 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5309 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5310 * </pre></blockquote> 5311 * <p> Many other Unicode characters are uppercase too.<p> 5312 * 5313 * @param codePoint the character (Unicode code point) to be tested. 5314 * @return <code>true</code> if the character is uppercase; 5315 * <code>false</code> otherwise. 5316 * @see Character#isLowerCase(int) 5317 * @see Character#isTitleCase(int) 5318 * @see Character#toUpperCase(int) 5319 * @see Character#getType(int) 5320 * @since 1.5 5321 */ 5322 public static boolean isUpperCase(int codePoint) { 5323 return getType(codePoint) == Character.UPPERCASE_LETTER; 5324 } 5325 5326 /** 5327 * Determines if the specified character is a titlecase character. 
5328 * <p> 5329 * A character is a titlecase character if its general 5330 * category type, provided by <code>Character.getType(ch)</code>, 5331 * is <code>TITLECASE_LETTER</code>. 5332 * <p> 5333 * Some characters look like pairs of Latin letters. For example, there 5334 * is an uppercase letter that looks like "LJ" and has a corresponding 5335 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5336 * is the appropriate form to use when rendering a word in lowercase 5337 * with initial capitals, as for a book title. 5338 * <p> 5339 * These are some of the Unicode characters for which this method returns 5340 * <code>true</code>: 5341 * <ul> 5342 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 5343 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 5344 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 5345 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 5346 * </ul> 5347 * <p> Many other Unicode characters are titlecase too.<p> 5348 * 5349 * <p><b>Note:</b> This method cannot handle <a 5350 * href="#supplementary"> supplementary characters</a>. To support 5351 * all Unicode characters, including supplementary characters, use 5352 * the {@link #isTitleCase(int)} method. 5353 * 5354 * @param ch the character to be tested. 5355 * @return <code>true</code> if the character is titlecase; 5356 * <code>false</code> otherwise. 5357 * @see Character#isLowerCase(char) 5358 * @see Character#isUpperCase(char) 5359 * @see Character#toTitleCase(char) 5360 * @see Character#getType(char) 5361 * @since 1.0.2 5362 */ 5363 public static boolean isTitleCase(char ch) { 5364 return isTitleCase((int)ch); 5365 } 5366 5367 /** 5368 * Determines if the specified character (Unicode code point) is a titlecase character. 
5369 * <p> 5370 * A character is a titlecase character if its general 5371 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5372 * is <code>TITLECASE_LETTER</code>. 5373 * <p> 5374 * Some characters look like pairs of Latin letters. For example, there 5375 * is an uppercase letter that looks like "LJ" and has a corresponding 5376 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5377 * is the appropriate form to use when rendering a word in lowercase 5378 * with initial capitals, as for a book title. 5379 * <p> 5380 * These are some of the Unicode characters for which this method returns 5381 * <code>true</code>: 5382 * <ul> 5383 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 5384 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 5385 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 5386 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 5387 * </ul> 5388 * <p> Many other Unicode characters are titlecase too.<p> 5389 * 5390 * @param codePoint the character (Unicode code point) to be tested. 5391 * @return <code>true</code> if the character is titlecase; 5392 * <code>false</code> otherwise. 5393 * @see Character#isLowerCase(int) 5394 * @see Character#isUpperCase(int) 5395 * @see Character#toTitleCase(int) 5396 * @see Character#getType(int) 5397 * @since 1.5 5398 */ 5399 public static boolean isTitleCase(int codePoint) { 5400 return getType(codePoint) == Character.TITLECASE_LETTER; 5401 } 5402 5403 /** 5404 * Determines if the specified character is a digit. 5405 * <p> 5406 * A character is a digit if its general category type, provided 5407 * by <code>Character.getType(ch)</code>, is 5408 * <code>DECIMAL_DIGIT_NUMBER</code>. 
5409 * <p> 5410 * Some Unicode character ranges that contain digits: 5411 * <ul> 5412 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 5413 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 5414 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 5415 * Arabic-Indic digits 5416 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 5417 * Extended Arabic-Indic digits 5418 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 5419 * Devanagari digits 5420 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 5421 * Fullwidth digits 5422 * </ul> 5423 * 5424 * Many other character ranges contain digits as well. 5425 * 5426 * <p><b>Note:</b> This method cannot handle <a 5427 * href="#supplementary"> supplementary characters</a>. To support 5428 * all Unicode characters, including supplementary characters, use 5429 * the {@link #isDigit(int)} method. 5430 * 5431 * @param ch the character to be tested. 5432 * @return <code>true</code> if the character is a digit; 5433 * <code>false</code> otherwise. 5434 * @see Character#digit(char, int) 5435 * @see Character#forDigit(int, int) 5436 * @see Character#getType(char) 5437 */ 5438 public static boolean isDigit(char ch) { 5439 return isDigit((int)ch); 5440 } 5441 5442 /** 5443 * Determines if the specified character (Unicode code point) is a digit. 5444 * <p> 5445 * A character is a digit if its general category type, provided 5446 * by {@link Character#getType(int) getType(codePoint)}, is 5447 * <code>DECIMAL_DIGIT_NUMBER</code>. 
5448 * <p> 5449 * Some Unicode character ranges that contain digits: 5450 * <ul> 5451 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 5452 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 5453 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 5454 * Arabic-Indic digits 5455 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 5456 * Extended Arabic-Indic digits 5457 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 5458 * Devanagari digits 5459 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 5460 * Fullwidth digits 5461 * </ul> 5462 * 5463 * Many other character ranges contain digits as well. 5464 * 5465 * @param codePoint the character (Unicode code point) to be tested. 5466 * @return <code>true</code> if the character is a digit; 5467 * <code>false</code> otherwise. 5468 * @see Character#forDigit(int, int) 5469 * @see Character#getType(int) 5470 * @since 1.5 5471 */ 5472 public static boolean isDigit(int codePoint) { 5473 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5474 } 5475 5476 /** 5477 * Determines if a character is defined in Unicode. 5478 * <p> 5479 * A character is defined if at least one of the following is true: 5480 * <ul> 5481 * <li>It has an entry in the UnicodeData file. 5482 * <li>It has a value in a range defined by the UnicodeData file. 5483 * </ul> 5484 * 5485 * <p><b>Note:</b> This method cannot handle <a 5486 * href="#supplementary"> supplementary characters</a>. To support 5487 * all Unicode characters, including supplementary characters, use 5488 * the {@link #isDefined(int)} method. 5489 * 5490 * @param ch the character to be tested 5491 * @return <code>true</code> if the character has a defined meaning 5492 * in Unicode; <code>false</code> otherwise. 
5493 * @see Character#isDigit(char) 5494 * @see Character#isLetter(char) 5495 * @see Character#isLetterOrDigit(char) 5496 * @see Character#isLowerCase(char) 5497 * @see Character#isTitleCase(char) 5498 * @see Character#isUpperCase(char) 5499 * @since 1.0.2 5500 */ 5501 public static boolean isDefined(char ch) { 5502 return isDefined((int)ch); 5503 } 5504 5505 /** 5506 * Determines if a character (Unicode code point) is defined in Unicode. 5507 * <p> 5508 * A character is defined if at least one of the following is true: 5509 * <ul> 5510 * <li>It has an entry in the UnicodeData file. 5511 * <li>It has a value in a range defined by the UnicodeData file. 5512 * </ul> 5513 * 5514 * @param codePoint the character (Unicode code point) to be tested. 5515 * @return <code>true</code> if the character has a defined meaning 5516 * in Unicode; <code>false</code> otherwise. 5517 * @see Character#isDigit(int) 5518 * @see Character#isLetter(int) 5519 * @see Character#isLetterOrDigit(int) 5520 * @see Character#isLowerCase(int) 5521 * @see Character#isTitleCase(int) 5522 * @see Character#isUpperCase(int) 5523 * @since 1.5 5524 */ 5525 public static boolean isDefined(int codePoint) { 5526 return getType(codePoint) != Character.UNASSIGNED; 5527 } 5528 5529 /** 5530 * Determines if the specified character is a letter. 5531 * <p> 5532 * A character is considered to be a letter if its general 5533 * category type, provided by <code>Character.getType(ch)</code>, 5534 * is any of the following: 5535 * <ul> 5536 * <li> <code>UPPERCASE_LETTER</code> 5537 * <li> <code>LOWERCASE_LETTER</code> 5538 * <li> <code>TITLECASE_LETTER</code> 5539 * <li> <code>MODIFIER_LETTER</code> 5540 * <li> <code>OTHER_LETTER</code> 5541 * </ul> 5542 * 5543 * Not all letters have case. Many characters are 5544 * letters but are neither uppercase nor lowercase nor titlecase. 5545 * 5546 * <p><b>Note:</b> This method cannot handle <a 5547 * href="#supplementary"> supplementary characters</a>. 
To support 5548 * all Unicode characters, including supplementary characters, use 5549 * the {@link #isLetter(int)} method. 5550 * 5551 * @param ch the character to be tested. 5552 * @return <code>true</code> if the character is a letter; 5553 * <code>false</code> otherwise. 5554 * @see Character#isDigit(char) 5555 * @see Character#isJavaIdentifierStart(char) 5556 * @see Character#isJavaLetter(char) 5557 * @see Character#isJavaLetterOrDigit(char) 5558 * @see Character#isLetterOrDigit(char) 5559 * @see Character#isLowerCase(char) 5560 * @see Character#isTitleCase(char) 5561 * @see Character#isUnicodeIdentifierStart(char) 5562 * @see Character#isUpperCase(char) 5563 */ 5564 public static boolean isLetter(char ch) { 5565 return isLetter((int)ch); 5566 } 5567 5568 /** 5569 * Determines if the specified character (Unicode code point) is a letter. 5570 * <p> 5571 * A character is considered to be a letter if its general 5572 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5573 * is any of the following: 5574 * <ul> 5575 * <li> <code>UPPERCASE_LETTER</code> 5576 * <li> <code>LOWERCASE_LETTER</code> 5577 * <li> <code>TITLECASE_LETTER</code> 5578 * <li> <code>MODIFIER_LETTER</code> 5579 * <li> <code>OTHER_LETTER</code> 5580 * </ul> 5581 * 5582 * Not all letters have case. Many characters are 5583 * letters but are neither uppercase nor lowercase nor titlecase. 5584 * 5585 * @param codePoint the character (Unicode code point) to be tested. 5586 * @return <code>true</code> if the character is a letter; 5587 * <code>false</code> otherwise. 
5588 * @see Character#isDigit(int) 5589 * @see Character#isJavaIdentifierStart(int) 5590 * @see Character#isLetterOrDigit(int) 5591 * @see Character#isLowerCase(int) 5592 * @see Character#isTitleCase(int) 5593 * @see Character#isUnicodeIdentifierStart(int) 5594 * @see Character#isUpperCase(int) 5595 * @since 1.5 5596 */ 5597 public static boolean isLetter(int codePoint) { 5598 return ((((1 << Character.UPPERCASE_LETTER) | 5599 (1 << Character.LOWERCASE_LETTER) | 5600 (1 << Character.TITLECASE_LETTER) | 5601 (1 << Character.MODIFIER_LETTER) | 5602 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5603 != 0; 5604 } 5605 5606 /** 5607 * Determines if the specified character is a letter or digit. 5608 * <p> 5609 * A character is considered to be a letter or digit if either 5610 * <code>Character.isLetter(char ch)</code> or 5611 * <code>Character.isDigit(char ch)</code> returns 5612 * <code>true</code> for the character. 5613 * 5614 * <p><b>Note:</b> This method cannot handle <a 5615 * href="#supplementary"> supplementary characters</a>. To support 5616 * all Unicode characters, including supplementary characters, use 5617 * the {@link #isLetterOrDigit(int)} method. 5618 * 5619 * @param ch the character to be tested. 5620 * @return <code>true</code> if the character is a letter or digit; 5621 * <code>false</code> otherwise. 5622 * @see Character#isDigit(char) 5623 * @see Character#isJavaIdentifierPart(char) 5624 * @see Character#isJavaLetter(char) 5625 * @see Character#isJavaLetterOrDigit(char) 5626 * @see Character#isLetter(char) 5627 * @see Character#isUnicodeIdentifierPart(char) 5628 * @since 1.0.2 5629 */ 5630 public static boolean isLetterOrDigit(char ch) { 5631 return isLetterOrDigit((int)ch); 5632 } 5633 5634 /** 5635 * Determines if the specified character (Unicode code point) is a letter or digit. 
5636 * <p> 5637 * A character is considered to be a letter or digit if either 5638 * {@link #isLetter(int) isLetter(codePoint)} or 5639 * {@link #isDigit(int) isDigit(codePoint)} returns 5640 * <code>true</code> for the character. 5641 * 5642 * @param codePoint the character (Unicode code point) to be tested. 5643 * @return <code>true</code> if the character is a letter or digit; 5644 * <code>false</code> otherwise. 5645 * @see Character#isDigit(int) 5646 * @see Character#isJavaIdentifierPart(int) 5647 * @see Character#isLetter(int) 5648 * @see Character#isUnicodeIdentifierPart(int) 5649 * @since 1.5 5650 */ 5651 public static boolean isLetterOrDigit(int codePoint) { 5652 return ((((1 << Character.UPPERCASE_LETTER) | 5653 (1 << Character.LOWERCASE_LETTER) | 5654 (1 << Character.TITLECASE_LETTER) | 5655 (1 << Character.MODIFIER_LETTER) | 5656 (1 << Character.OTHER_LETTER) | 5657 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5658 != 0; 5659 } 5660 5661 /** 5662 * Determines if the specified character is permissible as the first 5663 * character in a Java identifier. 5664 * <p> 5665 * A character may start a Java identifier if and only if 5666 * one of the following is true: 5667 * <ul> 5668 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5669 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 5670 * <li> ch is a currency symbol (such as "$") 5671 * <li> ch is a connecting punctuation character (such as "_"). 5672 * </ul> 5673 * 5674 * @param ch the character to be tested. 5675 * @return <code>true</code> if the character may start a Java 5676 * identifier; <code>false</code> otherwise. 
5677 * @see Character#isJavaLetterOrDigit(char) 5678 * @see Character#isJavaIdentifierStart(char) 5679 * @see Character#isJavaIdentifierPart(char) 5680 * @see Character#isLetter(char) 5681 * @see Character#isLetterOrDigit(char) 5682 * @see Character#isUnicodeIdentifierStart(char) 5683 * @since 1.02 5684 * @deprecated Replaced by isJavaIdentifierStart(char). 5685 */ 5686 @Deprecated 5687 public static boolean isJavaLetter(char ch) { 5688 return isJavaIdentifierStart(ch); 5689 } 5690 5691 /** 5692 * Determines if the specified character may be part of a Java 5693 * identifier as other than the first character. 5694 * <p> 5695 * A character may be part of a Java identifier if and only if any 5696 * of the following are true: 5697 * <ul> 5698 * <li> it is a letter 5699 * <li> it is a currency symbol (such as <code>'$'</code>) 5700 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5701 * <li> it is a digit 5702 * <li> it is a numeric letter (such as a Roman numeral character) 5703 * <li> it is a combining mark 5704 * <li> it is a non-spacing mark 5705 * <li> <code>isIdentifierIgnorable</code> returns 5706 * <code>true</code> for the character. 5707 * </ul> 5708 * 5709 * @param ch the character to be tested. 5710 * @return <code>true</code> if the character may be part of a 5711 * Java identifier; <code>false</code> otherwise. 5712 * @see Character#isJavaLetter(char) 5713 * @see Character#isJavaIdentifierStart(char) 5714 * @see Character#isJavaIdentifierPart(char) 5715 * @see Character#isLetter(char) 5716 * @see Character#isLetterOrDigit(char) 5717 * @see Character#isUnicodeIdentifierPart(char) 5718 * @see Character#isIdentifierIgnorable(char) 5719 * @since 1.02 5720 * @deprecated Replaced by isJavaIdentifierPart(char). 
5721 */ 5722 @Deprecated 5723 public static boolean isJavaLetterOrDigit(char ch) { 5724 return isJavaIdentifierPart(ch); 5725 } 5726 5727 /** 5728 * Determines if the specified character is 5729 * permissible as the first character in a Java identifier. 5730 * <p> 5731 * A character may start a Java identifier if and only if 5732 * one of the following conditions is true: 5733 * <ul> 5734 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5735 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 5736 * <li> ch is a currency symbol (such as "$") 5737 * <li> ch is a connecting punctuation character (such as "_"). 5738 * </ul> 5739 * 5740 * <p><b>Note:</b> This method cannot handle <a 5741 * href="#supplementary"> supplementary characters</a>. To support 5742 * all Unicode characters, including supplementary characters, use 5743 * the {@link #isJavaIdentifierStart(int)} method. 5744 * 5745 * @param ch the character to be tested. 5746 * @return <code>true</code> if the character may start a Java identifier; 5747 * <code>false</code> otherwise. 5748 * @see Character#isJavaIdentifierPart(char) 5749 * @see Character#isLetter(char) 5750 * @see Character#isUnicodeIdentifierStart(char) 5751 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5752 * @since 1.1 5753 */ 5754 public static boolean isJavaIdentifierStart(char ch) { 5755 return isJavaIdentifierStart((int)ch); 5756 } 5757 5758 /** 5759 * Determines if the character (Unicode code point) is 5760 * permissible as the first character in a Java identifier. 
5761 * <p> 5762 * A character may start a Java identifier if and only if 5763 * one of the following conditions is true: 5764 * <ul> 5765 * <li> {@link #isLetter(int) isLetter(codePoint)} 5766 * returns <code>true</code> 5767 * <li> {@link #getType(int) getType(codePoint)} 5768 * returns <code>LETTER_NUMBER</code> 5769 * <li> the referenced character is a currency symbol (such as "$") 5770 * <li> the referenced character is a connecting punctuation character 5771 * (such as "_"). 5772 * </ul> 5773 * 5774 * @param codePoint the character (Unicode code point) to be tested. 5775 * @return <code>true</code> if the character may start a Java identifier; 5776 * <code>false</code> otherwise. 5777 * @see Character#isJavaIdentifierPart(int) 5778 * @see Character#isLetter(int) 5779 * @see Character#isUnicodeIdentifierStart(int) 5780 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5781 * @since 1.5 5782 */ 5783 public static boolean isJavaIdentifierStart(int codePoint) { 5784 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 5785 } 5786 5787 /** 5788 * Determines if the specified character may be part of a Java 5789 * identifier as other than the first character. 5790 * <p> 5791 * A character may be part of a Java identifier if any of the following 5792 * are true: 5793 * <ul> 5794 * <li> it is a letter 5795 * <li> it is a currency symbol (such as <code>'$'</code>) 5796 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5797 * <li> it is a digit 5798 * <li> it is a numeric letter (such as a Roman numeral character) 5799 * <li> it is a combining mark 5800 * <li> it is a non-spacing mark 5801 * <li> <code>isIdentifierIgnorable</code> returns 5802 * <code>true</code> for the character 5803 * </ul> 5804 * 5805 * <p><b>Note:</b> This method cannot handle <a 5806 * href="#supplementary"> supplementary characters</a>. 
To support 5807 * all Unicode characters, including supplementary characters, use 5808 * the {@link #isJavaIdentifierPart(int)} method. 5809 * 5810 * @param ch the character to be tested. 5811 * @return <code>true</code> if the character may be part of a 5812 * Java identifier; <code>false</code> otherwise. 5813 * @see Character#isIdentifierIgnorable(char) 5814 * @see Character#isJavaIdentifierStart(char) 5815 * @see Character#isLetterOrDigit(char) 5816 * @see Character#isUnicodeIdentifierPart(char) 5817 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5818 * @since 1.1 5819 */ 5820 public static boolean isJavaIdentifierPart(char ch) { 5821 return isJavaIdentifierPart((int)ch); 5822 } 5823 5824 /** 5825 * Determines if the character (Unicode code point) may be part of a Java 5826 * identifier as other than the first character. 5827 * <p> 5828 * A character may be part of a Java identifier if any of the following 5829 * are true: 5830 * <ul> 5831 * <li> it is a letter 5832 * <li> it is a currency symbol (such as <code>'$'</code>) 5833 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5834 * <li> it is a digit 5835 * <li> it is a numeric letter (such as a Roman numeral character) 5836 * <li> it is a combining mark 5837 * <li> it is a non-spacing mark 5838 * <li> {@link #isIdentifierIgnorable(int) 5839 * isIdentifierIgnorable(codePoint)} returns <code>true</code> for 5840 * the character 5841 * </ul> 5842 * 5843 * @param codePoint the character (Unicode code point) to be tested. 5844 * @return <code>true</code> if the character may be part of a 5845 * Java identifier; <code>false</code> otherwise. 
5846 * @see Character#isIdentifierIgnorable(int) 5847 * @see Character#isJavaIdentifierStart(int) 5848 * @see Character#isLetterOrDigit(int) 5849 * @see Character#isUnicodeIdentifierPart(int) 5850 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5851 * @since 1.5 5852 */ 5853 public static boolean isJavaIdentifierPart(int codePoint) { 5854 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 5855 } 5856 5857 /** 5858 * Determines if the specified character is permissible as the 5859 * first character in a Unicode identifier. 5860 * <p> 5861 * A character may start a Unicode identifier if and only if 5862 * one of the following conditions is true: 5863 * <ul> 5864 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5865 * <li> {@link #getType(char) getType(ch)} returns 5866 * <code>LETTER_NUMBER</code>. 5867 * </ul> 5868 * 5869 * <p><b>Note:</b> This method cannot handle <a 5870 * href="#supplementary"> supplementary characters</a>. To support 5871 * all Unicode characters, including supplementary characters, use 5872 * the {@link #isUnicodeIdentifierStart(int)} method. 5873 * 5874 * @param ch the character to be tested. 5875 * @return <code>true</code> if the character may start a Unicode 5876 * identifier; <code>false</code> otherwise. 5877 * @see Character#isJavaIdentifierStart(char) 5878 * @see Character#isLetter(char) 5879 * @see Character#isUnicodeIdentifierPart(char) 5880 * @since 1.1 5881 */ 5882 public static boolean isUnicodeIdentifierStart(char ch) { 5883 return isUnicodeIdentifierStart((int)ch); 5884 } 5885 5886 /** 5887 * Determines if the specified character (Unicode code point) is permissible as the 5888 * first character in a Unicode identifier. 
5889 * <p> 5890 * A character may start a Unicode identifier if and only if 5891 * one of the following conditions is true: 5892 * <ul> 5893 * <li> {@link #isLetter(int) isLetter(codePoint)} 5894 * returns <code>true</code> 5895 * <li> {@link #getType(int) getType(codePoint)} 5896 * returns <code>LETTER_NUMBER</code>. 5897 * </ul> 5898 * @param codePoint the character (Unicode code point) to be tested. 5899 * @return <code>true</code> if the character may start a Unicode 5900 * identifier; <code>false</code> otherwise. 5901 * @see Character#isJavaIdentifierStart(int) 5902 * @see Character#isLetter(int) 5903 * @see Character#isUnicodeIdentifierPart(int) 5904 * @since 1.5 5905 */ 5906 public static boolean isUnicodeIdentifierStart(int codePoint) { 5907 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 5908 } 5909 5910 /** 5911 * Determines if the specified character may be part of a Unicode 5912 * identifier as other than the first character. 5913 * <p> 5914 * A character may be part of a Unicode identifier if and only if 5915 * one of the following statements is true: 5916 * <ul> 5917 * <li> it is a letter 5918 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5919 * <li> it is a digit 5920 * <li> it is a numeric letter (such as a Roman numeral character) 5921 * <li> it is a combining mark 5922 * <li> it is a non-spacing mark 5923 * <li> <code>isIdentifierIgnorable</code> returns 5924 * <code>true</code> for this character. 5925 * </ul> 5926 * 5927 * <p><b>Note:</b> This method cannot handle <a 5928 * href="#supplementary"> supplementary characters</a>. To support 5929 * all Unicode characters, including supplementary characters, use 5930 * the {@link #isUnicodeIdentifierPart(int)} method. 5931 * 5932 * @param ch the character to be tested. 5933 * @return <code>true</code> if the character may be part of a 5934 * Unicode identifier; <code>false</code> otherwise. 
5935 * @see Character#isIdentifierIgnorable(char) 5936 * @see Character#isJavaIdentifierPart(char) 5937 * @see Character#isLetterOrDigit(char) 5938 * @see Character#isUnicodeIdentifierStart(char) 5939 * @since 1.1 5940 */ 5941 public static boolean isUnicodeIdentifierPart(char ch) { 5942 return isUnicodeIdentifierPart((int)ch); 5943 } 5944 5945 /** 5946 * Determines if the specified character (Unicode code point) may be part of a Unicode 5947 * identifier as other than the first character. 5948 * <p> 5949 * A character may be part of a Unicode identifier if and only if 5950 * one of the following statements is true: 5951 * <ul> 5952 * <li> it is a letter 5953 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5954 * <li> it is a digit 5955 * <li> it is a numeric letter (such as a Roman numeral character) 5956 * <li> it is a combining mark 5957 * <li> it is a non-spacing mark 5958 * <li> <code>isIdentifierIgnorable</code> returns 5959 * <code>true</code> for this character. 5960 * </ul> 5961 * @param codePoint the character (Unicode code point) to be tested. 5962 * @return <code>true</code> if the character may be part of a 5963 * Unicode identifier; <code>false</code> otherwise. 5964 * @see Character#isIdentifierIgnorable(int) 5965 * @see Character#isJavaIdentifierPart(int) 5966 * @see Character#isLetterOrDigit(int) 5967 * @see Character#isUnicodeIdentifierStart(int) 5968 * @since 1.5 5969 */ 5970 public static boolean isUnicodeIdentifierPart(int codePoint) { 5971 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 5972 } 5973 5974 /** 5975 * Determines if the specified character should be regarded as 5976 * an ignorable character in a Java identifier or a Unicode identifier. 
5977 * <p> 5978 * The following Unicode characters are ignorable in a Java identifier 5979 * or a Unicode identifier: 5980 * <ul> 5981 * <li>ISO control characters that are not whitespace 5982 * <ul> 5983 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 5984 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 5985 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 5986 * </ul> 5987 * 5988 * <li>all characters that have the <code>FORMAT</code> general 5989 * category value 5990 * </ul> 5991 * 5992 * <p><b>Note:</b> This method cannot handle <a 5993 * href="#supplementary"> supplementary characters</a>. To support 5994 * all Unicode characters, including supplementary characters, use 5995 * the {@link #isIdentifierIgnorable(int)} method. 5996 * 5997 * @param ch the character to be tested. 5998 * @return <code>true</code> if the character is an ignorable control 5999 * character that may be part of a Java or Unicode identifier; 6000 * <code>false</code> otherwise. 6001 * @see Character#isJavaIdentifierPart(char) 6002 * @see Character#isUnicodeIdentifierPart(char) 6003 * @since 1.1 6004 */ 6005 public static boolean isIdentifierIgnorable(char ch) { 6006 return isIdentifierIgnorable((int)ch); 6007 } 6008 6009 /** 6010 * Determines if the specified character (Unicode code point) should be regarded as 6011 * an ignorable character in a Java identifier or a Unicode identifier. 
6012 * <p> 6013 * The following Unicode characters are ignorable in a Java identifier 6014 * or a Unicode identifier: 6015 * <ul> 6016 * <li>ISO control characters that are not whitespace 6017 * <ul> 6018 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 6019 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 6020 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 6021 * </ul> 6022 * 6023 * <li>all characters that have the <code>FORMAT</code> general 6024 * category value 6025 * </ul> 6026 * 6027 * @param codePoint the character (Unicode code point) to be tested. 6028 * @return <code>true</code> if the character is an ignorable control 6029 * character that may be part of a Java or Unicode identifier; 6030 * <code>false</code> otherwise. 6031 * @see Character#isJavaIdentifierPart(int) 6032 * @see Character#isUnicodeIdentifierPart(int) 6033 * @since 1.5 6034 */ 6035 public static boolean isIdentifierIgnorable(int codePoint) { 6036 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6037 } 6038 6039 /** 6040 * Converts the character argument to lowercase using case 6041 * mapping information from the UnicodeData file. 6042 * <p> 6043 * Note that 6044 * <code>Character.isLowerCase(Character.toLowerCase(ch))</code> 6045 * does not always return <code>true</code> for some ranges of 6046 * characters, particularly those that are symbols or ideographs. 6047 * 6048 * <p>In general, {@link String#toLowerCase()} should be used to map 6049 * characters to lowercase. <code>String</code> case mapping methods 6050 * have several benefits over <code>Character</code> case mapping methods. 6051 * <code>String</code> case mapping methods can perform locale-sensitive 6052 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6053 * the <code>Character</code> case mapping methods cannot. 6054 * 6055 * <p><b>Note:</b> This method cannot handle <a 6056 * href="#supplementary"> supplementary characters</a>. 
To support 6057 * all Unicode characters, including supplementary characters, use 6058 * the {@link #toLowerCase(int)} method. 6059 * 6060 * @param ch the character to be converted. 6061 * @return the lowercase equivalent of the character, if any; 6062 * otherwise, the character itself. 6063 * @see Character#isLowerCase(char) 6064 * @see String#toLowerCase() 6065 */ 6066 public static char toLowerCase(char ch) { 6067 return (char)toLowerCase((int)ch); 6068 } 6069 6070 /** 6071 * Converts the character (Unicode code point) argument to 6072 * lowercase using case mapping information from the UnicodeData 6073 * file. 6074 * 6075 * <p> Note that 6076 * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code> 6077 * does not always return <code>true</code> for some ranges of 6078 * characters, particularly those that are symbols or ideographs. 6079 * 6080 * <p>In general, {@link String#toLowerCase()} should be used to map 6081 * characters to lowercase. <code>String</code> case mapping methods 6082 * have several benefits over <code>Character</code> case mapping methods. 6083 * <code>String</code> case mapping methods can perform locale-sensitive 6084 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6085 * the <code>Character</code> case mapping methods cannot. 6086 * 6087 * @param codePoint the character (Unicode code point) to be converted. 6088 * @return the lowercase equivalent of the character (Unicode code 6089 * point), if any; otherwise, the character itself. 6090 * @see Character#isLowerCase(int) 6091 * @see String#toLowerCase() 6092 * 6093 * @since 1.5 6094 */ 6095 public static int toLowerCase(int codePoint) { 6096 return CharacterData.of(codePoint).toLowerCase(codePoint); 6097 } 6098 6099 /** 6100 * Converts the character argument to uppercase using case mapping 6101 * information from the UnicodeData file. 
6102 * <p> 6103 * Note that 6104 * <code>Character.isUpperCase(Character.toUpperCase(ch))</code> 6105 * does not always return <code>true</code> for some ranges of 6106 * characters, particularly those that are symbols or ideographs. 6107 * 6108 * <p>In general, {@link String#toUpperCase()} should be used to map 6109 * characters to uppercase. <code>String</code> case mapping methods 6110 * have several benefits over <code>Character</code> case mapping methods. 6111 * <code>String</code> case mapping methods can perform locale-sensitive 6112 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6113 * the <code>Character</code> case mapping methods cannot. 6114 * 6115 * <p><b>Note:</b> This method cannot handle <a 6116 * href="#supplementary"> supplementary characters</a>. To support 6117 * all Unicode characters, including supplementary characters, use 6118 * the {@link #toUpperCase(int)} method. 6119 * 6120 * @param ch the character to be converted. 6121 * @return the uppercase equivalent of the character, if any; 6122 * otherwise, the character itself. 6123 * @see Character#isUpperCase(char) 6124 * @see String#toUpperCase() 6125 */ 6126 public static char toUpperCase(char ch) { 6127 return (char)toUpperCase((int)ch); 6128 } 6129 6130 /** 6131 * Converts the character (Unicode code point) argument to 6132 * uppercase using case mapping information from the UnicodeData 6133 * file. 6134 * 6135 * <p>Note that 6136 * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code> 6137 * does not always return <code>true</code> for some ranges of 6138 * characters, particularly those that are symbols or ideographs. 6139 * 6140 * <p>In general, {@link String#toUpperCase()} should be used to map 6141 * characters to uppercase. <code>String</code> case mapping methods 6142 * have several benefits over <code>Character</code> case mapping methods. 
6143 * <code>String</code> case mapping methods can perform locale-sensitive 6144 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6145 * the <code>Character</code> case mapping methods cannot. 6146 * 6147 * @param codePoint the character (Unicode code point) to be converted. 6148 * @return the uppercase equivalent of the character, if any; 6149 * otherwise, the character itself. 6150 * @see Character#isUpperCase(int) 6151 * @see String#toUpperCase() 6152 * 6153 * @since 1.5 6154 */ 6155 public static int toUpperCase(int codePoint) { 6156 return CharacterData.of(codePoint).toUpperCase(codePoint); 6157 } 6158 6159 /** 6160 * Converts the character argument to titlecase using case mapping 6161 * information from the UnicodeData file. If a character has no 6162 * explicit titlecase mapping and is not itself a titlecase char 6163 * according to UnicodeData, then the uppercase mapping is 6164 * returned as an equivalent titlecase mapping. If the 6165 * <code>char</code> argument is already a titlecase 6166 * <code>char</code>, the same <code>char</code> value will be 6167 * returned. 6168 * <p> 6169 * Note that 6170 * <code>Character.isTitleCase(Character.toTitleCase(ch))</code> 6171 * does not always return <code>true</code> for some ranges of 6172 * characters. 6173 * 6174 * <p><b>Note:</b> This method cannot handle <a 6175 * href="#supplementary"> supplementary characters</a>. To support 6176 * all Unicode characters, including supplementary characters, use 6177 * the {@link #toTitleCase(int)} method. 6178 * 6179 * @param ch the character to be converted. 6180 * @return the titlecase equivalent of the character, if any; 6181 * otherwise, the character itself. 
6182 * @see Character#isTitleCase(char) 6183 * @see Character#toLowerCase(char) 6184 * @see Character#toUpperCase(char) 6185 * @since 1.0.2 6186 */ 6187 public static char toTitleCase(char ch) { 6188 return (char)toTitleCase((int)ch); 6189 } 6190 6191 /** 6192 * Converts the character (Unicode code point) argument to titlecase using case mapping 6193 * information from the UnicodeData file. If a character has no 6194 * explicit titlecase mapping and is not itself a titlecase char 6195 * according to UnicodeData, then the uppercase mapping is 6196 * returned as an equivalent titlecase mapping. If the 6197 * character argument is already a titlecase 6198 * character, the same character value will be 6199 * returned. 6200 * 6201 * <p>Note that 6202 * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code> 6203 * does not always return <code>true</code> for some ranges of 6204 * characters. 6205 * 6206 * @param codePoint the character (Unicode code point) to be converted. 6207 * @return the titlecase equivalent of the character, if any; 6208 * otherwise, the character itself. 6209 * @see Character#isTitleCase(int) 6210 * @see Character#toLowerCase(int) 6211 * @see Character#toUpperCase(int) 6212 * @since 1.5 6213 */ 6214 public static int toTitleCase(int codePoint) { 6215 return CharacterData.of(codePoint).toTitleCase(codePoint); 6216 } 6217 6218 /** 6219 * Returns the numeric value of the character <code>ch</code> in the 6220 * specified radix. 6221 * <p> 6222 * If the radix is not in the range <code>MIN_RADIX</code> <= 6223 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 6224 * value of <code>ch</code> is not a valid digit in the specified 6225 * radix, <code>-1</code> is returned. 
A character is a valid digit 6226 * if at least one of the following is true: 6227 * <ul> 6228 * <li>The method <code>isDigit</code> is <code>true</code> of the character 6229 * and the Unicode decimal digit value of the character (or its 6230 * single-character decomposition) is less than the specified radix. 6231 * In this case the decimal digit value is returned. 6232 * <li>The character is one of the uppercase Latin letters 6233 * <code>'A'</code> through <code>'Z'</code> and its code is less than 6234 * <code>radix + 'A' - 10</code>. 6235 * In this case, <code>ch - 'A' + 10</code> 6236 * is returned. 6237 * <li>The character is one of the lowercase Latin letters 6238 * <code>'a'</code> through <code>'z'</code> and its code is less than 6239 * <code>radix + 'a' - 10</code>. 6240 * In this case, <code>ch - 'a' + 10</code> 6241 * is returned. 6242 * </ul> 6243 * 6244 * <p><b>Note:</b> This method cannot handle <a 6245 * href="#supplementary"> supplementary characters</a>. To support 6246 * all Unicode characters, including supplementary characters, use 6247 * the {@link #digit(int, int)} method. 6248 * 6249 * @param ch the character to be converted. 6250 * @param radix the radix. 6251 * @return the numeric value represented by the character in the 6252 * specified radix. 6253 * @see Character#forDigit(int, int) 6254 * @see Character#isDigit(char) 6255 */ 6256 public static int digit(char ch, int radix) { 6257 return digit((int)ch, radix); 6258 } 6259 6260 /** 6261 * Returns the numeric value of the specified character (Unicode 6262 * code point) in the specified radix. 6263 * 6264 * <p>If the radix is not in the range <code>MIN_RADIX</code> <= 6265 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 6266 * character is not a valid digit in the specified 6267 * radix, <code>-1</code> is returned. 
A character is a valid digit 6268 * if at least one of the following is true: 6269 * <ul> 6270 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character 6271 * and the Unicode decimal digit value of the character (or its 6272 * single-character decomposition) is less than the specified radix. 6273 * In this case the decimal digit value is returned. 6274 * <li>The character is one of the uppercase Latin letters 6275 * <code>'A'</code> through <code>'Z'</code> and its code is less than 6276 * <code>radix + 'A' - 10</code>. 6277 * In this case, <code>ch - 'A' + 10</code> 6278 * is returned. 6279 * <li>The character is one of the lowercase Latin letters 6280 * <code>'a'</code> through <code>'z'</code> and its code is less than 6281 * <code>radix + 'a' - 10</code>. 6282 * In this case, <code>ch - 'a' + 10</code> 6283 * is returned. 6284 * </ul> 6285 * 6286 * @param codePoint the character (Unicode code point) to be converted. 6287 * @param radix the radix. 6288 * @return the numeric value represented by the character in the 6289 * specified radix. 6290 * @see Character#forDigit(int, int) 6291 * @see Character#isDigit(int) 6292 * @since 1.5 6293 */ 6294 public static int digit(int codePoint, int radix) { 6295 return CharacterData.of(codePoint).digit(codePoint, radix); 6296 } 6297 6298 /** 6299 * Returns the <code>int</code> value that the specified Unicode 6300 * character represents. For example, the character 6301 * <code>'\u216C'</code> (the roman numeral fifty) will return 6302 * an int with a value of 50. 6303 * <p> 6304 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 6305 * <code>'\u005A'</code>), lowercase 6306 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 6307 * full width variant (<code>'\uFF21'</code> through 6308 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 6309 * <code>'\uFF5A'</code>) forms have numeric values from 10 6310 * through 35. 
This is independent of the Unicode specification, 6311 * which does not assign numeric values to these <code>char</code> 6312 * values. 6313 * <p> 6314 * If the character does not have a numeric value, then -1 is returned. 6315 * If the character has a numeric value that cannot be represented as a 6316 * nonnegative integer (for example, a fractional value), then -2 6317 * is returned. 6318 * 6319 * <p><b>Note:</b> This method cannot handle <a 6320 * href="#supplementary"> supplementary characters</a>. To support 6321 * all Unicode characters, including supplementary characters, use 6322 * the {@link #getNumericValue(int)} method. 6323 * 6324 * @param ch the character to be converted. 6325 * @return the numeric value of the character, as a nonnegative <code>int</code> 6326 * value; -2 if the character has a numeric value that is not a 6327 * nonnegative integer; -1 if the character has no numeric value. 6328 * @see Character#forDigit(int, int) 6329 * @see Character#isDigit(char) 6330 * @since 1.1 6331 */ 6332 public static int getNumericValue(char ch) { 6333 return getNumericValue((int)ch); 6334 } 6335 6336 /** 6337 * Returns the <code>int</code> value that the specified 6338 * character (Unicode code point) represents. For example, the character 6339 * <code>'\u216C'</code> (the Roman numeral fifty) will return 6340 * an <code>int</code> with a value of 50. 6341 * <p> 6342 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 6343 * <code>'\u005A'</code>), lowercase 6344 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 6345 * full width variant (<code>'\uFF21'</code> through 6346 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 6347 * <code>'\uFF5A'</code>) forms have numeric values from 10 6348 * through 35. This is independent of the Unicode specification, 6349 * which does not assign numeric values to these <code>char</code> 6350 * values. 6351 * <p> 6352 * If the character does not have a numeric value, then -1 is returned. 
6353 * If the character has a numeric value that cannot be represented as a 6354 * nonnegative integer (for example, a fractional value), then -2 6355 * is returned. 6356 * 6357 * @param codePoint the character (Unicode code point) to be converted. 6358 * @return the numeric value of the character, as a nonnegative <code>int</code> 6359 * value; -2 if the character has a numeric value that is not a 6360 * nonnegative integer; -1 if the character has no numeric value. 6361 * @see Character#forDigit(int, int) 6362 * @see Character#isDigit(int) 6363 * @since 1.5 6364 */ 6365 public static int getNumericValue(int codePoint) { 6366 return CharacterData.of(codePoint).getNumericValue(codePoint); 6367 } 6368 6369 /** 6370 * Determines if the specified character is ISO-LATIN-1 white space. 6371 * This method returns <code>true</code> for the following five 6372 * characters only: 6373 * <table> 6374 * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td> 6375 * <td><code>HORIZONTAL TABULATION</code></td></tr> 6376 * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td> 6377 * <td><code>NEW LINE</code></td></tr> 6378 * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td> 6379 * <td><code>FORM FEED</code></td></tr> 6380 * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td> 6381 * <td><code>CARRIAGE RETURN</code></td></tr> 6382 * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td> 6383 * <td><code>SPACE</code></td></tr> 6384 * </table> 6385 * 6386 * @param ch the character to be tested. 6387 * @return <code>true</code> if the character is ISO-LATIN-1 white 6388 * space; <code>false</code> otherwise. 6389 * @see Character#isSpaceChar(char) 6390 * @see Character#isWhitespace(char) 6391 * @deprecated Replaced by isWhitespace(char). 
6392 */ 6393 @Deprecated 6394 public static boolean isSpace(char ch) { 6395 return (ch <= 0x0020) && 6396 (((((1L << 0x0009) | 6397 (1L << 0x000A) | 6398 (1L << 0x000C) | 6399 (1L << 0x000D) | 6400 (1L << 0x0020)) >> ch) & 1L) != 0); 6401 } 6402 6403 6404 /** 6405 * Determines if the specified character is a Unicode space character. 6406 * A character is considered to be a space character if and only if 6407 * it is specified to be a space character by the Unicode standard. This 6408 * method returns true if the character's general category type is any of 6409 * the following: 6410 * <ul> 6411 * <li> <code>SPACE_SEPARATOR</code> 6412 * <li> <code>LINE_SEPARATOR</code> 6413 * <li> <code>PARAGRAPH_SEPARATOR</code> 6414 * </ul> 6415 * 6416 * <p><b>Note:</b> This method cannot handle <a 6417 * href="#supplementary"> supplementary characters</a>. To support 6418 * all Unicode characters, including supplementary characters, use 6419 * the {@link #isSpaceChar(int)} method. 6420 * 6421 * @param ch the character to be tested. 6422 * @return <code>true</code> if the character is a space character; 6423 * <code>false</code> otherwise. 6424 * @see Character#isWhitespace(char) 6425 * @since 1.1 6426 */ 6427 public static boolean isSpaceChar(char ch) { 6428 return isSpaceChar((int)ch); 6429 } 6430 6431 /** 6432 * Determines if the specified character (Unicode code point) is a 6433 * Unicode space character. A character is considered to be a 6434 * space character if and only if it is specified to be a space 6435 * character by the Unicode standard. This method returns true if 6436 * the character's general category type is any of the following: 6437 * 6438 * <ul> 6439 * <li> {@link #SPACE_SEPARATOR} 6440 * <li> {@link #LINE_SEPARATOR} 6441 * <li> {@link #PARAGRAPH_SEPARATOR} 6442 * </ul> 6443 * 6444 * @param codePoint the character (Unicode code point) to be tested. 6445 * @return <code>true</code> if the character is a space character; 6446 * <code>false</code> otherwise. 
6447 * @see Character#isWhitespace(int) 6448 * @since 1.5 6449 */ 6450 public static boolean isSpaceChar(int codePoint) { 6451 return ((((1 << Character.SPACE_SEPARATOR) | 6452 (1 << Character.LINE_SEPARATOR) | 6453 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6454 != 0; 6455 } 6456 6457 /** 6458 * Determines if the specified character is white space according to Java. 6459 * A character is a Java whitespace character if and only if it satisfies 6460 * one of the following criteria: 6461 * <ul> 6462 * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>, 6463 * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>) 6464 * but is not also a non-breaking space (<code>'\u00A0'</code>, 6465 * <code>'\u2007'</code>, <code>'\u202F'</code>). 6466 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION. 6467 * <li> It is <code>'\u000A'</code>, LINE FEED. 6468 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION. 6469 * <li> It is <code>'\u000C'</code>, FORM FEED. 6470 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN. 6471 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR. 6472 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR. 6473 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR. 6474 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR. 6475 * </ul> 6476 * 6477 * <p><b>Note:</b> This method cannot handle <a 6478 * href="#supplementary"> supplementary characters</a>. To support 6479 * all Unicode characters, including supplementary characters, use 6480 * the {@link #isWhitespace(int)} method. 6481 * 6482 * @param ch the character to be tested. 6483 * @return <code>true</code> if the character is a Java whitespace 6484 * character; <code>false</code> otherwise. 
6485 * @see Character#isSpaceChar(char) 6486 * @since 1.1 6487 */ 6488 public static boolean isWhitespace(char ch) { 6489 return isWhitespace((int)ch); 6490 } 6491 6492 /** 6493 * Determines if the specified character (Unicode code point) is 6494 * white space according to Java. A character is a Java 6495 * whitespace character if and only if it satisfies one of the 6496 * following criteria: 6497 * <ul> 6498 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6499 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6500 * but is not also a non-breaking space (<code>'\u00A0'</code>, 6501 * <code>'\u2007'</code>, <code>'\u202F'</code>). 6502 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION. 6503 * <li> It is <code>'\u000A'</code>, LINE FEED. 6504 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION. 6505 * <li> It is <code>'\u000C'</code>, FORM FEED. 6506 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN. 6507 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR. 6508 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR. 6509 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR. 6510 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR. 6511 * </ul> 6512 * <p> 6513 * 6514 * @param codePoint the character (Unicode code point) to be tested. 6515 * @return <code>true</code> if the character is a Java whitespace 6516 * character; <code>false</code> otherwise. 6517 * @see Character#isSpaceChar(int) 6518 * @since 1.5 6519 */ 6520 public static boolean isWhitespace(int codePoint) { 6521 return CharacterData.of(codePoint).isWhitespace(codePoint); 6522 } 6523 6524 /** 6525 * Determines if the specified character is an ISO control 6526 * character. A character is considered to be an ISO control 6527 * character if its code is in the range <code>'\u0000'</code> 6528 * through <code>'\u001F'</code> or in the range 6529 * <code>'\u007F'</code> through <code>'\u009F'</code>. 
6530 * 6531 * <p><b>Note:</b> This method cannot handle <a 6532 * href="#supplementary"> supplementary characters</a>. To support 6533 * all Unicode characters, including supplementary characters, use 6534 * the {@link #isISOControl(int)} method. 6535 * 6536 * @param ch the character to be tested. 6537 * @return <code>true</code> if the character is an ISO control character; 6538 * <code>false</code> otherwise. 6539 * 6540 * @see Character#isSpaceChar(char) 6541 * @see Character#isWhitespace(char) 6542 * @since 1.1 6543 */ 6544 public static boolean isISOControl(char ch) { 6545 return isISOControl((int)ch); 6546 } 6547 6548 /** 6549 * Determines if the referenced character (Unicode code point) is an ISO control 6550 * character. A character is considered to be an ISO control 6551 * character if its code is in the range <code>'\u0000'</code> 6552 * through <code>'\u001F'</code> or in the range 6553 * <code>'\u007F'</code> through <code>'\u009F'</code>. 6554 * 6555 * @param codePoint the character (Unicode code point) to be tested. 6556 * @return <code>true</code> if the character is an ISO control character; 6557 * <code>false</code> otherwise. 6558 * @see Character#isSpaceChar(int) 6559 * @see Character#isWhitespace(int) 6560 * @since 1.5 6561 */ 6562 public static boolean isISOControl(int codePoint) { 6563 // Optimized form of: 6564 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6565 // (codePoint >= 0x7F && codePoint <= 0x9F); 6566 return codePoint <= 0x9F && 6567 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6568 } 6569 6570 /** 6571 * Returns a value indicating a character's general category. 6572 * 6573 * <p><b>Note:</b> This method cannot handle <a 6574 * href="#supplementary"> supplementary characters</a>. To support 6575 * all Unicode characters, including supplementary characters, use 6576 * the {@link #getType(int)} method. 6577 * 6578 * @param ch the character to be tested. 
6579 * @return a value of type <code>int</code> representing the 6580 * character's general category. 6581 * @see Character#COMBINING_SPACING_MARK 6582 * @see Character#CONNECTOR_PUNCTUATION 6583 * @see Character#CONTROL 6584 * @see Character#CURRENCY_SYMBOL 6585 * @see Character#DASH_PUNCTUATION 6586 * @see Character#DECIMAL_DIGIT_NUMBER 6587 * @see Character#ENCLOSING_MARK 6588 * @see Character#END_PUNCTUATION 6589 * @see Character#FINAL_QUOTE_PUNCTUATION 6590 * @see Character#FORMAT 6591 * @see Character#INITIAL_QUOTE_PUNCTUATION 6592 * @see Character#LETTER_NUMBER 6593 * @see Character#LINE_SEPARATOR 6594 * @see Character#LOWERCASE_LETTER 6595 * @see Character#MATH_SYMBOL 6596 * @see Character#MODIFIER_LETTER 6597 * @see Character#MODIFIER_SYMBOL 6598 * @see Character#NON_SPACING_MARK 6599 * @see Character#OTHER_LETTER 6600 * @see Character#OTHER_NUMBER 6601 * @see Character#OTHER_PUNCTUATION 6602 * @see Character#OTHER_SYMBOL 6603 * @see Character#PARAGRAPH_SEPARATOR 6604 * @see Character#PRIVATE_USE 6605 * @see Character#SPACE_SEPARATOR 6606 * @see Character#START_PUNCTUATION 6607 * @see Character#SURROGATE 6608 * @see Character#TITLECASE_LETTER 6609 * @see Character#UNASSIGNED 6610 * @see Character#UPPERCASE_LETTER 6611 * @since 1.1 6612 */ 6613 public static int getType(char ch) { 6614 return getType((int)ch); 6615 } 6616 6617 /** 6618 * Returns a value indicating a character's general category. 6619 * 6620 * @param codePoint the character (Unicode code point) to be tested. 6621 * @return a value of type <code>int</code> representing the 6622 * character's general category. 
6623 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6624 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6625 * @see Character#CONTROL CONTROL 6626 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6627 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6628 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6629 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6630 * @see Character#END_PUNCTUATION END_PUNCTUATION 6631 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6632 * @see Character#FORMAT FORMAT 6633 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6634 * @see Character#LETTER_NUMBER LETTER_NUMBER 6635 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6636 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6637 * @see Character#MATH_SYMBOL MATH_SYMBOL 6638 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6639 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6640 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6641 * @see Character#OTHER_LETTER OTHER_LETTER 6642 * @see Character#OTHER_NUMBER OTHER_NUMBER 6643 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6644 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6645 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6646 * @see Character#PRIVATE_USE PRIVATE_USE 6647 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6648 * @see Character#START_PUNCTUATION START_PUNCTUATION 6649 * @see Character#SURROGATE SURROGATE 6650 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6651 * @see Character#UNASSIGNED UNASSIGNED 6652 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6653 * @since 1.5 6654 */ 6655 public static int getType(int codePoint) { 6656 return CharacterData.of(codePoint).getType(codePoint); 6657 } 6658 6659 /** 6660 * Determines the character representation for a specific digit in 6661 * the specified radix. 
If the value of <code>radix</code> is not a 6662 * valid radix, or the value of <code>digit</code> is not a valid 6663 * digit in the specified radix, the null character 6664 * (<code>'\u0000'</code>) is returned. 6665 * <p> 6666 * The <code>radix</code> argument is valid if it is greater than or 6667 * equal to <code>MIN_RADIX</code> and less than or equal to 6668 * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if 6669 * <code>0 <=digit < radix</code>. 6670 * <p> 6671 * If the digit is less than 10, then 6672 * <code>'0' + digit</code> is returned. Otherwise, the value 6673 * <code>'a' + digit - 10</code> is returned. 6674 * 6675 * @param digit the number to convert to a character. 6676 * @param radix the radix. 6677 * @return the <code>char</code> representation of the specified digit 6678 * in the specified radix. 6679 * @see Character#MIN_RADIX 6680 * @see Character#MAX_RADIX 6681 * @see Character#digit(char, int) 6682 */ 6683 public static char forDigit(int digit, int radix) { 6684 if ((digit >= radix) || (digit < 0)) { 6685 return '\0'; 6686 } 6687 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6688 return '\0'; 6689 } 6690 if (digit < 10) { 6691 return (char)('0' + digit); 6692 } 6693 return (char)('a' - 10 + digit); 6694 } 6695 6696 /** 6697 * Returns the Unicode directionality property for the given 6698 * character. Character directionality is used to calculate the 6699 * visual ordering of text. The directionality value of undefined 6700 * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>. 6701 * 6702 * <p><b>Note:</b> This method cannot handle <a 6703 * href="#supplementary"> supplementary characters</a>. To support 6704 * all Unicode characters, including supplementary characters, use 6705 * the {@link #getDirectionality(int)} method. 6706 * 6707 * @param ch <code>char</code> for which the directionality property 6708 * is requested. 
6709 * @return the directionality property of the <code>char</code> value. 6710 * 6711 * @see Character#DIRECTIONALITY_UNDEFINED 6712 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6713 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6714 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6715 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6716 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6717 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6718 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6719 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6720 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6721 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6722 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6723 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6724 * @see Character#DIRECTIONALITY_WHITESPACE 6725 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6726 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6727 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6728 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6729 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6730 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6731 * @since 1.4 6732 */ 6733 public static byte getDirectionality(char ch) { 6734 return getDirectionality((int)ch); 6735 } 6736 6737 /** 6738 * Returns the Unicode directionality property for the given 6739 * character (Unicode code point). Character directionality is 6740 * used to calculate the visual ordering of text. The 6741 * directionality value of undefined character is {@link 6742 * #DIRECTIONALITY_UNDEFINED}. 6743 * 6744 * @param codePoint the character (Unicode code point) for which 6745 * the directionality property is requested. 6746 * @return the directionality property of the character. 
6747 * 6748 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 6749 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 6750 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 6751 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6752 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 6753 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6754 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6755 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 6756 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6757 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 6758 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 6759 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 6760 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 6761 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 6762 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 6763 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6764 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6765 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6766 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6767 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6768 * @since 1.5 6769 */ 6770 public static byte getDirectionality(int codePoint) { 6771 return CharacterData.of(codePoint).getDirectionality(codePoint); 6772 } 6773 6774 /** 6775 * 
Determines whether the character is mirrored according to the 6776 * Unicode specification. Mirrored characters should have their 6777 * glyphs horizontally mirrored when displayed in text that is 6778 * right-to-left. For example, <code>'\u0028'</code> LEFT 6779 * PARENTHESIS is semantically defined to be an <i>opening 6780 * parenthesis</i>. This will appear as a "(" in text that is 6781 * left-to-right but as a ")" in text that is right-to-left. 6782 * 6783 * <p><b>Note:</b> This method cannot handle <a 6784 * href="#supplementary"> supplementary characters</a>. To support 6785 * all Unicode characters, including supplementary characters, use 6786 * the {@link #isMirrored(int)} method. 6787 * 6788 * @param ch <code>char</code> for which the mirrored property is requested 6789 * @return <code>true</code> if the char is mirrored, <code>false</code> 6790 * if the <code>char</code> is not mirrored or is not defined. 6791 * @since 1.4 6792 */ 6793 public static boolean isMirrored(char ch) { 6794 return isMirrored((int)ch); 6795 } 6796 6797 /** 6798 * Determines whether the specified character (Unicode code point) 6799 * is mirrored according to the Unicode specification. Mirrored 6800 * characters should have their glyphs horizontally mirrored when 6801 * displayed in text that is right-to-left. For example, 6802 * <code>'\u0028'</code> LEFT PARENTHESIS is semantically 6803 * defined to be an <i>opening parenthesis</i>. This will appear 6804 * as a "(" in text that is left-to-right but as a ")" in text 6805 * that is right-to-left. 6806 * 6807 * @param codePoint the character (Unicode code point) to be tested. 6808 * @return <code>true</code> if the character is mirrored, <code>false</code> 6809 * if the character is not mirrored or is not defined. 
6810 * @since 1.5 6811 */ 6812 public static boolean isMirrored(int codePoint) { 6813 return CharacterData.of(codePoint).isMirrored(codePoint); 6814 } 6815 6816 /** 6817 * Compares two <code>Character</code> objects numerically. 6818 * 6819 * @param anotherCharacter the <code>Character</code> to be compared. 6820 6821 * @return the value <code>0</code> if the argument <code>Character</code> 6822 * is equal to this <code>Character</code>; a value less than 6823 * <code>0</code> if this <code>Character</code> is numerically less 6824 * than the <code>Character</code> argument; and a value greater than 6825 * <code>0</code> if this <code>Character</code> is numerically greater 6826 * than the <code>Character</code> argument (unsigned comparison). 6827 * Note that this is strictly a numerical comparison; it is not 6828 * locale-dependent. 6829 * @since 1.2 6830 */ 6831 public int compareTo(Character anotherCharacter) { 6832 return compare(this.value, anotherCharacter.value); 6833 } 6834 6835 /** 6836 * Compares two {@code char} values numerically. 6837 * The value returned is identical to what would be returned by: 6838 * <pre> 6839 * Character.valueOf(x).compareTo(Character.valueOf(y)) 6840 * </pre> 6841 * 6842 * @param x the first {@code char} to compare 6843 * @param y the second {@code char} to compare 6844 * @return the value {@code 0} if {@code x == y}; 6845 * a value less than {@code 0} if {@code x < y}; and 6846 * a value greater than {@code 0} if {@code x > y} 6847 * @since 1.7 6848 */ 6849 public static int compare(char x, char y) { 6850 return x - y; 6851 } 6852 6853 /** 6854 * Converts the character (Unicode code point) argument to uppercase using 6855 * information from the UnicodeData file. 6856 * <p> 6857 * 6858 * @param codePoint the character (Unicode code point) to be converted. 
6859 * @return either the uppercase equivalent of the character, if 6860 * any, or an error flag (<code>Character.ERROR</code>) 6861 * that indicates that a 1:M <code>char</code> mapping exists. 6862 * @see Character#isLowerCase(char) 6863 * @see Character#isUpperCase(char) 6864 * @see Character#toLowerCase(char) 6865 * @see Character#toTitleCase(char) 6866 * @since 1.4 6867 */ 6868 static int toUpperCaseEx(int codePoint) { 6869 assert isValidCodePoint(codePoint); 6870 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 6871 } 6872 6873 /** 6874 * Converts the character (Unicode code point) argument to uppercase using case 6875 * mapping information from the SpecialCasing file in the Unicode 6876 * specification. If a character has no explicit uppercase 6877 * mapping, then the <code>char</code> itself is returned in the 6878 * <code>char[]</code>. 6879 * 6880 * @param codePoint the character (Unicode code point) to be converted. 6881 * @return a <code>char[]</code> with the uppercased character. 6882 * @since 1.4 6883 */ 6884 static char[] toUpperCaseCharArray(int codePoint) { 6885 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP. 6886 assert isBmpCodePoint(codePoint); 6887 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 6888 } 6889 6890 /** 6891 * The number of bits used to represent a <tt>char</tt> value in unsigned 6892 * binary form, constant {@code 16}. 6893 * 6894 * @since 1.5 6895 */ 6896 public static final int SIZE = 16; 6897 6898 /** 6899 * Returns the value obtained by reversing the order of the bytes in the 6900 * specified <tt>char</tt> value. 6901 * 6902 * @return the value obtained by reversing (or, equivalently, swapping) 6903 * the bytes in the specified <tt>char</tt> value. 
* @param ch the <tt>char</tt> value whose bytes are to be reversed.
 * @since 1.5
 */
public static char reverseBytes(char ch) {
    final int highToLow = (ch & 0xFF00) >> 8;
    // Bits shifted past bit 15 are discarded by the narrowing cast below.
    final int lowToHigh = ch << 8;
    return (char) (highToLow | lowToHigh);
}

/**
 * Returns the Unicode name of the specified character
 * <code>codePoint</code>, or null if the code point is
 * {@link #UNASSIGNED unassigned}.
 * <p>
 * Note: if the specified character is not assigned a name by
 * the <i>UnicodeData</i> file (part of the Unicode Character
 * Database maintained by the Unicode Consortium), the returned
 * name is the same as the result of expression
 *
 * <blockquote><code>
 *     Character.UnicodeBlock.of(codePoint)
 *                           .toString()
 *                           .replace('_', ' ')
 *     + " "
 *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
 *
 * </code></blockquote>
 *
 * @param  codePoint the character (Unicode code point)
 *
 * @return the Unicode name of the specified character, or null if
 *         the code point is unassigned.
 *
 * @exception IllegalArgumentException if the specified
 *            <code>codePoint</code> is not a valid Unicode
 *            code point.
 *
 * @since 1.7
 */
public static String getName(int codePoint) {
    if (!isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException();
    }
    final String unicodeName = CharacterName.get(codePoint);
    if (unicodeName != null) {
        return unicodeName;
    }
    if (getType(codePoint) == UNASSIGNED) {
        return null;
    }
    // No name from CharacterName: fall back to "<block name> <HEX CODE POINT>".
    final UnicodeBlock block = UnicodeBlock.of(codePoint);
    final String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
    if (block == null) {
        // should never come here
        return hex;
    }
    return block.toString().replace('_', ' ') + " " + hex;
}
}