1 /* 2 * Copyright 2002-2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 package java.lang; 27 import java.util.Map; 28 import java.util.HashMap; 29 import java.util.Locale; 30 31 /** 32 * The <code>Character</code> class wraps a value of the primitive 33 * type <code>char</code> in an object. An object of type 34 * <code>Character</code> contains a single field whose type is 35 * <code>char</code>. 36 * <p> 37 * In addition, this class provides several methods for determining 38 * a character's category (lowercase letter, digit, etc.) and for converting 39 * characters from uppercase to lowercase and vice versa. 40 * <p> 41 * Character information is based on the Unicode Standard, version 5.1.0. 
42 * <p> 43 * The methods and data of class <code>Character</code> are defined by 44 * the information in the <i>UnicodeData</i> file that is part of the 45 * Unicode Character Database maintained by the Unicode 46 * Consortium. This file specifies various properties including name 47 * and general category for every defined Unicode code point or 48 * character range. 49 * <p> 50 * The file and its description are available from the Unicode Consortium at: 51 * <ul> 52 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 53 * </ul> 54 * 55 * <h4><a name="unicode">Unicode Character Representations</a></h4> 56 * 57 * <p>The <code>char</code> data type (and therefore the value that a 58 * <code>Character</code> object encapsulates) is based on the 59 * original Unicode specification, which defined characters as 60 * fixed-width 16-bit entities. The Unicode standard has since been 61 * changed to allow for characters whose representation requires more 62 * than 16 bits. The range of legal <em>code point</em>s is now 63 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 64 * (Refer to the <a 65 * href="http://www.unicode.org/reports/tr27/#notation"><i> 66 * definition</i></a> of the U+<i>n</i> notation in the Unicode 67 * standard.) 68 * 69 * <p>The set of characters from U+0000 to U+FFFF is sometimes 70 * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a 71 * name="supplementary">Characters</a> whose code points are greater 72 * than U+FFFF are called <em>supplementary character</em>s. The Java 73 * 2 platform uses the UTF-16 representation in <code>char</code> 74 * arrays and in the <code>String</code> and <code>StringBuffer</code> 75 * classes. In this representation, supplementary characters are 76 * represented as a pair of <code>char</code> values, the first from 77 * the <em>high-surrogates</em> range (\uD800-\uDBFF), the 78 * second from the <em>low-surrogates</em> range 79 * (\uDC00-\uDFFF). 
80 * 81 * <p>A <code>char</code> value, therefore, represents Basic 82 * Multilingual Plane (BMP) code points, including the surrogate 83 * code points, or code units of the UTF-16 encoding. An 84 * <code>int</code> value represents all Unicode code points, 85 * including supplementary code points. The lower (least significant) 86 * 21 bits of <code>int</code> are used to represent Unicode code 87 * points and the upper (most significant) 11 bits must be zero. 88 * Unless otherwise specified, the behavior with respect to 89 * supplementary characters and surrogate <code>char</code> values is 90 * as follows: 91 * 92 * <ul> 93 * <li>The methods that only accept a <code>char</code> value cannot support 94 * supplementary characters. They treat <code>char</code> values from the 95 * surrogate ranges as undefined characters. For example, 96 * <code>Character.isLetter('\uD840')</code> returns <code>false</code>, even though 97 * this specific value if followed by any low-surrogate value in a string 98 * would represent a letter. 99 * 100 * <li>The methods that accept an <code>int</code> value support all 101 * Unicode characters, including supplementary characters. For 102 * example, <code>Character.isLetter(0x2F81A)</code> returns 103 * <code>true</code> because the code point value represents a letter 104 * (a CJK ideograph). 105 * </ul> 106 * 107 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 108 * used for character values in the range between U+0000 and U+10FFFF, 109 * and <em>Unicode code unit</em> is used for 16-bit 110 * <code>char</code> values that are code units of the <em>UTF-16</em> 111 * encoding. For more information on Unicode terminology, refer to the 112 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
113 * 114 * @author Lee Boynton 115 * @author Guy Steele 116 * @author Akira Tanaka 117 * @since 1.0 118 */ 119 public final 120 class Character extends Object implements java.io.Serializable, Comparable<Character> { 121 /** 122 * The minimum radix available for conversion to and from strings. 123 * The constant value of this field is the smallest value permitted 124 * for the radix argument in radix-conversion methods such as the 125 * <code>digit</code> method, the <code>forDigit</code> 126 * method, and the <code>toString</code> method of class 127 * <code>Integer</code>. 128 * 129 * @see java.lang.Character#digit(char, int) 130 * @see java.lang.Character#forDigit(int, int) 131 * @see java.lang.Integer#toString(int, int) 132 * @see java.lang.Integer#valueOf(java.lang.String) 133 */ 134 public static final int MIN_RADIX = 2; 135 136 /** 137 * The maximum radix available for conversion to and from strings. 138 * The constant value of this field is the largest value permitted 139 * for the radix argument in radix-conversion methods such as the 140 * <code>digit</code> method, the <code>forDigit</code> 141 * method, and the <code>toString</code> method of class 142 * <code>Integer</code>. 143 * 144 * @see java.lang.Character#digit(char, int) 145 * @see java.lang.Character#forDigit(int, int) 146 * @see java.lang.Integer#toString(int, int) 147 * @see java.lang.Integer#valueOf(java.lang.String) 148 */ 149 public static final int MAX_RADIX = 36; 150 151 /** 152 * The constant value of this field is the smallest value of type 153 * <code>char</code>, <code>'\u0000'</code>. 154 * 155 * @since 1.0.2 156 */ 157 public static final char MIN_VALUE = '\u0000'; 158 159 /** 160 * The constant value of this field is the largest value of type 161 * <code>char</code>, <code>'\uFFFF'</code>. 162 * 163 * @since 1.0.2 164 */ 165 public static final char MAX_VALUE = '\uFFFF'; 166 167 /** 168 * The <code>Class</code> instance representing the primitive type 169 * <code>char</code>. 
170 * 171 * @since 1.1 172 */ 173 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 174 175 /* 176 * Normative general types 177 */ 178 179 /* 180 * General character types 181 */ 182 183 /** 184 * General category "Cn" in the Unicode specification. 185 * @since 1.1 186 */ 187 public static final byte 188 UNASSIGNED = 0; 189 190 /** 191 * General category "Lu" in the Unicode specification. 192 * @since 1.1 193 */ 194 public static final byte 195 UPPERCASE_LETTER = 1; 196 197 /** 198 * General category "Ll" in the Unicode specification. 199 * @since 1.1 200 */ 201 public static final byte 202 LOWERCASE_LETTER = 2; 203 204 /** 205 * General category "Lt" in the Unicode specification. 206 * @since 1.1 207 */ 208 public static final byte 209 TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte 216 MODIFIER_LETTER = 4; 217 218 /** 219 * General category "Lo" in the Unicode specification. 220 * @since 1.1 221 */ 222 public static final byte 223 OTHER_LETTER = 5; 224 225 /** 226 * General category "Mn" in the Unicode specification. 227 * @since 1.1 228 */ 229 public static final byte 230 NON_SPACING_MARK = 6; 231 232 /** 233 * General category "Me" in the Unicode specification. 234 * @since 1.1 235 */ 236 public static final byte 237 ENCLOSING_MARK = 7; 238 239 /** 240 * General category "Mc" in the Unicode specification. 241 * @since 1.1 242 */ 243 public static final byte 244 COMBINING_SPACING_MARK = 8; 245 246 /** 247 * General category "Nd" in the Unicode specification. 248 * @since 1.1 249 */ 250 public static final byte 251 DECIMAL_DIGIT_NUMBER = 9; 252 253 /** 254 * General category "Nl" in the Unicode specification. 255 * @since 1.1 256 */ 257 public static final byte 258 LETTER_NUMBER = 10; 259 260 /** 261 * General category "No" in the Unicode specification. 
262 * @since 1.1 263 */ 264 public static final byte 265 OTHER_NUMBER = 11; 266 267 /** 268 * General category "Zs" in the Unicode specification. 269 * @since 1.1 270 */ 271 public static final byte 272 SPACE_SEPARATOR = 12; 273 274 /** 275 * General category "Zl" in the Unicode specification. 276 * @since 1.1 277 */ 278 public static final byte 279 LINE_SEPARATOR = 13; 280 281 /** 282 * General category "Zp" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte 286 PARAGRAPH_SEPARATOR = 14; 287 288 /** 289 * General category "Cc" in the Unicode specification. 290 * @since 1.1 291 */ 292 public static final byte 293 CONTROL = 15; 294 295 /** 296 * General category "Cf" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte 300 FORMAT = 16; 301 302 /** 303 * General category "Co" in the Unicode specification. 304 * @since 1.1 305 */ 306 public static final byte 307 PRIVATE_USE = 18; 308 309 /** 310 * General category "Cs" in the Unicode specification. 311 * @since 1.1 312 */ 313 public static final byte 314 SURROGATE = 19; 315 316 /** 317 * General category "Pd" in the Unicode specification. 318 * @since 1.1 319 */ 320 public static final byte 321 DASH_PUNCTUATION = 20; 322 323 /** 324 * General category "Ps" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte 328 START_PUNCTUATION = 21; 329 330 /** 331 * General category "Pe" in the Unicode specification. 332 * @since 1.1 333 */ 334 public static final byte 335 END_PUNCTUATION = 22; 336 337 /** 338 * General category "Pc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte 342 CONNECTOR_PUNCTUATION = 23; 343 344 /** 345 * General category "Po" in the Unicode specification. 346 * @since 1.1 347 */ 348 public static final byte 349 OTHER_PUNCTUATION = 24; 350 351 /** 352 * General category "Sm" in the Unicode specification. 
353 * @since 1.1 354 */ 355 public static final byte 356 MATH_SYMBOL = 25; 357 358 /** 359 * General category "Sc" in the Unicode specification. 360 * @since 1.1 361 */ 362 public static final byte 363 CURRENCY_SYMBOL = 26; 364 365 /** 366 * General category "Sk" in the Unicode specification. 367 * @since 1.1 368 */ 369 public static final byte 370 MODIFIER_SYMBOL = 27; 371 372 /** 373 * General category "So" in the Unicode specification. 374 * @since 1.1 375 */ 376 public static final byte 377 OTHER_SYMBOL = 28; 378 379 /** 380 * General category "Pi" in the Unicode specification. 381 * @since 1.4 382 */ 383 public static final byte 384 INITIAL_QUOTE_PUNCTUATION = 29; 385 386 /** 387 * General category "Pf" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte 391 FINAL_QUOTE_PUNCTUATION = 30; 392 393 /** 394 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 395 */ 396 static final int ERROR = 0xFFFFFFFF; 397 398 399 /** 400 * Undefined bidirectional character type. Undefined <code>char</code> 401 * values have undefined directionality in the Unicode specification. 402 * @since 1.4 403 */ 404 public static final byte DIRECTIONALITY_UNDEFINED = -1; 405 406 /** 407 * Strong bidirectional character type "L" in the Unicode specification. 408 * @since 1.4 409 */ 410 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 411 412 /** 413 * Strong bidirectional character type "R" in the Unicode specification. 414 * @since 1.4 415 */ 416 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 417 418 /** 419 * Strong bidirectional character type "AL" in the Unicode specification. 420 * @since 1.4 421 */ 422 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 423 424 /** 425 * Weak bidirectional character type "EN" in the Unicode specification. 
426 * @since 1.4 427 */ 428 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 429 430 /** 431 * Weak bidirectional character type "ES" in the Unicode specification. 432 * @since 1.4 433 */ 434 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 435 436 /** 437 * Weak bidirectional character type "ET" in the Unicode specification. 438 * @since 1.4 439 */ 440 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 441 442 /** 443 * Weak bidirectional character type "AN" in the Unicode specification. 444 * @since 1.4 445 */ 446 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 447 448 /** 449 * Weak bidirectional character type "CS" in the Unicode specification. 450 * @since 1.4 451 */ 452 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 453 454 /** 455 * Weak bidirectional character type "NSM" in the Unicode specification. 456 * @since 1.4 457 */ 458 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 459 460 /** 461 * Weak bidirectional character type "BN" in the Unicode specification. 462 * @since 1.4 463 */ 464 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 465 466 /** 467 * Neutral bidirectional character type "B" in the Unicode specification. 468 * @since 1.4 469 */ 470 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 471 472 /** 473 * Neutral bidirectional character type "S" in the Unicode specification. 474 * @since 1.4 475 */ 476 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 477 478 /** 479 * Neutral bidirectional character type "WS" in the Unicode specification. 480 * @since 1.4 481 */ 482 public static final byte DIRECTIONALITY_WHITESPACE = 12; 483 484 /** 485 * Neutral bidirectional character type "ON" in the Unicode specification. 486 * @since 1.4 487 */ 488 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 489 490 /** 491 * Strong bidirectional character type "LRE" in the Unicode specification. 
492 * @since 1.4 493 */ 494 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 495 496 /** 497 * Strong bidirectional character type "LRO" in the Unicode specification. 498 * @since 1.4 499 */ 500 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 501 502 /** 503 * Strong bidirectional character type "RLE" in the Unicode specification. 504 * @since 1.4 505 */ 506 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 507 508 /** 509 * Strong bidirectional character type "RLO" in the Unicode specification. 510 * @since 1.4 511 */ 512 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 513 514 /** 515 * Weak bidirectional character type "PDF" in the Unicode specification. 516 * @since 1.4 517 */ 518 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 519 520 /** 521 * The minimum value of a 522 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 523 * Unicode high-surrogate code unit</a> 524 * in the UTF-16 encoding, constant <code>'\uD800'</code>. 525 * A high-surrogate is also known as a <i>leading-surrogate</i>. 526 * 527 * @since 1.5 528 */ 529 public static final char MIN_HIGH_SURROGATE = '\uD800'; 530 531 /** 532 * The maximum value of a 533 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 534 * Unicode high-surrogate code unit</a> 535 * in the UTF-16 encoding, constant <code>'\uDBFF'</code>. 536 * A high-surrogate is also known as a <i>leading-surrogate</i>. 537 * 538 * @since 1.5 539 */ 540 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 541 542 /** 543 * The minimum value of a 544 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 545 * Unicode low-surrogate code unit</a> 546 * in the UTF-16 encoding, constant <code>'\uDC00'</code>. 547 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
548 * 549 * @since 1.5 550 */ 551 public static final char MIN_LOW_SURROGATE = '\uDC00'; 552 553 /** 554 * The maximum value of a 555 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 556 * Unicode low-surrogate code unit</a> 557 * in the UTF-16 encoding, constant <code>'\uDFFF'</code>. 558 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 559 * 560 * @since 1.5 561 */ 562 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 563 564 /** 565 * The minimum value of a Unicode surrogate code unit in the 566 * UTF-16 encoding, constant <code>'\uD800'</code>. 567 * 568 * @since 1.5 569 */ 570 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 571 572 /** 573 * The maximum value of a Unicode surrogate code unit in the 574 * UTF-16 encoding, constant <code>'\uDFFF'</code>. 575 * 576 * @since 1.5 577 */ 578 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 579 580 /** 581 * The minimum value of a 582 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 583 * Unicode supplementary code point</a>, constant {@code U+10000}. 584 * 585 * @since 1.5 586 */ 587 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 588 589 /** 590 * The minimum value of a 591 * <a href="http://www.unicode.org/glossary/#code_point"> 592 * Unicode code point</a>, constant {@code U+0000}. 593 * 594 * @since 1.5 595 */ 596 public static final int MIN_CODE_POINT = 0x000000; 597 598 /** 599 * The maximum value of a 600 * <a href="http://www.unicode.org/glossary/#code_point"> 601 * Unicode code point</a>, constant {@code U+10FFFF}. 602 * 603 * @since 1.5 604 */ 605 public static final int MAX_CODE_POINT = 0X10FFFF; 606 607 608 /** 609 * Instances of this class represent particular subsets of the Unicode 610 * character set. The only family of subsets defined in the 611 * <code>Character</code> class is <code>{@link Character.UnicodeBlock 612 * UnicodeBlock}</code>. 
Other portions of the Java API may define other 613 * subsets for their own purposes. 614 * 615 * @since 1.2 616 */ 617 public static class Subset { 618 619 private String name; 620 621 /** 622 * Constructs a new <code>Subset</code> instance. 623 * 624 * @exception NullPointerException if name is <code>null</code> 625 * @param name The name of this subset 626 */ 627 protected Subset(String name) { 628 if (name == null) { 629 throw new NullPointerException("name"); 630 } 631 this.name = name; 632 } 633 634 /** 635 * Compares two <code>Subset</code> objects for equality. 636 * This method returns <code>true</code> if and only if 637 * <code>this</code> and the argument refer to the same 638 * object; since this method is <code>final</code>, this 639 * guarantee holds for all subclasses. 640 */ 641 public final boolean equals(Object obj) { 642 return (this == obj); 643 } 644 645 /** 646 * Returns the standard hash code as defined by the 647 * <code>{@link Object#hashCode}</code> method. This method 648 * is <code>final</code> in order to ensure that the 649 * <code>equals</code> and <code>hashCode</code> methods will 650 * be consistent in all subclasses. 651 */ 652 public final int hashCode() { 653 return super.hashCode(); 654 } 655 656 /** 657 * Returns the name of this subset. 658 */ 659 public final String toString() { 660 return name; 661 } 662 } 663 664 /** 665 * A family of character subsets representing the character blocks in the 666 * Unicode specification. Character blocks generally define characters 667 * used for a specific script or purpose. A character is contained by 668 * at most one Unicode block. 669 * 670 * @since 1.2 671 */ 672 public static final class UnicodeBlock extends Subset { 673 674 private static Map map = new HashMap(); 675 676 /** 677 * Create a UnicodeBlock with the given identifier name. 678 * This name must be the same as the block identifier. 
679 */ 680 private UnicodeBlock(String idName) { 681 super(idName); 682 map.put(idName.toUpperCase(Locale.US), this); 683 } 684 685 /** 686 * Create a UnicodeBlock with the given identifier name and 687 * alias name. 688 */ 689 private UnicodeBlock(String idName, String alias) { 690 this(idName); 691 map.put(alias.toUpperCase(Locale.US), this); 692 } 693 694 /** 695 * Create a UnicodeBlock with the given identifier name and 696 * alias names. 697 */ 698 private UnicodeBlock(String idName, String[] aliasName) { 699 this(idName); 700 if (aliasName != null) { 701 for(int x=0; x<aliasName.length; ++x) { 702 map.put(aliasName[x].toUpperCase(Locale.US), this); 703 } 704 } 705 } 706 707 /** 708 * Constant for the "Basic Latin" Unicode character block. 709 * @since 1.2 710 */ 711 public static final UnicodeBlock BASIC_LATIN = 712 new UnicodeBlock("BASIC_LATIN", new String[] {"Basic Latin", "BasicLatin" }); 713 714 /** 715 * Constant for the "Latin-1 Supplement" Unicode character block. 716 * @since 1.2 717 */ 718 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 719 new UnicodeBlock("LATIN_1_SUPPLEMENT", new String[]{ "Latin-1 Supplement", "Latin-1Supplement"}); 720 721 /** 722 * Constant for the "Latin Extended-A" Unicode character block. 723 * @since 1.2 724 */ 725 public static final UnicodeBlock LATIN_EXTENDED_A = 726 new UnicodeBlock("LATIN_EXTENDED_A", new String[]{ "Latin Extended-A", "LatinExtended-A"}); 727 728 /** 729 * Constant for the "Latin Extended-B" Unicode character block. 730 * @since 1.2 731 */ 732 public static final UnicodeBlock LATIN_EXTENDED_B = 733 new UnicodeBlock("LATIN_EXTENDED_B", new String[] {"Latin Extended-B", "LatinExtended-B"}); 734 735 /** 736 * Constant for the "IPA Extensions" Unicode character block. 
737 * @since 1.2 738 */ 739 public static final UnicodeBlock IPA_EXTENSIONS = 740 new UnicodeBlock("IPA_EXTENSIONS", new String[] {"IPA Extensions", "IPAExtensions"}); 741 742 /** 743 * Constant for the "Spacing Modifier Letters" Unicode character block. 744 * @since 1.2 745 */ 746 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 747 new UnicodeBlock("SPACING_MODIFIER_LETTERS", new String[] { "Spacing Modifier Letters", 748 "SpacingModifierLetters"}); 749 750 /** 751 * Constant for the "Combining Diacritical Marks" Unicode character block. 752 * @since 1.2 753 */ 754 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 755 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", new String[] {"Combining Diacritical Marks", 756 "CombiningDiacriticalMarks" }); 757 758 /** 759 * Constant for the "Greek and Coptic" Unicode character block. 760 * <p> 761 * This block was previously known as the "Greek" block. 762 * 763 * @since 1.2 764 */ 765 public static final UnicodeBlock GREEK 766 = new UnicodeBlock("GREEK", new String[] {"Greek and Coptic", "GreekandCoptic"}); 767 768 /** 769 * Constant for the "Cyrillic" Unicode character block. 770 * @since 1.2 771 */ 772 public static final UnicodeBlock CYRILLIC = 773 new UnicodeBlock("CYRILLIC"); 774 775 /** 776 * Constant for the "Armenian" Unicode character block. 777 * @since 1.2 778 */ 779 public static final UnicodeBlock ARMENIAN = 780 new UnicodeBlock("ARMENIAN"); 781 782 /** 783 * Constant for the "Hebrew" Unicode character block. 784 * @since 1.2 785 */ 786 public static final UnicodeBlock HEBREW = 787 new UnicodeBlock("HEBREW"); 788 789 /** 790 * Constant for the "Arabic" Unicode character block. 791 * @since 1.2 792 */ 793 public static final UnicodeBlock ARABIC = 794 new UnicodeBlock("ARABIC"); 795 796 /** 797 * Constant for the "Devanagari" Unicode character block. 
798 * @since 1.2 799 */ 800 public static final UnicodeBlock DEVANAGARI = 801 new UnicodeBlock("DEVANAGARI"); 802 803 /** 804 * Constant for the "Bengali" Unicode character block. 805 * @since 1.2 806 */ 807 public static final UnicodeBlock BENGALI = 808 new UnicodeBlock("BENGALI"); 809 810 /** 811 * Constant for the "Gurmukhi" Unicode character block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock GURMUKHI = 815 new UnicodeBlock("GURMUKHI"); 816 817 /** 818 * Constant for the "Gujarati" Unicode character block. 819 * @since 1.2 820 */ 821 public static final UnicodeBlock GUJARATI = 822 new UnicodeBlock("GUJARATI"); 823 824 /** 825 * Constant for the "Oriya" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock ORIYA = 829 new UnicodeBlock("ORIYA"); 830 831 /** 832 * Constant for the "Tamil" Unicode character block. 833 * @since 1.2 834 */ 835 public static final UnicodeBlock TAMIL = 836 new UnicodeBlock("TAMIL"); 837 838 /** 839 * Constant for the "Telugu" Unicode character block. 840 * @since 1.2 841 */ 842 public static final UnicodeBlock TELUGU = 843 new UnicodeBlock("TELUGU"); 844 845 /** 846 * Constant for the "Kannada" Unicode character block. 847 * @since 1.2 848 */ 849 public static final UnicodeBlock KANNADA = 850 new UnicodeBlock("KANNADA"); 851 852 /** 853 * Constant for the "Malayalam" Unicode character block. 854 * @since 1.2 855 */ 856 public static final UnicodeBlock MALAYALAM = 857 new UnicodeBlock("MALAYALAM"); 858 859 /** 860 * Constant for the "Thai" Unicode character block. 861 * @since 1.2 862 */ 863 public static final UnicodeBlock THAI = 864 new UnicodeBlock("THAI"); 865 866 /** 867 * Constant for the "Lao" Unicode character block. 868 * @since 1.2 869 */ 870 public static final UnicodeBlock LAO = 871 new UnicodeBlock("LAO"); 872 873 /** 874 * Constant for the "Tibetan" Unicode character block. 
875 * @since 1.2 876 */ 877 public static final UnicodeBlock TIBETAN = 878 new UnicodeBlock("TIBETAN"); 879 880 /** 881 * Constant for the "Georgian" Unicode character block. 882 * @since 1.2 883 */ 884 public static final UnicodeBlock GEORGIAN = 885 new UnicodeBlock("GEORGIAN"); 886 887 /** 888 * Constant for the "Hangul Jamo" Unicode character block. 889 * @since 1.2 890 */ 891 public static final UnicodeBlock HANGUL_JAMO = 892 new UnicodeBlock("HANGUL_JAMO", new String[] {"Hangul Jamo", "HangulJamo"}); 893 894 /** 895 * Constant for the "Latin Extended Additional" Unicode character block. 896 * @since 1.2 897 */ 898 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 899 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", new String[] {"Latin Extended Additional", 900 "LatinExtendedAdditional"}); 901 902 /** 903 * Constant for the "Greek Extended" Unicode character block. 904 * @since 1.2 905 */ 906 public static final UnicodeBlock GREEK_EXTENDED = 907 new UnicodeBlock("GREEK_EXTENDED", new String[] {"Greek Extended", "GreekExtended"}); 908 909 /** 910 * Constant for the "General Punctuation" Unicode character block. 911 * @since 1.2 912 */ 913 public static final UnicodeBlock GENERAL_PUNCTUATION = 914 new UnicodeBlock("GENERAL_PUNCTUATION", new String[] {"General Punctuation", "GeneralPunctuation"}); 915 916 /** 917 * Constant for the "Superscripts and Subscripts" Unicode character block. 918 * @since 1.2 919 */ 920 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 921 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", new String[] {"Superscripts and Subscripts", 922 "SuperscriptsandSubscripts" }); 923 924 /** 925 * Constant for the "Currency Symbols" Unicode character block. 
926 * @since 1.2 927 */ 928 public static final UnicodeBlock CURRENCY_SYMBOLS = 929 new UnicodeBlock("CURRENCY_SYMBOLS", new String[] { "Currency Symbols", "CurrencySymbols"}); 930 931 /** 932 * Constant for the "Combining Diacritical Marks for Symbols" Unicode character block. 933 * <p> 934 * This block was previously known as "Combining Marks for Symbols". 935 * @since 1.2 936 */ 937 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 938 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", new String[] {"Combining Diacritical Marks for Symbols", 939 "CombiningDiacriticalMarksforSymbols", 940 "Combining Marks for Symbols", 941 "CombiningMarksforSymbols" }); 942 943 /** 944 * Constant for the "Letterlike Symbols" Unicode character block. 945 * @since 1.2 946 */ 947 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 948 new UnicodeBlock("LETTERLIKE_SYMBOLS", new String[] { "Letterlike Symbols", "LetterlikeSymbols"}); 949 950 /** 951 * Constant for the "Number Forms" Unicode character block. 952 * @since 1.2 953 */ 954 public static final UnicodeBlock NUMBER_FORMS = 955 new UnicodeBlock("NUMBER_FORMS", new String[] {"Number Forms", "NumberForms"}); 956 957 /** 958 * Constant for the "Arrows" Unicode character block. 959 * @since 1.2 960 */ 961 public static final UnicodeBlock ARROWS = 962 new UnicodeBlock("ARROWS"); 963 964 /** 965 * Constant for the "Mathematical Operators" Unicode character block. 966 * @since 1.2 967 */ 968 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 969 new UnicodeBlock("MATHEMATICAL_OPERATORS", new String[] {"Mathematical Operators", 970 "MathematicalOperators"}); 971 972 /** 973 * Constant for the "Miscellaneous Technical" Unicode character block. 
974 * @since 1.2 975 */ 976 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 977 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", new String[] {"Miscellaneous Technical", 978 "MiscellaneousTechnical"}); 979 980 /** 981 * Constant for the "Control Pictures" Unicode character block. 982 * @since 1.2 983 */ 984 public static final UnicodeBlock CONTROL_PICTURES = 985 new UnicodeBlock("CONTROL_PICTURES", new String[] {"Control Pictures", "ControlPictures"}); 986 987 /** 988 * Constant for the "Optical Character Recognition" Unicode character block. 989 * @since 1.2 990 */ 991 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 992 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", new String[] {"Optical Character Recognition", 993 "OpticalCharacterRecognition"}); 994 995 /** 996 * Constant for the "Enclosed Alphanumerics" Unicode character block. 997 * @since 1.2 998 */ 999 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1000 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", new String[] {"Enclosed Alphanumerics", 1001 "EnclosedAlphanumerics"}); 1002 1003 /** 1004 * Constant for the "Box Drawing" Unicode character block. 1005 * @since 1.2 1006 */ 1007 public static final UnicodeBlock BOX_DRAWING = 1008 new UnicodeBlock("BOX_DRAWING", new String[] {"Box Drawing", "BoxDrawing"}); 1009 1010 /** 1011 * Constant for the "Block Elements" Unicode character block. 1012 * @since 1.2 1013 */ 1014 public static final UnicodeBlock BLOCK_ELEMENTS = 1015 new UnicodeBlock("BLOCK_ELEMENTS", new String[] {"Block Elements", "BlockElements"}); 1016 1017 /** 1018 * Constant for the "Geometric Shapes" Unicode character block. 1019 * @since 1.2 1020 */ 1021 public static final UnicodeBlock GEOMETRIC_SHAPES = 1022 new UnicodeBlock("GEOMETRIC_SHAPES", new String[] {"Geometric Shapes", "GeometricShapes"}); 1023 1024 /** 1025 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1026 * @since 1.2 1027 */ 1028 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1029 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", new String[] {"Miscellaneous Symbols", 1030 "MiscellaneousSymbols"}); 1031 1032 /** 1033 * Constant for the "Dingbats" Unicode character block. 1034 * @since 1.2 1035 */ 1036 public static final UnicodeBlock DINGBATS = 1037 new UnicodeBlock("DINGBATS"); 1038 1039 /** 1040 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1041 * @since 1.2 1042 */ 1043 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1044 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", new String[] {"CJK Symbols and Punctuation", 1045 "CJKSymbolsandPunctuation"}); 1046 1047 /** 1048 * Constant for the "Hiragana" Unicode character block. 1049 * @since 1.2 1050 */ 1051 public static final UnicodeBlock HIRAGANA = 1052 new UnicodeBlock("HIRAGANA"); 1053 1054 /** 1055 * Constant for the "Katakana" Unicode character block. 1056 * @since 1.2 1057 */ 1058 public static final UnicodeBlock KATAKANA = 1059 new UnicodeBlock("KATAKANA"); 1060 1061 /** 1062 * Constant for the "Bopomofo" Unicode character block. 1063 * @since 1.2 1064 */ 1065 public static final UnicodeBlock BOPOMOFO = 1066 new UnicodeBlock("BOPOMOFO"); 1067 1068 /** 1069 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1070 * @since 1.2 1071 */ 1072 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1073 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", new String[] {"Hangul Compatibility Jamo", 1074 "HangulCompatibilityJamo"}); 1075 1076 /** 1077 * Constant for the "Kanbun" Unicode character block. 1078 * @since 1.2 1079 */ 1080 public static final UnicodeBlock KANBUN = 1081 new UnicodeBlock("KANBUN"); 1082 1083 /** 1084 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1085 * @since 1.2 1086 */ 1087 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1088 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", new String[] {"Enclosed CJK Letters and Months", 1089 "EnclosedCJKLettersandMonths"}); 1090 1091 /** 1092 * Constant for the "CJK Compatibility" Unicode character block. 1093 * @since 1.2 1094 */ 1095 public static final UnicodeBlock CJK_COMPATIBILITY = 1096 new UnicodeBlock("CJK_COMPATIBILITY", new String[] {"CJK Compatibility", "CJKCompatibility"}); 1097 1098 /** 1099 * Constant for the "CJK Unified Ideographs" Unicode character block. 1100 * @since 1.2 1101 */ 1102 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1103 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", new String[] {"CJK Unified Ideographs", 1104 "CJKUnifiedIdeographs"}); 1105 1106 /** 1107 * Constant for the "Hangul Syllables" Unicode character block. 1108 * @since 1.2 1109 */ 1110 public static final UnicodeBlock HANGUL_SYLLABLES = 1111 new UnicodeBlock("HANGUL_SYLLABLES", new String[] {"Hangul Syllables", "HangulSyllables"}); 1112 1113 /** 1114 * Constant for the "Private Use Area" Unicode character block. 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock PRIVATE_USE_AREA = 1118 new UnicodeBlock("PRIVATE_USE_AREA", new String[] {"Private Use Area", "PrivateUseArea"}); 1119 1120 /** 1121 * Constant for the "CJK Compatibility Ideographs" Unicode character block. 1122 * @since 1.2 1123 */ 1124 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1125 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1126 new String[] {"CJK Compatibility Ideographs", 1127 "CJKCompatibilityIdeographs"}); 1128 1129 /** 1130 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1131 * @since 1.2 1132 */ 1133 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1134 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", new String[] {"Alphabetic Presentation Forms", 1135 "AlphabeticPresentationForms"}); 1136 1137 /** 1138 * Constant for the "Arabic Presentation Forms-A" Unicode character block. 1139 * @since 1.2 1140 */ 1141 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1142 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", new String[] {"Arabic Presentation Forms-A", 1143 "ArabicPresentationForms-A"}); 1144 1145 /** 1146 * Constant for the "Combining Half Marks" Unicode character block. 1147 * @since 1.2 1148 */ 1149 public static final UnicodeBlock COMBINING_HALF_MARKS = 1150 new UnicodeBlock("COMBINING_HALF_MARKS", new String[] {"Combining Half Marks", 1151 "CombiningHalfMarks"}); 1152 1153 /** 1154 * Constant for the "CJK Compatibility Forms" Unicode character block. 1155 * @since 1.2 1156 */ 1157 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1158 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", new String[] {"CJK Compatibility Forms", 1159 "CJKCompatibilityForms"}); 1160 1161 /** 1162 * Constant for the "Small Form Variants" Unicode character block. 1163 * @since 1.2 1164 */ 1165 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1166 new UnicodeBlock("SMALL_FORM_VARIANTS", new String[] {"Small Form Variants", 1167 "SmallFormVariants"}); 1168 1169 /** 1170 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1171 * @since 1.2 1172 */ 1173 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1174 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", new String[] {"Arabic Presentation Forms-B", 1175 "ArabicPresentationForms-B"}); 1176 1177 /** 1178 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character block. 
1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1182 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1183 new String[] {"Halfwidth and Fullwidth Forms", 1184 "HalfwidthandFullwidthForms"}); 1185 1186 /** 1187 * Constant for the "Specials" Unicode character block. 1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock SPECIALS = 1191 new UnicodeBlock("SPECIALS"); 1192 1193 /** 1194 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1195 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1196 * {@link #LOW_SURROGATES}. These new constants match 1197 * the block definitions of the Unicode Standard. 1198 * The {@link #of(char)} and {@link #of(int)} methods 1199 * return the new constants, not SURROGATES_AREA. 1200 */ 1201 @Deprecated 1202 public static final UnicodeBlock SURROGATES_AREA = 1203 new UnicodeBlock("SURROGATES_AREA"); 1204 1205 /** 1206 * Constant for the "Syriac" Unicode character block. 1207 * @since 1.4 1208 */ 1209 public static final UnicodeBlock SYRIAC = 1210 new UnicodeBlock("SYRIAC"); 1211 1212 /** 1213 * Constant for the "Thaana" Unicode character block. 1214 * @since 1.4 1215 */ 1216 public static final UnicodeBlock THAANA = 1217 new UnicodeBlock("THAANA"); 1218 1219 /** 1220 * Constant for the "Sinhala" Unicode character block. 1221 * @since 1.4 1222 */ 1223 public static final UnicodeBlock SINHALA = 1224 new UnicodeBlock("SINHALA"); 1225 1226 /** 1227 * Constant for the "Myanmar" Unicode character block. 1228 * @since 1.4 1229 */ 1230 public static final UnicodeBlock MYANMAR = 1231 new UnicodeBlock("MYANMAR"); 1232 1233 /** 1234 * Constant for the "Ethiopic" Unicode character block. 1235 * @since 1.4 1236 */ 1237 public static final UnicodeBlock ETHIOPIC = 1238 new UnicodeBlock("ETHIOPIC"); 1239 1240 /** 1241 * Constant for the "Cherokee" Unicode character block. 
1242 * @since 1.4 1243 */ 1244 public static final UnicodeBlock CHEROKEE = 1245 new UnicodeBlock("CHEROKEE"); 1246 1247 /** 1248 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1249 * @since 1.4 1250 */ 1251 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1252 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1253 new String[] {"Unified Canadian Aboriginal Syllabics", 1254 "UnifiedCanadianAboriginalSyllabics"}); 1255 1256 /** 1257 * Constant for the "Ogham" Unicode character block. 1258 * @since 1.4 1259 */ 1260 public static final UnicodeBlock OGHAM = 1261 new UnicodeBlock("OGHAM"); 1262 1263 /** 1264 * Constant for the "Runic" Unicode character block. 1265 * @since 1.4 1266 */ 1267 public static final UnicodeBlock RUNIC = 1268 new UnicodeBlock("RUNIC"); 1269 1270 /** 1271 * Constant for the "Khmer" Unicode character block. 1272 * @since 1.4 1273 */ 1274 public static final UnicodeBlock KHMER = 1275 new UnicodeBlock("KHMER"); 1276 1277 /** 1278 * Constant for the "Mongolian" Unicode character block. 1279 * @since 1.4 1280 */ 1281 public static final UnicodeBlock MONGOLIAN = 1282 new UnicodeBlock("MONGOLIAN"); 1283 1284 /** 1285 * Constant for the "Braille Patterns" Unicode character block. 1286 * @since 1.4 1287 */ 1288 public static final UnicodeBlock BRAILLE_PATTERNS = 1289 new UnicodeBlock("BRAILLE_PATTERNS", new String[] {"Braille Patterns", 1290 "BraillePatterns"}); 1291 1292 /** 1293 * Constant for the "CJK Radicals Supplement" Unicode character block. 1294 * @since 1.4 1295 */ 1296 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1297 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", new String[] {"CJK Radicals Supplement", 1298 "CJKRadicalsSupplement"}); 1299 1300 /** 1301 * Constant for the "Kangxi Radicals" Unicode character block. 
1302 * @since 1.4 1303 */ 1304 public static final UnicodeBlock KANGXI_RADICALS = 1305 new UnicodeBlock("KANGXI_RADICALS", new String[] {"Kangxi Radicals", "KangxiRadicals"}); 1306 1307 /** 1308 * Constant for the "Ideographic Description Characters" Unicode character block. 1309 * @since 1.4 1310 */ 1311 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1312 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", new String[] {"Ideographic Description Characters", 1313 "IdeographicDescriptionCharacters"}); 1314 1315 /** 1316 * Constant for the "Bopomofo Extended" Unicode character block. 1317 * @since 1.4 1318 */ 1319 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1320 new UnicodeBlock("BOPOMOFO_EXTENDED", new String[] {"Bopomofo Extended", 1321 "BopomofoExtended"}); 1322 1323 /** 1324 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1325 * @since 1.4 1326 */ 1327 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1328 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", new String[] {"CJK Unified Ideographs Extension A", 1329 "CJKUnifiedIdeographsExtensionA"}); 1330 1331 /** 1332 * Constant for the "Yi Syllables" Unicode character block. 1333 * @since 1.4 1334 */ 1335 public static final UnicodeBlock YI_SYLLABLES = 1336 new UnicodeBlock("YI_SYLLABLES", new String[] {"Yi Syllables", "YiSyllables"}); 1337 1338 /** 1339 * Constant for the "Yi Radicals" Unicode character block. 1340 * @since 1.4 1341 */ 1342 public static final UnicodeBlock YI_RADICALS = 1343 new UnicodeBlock("YI_RADICALS", new String[] {"Yi Radicals", "YiRadicals"}); 1344 1345 1346 /** 1347 * Constant for the "Cyrillic Supplementary" Unicode character block. 
1348 * @since 1.5 1349 */ 1350 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1351 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1352 new String[] {"Cyrillic Supplementary", 1353 "CyrillicSupplementary", 1354 "Cyrillic Supplement", 1355 "CyrillicSupplement"}); 1356 1357 /** 1358 * Constant for the "Tagalog" Unicode character block. 1359 * @since 1.5 1360 */ 1361 public static final UnicodeBlock TAGALOG = 1362 new UnicodeBlock("TAGALOG"); 1363 1364 /** 1365 * Constant for the "Hanunoo" Unicode character block. 1366 * @since 1.5 1367 */ 1368 public static final UnicodeBlock HANUNOO = 1369 new UnicodeBlock("HANUNOO"); 1370 1371 /** 1372 * Constant for the "Buhid" Unicode character block. 1373 * @since 1.5 1374 */ 1375 public static final UnicodeBlock BUHID = 1376 new UnicodeBlock("BUHID"); 1377 1378 /** 1379 * Constant for the "Tagbanwa" Unicode character block. 1380 * @since 1.5 1381 */ 1382 public static final UnicodeBlock TAGBANWA = 1383 new UnicodeBlock("TAGBANWA"); 1384 1385 /** 1386 * Constant for the "Limbu" Unicode character block. 1387 * @since 1.5 1388 */ 1389 public static final UnicodeBlock LIMBU = 1390 new UnicodeBlock("LIMBU"); 1391 1392 /** 1393 * Constant for the "Tai Le" Unicode character block. 1394 * @since 1.5 1395 */ 1396 public static final UnicodeBlock TAI_LE = 1397 new UnicodeBlock("TAI_LE", new String[] {"Tai Le", "TaiLe"}); 1398 1399 /** 1400 * Constant for the "Khmer Symbols" Unicode character block. 1401 * @since 1.5 1402 */ 1403 public static final UnicodeBlock KHMER_SYMBOLS = 1404 new UnicodeBlock("KHMER_SYMBOLS", new String[] {"Khmer Symbols", "KhmerSymbols"}); 1405 1406 /** 1407 * Constant for the "Phonetic Extensions" Unicode character block. 
1408 * @since 1.5 1409 */ 1410 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1411 new UnicodeBlock("PHONETIC_EXTENSIONS", new String[] {"Phonetic Extensions", "PhoneticExtensions"}); 1412 1413 /** 1414 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1415 * @since 1.5 1416 */ 1417 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1418 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1419 new String[]{"Miscellaneous Mathematical Symbols-A", 1420 "MiscellaneousMathematicalSymbols-A"}); 1421 1422 /** 1423 * Constant for the "Supplemental Arrows-A" Unicode character block. 1424 * @since 1.5 1425 */ 1426 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1427 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", new String[] {"Supplemental Arrows-A", 1428 "SupplementalArrows-A"}); 1429 1430 /** 1431 * Constant for the "Supplemental Arrows-B" Unicode character block. 1432 * @since 1.5 1433 */ 1434 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1435 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", new String[] {"Supplemental Arrows-B", 1436 "SupplementalArrows-B"}); 1437 1438 /** 1439 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode character block. 1440 * @since 1.5 1441 */ 1442 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1443 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1444 new String[] {"Miscellaneous Mathematical Symbols-B", 1445 "MiscellaneousMathematicalSymbols-B"}); 1446 1447 /** 1448 * Constant for the "Supplemental Mathematical Operators" Unicode character block. 1449 * @since 1.5 1450 */ 1451 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1452 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1453 new String[]{"Supplemental Mathematical Operators", 1454 "SupplementalMathematicalOperators"} ); 1455 1456 /** 1457 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character block. 
1458 * @since 1.5 1459 */ 1460 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1461 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", new String[] {"Miscellaneous Symbols and Arrows", 1462 "MiscellaneousSymbolsandArrows"}); 1463 1464 /** 1465 * Constant for the "Katakana Phonetic Extensions" Unicode character block. 1466 * @since 1.5 1467 */ 1468 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1469 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", new String[] {"Katakana Phonetic Extensions", 1470 "KatakanaPhoneticExtensions"}); 1471 1472 /** 1473 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1474 * @since 1.5 1475 */ 1476 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1477 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", new String[] {"Yijing Hexagram Symbols", 1478 "YijingHexagramSymbols"}); 1479 1480 /** 1481 * Constant for the "Variation Selectors" Unicode character block. 1482 * @since 1.5 1483 */ 1484 public static final UnicodeBlock VARIATION_SELECTORS = 1485 new UnicodeBlock("VARIATION_SELECTORS", new String[] {"Variation Selectors", "VariationSelectors"}); 1486 1487 /** 1488 * Constant for the "Linear B Syllabary" Unicode character block. 1489 * @since 1.5 1490 */ 1491 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1492 new UnicodeBlock("LINEAR_B_SYLLABARY", new String[] {"Linear B Syllabary", "LinearBSyllabary"}); 1493 1494 /** 1495 * Constant for the "Linear B Ideograms" Unicode character block. 1496 * @since 1.5 1497 */ 1498 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1499 new UnicodeBlock("LINEAR_B_IDEOGRAMS", new String[] {"Linear B Ideograms", "LinearBIdeograms"}); 1500 1501 /** 1502 * Constant for the "Aegean Numbers" Unicode character block. 
1503 * @since 1.5 1504 */ 1505 public static final UnicodeBlock AEGEAN_NUMBERS = 1506 new UnicodeBlock("AEGEAN_NUMBERS", new String[] {"Aegean Numbers", "AegeanNumbers"}); 1507 1508 /** 1509 * Constant for the "Old Italic" Unicode character block. 1510 * @since 1.5 1511 */ 1512 public static final UnicodeBlock OLD_ITALIC = 1513 new UnicodeBlock("OLD_ITALIC", new String[] {"Old Italic", "OldItalic"}); 1514 1515 /** 1516 * Constant for the "Gothic" Unicode character block. 1517 * @since 1.5 1518 */ 1519 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); 1520 1521 /** 1522 * Constant for the "Ugaritic" Unicode character block. 1523 * @since 1.5 1524 */ 1525 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); 1526 1527 /** 1528 * Constant for the "Deseret" Unicode character block. 1529 * @since 1.5 1530 */ 1531 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); 1532 1533 /** 1534 * Constant for the "Shavian" Unicode character block. 1535 * @since 1.5 1536 */ 1537 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); 1538 1539 /** 1540 * Constant for the "Osmanya" Unicode character block. 1541 * @since 1.5 1542 */ 1543 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); 1544 1545 /** 1546 * Constant for the "Cypriot Syllabary" Unicode character block. 1547 * @since 1.5 1548 */ 1549 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1550 new UnicodeBlock("CYPRIOT_SYLLABARY", new String[] {"Cypriot Syllabary", "CypriotSyllabary"}); 1551 1552 /** 1553 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1554 * @since 1.5 1555 */ 1556 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1557 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", new String[] {"Byzantine Musical Symbols", 1558 "ByzantineMusicalSymbols"}); 1559 1560 /** 1561 * Constant for the "Musical Symbols" Unicode character block. 
1562 * @since 1.5 1563 */ 1564 public static final UnicodeBlock MUSICAL_SYMBOLS = 1565 new UnicodeBlock("MUSICAL_SYMBOLS", new String[] {"Musical Symbols", "MusicalSymbols"}); 1566 1567 /** 1568 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1569 * @since 1.5 1570 */ 1571 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1572 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", new String[] {"Tai Xuan Jing Symbols", 1573 "TaiXuanJingSymbols"}); 1574 1575 /** 1576 * Constant for the "Mathematical Alphanumeric Symbols" Unicode character block. 1577 * @since 1.5 1578 */ 1579 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1580 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1581 new String[] {"Mathematical Alphanumeric Symbols", "MathematicalAlphanumericSymbols"}); 1582 1583 /** 1584 * Constant for the "CJK Unified Ideographs Extension B" Unicode character block. 1585 * @since 1.5 1586 */ 1587 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1588 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1589 new String[] {"CJK Unified Ideographs Extension B", "CJKUnifiedIdeographsExtensionB"}); 1590 1591 /** 1592 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1593 * @since 1.5 1594 */ 1595 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1596 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1597 new String[]{"CJK Compatibility Ideographs Supplement", 1598 "CJKCompatibilityIdeographsSupplement"}); 1599 1600 /** 1601 * Constant for the "Tags" Unicode character block. 1602 * @since 1.5 1603 */ 1604 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); 1605 1606 /** 1607 * Constant for the "Variation Selectors Supplement" Unicode character block. 
1608 * @since 1.5 1609 */ 1610 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1611 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", new String[] {"Variation Selectors Supplement", 1612 "VariationSelectorsSupplement"}); 1613 1614 /** 1615 * Constant for the "Supplementary Private Use Area-A" Unicode character block. 1616 * @since 1.5 1617 */ 1618 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1619 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1620 new String[] {"Supplementary Private Use Area-A", 1621 "SupplementaryPrivateUseArea-A"}); 1622 1623 /** 1624 * Constant for the "Supplementary Private Use Area-B" Unicode character block. 1625 * @since 1.5 1626 */ 1627 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1628 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1629 new String[] {"Supplementary Private Use Area-B", 1630 "SupplementaryPrivateUseArea-B"}); 1631 1632 /** 1633 * Constant for the "High Surrogates" Unicode character block. 1634 * This block represents codepoint values in the high surrogate 1635 * range: 0xD800 through 0xDB7F 1636 * 1637 * @since 1.5 1638 */ 1639 public static final UnicodeBlock HIGH_SURROGATES = 1640 new UnicodeBlock("HIGH_SURROGATES", new String[] {"High Surrogates", "HighSurrogates"}); 1641 1642 /** 1643 * Constant for the "High Private Use Surrogates" Unicode character block. 1644 * This block represents codepoint values in the high surrogate 1645 * range: 0xDB80 through 0xDBFF 1646 * 1647 * @since 1.5 1648 */ 1649 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1650 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", new String[] { "High Private Use Surrogates", 1651 "HighPrivateUseSurrogates"}); 1652 1653 /** 1654 * Constant for the "Low Surrogates" Unicode character block. 
1655 * This block represents codepoint values in the high surrogate 1656 * range: 0xDC00 through 0xDFFF 1657 * 1658 * @since 1.5 1659 */ 1660 public static final UnicodeBlock LOW_SURROGATES = 1661 new UnicodeBlock("LOW_SURROGATES", new String[] {"Low Surrogates", "LowSurrogates"}); 1662 1663 /** 1664 * Constant for the "Arabic Supplement" Unicode character block. 1665 * @since 1.7 1666 */ 1667 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1668 new UnicodeBlock("ARABIC_SUPPLEMENT", 1669 new String[] { "Arabic Supplement", 1670 "ArabicSupplement"}); 1671 1672 /** 1673 * Constant for the "NKo" Unicode character block. 1674 * @since 1.7 1675 */ 1676 public static final UnicodeBlock NKO = new UnicodeBlock("NKO"); 1677 1678 /** 1679 * Constant for the "Ethiopic Supplement" Unicode character block. 1680 * @since 1.7 1681 */ 1682 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1683 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1684 new String[] { "Ethiopic Supplement", 1685 "EthiopicSupplement"}); 1686 1687 /** 1688 * Constant for the "New Tai Lue" Unicode character block. 1689 * @since 1.7 1690 */ 1691 public static final UnicodeBlock NEW_TAI_LUE = 1692 new UnicodeBlock("NEW_TAI_LUE", 1693 new String[] { "New Tai Lue", 1694 "NewTaiLue"}); 1695 1696 /** 1697 * Constant for the "Buginese" Unicode character block. 1698 * @since 1.7 1699 */ 1700 public static final UnicodeBlock BUGINESE = 1701 new UnicodeBlock("BUGINESE"); 1702 1703 /** 1704 * Constant for the "Balinese" Unicode character block. 1705 * @since 1.7 1706 */ 1707 public static final UnicodeBlock BALINESE = 1708 new UnicodeBlock("BALINESE"); 1709 1710 /** 1711 * Constant for the "Sundanese" Unicode character block. 1712 * @since 1.7 1713 */ 1714 public static final UnicodeBlock SUNDANESE = 1715 new UnicodeBlock("SUNDANESE"); 1716 1717 /** 1718 * Constant for the "Lepcha" Unicode character block. 
1719 * @since 1.7 1720 */ 1721 public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA"); 1722 1723 /** 1724 * Constant for the "Ol Chiki" Unicode character block. 1725 * @since 1.7 1726 */ 1727 public static final UnicodeBlock OL_CHIKI = 1728 new UnicodeBlock("OL_CHIKI", 1729 new String[] { "Ol Chiki", 1730 "OlChiki"}); 1731 1732 /** 1733 * Constant for the "Phonetic Extensions Supplement" Unicode character 1734 * block. 1735 * @since 1.7 1736 */ 1737 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1738 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1739 new String[] { "Phonetic Extensions Supplement", 1740 "PhoneticExtensionsSupplement"}); 1741 1742 /** 1743 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1744 * character block. 1745 * @since 1.7 1746 */ 1747 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1748 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1749 new String[] { "Combining Diacritical Marks Supplement", 1750 "CombiningDiacriticalMarksSupplement"}); 1751 1752 /** 1753 * Constant for the "Glagolitic" Unicode character block. 1754 * @since 1.7 1755 */ 1756 public static final UnicodeBlock GLAGOLITIC = 1757 new UnicodeBlock("GLAGOLITIC"); 1758 1759 /** 1760 * Constant for the "Latin Extended-C" Unicode character block. 1761 * @since 1.7 1762 */ 1763 public static final UnicodeBlock LATIN_EXTENDED_C = 1764 new UnicodeBlock("LATIN_EXTENDED_C", 1765 new String[] { "Latin Extended-C", 1766 "LatinExtended-C"}); 1767 1768 /** 1769 * Constant for the "Coptic" Unicode character block. 1770 * @since 1.7 1771 */ 1772 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC"); 1773 1774 /** 1775 * Constant for the "Georgian Supplement" Unicode character block. 
1776 * @since 1.7 1777 */ 1778 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1779 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1780 new String[] { "Georgian Supplement", 1781 "GeorgianSupplement"}); 1782 1783 /** 1784 * Constant for the "Tifinagh" Unicode character block. 1785 * @since 1.7 1786 */ 1787 public static final UnicodeBlock TIFINAGH = 1788 new UnicodeBlock("TIFINAGH"); 1789 1790 /** 1791 * Constant for the "Ethiopic Extended" Unicode character block. 1792 * @since 1.7 1793 */ 1794 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1795 new UnicodeBlock("ETHIOPIC_EXTENDED", 1796 new String[] { "Ethiopic Extended", 1797 "EthiopicExtended"}); 1798 1799 /** 1800 * Constant for the "Cyrillic Extended-A" Unicode character block. 1801 * @since 1.7 1802 */ 1803 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1804 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1805 new String[] { "Cyrillic Extended-A", 1806 "CyrillicExtended-A"}); 1807 1808 /** 1809 * Constant for the "Supplemental Punctuation" Unicode character block. 1810 * @since 1.7 1811 */ 1812 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1813 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1814 new String[] { "Supplemental Punctuation", 1815 "SupplementalPunctuation"}); 1816 1817 /** 1818 * Constant for the "CJK Strokes" Unicode character block. 1819 * @since 1.7 1820 */ 1821 public static final UnicodeBlock CJK_STROKES = 1822 new UnicodeBlock("CJK_STROKES", 1823 new String[] { "CJK Strokes", 1824 "CJKStrokes"}); 1825 1826 /** 1827 * Constant for the "Vai" Unicode character block. 1828 * @since 1.7 1829 */ 1830 public static final UnicodeBlock VAI = new UnicodeBlock("VAI"); 1831 1832 /** 1833 * Constant for the "Cyrillic Extended-B" Unicode character block. 
1834 * @since 1.7 1835 */ 1836 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1837 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1838 new String[] { "Cyrillic Extended-B", 1839 "CyrillicExtended-B"}); 1840 1841 /** 1842 * Constant for the "Modifier Tone Letters" Unicode character block. 1843 * @since 1.7 1844 */ 1845 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1846 new UnicodeBlock("MODIFIER_TONE_LETTERS", 1847 new String[] { "Modifier Tone Letters", 1848 "ModifierToneLetters"}); 1849 1850 /** 1851 * Constant for the "Latin Extended-D" Unicode character block. 1852 * @since 1.7 1853 */ 1854 public static final UnicodeBlock LATIN_EXTENDED_D = 1855 new UnicodeBlock("LATIN_EXTENDED_D", 1856 new String[] { "Latin Extended-D", 1857 "LatinExtended-D"}); 1858 1859 /** 1860 * Constant for the "Syloti Nagri" Unicode character block. 1861 * @since 1.7 1862 */ 1863 public static final UnicodeBlock SYLOTI_NAGRI = 1864 new UnicodeBlock("SYLOTI_NAGRI", 1865 new String[] { "Syloti Nagri", 1866 "SylotiNagri"}); 1867 1868 /** 1869 * Constant for the "Phags-pa" Unicode character block. 1870 * @since 1.7 1871 */ 1872 public static final UnicodeBlock PHAGS_PA = 1873 new UnicodeBlock("PHAGS_PA", new String[] { "Phags-pa"}); 1874 1875 /** 1876 * Constant for the "Saurashtra" Unicode character block. 1877 * @since 1.7 1878 */ 1879 public static final UnicodeBlock SAURASHTRA = 1880 new UnicodeBlock("SAURASHTRA"); 1881 1882 /** 1883 * Constant for the "Kayah Li" Unicode character block. 1884 * @since 1.7 1885 */ 1886 public static final UnicodeBlock KAYAH_LI = 1887 new UnicodeBlock("KAYAH_LI", 1888 new String[] { "Kayah Li", 1889 "KayahLi"}); 1890 1891 /** 1892 * Constant for the "Rejang" Unicode character block. 1893 * @since 1.7 1894 */ 1895 public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG"); 1896 1897 /** 1898 * Constant for the "Cham" Unicode character block. 
1899 * @since 1.7 1900 */ 1901 public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM"); 1902 1903 /** 1904 * Constant for the "Vertical Forms" Unicode character block. 1905 * @since 1.7 1906 */ 1907 public static final UnicodeBlock VERTICAL_FORMS = 1908 new UnicodeBlock("VERTICAL_FORMS", 1909 new String[] { "Vertical Forms", 1910 "VerticalForms"}); 1911 1912 /** 1913 * Constant for the "Ancient Greek Numbers" Unicode character block. 1914 * @since 1.7 1915 */ 1916 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1917 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 1918 new String[] { "Ancient Greek Numbers", 1919 "AncientGreekNumbers"}); 1920 1921 /** 1922 * Constant for the "Ancient Symbols" Unicode character block. 1923 * @since 1.7 1924 */ 1925 public static final UnicodeBlock ANCIENT_SYMBOLS = 1926 new UnicodeBlock("ANCIENT_SYMBOLS", 1927 new String[] { "Ancient Symbols", 1928 "AncientSymbols"}); 1929 1930 /** 1931 * Constant for the "Phaistos Disc" Unicode character block. 1932 * @since 1.7 1933 */ 1934 public static final UnicodeBlock PHAISTOS_DISC = 1935 new UnicodeBlock("PHAISTOS_DISC", 1936 new String[] { "Phaistos Disc", 1937 "PhaistosDisc"}); 1938 1939 /** 1940 * Constant for the "Lycian" Unicode character block. 1941 * @since 1.7 1942 */ 1943 public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN"); 1944 1945 /** 1946 * Constant for the "Carian" Unicode character block. 1947 * @since 1.7 1948 */ 1949 public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN"); 1950 1951 /** 1952 * Constant for the "Old Persian" Unicode character block. 1953 * @since 1.7 1954 */ 1955 public static final UnicodeBlock OLD_PERSIAN = 1956 new UnicodeBlock("OLD_PERSIAN", 1957 new String[] { "Old Persian", 1958 "OldPersian"}); 1959 1960 /** 1961 * Constant for the "Phoenician" Unicode character block. 
1962 * @since 1.7 1963 */ 1964 public static final UnicodeBlock PHOENICIAN = 1965 new UnicodeBlock("PHOENICIAN"); 1966 1967 /** 1968 * Constant for the "Lydian" Unicode character block. 1969 * @since 1.7 1970 */ 1971 public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN"); 1972 1973 /** 1974 * Constant for the "Kharoshthi" Unicode character block. 1975 * @since 1.7 1976 */ 1977 public static final UnicodeBlock KHAROSHTHI = 1978 new UnicodeBlock("KHAROSHTHI"); 1979 1980 /** 1981 * Constant for the "Cuneiform" Unicode character block. 1982 * @since 1.7 1983 */ 1984 public static final UnicodeBlock CUNEIFORM = 1985 new UnicodeBlock("CUNEIFORM"); 1986 1987 /** 1988 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 1989 * character block. 1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1993 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1994 new String[] { "Cuneiform Numbers and Punctuation", 1995 "CuneiformNumbersandPunctuation"}); 1996 1997 /** 1998 * Constant for the "Ancient Greek Musical Notation" Unicode character 1999 * block. 2000 * @since 1.7 2001 */ 2002 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2003 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2004 new String[] { "Ancient Greek Musical Notation", 2005 "AncientGreekMusicalNotation"}); 2006 2007 /** 2008 * Constant for the "Counting Rod Numerals" Unicode character block. 2009 * @since 1.7 2010 */ 2011 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2012 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2013 new String[] { "Counting Rod Numerals", 2014 "CountingRodNumerals"}); 2015 2016 /** 2017 * Constant for the "Mahjong Tiles" Unicode character block. 
* @since 1.7
         */
        public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock(
            "MAHJONG_TILES",
            new String[] {"Mahjong Tiles", "MahjongTiles"});

        /**
         * Constant for the "Domino Tiles" Unicode character block.
         * @since 1.7
         */
        public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock(
            "DOMINO_TILES",
            new String[] {"Domino Tiles", "DominoTiles"});

        private static final int blockStarts[] = {
            0x0000, // 0000..007F; Basic Latin
            0x0080, // 0080..00FF; Latin-1 Supplement
            0x0100, // 0100..017F; Latin Extended-A
            0x0180, // 0180..024F; Latin Extended-B
            0x0250, // 0250..02AF; IPA Extensions
            0x02B0, // 02B0..02FF; Spacing Modifier Letters
            0x0300, // 0300..036F; Combining Diacritical Marks
            0x0370, // 0370..03FF; Greek and Coptic
            0x0400, // 0400..04FF; Cyrillic
            0x0500, // 0500..052F; Cyrillic Supplement
            0x0530, // 0530..058F; Armenian
            0x0590, // 0590..05FF; Hebrew
            0x0600, // 0600..06FF; Arabic
            0x0700, // 0700..074F; Syriac
            0x0750, // 0750..077F; Arabic Supplement
            0x0780, // 0780..07BF; Thaana
            0x07C0, // 07C0..07FF; NKo
            0x0800, // unassigned
            0x0900, // 0900..097F; Devanagari
            0x0980, // 0980..09FF; Bengali
            0x0A00, // 0A00..0A7F; Gurmukhi
            0x0A80, // 0A80..0AFF; Gujarati
            0x0B00, // 0B00..0B7F; Oriya
            0x0B80, // 0B80..0BFF; Tamil
            0x0C00, // 0C00..0C7F; Telugu
            0x0C80, // 0C80..0CFF; Kannada
            0x0D00, // 0D00..0D7F; Malayalam
            0x0D80, // 0D80..0DFF; Sinhala
            0x0E00, // 0E00..0E7F; Thai
            0x0E80, // 0E80..0EFF; Lao
            0x0F00, // 0F00..0FFF; Tibetan
            0x1000, // 1000..109F; Myanmar
            0x10A0, // 10A0..10FF; Georgian
            0x1100, // 1100..11FF; Hangul Jamo
            0x1200, // 1200..137F; Ethiopic
            0x1380, // 1380..139F; Ethiopic Supplement
            0x13A0, // 13A0..13FF; Cherokee
            0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
0x1680, // 1680..169F; Ogham 2074 0x16A0, // 16A0..16FF; Runic 2075 0x1700, // 1700..171F; Tagalog 2076 0x1720, // 1720..173F; Hanunoo 2077 0x1740, // 1740..175F; Buhid 2078 0x1760, // 1760..177F; Tagbanwa 2079 0x1780, // 1780..17FF; Khmer 2080 0x1800, // 1800..18AF; Mongolian 2081 0x18B0, // unassigned 2082 0x1900, // 1900..194F; Limbu 2083 0x1950, // 1950..197F; Tai Le 2084 0x1980, // 1980..19DF; New Tai Lue 2085 0x19E0, // 19E0..19FF; Khmer Symbols 2086 0x1A00, // 1A00..1A1F; Buginese 2087 0x1A20, // unassigned 2088 0x1B00, // 1B00..1B7F; Balinese 2089 0x1B80, // 1B80..1BBF; Sundanese 2090 0x1BC0, // unassigned 2091 0x1C00, // 1C00..1C4F; Lepcha 2092 0x1C50, // 1C50..1C7F; Ol Chiki 2093 0x1C80, // unassigned 2094 0x1D00, // 1D00..1D7F; Phonetic Extensions 2095 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2096 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2097 0x1E00, // 1E00..1EFF; Latin Extended Additional 2098 0x1F00, // 1F00..1FFF; Greek Extended 2099 0x2000, // 2000..206F; General Punctuation 2100 0x2070, // 2070..209F; Superscripts and Subscripts 2101 0x20A0, // 20A0..20CF; Currency Symbols 2102 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2103 0x2100, // 2100..214F; Letterlike Symbols 2104 0x2150, // 2150..218F; Number Forms 2105 0x2190, // 2190..21FF; Arrows 2106 0x2200, // 2200..22FF; Mathematical Operators 2107 0x2300, // 2300..23FF; Miscellaneous Technical 2108 0x2400, // 2400..243F; Control Pictures 2109 0x2440, // 2440..245F; Optical Character Recognition 2110 0x2460, // 2460..24FF; Enclosed Alphanumerics 2111 0x2500, // 2500..257F; Box Drawing 2112 0x2580, // 2580..259F; Block Elements 2113 0x25A0, // 25A0..25FF; Geometric Shapes 2114 0x2600, // 2600..26FF; Miscellaneous Symbols 2115 0x2700, // 2700..27BF; Dingbats 2116 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 2117 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2118 0x2800, // 2800..28FF; Braille Patterns 2119 0x2900, // 2900..297F; 
Supplemental Arrows-B 2120 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2121 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2122 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2123 0x2C00, // 2C00..2C5F; Glagolitic 2124 0x2C60, // 2C60..2C7F; Latin Extended-C 2125 0x2C80, // 2C80..2CFF; Coptic 2126 0x2D00, // 2D00..2D2F; Georgian Supplement 2127 0x2D30, // 2D30..2D7F; Tifinagh 2128 0x2D80, // 2D80..2DDF; Ethiopic Extended 2129 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2130 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2131 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2132 0x2F00, // 2F00..2FDF; Kangxi Radicals 2133 0x2FE0, // unassigned 2134 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2135 0x3000, // 3000..303F; CJK Symbols and Punctuation 2136 0x3040, // 3040..309F; Hiragana 2137 0x30A0, // 30A0..30FF; Katakana 2138 0x3100, // 3100..312F; Bopomofo 2139 0x3130, // 3130..318F; Hangul Compatibility Jamo 2140 0x3190, // 3190..319F; Kanbun 2141 0x31A0, // 31A0..31BF; Bopomofo Extended 2142 0x31C0, // 31C0..31EF; CJK Strokes 2143 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2144 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2145 0x3300, // 3300..33FF; CJK Compatibility 2146 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2147 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2148 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2149 0xA000, // A000..A48F; Yi Syllables 2150 0xA490, // A490..A4CF; Yi Radicals 2151 0xA4D0, // unassigned 2152 0xA500, // A500..A63F; Vai 2153 0xA640, // A640..A69F; Cyrillic Extended-B 2154 0xA6A0, // unassigned 2155 0xA700, // A700..A71F; Modifier Tone Letters 2156 0xA720, // A720..A7FF; Latin Extended-D 2157 0xA800, // A800..A82F; Syloti Nagri 2158 0xA830, // unassigned 2159 0xA840, // A840..A87F; Phags-pa 2160 0xA880, // A880..A8DF; Saurashtra 2161 0xA8E0, // unassigned 2162 0xA900, // A900..A92F; Kayah Li 2163 0xA930, // A930..A95F; Rejang 2164 0xA960, // unassigned
2165 0xAA00, // AA00..AA5F; Cham 2166 0xAA60, // unassigned 2167 0xAC00, // AC00..D7AF; Hangul Syllables 2168 0xD7B0, // unassigned 2169 0xD800, // D800..DB7F; High Surrogates 2170 0xDB80, // DB80..DBFF; High Private Use Surrogates 2171 0xDC00, // DC00..DFFF; Low Surrogates 2172 0xE000, // E000..F8FF; Private Use Area 2173 0xF900, // F900..FAFF; CJK Compatibility Ideographs 2174 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 2175 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 2176 0xFE00, // FE00..FE0F; Variation Selectors 2177 0xFE10, // FE10..FE1F; Vertical Forms 2178 0xFE20, // FE20..FE2F; Combining Half Marks 2179 0xFE30, // FE30..FE4F; CJK Compatibility Forms 2180 0xFE50, // FE50..FE6F; Small Form Variants 2181 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 2182 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 2183 0xFFF0, // FFF0..FFFF; Specials 2184 0x10000, // 10000..1007F; Linear B Syllabary 2185 0x10080, // 10080..100FF; Linear B Ideograms 2186 0x10100, // 10100..1013F; Aegean Numbers 2187 0x10140, // 10140..1018F; Ancient Greek Numbers 2188 0x10190, // 10190..101CF; Ancient Symbols 2189 0x101D0, // 101D0..101FF; Phaistos Disc 2190 0x10200, // unassigned 2191 0x10280, // 10280..1029F; Lycian 2192 0x102A0, // 102A0..102DF; Carian 2193 0x102E0, // unassigned 2194 0x10300, // 10300..1032F; Old Italic 2195 0x10330, // 10330..1034F; Gothic 2196 0x10350, // unassigned 2197 0x10380, // 10380..1039F; Ugaritic 2198 0x103A0, // 103A0..103DF; Old Persian 2199 0x103E0, // unassigned 2200 0x10400, // 10400..1044F; Deseret 2201 0x10450, // 10450..1047F; Shavian 2202 0x10480, // 10480..104AF; Osmanya 2203 0x104B0, // unassigned 2204 0x10800, // 10800..1083F; Cypriot Syllabary 2205 0x10840, // unassigned 2206 0x10900, // 10900..1091F; Phoenician 2207 0x10920, // 10920..1093F; Lydian 2208 0x10940, // unassigned 2209 0x10A00, // 10A00..10A5F; Kharoshthi 2210 0x10A60, // unassigned 2211 0x12000, // 12000..123FF; Cuneiform 2212 0x12400, // 12400..1247F;
Cuneiform Numbers and Punctuation 2213 0x12480, // unassigned 2214 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 2215 0x1D100, // 1D100..1D1FF; Musical Symbols 2216 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 2217 0x1D250, // unassigned 2218 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 2219 0x1D360, // 1D360..1D37F; Counting Rod Numerals 2220 0x1D380, // unassigned 2221 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 2222 0x1D800, // unassigned 2223 0x1F000, // 1F000..1F02F; Mahjong Tiles 2224 0x1F030, // 1F030..1F09F; Domino Tiles 2225 0x1F0A0, // unassigned 2226 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 2227 0x2A6E0, // unassigned 2228 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2229 0x2FA20, // unassigned 2230 0xE0000, // E0000..E007F; Tags 2231 0xE0080, // unassigned 2232 0xE0100, // E0100..E01EF; Variation Selectors Supplement 2233 0xE01F0, // unassigned 2234 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 2235 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 2236 }; 2237 2238 private static final UnicodeBlock[] blocks = { 2239 BASIC_LATIN, 2240 LATIN_1_SUPPLEMENT, 2241 LATIN_EXTENDED_A, 2242 LATIN_EXTENDED_B, 2243 IPA_EXTENSIONS, 2244 SPACING_MODIFIER_LETTERS, 2245 COMBINING_DIACRITICAL_MARKS, 2246 GREEK, 2247 CYRILLIC, 2248 CYRILLIC_SUPPLEMENTARY, 2249 ARMENIAN, 2250 HEBREW, 2251 ARABIC, 2252 SYRIAC, 2253 ARABIC_SUPPLEMENT, 2254 THAANA, 2255 NKO, 2256 null, 2257 DEVANAGARI, 2258 BENGALI, 2259 GURMUKHI, 2260 GUJARATI, 2261 ORIYA, 2262 TAMIL, 2263 TELUGU, 2264 KANNADA, 2265 MALAYALAM, 2266 SINHALA, 2267 THAI, 2268 LAO, 2269 TIBETAN, 2270 MYANMAR, 2271 GEORGIAN, 2272 HANGUL_JAMO, 2273 ETHIOPIC, 2274 ETHIOPIC_SUPPLEMENT, 2275 CHEROKEE, 2276 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 2277 OGHAM, 2278 RUNIC, 2279 TAGALOG, 2280 HANUNOO, 2281 BUHID, 2282 TAGBANWA, 2283 KHMER, 2284 MONGOLIAN, 2285 null, 2286 LIMBU, 2287 TAI_LE, 2288 NEW_TAI_LUE, 2289 KHMER_SYMBOLS, 2290 
BUGINESE, 2291 null, 2292 BALINESE, 2293 SUNDANESE, 2294 null, 2295 LEPCHA, 2296 OL_CHIKI, 2297 null, 2298 PHONETIC_EXTENSIONS, 2299 PHONETIC_EXTENSIONS_SUPPLEMENT, 2300 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 2301 LATIN_EXTENDED_ADDITIONAL, 2302 GREEK_EXTENDED, 2303 GENERAL_PUNCTUATION, 2304 SUPERSCRIPTS_AND_SUBSCRIPTS, 2305 CURRENCY_SYMBOLS, 2306 COMBINING_MARKS_FOR_SYMBOLS, 2307 LETTERLIKE_SYMBOLS, 2308 NUMBER_FORMS, 2309 ARROWS, 2310 MATHEMATICAL_OPERATORS, 2311 MISCELLANEOUS_TECHNICAL, 2312 CONTROL_PICTURES, 2313 OPTICAL_CHARACTER_RECOGNITION, 2314 ENCLOSED_ALPHANUMERICS, 2315 BOX_DRAWING, 2316 BLOCK_ELEMENTS, 2317 GEOMETRIC_SHAPES, 2318 MISCELLANEOUS_SYMBOLS, 2319 DINGBATS, 2320 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 2321 SUPPLEMENTAL_ARROWS_A, 2322 BRAILLE_PATTERNS, 2323 SUPPLEMENTAL_ARROWS_B, 2324 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 2325 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 2326 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 2327 GLAGOLITIC, 2328 LATIN_EXTENDED_C, 2329 COPTIC, 2330 GEORGIAN_SUPPLEMENT, 2331 TIFINAGH, 2332 ETHIOPIC_EXTENDED, 2333 CYRILLIC_EXTENDED_A, 2334 SUPPLEMENTAL_PUNCTUATION, 2335 CJK_RADICALS_SUPPLEMENT, 2336 KANGXI_RADICALS, 2337 null, 2338 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 2339 CJK_SYMBOLS_AND_PUNCTUATION, 2340 HIRAGANA, 2341 KATAKANA, 2342 BOPOMOFO, 2343 HANGUL_COMPATIBILITY_JAMO, 2344 KANBUN, 2345 BOPOMOFO_EXTENDED, 2346 CJK_STROKES, 2347 KATAKANA_PHONETIC_EXTENSIONS, 2348 ENCLOSED_CJK_LETTERS_AND_MONTHS, 2349 CJK_COMPATIBILITY, 2350 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 2351 YIJING_HEXAGRAM_SYMBOLS, 2352 CJK_UNIFIED_IDEOGRAPHS, 2353 YI_SYLLABLES, 2354 YI_RADICALS, 2355 null, 2356 VAI, 2357 CYRILLIC_EXTENDED_B, 2358 null, 2359 MODIFIER_TONE_LETTERS, 2360 LATIN_EXTENDED_D, 2361 SYLOTI_NAGRI, 2362 null, 2363 PHAGS_PA, 2364 SAURASHTRA, 2365 null, 2366 KAYAH_LI, 2367 REJANG, 2368 null, 2369 CHAM, 2370 null, 2371 HANGUL_SYLLABLES, 2372 null, 2373 HIGH_SURROGATES, 2374 HIGH_PRIVATE_USE_SURROGATES, 2375 LOW_SURROGATES, 2376 PRIVATE_USE_AREA, 2377 
CJK_COMPATIBILITY_IDEOGRAPHS, 2378 ALPHABETIC_PRESENTATION_FORMS, 2379 ARABIC_PRESENTATION_FORMS_A, 2380 VARIATION_SELECTORS, 2381 VERTICAL_FORMS, 2382 COMBINING_HALF_MARKS, 2383 CJK_COMPATIBILITY_FORMS, 2384 SMALL_FORM_VARIANTS, 2385 ARABIC_PRESENTATION_FORMS_B, 2386 HALFWIDTH_AND_FULLWIDTH_FORMS, 2387 SPECIALS, 2388 LINEAR_B_SYLLABARY, 2389 LINEAR_B_IDEOGRAMS, 2390 AEGEAN_NUMBERS, 2391 ANCIENT_GREEK_NUMBERS, 2392 ANCIENT_SYMBOLS, 2393 PHAISTOS_DISC, 2394 null, 2395 LYCIAN, 2396 CARIAN, 2397 null, 2398 OLD_ITALIC, 2399 GOTHIC, 2400 null, 2401 UGARITIC, 2402 OLD_PERSIAN, 2403 null, 2404 DESERET, 2405 SHAVIAN, 2406 OSMANYA, 2407 null, 2408 CYPRIOT_SYLLABARY, 2409 null, 2410 PHOENICIAN, 2411 LYDIAN, 2412 null, 2413 KHAROSHTHI, 2414 null, 2415 CUNEIFORM, 2416 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 2417 null, 2418 BYZANTINE_MUSICAL_SYMBOLS, 2419 MUSICAL_SYMBOLS, 2420 ANCIENT_GREEK_MUSICAL_NOTATION, 2421 null, 2422 TAI_XUAN_JING_SYMBOLS, 2423 COUNTING_ROD_NUMERALS, 2424 null, 2425 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 2426 null, 2427 MAHJONG_TILES, 2428 DOMINO_TILES, 2429 null, 2430 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 2431 null, 2432 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 2433 null, 2434 TAGS, 2435 null, 2436 VARIATION_SELECTORS_SUPPLEMENT, 2437 null, 2438 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 2439 SUPPLEMENTARY_PRIVATE_USE_AREA_B 2440 }; 2441 2442 2443 /** 2444 * Returns the object representing the Unicode block containing the 2445 * given character, or <code>null</code> if the character is not a 2446 * member of a defined block. 2447 * 2448 * <p><b>Note:</b> This method cannot handle <a 2449 * href="Character.html#supplementary"> supplementary 2450 * characters</a>. To support all Unicode characters, 2451 * including supplementary characters, use the {@link 2452 * #of(int)} method. 
2453 * 2454 * @param c The character in question 2455 * @return The <code>UnicodeBlock</code> instance representing the 2456 * Unicode block of which this character is a member, or 2457 * <code>null</code> if the character is not a member of any 2458 * Unicode block 2459 */ 2460 public static UnicodeBlock of(char c) { 2461 return of((int)c); 2462 } 2463 2464 2465 /** 2466 * Returns the object representing the Unicode block 2467 * containing the given character (Unicode code point), or 2468 * <code>null</code> if the character is not a member of a 2469 * defined block. 2470 * 2471 * @param codePoint the character (Unicode code point) in question. 2472 * @return The <code>UnicodeBlock</code> instance representing the 2473 * Unicode block of which this character is a member, or 2474 * <code>null</code> if the character is not a member of any 2475 * Unicode block 2476 * @exception IllegalArgumentException if the specified 2477 * <code>codePoint</code> is an invalid Unicode code point. 2478 * @see Character#isValidCodePoint(int) 2479 * @since 1.5 2480 */ 2481 public static UnicodeBlock of(int codePoint) { 2482 if (!isValidCodePoint(codePoint)) { 2483 throw new IllegalArgumentException(); 2484 } 2485 2486 int top, bottom, current; 2487 bottom = 0; 2488 top = blockStarts.length; 2489 current = top/2; 2490 2491 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 2492 while (top - bottom > 1) { 2493 if (codePoint >= blockStarts[current]) { 2494 bottom = current; 2495 } else { 2496 top = current; 2497 } 2498 current = (top + bottom) / 2; 2499 } 2500 return blocks[current]; 2501 } 2502 2503 /** 2504 * Returns the UnicodeBlock with the given name. Block 2505 * names are determined by The Unicode Standard. The file 2506 * Blocks-<version>.txt defines blocks for a particular 2507 * version of the standard. The {@link Character} class specifies 2508 * the version of the standard that it supports. 
2509 * <p> 2510 * This method accepts block names in the following forms: 2511 * <ol> 2512 * <li> Canonical block names as defined by the Unicode Standard. 2513 * For example, the standard defines a "Basic Latin" block. Therefore, this 2514 * method accepts "Basic Latin" as a valid block name. The documentation of 2515 * each UnicodeBlock provides the canonical name. 2516 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 2517 * is a valid block name for the "Basic Latin" block. 2518 * <li>The text representation of each constant UnicodeBlock identifier. 2519 * For example, this method will return the {@link #BASIC_LATIN} block if 2520 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 2521 * hyphens in the canonical name with underscores. 2522 * </ol> 2523 * Finally, character case is ignored for all of the valid block name forms. 2524 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 2525 * The en_US locale's case mapping rules are used to provide case-insensitive 2526 * string comparisons for block name validation. 2527 * <p> 2528 * If the Unicode Standard changes block names, both the previous and 2529 * current names will be accepted. 2530 * 2531 * @param blockName A <code>UnicodeBlock</code> name. 2532 * @return The <code>UnicodeBlock</code> instance identified 2533 * by <code>blockName</code> 2534 * @throws IllegalArgumentException if <code>blockName</code> is an 2535 * invalid name 2536 * @throws NullPointerException if <code>blockName</code> is null 2537 * @since 1.5 2538 */ 2539 public static final UnicodeBlock forName(String blockName) { 2540 UnicodeBlock block = (UnicodeBlock)map.get(blockName.toUpperCase(Locale.US)); 2541 if (block == null) { 2542 throw new IllegalArgumentException(); 2543 } 2544 return block; 2545 } 2546 } 2547 2548 2549 /** 2550 * The value of the <code>Character</code>. 
2551 * 2552 * @serial 2553 */ 2554 private final char value; 2555 2556 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 2557 private static final long serialVersionUID = 3786198910865385080L; 2558 2559 /** 2560 * Constructs a newly allocated <code>Character</code> object that 2561 * represents the specified <code>char</code> value. 2562 * 2563 * @param value the value to be represented by the 2564 * <code>Character</code> object. 2565 */ 2566 public Character(char value) { 2567 this.value = value; 2568 } 2569 2570 private static class CharacterCache { 2571 private CharacterCache(){} 2572 2573 static final Character cache[] = new Character[127 + 1]; 2574 2575 static { 2576 for(int i = 0; i < cache.length; i++) 2577 cache[i] = new Character((char)i); 2578 } 2579 } 2580 2581 /** 2582 * Returns a <tt>Character</tt> instance representing the specified 2583 * <tt>char</tt> value. 2584 * If a new <tt>Character</tt> instance is not required, this method 2585 * should generally be used in preference to the constructor 2586 * {@link #Character(char)}, as this method is likely to yield 2587 * significantly better space and time performance by caching 2588 * frequently requested values. 2589 * 2590 * This method will always cache values in the range {@code 2591 * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may 2592 * cache other values outside of this range. 2593 * 2594 * @param c a char value. 2595 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 2596 * @since 1.5 2597 */ 2598 public static Character valueOf(char c) { 2599 if(c <= 127) { // must cache 2600 return CharacterCache.cache[(int)c]; 2601 } 2602 return new Character(c); 2603 } 2604 2605 /** 2606 * Returns the value of this <code>Character</code> object. 2607 * @return the primitive <code>char</code> value represented by 2608 * this object. 
2609 */ 2610 public char charValue() { 2611 return value; 2612 } 2613 2614 /** 2615 * Returns a hash code for this {@code Character}; equal to the result 2616 * of invoking {@code charValue()}. 2617 * 2618 * @return a hash code value for this {@code Character} 2619 */ 2620 public int hashCode() { 2621 return (int)value; 2622 } 2623 2624 /** 2625 * Compares this object against the specified object. 2626 * The result is <code>true</code> if and only if the argument is not 2627 * <code>null</code> and is a <code>Character</code> object that 2628 * represents the same <code>char</code> value as this object. 2629 * 2630 * @param obj the object to compare with. 2631 * @return <code>true</code> if the objects are the same; 2632 * <code>false</code> otherwise. 2633 */ 2634 public boolean equals(Object obj) { 2635 if (obj instanceof Character) { 2636 return value == ((Character)obj).charValue(); 2637 } 2638 return false; 2639 } 2640 2641 /** 2642 * Returns a <code>String</code> object representing this 2643 * <code>Character</code>'s value. The result is a string of 2644 * length 1 whose sole component is the primitive 2645 * <code>char</code> value represented by this 2646 * <code>Character</code> object. 2647 * 2648 * @return a string representation of this object. 2649 */ 2650 public String toString() { 2651 char buf[] = {value}; 2652 return String.valueOf(buf); 2653 } 2654 2655 /** 2656 * Returns a <code>String</code> object representing the 2657 * specified <code>char</code>. The result is a string of length 2658 * 1 consisting solely of the specified <code>char</code>. 
2659 * 2660 * @param c the <code>char</code> to be converted 2661 * @return the string representation of the specified <code>char</code> 2662 * @since 1.4 2663 */ 2664 public static String toString(char c) { 2665 return String.valueOf(c); 2666 } 2667 2668 /** 2669 * Determines whether the specified code point is a valid 2670 * <a href="http://www.unicode.org/glossary/#code_point"> 2671 * Unicode code point value</a>. 2672 * 2673 * @param codePoint the Unicode code point to be tested 2674 * @return {@code true} if the specified code point value is between 2675 * {@link #MIN_CODE_POINT} and 2676 * {@link #MAX_CODE_POINT} inclusive; 2677 * {@code false} otherwise. 2678 * @since 1.5 2679 */ 2680 public static boolean isValidCodePoint(int codePoint) { 2681 return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; 2682 } 2683 2684 /** 2685 * Determines whether the specified character (Unicode code point) 2686 * is in the <a href="#supplementary">supplementary character</a> range. 2687 * 2688 * @param codePoint the character (Unicode code point) to be tested 2689 * @return {@code true} if the specified code point is between 2690 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 2691 * {@link #MAX_CODE_POINT} inclusive; 2692 * {@code false} otherwise. 2693 * @since 1.5 2694 */ 2695 public static boolean isSupplementaryCodePoint(int codePoint) { 2696 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 2697 && codePoint <= MAX_CODE_POINT; 2698 } 2699 2700 /** 2701 * Determines if the given {@code char} value is a 2702 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 2703 * Unicode high-surrogate code unit</a> 2704 * (also known as <i>leading-surrogate code unit</i>). 2705 * 2706 * <p>Such values do not represent characters by themselves, 2707 * but are used in the representation of 2708 * <a href="#supplementary">supplementary characters</a> 2709 * in the UTF-16 encoding. 2710 * 2711 * @param ch the {@code char} value to be tested. 
2712 * @return {@code true} if the {@code char} value is between 2713 * {@link #MIN_HIGH_SURROGATE} and 2714 * {@link #MAX_HIGH_SURROGATE} inclusive; 2715 * {@code false} otherwise. 2716 * @see #isLowSurrogate(char) 2717 * @see Character.UnicodeBlock#of(int) 2718 * @since 1.5 2719 */ 2720 public static boolean isHighSurrogate(char ch) { 2721 return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; 2722 } 2723 2724 /** 2725 * Determines if the given {@code char} value is a 2726 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 2727 * Unicode low-surrogate code unit</a> 2728 * (also known as <i>trailing-surrogate code unit</i>). 2729 * 2730 * <p>Such values do not represent characters by themselves, 2731 * but are used in the representation of 2732 * <a href="#supplementary">supplementary characters</a> 2733 * in the UTF-16 encoding. 2734 * 2735 * @param ch the {@code char} value to be tested. 2736 * @return {@code true} if the {@code char} value is between 2737 * {@link #MIN_LOW_SURROGATE} and 2738 * {@link #MAX_LOW_SURROGATE} inclusive; 2739 * {@code false} otherwise. 2740 * @see #isHighSurrogate(char) 2741 * @since 1.5 2742 */ 2743 public static boolean isLowSurrogate(char ch) { 2744 return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; 2745 } 2746 2747 /** 2748 * Determines if the given {@code char} value is a Unicode 2749 * <i>surrogate code unit</i>. 2750 * 2751 * <p>Such values do not represent characters by themselves, 2752 * but are used in the representation of 2753 * <a href="#supplementary">supplementary characters</a> 2754 * in the UTF-16 encoding. 2755 * 2756 * <p>A char value is a surrogate code unit if and only if it is either 2757 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 2758 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 2759 * 2760 * @param ch the {@code char} value to be tested. 
2761 * @return {@code true} if the {@code char} value is between 2762 * {@link #MIN_SURROGATE} and 2763 * {@link #MAX_SURROGATE} inclusive; 2764 * {@code false} otherwise. 2765 * @since 1.7 2766 */ 2767 public static boolean isSurrogate(char ch) { 2768 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 2769 } 2770 2771 /** 2772 * Determines whether the specified pair of <code>char</code> 2773 * values is a valid 2774 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 2775 * Unicode surrogate pair</a>. 2776 2777 * <p>This method is equivalent to the expression: 2778 * <blockquote><pre> 2779 * isHighSurrogate(high) && isLowSurrogate(low) 2780 * </pre></blockquote> 2781 * 2782 * @param high the high-surrogate code value to be tested 2783 * @param low the low-surrogate code value to be tested 2784 * @return <code>true</code> if the specified high and 2785 * low-surrogate code values represent a valid surrogate pair; 2786 * <code>false</code> otherwise. 2787 * @since 1.5 2788 */ 2789 public static boolean isSurrogatePair(char high, char low) { 2790 return isHighSurrogate(high) && isLowSurrogate(low); 2791 } 2792 2793 /** 2794 * Determines the number of <code>char</code> values needed to 2795 * represent the specified character (Unicode code point). If the 2796 * specified character is equal to or greater than 0x10000, then 2797 * the method returns 2. Otherwise, the method returns 1. 2798 * 2799 * <p>This method doesn't validate the specified character to be a 2800 * valid Unicode code point. The caller must validate the 2801 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 2802 * if necessary. 2803 * 2804 * @param codePoint the character (Unicode code point) to be tested. 2805 * @return 2 if the character is a valid supplementary character; 1 otherwise. 2806 * @see #isSupplementaryCodePoint(int) 2807 * @since 1.5 2808 */ 2809 public static int charCount(int codePoint) { 2810 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 
2 : 1; 2811 } 2812 2813 /** 2814 * Converts the specified surrogate pair to its supplementary code 2815 * point value. This method does not validate the specified 2816 * surrogate pair. The caller must validate it using {@link 2817 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 2818 * 2819 * @param high the high-surrogate code unit 2820 * @param low the low-surrogate code unit 2821 * @return the supplementary code point composed from the 2822 * specified surrogate pair. 2823 * @since 1.5 2824 */ 2825 public static int toCodePoint(char high, char low) { 2826 // Optimized form of: 2827 // return ((high - MIN_HIGH_SURROGATE) << 10) 2828 // + (low - MIN_LOW_SURROGATE) 2829 // + MIN_SUPPLEMENTARY_CODE_POINT; 2830 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 2831 - (MIN_HIGH_SURROGATE << 10) 2832 - MIN_LOW_SURROGATE); 2833 } 2834 2835 /** 2836 * Returns the code point at the given index of the 2837 * <code>CharSequence</code>. If the <code>char</code> value at 2838 * the given index in the <code>CharSequence</code> is in the 2839 * high-surrogate range, the following index is less than the 2840 * length of the <code>CharSequence</code>, and the 2841 * <code>char</code> value at the following index is in the 2842 * low-surrogate range, then the supplementary code point 2843 * corresponding to this surrogate pair is returned. Otherwise, 2844 * the <code>char</code> value at the given index is returned. 2845 * 2846 * @param seq a sequence of <code>char</code> values (Unicode code 2847 * units) 2848 * @param index the index to the <code>char</code> values (Unicode 2849 * code units) in <code>seq</code> to be converted 2850 * @return the Unicode code point at the given index 2851 * @exception NullPointerException if <code>seq</code> is null. 2852 * @exception IndexOutOfBoundsException if the value 2853 * <code>index</code> is negative or not less than 2854 * {@link CharSequence#length() seq.length()}. 
2855 * @since 1.5 2856 */ 2857 public static int codePointAt(CharSequence seq, int index) { 2858 char c1 = seq.charAt(index++); 2859 if (isHighSurrogate(c1)) { 2860 if (index < seq.length()) { 2861 char c2 = seq.charAt(index); 2862 if (isLowSurrogate(c2)) { 2863 return toCodePoint(c1, c2); 2864 } 2865 } 2866 } 2867 return c1; 2868 } 2869 2870 /** 2871 * Returns the code point at the given index of the 2872 * <code>char</code> array. If the <code>char</code> value at 2873 * the given index in the <code>char</code> array is in the 2874 * high-surrogate range, the following index is less than the 2875 * length of the <code>char</code> array, and the 2876 * <code>char</code> value at the following index is in the 2877 * low-surrogate range, then the supplementary code point 2878 * corresponding to this surrogate pair is returned. Otherwise, 2879 * the <code>char</code> value at the given index is returned. 2880 * 2881 * @param a the <code>char</code> array 2882 * @param index the index to the <code>char</code> values (Unicode 2883 * code units) in the <code>char</code> array to be converted 2884 * @return the Unicode code point at the given index 2885 * @exception NullPointerException if <code>a</code> is null. 2886 * @exception IndexOutOfBoundsException if the value 2887 * <code>index</code> is negative or not less than 2888 * the length of the <code>char</code> array. 2889 * @since 1.5 2890 */ 2891 public static int codePointAt(char[] a, int index) { 2892 return codePointAtImpl(a, index, a.length); 2893 } 2894 2895 /** 2896 * Returns the code point at the given index of the 2897 * <code>char</code> array, where only array elements with 2898 * <code>index</code> less than <code>limit</code> can be used. 
If 2899 * the <code>char</code> value at the given index in the 2900 * <code>char</code> array is in the high-surrogate range, the 2901 * following index is less than the <code>limit</code>, and the 2902 * <code>char</code> value at the following index is in the 2903 * low-surrogate range, then the supplementary code point 2904 * corresponding to this surrogate pair is returned. Otherwise, 2905 * the <code>char</code> value at the given index is returned. 2906 * 2907 * @param a the <code>char</code> array 2908 * @param index the index to the <code>char</code> values (Unicode 2909 * code units) in the <code>char</code> array to be converted 2910 * @param limit the index after the last array element that can be used in the 2911 * <code>char</code> array 2912 * @return the Unicode code point at the given index 2913 * @exception NullPointerException if <code>a</code> is null. 2914 * @exception IndexOutOfBoundsException if the <code>index</code> 2915 * argument is negative or not less than the <code>limit</code> 2916 * argument, or if the <code>limit</code> argument is negative or 2917 * greater than the length of the <code>char</code> array. 2918 * @since 1.5 2919 */ 2920 public static int codePointAt(char[] a, int index, int limit) { 2921 if (index >= limit || limit < 0 || limit > a.length) { 2922 throw new IndexOutOfBoundsException(); 2923 } 2924 return codePointAtImpl(a, index, limit); 2925 } 2926 2927 static int codePointAtImpl(char[] a, int index, int limit) { 2928 char c1 = a[index++]; 2929 if (isHighSurrogate(c1)) { 2930 if (index < limit) { 2931 char c2 = a[index]; 2932 if (isLowSurrogate(c2)) { 2933 return toCodePoint(c1, c2); 2934 } 2935 } 2936 } 2937 return c1; 2938 } 2939 2940 /** 2941 * Returns the code point preceding the given index of the 2942 * <code>CharSequence</code>. 
If the <code>char</code> value at 2943 * <code>(index - 1)</code> in the <code>CharSequence</code> is in 2944 * the low-surrogate range, <code>(index - 2)</code> is not 2945 * negative, and the <code>char</code> value at <code>(index - 2946 * 2)</code> in the <code>CharSequence</code> is in the 2947 * high-surrogate range, then the supplementary code point 2948 * corresponding to this surrogate pair is returned. Otherwise, 2949 * the <code>char</code> value at <code>(index - 1)</code> is 2950 * returned. 2951 * 2952 * @param seq the <code>CharSequence</code> instance 2953 * @param index the index following the code point that should be returned 2954 * @return the Unicode code point value before the given index. 2955 * @exception NullPointerException if <code>seq</code> is null. 2956 * @exception IndexOutOfBoundsException if the <code>index</code> 2957 * argument is less than 1 or greater than {@link 2958 * CharSequence#length() seq.length()}. 2959 * @since 1.5 2960 */ 2961 public static int codePointBefore(CharSequence seq, int index) { 2962 char c2 = seq.charAt(--index); 2963 if (isLowSurrogate(c2)) { 2964 if (index > 0) { 2965 char c1 = seq.charAt(--index); 2966 if (isHighSurrogate(c1)) { 2967 return toCodePoint(c1, c2); 2968 } 2969 } 2970 } 2971 return c2; 2972 } 2973 2974 /** 2975 * Returns the code point preceding the given index of the 2976 * <code>char</code> array. If the <code>char</code> value at 2977 * <code>(index - 1)</code> in the <code>char</code> array is in 2978 * the low-surrogate range, <code>(index - 2)</code> is not 2979 * negative, and the <code>char</code> value at <code>(index - 2980 * 2)</code> in the <code>char</code> array is in the 2981 * high-surrogate range, then the supplementary code point 2982 * corresponding to this surrogate pair is returned. Otherwise, 2983 * the <code>char</code> value at <code>(index - 1)</code> is 2984 * returned. 
2985 * 2986 * @param a the <code>char</code> array 2987 * @param index the index following the code point that should be returned 2988 * @return the Unicode code point value before the given index. 2989 * @exception NullPointerException if <code>a</code> is null. 2990 * @exception IndexOutOfBoundsException if the <code>index</code> 2991 * argument is less than 1 or greater than the length of the 2992 * <code>char</code> array 2993 * @since 1.5 2994 */ 2995 public static int codePointBefore(char[] a, int index) { 2996 return codePointBeforeImpl(a, index, 0); 2997 } 2998 2999 /** 3000 * Returns the code point preceding the given index of the 3001 * <code>char</code> array, where only array elements with 3002 * <code>index</code> greater than or equal to <code>start</code> 3003 * can be used. If the <code>char</code> value at <code>(index - 3004 * 1)</code> in the <code>char</code> array is in the 3005 * low-surrogate range, <code>(index - 2)</code> is not less than 3006 * <code>start</code>, and the <code>char</code> value at 3007 * <code>(index - 2)</code> in the <code>char</code> array is in 3008 * the high-surrogate range, then the supplementary code point 3009 * corresponding to this surrogate pair is returned. Otherwise, 3010 * the <code>char</code> value at <code>(index - 1)</code> is 3011 * returned. 3012 * 3013 * @param a the <code>char</code> array 3014 * @param index the index following the code point that should be returned 3015 * @param start the index of the first array element in the 3016 * <code>char</code> array 3017 * @return the Unicode code point value before the given index. 3018 * @exception NullPointerException if <code>a</code> is null. 
3019 * @exception IndexOutOfBoundsException if the <code>index</code> 3020 * argument is not greater than the <code>start</code> argument or 3021 * is greater than the length of the <code>char</code> array, or 3022 * if the <code>start</code> argument is negative or not less than 3023 * the length of the <code>char</code> array. 3024 * @since 1.5 3025 */ 3026 public static int codePointBefore(char[] a, int index, int start) { 3027 if (index <= start || start < 0 || start >= a.length) { 3028 throw new IndexOutOfBoundsException(); 3029 } 3030 return codePointBeforeImpl(a, index, start); 3031 } 3032 3033 static int codePointBeforeImpl(char[] a, int index, int start) { 3034 char c2 = a[--index]; 3035 if (isLowSurrogate(c2)) { 3036 if (index > start) { 3037 char c1 = a[--index]; 3038 if (isHighSurrogate(c1)) { 3039 return toCodePoint(c1, c2); 3040 } 3041 } 3042 } 3043 return c2; 3044 } 3045 3046 /** 3047 * Converts the specified character (Unicode code point) to its 3048 * UTF-16 representation. If the specified code point is a BMP 3049 * (Basic Multilingual Plane or Plane 0) value, the same value is 3050 * stored in <code>dst[dstIndex]</code>, and 1 is returned. If the 3051 * specified code point is a supplementary character, its 3052 * surrogate values are stored in <code>dst[dstIndex]</code> 3053 * (high-surrogate) and <code>dst[dstIndex+1]</code> 3054 * (low-surrogate), and 2 is returned. 3055 * 3056 * @param codePoint the character (Unicode code point) to be converted. 3057 * @param dst an array of <code>char</code> in which the 3058 * <code>codePoint</code>'s UTF-16 value is stored. 3059 * @param dstIndex the start index into the <code>dst</code> 3060 * array where the converted value is stored. 3061 * @return 1 if the code point is a BMP code point, 2 if the 3062 * code point is a supplementary code point. 3063 * @exception IllegalArgumentException if the specified 3064 * <code>codePoint</code> is not a valid Unicode code point. 
3065 * @exception NullPointerException if the specified <code>dst</code> is null. 3066 * @exception IndexOutOfBoundsException if <code>dstIndex</code> 3067 * is negative or not less than <code>dst.length</code>, or if 3068 * <code>dst</code> at <code>dstIndex</code> doesn't have enough 3069 * array element(s) to store the resulting <code>char</code> 3070 * value(s). (If <code>dstIndex</code> is equal to 3071 * <code>dst.length-1</code> and the specified 3072 * <code>codePoint</code> is a supplementary character, the 3073 * high-surrogate value is not stored in 3074 * <code>dst[dstIndex]</code>.) 3075 * @since 1.5 3076 */ 3077 public static int toChars(int codePoint, char[] dst, int dstIndex) { 3078 if (codePoint < 0 || codePoint > MAX_CODE_POINT) { 3079 throw new IllegalArgumentException(); 3080 } 3081 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) { 3082 dst[dstIndex] = (char) codePoint; 3083 return 1; 3084 } 3085 toSurrogates(codePoint, dst, dstIndex); 3086 return 2; 3087 } 3088 3089 /** 3090 * Converts the specified character (Unicode code point) to its 3091 * UTF-16 representation stored in a <code>char</code> array. If 3092 * the specified code point is a BMP (Basic Multilingual Plane or 3093 * Plane 0) value, the resulting <code>char</code> array has 3094 * the same value as <code>codePoint</code>. If the specified code 3095 * point is a supplementary code point, the resulting 3096 * <code>char</code> array has the corresponding surrogate pair. 3097 * 3098 * @param codePoint a Unicode code point 3099 * @return a <code>char</code> array having 3100 * <code>codePoint</code>'s UTF-16 representation. 3101 * @exception IllegalArgumentException if the specified 3102 * <code>codePoint</code> is not a valid Unicode code point. 
3103 * @since 1.5 3104 */ 3105 public static char[] toChars(int codePoint) { 3106 if (codePoint < 0 || codePoint > MAX_CODE_POINT) { 3107 throw new IllegalArgumentException(); 3108 } 3109 if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) { 3110 return new char[] { (char) codePoint }; 3111 } 3112 char[] result = new char[2]; 3113 toSurrogates(codePoint, result, 0); 3114 return result; 3115 } 3116 3117 static void toSurrogates(int codePoint, char[] dst, int index) { 3118 // We write elements "backwards" to guarantee all-or-nothing 3119 dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 3120 dst[index] = (char)((codePoint >>> 10) 3121 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 3122 } 3123 3124 /** 3125 * Returns the number of Unicode code points in the text range of 3126 * the specified char sequence. The text range begins at the 3127 * specified <code>beginIndex</code> and extends to the 3128 * <code>char</code> at index <code>endIndex - 1</code>. Thus the 3129 * length (in <code>char</code>s) of the text range is 3130 * <code>endIndex-beginIndex</code>. Unpaired surrogates within 3131 * the text range count as one code point each. 3132 * 3133 * @param seq the char sequence 3134 * @param beginIndex the index to the first <code>char</code> of 3135 * the text range. 3136 * @param endIndex the index after the last <code>char</code> of 3137 * the text range. 3138 * @return the number of Unicode code points in the specified text 3139 * range 3140 * @exception NullPointerException if <code>seq</code> is null. 3141 * @exception IndexOutOfBoundsException if the 3142 * <code>beginIndex</code> is negative, or <code>endIndex</code> 3143 * is larger than the length of the given sequence, or 3144 * <code>beginIndex</code> is larger than <code>endIndex</code>. 
3145 * @since 1.5 3146 */ 3147 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 3148 int length = seq.length(); 3149 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 3150 throw new IndexOutOfBoundsException(); 3151 } 3152 int n = 0; 3153 for (int i = beginIndex; i < endIndex; ) { 3154 n++; 3155 if (isHighSurrogate(seq.charAt(i++))) { 3156 if (i < endIndex && isLowSurrogate(seq.charAt(i))) { 3157 i++; 3158 } 3159 } 3160 } 3161 return n; 3162 } 3163 3164 /** 3165 * Returns the number of Unicode code points in a subarray of the 3166 * <code>char</code> array argument. The <code>offset</code> 3167 * argument is the index of the first <code>char</code> of the 3168 * subarray and the <code>count</code> argument specifies the 3169 * length of the subarray in <code>char</code>s. Unpaired 3170 * surrogates within the subarray count as one code point each. 3171 * 3172 * @param a the <code>char</code> array 3173 * @param offset the index of the first <code>char</code> in the 3174 * given <code>char</code> array 3175 * @param count the length of the subarray in <code>char</code>s 3176 * @return the number of Unicode code points in the specified subarray 3177 * @exception NullPointerException if <code>a</code> is null. 3178 * @exception IndexOutOfBoundsException if <code>offset</code> or 3179 * <code>count</code> is negative, or if <code>offset + 3180 * count</code> is larger than the length of the given array. 
3181 * @since 1.5 3182 */ 3183 public static int codePointCount(char[] a, int offset, int count) { 3184 if (count > a.length - offset || offset < 0 || count < 0) { 3185 throw new IndexOutOfBoundsException(); 3186 } 3187 return codePointCountImpl(a, offset, count); 3188 } 3189 3190 static int codePointCountImpl(char[] a, int offset, int count) { 3191 int endIndex = offset + count; 3192 int n = 0; 3193 for (int i = offset; i < endIndex; ) { 3194 n++; 3195 if (isHighSurrogate(a[i++])) { 3196 if (i < endIndex && isLowSurrogate(a[i])) { 3197 i++; 3198 } 3199 } 3200 } 3201 return n; 3202 } 3203 3204 /** 3205 * Returns the index within the given char sequence that is offset 3206 * from the given <code>index</code> by <code>codePointOffset</code> 3207 * code points. Unpaired surrogates within the text range given by 3208 * <code>index</code> and <code>codePointOffset</code> count as 3209 * one code point each. 3210 * 3211 * @param seq the char sequence 3212 * @param index the index to be offset 3213 * @param codePointOffset the offset in code points 3214 * @return the index within the char sequence 3215 * @exception NullPointerException if <code>seq</code> is null. 3216 * @exception IndexOutOfBoundsException if <code>index</code> 3217 * is negative or larger then the length of the char sequence, 3218 * or if <code>codePointOffset</code> is positive and the 3219 * subsequence starting with <code>index</code> has fewer than 3220 * <code>codePointOffset</code> code points, or if 3221 * <code>codePointOffset</code> is negative and the subsequence 3222 * before <code>index</code> has fewer than the absolute value 3223 * of <code>codePointOffset</code> code points. 
3224 * @since 1.5 3225 */ 3226 public static int offsetByCodePoints(CharSequence seq, int index, 3227 int codePointOffset) { 3228 int length = seq.length(); 3229 if (index < 0 || index > length) { 3230 throw new IndexOutOfBoundsException(); 3231 } 3232 3233 int x = index; 3234 if (codePointOffset >= 0) { 3235 int i; 3236 for (i = 0; x < length && i < codePointOffset; i++) { 3237 if (isHighSurrogate(seq.charAt(x++))) { 3238 if (x < length && isLowSurrogate(seq.charAt(x))) { 3239 x++; 3240 } 3241 } 3242 } 3243 if (i < codePointOffset) { 3244 throw new IndexOutOfBoundsException(); 3245 } 3246 } else { 3247 int i; 3248 for (i = codePointOffset; x > 0 && i < 0; i++) { 3249 if (isLowSurrogate(seq.charAt(--x))) { 3250 if (x > 0 && isHighSurrogate(seq.charAt(x-1))) { 3251 x--; 3252 } 3253 } 3254 } 3255 if (i < 0) { 3256 throw new IndexOutOfBoundsException(); 3257 } 3258 } 3259 return x; 3260 } 3261 3262 /** 3263 * Returns the index within the given <code>char</code> subarray 3264 * that is offset from the given <code>index</code> by 3265 * <code>codePointOffset</code> code points. The 3266 * <code>start</code> and <code>count</code> arguments specify a 3267 * subarray of the <code>char</code> array. Unpaired surrogates 3268 * within the text range given by <code>index</code> and 3269 * <code>codePointOffset</code> count as one code point each. 3270 * 3271 * @param a the <code>char</code> array 3272 * @param start the index of the first <code>char</code> of the 3273 * subarray 3274 * @param count the length of the subarray in <code>char</code>s 3275 * @param index the index to be offset 3276 * @param codePointOffset the offset in code points 3277 * @return the index within the subarray 3278 * @exception NullPointerException if <code>a</code> is null. 
3279 * @exception IndexOutOfBoundsException 3280 * if <code>start</code> or <code>count</code> is negative, 3281 * or if <code>start + count</code> is larger than the length of 3282 * the given array, 3283 * or if <code>index</code> is less than <code>start</code> or 3284 * larger then <code>start + count</code>, 3285 * or if <code>codePointOffset</code> is positive and the text range 3286 * starting with <code>index</code> and ending with <code>start 3287 * + count - 1</code> has fewer than <code>codePointOffset</code> code 3288 * points, 3289 * or if <code>codePointOffset</code> is negative and the text range 3290 * starting with <code>start</code> and ending with <code>index 3291 * - 1</code> has fewer than the absolute value of 3292 * <code>codePointOffset</code> code points. 3293 * @since 1.5 3294 */ 3295 public static int offsetByCodePoints(char[] a, int start, int count, 3296 int index, int codePointOffset) { 3297 if (count > a.length-start || start < 0 || count < 0 3298 || index < start || index > start+count) { 3299 throw new IndexOutOfBoundsException(); 3300 } 3301 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 3302 } 3303 3304 static int offsetByCodePointsImpl(char[]a, int start, int count, 3305 int index, int codePointOffset) { 3306 int x = index; 3307 if (codePointOffset >= 0) { 3308 int limit = start + count; 3309 int i; 3310 for (i = 0; x < limit && i < codePointOffset; i++) { 3311 if (isHighSurrogate(a[x++])) { 3312 if (x < limit && isLowSurrogate(a[x])) { 3313 x++; 3314 } 3315 } 3316 } 3317 if (i < codePointOffset) { 3318 throw new IndexOutOfBoundsException(); 3319 } 3320 } else { 3321 int i; 3322 for (i = codePointOffset; x > start && i < 0; i++) { 3323 if (isLowSurrogate(a[--x])) { 3324 if (x > start && isHighSurrogate(a[x-1])) { 3325 x--; 3326 } 3327 } 3328 } 3329 if (i < 0) { 3330 throw new IndexOutOfBoundsException(); 3331 } 3332 } 3333 return x; 3334 } 3335 3336 /** 3337 * Determines if the specified character is a 
lowercase character. 3338 * <p> 3339 * A character is lowercase if its general category type, provided 3340 * by <code>Character.getType(ch)</code>, is 3341 * <code>LOWERCASE_LETTER</code>. 3342 * <p> 3343 * The following are examples of lowercase characters: 3344 * <p><blockquote><pre> 3345 * a b c d e f g h i j k l m n o p q r s t u v w x y z 3346 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 3347 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 3348 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 3349 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 3350 * </pre></blockquote> 3351 * <p> Many other Unicode characters are lowercase too. 3352 * 3353 * <p><b>Note:</b> This method cannot handle <a 3354 * href="#supplementary"> supplementary characters</a>. To support 3355 * all Unicode characters, including supplementary characters, use 3356 * the {@link #isLowerCase(int)} method. 3357 * 3358 * @param ch the character to be tested. 3359 * @return <code>true</code> if the character is lowercase; 3360 * <code>false</code> otherwise. 3361 * @see java.lang.Character#isLowerCase(char) 3362 * @see java.lang.Character#isTitleCase(char) 3363 * @see java.lang.Character#toLowerCase(char) 3364 * @see java.lang.Character#getType(char) 3365 */ 3366 public static boolean isLowerCase(char ch) { 3367 return isLowerCase((int)ch); 3368 } 3369 3370 /** 3371 * Determines if the specified character (Unicode code point) is a 3372 * lowercase character. 3373 * <p> 3374 * A character is lowercase if its general category type, provided 3375 * by {@link Character#getType getType(codePoint)}, is 3376 * <code>LOWERCASE_LETTER</code>. 
3377 * <p> 3378 * The following are examples of lowercase characters: 3379 * <p><blockquote><pre> 3380 * a b c d e f g h i j k l m n o p q r s t u v w x y z 3381 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 3382 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 3383 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 3384 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 3385 * </pre></blockquote> 3386 * <p> Many other Unicode characters are lowercase too. 3387 * 3388 * @param codePoint the character (Unicode code point) to be tested. 3389 * @return <code>true</code> if the character is lowercase; 3390 * <code>false</code> otherwise. 3391 * @see java.lang.Character#isLowerCase(int) 3392 * @see java.lang.Character#isTitleCase(int) 3393 * @see java.lang.Character#toLowerCase(int) 3394 * @see java.lang.Character#getType(int) 3395 * @since 1.5 3396 */ 3397 public static boolean isLowerCase(int codePoint) { 3398 return getType(codePoint) == Character.LOWERCASE_LETTER; 3399 } 3400 3401 /** 3402 * Determines if the specified character is an uppercase character. 3403 * <p> 3404 * A character is uppercase if its general category type, provided by 3405 * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>. 3406 * <p> 3407 * The following are examples of uppercase characters: 3408 * <p><blockquote><pre> 3409 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 3410 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 3411 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 3412 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 3413 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 3414 * </pre></blockquote> 3415 * <p> Many other Unicode characters are uppercase too.<p> 3416 * 3417 * <p><b>Note:</b> This method cannot handle <a 3418 * href="#supplementary"> supplementary characters</a>. 
To support 3419 * all Unicode characters, including supplementary characters, use 3420 * the {@link #isUpperCase(int)} method. 3421 * 3422 * @param ch the character to be tested. 3423 * @return <code>true</code> if the character is uppercase; 3424 * <code>false</code> otherwise. 3425 * @see java.lang.Character#isLowerCase(char) 3426 * @see java.lang.Character#isTitleCase(char) 3427 * @see java.lang.Character#toUpperCase(char) 3428 * @see java.lang.Character#getType(char) 3429 * @since 1.0 3430 */ 3431 public static boolean isUpperCase(char ch) { 3432 return isUpperCase((int)ch); 3433 } 3434 3435 /** 3436 * Determines if the specified character (Unicode code point) is an uppercase character. 3437 * <p> 3438 * A character is uppercase if its general category type, provided by 3439 * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>. 3440 * <p> 3441 * The following are examples of uppercase characters: 3442 * <p><blockquote><pre> 3443 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 3444 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 3445 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 3446 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 3447 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 3448 * </pre></blockquote> 3449 * <p> Many other Unicode characters are uppercase too.<p> 3450 * 3451 * @param codePoint the character (Unicode code point) to be tested. 3452 * @return <code>true</code> if the character is uppercase; 3453 * <code>false</code> otherwise. 
3454 * @see java.lang.Character#isLowerCase(int) 3455 * @see java.lang.Character#isTitleCase(int) 3456 * @see java.lang.Character#toUpperCase(int) 3457 * @see java.lang.Character#getType(int) 3458 * @since 1.5 3459 */ 3460 public static boolean isUpperCase(int codePoint) { 3461 return getType(codePoint) == Character.UPPERCASE_LETTER; 3462 } 3463 3464 /** 3465 * Determines if the specified character is a titlecase character. 3466 * <p> 3467 * A character is a titlecase character if its general 3468 * category type, provided by <code>Character.getType(ch)</code>, 3469 * is <code>TITLECASE_LETTER</code>. 3470 * <p> 3471 * Some characters look like pairs of Latin letters. For example, there 3472 * is an uppercase letter that looks like "LJ" and has a corresponding 3473 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 3474 * is the appropriate form to use when rendering a word in lowercase 3475 * with initial capitals, as for a book title. 3476 * <p> 3477 * These are some of the Unicode characters for which this method returns 3478 * <code>true</code>: 3479 * <ul> 3480 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 3481 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 3482 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 3483 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 3484 * </ul> 3485 * <p> Many other Unicode characters are titlecase too.<p> 3486 * 3487 * <p><b>Note:</b> This method cannot handle <a 3488 * href="#supplementary"> supplementary characters</a>. To support 3489 * all Unicode characters, including supplementary characters, use 3490 * the {@link #isTitleCase(int)} method. 3491 * 3492 * @param ch the character to be tested. 3493 * @return <code>true</code> if the character is titlecase; 3494 * <code>false</code> otherwise. 
3495 * @see java.lang.Character#isLowerCase(char) 3496 * @see java.lang.Character#isUpperCase(char) 3497 * @see java.lang.Character#toTitleCase(char) 3498 * @see java.lang.Character#getType(char) 3499 * @since 1.0.2 3500 */ 3501 public static boolean isTitleCase(char ch) { 3502 return isTitleCase((int)ch); 3503 } 3504 3505 /** 3506 * Determines if the specified character (Unicode code point) is a titlecase character. 3507 * <p> 3508 * A character is a titlecase character if its general 3509 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 3510 * is <code>TITLECASE_LETTER</code>. 3511 * <p> 3512 * Some characters look like pairs of Latin letters. For example, there 3513 * is an uppercase letter that looks like "LJ" and has a corresponding 3514 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 3515 * is the appropriate form to use when rendering a word in lowercase 3516 * with initial capitals, as for a book title. 3517 * <p> 3518 * These are some of the Unicode characters for which this method returns 3519 * <code>true</code>: 3520 * <ul> 3521 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 3522 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 3523 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 3524 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 3525 * </ul> 3526 * <p> Many other Unicode characters are titlecase too.<p> 3527 * 3528 * @param codePoint the character (Unicode code point) to be tested. 3529 * @return <code>true</code> if the character is titlecase; 3530 * <code>false</code> otherwise. 
3531 * @see java.lang.Character#isLowerCase(int) 3532 * @see java.lang.Character#isUpperCase(int) 3533 * @see java.lang.Character#toTitleCase(int) 3534 * @see java.lang.Character#getType(int) 3535 * @since 1.5 3536 */ 3537 public static boolean isTitleCase(int codePoint) { 3538 return getType(codePoint) == Character.TITLECASE_LETTER; 3539 } 3540 3541 /** 3542 * Determines if the specified character is a digit. 3543 * <p> 3544 * A character is a digit if its general category type, provided 3545 * by <code>Character.getType(ch)</code>, is 3546 * <code>DECIMAL_DIGIT_NUMBER</code>. 3547 * <p> 3548 * Some Unicode character ranges that contain digits: 3549 * <ul> 3550 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 3551 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 3552 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 3553 * Arabic-Indic digits 3554 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 3555 * Extended Arabic-Indic digits 3556 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 3557 * Devanagari digits 3558 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 3559 * Fullwidth digits 3560 * </ul> 3561 * 3562 * Many other character ranges contain digits as well. 3563 * 3564 * <p><b>Note:</b> This method cannot handle <a 3565 * href="#supplementary"> supplementary characters</a>. To support 3566 * all Unicode characters, including supplementary characters, use 3567 * the {@link #isDigit(int)} method. 3568 * 3569 * @param ch the character to be tested. 3570 * @return <code>true</code> if the character is a digit; 3571 * <code>false</code> otherwise. 3572 * @see java.lang.Character#digit(char, int) 3573 * @see java.lang.Character#forDigit(int, int) 3574 * @see java.lang.Character#getType(char) 3575 */ 3576 public static boolean isDigit(char ch) { 3577 return isDigit((int)ch); 3578 } 3579 3580 /** 3581 * Determines if the specified character (Unicode code point) is a digit. 
3582 * <p> 3583 * A character is a digit if its general category type, provided 3584 * by {@link Character#getType(int) getType(codePoint)}, is 3585 * <code>DECIMAL_DIGIT_NUMBER</code>. 3586 * <p> 3587 * Some Unicode character ranges that contain digits: 3588 * <ul> 3589 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 3590 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 3591 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 3592 * Arabic-Indic digits 3593 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 3594 * Extended Arabic-Indic digits 3595 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 3596 * Devanagari digits 3597 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 3598 * Fullwidth digits 3599 * </ul> 3600 * 3601 * Many other character ranges contain digits as well. 3602 * 3603 * @param codePoint the character (Unicode code point) to be tested. 3604 * @return <code>true</code> if the character is a digit; 3605 * <code>false</code> otherwise. 3606 * @see java.lang.Character#forDigit(int, int) 3607 * @see java.lang.Character#getType(int) 3608 * @since 1.5 3609 */ 3610 public static boolean isDigit(int codePoint) { 3611 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 3612 } 3613 3614 /** 3615 * Determines if a character is defined in Unicode. 3616 * <p> 3617 * A character is defined if at least one of the following is true: 3618 * <ul> 3619 * <li>It has an entry in the UnicodeData file. 3620 * <li>It has a value in a range defined by the UnicodeData file. 3621 * </ul> 3622 * 3623 * <p><b>Note:</b> This method cannot handle <a 3624 * href="#supplementary"> supplementary characters</a>. To support 3625 * all Unicode characters, including supplementary characters, use 3626 * the {@link #isDefined(int)} method. 3627 * 3628 * @param ch the character to be tested 3629 * @return <code>true</code> if the character has a defined meaning 3630 * in Unicode; <code>false</code> otherwise. 
3631 * @see java.lang.Character#isDigit(char) 3632 * @see java.lang.Character#isLetter(char) 3633 * @see java.lang.Character#isLetterOrDigit(char) 3634 * @see java.lang.Character#isLowerCase(char) 3635 * @see java.lang.Character#isTitleCase(char) 3636 * @see java.lang.Character#isUpperCase(char) 3637 * @since 1.0.2 3638 */ 3639 public static boolean isDefined(char ch) { 3640 return isDefined((int)ch); 3641 } 3642 3643 /** 3644 * Determines if a character (Unicode code point) is defined in Unicode. 3645 * <p> 3646 * A character is defined if at least one of the following is true: 3647 * <ul> 3648 * <li>It has an entry in the UnicodeData file. 3649 * <li>It has a value in a range defined by the UnicodeData file. 3650 * </ul> 3651 * 3652 * @param codePoint the character (Unicode code point) to be tested. 3653 * @return <code>true</code> if the character has a defined meaning 3654 * in Unicode; <code>false</code> otherwise. 3655 * @see java.lang.Character#isDigit(int) 3656 * @see java.lang.Character#isLetter(int) 3657 * @see java.lang.Character#isLetterOrDigit(int) 3658 * @see java.lang.Character#isLowerCase(int) 3659 * @see java.lang.Character#isTitleCase(int) 3660 * @see java.lang.Character#isUpperCase(int) 3661 * @since 1.5 3662 */ 3663 public static boolean isDefined(int codePoint) { 3664 return getType(codePoint) != Character.UNASSIGNED; 3665 } 3666 3667 /** 3668 * Determines if the specified character is a letter. 3669 * <p> 3670 * A character is considered to be a letter if its general 3671 * category type, provided by <code>Character.getType(ch)</code>, 3672 * is any of the following: 3673 * <ul> 3674 * <li> <code>UPPERCASE_LETTER</code> 3675 * <li> <code>LOWERCASE_LETTER</code> 3676 * <li> <code>TITLECASE_LETTER</code> 3677 * <li> <code>MODIFIER_LETTER</code> 3678 * <li> <code>OTHER_LETTER</code> 3679 * </ul> 3680 * 3681 * Not all letters have case. Many characters are 3682 * letters but are neither uppercase nor lowercase nor titlecase. 
3683 * 3684 * <p><b>Note:</b> This method cannot handle <a 3685 * href="#supplementary"> supplementary characters</a>. To support 3686 * all Unicode characters, including supplementary characters, use 3687 * the {@link #isLetter(int)} method. 3688 * 3689 * @param ch the character to be tested. 3690 * @return <code>true</code> if the character is a letter; 3691 * <code>false</code> otherwise. 3692 * @see java.lang.Character#isDigit(char) 3693 * @see java.lang.Character#isJavaIdentifierStart(char) 3694 * @see java.lang.Character#isJavaLetter(char) 3695 * @see java.lang.Character#isJavaLetterOrDigit(char) 3696 * @see java.lang.Character#isLetterOrDigit(char) 3697 * @see java.lang.Character#isLowerCase(char) 3698 * @see java.lang.Character#isTitleCase(char) 3699 * @see java.lang.Character#isUnicodeIdentifierStart(char) 3700 * @see java.lang.Character#isUpperCase(char) 3701 */ 3702 public static boolean isLetter(char ch) { 3703 return isLetter((int)ch); 3704 } 3705 3706 /** 3707 * Determines if the specified character (Unicode code point) is a letter. 3708 * <p> 3709 * A character is considered to be a letter if its general 3710 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 3711 * is any of the following: 3712 * <ul> 3713 * <li> <code>UPPERCASE_LETTER</code> 3714 * <li> <code>LOWERCASE_LETTER</code> 3715 * <li> <code>TITLECASE_LETTER</code> 3716 * <li> <code>MODIFIER_LETTER</code> 3717 * <li> <code>OTHER_LETTER</code> 3718 * </ul> 3719 * 3720 * Not all letters have case. Many characters are 3721 * letters but are neither uppercase nor lowercase nor titlecase. 3722 * 3723 * @param codePoint the character (Unicode code point) to be tested. 3724 * @return <code>true</code> if the character is a letter; 3725 * <code>false</code> otherwise. 
3726 * @see java.lang.Character#isDigit(int) 3727 * @see java.lang.Character#isJavaIdentifierStart(int) 3728 * @see java.lang.Character#isLetterOrDigit(int) 3729 * @see java.lang.Character#isLowerCase(int) 3730 * @see java.lang.Character#isTitleCase(int) 3731 * @see java.lang.Character#isUnicodeIdentifierStart(int) 3732 * @see java.lang.Character#isUpperCase(int) 3733 * @since 1.5 3734 */ 3735 public static boolean isLetter(int codePoint) { 3736 return ((((1 << Character.UPPERCASE_LETTER) | 3737 (1 << Character.LOWERCASE_LETTER) | 3738 (1 << Character.TITLECASE_LETTER) | 3739 (1 << Character.MODIFIER_LETTER) | 3740 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 3741 != 0; 3742 } 3743 3744 /** 3745 * Determines if the specified character is a letter or digit. 3746 * <p> 3747 * A character is considered to be a letter or digit if either 3748 * <code>Character.isLetter(char ch)</code> or 3749 * <code>Character.isDigit(char ch)</code> returns 3750 * <code>true</code> for the character. 3751 * 3752 * <p><b>Note:</b> This method cannot handle <a 3753 * href="#supplementary"> supplementary characters</a>. To support 3754 * all Unicode characters, including supplementary characters, use 3755 * the {@link #isLetterOrDigit(int)} method. 3756 * 3757 * @param ch the character to be tested. 3758 * @return <code>true</code> if the character is a letter or digit; 3759 * <code>false</code> otherwise. 3760 * @see java.lang.Character#isDigit(char) 3761 * @see java.lang.Character#isJavaIdentifierPart(char) 3762 * @see java.lang.Character#isJavaLetter(char) 3763 * @see java.lang.Character#isJavaLetterOrDigit(char) 3764 * @see java.lang.Character#isLetter(char) 3765 * @see java.lang.Character#isUnicodeIdentifierPart(char) 3766 * @since 1.0.2 3767 */ 3768 public static boolean isLetterOrDigit(char ch) { 3769 return isLetterOrDigit((int)ch); 3770 } 3771 3772 /** 3773 * Determines if the specified character (Unicode code point) is a letter or digit. 
3774 * <p> 3775 * A character is considered to be a letter or digit if either 3776 * {@link #isLetter(int) isLetter(codePoint)} or 3777 * {@link #isDigit(int) isDigit(codePoint)} returns 3778 * <code>true</code> for the character. 3779 * 3780 * @param codePoint the character (Unicode code point) to be tested. 3781 * @return <code>true</code> if the character is a letter or digit; 3782 * <code>false</code> otherwise. 3783 * @see java.lang.Character#isDigit(int) 3784 * @see java.lang.Character#isJavaIdentifierPart(int) 3785 * @see java.lang.Character#isLetter(int) 3786 * @see java.lang.Character#isUnicodeIdentifierPart(int) 3787 * @since 1.5 3788 */ 3789 public static boolean isLetterOrDigit(int codePoint) { 3790 return ((((1 << Character.UPPERCASE_LETTER) | 3791 (1 << Character.LOWERCASE_LETTER) | 3792 (1 << Character.TITLECASE_LETTER) | 3793 (1 << Character.MODIFIER_LETTER) | 3794 (1 << Character.OTHER_LETTER) | 3795 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 3796 != 0; 3797 } 3798 3799 /** 3800 * Determines if the specified character is permissible as the first 3801 * character in a Java identifier. 3802 * <p> 3803 * A character may start a Java identifier if and only if 3804 * one of the following is true: 3805 * <ul> 3806 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 3807 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 3808 * <li> ch is a currency symbol (such as "$") 3809 * <li> ch is a connecting punctuation character (such as "_"). 3810 * </ul> 3811 * 3812 * @param ch the character to be tested. 3813 * @return <code>true</code> if the character may start a Java 3814 * identifier; <code>false</code> otherwise. 
3815 * @see java.lang.Character#isJavaLetterOrDigit(char) 3816 * @see java.lang.Character#isJavaIdentifierStart(char) 3817 * @see java.lang.Character#isJavaIdentifierPart(char) 3818 * @see java.lang.Character#isLetter(char) 3819 * @see java.lang.Character#isLetterOrDigit(char) 3820 * @see java.lang.Character#isUnicodeIdentifierStart(char) 3821 * @since 1.02 3822 * @deprecated Replaced by isJavaIdentifierStart(char). 3823 */ 3824 @Deprecated 3825 public static boolean isJavaLetter(char ch) { 3826 return isJavaIdentifierStart(ch); 3827 } 3828 3829 /** 3830 * Determines if the specified character may be part of a Java 3831 * identifier as other than the first character. 3832 * <p> 3833 * A character may be part of a Java identifier if and only if any 3834 * of the following are true: 3835 * <ul> 3836 * <li> it is a letter 3837 * <li> it is a currency symbol (such as <code>'$'</code>) 3838 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 3839 * <li> it is a digit 3840 * <li> it is a numeric letter (such as a Roman numeral character) 3841 * <li> it is a combining mark 3842 * <li> it is a non-spacing mark 3843 * <li> <code>isIdentifierIgnorable</code> returns 3844 * <code>true</code> for the character. 3845 * </ul> 3846 * 3847 * @param ch the character to be tested. 3848 * @return <code>true</code> if the character may be part of a 3849 * Java identifier; <code>false</code> otherwise. 3850 * @see java.lang.Character#isJavaLetter(char) 3851 * @see java.lang.Character#isJavaIdentifierStart(char) 3852 * @see java.lang.Character#isJavaIdentifierPart(char) 3853 * @see java.lang.Character#isLetter(char) 3854 * @see java.lang.Character#isLetterOrDigit(char) 3855 * @see java.lang.Character#isUnicodeIdentifierPart(char) 3856 * @see java.lang.Character#isIdentifierIgnorable(char) 3857 * @since 1.02 3858 * @deprecated Replaced by isJavaIdentifierPart(char). 
3859 */ 3860 @Deprecated 3861 public static boolean isJavaLetterOrDigit(char ch) { 3862 return isJavaIdentifierPart(ch); 3863 } 3864 3865 /** 3866 * Determines if the specified character is 3867 * permissible as the first character in a Java identifier. 3868 * <p> 3869 * A character may start a Java identifier if and only if 3870 * one of the following conditions is true: 3871 * <ul> 3872 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 3873 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 3874 * <li> ch is a currency symbol (such as "$") 3875 * <li> ch is a connecting punctuation character (such as "_"). 3876 * </ul> 3877 * 3878 * <p><b>Note:</b> This method cannot handle <a 3879 * href="#supplementary"> supplementary characters</a>. To support 3880 * all Unicode characters, including supplementary characters, use 3881 * the {@link #isJavaIdentifierStart(int)} method. 3882 * 3883 * @param ch the character to be tested. 3884 * @return <code>true</code> if the character may start a Java identifier; 3885 * <code>false</code> otherwise. 3886 * @see java.lang.Character#isJavaIdentifierPart(char) 3887 * @see java.lang.Character#isLetter(char) 3888 * @see java.lang.Character#isUnicodeIdentifierStart(char) 3889 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 3890 * @since 1.1 3891 */ 3892 public static boolean isJavaIdentifierStart(char ch) { 3893 return isJavaIdentifierStart((int)ch); 3894 } 3895 3896 /** 3897 * Determines if the character (Unicode code point) is 3898 * permissible as the first character in a Java identifier. 
3899 * <p> 3900 * A character may start a Java identifier if and only if 3901 * one of the following conditions is true: 3902 * <ul> 3903 * <li> {@link #isLetter(int) isLetter(codePoint)} 3904 * returns <code>true</code> 3905 * <li> {@link #getType(int) getType(codePoint)} 3906 * returns <code>LETTER_NUMBER</code> 3907 * <li> the referenced character is a currency symbol (such as "$") 3908 * <li> the referenced character is a connecting punctuation character 3909 * (such as "_"). 3910 * </ul> 3911 * 3912 * @param codePoint the character (Unicode code point) to be tested. 3913 * @return <code>true</code> if the character may start a Java identifier; 3914 * <code>false</code> otherwise. 3915 * @see java.lang.Character#isJavaIdentifierPart(int) 3916 * @see java.lang.Character#isLetter(int) 3917 * @see java.lang.Character#isUnicodeIdentifierStart(int) 3918 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 3919 * @since 1.5 3920 */ 3921 public static boolean isJavaIdentifierStart(int codePoint) { 3922 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 3923 } 3924 3925 /** 3926 * Determines if the specified character may be part of a Java 3927 * identifier as other than the first character. 3928 * <p> 3929 * A character may be part of a Java identifier if any of the following 3930 * are true: 3931 * <ul> 3932 * <li> it is a letter 3933 * <li> it is a currency symbol (such as <code>'$'</code>) 3934 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 3935 * <li> it is a digit 3936 * <li> it is a numeric letter (such as a Roman numeral character) 3937 * <li> it is a combining mark 3938 * <li> it is a non-spacing mark 3939 * <li> <code>isIdentifierIgnorable</code> returns 3940 * <code>true</code> for the character 3941 * </ul> 3942 * 3943 * <p><b>Note:</b> This method cannot handle <a 3944 * href="#supplementary"> supplementary characters</a>. 
To support 3945 * all Unicode characters, including supplementary characters, use 3946 * the {@link #isJavaIdentifierPart(int)} method. 3947 * 3948 * @param ch the character to be tested. 3949 * @return <code>true</code> if the character may be part of a 3950 * Java identifier; <code>false</code> otherwise. 3951 * @see java.lang.Character#isIdentifierIgnorable(char) 3952 * @see java.lang.Character#isJavaIdentifierStart(char) 3953 * @see java.lang.Character#isLetterOrDigit(char) 3954 * @see java.lang.Character#isUnicodeIdentifierPart(char) 3955 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 3956 * @since 1.1 3957 */ 3958 public static boolean isJavaIdentifierPart(char ch) { 3959 return isJavaIdentifierPart((int)ch); 3960 } 3961 3962 /** 3963 * Determines if the character (Unicode code point) may be part of a Java 3964 * identifier as other than the first character. 3965 * <p> 3966 * A character may be part of a Java identifier if any of the following 3967 * are true: 3968 * <ul> 3969 * <li> it is a letter 3970 * <li> it is a currency symbol (such as <code>'$'</code>) 3971 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 3972 * <li> it is a digit 3973 * <li> it is a numeric letter (such as a Roman numeral character) 3974 * <li> it is a combining mark 3975 * <li> it is a non-spacing mark 3976 * <li> {@link #isIdentifierIgnorable(int) 3977 * isIdentifierIgnorable(codePoint)} returns <code>true</code> for 3978 * the character 3979 * </ul> 3980 * 3981 * @param codePoint the character (Unicode code point) to be tested. 3982 * @return <code>true</code> if the character may be part of a 3983 * Java identifier; <code>false</code> otherwise. 
3984 * @see java.lang.Character#isIdentifierIgnorable(int) 3985 * @see java.lang.Character#isJavaIdentifierStart(int) 3986 * @see java.lang.Character#isLetterOrDigit(int) 3987 * @see java.lang.Character#isUnicodeIdentifierPart(int) 3988 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 3989 * @since 1.5 3990 */ 3991 public static boolean isJavaIdentifierPart(int codePoint) { 3992 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 3993 } 3994 3995 /** 3996 * Determines if the specified character is permissible as the 3997 * first character in a Unicode identifier. 3998 * <p> 3999 * A character may start a Unicode identifier if and only if 4000 * one of the following conditions is true: 4001 * <ul> 4002 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 4003 * <li> {@link #getType(char) getType(ch)} returns 4004 * <code>LETTER_NUMBER</code>. 4005 * </ul> 4006 * 4007 * <p><b>Note:</b> This method cannot handle <a 4008 * href="#supplementary"> supplementary characters</a>. To support 4009 * all Unicode characters, including supplementary characters, use 4010 * the {@link #isUnicodeIdentifierStart(int)} method. 4011 * 4012 * @param ch the character to be tested. 4013 * @return <code>true</code> if the character may start a Unicode 4014 * identifier; <code>false</code> otherwise. 4015 * @see java.lang.Character#isJavaIdentifierStart(char) 4016 * @see java.lang.Character#isLetter(char) 4017 * @see java.lang.Character#isUnicodeIdentifierPart(char) 4018 * @since 1.1 4019 */ 4020 public static boolean isUnicodeIdentifierStart(char ch) { 4021 return isUnicodeIdentifierStart((int)ch); 4022 } 4023 4024 /** 4025 * Determines if the specified character (Unicode code point) is permissible as the 4026 * first character in a Unicode identifier. 
4027 * <p> 4028 * A character may start a Unicode identifier if and only if 4029 * one of the following conditions is true: 4030 * <ul> 4031 * <li> {@link #isLetter(int) isLetter(codePoint)} 4032 * returns <code>true</code> 4033 * <li> {@link #getType(int) getType(codePoint)} 4034 * returns <code>LETTER_NUMBER</code>. 4035 * </ul> 4036 * @param codePoint the character (Unicode code point) to be tested. 4037 * @return <code>true</code> if the character may start a Unicode 4038 * identifier; <code>false</code> otherwise. 4039 * @see java.lang.Character#isJavaIdentifierStart(int) 4040 * @see java.lang.Character#isLetter(int) 4041 * @see java.lang.Character#isUnicodeIdentifierPart(int) 4042 * @since 1.5 4043 */ 4044 public static boolean isUnicodeIdentifierStart(int codePoint) { 4045 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 4046 } 4047 4048 /** 4049 * Determines if the specified character may be part of a Unicode 4050 * identifier as other than the first character. 4051 * <p> 4052 * A character may be part of a Unicode identifier if and only if 4053 * one of the following statements is true: 4054 * <ul> 4055 * <li> it is a letter 4056 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 4057 * <li> it is a digit 4058 * <li> it is a numeric letter (such as a Roman numeral character) 4059 * <li> it is a combining mark 4060 * <li> it is a non-spacing mark 4061 * <li> <code>isIdentifierIgnorable</code> returns 4062 * <code>true</code> for this character. 4063 * </ul> 4064 * 4065 * <p><b>Note:</b> This method cannot handle <a 4066 * href="#supplementary"> supplementary characters</a>. To support 4067 * all Unicode characters, including supplementary characters, use 4068 * the {@link #isUnicodeIdentifierPart(int)} method. 4069 * 4070 * @param ch the character to be tested. 4071 * @return <code>true</code> if the character may be part of a 4072 * Unicode identifier; <code>false</code> otherwise. 
4073 * @see java.lang.Character#isIdentifierIgnorable(char) 4074 * @see java.lang.Character#isJavaIdentifierPart(char) 4075 * @see java.lang.Character#isLetterOrDigit(char) 4076 * @see java.lang.Character#isUnicodeIdentifierStart(char) 4077 * @since 1.1 4078 */ 4079 public static boolean isUnicodeIdentifierPart(char ch) { 4080 return isUnicodeIdentifierPart((int)ch); 4081 } 4082 4083 /** 4084 * Determines if the specified character (Unicode code point) may be part of a Unicode 4085 * identifier as other than the first character. 4086 * <p> 4087 * A character may be part of a Unicode identifier if and only if 4088 * one of the following statements is true: 4089 * <ul> 4090 * <li> it is a letter 4091 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 4092 * <li> it is a digit 4093 * <li> it is a numeric letter (such as a Roman numeral character) 4094 * <li> it is a combining mark 4095 * <li> it is a non-spacing mark 4096 * <li> <code>isIdentifierIgnorable</code> returns 4097 * <code>true</code> for this character. 4098 * </ul> 4099 * @param codePoint the character (Unicode code point) to be tested. 4100 * @return <code>true</code> if the character may be part of a 4101 * Unicode identifier; <code>false</code> otherwise. 4102 * @see java.lang.Character#isIdentifierIgnorable(int) 4103 * @see java.lang.Character#isJavaIdentifierPart(int) 4104 * @see java.lang.Character#isLetterOrDigit(int) 4105 * @see java.lang.Character#isUnicodeIdentifierStart(int) 4106 * @since 1.5 4107 */ 4108 public static boolean isUnicodeIdentifierPart(int codePoint) { 4109 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 4110 } 4111 4112 /** 4113 * Determines if the specified character should be regarded as 4114 * an ignorable character in a Java identifier or a Unicode identifier. 
4115 * <p> 4116 * The following Unicode characters are ignorable in a Java identifier 4117 * or a Unicode identifier: 4118 * <ul> 4119 * <li>ISO control characters that are not whitespace 4120 * <ul> 4121 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 4122 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 4123 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 4124 * </ul> 4125 * 4126 * <li>all characters that have the <code>FORMAT</code> general 4127 * category value 4128 * </ul> 4129 * 4130 * <p><b>Note:</b> This method cannot handle <a 4131 * href="#supplementary"> supplementary characters</a>. To support 4132 * all Unicode characters, including supplementary characters, use 4133 * the {@link #isIdentifierIgnorable(int)} method. 4134 * 4135 * @param ch the character to be tested. 4136 * @return <code>true</code> if the character is an ignorable control 4137 * character that may be part of a Java or Unicode identifier; 4138 * <code>false</code> otherwise. 4139 * @see java.lang.Character#isJavaIdentifierPart(char) 4140 * @see java.lang.Character#isUnicodeIdentifierPart(char) 4141 * @since 1.1 4142 */ 4143 public static boolean isIdentifierIgnorable(char ch) { 4144 return isIdentifierIgnorable((int)ch); 4145 } 4146 4147 /** 4148 * Determines if the specified character (Unicode code point) should be regarded as 4149 * an ignorable character in a Java identifier or a Unicode identifier. 
4150 * <p> 4151 * The following Unicode characters are ignorable in a Java identifier 4152 * or a Unicode identifier: 4153 * <ul> 4154 * <li>ISO control characters that are not whitespace 4155 * <ul> 4156 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 4157 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 4158 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 4159 * </ul> 4160 * 4161 * <li>all characters that have the <code>FORMAT</code> general 4162 * category value 4163 * </ul> 4164 * 4165 * @param codePoint the character (Unicode code point) to be tested. 4166 * @return <code>true</code> if the character is an ignorable control 4167 * character that may be part of a Java or Unicode identifier; 4168 * <code>false</code> otherwise. 4169 * @see java.lang.Character#isJavaIdentifierPart(int) 4170 * @see java.lang.Character#isUnicodeIdentifierPart(int) 4171 * @since 1.5 4172 */ 4173 public static boolean isIdentifierIgnorable(int codePoint) { 4174 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 4175 } 4176 4177 /** 4178 * Converts the character argument to lowercase using case 4179 * mapping information from the UnicodeData file. 4180 * <p> 4181 * Note that 4182 * <code>Character.isLowerCase(Character.toLowerCase(ch))</code> 4183 * does not always return <code>true</code> for some ranges of 4184 * characters, particularly those that are symbols or ideographs. 4185 * 4186 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map 4187 * characters to lowercase. <code>String</code> case mapping methods 4188 * have several benefits over <code>Character</code> case mapping methods. 4189 * <code>String</code> case mapping methods can perform locale-sensitive 4190 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 4191 * the <code>Character</code> case mapping methods cannot. 
4192 * 4193 * <p><b>Note:</b> This method cannot handle <a 4194 * href="#supplementary"> supplementary characters</a>. To support 4195 * all Unicode characters, including supplementary characters, use 4196 * the {@link #toLowerCase(int)} method. 4197 * 4198 * @param ch the character to be converted. 4199 * @return the lowercase equivalent of the character, if any; 4200 * otherwise, the character itself. 4201 * @see java.lang.Character#isLowerCase(char) 4202 * @see java.lang.String#toLowerCase() 4203 */ 4204 public static char toLowerCase(char ch) { 4205 return (char)toLowerCase((int)ch); 4206 } 4207 4208 /** 4209 * Converts the character (Unicode code point) argument to 4210 * lowercase using case mapping information from the UnicodeData 4211 * file. 4212 * 4213 * <p> Note that 4214 * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code> 4215 * does not always return <code>true</code> for some ranges of 4216 * characters, particularly those that are symbols or ideographs. 4217 * 4218 * <p>In general, {@link java.lang.String#toLowerCase()} should be used to map 4219 * characters to lowercase. <code>String</code> case mapping methods 4220 * have several benefits over <code>Character</code> case mapping methods. 4221 * <code>String</code> case mapping methods can perform locale-sensitive 4222 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 4223 * the <code>Character</code> case mapping methods cannot. 4224 * 4225 * @param codePoint the character (Unicode code point) to be converted. 4226 * @return the lowercase equivalent of the character (Unicode code 4227 * point), if any; otherwise, the character itself. 
4228 * @see java.lang.Character#isLowerCase(int) 4229 * @see java.lang.String#toLowerCase() 4230 * 4231 * @since 1.5 4232 */ 4233 public static int toLowerCase(int codePoint) { 4234 return CharacterData.of(codePoint).toLowerCase(codePoint); 4235 } 4236 4237 /** 4238 * Converts the character argument to uppercase using case mapping 4239 * information from the UnicodeData file. 4240 * <p> 4241 * Note that 4242 * <code>Character.isUpperCase(Character.toUpperCase(ch))</code> 4243 * does not always return <code>true</code> for some ranges of 4244 * characters, particularly those that are symbols or ideographs. 4245 * 4246 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map 4247 * characters to uppercase. <code>String</code> case mapping methods 4248 * have several benefits over <code>Character</code> case mapping methods. 4249 * <code>String</code> case mapping methods can perform locale-sensitive 4250 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 4251 * the <code>Character</code> case mapping methods cannot. 4252 * 4253 * <p><b>Note:</b> This method cannot handle <a 4254 * href="#supplementary"> supplementary characters</a>. To support 4255 * all Unicode characters, including supplementary characters, use 4256 * the {@link #toUpperCase(int)} method. 4257 * 4258 * @param ch the character to be converted. 4259 * @return the uppercase equivalent of the character, if any; 4260 * otherwise, the character itself. 4261 * @see java.lang.Character#isUpperCase(char) 4262 * @see java.lang.String#toUpperCase() 4263 */ 4264 public static char toUpperCase(char ch) { 4265 return (char)toUpperCase((int)ch); 4266 } 4267 4268 /** 4269 * Converts the character (Unicode code point) argument to 4270 * uppercase using case mapping information from the UnicodeData 4271 * file. 
4272 * 4273 * <p>Note that 4274 * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code> 4275 * does not always return <code>true</code> for some ranges of 4276 * characters, particularly those that are symbols or ideographs. 4277 * 4278 * <p>In general, {@link java.lang.String#toUpperCase()} should be used to map 4279 * characters to uppercase. <code>String</code> case mapping methods 4280 * have several benefits over <code>Character</code> case mapping methods. 4281 * <code>String</code> case mapping methods can perform locale-sensitive 4282 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 4283 * the <code>Character</code> case mapping methods cannot. 4284 * 4285 * @param codePoint the character (Unicode code point) to be converted. 4286 * @return the uppercase equivalent of the character, if any; 4287 * otherwise, the character itself. 4288 * @see java.lang.Character#isUpperCase(int) 4289 * @see java.lang.String#toUpperCase() 4290 * 4291 * @since 1.5 4292 */ 4293 public static int toUpperCase(int codePoint) { 4294 return CharacterData.of(codePoint).toUpperCase(codePoint); 4295 } 4296 4297 /** 4298 * Converts the character argument to titlecase using case mapping 4299 * information from the UnicodeData file. If a character has no 4300 * explicit titlecase mapping and is not itself a titlecase char 4301 * according to UnicodeData, then the uppercase mapping is 4302 * returned as an equivalent titlecase mapping. If the 4303 * <code>char</code> argument is already a titlecase 4304 * <code>char</code>, the same <code>char</code> value will be 4305 * returned. 4306 * <p> 4307 * Note that 4308 * <code>Character.isTitleCase(Character.toTitleCase(ch))</code> 4309 * does not always return <code>true</code> for some ranges of 4310 * characters. 4311 * 4312 * <p><b>Note:</b> This method cannot handle <a 4313 * href="#supplementary"> supplementary characters</a>. 
To support 4314 * all Unicode characters, including supplementary characters, use 4315 * the {@link #toTitleCase(int)} method. 4316 * 4317 * @param ch the character to be converted. 4318 * @return the titlecase equivalent of the character, if any; 4319 * otherwise, the character itself. 4320 * @see java.lang.Character#isTitleCase(char) 4321 * @see java.lang.Character#toLowerCase(char) 4322 * @see java.lang.Character#toUpperCase(char) 4323 * @since 1.0.2 4324 */ 4325 public static char toTitleCase(char ch) { 4326 return (char)toTitleCase((int)ch); 4327 } 4328 4329 /** 4330 * Converts the character (Unicode code point) argument to titlecase using case mapping 4331 * information from the UnicodeData file. If a character has no 4332 * explicit titlecase mapping and is not itself a titlecase char 4333 * according to UnicodeData, then the uppercase mapping is 4334 * returned as an equivalent titlecase mapping. If the 4335 * character argument is already a titlecase 4336 * character, the same character value will be 4337 * returned. 4338 * 4339 * <p>Note that 4340 * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code> 4341 * does not always return <code>true</code> for some ranges of 4342 * characters. 4343 * 4344 * @param codePoint the character (Unicode code point) to be converted. 4345 * @return the titlecase equivalent of the character, if any; 4346 * otherwise, the character itself. 4347 * @see java.lang.Character#isTitleCase(int) 4348 * @see java.lang.Character#toLowerCase(int) 4349 * @see java.lang.Character#toUpperCase(int) 4350 * @since 1.5 4351 */ 4352 public static int toTitleCase(int codePoint) { 4353 return CharacterData.of(codePoint).toTitleCase(codePoint); 4354 } 4355 4356 /** 4357 * Returns the numeric value of the character <code>ch</code> in the 4358 * specified radix. 
4359 * <p> 4360 * If the radix is not in the range <code>MIN_RADIX</code> <= 4361 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 4362 * value of <code>ch</code> is not a valid digit in the specified 4363 * radix, <code>-1</code> is returned. A character is a valid digit 4364 * if at least one of the following is true: 4365 * <ul> 4366 * <li>The method <code>isDigit</code> is <code>true</code> of the character 4367 * and the Unicode decimal digit value of the character (or its 4368 * single-character decomposition) is less than the specified radix. 4369 * In this case the decimal digit value is returned. 4370 * <li>The character is one of the uppercase Latin letters 4371 * <code>'A'</code> through <code>'Z'</code> and its code is less than 4372 * <code>radix + 'A' - 10</code>. 4373 * In this case, <code>ch - 'A' + 10</code> 4374 * is returned. 4375 * <li>The character is one of the lowercase Latin letters 4376 * <code>'a'</code> through <code>'z'</code> and its code is less than 4377 * <code>radix + 'a' - 10</code>. 4378 * In this case, <code>ch - 'a' + 10</code> 4379 * is returned. 4380 * </ul> 4381 * 4382 * <p><b>Note:</b> This method cannot handle <a 4383 * href="#supplementary"> supplementary characters</a>. To support 4384 * all Unicode characters, including supplementary characters, use 4385 * the {@link #digit(int, int)} method. 4386 * 4387 * @param ch the character to be converted. 4388 * @param radix the radix. 4389 * @return the numeric value represented by the character in the 4390 * specified radix. 4391 * @see java.lang.Character#forDigit(int, int) 4392 * @see java.lang.Character#isDigit(char) 4393 */ 4394 public static int digit(char ch, int radix) { 4395 return digit((int)ch, radix); 4396 } 4397 4398 /** 4399 * Returns the numeric value of the specified character (Unicode 4400 * code point) in the specified radix. 
4401 * 4402 * <p>If the radix is not in the range <code>MIN_RADIX</code> <= 4403 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 4404 * character is not a valid digit in the specified 4405 * radix, <code>-1</code> is returned. A character is a valid digit 4406 * if at least one of the following is true: 4407 * <ul> 4408 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character 4409 * and the Unicode decimal digit value of the character (or its 4410 * single-character decomposition) is less than the specified radix. 4411 * In this case the decimal digit value is returned. 4412 * <li>The character is one of the uppercase Latin letters 4413 * <code>'A'</code> through <code>'Z'</code> and its code is less than 4414 * <code>radix + 'A' - 10</code>. 4415 * In this case, <code>ch - 'A' + 10</code> 4416 * is returned. 4417 * <li>The character is one of the lowercase Latin letters 4418 * <code>'a'</code> through <code>'z'</code> and its code is less than 4419 * <code>radix + 'a' - 10</code>. 4420 * In this case, <code>ch - 'a' + 10</code> 4421 * is returned. 4422 * </ul> 4423 * 4424 * @param codePoint the character (Unicode code point) to be converted. 4425 * @param radix the radix. 4426 * @return the numeric value represented by the character in the 4427 * specified radix. 4428 * @see java.lang.Character#forDigit(int, int) 4429 * @see java.lang.Character#isDigit(int) 4430 * @since 1.5 4431 */ 4432 public static int digit(int codePoint, int radix) { 4433 return CharacterData.of(codePoint).digit(codePoint, radix); 4434 } 4435 4436 /** 4437 * Returns the <code>int</code> value that the specified Unicode 4438 * character represents. For example, the character 4439 * <code>'\u216C'</code> (the roman numeral fifty) will return 4440 * an int with a value of 50. 
4441 * <p> 4442 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 4443 * <code>'\u005A'</code>), lowercase 4444 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 4445 * full width variant (<code>'\uFF21'</code> through 4446 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 4447 * <code>'\uFF5A'</code>) forms have numeric values from 10 4448 * through 35. This is independent of the Unicode specification, 4449 * which does not assign numeric values to these <code>char</code> 4450 * values. 4451 * <p> 4452 * If the character does not have a numeric value, then -1 is returned. 4453 * If the character has a numeric value that cannot be represented as a 4454 * nonnegative integer (for example, a fractional value), then -2 4455 * is returned. 4456 * 4457 * <p><b>Note:</b> This method cannot handle <a 4458 * href="#supplementary"> supplementary characters</a>. To support 4459 * all Unicode characters, including supplementary characters, use 4460 * the {@link #getNumericValue(int)} method. 4461 * 4462 * @param ch the character to be converted. 4463 * @return the numeric value of the character, as a nonnegative <code>int</code> 4464 * value; -2 if the character has a numeric value that is not a 4465 * nonnegative integer; -1 if the character has no numeric value. 4466 * @see java.lang.Character#forDigit(int, int) 4467 * @see java.lang.Character#isDigit(char) 4468 * @since 1.1 4469 */ 4470 public static int getNumericValue(char ch) { 4471 return getNumericValue((int)ch); 4472 } 4473 4474 /** 4475 * Returns the <code>int</code> value that the specified 4476 * character (Unicode code point) represents. For example, the character 4477 * <code>'\u216C'</code> (the Roman numeral fifty) will return 4478 * an <code>int</code> with a value of 50. 
4479 * <p> 4480 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 4481 * <code>'\u005A'</code>), lowercase 4482 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 4483 * full width variant (<code>'\uFF21'</code> through 4484 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 4485 * <code>'\uFF5A'</code>) forms have numeric values from 10 4486 * through 35. This is independent of the Unicode specification, 4487 * which does not assign numeric values to these <code>char</code> 4488 * values. 4489 * <p> 4490 * If the character does not have a numeric value, then -1 is returned. 4491 * If the character has a numeric value that cannot be represented as a 4492 * nonnegative integer (for example, a fractional value), then -2 4493 * is returned. 4494 * 4495 * @param codePoint the character (Unicode code point) to be converted. 4496 * @return the numeric value of the character, as a nonnegative <code>int</code> 4497 * value; -2 if the character has a numeric value that is not a 4498 * nonnegative integer; -1 if the character has no numeric value. 4499 * @see java.lang.Character#forDigit(int, int) 4500 * @see java.lang.Character#isDigit(int) 4501 * @since 1.5 4502 */ 4503 public static int getNumericValue(int codePoint) { 4504 return CharacterData.of(codePoint).getNumericValue(codePoint); 4505 } 4506 4507 /** 4508 * Determines if the specified character is ISO-LATIN-1 white space. 
4509 * This method returns <code>true</code> for the following five 4510 * characters only: 4511 * <table> 4512 * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td> 4513 * <td><code>HORIZONTAL TABULATION</code></td></tr> 4514 * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td> 4515 * <td><code>NEW LINE</code></td></tr> 4516 * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td> 4517 * <td><code>FORM FEED</code></td></tr> 4518 * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td> 4519 * <td><code>CARRIAGE RETURN</code></td></tr> 4520 * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td> 4521 * <td><code>SPACE</code></td></tr> 4522 * </table> 4523 * 4524 * @param ch the character to be tested. 4525 * @return <code>true</code> if the character is ISO-LATIN-1 white 4526 * space; <code>false</code> otherwise. 4527 * @see java.lang.Character#isSpaceChar(char) 4528 * @see java.lang.Character#isWhitespace(char) 4529 * @deprecated Replaced by isWhitespace(char). 4530 */ 4531 @Deprecated 4532 public static boolean isSpace(char ch) { 4533 return (ch <= 0x0020) && 4534 (((((1L << 0x0009) | 4535 (1L << 0x000A) | 4536 (1L << 0x000C) | 4537 (1L << 0x000D) | 4538 (1L << 0x0020)) >> ch) & 1L) != 0); 4539 } 4540 4541 4542 /** 4543 * Determines if the specified character is a Unicode space character. 4544 * A character is considered to be a space character if and only if 4545 * it is specified to be a space character by the Unicode standard. This 4546 * method returns true if the character's general category type is any of 4547 * the following: 4548 * <ul> 4549 * <li> <code>SPACE_SEPARATOR</code> 4550 * <li> <code>LINE_SEPARATOR</code> 4551 * <li> <code>PARAGRAPH_SEPARATOR</code> 4552 * </ul> 4553 * 4554 * <p><b>Note:</b> This method cannot handle <a 4555 * href="#supplementary"> supplementary characters</a>. To support 4556 * all Unicode characters, including supplementary characters, use 4557 * the {@link #isSpaceChar(int)} method. 
4558 * 4559 * @param ch the character to be tested. 4560 * @return <code>true</code> if the character is a space character; 4561 * <code>false</code> otherwise. 4562 * @see java.lang.Character#isWhitespace(char) 4563 * @since 1.1 4564 */ 4565 public static boolean isSpaceChar(char ch) { 4566 return isSpaceChar((int)ch); 4567 } 4568 4569 /** 4570 * Determines if the specified character (Unicode code point) is a 4571 * Unicode space character. A character is considered to be a 4572 * space character if and only if it is specified to be a space 4573 * character by the Unicode standard. This method returns true if 4574 * the character's general category type is any of the following: 4575 * 4576 * <ul> 4577 * <li> {@link #SPACE_SEPARATOR} 4578 * <li> {@link #LINE_SEPARATOR} 4579 * <li> {@link #PARAGRAPH_SEPARATOR} 4580 * </ul> 4581 * 4582 * @param codePoint the character (Unicode code point) to be tested. 4583 * @return <code>true</code> if the character is a space character; 4584 * <code>false</code> otherwise. 4585 * @see java.lang.Character#isWhitespace(int) 4586 * @since 1.5 4587 */ 4588 public static boolean isSpaceChar(int codePoint) { 4589 return ((((1 << Character.SPACE_SEPARATOR) | 4590 (1 << Character.LINE_SEPARATOR) | 4591 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 4592 != 0; 4593 } 4594 4595 /** 4596 * Determines if the specified character is white space according to Java. 4597 * A character is a Java whitespace character if and only if it satisfies 4598 * one of the following criteria: 4599 * <ul> 4600 * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>, 4601 * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>) 4602 * but is not also a non-breaking space (<code>'\u00A0'</code>, 4603 * <code>'\u2007'</code>, <code>'\u202F'</code>). 4604 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION. 4605 * <li> It is <code>'\u000A'</code>, LINE FEED. 
4606 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION. 4607 * <li> It is <code>'\u000C'</code>, FORM FEED. 4608 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN. 4609 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR. 4610 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR. 4611 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR. 4612 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR. 4613 * </ul> 4614 * 4615 * <p><b>Note:</b> This method cannot handle <a 4616 * href="#supplementary"> supplementary characters</a>. To support 4617 * all Unicode characters, including supplementary characters, use 4618 * the {@link #isWhitespace(int)} method. 4619 * 4620 * @param ch the character to be tested. 4621 * @return <code>true</code> if the character is a Java whitespace 4622 * character; <code>false</code> otherwise. 4623 * @see java.lang.Character#isSpaceChar(char) 4624 * @since 1.1 4625 */ 4626 public static boolean isWhitespace(char ch) { 4627 return isWhitespace((int)ch); 4628 } 4629 4630 /** 4631 * Determines if the specified character (Unicode code point) is 4632 * white space according to Java. A character is a Java 4633 * whitespace character if and only if it satisfies one of the 4634 * following criteria: 4635 * <ul> 4636 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 4637 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 4638 * but is not also a non-breaking space (<code>'\u00A0'</code>, 4639 * <code>'\u2007'</code>, <code>'\u202F'</code>). 4640 * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION. 4641 * <li> It is <code>'\u000A'</code>, LINE FEED. 4642 * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION. 4643 * <li> It is <code>'\u000C'</code>, FORM FEED. 4644 * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN. 4645 * <li> It is <code>'\u001C'</code>, FILE SEPARATOR. 4646 * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR. 4647 * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR. 
4648 * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR. 4649 * </ul> 4650 * <p> 4651 * 4652 * @param codePoint the character (Unicode code point) to be tested. 4653 * @return <code>true</code> if the character is a Java whitespace 4654 * character; <code>false</code> otherwise. 4655 * @see java.lang.Character#isSpaceChar(int) 4656 * @since 1.5 4657 */ 4658 public static boolean isWhitespace(int codePoint) { 4659 return CharacterData.of(codePoint).isWhitespace(codePoint); 4660 } 4661 4662 /** 4663 * Determines if the specified character is an ISO control 4664 * character. A character is considered to be an ISO control 4665 * character if its code is in the range <code>'\u0000'</code> 4666 * through <code>'\u001F'</code> or in the range 4667 * <code>'\u007F'</code> through <code>'\u009F'</code>. 4668 * 4669 * <p><b>Note:</b> This method cannot handle <a 4670 * href="#supplementary"> supplementary characters</a>. To support 4671 * all Unicode characters, including supplementary characters, use 4672 * the {@link #isISOControl(int)} method. 4673 * 4674 * @param ch the character to be tested. 4675 * @return <code>true</code> if the character is an ISO control character; 4676 * <code>false</code> otherwise. 4677 * 4678 * @see java.lang.Character#isSpaceChar(char) 4679 * @see java.lang.Character#isWhitespace(char) 4680 * @since 1.1 4681 */ 4682 public static boolean isISOControl(char ch) { 4683 return isISOControl((int)ch); 4684 } 4685 4686 /** 4687 * Determines if the referenced character (Unicode code point) is an ISO control 4688 * character. A character is considered to be an ISO control 4689 * character if its code is in the range <code>'\u0000'</code> 4690 * through <code>'\u001F'</code> or in the range 4691 * <code>'\u007F'</code> through <code>'\u009F'</code>. 4692 * 4693 * @param codePoint the character (Unicode code point) to be tested. 4694 * @return <code>true</code> if the character is an ISO control character; 4695 * <code>false</code> otherwise. 
4696 * @see java.lang.Character#isSpaceChar(int) 4697 * @see java.lang.Character#isWhitespace(int) 4698 * @since 1.5 4699 */ 4700 public static boolean isISOControl(int codePoint) { 4701 return (codePoint >= 0x0000 && codePoint <= 0x001F) || 4702 (codePoint >= 0x007F && codePoint <= 0x009F); 4703 } 4704 4705 /** 4706 * Returns a value indicating a character's general category. 4707 * 4708 * <p><b>Note:</b> This method cannot handle <a 4709 * href="#supplementary"> supplementary characters</a>. To support 4710 * all Unicode characters, including supplementary characters, use 4711 * the {@link #getType(int)} method. 4712 * 4713 * @param ch the character to be tested. 4714 * @return a value of type <code>int</code> representing the 4715 * character's general category. 4716 * @see java.lang.Character#COMBINING_SPACING_MARK 4717 * @see java.lang.Character#CONNECTOR_PUNCTUATION 4718 * @see java.lang.Character#CONTROL 4719 * @see java.lang.Character#CURRENCY_SYMBOL 4720 * @see java.lang.Character#DASH_PUNCTUATION 4721 * @see java.lang.Character#DECIMAL_DIGIT_NUMBER 4722 * @see java.lang.Character#ENCLOSING_MARK 4723 * @see java.lang.Character#END_PUNCTUATION 4724 * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION 4725 * @see java.lang.Character#FORMAT 4726 * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION 4727 * @see java.lang.Character#LETTER_NUMBER 4728 * @see java.lang.Character#LINE_SEPARATOR 4729 * @see java.lang.Character#LOWERCASE_LETTER 4730 * @see java.lang.Character#MATH_SYMBOL 4731 * @see java.lang.Character#MODIFIER_LETTER 4732 * @see java.lang.Character#MODIFIER_SYMBOL 4733 * @see java.lang.Character#NON_SPACING_MARK 4734 * @see java.lang.Character#OTHER_LETTER 4735 * @see java.lang.Character#OTHER_NUMBER 4736 * @see java.lang.Character#OTHER_PUNCTUATION 4737 * @see java.lang.Character#OTHER_SYMBOL 4738 * @see java.lang.Character#PARAGRAPH_SEPARATOR 4739 * @see java.lang.Character#PRIVATE_USE 4740 * @see java.lang.Character#SPACE_SEPARATOR 4741 * @see 
java.lang.Character#START_PUNCTUATION 4742 * @see java.lang.Character#SURROGATE 4743 * @see java.lang.Character#TITLECASE_LETTER 4744 * @see java.lang.Character#UNASSIGNED 4745 * @see java.lang.Character#UPPERCASE_LETTER 4746 * @since 1.1 4747 */ 4748 public static int getType(char ch) { 4749 return getType((int)ch); 4750 } 4751 4752 /** 4753 * Returns a value indicating a character's general category. 4754 * 4755 * @param codePoint the character (Unicode code point) to be tested. 4756 * @return a value of type <code>int</code> representing the 4757 * character's general category. 4758 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 4759 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 4760 * @see Character#CONTROL CONTROL 4761 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 4762 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 4763 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 4764 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 4765 * @see Character#END_PUNCTUATION END_PUNCTUATION 4766 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 4767 * @see Character#FORMAT FORMAT 4768 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 4769 * @see Character#LETTER_NUMBER LETTER_NUMBER 4770 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 4771 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 4772 * @see Character#MATH_SYMBOL MATH_SYMBOL 4773 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 4774 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 4775 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 4776 * @see Character#OTHER_LETTER OTHER_LETTER 4777 * @see Character#OTHER_NUMBER OTHER_NUMBER 4778 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 4779 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 4780 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 4781 * @see Character#PRIVATE_USE PRIVATE_USE 4782 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 4783 * @see Character#START_PUNCTUATION 
START_PUNCTUATION 4784 * @see Character#SURROGATE SURROGATE 4785 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 4786 * @see Character#UNASSIGNED UNASSIGNED 4787 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 4788 * @since 1.5 4789 */ 4790 public static int getType(int codePoint) { 4791 return CharacterData.of(codePoint).getType(codePoint); 4792 } 4793 4794 /** 4795 * Determines the character representation for a specific digit in 4796 * the specified radix. If the value of <code>radix</code> is not a 4797 * valid radix, or the value of <code>digit</code> is not a valid 4798 * digit in the specified radix, the null character 4799 * (<code>'\u0000'</code>) is returned. 4800 * <p> 4801 * The <code>radix</code> argument is valid if it is greater than or 4802 * equal to <code>MIN_RADIX</code> and less than or equal to 4803 * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if 4804 * <code>0 <=digit < radix</code>. 4805 * <p> 4806 * If the digit is less than 10, then 4807 * <code>'0' + digit</code> is returned. Otherwise, the value 4808 * <code>'a' + digit - 10</code> is returned. 4809 * 4810 * @param digit the number to convert to a character. 4811 * @param radix the radix. 4812 * @return the <code>char</code> representation of the specified digit 4813 * in the specified radix. 4814 * @see java.lang.Character#MIN_RADIX 4815 * @see java.lang.Character#MAX_RADIX 4816 * @see java.lang.Character#digit(char, int) 4817 */ 4818 public static char forDigit(int digit, int radix) { 4819 if ((digit >= radix) || (digit < 0)) { 4820 return '\0'; 4821 } 4822 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 4823 return '\0'; 4824 } 4825 if (digit < 10) { 4826 return (char)('0' + digit); 4827 } 4828 return (char)('a' - 10 + digit); 4829 } 4830 4831 /** 4832 * Returns the Unicode directionality property for the given 4833 * character. Character directionality is used to calculate the 4834 * visual ordering of text. 
The directionality value of undefined 4835 * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>. 4836 * 4837 * <p><b>Note:</b> This method cannot handle <a 4838 * href="#supplementary"> supplementary characters</a>. To support 4839 * all Unicode characters, including supplementary characters, use 4840 * the {@link #getDirectionality(int)} method. 4841 * 4842 * @param ch <code>char</code> for which the directionality property 4843 * is requested. 4844 * @return the directionality property of the <code>char</code> value. 4845 * 4846 * @see Character#DIRECTIONALITY_UNDEFINED 4847 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 4848 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 4849 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 4850 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 4851 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 4852 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 4853 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 4854 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 4855 * @see Character#DIRECTIONALITY_NONSPACING_MARK 4856 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 4857 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 4858 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 4859 * @see Character#DIRECTIONALITY_WHITESPACE 4860 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 4861 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 4862 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 4863 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 4864 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 4865 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 4866 * @since 1.4 4867 */ 4868 public static byte getDirectionality(char ch) { 4869 return getDirectionality((int)ch); 4870 } 4871 4872 /** 4873 * Returns the Unicode directionality property for the given 4874 * character (Unicode code point). Character directionality is 4875 * used to calculate the visual ordering of text. 
The 4876 * directionality value of undefined character is {@link 4877 * #DIRECTIONALITY_UNDEFINED}. 4878 * 4879 * @param codePoint the character (Unicode code point) for which 4880 * the directionality property is requested. 4881 * @return the directionality property of the character. 4882 * 4883 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 4884 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 4885 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 4886 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 4887 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 4888 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 4889 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 4890 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 4891 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 4892 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 4893 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 4894 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 4895 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 4896 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 4897 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 4898 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 4899 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 4900 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 4901 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 4902 
* @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 4903 * @since 1.5 4904 */ 4905 public static byte getDirectionality(int codePoint) { 4906 return CharacterData.of(codePoint).getDirectionality(codePoint); 4907 } 4908 4909 /** 4910 * Determines whether the character is mirrored according to the 4911 * Unicode specification. Mirrored characters should have their 4912 * glyphs horizontally mirrored when displayed in text that is 4913 * right-to-left. For example, <code>'\u0028'</code> LEFT 4914 * PARENTHESIS is semantically defined to be an <i>opening 4915 * parenthesis</i>. This will appear as a "(" in text that is 4916 * left-to-right but as a ")" in text that is right-to-left. 4917 * 4918 * <p><b>Note:</b> This method cannot handle <a 4919 * href="#supplementary"> supplementary characters</a>. To support 4920 * all Unicode characters, including supplementary characters, use 4921 * the {@link #isMirrored(int)} method. 4922 * 4923 * @param ch <code>char</code> for which the mirrored property is requested 4924 * @return <code>true</code> if the char is mirrored, <code>false</code> 4925 * if the <code>char</code> is not mirrored or is not defined. 4926 * @since 1.4 4927 */ 4928 public static boolean isMirrored(char ch) { 4929 return isMirrored((int)ch); 4930 } 4931 4932 /** 4933 * Determines whether the specified character (Unicode code point) 4934 * is mirrored according to the Unicode specification. Mirrored 4935 * characters should have their glyphs horizontally mirrored when 4936 * displayed in text that is right-to-left. For example, 4937 * <code>'\u0028'</code> LEFT PARENTHESIS is semantically 4938 * defined to be an <i>opening parenthesis</i>. This will appear 4939 * as a "(" in text that is left-to-right but as a ")" in text 4940 * that is right-to-left. 4941 * 4942 * @param codePoint the character (Unicode code point) to be tested. 
4943 * @return <code>true</code> if the character is mirrored, <code>false</code> 4944 * if the character is not mirrored or is not defined. 4945 * @since 1.5 4946 */ 4947 public static boolean isMirrored(int codePoint) { 4948 return CharacterData.of(codePoint).isMirrored(codePoint); 4949 } 4950 4951 /** 4952 * Compares two <code>Character</code> objects numerically. 4953 * 4954 * @param anotherCharacter the <code>Character</code> to be compared. 4955 4956 * @return the value <code>0</code> if the argument <code>Character</code> 4957 * is equal to this <code>Character</code>; a value less than 4958 * <code>0</code> if this <code>Character</code> is numerically less 4959 * than the <code>Character</code> argument; and a value greater than 4960 * <code>0</code> if this <code>Character</code> is numerically greater 4961 * than the <code>Character</code> argument (unsigned comparison). 4962 * Note that this is strictly a numerical comparison; it is not 4963 * locale-dependent. 4964 * @since 1.2 4965 */ 4966 public int compareTo(Character anotherCharacter) { 4967 return compare(this.value, anotherCharacter.value); 4968 } 4969 4970 /** 4971 * Compares two {@code char} values numerically. 4972 * The value returned is identical to what would be returned by: 4973 * <pre> 4974 * Character.valueOf(x).compareTo(Character.valueOf(y)) 4975 * </pre> 4976 * 4977 * @param x the first {@code char} to compare 4978 * @param y the second {@code char} to compare 4979 * @return the value {@code 0} if {@code x == y}; 4980 * a value less than {@code 0} if {@code x < y}; and 4981 * a value greater than {@code 0} if {@code x > y} 4982 * @since 1.7 4983 */ 4984 public static int compare(char x, char y) { 4985 return x - y; 4986 } 4987 4988 /** 4989 * Converts the character (Unicode code point) argument to uppercase using 4990 * information from the UnicodeData file. 4991 * <p> 4992 * 4993 * @param codePoint the character (Unicode code point) to be converted. 
4994 * @return either the uppercase equivalent of the character, if 4995 * any, or an error flag (<code>Character.ERROR</code>) 4996 * that indicates that a 1:M <code>char</code> mapping exists. 4997 * @see java.lang.Character#isLowerCase(char) 4998 * @see java.lang.Character#isUpperCase(char) 4999 * @see java.lang.Character#toLowerCase(char) 5000 * @see java.lang.Character#toTitleCase(char) 5001 * @since 1.4 5002 */ 5003 static int toUpperCaseEx(int codePoint) { 5004 assert isValidCodePoint(codePoint); 5005 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 5006 } 5007 5008 /** 5009 * Converts the character (Unicode code point) argument to uppercase using case 5010 * mapping information from the SpecialCasing file in the Unicode 5011 * specification. If a character has no explicit uppercase 5012 * mapping, then the <code>char</code> itself is returned in the 5013 * <code>char[]</code>. 5014 * 5015 * @param codePoint the character (Unicode code point) to be converted. 5016 * @return a <code>char[]</code> with the uppercased character. 5017 * @since 1.4 5018 */ 5019 static char[] toUpperCaseCharArray(int codePoint) { 5020 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP. 5021 assert isValidCodePoint(codePoint) && 5022 !isSupplementaryCodePoint(codePoint); 5023 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 5024 } 5025 5026 /** 5027 * The number of bits used to represent a <tt>char</tt> value in unsigned 5028 * binary form, constant {@code 16}. 5029 * 5030 * @since 1.5 5031 */ 5032 public static final int SIZE = 16; 5033 5034 /** 5035 * Returns the value obtained by reversing the order of the bytes in the 5036 * specified <tt>char</tt> value. 5037 * 5038 * @return the value obtained by reversing (or, equivalently, swapping) 5039 * the bytes in the specified <tt>char</tt> value. 5040 * @since 1.5 5041 */ 5042 public static char reverseBytes(char ch) { 5043 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 5044 } 5045 }