1 /* 2 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.Arrays; 29 import java.util.Map; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * The <code>Character</code> class wraps a value of the primitive 35 * type <code>char</code> in an object. An object of type 36 * <code>Character</code> contains a single field whose type is 37 * <code>char</code>. 38 * <p> 39 * In addition, this class provides several methods for determining 40 * a character's category (lowercase letter, digit, etc.) and for converting 41 * characters from uppercase to lowercase and vice versa. 42 * <p> 43 * Character information is based on the Unicode Standard, version 6.0.0. 
44 * <p> 45 * The methods and data of class <code>Character</code> are defined by 46 * the information in the <i>UnicodeData</i> file that is part of the 47 * Unicode Character Database maintained by the Unicode 48 * Consortium. This file specifies various properties including name 49 * and general category for every defined Unicode code point or 50 * character range. 51 * <p> 52 * The file and its description are available from the Unicode Consortium at: 53 * <ul> 54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 55 * </ul> 56 * 57 * <h4><a name="unicode">Unicode Character Representations</a></h4> 58 * 59 * <p>The <code>char</code> data type (and therefore the value that a 60 * <code>Character</code> object encapsulates) are based on the 61 * original Unicode specification, which defined characters as 62 * fixed-width 16-bit entities. The Unicode standard has since been 63 * changed to allow for characters whose representation requires more 64 * than 16 bits. The range of legal <em>code point</em>s is now 65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 66 * (Refer to the <a 67 * href="http://www.unicode.org/reports/tr27/#notation"><i> 68 * definition</i></a> of the U+<i>n</i> notation in the Unicode 69 * standard.) 70 * 71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is 72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 73 * <a name="supplementary">Characters</a> whose code points are greater 74 * than U+FFFF are called <em>supplementary character</em>s. The Java 75 * platform uses the UTF-16 representation in <code>char</code> arrays and 76 * in the <code>String</code> and <code>StringBuffer</code> classes. In 77 * this representation, supplementary characters are represented as a pair 78 * of <code>char</code> values, the first from the <em>high-surrogates</em> 79 * range, (&#92;uD800-&#92;uDBFF), the second from the 80 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF). 
81 * 82 * <p>A <code>char</code> value, therefore, represents Basic 83 * Multilingual Plane (BMP) code points, including the surrogate 84 * code points, or code units of the UTF-16 encoding. An 85 * <code>int</code> value represents all Unicode code points, 86 * including supplementary code points. The lower (least significant) 87 * 21 bits of <code>int</code> are used to represent Unicode code 88 * points and the upper (most significant) 11 bits must be zero. 89 * Unless otherwise specified, the behavior with respect to 90 * supplementary characters and surrogate <code>char</code> values is 91 * as follows: 92 * 93 * <ul> 94 * <li>The methods that only accept a <code>char</code> value cannot support 95 * supplementary characters. They treat <code>char</code> values from the 96 * surrogate ranges as undefined characters. For example, 97 * <code>Character.isLetter('&#92;uD840')</code> returns <code>false</code>, even though 98 * this specific value if followed by any low-surrogate value in a string 99 * would represent a letter. 100 * 101 * <li>The methods that accept an <code>int</code> value support all 102 * Unicode characters, including supplementary characters. For 103 * example, <code>Character.isLetter(0x2F81A)</code> returns 104 * <code>true</code> because the code point value represents a letter 105 * (a CJK ideograph). 106 * </ul> 107 * 108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 109 * used for character values in the range between U+0000 and U+10FFFF, 110 * and <em>Unicode code unit</em> is used for 16-bit 111 * <code>char</code> values that are code units of the <em>UTF-16</em> 112 * encoding. For more information on Unicode terminology, refer to the 113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 
114 * 115 * @author Lee Boynton 116 * @author Guy Steele 117 * @author Akira Tanaka 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @since 1.0 121 */ 122 public final 123 class Character implements java.io.Serializable, Comparable<Character> { 124 /** 125 * The minimum radix available for conversion to and from strings. 126 * The constant value of this field is the smallest value permitted 127 * for the radix argument in radix-conversion methods such as the 128 * <code>digit</code> method, the <code>forDigit</code> 129 * method, and the <code>toString</code> method of class 130 * <code>Integer</code>. 131 * 132 * @see Character#digit(char, int) 133 * @see Character#forDigit(int, int) 134 * @see Integer#toString(int, int) 135 * @see Integer#valueOf(String) 136 */ 137 public static final int MIN_RADIX = 2; 138 139 /** 140 * The maximum radix available for conversion to and from strings. 141 * The constant value of this field is the largest value permitted 142 * for the radix argument in radix-conversion methods such as the 143 * <code>digit</code> method, the <code>forDigit</code> 144 * method, and the <code>toString</code> method of class 145 * <code>Integer</code>. 146 * 147 * @see Character#digit(char, int) 148 * @see Character#forDigit(int, int) 149 * @see Integer#toString(int, int) 150 * @see Integer#valueOf(String) 151 */ 152 public static final int MAX_RADIX = 36; 153 154 /** 155 * The constant value of this field is the smallest value of type 156 * <code>char</code>, <code>'\u0000'</code>. 157 * 158 * @since 1.0.2 159 */ 160 public static final char MIN_VALUE = '\u0000'; 161 162 /** 163 * The constant value of this field is the largest value of type 164 * <code>char</code>, <code>'\uFFFF'</code>. 165 * 166 * @since 1.0.2 167 */ 168 public static final char MAX_VALUE = '\uFFFF'; 169 170 /** 171 * The <code>Class</code> instance representing the primitive type 172 * <code>char</code>. 
173 * 174 * @since 1.1 175 */ 176 @SuppressWarnings("unchecked") 177 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 178 179 /* 180 * Normative general types 181 */ 182 183 /* 184 * General character types 185 */ 186 187 /** 188 * General category "Cn" in the Unicode specification. 189 * @since 1.1 190 */ 191 public static final byte UNASSIGNED = 0; 192 193 /** 194 * General category "Lu" in the Unicode specification. 195 * @since 1.1 196 */ 197 public static final byte UPPERCASE_LETTER = 1; 198 199 /** 200 * General category "Ll" in the Unicode specification. 201 * @since 1.1 202 */ 203 public static final byte LOWERCASE_LETTER = 2; 204 205 /** 206 * General category "Lt" in the Unicode specification. 207 * @since 1.1 208 */ 209 public static final byte TITLECASE_LETTER = 3; 210 211 /** 212 * General category "Lm" in the Unicode specification. 213 * @since 1.1 214 */ 215 public static final byte MODIFIER_LETTER = 4; 216 217 /** 218 * General category "Lo" in the Unicode specification. 219 * @since 1.1 220 */ 221 public static final byte OTHER_LETTER = 5; 222 223 /** 224 * General category "Mn" in the Unicode specification. 225 * @since 1.1 226 */ 227 public static final byte NON_SPACING_MARK = 6; 228 229 /** 230 * General category "Me" in the Unicode specification. 231 * @since 1.1 232 */ 233 public static final byte ENCLOSING_MARK = 7; 234 235 /** 236 * General category "Mc" in the Unicode specification. 237 * @since 1.1 238 */ 239 public static final byte COMBINING_SPACING_MARK = 8; 240 241 /** 242 * General category "Nd" in the Unicode specification. 243 * @since 1.1 244 */ 245 public static final byte DECIMAL_DIGIT_NUMBER = 9; 246 247 /** 248 * General category "Nl" in the Unicode specification. 249 * @since 1.1 250 */ 251 public static final byte LETTER_NUMBER = 10; 252 253 /** 254 * General category "No" in the Unicode specification. 
255 * @since 1.1 256 */ 257 public static final byte OTHER_NUMBER = 11; 258 259 /** 260 * General category "Zs" in the Unicode specification. 261 * @since 1.1 262 */ 263 public static final byte SPACE_SEPARATOR = 12; 264 265 /** 266 * General category "Zl" in the Unicode specification. 267 * @since 1.1 268 */ 269 public static final byte LINE_SEPARATOR = 13; 270 271 /** 272 * General category "Zp" in the Unicode specification. 273 * @since 1.1 274 */ 275 public static final byte PARAGRAPH_SEPARATOR = 14; 276 277 /** 278 * General category "Cc" in the Unicode specification. 279 * @since 1.1 280 */ 281 public static final byte CONTROL = 15; 282 283 /** 284 * General category "Cf" in the Unicode specification. 285 * @since 1.1 286 */ 287 public static final byte FORMAT = 16; 288 289 /** 290 * General category "Co" in the Unicode specification. 291 * @since 1.1 292 */ 293 public static final byte PRIVATE_USE = 18; 294 295 /** 296 * General category "Cs" in the Unicode specification. 297 * @since 1.1 298 */ 299 public static final byte SURROGATE = 19; 300 301 /** 302 * General category "Pd" in the Unicode specification. 303 * @since 1.1 304 */ 305 public static final byte DASH_PUNCTUATION = 20; 306 307 /** 308 * General category "Ps" in the Unicode specification. 309 * @since 1.1 310 */ 311 public static final byte START_PUNCTUATION = 21; 312 313 /** 314 * General category "Pe" in the Unicode specification. 315 * @since 1.1 316 */ 317 public static final byte END_PUNCTUATION = 22; 318 319 /** 320 * General category "Pc" in the Unicode specification. 321 * @since 1.1 322 */ 323 public static final byte CONNECTOR_PUNCTUATION = 23; 324 325 /** 326 * General category "Po" in the Unicode specification. 327 * @since 1.1 328 */ 329 public static final byte OTHER_PUNCTUATION = 24; 330 331 /** 332 * General category "Sm" in the Unicode specification. 
333 * @since 1.1 334 */ 335 public static final byte MATH_SYMBOL = 25; 336 337 /** 338 * General category "Sc" in the Unicode specification. 339 * @since 1.1 340 */ 341 public static final byte CURRENCY_SYMBOL = 26; 342 343 /** 344 * General category "Sk" in the Unicode specification. 345 * @since 1.1 346 */ 347 public static final byte MODIFIER_SYMBOL = 27; 348 349 /** 350 * General category "So" in the Unicode specification. 351 * @since 1.1 352 */ 353 public static final byte OTHER_SYMBOL = 28; 354 355 /** 356 * General category "Pi" in the Unicode specification. 357 * @since 1.4 358 */ 359 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 360 361 /** 362 * General category "Pf" in the Unicode specification. 363 * @since 1.4 364 */ 365 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 366 367 /** 368 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 369 */ 370 static final int ERROR = 0xFFFFFFFF; 371 372 373 /** 374 * Undefined bidirectional character type. Undefined <code>char</code> 375 * values have undefined directionality in the Unicode specification. 376 * @since 1.4 377 */ 378 public static final byte DIRECTIONALITY_UNDEFINED = -1; 379 380 /** 381 * Strong bidirectional character type "L" in the Unicode specification. 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 385 386 /** 387 * Strong bidirectional character type "R" in the Unicode specification. 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 391 392 /** 393 * Strong bidirectional character type "AL" in the Unicode specification. 394 * @since 1.4 395 */ 396 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 397 398 /** 399 * Weak bidirectional character type "EN" in the Unicode specification. 400 * @since 1.4 401 */ 402 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 403 404 /** 405 * Weak bidirectional character type "ES" in the Unicode specification. 
406 * @since 1.4 407 */ 408 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 409 410 /** 411 * Weak bidirectional character type "ET" in the Unicode specification. 412 * @since 1.4 413 */ 414 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 415 416 /** 417 * Weak bidirectional character type "AN" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 421 422 /** 423 * Weak bidirectional character type "CS" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 427 428 /** 429 * Weak bidirectional character type "NSM" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 433 434 /** 435 * Weak bidirectional character type "BN" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 439 440 /** 441 * Neutral bidirectional character type "B" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 445 446 /** 447 * Neutral bidirectional character type "S" in the Unicode specification. 448 * @since 1.4 449 */ 450 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 451 452 /** 453 * Neutral bidirectional character type "WS" in the Unicode specification. 454 * @since 1.4 455 */ 456 public static final byte DIRECTIONALITY_WHITESPACE = 12; 457 458 /** 459 * Neutral bidirectional character type "ON" in the Unicode specification. 460 * @since 1.4 461 */ 462 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 463 464 /** 465 * Strong bidirectional character type "LRE" in the Unicode specification. 466 * @since 1.4 467 */ 468 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 469 470 /** 471 * Strong bidirectional character type "LRO" in the Unicode specification. 
472 * @since 1.4 473 */ 474 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 475 476 /** 477 * Strong bidirectional character type "RLE" in the Unicode specification. 478 * @since 1.4 479 */ 480 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 481 482 /** 483 * Strong bidirectional character type "RLO" in the Unicode specification. 484 * @since 1.4 485 */ 486 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 487 488 /** 489 * Weak bidirectional character type "PDF" in the Unicode specification. 490 * @since 1.4 491 */ 492 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 493 494 /** 495 * The minimum value of a 496 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 497 * Unicode high-surrogate code unit</a> 498 * in the UTF-16 encoding, constant <code>'\uD800'</code>. 499 * A high-surrogate is also known as a <i>leading-surrogate</i>. 500 * 501 * @since 1.5 502 */ 503 public static final char MIN_HIGH_SURROGATE = '\uD800'; 504 505 /** 506 * The maximum value of a 507 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 508 * Unicode high-surrogate code unit</a> 509 * in the UTF-16 encoding, constant <code>'\uDBFF'</code>. 510 * A high-surrogate is also known as a <i>leading-surrogate</i>. 511 * 512 * @since 1.5 513 */ 514 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 515 516 /** 517 * The minimum value of a 518 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 519 * Unicode low-surrogate code unit</a> 520 * in the UTF-16 encoding, constant <code>'\uDC00'</code>. 521 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
522 * 523 * @since 1.5 524 */ 525 public static final char MIN_LOW_SURROGATE = '\uDC00'; 526 527 /** 528 * The maximum value of a 529 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 530 * Unicode low-surrogate code unit</a> 531 * in the UTF-16 encoding, constant <code>'\uDFFF'</code>. 532 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 533 * 534 * @since 1.5 535 */ 536 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 537 538 /** 539 * The minimum value of a Unicode surrogate code unit in the 540 * UTF-16 encoding, constant <code>'\uD800'</code>. 541 * 542 * @since 1.5 543 */ 544 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 545 546 /** 547 * The maximum value of a Unicode surrogate code unit in the 548 * UTF-16 encoding, constant <code>'\uDFFF'</code>. 549 * 550 * @since 1.5 551 */ 552 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 553 554 /** 555 * The minimum value of a 556 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 557 * Unicode supplementary code point</a>, constant {@code U+10000}. 558 * 559 * @since 1.5 560 */ 561 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 562 563 /** 564 * The minimum value of a 565 * <a href="http://www.unicode.org/glossary/#code_point"> 566 * Unicode code point</a>, constant {@code U+0000}. 567 * 568 * @since 1.5 569 */ 570 public static final int MIN_CODE_POINT = 0x000000; 571 572 /** 573 * The maximum value of a 574 * <a href="http://www.unicode.org/glossary/#code_point"> 575 * Unicode code point</a>, constant {@code U+10FFFF}. 576 * 577 * @since 1.5 578 */ 579 public static final int MAX_CODE_POINT = 0X10FFFF; 580 581 582 /** 583 * Instances of this class represent particular subsets of the Unicode 584 * character set. The only family of subsets defined in the 585 * <code>Character</code> class is {@link Character.UnicodeBlock}. 
586 * Other portions of the Java API may define other subsets for their 587 * own purposes. 588 * 589 * @since 1.2 590 */ 591 public static class Subset { 592 593 private String name; 594 595 /** 596 * Constructs a new <code>Subset</code> instance. 597 * 598 * @param name The name of this subset 599 * @exception NullPointerException if name is <code>null</code> 600 */ 601 protected Subset(String name) { 602 if (name == null) { 603 throw new NullPointerException("name"); 604 } 605 this.name = name; 606 } 607 608 /** 609 * Compares two <code>Subset</code> objects for equality. 610 * This method returns <code>true</code> if and only if 611 * <code>this</code> and the argument refer to the same 612 * object; since this method is <code>final</code>, this 613 * guarantee holds for all subclasses. 614 */ 615 public final boolean equals(Object obj) { 616 return (this == obj); 617 } 618 619 /** 620 * Returns the standard hash code as defined by the 621 * <code>{@link Object#hashCode}</code> method. This method 622 * is <code>final</code> in order to ensure that the 623 * <code>equals</code> and <code>hashCode</code> methods will 624 * be consistent in all subclasses. 625 */ 626 public final int hashCode() { 627 return super.hashCode(); 628 } 629 630 /** 631 * Returns the name of this subset. 632 */ 633 public final String toString() { 634 return name; 635 } 636 } 637 638 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 639 // for the latest specification of Unicode Blocks. 640 641 /** 642 * A family of character subsets representing the character blocks in the 643 * Unicode specification. Character blocks generally define characters 644 * used for a specific script or purpose. A character is contained by 645 * at most one Unicode block. 
646 * 647 * @since 1.2 648 */ 649 public static final class UnicodeBlock extends Subset { 650 651 private static Map<String, UnicodeBlock> map = new HashMap<>(256); 652 653 /** 654 * Creates a UnicodeBlock with the given identifier name. 655 * This name must be the same as the block identifier. 656 */ 657 private UnicodeBlock(String idName) { 658 super(idName); 659 map.put(idName, this); 660 } 661 662 /** 663 * Creates a UnicodeBlock with the given identifier name and 664 * alias name. 665 */ 666 private UnicodeBlock(String idName, String alias) { 667 this(idName); 668 map.put(alias, this); 669 } 670 671 /** 672 * Creates a UnicodeBlock with the given identifier name and 673 * alias names. 674 */ 675 private UnicodeBlock(String idName, String... aliases) { 676 this(idName); 677 for (String alias : aliases) 678 map.put(alias, this); 679 } 680 681 /** 682 * Constant for the "Basic Latin" Unicode character block. 683 * @since 1.2 684 */ 685 public static final UnicodeBlock BASIC_LATIN = 686 new UnicodeBlock("BASIC_LATIN", 687 "BASIC LATIN", 688 "BASICLATIN"); 689 690 /** 691 * Constant for the "Latin-1 Supplement" Unicode character block. 692 * @since 1.2 693 */ 694 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 695 new UnicodeBlock("LATIN_1_SUPPLEMENT", 696 "LATIN-1 SUPPLEMENT", 697 "LATIN-1SUPPLEMENT"); 698 699 /** 700 * Constant for the "Latin Extended-A" Unicode character block. 701 * @since 1.2 702 */ 703 public static final UnicodeBlock LATIN_EXTENDED_A = 704 new UnicodeBlock("LATIN_EXTENDED_A", 705 "LATIN EXTENDED-A", 706 "LATINEXTENDED-A"); 707 708 /** 709 * Constant for the "Latin Extended-B" Unicode character block. 710 * @since 1.2 711 */ 712 public static final UnicodeBlock LATIN_EXTENDED_B = 713 new UnicodeBlock("LATIN_EXTENDED_B", 714 "LATIN EXTENDED-B", 715 "LATINEXTENDED-B"); 716 717 /** 718 * Constant for the "IPA Extensions" Unicode character block. 
719 * @since 1.2 720 */ 721 public static final UnicodeBlock IPA_EXTENSIONS = 722 new UnicodeBlock("IPA_EXTENSIONS", 723 "IPA EXTENSIONS", 724 "IPAEXTENSIONS"); 725 726 /** 727 * Constant for the "Spacing Modifier Letters" Unicode character block. 728 * @since 1.2 729 */ 730 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 731 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 732 "SPACING MODIFIER LETTERS", 733 "SPACINGMODIFIERLETTERS"); 734 735 /** 736 * Constant for the "Combining Diacritical Marks" Unicode character block. 737 * @since 1.2 738 */ 739 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 740 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 741 "COMBINING DIACRITICAL MARKS", 742 "COMBININGDIACRITICALMARKS"); 743 744 /** 745 * Constant for the "Greek and Coptic" Unicode character block. 746 * <p> 747 * This block was previously known as the "Greek" block. 748 * 749 * @since 1.2 750 */ 751 public static final UnicodeBlock GREEK = 752 new UnicodeBlock("GREEK", 753 "GREEK AND COPTIC", 754 "GREEKANDCOPTIC"); 755 756 /** 757 * Constant for the "Cyrillic" Unicode character block. 758 * @since 1.2 759 */ 760 public static final UnicodeBlock CYRILLIC = 761 new UnicodeBlock("CYRILLIC"); 762 763 /** 764 * Constant for the "Armenian" Unicode character block. 765 * @since 1.2 766 */ 767 public static final UnicodeBlock ARMENIAN = 768 new UnicodeBlock("ARMENIAN"); 769 770 /** 771 * Constant for the "Hebrew" Unicode character block. 772 * @since 1.2 773 */ 774 public static final UnicodeBlock HEBREW = 775 new UnicodeBlock("HEBREW"); 776 777 /** 778 * Constant for the "Arabic" Unicode character block. 779 * @since 1.2 780 */ 781 public static final UnicodeBlock ARABIC = 782 new UnicodeBlock("ARABIC"); 783 784 /** 785 * Constant for the "Devanagari" Unicode character block. 786 * @since 1.2 787 */ 788 public static final UnicodeBlock DEVANAGARI = 789 new UnicodeBlock("DEVANAGARI"); 790 791 /** 792 * Constant for the "Bengali" Unicode character block. 
793 * @since 1.2 794 */ 795 public static final UnicodeBlock BENGALI = 796 new UnicodeBlock("BENGALI"); 797 798 /** 799 * Constant for the "Gurmukhi" Unicode character block. 800 * @since 1.2 801 */ 802 public static final UnicodeBlock GURMUKHI = 803 new UnicodeBlock("GURMUKHI"); 804 805 /** 806 * Constant for the "Gujarati" Unicode character block. 807 * @since 1.2 808 */ 809 public static final UnicodeBlock GUJARATI = 810 new UnicodeBlock("GUJARATI"); 811 812 /** 813 * Constant for the "Oriya" Unicode character block. 814 * @since 1.2 815 */ 816 public static final UnicodeBlock ORIYA = 817 new UnicodeBlock("ORIYA"); 818 819 /** 820 * Constant for the "Tamil" Unicode character block. 821 * @since 1.2 822 */ 823 public static final UnicodeBlock TAMIL = 824 new UnicodeBlock("TAMIL"); 825 826 /** 827 * Constant for the "Telugu" Unicode character block. 828 * @since 1.2 829 */ 830 public static final UnicodeBlock TELUGU = 831 new UnicodeBlock("TELUGU"); 832 833 /** 834 * Constant for the "Kannada" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock KANNADA = 838 new UnicodeBlock("KANNADA"); 839 840 /** 841 * Constant for the "Malayalam" Unicode character block. 842 * @since 1.2 843 */ 844 public static final UnicodeBlock MALAYALAM = 845 new UnicodeBlock("MALAYALAM"); 846 847 /** 848 * Constant for the "Thai" Unicode character block. 849 * @since 1.2 850 */ 851 public static final UnicodeBlock THAI = 852 new UnicodeBlock("THAI"); 853 854 /** 855 * Constant for the "Lao" Unicode character block. 856 * @since 1.2 857 */ 858 public static final UnicodeBlock LAO = 859 new UnicodeBlock("LAO"); 860 861 /** 862 * Constant for the "Tibetan" Unicode character block. 863 * @since 1.2 864 */ 865 public static final UnicodeBlock TIBETAN = 866 new UnicodeBlock("TIBETAN"); 867 868 /** 869 * Constant for the "Georgian" Unicode character block. 
870 * @since 1.2 871 */ 872 public static final UnicodeBlock GEORGIAN = 873 new UnicodeBlock("GEORGIAN"); 874 875 /** 876 * Constant for the "Hangul Jamo" Unicode character block. 877 * @since 1.2 878 */ 879 public static final UnicodeBlock HANGUL_JAMO = 880 new UnicodeBlock("HANGUL_JAMO", 881 "HANGUL JAMO", 882 "HANGULJAMO"); 883 884 /** 885 * Constant for the "Latin Extended Additional" Unicode character block. 886 * @since 1.2 887 */ 888 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 889 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 890 "LATIN EXTENDED ADDITIONAL", 891 "LATINEXTENDEDADDITIONAL"); 892 893 /** 894 * Constant for the "Greek Extended" Unicode character block. 895 * @since 1.2 896 */ 897 public static final UnicodeBlock GREEK_EXTENDED = 898 new UnicodeBlock("GREEK_EXTENDED", 899 "GREEK EXTENDED", 900 "GREEKEXTENDED"); 901 902 /** 903 * Constant for the "General Punctuation" Unicode character block. 904 * @since 1.2 905 */ 906 public static final UnicodeBlock GENERAL_PUNCTUATION = 907 new UnicodeBlock("GENERAL_PUNCTUATION", 908 "GENERAL PUNCTUATION", 909 "GENERALPUNCTUATION"); 910 911 /** 912 * Constant for the "Superscripts and Subscripts" Unicode character 913 * block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 917 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 918 "SUPERSCRIPTS AND SUBSCRIPTS", 919 "SUPERSCRIPTSANDSUBSCRIPTS"); 920 921 /** 922 * Constant for the "Currency Symbols" Unicode character block. 923 * @since 1.2 924 */ 925 public static final UnicodeBlock CURRENCY_SYMBOLS = 926 new UnicodeBlock("CURRENCY_SYMBOLS", 927 "CURRENCY SYMBOLS", 928 "CURRENCYSYMBOLS"); 929 930 /** 931 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 932 * character block. 933 * <p> 934 * This block was previously known as "Combining Marks for Symbols". 
935 * @since 1.2 936 */ 937 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 938 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 939 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 940 "COMBININGDIACRITICALMARKSFORSYMBOLS", 941 "COMBINING MARKS FOR SYMBOLS", 942 "COMBININGMARKSFORSYMBOLS"); 943 944 /** 945 * Constant for the "Letterlike Symbols" Unicode character block. 946 * @since 1.2 947 */ 948 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 949 new UnicodeBlock("LETTERLIKE_SYMBOLS", 950 "LETTERLIKE SYMBOLS", 951 "LETTERLIKESYMBOLS"); 952 953 /** 954 * Constant for the "Number Forms" Unicode character block. 955 * @since 1.2 956 */ 957 public static final UnicodeBlock NUMBER_FORMS = 958 new UnicodeBlock("NUMBER_FORMS", 959 "NUMBER FORMS", 960 "NUMBERFORMS"); 961 962 /** 963 * Constant for the "Arrows" Unicode character block. 964 * @since 1.2 965 */ 966 public static final UnicodeBlock ARROWS = 967 new UnicodeBlock("ARROWS"); 968 969 /** 970 * Constant for the "Mathematical Operators" Unicode character block. 971 * @since 1.2 972 */ 973 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 974 new UnicodeBlock("MATHEMATICAL_OPERATORS", 975 "MATHEMATICAL OPERATORS", 976 "MATHEMATICALOPERATORS"); 977 978 /** 979 * Constant for the "Miscellaneous Technical" Unicode character block. 980 * @since 1.2 981 */ 982 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 983 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 984 "MISCELLANEOUS TECHNICAL", 985 "MISCELLANEOUSTECHNICAL"); 986 987 /** 988 * Constant for the "Control Pictures" Unicode character block. 989 * @since 1.2 990 */ 991 public static final UnicodeBlock CONTROL_PICTURES = 992 new UnicodeBlock("CONTROL_PICTURES", 993 "CONTROL PICTURES", 994 "CONTROLPICTURES"); 995 996 /** 997 * Constant for the "Optical Character Recognition" Unicode character block. 
998 * @since 1.2 999 */ 1000 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1001 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1002 "OPTICAL CHARACTER RECOGNITION", 1003 "OPTICALCHARACTERRECOGNITION"); 1004 1005 /** 1006 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1007 * @since 1.2 1008 */ 1009 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1010 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1011 "ENCLOSED ALPHANUMERICS", 1012 "ENCLOSEDALPHANUMERICS"); 1013 1014 /** 1015 * Constant for the "Box Drawing" Unicode character block. 1016 * @since 1.2 1017 */ 1018 public static final UnicodeBlock BOX_DRAWING = 1019 new UnicodeBlock("BOX_DRAWING", 1020 "BOX DRAWING", 1021 "BOXDRAWING"); 1022 1023 /** 1024 * Constant for the "Block Elements" Unicode character block. 1025 * @since 1.2 1026 */ 1027 public static final UnicodeBlock BLOCK_ELEMENTS = 1028 new UnicodeBlock("BLOCK_ELEMENTS", 1029 "BLOCK ELEMENTS", 1030 "BLOCKELEMENTS"); 1031 1032 /** 1033 * Constant for the "Geometric Shapes" Unicode character block. 1034 * @since 1.2 1035 */ 1036 public static final UnicodeBlock GEOMETRIC_SHAPES = 1037 new UnicodeBlock("GEOMETRIC_SHAPES", 1038 "GEOMETRIC SHAPES", 1039 "GEOMETRICSHAPES"); 1040 1041 /** 1042 * Constant for the "Miscellaneous Symbols" Unicode character block. 1043 * @since 1.2 1044 */ 1045 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1046 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1047 "MISCELLANEOUS SYMBOLS", 1048 "MISCELLANEOUSSYMBOLS"); 1049 1050 /** 1051 * Constant for the "Dingbats" Unicode character block. 1052 * @since 1.2 1053 */ 1054 public static final UnicodeBlock DINGBATS = 1055 new UnicodeBlock("DINGBATS"); 1056 1057 /** 1058 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 
1059 * @since 1.2 1060 */ 1061 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1062 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1063 "CJK SYMBOLS AND PUNCTUATION", 1064 "CJKSYMBOLSANDPUNCTUATION"); 1065 1066 /** 1067 * Constant for the "Hiragana" Unicode character block. 1068 * @since 1.2 1069 */ 1070 public static final UnicodeBlock HIRAGANA = 1071 new UnicodeBlock("HIRAGANA"); 1072 1073 /** 1074 * Constant for the "Katakana" Unicode character block. 1075 * @since 1.2 1076 */ 1077 public static final UnicodeBlock KATAKANA = 1078 new UnicodeBlock("KATAKANA"); 1079 1080 /** 1081 * Constant for the "Bopomofo" Unicode character block. 1082 * @since 1.2 1083 */ 1084 public static final UnicodeBlock BOPOMOFO = 1085 new UnicodeBlock("BOPOMOFO"); 1086 1087 /** 1088 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1089 * @since 1.2 1090 */ 1091 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1092 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1093 "HANGUL COMPATIBILITY JAMO", 1094 "HANGULCOMPATIBILITYJAMO"); 1095 1096 /** 1097 * Constant for the "Kanbun" Unicode character block. 1098 * @since 1.2 1099 */ 1100 public static final UnicodeBlock KANBUN = 1101 new UnicodeBlock("KANBUN"); 1102 1103 /** 1104 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1108 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1109 "ENCLOSED CJK LETTERS AND MONTHS", 1110 "ENCLOSEDCJKLETTERSANDMONTHS"); 1111 1112 /** 1113 * Constant for the "CJK Compatibility" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock CJK_COMPATIBILITY = 1117 new UnicodeBlock("CJK_COMPATIBILITY", 1118 "CJK COMPATIBILITY", 1119 "CJKCOMPATIBILITY"); 1120 1121 /** 1122 * Constant for the "CJK Unified Ideographs" Unicode character block. 
1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1126 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1127 "CJK UNIFIED IDEOGRAPHS", 1128 "CJKUNIFIEDIDEOGRAPHS"); 1129 1130 /** 1131 * Constant for the "Hangul Syllables" Unicode character block. 1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock HANGUL_SYLLABLES = 1135 new UnicodeBlock("HANGUL_SYLLABLES", 1136 "HANGUL SYLLABLES", 1137 "HANGULSYLLABLES"); 1138 1139 /** 1140 * Constant for the "Private Use Area" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock PRIVATE_USE_AREA = 1144 new UnicodeBlock("PRIVATE_USE_AREA", 1145 "PRIVATE USE AREA", 1146 "PRIVATEUSEAREA"); 1147 1148 /** 1149 * Constant for the "CJK Compatibility Ideographs" Unicode character 1150 * block. 1151 * @since 1.2 1152 */ 1153 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1154 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1155 "CJK COMPATIBILITY IDEOGRAPHS", 1156 "CJKCOMPATIBILITYIDEOGRAPHS"); 1157 1158 /** 1159 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1160 * @since 1.2 1161 */ 1162 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1163 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1164 "ALPHABETIC PRESENTATION FORMS", 1165 "ALPHABETICPRESENTATIONFORMS"); 1166 1167 /** 1168 * Constant for the "Arabic Presentation Forms-A" Unicode character 1169 * block. 1170 * @since 1.2 1171 */ 1172 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1173 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1174 "ARABIC PRESENTATION FORMS-A", 1175 "ARABICPRESENTATIONFORMS-A"); 1176 1177 /** 1178 * Constant for the "Combining Half Marks" Unicode character block. 
1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock COMBINING_HALF_MARKS = 1182 new UnicodeBlock("COMBINING_HALF_MARKS", 1183 "COMBINING HALF MARKS", 1184 "COMBININGHALFMARKS"); 1185 1186 /** 1187 * Constant for the "CJK Compatibility Forms" Unicode character block. 1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1191 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1192 "CJK COMPATIBILITY FORMS", 1193 "CJKCOMPATIBILITYFORMS"); 1194 1195 /** 1196 * Constant for the "Small Form Variants" Unicode character block. 1197 * @since 1.2 1198 */ 1199 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1200 new UnicodeBlock("SMALL_FORM_VARIANTS", 1201 "SMALL FORM VARIANTS", 1202 "SMALLFORMVARIANTS"); 1203 1204 /** 1205 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1206 * @since 1.2 1207 */ 1208 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1209 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1210 "ARABIC PRESENTATION FORMS-B", 1211 "ARABICPRESENTATIONFORMS-B"); 1212 1213 /** 1214 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1215 * block. 1216 * @since 1.2 1217 */ 1218 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1219 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1220 "HALFWIDTH AND FULLWIDTH FORMS", 1221 "HALFWIDTHANDFULLWIDTHFORMS"); 1222 1223 /** 1224 * Constant for the "Specials" Unicode character block. 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock SPECIALS = 1228 new UnicodeBlock("SPECIALS"); 1229 1230 /** Deprecated constant kept alongside the three surrogate block constants that replace it. 1231 * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES}, 1232 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and 1233 * {@link #LOW_SURROGATES}. These new constants match 1234 * the block definitions of the Unicode Standard. 1235 * The {@link #of(char)} and {@link #of(int)} methods 1236 * return the new constants, not {@code SURROGATES_AREA}.
1237 */ 1238 @Deprecated 1239 public static final UnicodeBlock SURROGATES_AREA = 1240 new UnicodeBlock("SURROGATES_AREA"); 1241 1242 /** 1243 * Constant for the "Syriac" Unicode character block. 1244 * @since 1.4 1245 */ 1246 public static final UnicodeBlock SYRIAC = 1247 new UnicodeBlock("SYRIAC"); 1248 1249 /** 1250 * Constant for the "Thaana" Unicode character block. 1251 * @since 1.4 1252 */ 1253 public static final UnicodeBlock THAANA = 1254 new UnicodeBlock("THAANA"); 1255 1256 /** 1257 * Constant for the "Sinhala" Unicode character block. 1258 * @since 1.4 1259 */ 1260 public static final UnicodeBlock SINHALA = 1261 new UnicodeBlock("SINHALA"); 1262 1263 /** 1264 * Constant for the "Myanmar" Unicode character block. 1265 * @since 1.4 1266 */ 1267 public static final UnicodeBlock MYANMAR = 1268 new UnicodeBlock("MYANMAR"); 1269 1270 /** 1271 * Constant for the "Ethiopic" Unicode character block. 1272 * @since 1.4 1273 */ 1274 public static final UnicodeBlock ETHIOPIC = 1275 new UnicodeBlock("ETHIOPIC"); 1276 1277 /** 1278 * Constant for the "Cherokee" Unicode character block. 1279 * @since 1.4 1280 */ 1281 public static final UnicodeBlock CHEROKEE = 1282 new UnicodeBlock("CHEROKEE"); 1283 1284 /** 1285 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1286 * @since 1.4 1287 */ 1288 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1289 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1290 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1291 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1292 1293 /** 1294 * Constant for the "Ogham" Unicode character block. 1295 * @since 1.4 1296 */ 1297 public static final UnicodeBlock OGHAM = 1298 new UnicodeBlock("OGHAM"); 1299 1300 /** 1301 * Constant for the "Runic" Unicode character block. 1302 * @since 1.4 1303 */ 1304 public static final UnicodeBlock RUNIC = 1305 new UnicodeBlock("RUNIC"); 1306 1307 /** 1308 * Constant for the "Khmer" Unicode character block.
1309 * @since 1.4 1310 */ 1311 public static final UnicodeBlock KHMER = 1312 new UnicodeBlock("KHMER"); 1313 1314 /** 1315 * Constant for the "Mongolian" Unicode character block. 1316 * @since 1.4 1317 */ 1318 public static final UnicodeBlock MONGOLIAN = 1319 new UnicodeBlock("MONGOLIAN"); 1320 1321 /** 1322 * Constant for the "Braille Patterns" Unicode character block. 1323 * @since 1.4 1324 */ 1325 public static final UnicodeBlock BRAILLE_PATTERNS = 1326 new UnicodeBlock("BRAILLE_PATTERNS", 1327 "BRAILLE PATTERNS", 1328 "BRAILLEPATTERNS"); 1329 1330 /** 1331 * Constant for the "CJK Radicals Supplement" Unicode character block. 1332 * @since 1.4 1333 */ 1334 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1335 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1336 "CJK RADICALS SUPPLEMENT", 1337 "CJKRADICALSSUPPLEMENT"); 1338 1339 /** 1340 * Constant for the "Kangxi Radicals" Unicode character block. 1341 * @since 1.4 1342 */ 1343 public static final UnicodeBlock KANGXI_RADICALS = 1344 new UnicodeBlock("KANGXI_RADICALS", 1345 "KANGXI RADICALS", 1346 "KANGXIRADICALS"); 1347 1348 /** 1349 * Constant for the "Ideographic Description Characters" Unicode character block. 1350 * @since 1.4 1351 */ 1352 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1353 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1354 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1355 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1356 1357 /** 1358 * Constant for the "Bopomofo Extended" Unicode character block. 1359 * @since 1.4 1360 */ 1361 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1362 new UnicodeBlock("BOPOMOFO_EXTENDED", 1363 "BOPOMOFO EXTENDED", 1364 "BOPOMOFOEXTENDED"); 1365 1366 /** 1367 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 
1368 * @since 1.4 1369 */ 1370 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1371 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1372 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1373 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1374 1375 /** 1376 * Constant for the "Yi Syllables" Unicode character block. 1377 * @since 1.4 1378 */ 1379 public static final UnicodeBlock YI_SYLLABLES = 1380 new UnicodeBlock("YI_SYLLABLES", 1381 "YI SYLLABLES", 1382 "YISYLLABLES"); 1383 1384 /** 1385 * Constant for the "Yi Radicals" Unicode character block. 1386 * @since 1.4 1387 */ 1388 public static final UnicodeBlock YI_RADICALS = 1389 new UnicodeBlock("YI_RADICALS", 1390 "YI RADICALS", 1391 "YIRADICALS"); 1392 1393 /** 1394 * Constant for the "Cyrillic Supplementary" Unicode character block. Besides the spaced and unspaced spellings of that name, the declaration also supplies "CYRILLIC SUPPLEMENT" and "CYRILLICSUPPLEMENT" as names for this block. 1395 * @since 1.5 1396 */ 1397 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1398 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1399 "CYRILLIC SUPPLEMENTARY", 1400 "CYRILLICSUPPLEMENTARY", 1401 "CYRILLIC SUPPLEMENT", 1402 "CYRILLICSUPPLEMENT"); 1403 1404 /** 1405 * Constant for the "Tagalog" Unicode character block. 1406 * @since 1.5 1407 */ 1408 public static final UnicodeBlock TAGALOG = 1409 new UnicodeBlock("TAGALOG"); 1410 1411 /** 1412 * Constant for the "Hanunoo" Unicode character block. 1413 * @since 1.5 1414 */ 1415 public static final UnicodeBlock HANUNOO = 1416 new UnicodeBlock("HANUNOO"); 1417 1418 /** 1419 * Constant for the "Buhid" Unicode character block. 1420 * @since 1.5 1421 */ 1422 public static final UnicodeBlock BUHID = 1423 new UnicodeBlock("BUHID"); 1424 1425 /** 1426 * Constant for the "Tagbanwa" Unicode character block. 1427 * @since 1.5 1428 */ 1429 public static final UnicodeBlock TAGBANWA = 1430 new UnicodeBlock("TAGBANWA"); 1431 1432 /** 1433 * Constant for the "Limbu" Unicode character block.
1434 * @since 1.5 1435 */ 1436 public static final UnicodeBlock LIMBU = 1437 new UnicodeBlock("LIMBU"); 1438 1439 /** 1440 * Constant for the "Tai Le" Unicode character block. 1441 * @since 1.5 1442 */ 1443 public static final UnicodeBlock TAI_LE = 1444 new UnicodeBlock("TAI_LE", 1445 "TAI LE", 1446 "TAILE"); 1447 1448 /** 1449 * Constant for the "Khmer Symbols" Unicode character block. 1450 * @since 1.5 1451 */ 1452 public static final UnicodeBlock KHMER_SYMBOLS = 1453 new UnicodeBlock("KHMER_SYMBOLS", 1454 "KHMER SYMBOLS", 1455 "KHMERSYMBOLS"); 1456 1457 /** 1458 * Constant for the "Phonetic Extensions" Unicode character block. 1459 * @since 1.5 1460 */ 1461 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1462 new UnicodeBlock("PHONETIC_EXTENSIONS", 1463 "PHONETIC EXTENSIONS", 1464 "PHONETICEXTENSIONS"); 1465 1466 /** 1467 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1468 * @since 1.5 1469 */ 1470 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1471 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1472 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1473 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1474 1475 /** 1476 * Constant for the "Supplemental Arrows-A" Unicode character block. 1477 * @since 1.5 1478 */ 1479 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1480 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1481 "SUPPLEMENTAL ARROWS-A", 1482 "SUPPLEMENTALARROWS-A"); 1483 1484 /** 1485 * Constant for the "Supplemental Arrows-B" Unicode character block. 1486 * @since 1.5 1487 */ 1488 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1489 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1490 "SUPPLEMENTAL ARROWS-B", 1491 "SUPPLEMENTALARROWS-B"); 1492 1493 /** 1494 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1495 * character block. 
1496 * @since 1.5 1497 */ 1498 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1499 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1500 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1501 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1502 1503 /** 1504 * Constant for the "Supplemental Mathematical Operators" Unicode 1505 * character block. 1506 * @since 1.5 1507 */ 1508 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1509 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1510 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1511 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1512 1513 /** 1514 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1515 * block. 1516 * @since 1.5 1517 */ 1518 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1519 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1520 "MISCELLANEOUS SYMBOLS AND ARROWS", 1521 "MISCELLANEOUSSYMBOLSANDARROWS"); 1522 1523 /** 1524 * Constant for the "Katakana Phonetic Extensions" Unicode character 1525 * block. 1526 * @since 1.5 1527 */ 1528 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1529 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1530 "KATAKANA PHONETIC EXTENSIONS", 1531 "KATAKANAPHONETICEXTENSIONS"); 1532 1533 /** 1534 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1535 * @since 1.5 1536 */ 1537 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1538 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1539 "YIJING HEXAGRAM SYMBOLS", 1540 "YIJINGHEXAGRAMSYMBOLS"); 1541 1542 /** 1543 * Constant for the "Variation Selectors" Unicode character block. 1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock VARIATION_SELECTORS = 1547 new UnicodeBlock("VARIATION_SELECTORS", 1548 "VARIATION SELECTORS", 1549 "VARIATIONSELECTORS"); 1550 1551 /** 1552 * Constant for the "Linear B Syllabary" Unicode character block. 
1553 * @since 1.5 1554 */ 1555 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1556 new UnicodeBlock("LINEAR_B_SYLLABARY", 1557 "LINEAR B SYLLABARY", 1558 "LINEARBSYLLABARY"); 1559 1560 /** 1561 * Constant for the "Linear B Ideograms" Unicode character block. 1562 * @since 1.5 1563 */ 1564 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1565 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1566 "LINEAR B IDEOGRAMS", 1567 "LINEARBIDEOGRAMS"); 1568 1569 /** 1570 * Constant for the "Aegean Numbers" Unicode character block. 1571 * @since 1.5 1572 */ 1573 public static final UnicodeBlock AEGEAN_NUMBERS = 1574 new UnicodeBlock("AEGEAN_NUMBERS", 1575 "AEGEAN NUMBERS", 1576 "AEGEANNUMBERS"); 1577 1578 /** 1579 * Constant for the "Old Italic" Unicode character block. 1580 * @since 1.5 1581 */ 1582 public static final UnicodeBlock OLD_ITALIC = 1583 new UnicodeBlock("OLD_ITALIC", 1584 "OLD ITALIC", 1585 "OLDITALIC"); 1586 1587 /** 1588 * Constant for the "Gothic" Unicode character block. 1589 * @since 1.5 1590 */ 1591 public static final UnicodeBlock GOTHIC = 1592 new UnicodeBlock("GOTHIC"); 1593 1594 /** 1595 * Constant for the "Ugaritic" Unicode character block. 1596 * @since 1.5 1597 */ 1598 public static final UnicodeBlock UGARITIC = 1599 new UnicodeBlock("UGARITIC"); 1600 1601 /** 1602 * Constant for the "Deseret" Unicode character block. 1603 * @since 1.5 1604 */ 1605 public static final UnicodeBlock DESERET = 1606 new UnicodeBlock("DESERET"); 1607 1608 /** 1609 * Constant for the "Shavian" Unicode character block. 1610 * @since 1.5 1611 */ 1612 public static final UnicodeBlock SHAVIAN = 1613 new UnicodeBlock("SHAVIAN"); 1614 1615 /** 1616 * Constant for the "Osmanya" Unicode character block. 1617 * @since 1.5 1618 */ 1619 public static final UnicodeBlock OSMANYA = 1620 new UnicodeBlock("OSMANYA"); 1621 1622 /** 1623 * Constant for the "Cypriot Syllabary" Unicode character block. 
1624 * @since 1.5 1625 */ 1626 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1627 new UnicodeBlock("CYPRIOT_SYLLABARY", 1628 "CYPRIOT SYLLABARY", 1629 "CYPRIOTSYLLABARY"); 1630 1631 /** 1632 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1633 * @since 1.5 1634 */ 1635 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1636 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1637 "BYZANTINE MUSICAL SYMBOLS", 1638 "BYZANTINEMUSICALSYMBOLS"); 1639 1640 /** 1641 * Constant for the "Musical Symbols" Unicode character block. 1642 * @since 1.5 1643 */ 1644 public static final UnicodeBlock MUSICAL_SYMBOLS = 1645 new UnicodeBlock("MUSICAL_SYMBOLS", 1646 "MUSICAL SYMBOLS", 1647 "MUSICALSYMBOLS"); 1648 1649 /** 1650 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1651 * @since 1.5 1652 */ 1653 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1654 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1655 "TAI XUAN JING SYMBOLS", 1656 "TAIXUANJINGSYMBOLS"); 1657 1658 /** 1659 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1660 * character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1664 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1665 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1666 "MATHEMATICALALPHANUMERICSYMBOLS"); 1667 1668 /** 1669 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1670 * character block. 1671 * @since 1.5 1672 */ 1673 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1674 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1675 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1676 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1677 1678 /** 1679 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 
1680 * @since 1.5 1681 */ 1682 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1683 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1684 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1685 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1686 1687 /** 1688 * Constant for the "Tags" Unicode character block. 1689 * @since 1.5 1690 */ 1691 public static final UnicodeBlock TAGS = 1692 new UnicodeBlock("TAGS"); 1693 1694 /** 1695 * Constant for the "Variation Selectors Supplement" Unicode character 1696 * block. 1697 * @since 1.5 1698 */ 1699 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1700 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1701 "VARIATION SELECTORS SUPPLEMENT", 1702 "VARIATIONSELECTORSSUPPLEMENT"); 1703 1704 /** 1705 * Constant for the "Supplementary Private Use Area-A" Unicode character 1706 * block. 1707 * @since 1.5 1708 */ 1709 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1710 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1711 "SUPPLEMENTARY PRIVATE USE AREA-A", 1712 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1713 1714 /** 1715 * Constant for the "Supplementary Private Use Area-B" Unicode character 1716 * block. 1717 * @since 1.5 1718 */ 1719 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1720 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1721 "SUPPLEMENTARY PRIVATE USE AREA-B", 1722 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1723 1724 /** 1725 * Constant for the "High Surrogates" Unicode character block. 1726 * This block represents code point values in the high surrogate 1727 * range: U+D800 through U+DB7F. 1728 * 1729 * @since 1.5 1730 */ 1731 public static final UnicodeBlock HIGH_SURROGATES = 1732 new UnicodeBlock("HIGH_SURROGATES", 1733 "HIGH SURROGATES", 1734 "HIGHSURROGATES"); 1735 1736 /** 1737 * Constant for the "High Private Use Surrogates" Unicode character 1738 * block.
1739 * This block represents code point values in the private use high 1740 * surrogate range: U+DB80 through U+DBFF. 1741 * 1742 * @since 1.5 1743 */ 1744 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1745 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1746 "HIGH PRIVATE USE SURROGATES", 1747 "HIGHPRIVATEUSESURROGATES"); 1748 1749 /** 1750 * Constant for the "Low Surrogates" Unicode character block. 1751 * This block represents code point values in the low surrogate 1752 * range: U+DC00 through U+DFFF. 1753 * 1754 * @since 1.5 1755 */ 1756 public static final UnicodeBlock LOW_SURROGATES = 1757 new UnicodeBlock("LOW_SURROGATES", 1758 "LOW SURROGATES", 1759 "LOWSURROGATES"); 1760 1761 /** 1762 * Constant for the "Arabic Supplement" Unicode character block. 1763 * @since 1.7 1764 */ 1765 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1766 new UnicodeBlock("ARABIC_SUPPLEMENT", 1767 "ARABIC SUPPLEMENT", 1768 "ARABICSUPPLEMENT"); 1769 1770 /** 1771 * Constant for the "NKo" Unicode character block. 1772 * @since 1.7 1773 */ 1774 public static final UnicodeBlock NKO = 1775 new UnicodeBlock("NKO"); 1776 1777 /** 1778 * Constant for the "Samaritan" Unicode character block. 1779 * @since 1.7 1780 */ 1781 public static final UnicodeBlock SAMARITAN = 1782 new UnicodeBlock("SAMARITAN"); 1783 1784 /** 1785 * Constant for the "Mandaic" Unicode character block. 1786 * @since 1.7 1787 */ 1788 public static final UnicodeBlock MANDAIC = 1789 new UnicodeBlock("MANDAIC"); 1790 1791 /** 1792 * Constant for the "Ethiopic Supplement" Unicode character block. 1793 * @since 1.7 1794 */ 1795 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1796 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1797 "ETHIOPIC SUPPLEMENT", 1798 "ETHIOPICSUPPLEMENT"); 1799 1800 /** 1801 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1802 * Unicode character block.
1803 * @since 1.7 1804 */ 1805 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1806 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1807 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1808 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1809 1810 /** 1811 * Constant for the "New Tai Lue" Unicode character block. 1812 * @since 1.7 1813 */ 1814 public static final UnicodeBlock NEW_TAI_LUE = 1815 new UnicodeBlock("NEW_TAI_LUE", 1816 "NEW TAI LUE", 1817 "NEWTAILUE"); 1818 1819 /** 1820 * Constant for the "Buginese" Unicode character block. 1821 * @since 1.7 1822 */ 1823 public static final UnicodeBlock BUGINESE = 1824 new UnicodeBlock("BUGINESE"); 1825 1826 /** 1827 * Constant for the "Tai Tham" Unicode character block. 1828 * @since 1.7 1829 */ 1830 public static final UnicodeBlock TAI_THAM = 1831 new UnicodeBlock("TAI_THAM", 1832 "TAI THAM", 1833 "TAITHAM"); 1834 1835 /** 1836 * Constant for the "Balinese" Unicode character block. 1837 * @since 1.7 1838 */ 1839 public static final UnicodeBlock BALINESE = 1840 new UnicodeBlock("BALINESE"); 1841 1842 /** 1843 * Constant for the "Sundanese" Unicode character block. 1844 * @since 1.7 1845 */ 1846 public static final UnicodeBlock SUNDANESE = 1847 new UnicodeBlock("SUNDANESE"); 1848 1849 /** 1850 * Constant for the "Batak" Unicode character block. 1851 * @since 1.7 1852 */ 1853 public static final UnicodeBlock BATAK = 1854 new UnicodeBlock("BATAK"); 1855 1856 /** 1857 * Constant for the "Lepcha" Unicode character block. 1858 * @since 1.7 1859 */ 1860 public static final UnicodeBlock LEPCHA = 1861 new UnicodeBlock("LEPCHA"); 1862 1863 /** 1864 * Constant for the "Ol Chiki" Unicode character block. 1865 * @since 1.7 1866 */ 1867 public static final UnicodeBlock OL_CHIKI = 1868 new UnicodeBlock("OL_CHIKI", 1869 "OL CHIKI", 1870 "OLCHIKI"); 1871 1872 /** 1873 * Constant for the "Vedic Extensions" Unicode character block. 
1874 * @since 1.7 1875 */ 1876 public static final UnicodeBlock VEDIC_EXTENSIONS = 1877 new UnicodeBlock("VEDIC_EXTENSIONS", 1878 "VEDIC EXTENSIONS", 1879 "VEDICEXTENSIONS"); 1880 1881 /** 1882 * Constant for the "Phonetic Extensions Supplement" Unicode character 1883 * block. 1884 * @since 1.7 1885 */ 1886 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1887 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1888 "PHONETIC EXTENSIONS SUPPLEMENT", 1889 "PHONETICEXTENSIONSSUPPLEMENT"); 1890 1891 /** 1892 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1893 * character block. 1894 * @since 1.7 1895 */ 1896 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1897 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1898 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1899 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1900 1901 /** 1902 * Constant for the "Glagolitic" Unicode character block. 1903 * @since 1.7 1904 */ 1905 public static final UnicodeBlock GLAGOLITIC = 1906 new UnicodeBlock("GLAGOLITIC"); 1907 1908 /** 1909 * Constant for the "Latin Extended-C" Unicode character block. 1910 * @since 1.7 1911 */ 1912 public static final UnicodeBlock LATIN_EXTENDED_C = 1913 new UnicodeBlock("LATIN_EXTENDED_C", 1914 "LATIN EXTENDED-C", 1915 "LATINEXTENDED-C"); 1916 1917 /** 1918 * Constant for the "Coptic" Unicode character block. 1919 * @since 1.7 1920 */ 1921 public static final UnicodeBlock COPTIC = 1922 new UnicodeBlock("COPTIC"); 1923 1924 /** 1925 * Constant for the "Georgian Supplement" Unicode character block. 1926 * @since 1.7 1927 */ 1928 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1929 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 1930 "GEORGIAN SUPPLEMENT", 1931 "GEORGIANSUPPLEMENT"); 1932 1933 /** 1934 * Constant for the "Tifinagh" Unicode character block. 
1935 * @since 1.7 1936 */ 1937 public static final UnicodeBlock TIFINAGH = 1938 new UnicodeBlock("TIFINAGH"); 1939 1940 /** 1941 * Constant for the "Ethiopic Extended" Unicode character block. 1942 * @since 1.7 1943 */ 1944 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1945 new UnicodeBlock("ETHIOPIC_EXTENDED", 1946 "ETHIOPIC EXTENDED", 1947 "ETHIOPICEXTENDED"); 1948 1949 /** 1950 * Constant for the "Cyrillic Extended-A" Unicode character block. 1951 * @since 1.7 1952 */ 1953 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1954 new UnicodeBlock("CYRILLIC_EXTENDED_A", 1955 "CYRILLIC EXTENDED-A", 1956 "CYRILLICEXTENDED-A"); 1957 1958 /** 1959 * Constant for the "Supplemental Punctuation" Unicode character block. 1960 * @since 1.7 1961 */ 1962 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1963 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 1964 "SUPPLEMENTAL PUNCTUATION", 1965 "SUPPLEMENTALPUNCTUATION"); 1966 1967 /** 1968 * Constant for the "CJK Strokes" Unicode character block. 1969 * @since 1.7 1970 */ 1971 public static final UnicodeBlock CJK_STROKES = 1972 new UnicodeBlock("CJK_STROKES", 1973 "CJK STROKES", 1974 "CJKSTROKES"); 1975 1976 /** 1977 * Constant for the "Lisu" Unicode character block. 1978 * @since 1.7 1979 */ 1980 public static final UnicodeBlock LISU = 1981 new UnicodeBlock("LISU"); 1982 1983 /** 1984 * Constant for the "Vai" Unicode character block. 1985 * @since 1.7 1986 */ 1987 public static final UnicodeBlock VAI = 1988 new UnicodeBlock("VAI"); 1989 1990 /** 1991 * Constant for the "Cyrillic Extended-B" Unicode character block. 1992 * @since 1.7 1993 */ 1994 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1995 new UnicodeBlock("CYRILLIC_EXTENDED_B", 1996 "CYRILLIC EXTENDED-B", 1997 "CYRILLICEXTENDED-B"); 1998 1999 /** 2000 * Constant for the "Bamum" Unicode character block. 
2001 * @since 1.7 2002 */ 2003 public static final UnicodeBlock BAMUM = 2004 new UnicodeBlock("BAMUM"); 2005 2006 /** 2007 * Constant for the "Modifier Tone Letters" Unicode character block. 2008 * @since 1.7 2009 */ 2010 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2011 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2012 "MODIFIER TONE LETTERS", 2013 "MODIFIERTONELETTERS"); 2014 2015 /** 2016 * Constant for the "Latin Extended-D" Unicode character block. 2017 * @since 1.7 2018 */ 2019 public static final UnicodeBlock LATIN_EXTENDED_D = 2020 new UnicodeBlock("LATIN_EXTENDED_D", 2021 "LATIN EXTENDED-D", 2022 "LATINEXTENDED-D"); 2023 2024 /** 2025 * Constant for the "Syloti Nagri" Unicode character block. 2026 * @since 1.7 2027 */ 2028 public static final UnicodeBlock SYLOTI_NAGRI = 2029 new UnicodeBlock("SYLOTI_NAGRI", 2030 "SYLOTI NAGRI", 2031 "SYLOTINAGRI"); 2032 2033 /** 2034 * Constant for the "Common Indic Number Forms" Unicode character block. 2035 * @since 1.7 2036 */ 2037 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2038 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2039 "COMMON INDIC NUMBER FORMS", 2040 "COMMONINDICNUMBERFORMS"); 2041 2042 /** 2043 * Constant for the "Phags-pa" Unicode character block. Unlike the multi-word blocks declared around it, this declaration supplies a single additional name, the hyphenated "PHAGS-PA". 2044 * @since 1.7 2045 */ 2046 public static final UnicodeBlock PHAGS_PA = 2047 new UnicodeBlock("PHAGS_PA", 2048 "PHAGS-PA"); 2049 2050 /** 2051 * Constant for the "Saurashtra" Unicode character block. 2052 * @since 1.7 2053 */ 2054 public static final UnicodeBlock SAURASHTRA = 2055 new UnicodeBlock("SAURASHTRA"); 2056 2057 /** 2058 * Constant for the "Devanagari Extended" Unicode character block. 2059 * @since 1.7 2060 */ 2061 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2062 new UnicodeBlock("DEVANAGARI_EXTENDED", 2063 "DEVANAGARI EXTENDED", 2064 "DEVANAGARIEXTENDED"); 2065 2066 /** 2067 * Constant for the "Kayah Li" Unicode character block.
2068 * @since 1.7 2069 */ 2070 public static final UnicodeBlock KAYAH_LI = 2071 new UnicodeBlock("KAYAH_LI", 2072 "KAYAH LI", 2073 "KAYAHLI"); 2074 2075 /** 2076 * Constant for the "Rejang" Unicode character block. 2077 * @since 1.7 2078 */ 2079 public static final UnicodeBlock REJANG = 2080 new UnicodeBlock("REJANG"); 2081 2082 /** 2083 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2084 * @since 1.7 2085 */ 2086 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2087 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2088 "HANGUL JAMO EXTENDED-A", 2089 "HANGULJAMOEXTENDED-A"); 2090 2091 /** 2092 * Constant for the "Javanese" Unicode character block. 2093 * @since 1.7 2094 */ 2095 public static final UnicodeBlock JAVANESE = 2096 new UnicodeBlock("JAVANESE"); 2097 2098 /** 2099 * Constant for the "Cham" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock CHAM = 2103 new UnicodeBlock("CHAM"); 2104 2105 /** 2106 * Constant for the "Myanmar Extended-A" Unicode character block. 2107 * @since 1.7 2108 */ 2109 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2110 new UnicodeBlock("MYANMAR_EXTENDED_A", 2111 "MYANMAR EXTENDED-A", 2112 "MYANMAREXTENDED-A"); 2113 2114 /** 2115 * Constant for the "Tai Viet" Unicode character block. 2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock TAI_VIET = 2119 new UnicodeBlock("TAI_VIET", 2120 "TAI VIET", 2121 "TAIVIET"); 2122 2123 /** 2124 * Constant for the "Ethiopic Extended-A" Unicode character block. 2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2128 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2129 "ETHIOPIC EXTENDED-A", 2130 "ETHIOPICEXTENDED-A"); 2131 2132 /** 2133 * Constant for the "Meetei Mayek" Unicode character block. 
2134 * @since 1.7 2135 */ 2136 public static final UnicodeBlock MEETEI_MAYEK = 2137 new UnicodeBlock("MEETEI_MAYEK", 2138 "MEETEI MAYEK", 2139 "MEETEIMAYEK"); 2140 2141 /** 2142 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2143 * @since 1.7 2144 */ 2145 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2146 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2147 "HANGUL JAMO EXTENDED-B", 2148 "HANGULJAMOEXTENDED-B"); 2149 2150 /** 2151 * Constant for the "Vertical Forms" Unicode character block. 2152 * @since 1.7 2153 */ 2154 public static final UnicodeBlock VERTICAL_FORMS = 2155 new UnicodeBlock("VERTICAL_FORMS", 2156 "VERTICAL FORMS", 2157 "VERTICALFORMS"); 2158 2159 /** 2160 * Constant for the "Ancient Greek Numbers" Unicode character block. 2161 * @since 1.7 2162 */ 2163 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2164 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2165 "ANCIENT GREEK NUMBERS", 2166 "ANCIENTGREEKNUMBERS"); 2167 2168 /** 2169 * Constant for the "Ancient Symbols" Unicode character block. 2170 * @since 1.7 2171 */ 2172 public static final UnicodeBlock ANCIENT_SYMBOLS = 2173 new UnicodeBlock("ANCIENT_SYMBOLS", 2174 "ANCIENT SYMBOLS", 2175 "ANCIENTSYMBOLS"); 2176 2177 /** 2178 * Constant for the "Phaistos Disc" Unicode character block. 2179 * @since 1.7 2180 */ 2181 public static final UnicodeBlock PHAISTOS_DISC = 2182 new UnicodeBlock("PHAISTOS_DISC", 2183 "PHAISTOS DISC", 2184 "PHAISTOSDISC"); 2185 2186 /** 2187 * Constant for the "Lycian" Unicode character block. 2188 * @since 1.7 2189 */ 2190 public static final UnicodeBlock LYCIAN = 2191 new UnicodeBlock("LYCIAN"); 2192 2193 /** 2194 * Constant for the "Carian" Unicode character block. 2195 * @since 1.7 2196 */ 2197 public static final UnicodeBlock CARIAN = 2198 new UnicodeBlock("CARIAN"); 2199 2200 /** 2201 * Constant for the "Old Persian" Unicode character block. 
2202 * @since 1.7 2203 */ 2204 public static final UnicodeBlock OLD_PERSIAN = 2205 new UnicodeBlock("OLD_PERSIAN", 2206 "OLD PERSIAN", 2207 "OLDPERSIAN"); 2208 2209 /** 2210 * Constant for the "Imperial Aramaic" Unicode character block. 2211 * @since 1.7 2212 */ 2213 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2214 new UnicodeBlock("IMPERIAL_ARAMAIC", 2215 "IMPERIAL ARAMAIC", 2216 "IMPERIALARAMAIC"); 2217 2218 /** 2219 * Constant for the "Phoenician" Unicode character block. 2220 * @since 1.7 2221 */ 2222 public static final UnicodeBlock PHOENICIAN = 2223 new UnicodeBlock("PHOENICIAN"); 2224 2225 /** 2226 * Constant for the "Lydian" Unicode character block. 2227 * @since 1.7 2228 */ 2229 public static final UnicodeBlock LYDIAN = 2230 new UnicodeBlock("LYDIAN"); 2231 2232 /** 2233 * Constant for the "Kharoshthi" Unicode character block. 2234 * @since 1.7 2235 */ 2236 public static final UnicodeBlock KHAROSHTHI = 2237 new UnicodeBlock("KHAROSHTHI"); 2238 2239 /** 2240 * Constant for the "Old South Arabian" Unicode character block. 2241 * @since 1.7 2242 */ 2243 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2244 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2245 "OLD SOUTH ARABIAN", 2246 "OLDSOUTHARABIAN"); 2247 2248 /** 2249 * Constant for the "Avestan" Unicode character block. 2250 * @since 1.7 2251 */ 2252 public static final UnicodeBlock AVESTAN = 2253 new UnicodeBlock("AVESTAN"); 2254 2255 /** 2256 * Constant for the "Inscriptional Parthian" Unicode character block. 2257 * @since 1.7 2258 */ 2259 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2260 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2261 "INSCRIPTIONAL PARTHIAN", 2262 "INSCRIPTIONALPARTHIAN"); 2263 2264 /** 2265 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2266 * @since 1.7 2267 */ 2268 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2269 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2270 "INSCRIPTIONAL PAHLAVI", 2271 "INSCRIPTIONALPAHLAVI"); 2272 2273 /** 2274 * Constant for the "Old Turkic" Unicode character block. 2275 * @since 1.7 2276 */ 2277 public static final UnicodeBlock OLD_TURKIC = 2278 new UnicodeBlock("OLD_TURKIC", 2279 "OLD TURKIC", 2280 "OLDTURKIC"); 2281 2282 /** 2283 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2284 * @since 1.7 2285 */ 2286 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2287 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2288 "RUMI NUMERAL SYMBOLS", 2289 "RUMINUMERALSYMBOLS"); 2290 2291 /** 2292 * Constant for the "Brahmi" Unicode character block. 2293 * @since 1.7 2294 */ 2295 public static final UnicodeBlock BRAHMI = 2296 new UnicodeBlock("BRAHMI"); 2297 2298 /** 2299 * Constant for the "Kaithi" Unicode character block. 2300 * @since 1.7 2301 */ 2302 public static final UnicodeBlock KAITHI = 2303 new UnicodeBlock("KAITHI"); 2304 2305 /** 2306 * Constant for the "Cuneiform" Unicode character block. 2307 * @since 1.7 2308 */ 2309 public static final UnicodeBlock CUNEIFORM = 2310 new UnicodeBlock("CUNEIFORM"); 2311 2312 /** 2313 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2314 * character block. 2315 * @since 1.7 2316 */ 2317 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2318 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2319 "CUNEIFORM NUMBERS AND PUNCTUATION", 2320 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2321 2322 /** 2323 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2324 * @since 1.7 2325 */ 2326 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2327 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2328 "EGYPTIAN HIEROGLYPHS", 2329 "EGYPTIANHIEROGLYPHS"); 2330 2331 /** 2332 * Constant for the "Bamum Supplement" Unicode character block. 
2333 * @since 1.7 2334 */ 2335 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2336 new UnicodeBlock("BAMUM_SUPPLEMENT", 2337 "BAMUM SUPPLEMENT", 2338 "BAMUMSUPPLEMENT"); 2339 2340 /** 2341 * Constant for the "Kana Supplement" Unicode character block. 2342 * @since 1.7 2343 */ 2344 public static final UnicodeBlock KANA_SUPPLEMENT = 2345 new UnicodeBlock("KANA_SUPPLEMENT", 2346 "KANA SUPPLEMENT", 2347 "KANASUPPLEMENT"); 2348 2349 /** 2350 * Constant for the "Ancient Greek Musical Notation" Unicode character 2351 * block. 2352 * @since 1.7 2353 */ 2354 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2355 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2356 "ANCIENT GREEK MUSICAL NOTATION", 2357 "ANCIENTGREEKMUSICALNOTATION"); 2358 2359 /** 2360 * Constant for the "Counting Rod Numerals" Unicode character block. 2361 * @since 1.7 2362 */ 2363 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2364 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2365 "COUNTING ROD NUMERALS", 2366 "COUNTINGRODNUMERALS"); 2367 2368 /** 2369 * Constant for the "Mahjong Tiles" Unicode character block. 2370 * @since 1.7 2371 */ 2372 public static final UnicodeBlock MAHJONG_TILES = 2373 new UnicodeBlock("MAHJONG_TILES", 2374 "MAHJONG TILES", 2375 "MAHJONGTILES"); 2376 2377 /** 2378 * Constant for the "Domino Tiles" Unicode character block. 2379 * @since 1.7 2380 */ 2381 public static final UnicodeBlock DOMINO_TILES = 2382 new UnicodeBlock("DOMINO_TILES", 2383 "DOMINO TILES", 2384 "DOMINOTILES"); 2385 2386 /** 2387 * Constant for the "Playing Cards" Unicode character block. 2388 * @since 1.7 2389 */ 2390 public static final UnicodeBlock PLAYING_CARDS = 2391 new UnicodeBlock("PLAYING_CARDS", 2392 "PLAYING CARDS", 2393 "PLAYINGCARDS"); 2394 2395 /** 2396 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2397 * block. 
2398 * @since 1.7 2399 */ 2400 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2401 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2402 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2403 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2404 2405 /** 2406 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2407 * block. 2408 * @since 1.7 2409 */ 2410 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2411 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2412 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2413 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2414 2415 /** 2416 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2417 * character block. 2418 * @since 1.7 2419 */ 2420 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2421 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2422 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2423 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2424 2425 /** 2426 * Constant for the "Emoticons" Unicode character block. 2427 * @since 1.7 2428 */ 2429 public static final UnicodeBlock EMOTICONS = 2430 new UnicodeBlock("EMOTICONS"); 2431 2432 /** 2433 * Constant for the "Transport And Map Symbols" Unicode character block. 2434 * @since 1.7 2435 */ 2436 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2437 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2438 "TRANSPORT AND MAP SYMBOLS", 2439 "TRANSPORTANDMAPSYMBOLS"); 2440 2441 /** 2442 * Constant for the "Alchemical Symbols" Unicode character block. 2443 * @since 1.7 2444 */ 2445 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2446 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2447 "ALCHEMICAL SYMBOLS", 2448 "ALCHEMICALSYMBOLS"); 2449 2450 /** 2451 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2452 * character block. 
2453 * @since 1.7 2454 */ 2455 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2456 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2457 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2458 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2459 2460 /** 2461 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2462 * character block. 2463 * @since 1.7 2464 */ 2465 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2466 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2467 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2468 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2469 2470 private static final int blockStarts[] = { 2471 0x0000, // 0000..007F; Basic Latin 2472 0x0080, // 0080..00FF; Latin-1 Supplement 2473 0x0100, // 0100..017F; Latin Extended-A 2474 0x0180, // 0180..024F; Latin Extended-B 2475 0x0250, // 0250..02AF; IPA Extensions 2476 0x02B0, // 02B0..02FF; Spacing Modifier Letters 2477 0x0300, // 0300..036F; Combining Diacritical Marks 2478 0x0370, // 0370..03FF; Greek and Coptic 2479 0x0400, // 0400..04FF; Cyrillic 2480 0x0500, // 0500..052F; Cyrillic Supplement 2481 0x0530, // 0530..058F; Armenian 2482 0x0590, // 0590..05FF; Hebrew 2483 0x0600, // 0600..06FF; Arabic 2484 0x0700, // 0700..074F; Syriac 2485 0x0750, // 0750..077F; Arabic Supplement 2486 0x0780, // 0780..07BF; Thaana 2487 0x07C0, // 07C0..07FF; NKo 2488 0x0800, // 0800..083F; Samaritan 2489 0x0840, // 0840..085F; Mandaic 2490 0x0860, // unassigned 2491 0x0900, // 0900..097F; Devanagari 2492 0x0980, // 0980..09FF; Bengali 2493 0x0A00, // 0A00..0A7F; Gurmukhi 2494 0x0A80, // 0A80..0AFF; Gujarati 2495 0x0B00, // 0B00..0B7F; Oriya 2496 0x0B80, // 0B80..0BFF; Tamil 2497 0x0C00, // 0C00..0C7F; Telugu 2498 0x0C80, // 0C80..0CFF; Kannada 2499 0x0D00, // 0D00..0D7F; Malayalam 2500 0x0D80, // 0D80..0DFF; Sinhala 2501 0x0E00, // 0E00..0E7F; Thai 2502 0x0E80, // 0E80..0EFF; Lao 2503 0x0F00, // 0F00..0FFF; Tibetan 2504 0x1000, // 1000..109F; Myanmar 2505 0x10A0, // 10A0..10FF; Georgian 
2506 0x1100, // 1100..11FF; Hangul Jamo 2507 0x1200, // 1200..137F; Ethiopic 2508 0x1380, // 1380..139F; Ethiopic Supplement 2509 0x13A0, // 13A0..13FF; Cherokee 2510 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 2511 0x1680, // 1680..169F; Ogham 2512 0x16A0, // 16A0..16FF; Runic 2513 0x1700, // 1700..171F; Tagalog 2514 0x1720, // 1720..173F; Hanunoo 2515 0x1740, // 1740..175F; Buhid 2516 0x1760, // 1760..177F; Tagbanwa 2517 0x1780, // 1780..17FF; Khmer 2518 0x1800, // 1800..18AF; Mongolian 2519 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 2520 0x1900, // 1900..194F; Limbu 2521 0x1950, // 1950..197F; Tai Le 2522 0x1980, // 1980..19DF; New Tai Lue 2523 0x19E0, // 19E0..19FF; Khmer Symbols 2524 0x1A00, // 1A00..1A1F; Buginese 2525 0x1A20, // 1A20..1AAF; Tai Tham 2526 0x1AB0, // unassigned 2527 0x1B00, // 1B00..1B7F; Balinese 2528 0x1B80, // 1B80..1BBF; Sundanese 2529 0x1BC0, // 1BC0..1BFF; Batak 2530 0x1C00, // 1C00..1C4F; Lepcha 2531 0x1C50, // 1C50..1C7F; Ol Chiki 2532 0x1C80, // unassigned 2533 0x1CD0, // 1CD0..1CFF; Vedic Extensions 2534 0x1D00, // 1D00..1D7F; Phonetic Extensions 2535 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 2536 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 2537 0x1E00, // 1E00..1EFF; Latin Extended Additional 2538 0x1F00, // 1F00..1FFF; Greek Extended 2539 0x2000, // 2000..206F; General Punctuation 2540 0x2070, // 2070..209F; Superscripts and Subscripts 2541 0x20A0, // 20A0..20CF; Currency Symbols 2542 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 2543 0x2100, // 2100..214F; Letterlike Symbols 2544 0x2150, // 2150..218F; Number Forms 2545 0x2190, // 2190..21FF; Arrows 2546 0x2200, // 2200..22FF; Mathematical Operators 2547 0x2300, // 2300..23FF; Miscellaneous Technical 2548 0x2400, // 2400..243F; Control Pictures 2549 0x2440, // 2440..245F; Optical Character Recognition 2550 0x2460, // 2460..24FF; Enclosed Alphanumerics 2551 0x2500, // 2500..257F; Box Drawing 
2552 0x2580, // 2580..259F; Block Elements 2553 0x25A0, // 25A0..25FF; Geometric Shapes 2554 0x2600, // 2600..26FF; Miscellaneous Symbols 2555 0x2700, // 2700..27BF; Dingbats 2556 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 2557 0x27F0, // 27F0..27FF; Supplemental Arrows-A 2558 0x2800, // 2800..28FF; Braille Patterns 2559 0x2900, // 2900..297F; Supplemental Arrows-B 2560 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 2561 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 2562 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 2563 0x2C00, // 2C00..2C5F; Glagolitic 2564 0x2C60, // 2C60..2C7F; Latin Extended-C 2565 0x2C80, // 2C80..2CFF; Coptic 2566 0x2D00, // 2D00..2D2F; Georgian Supplement 2567 0x2D30, // 2D30..2D7F; Tifinagh 2568 0x2D80, // 2D80..2DDF; Ethiopic Extended 2569 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 2570 0x2E00, // 2E00..2E7F; Supplemental Punctuation 2571 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 2572 0x2F00, // 2F00..2FDF; Kangxi Radicals 2573 0x2FE0, // unassigned 2574 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 2575 0x3000, // 3000..303F; CJK Symbols and Punctuation 2576 0x3040, // 3040..309F; Hiragana 2577 0x30A0, // 30A0..30FF; Katakana 2578 0x3100, // 3100..312F; Bopomofo 2579 0x3130, // 3130..318F; Hangul Compatibility Jamo 2580 0x3190, // 3190..319F; Kanbun 2581 0x31A0, // 31A0..31BF; Bopomofo Extended 2582 0x31C0, // 31C0..31EF; CJK Strokes 2583 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 2584 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 2585 0x3300, // 3300..33FF; CJK Compatibility 2586 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 2587 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 2588 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 2589 0xA000, // A000..A48F; Yi Syllables 2590 0xA490, // A490..A4CF; Yi Radicals 2591 0xA4D0, // A4D0..A4FF; Lisu 2592 0xA500, // A500..A63F; Vai 2593 0xA640, // A640..A69F; Cyrillic Extended-B 2594 0xA6A0, // 
A6A0..A6FF; Bamum 2595 0xA700, // A700..A71F; Modifier Tone Letters 2596 0xA720, // A720..A7FF; Latin Extended-D 2597 0xA800, // A800..A82F; Syloti Nagri 2598 0xA830, // A830..A83F; Common Indic Number Forms 2599 0xA840, // A840..A87F; Phags-pa 2600 0xA880, // A880..A8DF; Saurashtra 2601 0xA8E0, // A8E0..A8FF; Devanagari Extended 2602 0xA900, // A900..A92F; Kayah Li 2603 0xA930, // A930..A95F; Rejang 2604 0xA960, // A960..A97F; Hangul Jamo Extended-A 2605 0xA980, // A980..A9DF; Javanese 2606 0xA9E0, // unassigned 2607 0xAA00, // AA00..AA5F; Cham 2608 0xAA60, // AA60..AA7F; Myanmar Extended-A 2609 0xAA80, // AA80..AADF; Tai Viet 2610 0xAAE0, // unassigned 2611 0xAB00, // AB00..AB2F; Ethiopic Extended-A 2612 0xAB30, // unassigned 2613 0xABC0, // ABC0..ABFF; Meetei Mayek 2614 0xAC00, // AC00..D7AF; Hangul Syllables 2615 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 2616 0xD800, // D800..DB7F; High Surrogates 2617 0xDB80, // DB80..DBFF; High Private Use Surrogates 2618 0xDC00, // DC00..DFFF; Low Surrogates 2619 0xE000, // E000..F8FF; Private Use Area 2620 0xF900, // F900..FAFF; CJK Compatibility Ideographs 2621 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 2622 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 2623 0xFE00, // FE00..FE0F; Variation Selectors 2624 0xFE10, // FE10..FE1F; Vertical Forms 2625 0xFE20, // FE20..FE2F; Combining Half Marks 2626 0xFE30, // FE30..FE4F; CJK Compatibility Forms 2627 0xFE50, // FE50..FE6F; Small Form Variants 2628 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 2629 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 2630 0xFFF0, // FFF0..FFFF; Specials 2631 0x10000, // 10000..1007F; Linear B Syllabary 2632 0x10080, // 10080..100FF; Linear B Ideograms 2633 0x10100, // 10100..1013F; Aegean Numbers 2634 0x10140, // 10140..1018F; Ancient Greek Numbers 2635 0x10190, // 10190..101CF; Ancient Symbols 2636 0x101D0, // 101D0..101FF; Phaistos Disc 2637 0x10200, // unassigned 2638 0x10280, // 10280..1029F; Lycian 2639 0x102A0, // 
102A0..102DF; Carian 2640 0x102E0, // unassigned 2641 0x10300, // 10300..1032F; Old Italic 2642 0x10330, // 10330..1034F; Gothic 2643 0x10350, // unassigned 2644 0x10380, // 10380..1039F; Ugaritic 2645 0x103A0, // 103A0..103DF; Old Persian 2646 0x103E0, // unassigned 2647 0x10400, // 10400..1044F; Deseret 2648 0x10450, // 10450..1047F; Shavian 2649 0x10480, // 10480..104AF; Osmanya 2650 0x104B0, // unassigned 2651 0x10800, // 10800..1083F; Cypriot Syllabary 2652 0x10840, // 10840..1085F; Imperial Aramaic 2653 0x10860, // unassigned 2654 0x10900, // 10900..1091F; Phoenician 2655 0x10920, // 10920..1093F; Lydian 2656 0x10940, // unassigned 2657 0x10A00, // 10A00..10A5F; Kharoshthi 2658 0x10A60, // 10A60..10A7F; Old South Arabian 2659 0x10A80, // unassigned 2660 0x10B00, // 10B00..10B3F; Avestan 2661 0x10B40, // 10B40..10B5F; Inscriptional Parthian 2662 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 2663 0x10B80, // unassigned 2664 0x10C00, // 10C00..10C4F; Old Turkic 2665 0x10C50, // unassigned 2666 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 2667 0x10E80, // unassigned 2668 0x11000, // 11000..1107F; Brahmi 2669 0x11080, // 11080..110CF; Kaithi 2670 0x110D0, // unassigned 2671 0x12000, // 12000..123FF; Cuneiform 2672 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 2673 0x12480, // unassigned 2674 0x13000, // 13000..1342F; Egyptian Hieroglyphs 2675 0x13430, // unassigned 2676 0x16800, // 16800..16A3F; Bamum Supplement 2677 0x16A40, // unassigned 2678 0x1B000, // 1B000..1B0FF; Kana Supplement 2679 0x1B100, // unassigned 2680 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 2681 0x1D100, // 1D100..1D1FF; Musical Symbols 2682 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 2683 0x1D250, // unassigned 2684 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 2685 0x1D360, // 1D360..1D37F; Counting Rod Numerals 2686 0x1D380, // unassigned 2687 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 2688 0x1D800, // unassigned 2689 0x1F000, // 
1F000..1F02F; Mahjong Tiles 2690 0x1F030, // 1F030..1F09F; Domino Tiles 2691 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 2692 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 2693 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 2694 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs 2695 0x1F600, // 1F600..1F64F; Emoticons 2696 0x1F650, // unassigned 2697 0x1F680, // 1F680..1F6FF; Transport And Map Symbols 2698 0x1F700, // 1F700..1F77F; Alchemical Symbols 2699 0x1F780, // unassigned 2700 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 2701 0x2A6E0, // unassigned 2702 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 2703 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 2704 0x2B820, // unassigned 2705 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2706 0x2FA20, // unassigned 2707 0xE0000, // E0000..E007F; Tags 2708 0xE0080, // unassigned 2709 0xE0100, // E0100..E01EF; Variation Selectors Supplement 2710 0xE01F0, // unassigned 2711 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 2712 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B 2713 }; 2714 2715 private static final UnicodeBlock[] blocks = { 2716 BASIC_LATIN, 2717 LATIN_1_SUPPLEMENT, 2718 LATIN_EXTENDED_A, 2719 LATIN_EXTENDED_B, 2720 IPA_EXTENSIONS, 2721 SPACING_MODIFIER_LETTERS, 2722 COMBINING_DIACRITICAL_MARKS, 2723 GREEK, 2724 CYRILLIC, 2725 CYRILLIC_SUPPLEMENTARY, 2726 ARMENIAN, 2727 HEBREW, 2728 ARABIC, 2729 SYRIAC, 2730 ARABIC_SUPPLEMENT, 2731 THAANA, 2732 NKO, 2733 SAMARITAN, 2734 MANDAIC, 2735 null, 2736 DEVANAGARI, 2737 BENGALI, 2738 GURMUKHI, 2739 GUJARATI, 2740 ORIYA, 2741 TAMIL, 2742 TELUGU, 2743 KANNADA, 2744 MALAYALAM, 2745 SINHALA, 2746 THAI, 2747 LAO, 2748 TIBETAN, 2749 MYANMAR, 2750 GEORGIAN, 2751 HANGUL_JAMO, 2752 ETHIOPIC, 2753 ETHIOPIC_SUPPLEMENT, 2754 CHEROKEE, 2755 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 2756 OGHAM, 2757 RUNIC, 2758 TAGALOG, 2759 HANUNOO, 2760 BUHID, 2761 TAGBANWA, 2762 
KHMER, 2763 MONGOLIAN, 2764 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 2765 LIMBU, 2766 TAI_LE, 2767 NEW_TAI_LUE, 2768 KHMER_SYMBOLS, 2769 BUGINESE, 2770 TAI_THAM, 2771 null, 2772 BALINESE, 2773 SUNDANESE, 2774 BATAK, 2775 LEPCHA, 2776 OL_CHIKI, 2777 null, 2778 VEDIC_EXTENSIONS, 2779 PHONETIC_EXTENSIONS, 2780 PHONETIC_EXTENSIONS_SUPPLEMENT, 2781 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 2782 LATIN_EXTENDED_ADDITIONAL, 2783 GREEK_EXTENDED, 2784 GENERAL_PUNCTUATION, 2785 SUPERSCRIPTS_AND_SUBSCRIPTS, 2786 CURRENCY_SYMBOLS, 2787 COMBINING_MARKS_FOR_SYMBOLS, 2788 LETTERLIKE_SYMBOLS, 2789 NUMBER_FORMS, 2790 ARROWS, 2791 MATHEMATICAL_OPERATORS, 2792 MISCELLANEOUS_TECHNICAL, 2793 CONTROL_PICTURES, 2794 OPTICAL_CHARACTER_RECOGNITION, 2795 ENCLOSED_ALPHANUMERICS, 2796 BOX_DRAWING, 2797 BLOCK_ELEMENTS, 2798 GEOMETRIC_SHAPES, 2799 MISCELLANEOUS_SYMBOLS, 2800 DINGBATS, 2801 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 2802 SUPPLEMENTAL_ARROWS_A, 2803 BRAILLE_PATTERNS, 2804 SUPPLEMENTAL_ARROWS_B, 2805 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 2806 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 2807 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 2808 GLAGOLITIC, 2809 LATIN_EXTENDED_C, 2810 COPTIC, 2811 GEORGIAN_SUPPLEMENT, 2812 TIFINAGH, 2813 ETHIOPIC_EXTENDED, 2814 CYRILLIC_EXTENDED_A, 2815 SUPPLEMENTAL_PUNCTUATION, 2816 CJK_RADICALS_SUPPLEMENT, 2817 KANGXI_RADICALS, 2818 null, 2819 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 2820 CJK_SYMBOLS_AND_PUNCTUATION, 2821 HIRAGANA, 2822 KATAKANA, 2823 BOPOMOFO, 2824 HANGUL_COMPATIBILITY_JAMO, 2825 KANBUN, 2826 BOPOMOFO_EXTENDED, 2827 CJK_STROKES, 2828 KATAKANA_PHONETIC_EXTENSIONS, 2829 ENCLOSED_CJK_LETTERS_AND_MONTHS, 2830 CJK_COMPATIBILITY, 2831 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 2832 YIJING_HEXAGRAM_SYMBOLS, 2833 CJK_UNIFIED_IDEOGRAPHS, 2834 YI_SYLLABLES, 2835 YI_RADICALS, 2836 LISU, 2837 VAI, 2838 CYRILLIC_EXTENDED_B, 2839 BAMUM, 2840 MODIFIER_TONE_LETTERS, 2841 LATIN_EXTENDED_D, 2842 SYLOTI_NAGRI, 2843 COMMON_INDIC_NUMBER_FORMS, 2844 PHAGS_PA, 2845 SAURASHTRA, 2846 
DEVANAGARI_EXTENDED, 2847 KAYAH_LI, 2848 REJANG, 2849 HANGUL_JAMO_EXTENDED_A, 2850 JAVANESE, 2851 null, 2852 CHAM, 2853 MYANMAR_EXTENDED_A, 2854 TAI_VIET, 2855 null, 2856 ETHIOPIC_EXTENDED_A, 2857 null, 2858 MEETEI_MAYEK, 2859 HANGUL_SYLLABLES, 2860 HANGUL_JAMO_EXTENDED_B, 2861 HIGH_SURROGATES, 2862 HIGH_PRIVATE_USE_SURROGATES, 2863 LOW_SURROGATES, 2864 PRIVATE_USE_AREA, 2865 CJK_COMPATIBILITY_IDEOGRAPHS, 2866 ALPHABETIC_PRESENTATION_FORMS, 2867 ARABIC_PRESENTATION_FORMS_A, 2868 VARIATION_SELECTORS, 2869 VERTICAL_FORMS, 2870 COMBINING_HALF_MARKS, 2871 CJK_COMPATIBILITY_FORMS, 2872 SMALL_FORM_VARIANTS, 2873 ARABIC_PRESENTATION_FORMS_B, 2874 HALFWIDTH_AND_FULLWIDTH_FORMS, 2875 SPECIALS, 2876 LINEAR_B_SYLLABARY, 2877 LINEAR_B_IDEOGRAMS, 2878 AEGEAN_NUMBERS, 2879 ANCIENT_GREEK_NUMBERS, 2880 ANCIENT_SYMBOLS, 2881 PHAISTOS_DISC, 2882 null, 2883 LYCIAN, 2884 CARIAN, 2885 null, 2886 OLD_ITALIC, 2887 GOTHIC, 2888 null, 2889 UGARITIC, 2890 OLD_PERSIAN, 2891 null, 2892 DESERET, 2893 SHAVIAN, 2894 OSMANYA, 2895 null, 2896 CYPRIOT_SYLLABARY, 2897 IMPERIAL_ARAMAIC, 2898 null, 2899 PHOENICIAN, 2900 LYDIAN, 2901 null, 2902 KHAROSHTHI, 2903 OLD_SOUTH_ARABIAN, 2904 null, 2905 AVESTAN, 2906 INSCRIPTIONAL_PARTHIAN, 2907 INSCRIPTIONAL_PAHLAVI, 2908 null, 2909 OLD_TURKIC, 2910 null, 2911 RUMI_NUMERAL_SYMBOLS, 2912 null, 2913 BRAHMI, 2914 KAITHI, 2915 null, 2916 CUNEIFORM, 2917 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 2918 null, 2919 EGYPTIAN_HIEROGLYPHS, 2920 null, 2921 BAMUM_SUPPLEMENT, 2922 null, 2923 KANA_SUPPLEMENT, 2924 null, 2925 BYZANTINE_MUSICAL_SYMBOLS, 2926 MUSICAL_SYMBOLS, 2927 ANCIENT_GREEK_MUSICAL_NOTATION, 2928 null, 2929 TAI_XUAN_JING_SYMBOLS, 2930 COUNTING_ROD_NUMERALS, 2931 null, 2932 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 2933 null, 2934 MAHJONG_TILES, 2935 DOMINO_TILES, 2936 PLAYING_CARDS, 2937 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 2938 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 2939 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 2940 EMOTICONS, 2941 null, 2942 TRANSPORT_AND_MAP_SYMBOLS, 2943 
ALCHEMICAL_SYMBOLS, 2944 null, 2945 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 2946 null, 2947 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 2948 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 2949 null, 2950 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 2951 null, 2952 TAGS, 2953 null, 2954 VARIATION_SELECTORS_SUPPLEMENT, 2955 null, 2956 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 2957 SUPPLEMENTARY_PRIVATE_USE_AREA_B 2958 }; 2959 2960 2961 /** 2962 * Returns the object representing the Unicode block containing the 2963 * given character, or <code>null</code> if the character is not a 2964 * member of a defined block. 2965 * 2966 * <p><b>Note:</b> This method cannot handle 2967 * <a href="Character.html#supplementary"> supplementary 2968 * characters</a>. To support all Unicode characters, including 2969 * supplementary characters, use the {@link #of(int)} method. 2970 * 2971 * @param c The character in question 2972 * @return The <code>UnicodeBlock</code> instance representing the 2973 * Unicode block of which this character is a member, or 2974 * <code>null</code> if the character is not a member of any 2975 * Unicode block 2976 */ 2977 public static UnicodeBlock of(char c) { 2978 return of((int)c); 2979 } 2980 2981 /** 2982 * Returns the object representing the Unicode block 2983 * containing the given character (Unicode code point), or 2984 * <code>null</code> if the character is not a member of a 2985 * defined block. 2986 * 2987 * @param codePoint the character (Unicode code point) in question. 2988 * @return The <code>UnicodeBlock</code> instance representing the 2989 * Unicode block of which this character is a member, or 2990 * <code>null</code> if the character is not a member of any 2991 * Unicode block 2992 * @exception IllegalArgumentException if the specified 2993 * <code>codePoint</code> is an invalid Unicode code point. 
2994 * @see Character#isValidCodePoint(int) 2995 * @since 1.5 2996 */ 2997 public static UnicodeBlock of(int codePoint) { 2998 if (!isValidCodePoint(codePoint)) { 2999 throw new IllegalArgumentException(); 3000 } 3001 3002 int top, bottom, current; 3003 bottom = 0; 3004 top = blockStarts.length; 3005 current = top/2; 3006 3007 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 3008 while (top - bottom > 1) { 3009 if (codePoint >= blockStarts[current]) { 3010 bottom = current; 3011 } else { 3012 top = current; 3013 } 3014 current = (top + bottom) / 2; 3015 } 3016 return blocks[current]; 3017 } 3018 3019 /** 3020 * Returns the UnicodeBlock with the given name. Block 3021 * names are determined by The Unicode Standard. The file 3022 * Blocks-<version>.txt defines blocks for a particular 3023 * version of the standard. The {@link Character} class specifies 3024 * the version of the standard that it supports. 3025 * <p> 3026 * This method accepts block names in the following forms: 3027 * <ol> 3028 * <li> Canonical block names as defined by the Unicode Standard. 3029 * For example, the standard defines a "Basic Latin" block. Therefore, this 3030 * method accepts "Basic Latin" as a valid block name. The documentation of 3031 * each UnicodeBlock provides the canonical name. 3032 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 3033 * is a valid block name for the "Basic Latin" block. 3034 * <li>The text representation of each constant UnicodeBlock identifier. 3035 * For example, this method will return the {@link #BASIC_LATIN} block if 3036 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 3037 * hyphens in the canonical name with underscores. 3038 * </ol> 3039 * Finally, character case is ignored for all of the valid block name forms. 3040 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 
3041 * The en_US locale's case mapping rules are used to provide case-insensitive 3042 * string comparisons for block name validation. 3043 * <p> 3044 * If the Unicode Standard changes block names, both the previous and 3045 * current names will be accepted. 3046 * 3047 * @param blockName A <code>UnicodeBlock</code> name. 3048 * @return The <code>UnicodeBlock</code> instance identified 3049 * by <code>blockName</code> 3050 * @throws IllegalArgumentException if <code>blockName</code> is an 3051 * invalid name 3052 * @throws NullPointerException if <code>blockName</code> is null 3053 * @since 1.5 3054 */ 3055 public static final UnicodeBlock forName(String blockName) { 3056 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 3057 if (block == null) { 3058 throw new IllegalArgumentException(); 3059 } 3060 return block; 3061 } 3062 } 3063 3064 3065 /** 3066 * A family of character subsets representing the character scripts 3067 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 3068 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 3069 * character is assigned to a single Unicode script, either a specific 3070 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 3071 * one of the following three special values, 3072 * {@link Character.UnicodeScript#INHERITED Inherited}, 3073 * {@link Character.UnicodeScript#COMMON Common} or 3074 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 3075 * 3076 * @since 1.7 3077 */ 3078 public static enum UnicodeScript { 3079 /** 3080 * Unicode script "Common". 3081 */ 3082 COMMON, 3083 3084 /** 3085 * Unicode script "Latin". 3086 */ 3087 LATIN, 3088 3089 /** 3090 * Unicode script "Greek". 3091 */ 3092 GREEK, 3093 3094 /** 3095 * Unicode script "Cyrillic". 3096 */ 3097 CYRILLIC, 3098 3099 /** 3100 * Unicode script "Armenian". 3101 */ 3102 ARMENIAN, 3103 3104 /** 3105 * Unicode script "Hebrew". 3106 */ 3107 HEBREW, 3108 3109 /** 3110 * Unicode script "Arabic". 
3111 */ 3112 ARABIC, 3113 3114 /** 3115 * Unicode script "Syriac". 3116 */ 3117 SYRIAC, 3118 3119 /** 3120 * Unicode script "Thaana". 3121 */ 3122 THAANA, 3123 3124 /** 3125 * Unicode script "Devanagari". 3126 */ 3127 DEVANAGARI, 3128 3129 /** 3130 * Unicode script "Bengali". 3131 */ 3132 BENGALI, 3133 3134 /** 3135 * Unicode script "Gurmukhi". 3136 */ 3137 GURMUKHI, 3138 3139 /** 3140 * Unicode script "Gujarati". 3141 */ 3142 GUJARATI, 3143 3144 /** 3145 * Unicode script "Oriya". 3146 */ 3147 ORIYA, 3148 3149 /** 3150 * Unicode script "Tamil". 3151 */ 3152 TAMIL, 3153 3154 /** 3155 * Unicode script "Telugu". 3156 */ 3157 TELUGU, 3158 3159 /** 3160 * Unicode script "Kannada". 3161 */ 3162 KANNADA, 3163 3164 /** 3165 * Unicode script "Malayalam". 3166 */ 3167 MALAYALAM, 3168 3169 /** 3170 * Unicode script "Sinhala". 3171 */ 3172 SINHALA, 3173 3174 /** 3175 * Unicode script "Thai". 3176 */ 3177 THAI, 3178 3179 /** 3180 * Unicode script "Lao". 3181 */ 3182 LAO, 3183 3184 /** 3185 * Unicode script "Tibetan". 3186 */ 3187 TIBETAN, 3188 3189 /** 3190 * Unicode script "Myanmar". 3191 */ 3192 MYANMAR, 3193 3194 /** 3195 * Unicode script "Georgian". 3196 */ 3197 GEORGIAN, 3198 3199 /** 3200 * Unicode script "Hangul". 3201 */ 3202 HANGUL, 3203 3204 /** 3205 * Unicode script "Ethiopic". 3206 */ 3207 ETHIOPIC, 3208 3209 /** 3210 * Unicode script "Cherokee". 3211 */ 3212 CHEROKEE, 3213 3214 /** 3215 * Unicode script "Canadian_Aboriginal". 3216 */ 3217 CANADIAN_ABORIGINAL, 3218 3219 /** 3220 * Unicode script "Ogham". 3221 */ 3222 OGHAM, 3223 3224 /** 3225 * Unicode script "Runic". 3226 */ 3227 RUNIC, 3228 3229 /** 3230 * Unicode script "Khmer". 3231 */ 3232 KHMER, 3233 3234 /** 3235 * Unicode script "Mongolian". 3236 */ 3237 MONGOLIAN, 3238 3239 /** 3240 * Unicode script "Hiragana". 3241 */ 3242 HIRAGANA, 3243 3244 /** 3245 * Unicode script "Katakana". 3246 */ 3247 KATAKANA, 3248 3249 /** 3250 * Unicode script "Bopomofo". 
3251 */ 3252 BOPOMOFO, 3253 3254 /** 3255 * Unicode script "Han". 3256 */ 3257 HAN, 3258 3259 /** 3260 * Unicode script "Yi". 3261 */ 3262 YI, 3263 3264 /** 3265 * Unicode script "Old_Italic". 3266 */ 3267 OLD_ITALIC, 3268 3269 /** 3270 * Unicode script "Gothic". 3271 */ 3272 GOTHIC, 3273 3274 /** 3275 * Unicode script "Deseret". 3276 */ 3277 DESERET, 3278 3279 /** 3280 * Unicode script "Inherited". 3281 */ 3282 INHERITED, 3283 3284 /** 3285 * Unicode script "Tagalog". 3286 */ 3287 TAGALOG, 3288 3289 /** 3290 * Unicode script "Hanunoo". 3291 */ 3292 HANUNOO, 3293 3294 /** 3295 * Unicode script "Buhid". 3296 */ 3297 BUHID, 3298 3299 /** 3300 * Unicode script "Tagbanwa". 3301 */ 3302 TAGBANWA, 3303 3304 /** 3305 * Unicode script "Limbu". 3306 */ 3307 LIMBU, 3308 3309 /** 3310 * Unicode script "Tai_Le". 3311 */ 3312 TAI_LE, 3313 3314 /** 3315 * Unicode script "Linear_B". 3316 */ 3317 LINEAR_B, 3318 3319 /** 3320 * Unicode script "Ugaritic". 3321 */ 3322 UGARITIC, 3323 3324 /** 3325 * Unicode script "Shavian". 3326 */ 3327 SHAVIAN, 3328 3329 /** 3330 * Unicode script "Osmanya". 3331 */ 3332 OSMANYA, 3333 3334 /** 3335 * Unicode script "Cypriot". 3336 */ 3337 CYPRIOT, 3338 3339 /** 3340 * Unicode script "Braille". 3341 */ 3342 BRAILLE, 3343 3344 /** 3345 * Unicode script "Buginese". 3346 */ 3347 BUGINESE, 3348 3349 /** 3350 * Unicode script "Coptic". 3351 */ 3352 COPTIC, 3353 3354 /** 3355 * Unicode script "New_Tai_Lue". 3356 */ 3357 NEW_TAI_LUE, 3358 3359 /** 3360 * Unicode script "Glagolitic". 3361 */ 3362 GLAGOLITIC, 3363 3364 /** 3365 * Unicode script "Tifinagh". 3366 */ 3367 TIFINAGH, 3368 3369 /** 3370 * Unicode script "Syloti_Nagri". 3371 */ 3372 SYLOTI_NAGRI, 3373 3374 /** 3375 * Unicode script "Old_Persian". 3376 */ 3377 OLD_PERSIAN, 3378 3379 /** 3380 * Unicode script "Kharoshthi". 3381 */ 3382 KHAROSHTHI, 3383 3384 /** 3385 * Unicode script "Balinese". 3386 */ 3387 BALINESE, 3388 3389 /** 3390 * Unicode script "Cuneiform". 
3391 */ 3392 CUNEIFORM, 3393 3394 /** 3395 * Unicode script "Phoenician". 3396 */ 3397 PHOENICIAN, 3398 3399 /** 3400 * Unicode script "Phags_Pa". 3401 */ 3402 PHAGS_PA, 3403 3404 /** 3405 * Unicode script "Nko". 3406 */ 3407 NKO, 3408 3409 /** 3410 * Unicode script "Sundanese". 3411 */ 3412 SUNDANESE, 3413 3414 /** 3415 * Unicode script "Batak". 3416 */ 3417 BATAK, 3418 3419 /** 3420 * Unicode script "Lepcha". 3421 */ 3422 LEPCHA, 3423 3424 /** 3425 * Unicode script "Ol_Chiki". 3426 */ 3427 OL_CHIKI, 3428 3429 /** 3430 * Unicode script "Vai". 3431 */ 3432 VAI, 3433 3434 /** 3435 * Unicode script "Saurashtra". 3436 */ 3437 SAURASHTRA, 3438 3439 /** 3440 * Unicode script "Kayah_Li". 3441 */ 3442 KAYAH_LI, 3443 3444 /** 3445 * Unicode script "Rejang". 3446 */ 3447 REJANG, 3448 3449 /** 3450 * Unicode script "Lycian". 3451 */ 3452 LYCIAN, 3453 3454 /** 3455 * Unicode script "Carian". 3456 */ 3457 CARIAN, 3458 3459 /** 3460 * Unicode script "Lydian". 3461 */ 3462 LYDIAN, 3463 3464 /** 3465 * Unicode script "Cham". 3466 */ 3467 CHAM, 3468 3469 /** 3470 * Unicode script "Tai_Tham". 3471 */ 3472 TAI_THAM, 3473 3474 /** 3475 * Unicode script "Tai_Viet". 3476 */ 3477 TAI_VIET, 3478 3479 /** 3480 * Unicode script "Avestan". 3481 */ 3482 AVESTAN, 3483 3484 /** 3485 * Unicode script "Egyptian_Hieroglyphs". 3486 */ 3487 EGYPTIAN_HIEROGLYPHS, 3488 3489 /** 3490 * Unicode script "Samaritan". 3491 */ 3492 SAMARITAN, 3493 3494 /** 3495 * Unicode script "Mandaic". 3496 */ 3497 MANDAIC, 3498 3499 /** 3500 * Unicode script "Lisu". 3501 */ 3502 LISU, 3503 3504 /** 3505 * Unicode script "Bamum". 3506 */ 3507 BAMUM, 3508 3509 /** 3510 * Unicode script "Javanese". 3511 */ 3512 JAVANESE, 3513 3514 /** 3515 * Unicode script "Meetei_Mayek". 3516 */ 3517 MEETEI_MAYEK, 3518 3519 /** 3520 * Unicode script "Imperial_Aramaic". 3521 */ 3522 IMPERIAL_ARAMAIC, 3523 3524 /** 3525 * Unicode script "Old_South_Arabian". 
*/
        OLD_SOUTH_ARABIAN,

        /**
         * Unicode script "Inscriptional_Parthian".
         */
        INSCRIPTIONAL_PARTHIAN,

        /**
         * Unicode script "Inscriptional_Pahlavi".
         */
        INSCRIPTIONAL_PAHLAVI,

        /**
         * Unicode script "Old_Turkic".
         */
        OLD_TURKIC,

        /**
         * Unicode script "Brahmi".
         */
        BRAHMI,

        /**
         * Unicode script "Kaithi".
         */
        KAITHI,

        /**
         * Unicode script "Unknown".
         */
        UNKNOWN;

        // First code point of each consecutive script range, in ascending
        // order. of(int) binary-searches this table and uses the resulting
        // position to index the parallel scripts[] table, so entry i here
        // and entry i of scripts[] must always describe the same range.
        // The trailing comment on each entry gives the range and its script.
        private static final int[] scriptStarts = {
            0x0000,   // 0000..0040; COMMON
            0x0041,   // 0041..005A; LATIN
            0x005B,   // 005B..0060; COMMON
            0x0061,   // 0061..007A; LATIN
            0x007B,   // 007B..00A9; COMMON
            0x00AA,   // 00AA..00AA; LATIN
            0x00AB,   // 00AB..00B9; COMMON
            0x00BA,   // 00BA..00BA; LATIN
            0x00BB,   // 00BB..00BF; COMMON
            0x00C0,   // 00C0..00D6; LATIN
            0x00D7,   // 00D7..00D7; COMMON
            0x00D8,   // 00D8..00F6; LATIN
            0x00F7,   // 00F7..00F7; COMMON
            0x00F8,   // 00F8..02B8; LATIN
            0x02B9,   // 02B9..02DF; COMMON
            0x02E0,   // 02E0..02E4; LATIN
            0x02E5,   // 02E5..02E9; COMMON
            0x02EA,   // 02EA..02EB; BOPOMOFO
            0x02EC,   // 02EC..02FF; COMMON
            0x0300,   // 0300..036F; INHERITED
            0x0370,   // 0370..0373; GREEK
            0x0374,   // 0374..0374; COMMON
            0x0375,   // 0375..037D; GREEK
            0x037E,   // 037E..0383; COMMON
            0x0384,   // 0384..0384; GREEK
            0x0385,   // 0385..0385; COMMON
            0x0386,   // 0386..0386; GREEK
            0x0387,   // 0387..0387; COMMON
            0x0388,   // 0388..03E1; GREEK
            0x03E2,   // 03E2..03EF; COPTIC
            0x03F0,   // 03F0..03FF; GREEK
            0x0400,   // 0400..0484; CYRILLIC
            0x0485,   // 0485..0486; INHERITED
            0x0487,   // 0487..0530; CYRILLIC
            0x0531,   // 0531..0588; ARMENIAN
            0x0589,   // 0589..0589; COMMON
            0x058A,   // 058A..0590; ARMENIAN
            0x0591,   // 0591..05FF; HEBREW
            0x0600,   // 0600..060B; ARABIC
            0x060C,   // 060C..060C; COMMON
            0x060D,   // 060D..061A; ARABIC
            0x061B,   // 061B..061D; COMMON
            0x061E,   // 061E..061E; ARABIC
            0x061F,   // 061F..061F; COMMON
            0x0620,   // 0620..063F; ARABIC
            0x0640,   // 0640..0640; COMMON
            0x0641,   // 0641..064A; ARABIC
            0x064B,   // 064B..0655; INHERITED
            0x0656,   // 0656..065E; ARABIC
            0x065F,   // 065F..065F; INHERITED
            0x0660,   // 0660..0669; COMMON
            0x066A,   // 066A..066F; ARABIC
            0x0670,   // 0670..0670; INHERITED
            0x0671,   // 0671..06DC; ARABIC
            0x06DD,   // 06DD..06DD; COMMON
            0x06DE,   // 06DE..06FF; ARABIC
            0x0700,   // 0700..074F; SYRIAC
            0x0750,   // 0750..077F; ARABIC
            0x0780,   // 0780..07BF; THAANA
            0x07C0,   // 07C0..07FF; NKO
            0x0800,   // 0800..083F; SAMARITAN
            0x0840,   // 0840..08FF; MANDAIC
            0x0900,   // 0900..0950; DEVANAGARI
            0x0951,   // 0951..0952; INHERITED
            0x0953,   // 0953..0963; DEVANAGARI
            0x0964,   // 0964..0965; COMMON
            0x0966,   // 0966..096F; DEVANAGARI
            0x0970,   // 0970..0970; COMMON
            0x0971,   // 0971..0980; DEVANAGARI
            0x0981,   // 0981..0A00; BENGALI
            0x0A01,   // 0A01..0A80; GURMUKHI
            0x0A81,   // 0A81..0B00; GUJARATI
            0x0B01,   // 0B01..0B81; ORIYA
            0x0B82,   // 0B82..0C00; TAMIL
            0x0C01,   // 0C01..0C81; TELUGU
            0x0C82,   // 0C82..0CF0; KANNADA
            0x0D02,   // 0D02..0D81; MALAYALAM
            0x0D82,   // 0D82..0E00; SINHALA
            0x0E01,   // 0E01..0E3E; THAI
            0x0E3F,   // 0E3F..0E3F; COMMON
            0x0E40,   // 0E40..0E80; THAI
            0x0E81,   // 0E81..0EFF; LAO
            0x0F00,   // 0F00..0FD4; TIBETAN
            0x0FD5,   // 0FD5..0FD8; COMMON
            0x0FD9,   // 0FD9..0FFF; TIBETAN
            0x1000,   // 1000..109F; MYANMAR
            0x10A0,   // 10A0..10FA; GEORGIAN
            0x10FB,   // 10FB..10FB; COMMON
            0x10FC,   // 10FC..10FF; GEORGIAN
            0x1100,   // 1100..11FF; HANGUL
            0x1200,   // 1200..139F; ETHIOPIC
            0x13A0,   // 13A0..13FF; CHEROKEE
            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
            0x1680,   // 1680..169F; OGHAM
            0x16A0,   // 16A0..16EA; RUNIC
            0x16EB,   // 16EB..16ED; COMMON
            0x16EE,   // 16EE..16FF; RUNIC
            0x1700,   // 1700..171F; TAGALOG
            0x1720,   // 1720..1734; HANUNOO
            0x1735,   // 1735..173F; COMMON
            0x1740,   // 1740..175F; BUHID
            0x1760,   // 1760..177F; TAGBANWA
            0x1780,   // 1780..17FF; KHMER
            0x1800,   // 1800..1801; MONGOLIAN
            0x1802,   // 1802..1803; COMMON
            0x1804,   // 1804..1804; MONGOLIAN
            0x1805,   // 1805..1805; COMMON
            0x1806,   // 1806..18AF; MONGOLIAN
            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
            0x1900,   // 1900..194F; LIMBU
            0x1950,   // 1950..197F; TAI_LE
            0x1980,   // 1980..19DF; NEW_TAI_LUE
            0x19E0,   // 19E0..19FF; KHMER
            0x1A00,   // 1A00..1A1F; BUGINESE
            0x1A20,   // 1A20..1AFF; TAI_THAM
            0x1B00,   // 1B00..1B7F; BALINESE
            0x1B80,   // 1B80..1BBF; SUNDANESE
            0x1BC0,   // 1BC0..1BFF; BATAK
            0x1C00,   // 1C00..1C4F; LEPCHA
            0x1C50,   // 1C50..1CCF; OL_CHIKI
            0x1CD0,   // 1CD0..1CD2; INHERITED
            0x1CD3,   // 1CD3..1CD3; COMMON
            0x1CD4,   // 1CD4..1CE0; INHERITED
            0x1CE1,   // 1CE1..1CE1; COMMON
            0x1CE2,   // 1CE2..1CE8; INHERITED
            0x1CE9,   // 1CE9..1CEC; COMMON
            0x1CED,   // 1CED..1CED; INHERITED
            0x1CEE,   // 1CEE..1CFF; COMMON
            0x1D00,   // 1D00..1D25; LATIN
            0x1D26,   // 1D26..1D2A; GREEK
            0x1D2B,   // 1D2B..1D2B; CYRILLIC
            0x1D2C,   // 1D2C..1D5C; LATIN
            0x1D5D,   // 1D5D..1D61; GREEK
            0x1D62,   // 1D62..1D65; LATIN
            0x1D66,   // 1D66..1D6A; GREEK
            0x1D6B,   // 1D6B..1D77; LATIN
            0x1D78,   // 1D78..1D78; CYRILLIC
            0x1D79,   // 1D79..1DBE; LATIN
            0x1DBF,   // 1DBF..1DBF; GREEK
            0x1DC0,   // 1DC0..1DFF; INHERITED
            0x1E00,   // 1E00..1EFF; LATIN
            0x1F00,   // 1F00..1FFF; GREEK
            0x2000,   // 2000..200B; COMMON
            0x200C,   // 200C..200D; INHERITED
            0x200E,   // 200E..2070; COMMON
            0x2071,   // 2071..2073; LATIN
            0x2074,   // 2074..207E; COMMON
            0x207F,   // 207F..207F; LATIN
            0x2080,   // 2080..208F; COMMON
            0x2090,   // 2090..209F; LATIN
            0x20A0,   // 20A0..20CF; COMMON
            0x20D0,   // 20D0..20FF; INHERITED
            0x2100,   // 2100..2125; COMMON
            0x2126,   // 2126..2126; GREEK
            0x2127,   // 2127..2129; COMMON
            0x212A,   // 212A..212B; LATIN
            0x212C,   // 212C..2131; COMMON
            0x2132,   // 2132..2132; LATIN
            0x2133,   // 2133..214D; COMMON
            0x214E,   // 214E..214E; LATIN
            0x214F,   // 214F..215F; COMMON
            0x2160,   // 2160..2188; LATIN
            0x2189,   // 2189..27FF; COMMON
            0x2800,   // 2800..28FF; BRAILLE
            0x2900,   // 2900..2BFF; COMMON
            0x2C00,   // 2C00..2C5F; GLAGOLITIC
            0x2C60,   // 2C60..2C7F; LATIN
            0x2C80,   // 2C80..2CFF; COPTIC
            0x2D00,   // 2D00..2D2F; GEORGIAN
            0x2D30,   // 2D30..2D7F; TIFINAGH
            0x2D80,   // 2D80..2DDF; ETHIOPIC
            0x2DE0,   // 2DE0..2DFF; CYRILLIC
            0x2E00,   // 2E00..2E7F; COMMON
            0x2E80,   // 2E80..2FEF; HAN
            0x2FF0,   // 2FF0..3004; COMMON
            0x3005,   // 3005..3005; HAN
            0x3006,   // 3006..3006; COMMON
            0x3007,   // 3007..3007; HAN
            0x3008,   // 3008..3020; COMMON
            0x3021,   // 3021..3029; HAN
            0x302A,   // 302A..302D; INHERITED
            0x302E,   // 302E..302F; HANGUL
            0x3030,   // 3030..3037; COMMON
            0x3038,   // 3038..303B; HAN
            0x303C,   // 303C..3040; COMMON
            0x3041,   // 3041..3098; HIRAGANA
            0x3099,   // 3099..309A; INHERITED
            0x309B,   // 309B..309C; COMMON
            0x309D,   // 309D..309F; HIRAGANA
            0x30A0,   // 30A0..30A0; COMMON
            0x30A1,   // 30A1..30FA; KATAKANA
            0x30FB,   // 30FB..30FC; COMMON
            0x30FD,   // 30FD..3104; KATAKANA
            0x3105,   // 3105..3130; BOPOMOFO
            0x3131,   // 3131..318F; HANGUL
            0x3190,   // 3190..319F; COMMON
            0x31A0,   // 31A0..31BF; BOPOMOFO
            0x31C0,   // 31C0..31EF; COMMON
            0x31F0,   // 31F0..31FF; KATAKANA
            0x3200,   // 3200..321F; HANGUL
            0x3220,   // 3220..325F; COMMON
            0x3260,   // 3260..327E; HANGUL
            0x327F,   // 327F..32CF; COMMON
            0x32D0,   // 32D0..3357; KATAKANA
            0x3358,   // 3358..33FF; COMMON
            0x3400,   // 3400..4DBF; HAN
            0x4DC0,   // 4DC0..4DFF; COMMON
            0x4E00,   // 4E00..9FFF; HAN
            0xA000,   // A000..A4CF; YI
            0xA4D0,   // A4D0..A4FF; LISU
            0xA500,   // A500..A63F; VAI
            0xA640,   // A640..A69F; CYRILLIC
            0xA6A0,   // A6A0..A6FF; BAMUM
            0xA700,   // A700..A721; COMMON
            0xA722,   // A722..A787; LATIN
            0xA788,   // A788..A78A; COMMON
            0xA78B,   // A78B..A7FF; LATIN
            0xA800,   // A800..A82F; SYLOTI_NAGRI
            0xA830,   // A830..A83F; COMMON
            0xA840,   // A840..A87F; PHAGS_PA
            0xA880,   // A880..A8DF; SAURASHTRA
            0xA8E0,   // A8E0..A8FF; DEVANAGARI
            0xA900,   // A900..A92F; KAYAH_LI
            0xA930,   // A930..A95F; REJANG
            0xA960,   // A960..A97F; HANGUL
            0xA980,   // A980..A9FF; JAVANESE
            0xAA00,   // AA00..AA5F; CHAM
            0xAA60,   // AA60..AA7F; MYANMAR
            0xAA80,   // AA80..AB00; TAI_VIET
            0xAB01,   // AB01..ABBF; ETHIOPIC
            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
            0xAC00,   // AC00..D7FB; HANGUL
            0xD7FC,   // D7FC..F8FF; UNKNOWN
            0xF900,   // F900..FAFF; HAN
            0xFB00,   // FB00..FB12; LATIN
            0xFB13,   // FB13..FB1C; ARMENIAN
            0xFB1D,   // FB1D..FB4F; HEBREW
            0xFB50,   // FB50..FD3D; ARABIC
            0xFD3E,   // FD3E..FD4F; COMMON
            0xFD50,   // FD50..FDFC; ARABIC
            0xFDFD,   // FDFD..FDFF; COMMON
            0xFE00,   // FE00..FE0F; INHERITED
            0xFE10,   // FE10..FE1F; COMMON
            0xFE20,   // FE20..FE2F; INHERITED
            0xFE30,   // FE30..FE6F; COMMON
            0xFE70,   // FE70..FEFE; ARABIC
            0xFEFF,   // FEFF..FF20; COMMON
            0xFF21,   // FF21..FF3A; LATIN
            0xFF3B,   // FF3B..FF40; COMMON
            0xFF41,   // FF41..FF5A; LATIN
            0xFF5B,   // FF5B..FF65; COMMON
            0xFF66,   // FF66..FF6F; KATAKANA
            0xFF70,   // FF70..FF70; COMMON
            0xFF71,   // FF71..FF9D; KATAKANA
            0xFF9E,   // FF9E..FF9F; COMMON
            0xFFA0,   // FFA0..FFDF; HANGUL
            0xFFE0,   // FFE0..FFFF; COMMON
            0x10000,  // 10000..100FF; LINEAR_B
            0x10100,  // 10100..1013F; COMMON
            0x10140,  // 10140..1018F; GREEK
            0x10190,  // 10190..101FC; COMMON
            0x101FD,  // 101FD..1027F; INHERITED
            0x10280,  // 10280..1029F; LYCIAN
            0x102A0,  // 102A0..102FF; CARIAN
            0x10300,  // 10300..1032F; OLD_ITALIC
            0x10330,  // 10330..1037F; GOTHIC
            0x10380,  // 10380..1039F; UGARITIC
            0x103A0,  // 103A0..103FF; OLD_PERSIAN
            0x10400,  // 10400..1044F; DESERET
            0x10450,  // 10450..1047F; SHAVIAN
            0x10480,  // 10480..107FF; OSMANYA
            0x10800,  // 10800..1083F; CYPRIOT
            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
            0x10900,  // 10900..1091F; PHOENICIAN
            0x10920,  // 10920..109FF; LYDIAN
            0x10A00,  // 10A00..10A5F; KHAROSHTHI
            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
            0x10B00,  // 10B00..10B3F; AVESTAN
            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
            0x10C00,  // 10C00..10E5F; OLD_TURKIC
            0x10E60,  // 10E60..10FFF; ARABIC
            0x11000,  // 11000..1107F; BRAHMI
            0x11080,  // 11080..11FFF; KAITHI
            0x12000,  // 12000..12FFF; CUNEIFORM
            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
            0x16800,  // 16800..16A38; BAMUM
            0x1B000,  // 1B000..1B000; KATAKANA
            0x1B001,  // 1B001..1CFFF; HIRAGANA
            0x1D000,  // 1D000..1D166; COMMON
            0x1D167,  // 1D167..1D169; INHERITED
            0x1D16A,  // 1D16A..1D17A; COMMON
            0x1D17B,  // 1D17B..1D182; INHERITED
            0x1D183,  // 1D183..1D184; COMMON
            0x1D185,  // 1D185..1D18B; INHERITED
            0x1D18C,  // 1D18C..1D1A9; COMMON
            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
            0x1D1AE,  // 1D1AE..1D1FF; COMMON
            0x1D200,  // 1D200..1D2FF; GREEK
            0x1D300,  // 1D300..1F1FF; COMMON
            0x1F200,  // 1F200..1F200; HIRAGANA
            0x1F201,  // 1F201..1FFFF; COMMON
            0x20000,  // 20000..E0000; HAN
            0xE0001,  // E0001..E00FF; COMMON
            0xE0100,  // E0100..E01EF; INHERITED
            0xE01F0   // E01F0..10FFFF; UNKNOWN

        };

        // Script of each range in scriptStarts[]: scripts[i] is the script
        // of the range that begins at scriptStarts[i]. of(int) returns the
        // entry at the index it derives from the binary search.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            INHERITED,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
UNKNOWN 4176 }; 4177 4178 private static HashMap<String, Character.UnicodeScript> aliases; 4179 static { 4180 aliases = new HashMap<>(128); 4181 aliases.put("ARAB", ARABIC); 4182 aliases.put("ARMI", IMPERIAL_ARAMAIC); 4183 aliases.put("ARMN", ARMENIAN); 4184 aliases.put("AVST", AVESTAN); 4185 aliases.put("BALI", BALINESE); 4186 aliases.put("BAMU", BAMUM); 4187 aliases.put("BENG", BENGALI); 4188 aliases.put("BOPO", BOPOMOFO); 4189 aliases.put("BRAI", BRAILLE); 4190 aliases.put("BUGI", BUGINESE); 4191 aliases.put("BUHD", BUHID); 4192 aliases.put("CANS", CANADIAN_ABORIGINAL); 4193 aliases.put("CARI", CARIAN); 4194 aliases.put("CHAM", CHAM); 4195 aliases.put("CHER", CHEROKEE); 4196 aliases.put("COPT", COPTIC); 4197 aliases.put("CPRT", CYPRIOT); 4198 aliases.put("CYRL", CYRILLIC); 4199 aliases.put("DEVA", DEVANAGARI); 4200 aliases.put("DSRT", DESERET); 4201 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 4202 aliases.put("ETHI", ETHIOPIC); 4203 aliases.put("GEOR", GEORGIAN); 4204 aliases.put("GLAG", GLAGOLITIC); 4205 aliases.put("GOTH", GOTHIC); 4206 aliases.put("GREK", GREEK); 4207 aliases.put("GUJR", GUJARATI); 4208 aliases.put("GURU", GURMUKHI); 4209 aliases.put("HANG", HANGUL); 4210 aliases.put("HANI", HAN); 4211 aliases.put("HANO", HANUNOO); 4212 aliases.put("HEBR", HEBREW); 4213 aliases.put("HIRA", HIRAGANA); 4214 // it appears we don't have the KATAKANA_OR_HIRAGANA 4215 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA); 4216 aliases.put("ITAL", OLD_ITALIC); 4217 aliases.put("JAVA", JAVANESE); 4218 aliases.put("KALI", KAYAH_LI); 4219 aliases.put("KANA", KATAKANA); 4220 aliases.put("KHAR", KHAROSHTHI); 4221 aliases.put("KHMR", KHMER); 4222 aliases.put("KNDA", KANNADA); 4223 aliases.put("KTHI", KAITHI); 4224 aliases.put("LANA", TAI_THAM); 4225 aliases.put("LAOO", LAO); 4226 aliases.put("LATN", LATIN); 4227 aliases.put("LEPC", LEPCHA); 4228 aliases.put("LIMB", LIMBU); 4229 aliases.put("LINB", LINEAR_B); 4230 aliases.put("LISU", LISU); 4231 aliases.put("LYCI", LYCIAN); 4232 
aliases.put("LYDI", LYDIAN); 4233 aliases.put("MLYM", MALAYALAM); 4234 aliases.put("MONG", MONGOLIAN); 4235 aliases.put("MTEI", MEETEI_MAYEK); 4236 aliases.put("MYMR", MYANMAR); 4237 aliases.put("NKOO", NKO); 4238 aliases.put("OGAM", OGHAM); 4239 aliases.put("OLCK", OL_CHIKI); 4240 aliases.put("ORKH", OLD_TURKIC); 4241 aliases.put("ORYA", ORIYA); 4242 aliases.put("OSMA", OSMANYA); 4243 aliases.put("PHAG", PHAGS_PA); 4244 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 4245 aliases.put("PHNX", PHOENICIAN); 4246 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 4247 aliases.put("RJNG", REJANG); 4248 aliases.put("RUNR", RUNIC); 4249 aliases.put("SAMR", SAMARITAN); 4250 aliases.put("SARB", OLD_SOUTH_ARABIAN); 4251 aliases.put("SAUR", SAURASHTRA); 4252 aliases.put("SHAW", SHAVIAN); 4253 aliases.put("SINH", SINHALA); 4254 aliases.put("SUND", SUNDANESE); 4255 aliases.put("SYLO", SYLOTI_NAGRI); 4256 aliases.put("SYRC", SYRIAC); 4257 aliases.put("TAGB", TAGBANWA); 4258 aliases.put("TALE", TAI_LE); 4259 aliases.put("TALU", NEW_TAI_LUE); 4260 aliases.put("TAML", TAMIL); 4261 aliases.put("TAVT", TAI_VIET); 4262 aliases.put("TELU", TELUGU); 4263 aliases.put("TFNG", TIFINAGH); 4264 aliases.put("TGLG", TAGALOG); 4265 aliases.put("THAA", THAANA); 4266 aliases.put("THAI", THAI); 4267 aliases.put("TIBT", TIBETAN); 4268 aliases.put("UGAR", UGARITIC); 4269 aliases.put("VAII", VAI); 4270 aliases.put("XPEO", OLD_PERSIAN); 4271 aliases.put("XSUX", CUNEIFORM); 4272 aliases.put("YIII", YI); 4273 aliases.put("ZINH", INHERITED); 4274 aliases.put("ZYYY", COMMON); 4275 aliases.put("ZZZZ", UNKNOWN); 4276 } 4277 4278 /** 4279 * Returns the enum constant representing the Unicode script of which 4280 * the given character (Unicode code point) is assigned to. 4281 * 4282 * @param codePoint the character (Unicode code point) in question. 4283 * @return The <code>UnicodeScript</code> constant representing the 4284 * Unicode script of which this character is assigned to. 
4285 * 4286 * @exception IllegalArgumentException if the specified 4287 * <code>codePoint</code> is an invalid Unicode code point. 4288 * @see Character#isValidCodePoint(int) 4289 * 4290 */ 4291 public static UnicodeScript of(int codePoint) { 4292 if (!isValidCodePoint(codePoint)) 4293 throw new IllegalArgumentException(); 4294 int type = getType(codePoint); 4295 // leave SURROGATE and PRIVATE_USE for table lookup 4296 if (type == UNASSIGNED) 4297 return UNKNOWN; 4298 int index = Arrays.binarySearch(scriptStarts, codePoint); 4299 if (index < 0) 4300 index = -index - 2; 4301 return scripts[index]; 4302 } 4303 4304 /** 4305 * Returns the UnicodeScript constant with the given Unicode script 4306 * name or the script name alias. Script names and their aliases are 4307 * determined by The Unicode Standard. The files Scripts<version>.txt 4308 * and PropertyValueAliases<version>.txt define script names 4309 * and the script name aliases for a particular version of the 4310 * standard. The {@link Character} class specifies the version of 4311 * the standard that it supports. 4312 * <p> 4313 * Character case is ignored for all of the valid script names. 4314 * The en_US locale's case mapping rules are used to provide 4315 * case-insensitive string comparisons for script name validation. 4316 * <p> 4317 * 4318 * @param scriptName A <code>UnicodeScript</code> name. 4319 * @return The <code>UnicodeScript</code> constant identified 4320 * by <code>scriptName</code> 4321 * @throws IllegalArgumentException if <code>scriptName</code> is an 4322 * invalid name 4323 * @throws NullPointerException if <code>scriptName</code> is null 4324 */ 4325 public static final UnicodeScript forName(String scriptName) { 4326 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 4327 //.replace(' ', '_')); 4328 UnicodeScript sc = aliases.get(scriptName); 4329 if (sc != null) 4330 return sc; 4331 return valueOf(scriptName); 4332 } 4333 } 4334 4335 /** 4336 * The value of the <code>Character</code>. 
4337 * 4338 * @serial 4339 */ 4340 private final char value; 4341 4342 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 4343 private static final long serialVersionUID = 3786198910865385080L; 4344 4345 /** 4346 * Constructs a newly allocated <code>Character</code> object that 4347 * represents the specified <code>char</code> value. 4348 * 4349 * @param value the value to be represented by the 4350 * <code>Character</code> object. 4351 */ 4352 public Character(char value) { 4353 this.value = value; 4354 } 4355 4356 private static class CharacterCache { 4357 private CharacterCache(){} 4358 4359 static final Character cache[] = new Character[127 + 1]; 4360 4361 static { 4362 for (int i = 0; i < cache.length; i++) 4363 cache[i] = new Character((char)i); 4364 } 4365 } 4366 4367 /** 4368 * Returns a <tt>Character</tt> instance representing the specified 4369 * <tt>char</tt> value. 4370 * If a new <tt>Character</tt> instance is not required, this method 4371 * should generally be used in preference to the constructor 4372 * {@link #Character(char)}, as this method is likely to yield 4373 * significantly better space and time performance by caching 4374 * frequently requested values. 4375 * 4376 * This method will always cache values in the range {@code 4377 * '\u005Cu0000'} to {@code '\u005Cu007f'}, inclusive, and may 4378 * cache other values outside of this range. 4379 * 4380 * @param c a char value. 4381 * @return a <tt>Character</tt> instance representing <tt>c</tt>. 4382 * @since 1.5 4383 */ 4384 public static Character valueOf(char c) { 4385 if (c <= 127) { // must cache 4386 return CharacterCache.cache[(int)c]; 4387 } 4388 return new Character(c); 4389 } 4390 4391 /** 4392 * Returns the value of this <code>Character</code> object. 4393 * @return the primitive <code>char</code> value represented by 4394 * this object. 
4395 */ 4396 public char charValue() { 4397 return value; 4398 } 4399 4400 /** 4401 * Returns a hash code for this {@code Character}; equal to the result 4402 * of invoking {@code charValue()}. 4403 * 4404 * @return a hash code value for this {@code Character} 4405 */ 4406 public int hashCode() { 4407 return (int)value; 4408 } 4409 4410 /** 4411 * Compares this object against the specified object. 4412 * The result is <code>true</code> if and only if the argument is not 4413 * <code>null</code> and is a <code>Character</code> object that 4414 * represents the same <code>char</code> value as this object. 4415 * 4416 * @param obj the object to compare with. 4417 * @return <code>true</code> if the objects are the same; 4418 * <code>false</code> otherwise. 4419 */ 4420 public boolean equals(Object obj) { 4421 if (obj instanceof Character) { 4422 return value == ((Character)obj).charValue(); 4423 } 4424 return false; 4425 } 4426 4427 /** 4428 * Returns a <code>String</code> object representing this 4429 * <code>Character</code>'s value. The result is a string of 4430 * length 1 whose sole component is the primitive 4431 * <code>char</code> value represented by this 4432 * <code>Character</code> object. 4433 * 4434 * @return a string representation of this object. 4435 */ 4436 public String toString() { 4437 char buf[] = {value}; 4438 return String.valueOf(buf); 4439 } 4440 4441 /** 4442 * Returns a <code>String</code> object representing the 4443 * specified <code>char</code>. The result is a string of length 4444 * 1 consisting solely of the specified <code>char</code>. 
4445 * 4446 * @param c the <code>char</code> to be converted 4447 * @return the string representation of the specified <code>char</code> 4448 * @since 1.4 4449 */ 4450 public static String toString(char c) { 4451 return String.valueOf(c); 4452 } 4453 4454 /** 4455 * Determines whether the specified code point is a valid 4456 * <a href="http://www.unicode.org/glossary/#code_point"> 4457 * Unicode code point value</a>. 4458 * 4459 * @param codePoint the Unicode code point to be tested 4460 * @return {@code true} if the specified code point value is between 4461 * {@link #MIN_CODE_POINT} and 4462 * {@link #MAX_CODE_POINT} inclusive; 4463 * {@code false} otherwise. 4464 * @since 1.5 4465 */ 4466 public static boolean isValidCodePoint(int codePoint) { 4467 // Optimized form of: 4468 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 4469 int plane = codePoint >>> 16; 4470 return plane < ((MAX_CODE_POINT + 1) >>> 16); 4471 } 4472 4473 /** 4474 * Determines whether the specified character (Unicode code point) 4475 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 4476 * Such code points can be represented using a single {@code char}. 4477 * 4478 * @param codePoint the character (Unicode code point) to be tested 4479 * @return {@code true} if the specified code point is between 4480 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 4481 * {@code false} otherwise. 4482 * @since 1.7 4483 */ 4484 public static boolean isBmpCodePoint(int codePoint) { 4485 return codePoint >>> 16 == 0; 4486 // Optimized form of: 4487 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 4488 // We consistently use logical shift (>>>) to facilitate 4489 // additional runtime optimizations. 4490 } 4491 4492 /** 4493 * Determines whether the specified character (Unicode code point) 4494 * is in the <a href="#supplementary">supplementary character</a> range. 
4495 * 4496 * @param codePoint the character (Unicode code point) to be tested 4497 * @return {@code true} if the specified code point is between 4498 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 4499 * {@link #MAX_CODE_POINT} inclusive; 4500 * {@code false} otherwise. 4501 * @since 1.5 4502 */ 4503 public static boolean isSupplementaryCodePoint(int codePoint) { 4504 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 4505 && codePoint < MAX_CODE_POINT + 1; 4506 } 4507 4508 /** 4509 * Determines if the given {@code char} value is a 4510 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4511 * Unicode high-surrogate code unit</a> 4512 * (also known as <i>leading-surrogate code unit</i>). 4513 * 4514 * <p>Such values do not represent characters by themselves, 4515 * but are used in the representation of 4516 * <a href="#supplementary">supplementary characters</a> 4517 * in the UTF-16 encoding. 4518 * 4519 * @param ch the {@code char} value to be tested. 4520 * @return {@code true} if the {@code char} value is between 4521 * {@link #MIN_HIGH_SURROGATE} and 4522 * {@link #MAX_HIGH_SURROGATE} inclusive; 4523 * {@code false} otherwise. 4524 * @see Character#isLowSurrogate(char) 4525 * @see Character.UnicodeBlock#of(int) 4526 * @since 1.5 4527 */ 4528 public static boolean isHighSurrogate(char ch) { 4529 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 4530 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 4531 } 4532 4533 /** 4534 * Determines if the given {@code char} value is a 4535 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4536 * Unicode low-surrogate code unit</a> 4537 * (also known as <i>trailing-surrogate code unit</i>). 4538 * 4539 * <p>Such values do not represent characters by themselves, 4540 * but are used in the representation of 4541 * <a href="#supplementary">supplementary characters</a> 4542 * in the UTF-16 encoding. 
4543 * 4544 * @param ch the {@code char} value to be tested. 4545 * @return {@code true} if the {@code char} value is between 4546 * {@link #MIN_LOW_SURROGATE} and 4547 * {@link #MAX_LOW_SURROGATE} inclusive; 4548 * {@code false} otherwise. 4549 * @see Character#isHighSurrogate(char) 4550 * @since 1.5 4551 */ 4552 public static boolean isLowSurrogate(char ch) { 4553 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 4554 } 4555 4556 /** 4557 * Determines if the given {@code char} value is a Unicode 4558 * <i>surrogate code unit</i>. 4559 * 4560 * <p>Such values do not represent characters by themselves, 4561 * but are used in the representation of 4562 * <a href="#supplementary">supplementary characters</a> 4563 * in the UTF-16 encoding. 4564 * 4565 * <p>A char value is a surrogate code unit if and only if it is either 4566 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 4567 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 4568 * 4569 * @param ch the {@code char} value to be tested. 4570 * @return {@code true} if the {@code char} value is between 4571 * {@link #MIN_SURROGATE} and 4572 * {@link #MAX_SURROGATE} inclusive; 4573 * {@code false} otherwise. 4574 * @since 1.7 4575 */ 4576 public static boolean isSurrogate(char ch) { 4577 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 4578 } 4579 4580 /** 4581 * Determines whether the specified pair of <code>char</code> 4582 * values is a valid 4583 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4584 * Unicode surrogate pair</a>. 
4585 4586 * <p>This method is equivalent to the expression: 4587 * <blockquote><pre> 4588 * isHighSurrogate(high) && isLowSurrogate(low) 4589 * </pre></blockquote> 4590 * 4591 * @param high the high-surrogate code value to be tested 4592 * @param low the low-surrogate code value to be tested 4593 * @return <code>true</code> if the specified high and 4594 * low-surrogate code values represent a valid surrogate pair; 4595 * <code>false</code> otherwise. 4596 * @since 1.5 4597 */ 4598 public static boolean isSurrogatePair(char high, char low) { 4599 return isHighSurrogate(high) && isLowSurrogate(low); 4600 } 4601 4602 /** 4603 * Determines the number of <code>char</code> values needed to 4604 * represent the specified character (Unicode code point). If the 4605 * specified character is equal to or greater than 0x10000, then 4606 * the method returns 2. Otherwise, the method returns 1. 4607 * 4608 * <p>This method doesn't validate the specified character to be a 4609 * valid Unicode code point. The caller must validate the 4610 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 4611 * if necessary. 4612 * 4613 * @param codePoint the character (Unicode code point) to be tested. 4614 * @return 2 if the character is a valid supplementary character; 1 otherwise. 4615 * @see Character#isSupplementaryCodePoint(int) 4616 * @since 1.5 4617 */ 4618 public static int charCount(int codePoint) { 4619 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 4620 } 4621 4622 /** 4623 * Converts the specified surrogate pair to its supplementary code 4624 * point value. This method does not validate the specified 4625 * surrogate pair. The caller must validate it using {@link 4626 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 4627 * 4628 * @param high the high-surrogate code unit 4629 * @param low the low-surrogate code unit 4630 * @return the supplementary code point composed from the 4631 * specified surrogate pair. 
4632 * @since 1.5 4633 */ 4634 public static int toCodePoint(char high, char low) { 4635 // Optimized form of: 4636 // return ((high - MIN_HIGH_SURROGATE) << 10) 4637 // + (low - MIN_LOW_SURROGATE) 4638 // + MIN_SUPPLEMENTARY_CODE_POINT; 4639 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 4640 - (MIN_HIGH_SURROGATE << 10) 4641 - MIN_LOW_SURROGATE); 4642 } 4643 4644 /** 4645 * Returns the code point at the given index of the 4646 * <code>CharSequence</code>. If the <code>char</code> value at 4647 * the given index in the <code>CharSequence</code> is in the 4648 * high-surrogate range, the following index is less than the 4649 * length of the <code>CharSequence</code>, and the 4650 * <code>char</code> value at the following index is in the 4651 * low-surrogate range, then the supplementary code point 4652 * corresponding to this surrogate pair is returned. Otherwise, 4653 * the <code>char</code> value at the given index is returned. 4654 * 4655 * @param seq a sequence of <code>char</code> values (Unicode code 4656 * units) 4657 * @param index the index to the <code>char</code> values (Unicode 4658 * code units) in <code>seq</code> to be converted 4659 * @return the Unicode code point at the given index 4660 * @exception NullPointerException if <code>seq</code> is null. 4661 * @exception IndexOutOfBoundsException if the value 4662 * <code>index</code> is negative or not less than 4663 * {@link CharSequence#length() seq.length()}. 4664 * @since 1.5 4665 */ 4666 public static int codePointAt(CharSequence seq, int index) { 4667 char c1 = seq.charAt(index++); 4668 if (isHighSurrogate(c1)) { 4669 if (index < seq.length()) { 4670 char c2 = seq.charAt(index); 4671 if (isLowSurrogate(c2)) { 4672 return toCodePoint(c1, c2); 4673 } 4674 } 4675 } 4676 return c1; 4677 } 4678 4679 /** 4680 * Returns the code point at the given index of the 4681 * <code>char</code> array. 
If the <code>char</code> value at 4682 * the given index in the <code>char</code> array is in the 4683 * high-surrogate range, the following index is less than the 4684 * length of the <code>char</code> array, and the 4685 * <code>char</code> value at the following index is in the 4686 * low-surrogate range, then the supplementary code point 4687 * corresponding to this surrogate pair is returned. Otherwise, 4688 * the <code>char</code> value at the given index is returned. 4689 * 4690 * @param a the <code>char</code> array 4691 * @param index the index to the <code>char</code> values (Unicode 4692 * code units) in the <code>char</code> array to be converted 4693 * @return the Unicode code point at the given index 4694 * @exception NullPointerException if <code>a</code> is null. 4695 * @exception IndexOutOfBoundsException if the value 4696 * <code>index</code> is negative or not less than 4697 * the length of the <code>char</code> array. 4698 * @since 1.5 4699 */ 4700 public static int codePointAt(char[] a, int index) { 4701 return codePointAtImpl(a, index, a.length); 4702 } 4703 4704 /** 4705 * Returns the code point at the given index of the 4706 * <code>char</code> array, where only array elements with 4707 * <code>index</code> less than <code>limit</code> can be used. If 4708 * the <code>char</code> value at the given index in the 4709 * <code>char</code> array is in the high-surrogate range, the 4710 * following index is less than the <code>limit</code>, and the 4711 * <code>char</code> value at the following index is in the 4712 * low-surrogate range, then the supplementary code point 4713 * corresponding to this surrogate pair is returned. Otherwise, 4714 * the <code>char</code> value at the given index is returned. 
4715 * 4716 * @param a the <code>char</code> array 4717 * @param index the index to the <code>char</code> values (Unicode 4718 * code units) in the <code>char</code> array to be converted 4719 * @param limit the index after the last array element that can be used in the 4720 * <code>char</code> array 4721 * @return the Unicode code point at the given index 4722 * @exception NullPointerException if <code>a</code> is null. 4723 * @exception IndexOutOfBoundsException if the <code>index</code> 4724 * argument is negative or not less than the <code>limit</code> 4725 * argument, or if the <code>limit</code> argument is negative or 4726 * greater than the length of the <code>char</code> array. 4727 * @since 1.5 4728 */ 4729 public static int codePointAt(char[] a, int index, int limit) { 4730 if (index >= limit || limit < 0 || limit > a.length) { 4731 throw new IndexOutOfBoundsException(); 4732 } 4733 return codePointAtImpl(a, index, limit); 4734 } 4735 4736 // throws ArrayIndexOutofBoundsException if index out of bounds 4737 static int codePointAtImpl(char[] a, int index, int limit) { 4738 char c1 = a[index++]; 4739 if (isHighSurrogate(c1)) { 4740 if (index < limit) { 4741 char c2 = a[index]; 4742 if (isLowSurrogate(c2)) { 4743 return toCodePoint(c1, c2); 4744 } 4745 } 4746 } 4747 return c1; 4748 } 4749 4750 /** 4751 * Returns the code point preceding the given index of the 4752 * <code>CharSequence</code>. If the <code>char</code> value at 4753 * <code>(index - 1)</code> in the <code>CharSequence</code> is in 4754 * the low-surrogate range, <code>(index - 2)</code> is not 4755 * negative, and the <code>char</code> value at <code>(index - 4756 * 2)</code> in the <code>CharSequence</code> is in the 4757 * high-surrogate range, then the supplementary code point 4758 * corresponding to this surrogate pair is returned. Otherwise, 4759 * the <code>char</code> value at <code>(index - 1)</code> is 4760 * returned. 
4761 * 4762 * @param seq the <code>CharSequence</code> instance 4763 * @param index the index following the code point that should be returned 4764 * @return the Unicode code point value before the given index. 4765 * @exception NullPointerException if <code>seq</code> is null. 4766 * @exception IndexOutOfBoundsException if the <code>index</code> 4767 * argument is less than 1 or greater than {@link 4768 * CharSequence#length() seq.length()}. 4769 * @since 1.5 4770 */ 4771 public static int codePointBefore(CharSequence seq, int index) { 4772 char c2 = seq.charAt(--index); 4773 if (isLowSurrogate(c2)) { 4774 if (index > 0) { 4775 char c1 = seq.charAt(--index); 4776 if (isHighSurrogate(c1)) { 4777 return toCodePoint(c1, c2); 4778 } 4779 } 4780 } 4781 return c2; 4782 } 4783 4784 /** 4785 * Returns the code point preceding the given index of the 4786 * <code>char</code> array. If the <code>char</code> value at 4787 * <code>(index - 1)</code> in the <code>char</code> array is in 4788 * the low-surrogate range, <code>(index - 2)</code> is not 4789 * negative, and the <code>char</code> value at <code>(index - 4790 * 2)</code> in the <code>char</code> array is in the 4791 * high-surrogate range, then the supplementary code point 4792 * corresponding to this surrogate pair is returned. Otherwise, 4793 * the <code>char</code> value at <code>(index - 1)</code> is 4794 * returned. 4795 * 4796 * @param a the <code>char</code> array 4797 * @param index the index following the code point that should be returned 4798 * @return the Unicode code point value before the given index. 4799 * @exception NullPointerException if <code>a</code> is null. 
4800 * @exception IndexOutOfBoundsException if the <code>index</code> 4801 * argument is less than 1 or greater than the length of the 4802 * <code>char</code> array 4803 * @since 1.5 4804 */ 4805 public static int codePointBefore(char[] a, int index) { 4806 return codePointBeforeImpl(a, index, 0); 4807 } 4808 4809 /** 4810 * Returns the code point preceding the given index of the 4811 * <code>char</code> array, where only array elements with 4812 * <code>index</code> greater than or equal to <code>start</code> 4813 * can be used. If the <code>char</code> value at <code>(index - 4814 * 1)</code> in the <code>char</code> array is in the 4815 * low-surrogate range, <code>(index - 2)</code> is not less than 4816 * <code>start</code>, and the <code>char</code> value at 4817 * <code>(index - 2)</code> in the <code>char</code> array is in 4818 * the high-surrogate range, then the supplementary code point 4819 * corresponding to this surrogate pair is returned. Otherwise, 4820 * the <code>char</code> value at <code>(index - 1)</code> is 4821 * returned. 4822 * 4823 * @param a the <code>char</code> array 4824 * @param index the index following the code point that should be returned 4825 * @param start the index of the first array element in the 4826 * <code>char</code> array 4827 * @return the Unicode code point value before the given index. 4828 * @exception NullPointerException if <code>a</code> is null. 4829 * @exception IndexOutOfBoundsException if the <code>index</code> 4830 * argument is not greater than the <code>start</code> argument or 4831 * is greater than the length of the <code>char</code> array, or 4832 * if the <code>start</code> argument is negative or not less than 4833 * the length of the <code>char</code> array. 
4834 * @since 1.5 4835 */ 4836 public static int codePointBefore(char[] a, int index, int start) { 4837 if (index <= start || start < 0 || start >= a.length) { 4838 throw new IndexOutOfBoundsException(); 4839 } 4840 return codePointBeforeImpl(a, index, start); 4841 } 4842 4843 // throws ArrayIndexOutofBoundsException if index-1 out of bounds 4844 static int codePointBeforeImpl(char[] a, int index, int start) { 4845 char c2 = a[--index]; 4846 if (isLowSurrogate(c2)) { 4847 if (index > start) { 4848 char c1 = a[--index]; 4849 if (isHighSurrogate(c1)) { 4850 return toCodePoint(c1, c2); 4851 } 4852 } 4853 } 4854 return c2; 4855 } 4856 4857 /** 4858 * Returns the leading surrogate (a 4859 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 4860 * high surrogate code unit</a>) of the 4861 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4862 * surrogate pair</a> 4863 * representing the specified supplementary character (Unicode 4864 * code point) in the UTF-16 encoding. If the specified character 4865 * is not a 4866 * <a href="Character.html#supplementary">supplementary character</a>, 4867 * an unspecified {@code char} is returned. 4868 * 4869 * <p>If 4870 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4871 * is {@code true}, then 4872 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 4873 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 4874 * are also always {@code true}. 
4875 * 4876 * @param codePoint a supplementary character (Unicode code point) 4877 * @return the leading surrogate code unit used to represent the 4878 * character in the UTF-16 encoding 4879 * @since 1.7 4880 */ 4881 public static char highSurrogate(int codePoint) { 4882 return (char) ((codePoint >>> 10) 4883 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 4884 } 4885 4886 /** 4887 * Returns the trailing surrogate (a 4888 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 4889 * low surrogate code unit</a>) of the 4890 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 4891 * surrogate pair</a> 4892 * representing the specified supplementary character (Unicode 4893 * code point) in the UTF-16 encoding. If the specified character 4894 * is not a 4895 * <a href="Character.html#supplementary">supplementary character</a>, 4896 * an unspecified {@code char} is returned. 4897 * 4898 * <p>If 4899 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 4900 * is {@code true}, then 4901 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 4902 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 4903 * are also always {@code true}. 4904 * 4905 * @param codePoint a supplementary character (Unicode code point) 4906 * @return the trailing surrogate code unit used to represent the 4907 * character in the UTF-16 encoding 4908 * @since 1.7 4909 */ 4910 public static char lowSurrogate(int codePoint) { 4911 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 4912 } 4913 4914 /** 4915 * Converts the specified character (Unicode code point) to its 4916 * UTF-16 representation. If the specified code point is a BMP 4917 * (Basic Multilingual Plane or Plane 0) value, the same value is 4918 * stored in <code>dst[dstIndex]</code>, and 1 is returned. 
If the 4919 * specified code point is a supplementary character, its 4920 * surrogate values are stored in <code>dst[dstIndex]</code> 4921 * (high-surrogate) and <code>dst[dstIndex+1]</code> 4922 * (low-surrogate), and 2 is returned. 4923 * 4924 * @param codePoint the character (Unicode code point) to be converted. 4925 * @param dst an array of <code>char</code> in which the 4926 * <code>codePoint</code>'s UTF-16 value is stored. 4927 * @param dstIndex the start index into the <code>dst</code> 4928 * array where the converted value is stored. 4929 * @return 1 if the code point is a BMP code point, 2 if the 4930 * code point is a supplementary code point. 4931 * @exception IllegalArgumentException if the specified 4932 * <code>codePoint</code> is not a valid Unicode code point. 4933 * @exception NullPointerException if the specified <code>dst</code> is null. 4934 * @exception IndexOutOfBoundsException if <code>dstIndex</code> 4935 * is negative or not less than <code>dst.length</code>, or if 4936 * <code>dst</code> at <code>dstIndex</code> doesn't have enough 4937 * array element(s) to store the resulting <code>char</code> 4938 * value(s). (If <code>dstIndex</code> is equal to 4939 * <code>dst.length-1</code> and the specified 4940 * <code>codePoint</code> is a supplementary character, the 4941 * high-surrogate value is not stored in 4942 * <code>dst[dstIndex]</code>.) 4943 * @since 1.5 4944 */ 4945 public static int toChars(int codePoint, char[] dst, int dstIndex) { 4946 if (isBmpCodePoint(codePoint)) { 4947 dst[dstIndex] = (char) codePoint; 4948 return 1; 4949 } else if (isValidCodePoint(codePoint)) { 4950 toSurrogates(codePoint, dst, dstIndex); 4951 return 2; 4952 } else { 4953 throw new IllegalArgumentException(); 4954 } 4955 } 4956 4957 /** 4958 * Converts the specified character (Unicode code point) to its 4959 * UTF-16 representation stored in a <code>char</code> array. 
If 4960 * the specified code point is a BMP (Basic Multilingual Plane or 4961 * Plane 0) value, the resulting <code>char</code> array has 4962 * the same value as <code>codePoint</code>. If the specified code 4963 * point is a supplementary code point, the resulting 4964 * <code>char</code> array has the corresponding surrogate pair. 4965 * 4966 * @param codePoint a Unicode code point 4967 * @return a <code>char</code> array having 4968 * <code>codePoint</code>'s UTF-16 representation. 4969 * @exception IllegalArgumentException if the specified 4970 * <code>codePoint</code> is not a valid Unicode code point. 4971 * @since 1.5 4972 */ 4973 public static char[] toChars(int codePoint) { 4974 if (isBmpCodePoint(codePoint)) { 4975 return new char[] { (char) codePoint }; 4976 } else if (isValidCodePoint(codePoint)) { 4977 char[] result = new char[2]; 4978 toSurrogates(codePoint, result, 0); 4979 return result; 4980 } else { 4981 throw new IllegalArgumentException(); 4982 } 4983 } 4984 4985 static void toSurrogates(int codePoint, char[] dst, int index) { 4986 // We write elements "backwards" to guarantee all-or-nothing 4987 dst[index+1] = lowSurrogate(codePoint); 4988 dst[index] = highSurrogate(codePoint); 4989 } 4990 4991 /** 4992 * Returns the number of Unicode code points in the text range of 4993 * the specified char sequence. The text range begins at the 4994 * specified <code>beginIndex</code> and extends to the 4995 * <code>char</code> at index <code>endIndex - 1</code>. Thus the 4996 * length (in <code>char</code>s) of the text range is 4997 * <code>endIndex-beginIndex</code>. Unpaired surrogates within 4998 * the text range count as one code point each. 4999 * 5000 * @param seq the char sequence 5001 * @param beginIndex the index to the first <code>char</code> of 5002 * the text range. 5003 * @param endIndex the index after the last <code>char</code> of 5004 * the text range. 
5005 * @return the number of Unicode code points in the specified text 5006 * range 5007 * @exception NullPointerException if <code>seq</code> is null. 5008 * @exception IndexOutOfBoundsException if the 5009 * <code>beginIndex</code> is negative, or <code>endIndex</code> 5010 * is larger than the length of the given sequence, or 5011 * <code>beginIndex</code> is larger than <code>endIndex</code>. 5012 * @since 1.5 5013 */ 5014 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 5015 int length = seq.length(); 5016 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 5017 throw new IndexOutOfBoundsException(); 5018 } 5019 int n = endIndex - beginIndex; 5020 for (int i = beginIndex; i < endIndex; ) { 5021 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 5022 isLowSurrogate(seq.charAt(i))) { 5023 n--; 5024 i++; 5025 } 5026 } 5027 return n; 5028 } 5029 5030 /** 5031 * Returns the number of Unicode code points in a subarray of the 5032 * <code>char</code> array argument. The <code>offset</code> 5033 * argument is the index of the first <code>char</code> of the 5034 * subarray and the <code>count</code> argument specifies the 5035 * length of the subarray in <code>char</code>s. Unpaired 5036 * surrogates within the subarray count as one code point each. 5037 * 5038 * @param a the <code>char</code> array 5039 * @param offset the index of the first <code>char</code> in the 5040 * given <code>char</code> array 5041 * @param count the length of the subarray in <code>char</code>s 5042 * @return the number of Unicode code points in the specified subarray 5043 * @exception NullPointerException if <code>a</code> is null. 5044 * @exception IndexOutOfBoundsException if <code>offset</code> or 5045 * <code>count</code> is negative, or if <code>offset + 5046 * count</code> is larger than the length of the given array. 
5047 * @since 1.5 5048 */ 5049 public static int codePointCount(char[] a, int offset, int count) { 5050 if (count > a.length - offset || offset < 0 || count < 0) { 5051 throw new IndexOutOfBoundsException(); 5052 } 5053 return codePointCountImpl(a, offset, count); 5054 } 5055 5056 static int codePointCountImpl(char[] a, int offset, int count) { 5057 int endIndex = offset + count; 5058 int n = count; 5059 for (int i = offset; i < endIndex; ) { 5060 if (isHighSurrogate(a[i++]) && i < endIndex && 5061 isLowSurrogate(a[i])) { 5062 n--; 5063 i++; 5064 } 5065 } 5066 return n; 5067 } 5068 5069 /** 5070 * Returns the index within the given char sequence that is offset 5071 * from the given <code>index</code> by <code>codePointOffset</code> 5072 * code points. Unpaired surrogates within the text range given by 5073 * <code>index</code> and <code>codePointOffset</code> count as 5074 * one code point each. 5075 * 5076 * @param seq the char sequence 5077 * @param index the index to be offset 5078 * @param codePointOffset the offset in code points 5079 * @return the index within the char sequence 5080 * @exception NullPointerException if <code>seq</code> is null. 5081 * @exception IndexOutOfBoundsException if <code>index</code> 5082 * is negative or larger then the length of the char sequence, 5083 * or if <code>codePointOffset</code> is positive and the 5084 * subsequence starting with <code>index</code> has fewer than 5085 * <code>codePointOffset</code> code points, or if 5086 * <code>codePointOffset</code> is negative and the subsequence 5087 * before <code>index</code> has fewer than the absolute value 5088 * of <code>codePointOffset</code> code points. 
5089 * @since 1.5 5090 */ 5091 public static int offsetByCodePoints(CharSequence seq, int index, 5092 int codePointOffset) { 5093 int length = seq.length(); 5094 if (index < 0 || index > length) { 5095 throw new IndexOutOfBoundsException(); 5096 } 5097 5098 int x = index; 5099 if (codePointOffset >= 0) { 5100 int i; 5101 for (i = 0; x < length && i < codePointOffset; i++) { 5102 if (isHighSurrogate(seq.charAt(x++)) && x < length && 5103 isLowSurrogate(seq.charAt(x))) { 5104 x++; 5105 } 5106 } 5107 if (i < codePointOffset) { 5108 throw new IndexOutOfBoundsException(); 5109 } 5110 } else { 5111 int i; 5112 for (i = codePointOffset; x > 0 && i < 0; i++) { 5113 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 5114 isHighSurrogate(seq.charAt(x-1))) { 5115 x--; 5116 } 5117 } 5118 if (i < 0) { 5119 throw new IndexOutOfBoundsException(); 5120 } 5121 } 5122 return x; 5123 } 5124 5125 /** 5126 * Returns the index within the given <code>char</code> subarray 5127 * that is offset from the given <code>index</code> by 5128 * <code>codePointOffset</code> code points. The 5129 * <code>start</code> and <code>count</code> arguments specify a 5130 * subarray of the <code>char</code> array. Unpaired surrogates 5131 * within the text range given by <code>index</code> and 5132 * <code>codePointOffset</code> count as one code point each. 5133 * 5134 * @param a the <code>char</code> array 5135 * @param start the index of the first <code>char</code> of the 5136 * subarray 5137 * @param count the length of the subarray in <code>char</code>s 5138 * @param index the index to be offset 5139 * @param codePointOffset the offset in code points 5140 * @return the index within the subarray 5141 * @exception NullPointerException if <code>a</code> is null. 
5142 * @exception IndexOutOfBoundsException 5143 * if <code>start</code> or <code>count</code> is negative, 5144 * or if <code>start + count</code> is larger than the length of 5145 * the given array, 5146 * or if <code>index</code> is less than <code>start</code> or 5147 * larger then <code>start + count</code>, 5148 * or if <code>codePointOffset</code> is positive and the text range 5149 * starting with <code>index</code> and ending with <code>start 5150 * + count - 1</code> has fewer than <code>codePointOffset</code> code 5151 * points, 5152 * or if <code>codePointOffset</code> is negative and the text range 5153 * starting with <code>start</code> and ending with <code>index 5154 * - 1</code> has fewer than the absolute value of 5155 * <code>codePointOffset</code> code points. 5156 * @since 1.5 5157 */ 5158 public static int offsetByCodePoints(char[] a, int start, int count, 5159 int index, int codePointOffset) { 5160 if (count > a.length-start || start < 0 || count < 0 5161 || index < start || index > start+count) { 5162 throw new IndexOutOfBoundsException(); 5163 } 5164 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 5165 } 5166 5167 static int offsetByCodePointsImpl(char[]a, int start, int count, 5168 int index, int codePointOffset) { 5169 int x = index; 5170 if (codePointOffset >= 0) { 5171 int limit = start + count; 5172 int i; 5173 for (i = 0; x < limit && i < codePointOffset; i++) { 5174 if (isHighSurrogate(a[x++]) && x < limit && 5175 isLowSurrogate(a[x])) { 5176 x++; 5177 } 5178 } 5179 if (i < codePointOffset) { 5180 throw new IndexOutOfBoundsException(); 5181 } 5182 } else { 5183 int i; 5184 for (i = codePointOffset; x > start && i < 0; i++) { 5185 if (isLowSurrogate(a[--x]) && x > start && 5186 isHighSurrogate(a[x-1])) { 5187 x--; 5188 } 5189 } 5190 if (i < 0) { 5191 throw new IndexOutOfBoundsException(); 5192 } 5193 } 5194 return x; 5195 } 5196 5197 /** 5198 * Determines if the specified character is a lowercase character. 
5199 * <p> 5200 * A character is lowercase if its general category type, provided 5201 * by <code>Character.getType(ch)</code>, is 5202 * <code>LOWERCASE_LETTER</code>. 5203 * <p> 5204 * The following are examples of lowercase characters: 5205 * <p><blockquote><pre> 5206 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5207 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5208 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5209 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5210 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5211 * </pre></blockquote> 5212 * <p> Many other Unicode characters are lowercase too. 5213 * 5214 * <p><b>Note:</b> This method cannot handle <a 5215 * href="#supplementary"> supplementary characters</a>. To support 5216 * all Unicode characters, including supplementary characters, use 5217 * the {@link #isLowerCase(int)} method. 5218 * 5219 * @param ch the character to be tested. 5220 * @return <code>true</code> if the character is lowercase; 5221 * <code>false</code> otherwise. 5222 * @see Character#isLowerCase(char) 5223 * @see Character#isTitleCase(char) 5224 * @see Character#toLowerCase(char) 5225 * @see Character#getType(char) 5226 */ 5227 public static boolean isLowerCase(char ch) { 5228 return isLowerCase((int)ch); 5229 } 5230 5231 /** 5232 * Determines if the specified character (Unicode code point) is a 5233 * lowercase character. 5234 * <p> 5235 * A character is lowercase if its general category type, provided 5236 * by {@link Character#getType getType(codePoint)}, is 5237 * <code>LOWERCASE_LETTER</code>. 
5238 * <p> 5239 * The following are examples of lowercase characters: 5240 * <p><blockquote><pre> 5241 * a b c d e f g h i j k l m n o p q r s t u v w x y z 5242 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 5243 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 5244 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 5245 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 5246 * </pre></blockquote> 5247 * <p> Many other Unicode characters are lowercase too. 5248 * 5249 * @param codePoint the character (Unicode code point) to be tested. 5250 * @return <code>true</code> if the character is lowercase; 5251 * <code>false</code> otherwise. 5252 * @see Character#isLowerCase(int) 5253 * @see Character#isTitleCase(int) 5254 * @see Character#toLowerCase(int) 5255 * @see Character#getType(int) 5256 * @since 1.5 5257 */ 5258 public static boolean isLowerCase(int codePoint) { 5259 return getType(codePoint) == Character.LOWERCASE_LETTER; 5260 } 5261 5262 /** 5263 * Determines if the specified character is an uppercase character. 5264 * <p> 5265 * A character is uppercase if its general category type, provided by 5266 * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>. 5267 * <p> 5268 * The following are examples of uppercase characters: 5269 * <p><blockquote><pre> 5270 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5271 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5272 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5273 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5274 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5275 * </pre></blockquote> 5276 * <p> Many other Unicode characters are uppercase too.<p> 5277 * 5278 * <p><b>Note:</b> This method cannot handle <a 5279 * href="#supplementary"> supplementary characters</a>. 
To support 5280 * all Unicode characters, including supplementary characters, use 5281 * the {@link #isUpperCase(int)} method. 5282 * 5283 * @param ch the character to be tested. 5284 * @return <code>true</code> if the character is uppercase; 5285 * <code>false</code> otherwise. 5286 * @see Character#isLowerCase(char) 5287 * @see Character#isTitleCase(char) 5288 * @see Character#toUpperCase(char) 5289 * @see Character#getType(char) 5290 * @since 1.0 5291 */ 5292 public static boolean isUpperCase(char ch) { 5293 return isUpperCase((int)ch); 5294 } 5295 5296 /** 5297 * Determines if the specified character (Unicode code point) is an uppercase character. 5298 * <p> 5299 * A character is uppercase if its general category type, provided by 5300 * {@link Character#getType(int) getType(codePoint)}, is <code>UPPERCASE_LETTER</code>. 5301 * <p> 5302 * The following are examples of uppercase characters: 5303 * <p><blockquote><pre> 5304 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 5305 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 5306 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 5307 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 5308 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 5309 * </pre></blockquote> 5310 * <p> Many other Unicode characters are uppercase too.<p> 5311 * 5312 * @param codePoint the character (Unicode code point) to be tested. 5313 * @return <code>true</code> if the character is uppercase; 5314 * <code>false</code> otherwise. 5315 * @see Character#isLowerCase(int) 5316 * @see Character#isTitleCase(int) 5317 * @see Character#toUpperCase(int) 5318 * @see Character#getType(int) 5319 * @since 1.5 5320 */ 5321 public static boolean isUpperCase(int codePoint) { 5322 return getType(codePoint) == Character.UPPERCASE_LETTER; 5323 } 5324 5325 /** 5326 * Determines if the specified character is a titlecase character. 
5327 * <p> 5328 * A character is a titlecase character if its general 5329 * category type, provided by <code>Character.getType(ch)</code>, 5330 * is <code>TITLECASE_LETTER</code>. 5331 * <p> 5332 * Some characters look like pairs of Latin letters. For example, there 5333 * is an uppercase letter that looks like "LJ" and has a corresponding 5334 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5335 * is the appropriate form to use when rendering a word in lowercase 5336 * with initial capitals, as for a book title. 5337 * <p> 5338 * These are some of the Unicode characters for which this method returns 5339 * <code>true</code>: 5340 * <ul> 5341 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 5342 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 5343 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 5344 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 5345 * </ul> 5346 * <p> Many other Unicode characters are titlecase too.<p> 5347 * 5348 * <p><b>Note:</b> This method cannot handle <a 5349 * href="#supplementary"> supplementary characters</a>. To support 5350 * all Unicode characters, including supplementary characters, use 5351 * the {@link #isTitleCase(int)} method. 5352 * 5353 * @param ch the character to be tested. 5354 * @return <code>true</code> if the character is titlecase; 5355 * <code>false</code> otherwise. 5356 * @see Character#isLowerCase(char) 5357 * @see Character#isUpperCase(char) 5358 * @see Character#toTitleCase(char) 5359 * @see Character#getType(char) 5360 * @since 1.0.2 5361 */ 5362 public static boolean isTitleCase(char ch) { 5363 return isTitleCase((int)ch); 5364 } 5365 5366 /** 5367 * Determines if the specified character (Unicode code point) is a titlecase character. 
5368 * <p> 5369 * A character is a titlecase character if its general 5370 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5371 * is <code>TITLECASE_LETTER</code>. 5372 * <p> 5373 * Some characters look like pairs of Latin letters. For example, there 5374 * is an uppercase letter that looks like "LJ" and has a corresponding 5375 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 5376 * is the appropriate form to use when rendering a word in lowercase 5377 * with initial capitals, as for a book title. 5378 * <p> 5379 * These are some of the Unicode characters for which this method returns 5380 * <code>true</code>: 5381 * <ul> 5382 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> 5383 * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> 5384 * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> 5385 * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> 5386 * </ul> 5387 * <p> Many other Unicode characters are titlecase too.<p> 5388 * 5389 * @param codePoint the character (Unicode code point) to be tested. 5390 * @return <code>true</code> if the character is titlecase; 5391 * <code>false</code> otherwise. 5392 * @see Character#isLowerCase(int) 5393 * @see Character#isUpperCase(int) 5394 * @see Character#toTitleCase(int) 5395 * @see Character#getType(int) 5396 * @since 1.5 5397 */ 5398 public static boolean isTitleCase(int codePoint) { 5399 return getType(codePoint) == Character.TITLECASE_LETTER; 5400 } 5401 5402 /** 5403 * Determines if the specified character is a digit. 5404 * <p> 5405 * A character is a digit if its general category type, provided 5406 * by <code>Character.getType(ch)</code>, is 5407 * <code>DECIMAL_DIGIT_NUMBER</code>. 
5408 * <p> 5409 * Some Unicode character ranges that contain digits: 5410 * <ul> 5411 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 5412 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 5413 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 5414 * Arabic-Indic digits 5415 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 5416 * Extended Arabic-Indic digits 5417 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 5418 * Devanagari digits 5419 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 5420 * Fullwidth digits 5421 * </ul> 5422 * 5423 * Many other character ranges contain digits as well. 5424 * 5425 * <p><b>Note:</b> This method cannot handle <a 5426 * href="#supplementary"> supplementary characters</a>. To support 5427 * all Unicode characters, including supplementary characters, use 5428 * the {@link #isDigit(int)} method. 5429 * 5430 * @param ch the character to be tested. 5431 * @return <code>true</code> if the character is a digit; 5432 * <code>false</code> otherwise. 5433 * @see Character#digit(char, int) 5434 * @see Character#forDigit(int, int) 5435 * @see Character#getType(char) 5436 */ 5437 public static boolean isDigit(char ch) { 5438 return isDigit((int)ch); 5439 } 5440 5441 /** 5442 * Determines if the specified character (Unicode code point) is a digit. 5443 * <p> 5444 * A character is a digit if its general category type, provided 5445 * by {@link Character#getType(int) getType(codePoint)}, is 5446 * <code>DECIMAL_DIGIT_NUMBER</code>. 
5447 * <p> 5448 * Some Unicode character ranges that contain digits: 5449 * <ul> 5450 * <li><code>'\u0030'</code> through <code>'\u0039'</code>, 5451 * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) 5452 * <li><code>'\u0660'</code> through <code>'\u0669'</code>, 5453 * Arabic-Indic digits 5454 * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, 5455 * Extended Arabic-Indic digits 5456 * <li><code>'\u0966'</code> through <code>'\u096F'</code>, 5457 * Devanagari digits 5458 * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, 5459 * Fullwidth digits 5460 * </ul> 5461 * 5462 * Many other character ranges contain digits as well. 5463 * 5464 * @param codePoint the character (Unicode code point) to be tested. 5465 * @return <code>true</code> if the character is a digit; 5466 * <code>false</code> otherwise. 5467 * @see Character#forDigit(int, int) 5468 * @see Character#getType(int) 5469 * @since 1.5 5470 */ 5471 public static boolean isDigit(int codePoint) { 5472 return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; 5473 } 5474 5475 /** 5476 * Determines if a character is defined in Unicode. 5477 * <p> 5478 * A character is defined if at least one of the following is true: 5479 * <ul> 5480 * <li>It has an entry in the UnicodeData file. 5481 * <li>It has a value in a range defined by the UnicodeData file. 5482 * </ul> 5483 * 5484 * <p><b>Note:</b> This method cannot handle <a 5485 * href="#supplementary"> supplementary characters</a>. To support 5486 * all Unicode characters, including supplementary characters, use 5487 * the {@link #isDefined(int)} method. 5488 * 5489 * @param ch the character to be tested 5490 * @return <code>true</code> if the character has a defined meaning 5491 * in Unicode; <code>false</code> otherwise. 
5492 * @see Character#isDigit(char) 5493 * @see Character#isLetter(char) 5494 * @see Character#isLetterOrDigit(char) 5495 * @see Character#isLowerCase(char) 5496 * @see Character#isTitleCase(char) 5497 * @see Character#isUpperCase(char) 5498 * @since 1.0.2 5499 */ 5500 public static boolean isDefined(char ch) { 5501 return isDefined((int)ch); 5502 } 5503 5504 /** 5505 * Determines if a character (Unicode code point) is defined in Unicode. 5506 * <p> 5507 * A character is defined if at least one of the following is true: 5508 * <ul> 5509 * <li>It has an entry in the UnicodeData file. 5510 * <li>It has a value in a range defined by the UnicodeData file. 5511 * </ul> 5512 * 5513 * @param codePoint the character (Unicode code point) to be tested. 5514 * @return <code>true</code> if the character has a defined meaning 5515 * in Unicode; <code>false</code> otherwise. 5516 * @see Character#isDigit(int) 5517 * @see Character#isLetter(int) 5518 * @see Character#isLetterOrDigit(int) 5519 * @see Character#isLowerCase(int) 5520 * @see Character#isTitleCase(int) 5521 * @see Character#isUpperCase(int) 5522 * @since 1.5 5523 */ 5524 public static boolean isDefined(int codePoint) { 5525 return getType(codePoint) != Character.UNASSIGNED; 5526 } 5527 5528 /** 5529 * Determines if the specified character is a letter. 5530 * <p> 5531 * A character is considered to be a letter if its general 5532 * category type, provided by <code>Character.getType(ch)</code>, 5533 * is any of the following: 5534 * <ul> 5535 * <li> <code>UPPERCASE_LETTER</code> 5536 * <li> <code>LOWERCASE_LETTER</code> 5537 * <li> <code>TITLECASE_LETTER</code> 5538 * <li> <code>MODIFIER_LETTER</code> 5539 * <li> <code>OTHER_LETTER</code> 5540 * </ul> 5541 * 5542 * Not all letters have case. Many characters are 5543 * letters but are neither uppercase nor lowercase nor titlecase. 5544 * 5545 * <p><b>Note:</b> This method cannot handle <a 5546 * href="#supplementary"> supplementary characters</a>. 
To support 5547 * all Unicode characters, including supplementary characters, use 5548 * the {@link #isLetter(int)} method. 5549 * 5550 * @param ch the character to be tested. 5551 * @return <code>true</code> if the character is a letter; 5552 * <code>false</code> otherwise. 5553 * @see Character#isDigit(char) 5554 * @see Character#isJavaIdentifierStart(char) 5555 * @see Character#isJavaLetter(char) 5556 * @see Character#isJavaLetterOrDigit(char) 5557 * @see Character#isLetterOrDigit(char) 5558 * @see Character#isLowerCase(char) 5559 * @see Character#isTitleCase(char) 5560 * @see Character#isUnicodeIdentifierStart(char) 5561 * @see Character#isUpperCase(char) 5562 */ 5563 public static boolean isLetter(char ch) { 5564 return isLetter((int)ch); 5565 } 5566 5567 /** 5568 * Determines if the specified character (Unicode code point) is a letter. 5569 * <p> 5570 * A character is considered to be a letter if its general 5571 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 5572 * is any of the following: 5573 * <ul> 5574 * <li> <code>UPPERCASE_LETTER</code> 5575 * <li> <code>LOWERCASE_LETTER</code> 5576 * <li> <code>TITLECASE_LETTER</code> 5577 * <li> <code>MODIFIER_LETTER</code> 5578 * <li> <code>OTHER_LETTER</code> 5579 * </ul> 5580 * 5581 * Not all letters have case. Many characters are 5582 * letters but are neither uppercase nor lowercase nor titlecase. 5583 * 5584 * @param codePoint the character (Unicode code point) to be tested. 5585 * @return <code>true</code> if the character is a letter; 5586 * <code>false</code> otherwise. 
5587 * @see Character#isDigit(int) 5588 * @see Character#isJavaIdentifierStart(int) 5589 * @see Character#isLetterOrDigit(int) 5590 * @see Character#isLowerCase(int) 5591 * @see Character#isTitleCase(int) 5592 * @see Character#isUnicodeIdentifierStart(int) 5593 * @see Character#isUpperCase(int) 5594 * @since 1.5 5595 */ 5596 public static boolean isLetter(int codePoint) { 5597 return ((((1 << Character.UPPERCASE_LETTER) | 5598 (1 << Character.LOWERCASE_LETTER) | 5599 (1 << Character.TITLECASE_LETTER) | 5600 (1 << Character.MODIFIER_LETTER) | 5601 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 5602 != 0; 5603 } 5604 5605 /** 5606 * Determines if the specified character is a letter or digit. 5607 * <p> 5608 * A character is considered to be a letter or digit if either 5609 * <code>Character.isLetter(char ch)</code> or 5610 * <code>Character.isDigit(char ch)</code> returns 5611 * <code>true</code> for the character. 5612 * 5613 * <p><b>Note:</b> This method cannot handle <a 5614 * href="#supplementary"> supplementary characters</a>. To support 5615 * all Unicode characters, including supplementary characters, use 5616 * the {@link #isLetterOrDigit(int)} method. 5617 * 5618 * @param ch the character to be tested. 5619 * @return <code>true</code> if the character is a letter or digit; 5620 * <code>false</code> otherwise. 5621 * @see Character#isDigit(char) 5622 * @see Character#isJavaIdentifierPart(char) 5623 * @see Character#isJavaLetter(char) 5624 * @see Character#isJavaLetterOrDigit(char) 5625 * @see Character#isLetter(char) 5626 * @see Character#isUnicodeIdentifierPart(char) 5627 * @since 1.0.2 5628 */ 5629 public static boolean isLetterOrDigit(char ch) { 5630 return isLetterOrDigit((int)ch); 5631 } 5632 5633 /** 5634 * Determines if the specified character (Unicode code point) is a letter or digit. 
5635 * <p> 5636 * A character is considered to be a letter or digit if either 5637 * {@link #isLetter(int) isLetter(codePoint)} or 5638 * {@link #isDigit(int) isDigit(codePoint)} returns 5639 * <code>true</code> for the character. 5640 * 5641 * @param codePoint the character (Unicode code point) to be tested. 5642 * @return <code>true</code> if the character is a letter or digit; 5643 * <code>false</code> otherwise. 5644 * @see Character#isDigit(int) 5645 * @see Character#isJavaIdentifierPart(int) 5646 * @see Character#isLetter(int) 5647 * @see Character#isUnicodeIdentifierPart(int) 5648 * @since 1.5 5649 */ 5650 public static boolean isLetterOrDigit(int codePoint) { 5651 return ((((1 << Character.UPPERCASE_LETTER) | 5652 (1 << Character.LOWERCASE_LETTER) | 5653 (1 << Character.TITLECASE_LETTER) | 5654 (1 << Character.MODIFIER_LETTER) | 5655 (1 << Character.OTHER_LETTER) | 5656 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 5657 != 0; 5658 } 5659 5660 /** 5661 * Determines if the specified character is permissible as the first 5662 * character in a Java identifier. 5663 * <p> 5664 * A character may start a Java identifier if and only if 5665 * one of the following is true: 5666 * <ul> 5667 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5668 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 5669 * <li> ch is a currency symbol (such as "$") 5670 * <li> ch is a connecting punctuation character (such as "_"). 5671 * </ul> 5672 * 5673 * @param ch the character to be tested. 5674 * @return <code>true</code> if the character may start a Java 5675 * identifier; <code>false</code> otherwise. 
5676 * @see Character#isJavaLetterOrDigit(char) 5677 * @see Character#isJavaIdentifierStart(char) 5678 * @see Character#isJavaIdentifierPart(char) 5679 * @see Character#isLetter(char) 5680 * @see Character#isLetterOrDigit(char) 5681 * @see Character#isUnicodeIdentifierStart(char) 5682 * @since 1.02 5683 * @deprecated Replaced by isJavaIdentifierStart(char). 5684 */ 5685 @Deprecated 5686 public static boolean isJavaLetter(char ch) { 5687 return isJavaIdentifierStart(ch); 5688 } 5689 5690 /** 5691 * Determines if the specified character may be part of a Java 5692 * identifier as other than the first character. 5693 * <p> 5694 * A character may be part of a Java identifier if and only if any 5695 * of the following are true: 5696 * <ul> 5697 * <li> it is a letter 5698 * <li> it is a currency symbol (such as <code>'$'</code>) 5699 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5700 * <li> it is a digit 5701 * <li> it is a numeric letter (such as a Roman numeral character) 5702 * <li> it is a combining mark 5703 * <li> it is a non-spacing mark 5704 * <li> <code>isIdentifierIgnorable</code> returns 5705 * <code>true</code> for the character. 5706 * </ul> 5707 * 5708 * @param ch the character to be tested. 5709 * @return <code>true</code> if the character may be part of a 5710 * Java identifier; <code>false</code> otherwise. 5711 * @see Character#isJavaLetter(char) 5712 * @see Character#isJavaIdentifierStart(char) 5713 * @see Character#isJavaIdentifierPart(char) 5714 * @see Character#isLetter(char) 5715 * @see Character#isLetterOrDigit(char) 5716 * @see Character#isUnicodeIdentifierPart(char) 5717 * @see Character#isIdentifierIgnorable(char) 5718 * @since 1.02 5719 * @deprecated Replaced by isJavaIdentifierPart(char). 
5720 */ 5721 @Deprecated 5722 public static boolean isJavaLetterOrDigit(char ch) { 5723 return isJavaIdentifierPart(ch); 5724 } 5725 5726 /** 5727 * Determines if the specified character is 5728 * permissible as the first character in a Java identifier. 5729 * <p> 5730 * A character may start a Java identifier if and only if 5731 * one of the following conditions is true: 5732 * <ul> 5733 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5734 * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> 5735 * <li> ch is a currency symbol (such as "$") 5736 * <li> ch is a connecting punctuation character (such as "_"). 5737 * </ul> 5738 * 5739 * <p><b>Note:</b> This method cannot handle <a 5740 * href="#supplementary"> supplementary characters</a>. To support 5741 * all Unicode characters, including supplementary characters, use 5742 * the {@link #isJavaIdentifierStart(int)} method. 5743 * 5744 * @param ch the character to be tested. 5745 * @return <code>true</code> if the character may start a Java identifier; 5746 * <code>false</code> otherwise. 5747 * @see Character#isJavaIdentifierPart(char) 5748 * @see Character#isLetter(char) 5749 * @see Character#isUnicodeIdentifierStart(char) 5750 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5751 * @since 1.1 5752 */ 5753 public static boolean isJavaIdentifierStart(char ch) { 5754 return isJavaIdentifierStart((int)ch); 5755 } 5756 5757 /** 5758 * Determines if the character (Unicode code point) is 5759 * permissible as the first character in a Java identifier. 
5760 * <p> 5761 * A character may start a Java identifier if and only if 5762 * one of the following conditions is true: 5763 * <ul> 5764 * <li> {@link #isLetter(int) isLetter(codePoint)} 5765 * returns <code>true</code> 5766 * <li> {@link #getType(int) getType(codePoint)} 5767 * returns <code>LETTER_NUMBER</code> 5768 * <li> the referenced character is a currency symbol (such as "$") 5769 * <li> the referenced character is a connecting punctuation character 5770 * (such as "_"). 5771 * </ul> 5772 * 5773 * @param codePoint the character (Unicode code point) to be tested. 5774 * @return <code>true</code> if the character may start a Java identifier; 5775 * <code>false</code> otherwise. 5776 * @see Character#isJavaIdentifierPart(int) 5777 * @see Character#isLetter(int) 5778 * @see Character#isUnicodeIdentifierStart(int) 5779 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5780 * @since 1.5 5781 */ 5782 public static boolean isJavaIdentifierStart(int codePoint) { 5783 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 5784 } 5785 5786 /** 5787 * Determines if the specified character may be part of a Java 5788 * identifier as other than the first character. 5789 * <p> 5790 * A character may be part of a Java identifier if any of the following 5791 * are true: 5792 * <ul> 5793 * <li> it is a letter 5794 * <li> it is a currency symbol (such as <code>'$'</code>) 5795 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5796 * <li> it is a digit 5797 * <li> it is a numeric letter (such as a Roman numeral character) 5798 * <li> it is a combining mark 5799 * <li> it is a non-spacing mark 5800 * <li> <code>isIdentifierIgnorable</code> returns 5801 * <code>true</code> for the character 5802 * </ul> 5803 * 5804 * <p><b>Note:</b> This method cannot handle <a 5805 * href="#supplementary"> supplementary characters</a>. 
To support 5806 * all Unicode characters, including supplementary characters, use 5807 * the {@link #isJavaIdentifierPart(int)} method. 5808 * 5809 * @param ch the character to be tested. 5810 * @return <code>true</code> if the character may be part of a 5811 * Java identifier; <code>false</code> otherwise. 5812 * @see Character#isIdentifierIgnorable(char) 5813 * @see Character#isJavaIdentifierStart(char) 5814 * @see Character#isLetterOrDigit(char) 5815 * @see Character#isUnicodeIdentifierPart(char) 5816 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5817 * @since 1.1 5818 */ 5819 public static boolean isJavaIdentifierPart(char ch) { 5820 return isJavaIdentifierPart((int)ch); 5821 } 5822 5823 /** 5824 * Determines if the character (Unicode code point) may be part of a Java 5825 * identifier as other than the first character. 5826 * <p> 5827 * A character may be part of a Java identifier if any of the following 5828 * are true: 5829 * <ul> 5830 * <li> it is a letter 5831 * <li> it is a currency symbol (such as <code>'$'</code>) 5832 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5833 * <li> it is a digit 5834 * <li> it is a numeric letter (such as a Roman numeral character) 5835 * <li> it is a combining mark 5836 * <li> it is a non-spacing mark 5837 * <li> {@link #isIdentifierIgnorable(int) 5838 * isIdentifierIgnorable(codePoint)} returns <code>true</code> for 5839 * the character 5840 * </ul> 5841 * 5842 * @param codePoint the character (Unicode code point) to be tested. 5843 * @return <code>true</code> if the character may be part of a 5844 * Java identifier; <code>false</code> otherwise. 
5845 * @see Character#isIdentifierIgnorable(int) 5846 * @see Character#isJavaIdentifierStart(int) 5847 * @see Character#isLetterOrDigit(int) 5848 * @see Character#isUnicodeIdentifierPart(int) 5849 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 5850 * @since 1.5 5851 */ 5852 public static boolean isJavaIdentifierPart(int codePoint) { 5853 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 5854 } 5855 5856 /** 5857 * Determines if the specified character is permissible as the 5858 * first character in a Unicode identifier. 5859 * <p> 5860 * A character may start a Unicode identifier if and only if 5861 * one of the following conditions is true: 5862 * <ul> 5863 * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> 5864 * <li> {@link #getType(char) getType(ch)} returns 5865 * <code>LETTER_NUMBER</code>. 5866 * </ul> 5867 * 5868 * <p><b>Note:</b> This method cannot handle <a 5869 * href="#supplementary"> supplementary characters</a>. To support 5870 * all Unicode characters, including supplementary characters, use 5871 * the {@link #isUnicodeIdentifierStart(int)} method. 5872 * 5873 * @param ch the character to be tested. 5874 * @return <code>true</code> if the character may start a Unicode 5875 * identifier; <code>false</code> otherwise. 5876 * @see Character#isJavaIdentifierStart(char) 5877 * @see Character#isLetter(char) 5878 * @see Character#isUnicodeIdentifierPart(char) 5879 * @since 1.1 5880 */ 5881 public static boolean isUnicodeIdentifierStart(char ch) { 5882 return isUnicodeIdentifierStart((int)ch); 5883 } 5884 5885 /** 5886 * Determines if the specified character (Unicode code point) is permissible as the 5887 * first character in a Unicode identifier. 
5888 * <p> 5889 * A character may start a Unicode identifier if and only if 5890 * one of the following conditions is true: 5891 * <ul> 5892 * <li> {@link #isLetter(int) isLetter(codePoint)} 5893 * returns <code>true</code> 5894 * <li> {@link #getType(int) getType(codePoint)} 5895 * returns <code>LETTER_NUMBER</code>. 5896 * </ul> 5897 * @param codePoint the character (Unicode code point) to be tested. 5898 * @return <code>true</code> if the character may start a Unicode 5899 * identifier; <code>false</code> otherwise. 5900 * @see Character#isJavaIdentifierStart(int) 5901 * @see Character#isLetter(int) 5902 * @see Character#isUnicodeIdentifierPart(int) 5903 * @since 1.5 5904 */ 5905 public static boolean isUnicodeIdentifierStart(int codePoint) { 5906 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 5907 } 5908 5909 /** 5910 * Determines if the specified character may be part of a Unicode 5911 * identifier as other than the first character. 5912 * <p> 5913 * A character may be part of a Unicode identifier if and only if 5914 * one of the following statements is true: 5915 * <ul> 5916 * <li> it is a letter 5917 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5918 * <li> it is a digit 5919 * <li> it is a numeric letter (such as a Roman numeral character) 5920 * <li> it is a combining mark 5921 * <li> it is a non-spacing mark 5922 * <li> <code>isIdentifierIgnorable</code> returns 5923 * <code>true</code> for this character. 5924 * </ul> 5925 * 5926 * <p><b>Note:</b> This method cannot handle <a 5927 * href="#supplementary"> supplementary characters</a>. To support 5928 * all Unicode characters, including supplementary characters, use 5929 * the {@link #isUnicodeIdentifierPart(int)} method. 5930 * 5931 * @param ch the character to be tested. 5932 * @return <code>true</code> if the character may be part of a 5933 * Unicode identifier; <code>false</code> otherwise. 
5934 * @see Character#isIdentifierIgnorable(char) 5935 * @see Character#isJavaIdentifierPart(char) 5936 * @see Character#isLetterOrDigit(char) 5937 * @see Character#isUnicodeIdentifierStart(char) 5938 * @since 1.1 5939 */ 5940 public static boolean isUnicodeIdentifierPart(char ch) { 5941 return isUnicodeIdentifierPart((int)ch); 5942 } 5943 5944 /** 5945 * Determines if the specified character (Unicode code point) may be part of a Unicode 5946 * identifier as other than the first character. 5947 * <p> 5948 * A character may be part of a Unicode identifier if and only if 5949 * one of the following statements is true: 5950 * <ul> 5951 * <li> it is a letter 5952 * <li> it is a connecting punctuation character (such as <code>'_'</code>) 5953 * <li> it is a digit 5954 * <li> it is a numeric letter (such as a Roman numeral character) 5955 * <li> it is a combining mark 5956 * <li> it is a non-spacing mark 5957 * <li> <code>isIdentifierIgnorable</code> returns 5958 * <code>true</code> for this character. 5959 * </ul> 5960 * @param codePoint the character (Unicode code point) to be tested. 5961 * @return <code>true</code> if the character may be part of a 5962 * Unicode identifier; <code>false</code> otherwise. 5963 * @see Character#isIdentifierIgnorable(int) 5964 * @see Character#isJavaIdentifierPart(int) 5965 * @see Character#isLetterOrDigit(int) 5966 * @see Character#isUnicodeIdentifierStart(int) 5967 * @since 1.5 5968 */ 5969 public static boolean isUnicodeIdentifierPart(int codePoint) { 5970 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 5971 } 5972 5973 /** 5974 * Determines if the specified character should be regarded as 5975 * an ignorable character in a Java identifier or a Unicode identifier. 
5976 * <p> 5977 * The following Unicode characters are ignorable in a Java identifier 5978 * or a Unicode identifier: 5979 * <ul> 5980 * <li>ISO control characters that are not whitespace 5981 * <ul> 5982 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 5983 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 5984 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 5985 * </ul> 5986 * 5987 * <li>all characters that have the <code>FORMAT</code> general 5988 * category value 5989 * </ul> 5990 * 5991 * <p><b>Note:</b> This method cannot handle <a 5992 * href="#supplementary"> supplementary characters</a>. To support 5993 * all Unicode characters, including supplementary characters, use 5994 * the {@link #isIdentifierIgnorable(int)} method. 5995 * 5996 * @param ch the character to be tested. 5997 * @return <code>true</code> if the character is an ignorable control 5998 * character that may be part of a Java or Unicode identifier; 5999 * <code>false</code> otherwise. 6000 * @see Character#isJavaIdentifierPart(char) 6001 * @see Character#isUnicodeIdentifierPart(char) 6002 * @since 1.1 6003 */ 6004 public static boolean isIdentifierIgnorable(char ch) { 6005 return isIdentifierIgnorable((int)ch); 6006 } 6007 6008 /** 6009 * Determines if the specified character (Unicode code point) should be regarded as 6010 * an ignorable character in a Java identifier or a Unicode identifier. 
6011 * <p> 6012 * The following Unicode characters are ignorable in a Java identifier 6013 * or a Unicode identifier: 6014 * <ul> 6015 * <li>ISO control characters that are not whitespace 6016 * <ul> 6017 * <li><code>'\u0000'</code> through <code>'\u0008'</code> 6018 * <li><code>'\u000E'</code> through <code>'\u001B'</code> 6019 * <li><code>'\u007F'</code> through <code>'\u009F'</code> 6020 * </ul> 6021 * 6022 * <li>all characters that have the <code>FORMAT</code> general 6023 * category value 6024 * </ul> 6025 * 6026 * @param codePoint the character (Unicode code point) to be tested. 6027 * @return <code>true</code> if the character is an ignorable control 6028 * character that may be part of a Java or Unicode identifier; 6029 * <code>false</code> otherwise. 6030 * @see Character#isJavaIdentifierPart(int) 6031 * @see Character#isUnicodeIdentifierPart(int) 6032 * @since 1.5 6033 */ 6034 public static boolean isIdentifierIgnorable(int codePoint) { 6035 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 6036 } 6037 6038 /** 6039 * Converts the character argument to lowercase using case 6040 * mapping information from the UnicodeData file. 6041 * <p> 6042 * Note that 6043 * <code>Character.isLowerCase(Character.toLowerCase(ch))</code> 6044 * does not always return <code>true</code> for some ranges of 6045 * characters, particularly those that are symbols or ideographs. 6046 * 6047 * <p>In general, {@link String#toLowerCase()} should be used to map 6048 * characters to lowercase. <code>String</code> case mapping methods 6049 * have several benefits over <code>Character</code> case mapping methods. 6050 * <code>String</code> case mapping methods can perform locale-sensitive 6051 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6052 * the <code>Character</code> case mapping methods cannot. 6053 * 6054 * <p><b>Note:</b> This method cannot handle <a 6055 * href="#supplementary"> supplementary characters</a>. 
To support 6056 * all Unicode characters, including supplementary characters, use 6057 * the {@link #toLowerCase(int)} method. 6058 * 6059 * @param ch the character to be converted. 6060 * @return the lowercase equivalent of the character, if any; 6061 * otherwise, the character itself. 6062 * @see Character#isLowerCase(char) 6063 * @see String#toLowerCase() 6064 */ 6065 public static char toLowerCase(char ch) { 6066 return (char)toLowerCase((int)ch); 6067 } 6068 6069 /** 6070 * Converts the character (Unicode code point) argument to 6071 * lowercase using case mapping information from the UnicodeData 6072 * file. 6073 * 6074 * <p> Note that 6075 * <code>Character.isLowerCase(Character.toLowerCase(codePoint))</code> 6076 * does not always return <code>true</code> for some ranges of 6077 * characters, particularly those that are symbols or ideographs. 6078 * 6079 * <p>In general, {@link String#toLowerCase()} should be used to map 6080 * characters to lowercase. <code>String</code> case mapping methods 6081 * have several benefits over <code>Character</code> case mapping methods. 6082 * <code>String</code> case mapping methods can perform locale-sensitive 6083 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6084 * the <code>Character</code> case mapping methods cannot. 6085 * 6086 * @param codePoint the character (Unicode code point) to be converted. 6087 * @return the lowercase equivalent of the character (Unicode code 6088 * point), if any; otherwise, the character itself. 6089 * @see Character#isLowerCase(int) 6090 * @see String#toLowerCase() 6091 * 6092 * @since 1.5 6093 */ 6094 public static int toLowerCase(int codePoint) { 6095 return CharacterData.of(codePoint).toLowerCase(codePoint); 6096 } 6097 6098 /** 6099 * Converts the character argument to uppercase using case mapping 6100 * information from the UnicodeData file. 
6101 * <p> 6102 * Note that 6103 * <code>Character.isUpperCase(Character.toUpperCase(ch))</code> 6104 * does not always return <code>true</code> for some ranges of 6105 * characters, particularly those that are symbols or ideographs. 6106 * 6107 * <p>In general, {@link String#toUpperCase()} should be used to map 6108 * characters to uppercase. <code>String</code> case mapping methods 6109 * have several benefits over <code>Character</code> case mapping methods. 6110 * <code>String</code> case mapping methods can perform locale-sensitive 6111 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6112 * the <code>Character</code> case mapping methods cannot. 6113 * 6114 * <p><b>Note:</b> This method cannot handle <a 6115 * href="#supplementary"> supplementary characters</a>. To support 6116 * all Unicode characters, including supplementary characters, use 6117 * the {@link #toUpperCase(int)} method. 6118 * 6119 * @param ch the character to be converted. 6120 * @return the uppercase equivalent of the character, if any; 6121 * otherwise, the character itself. 6122 * @see Character#isUpperCase(char) 6123 * @see String#toUpperCase() 6124 */ 6125 public static char toUpperCase(char ch) { 6126 return (char)toUpperCase((int)ch); 6127 } 6128 6129 /** 6130 * Converts the character (Unicode code point) argument to 6131 * uppercase using case mapping information from the UnicodeData 6132 * file. 6133 * 6134 * <p>Note that 6135 * <code>Character.isUpperCase(Character.toUpperCase(codePoint))</code> 6136 * does not always return <code>true</code> for some ranges of 6137 * characters, particularly those that are symbols or ideographs. 6138 * 6139 * <p>In general, {@link String#toUpperCase()} should be used to map 6140 * characters to uppercase. <code>String</code> case mapping methods 6141 * have several benefits over <code>Character</code> case mapping methods. 
6142 * <code>String</code> case mapping methods can perform locale-sensitive 6143 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 6144 * the <code>Character</code> case mapping methods cannot. 6145 * 6146 * @param codePoint the character (Unicode code point) to be converted. 6147 * @return the uppercase equivalent of the character, if any; 6148 * otherwise, the character itself. 6149 * @see Character#isUpperCase(int) 6150 * @see String#toUpperCase() 6151 * 6152 * @since 1.5 6153 */ 6154 public static int toUpperCase(int codePoint) { 6155 return CharacterData.of(codePoint).toUpperCase(codePoint); 6156 } 6157 6158 /** 6159 * Converts the character argument to titlecase using case mapping 6160 * information from the UnicodeData file. If a character has no 6161 * explicit titlecase mapping and is not itself a titlecase char 6162 * according to UnicodeData, then the uppercase mapping is 6163 * returned as an equivalent titlecase mapping. If the 6164 * <code>char</code> argument is already a titlecase 6165 * <code>char</code>, the same <code>char</code> value will be 6166 * returned. 6167 * <p> 6168 * Note that 6169 * <code>Character.isTitleCase(Character.toTitleCase(ch))</code> 6170 * does not always return <code>true</code> for some ranges of 6171 * characters. 6172 * 6173 * <p><b>Note:</b> This method cannot handle <a 6174 * href="#supplementary"> supplementary characters</a>. To support 6175 * all Unicode characters, including supplementary characters, use 6176 * the {@link #toTitleCase(int)} method. 6177 * 6178 * @param ch the character to be converted. 6179 * @return the titlecase equivalent of the character, if any; 6180 * otherwise, the character itself. 
6181 * @see Character#isTitleCase(char) 6182 * @see Character#toLowerCase(char) 6183 * @see Character#toUpperCase(char) 6184 * @since 1.0.2 6185 */ 6186 public static char toTitleCase(char ch) { 6187 return (char)toTitleCase((int)ch); 6188 } 6189 6190 /** 6191 * Converts the character (Unicode code point) argument to titlecase using case mapping 6192 * information from the UnicodeData file. If a character has no 6193 * explicit titlecase mapping and is not itself a titlecase char 6194 * according to UnicodeData, then the uppercase mapping is 6195 * returned as an equivalent titlecase mapping. If the 6196 * character argument is already a titlecase 6197 * character, the same character value will be 6198 * returned. 6199 * 6200 * <p>Note that 6201 * <code>Character.isTitleCase(Character.toTitleCase(codePoint))</code> 6202 * does not always return <code>true</code> for some ranges of 6203 * characters. 6204 * 6205 * @param codePoint the character (Unicode code point) to be converted. 6206 * @return the titlecase equivalent of the character, if any; 6207 * otherwise, the character itself. 6208 * @see Character#isTitleCase(int) 6209 * @see Character#toLowerCase(int) 6210 * @see Character#toUpperCase(int) 6211 * @since 1.5 6212 */ 6213 public static int toTitleCase(int codePoint) { 6214 return CharacterData.of(codePoint).toTitleCase(codePoint); 6215 } 6216 6217 /** 6218 * Returns the numeric value of the character <code>ch</code> in the 6219 * specified radix. 6220 * <p> 6221 * If the radix is not in the range <code>MIN_RADIX</code> <= 6222 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 6223 * value of <code>ch</code> is not a valid digit in the specified 6224 * radix, <code>-1</code> is returned. 
A character is a valid digit 6225 * if at least one of the following is true: 6226 * <ul> 6227 * <li>The method <code>isDigit</code> is <code>true</code> of the character 6228 * and the Unicode decimal digit value of the character (or its 6229 * single-character decomposition) is less than the specified radix. 6230 * In this case the decimal digit value is returned. 6231 * <li>The character is one of the uppercase Latin letters 6232 * <code>'A'</code> through <code>'Z'</code> and its code is less than 6233 * <code>radix + 'A' - 10</code>. 6234 * In this case, <code>ch - 'A' + 10</code> 6235 * is returned. 6236 * <li>The character is one of the lowercase Latin letters 6237 * <code>'a'</code> through <code>'z'</code> and its code is less than 6238 * <code>radix + 'a' - 10</code>. 6239 * In this case, <code>ch - 'a' + 10</code> 6240 * is returned. 6241 * </ul> 6242 * 6243 * <p><b>Note:</b> This method cannot handle <a 6244 * href="#supplementary"> supplementary characters</a>. To support 6245 * all Unicode characters, including supplementary characters, use 6246 * the {@link #digit(int, int)} method. 6247 * 6248 * @param ch the character to be converted. 6249 * @param radix the radix. 6250 * @return the numeric value represented by the character in the 6251 * specified radix. 6252 * @see Character#forDigit(int, int) 6253 * @see Character#isDigit(char) 6254 */ 6255 public static int digit(char ch, int radix) { 6256 return digit((int)ch, radix); 6257 } 6258 6259 /** 6260 * Returns the numeric value of the specified character (Unicode 6261 * code point) in the specified radix. 6262 * 6263 * <p>If the radix is not in the range <code>MIN_RADIX</code> <= 6264 * <code>radix</code> <= <code>MAX_RADIX</code> or if the 6265 * character is not a valid digit in the specified 6266 * radix, <code>-1</code> is returned. 
A character is a valid digit 6267 * if at least one of the following is true: 6268 * <ul> 6269 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is <code>true</code> of the character 6270 * and the Unicode decimal digit value of the character (or its 6271 * single-character decomposition) is less than the specified radix. 6272 * In this case the decimal digit value is returned. 6273 * <li>The character is one of the uppercase Latin letters 6274 * <code>'A'</code> through <code>'Z'</code> and its code is less than 6275 * <code>radix + 'A' - 10</code>. 6276 * In this case, <code>ch - 'A' + 10</code> 6277 * is returned. 6278 * <li>The character is one of the lowercase Latin letters 6279 * <code>'a'</code> through <code>'z'</code> and its code is less than 6280 * <code>radix + 'a' - 10</code>. 6281 * In this case, <code>ch - 'a' + 10</code> 6282 * is returned. 6283 * </ul> 6284 * 6285 * @param codePoint the character (Unicode code point) to be converted. 6286 * @param radix the radix. 6287 * @return the numeric value represented by the character in the 6288 * specified radix. 6289 * @see Character#forDigit(int, int) 6290 * @see Character#isDigit(int) 6291 * @since 1.5 6292 */ 6293 public static int digit(int codePoint, int radix) { 6294 return CharacterData.of(codePoint).digit(codePoint, radix); 6295 } 6296 6297 /** 6298 * Returns the <code>int</code> value that the specified Unicode 6299 * character represents. For example, the character 6300 * <code>'\u216C'</code> (the roman numeral fifty) will return 6301 * an int with a value of 50. 6302 * <p> 6303 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 6304 * <code>'\u005A'</code>), lowercase 6305 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 6306 * full width variant (<code>'\uFF21'</code> through 6307 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 6308 * <code>'\uFF5A'</code>) forms have numeric values from 10 6309 * through 35. 
This is independent of the Unicode specification, 6310 * which does not assign numeric values to these <code>char</code> 6311 * values. 6312 * <p> 6313 * If the character does not have a numeric value, then -1 is returned. 6314 * If the character has a numeric value that cannot be represented as a 6315 * nonnegative integer (for example, a fractional value), then -2 6316 * is returned. 6317 * 6318 * <p><b>Note:</b> This method cannot handle <a 6319 * href="#supplementary"> supplementary characters</a>. To support 6320 * all Unicode characters, including supplementary characters, use 6321 * the {@link #getNumericValue(int)} method. 6322 * 6323 * @param ch the character to be converted. 6324 * @return the numeric value of the character, as a nonnegative <code>int</code> 6325 * value; -2 if the character has a numeric value that is not a 6326 * nonnegative integer; -1 if the character has no numeric value. 6327 * @see Character#forDigit(int, int) 6328 * @see Character#isDigit(char) 6329 * @since 1.1 6330 */ 6331 public static int getNumericValue(char ch) { 6332 return getNumericValue((int)ch); 6333 } 6334 6335 /** 6336 * Returns the <code>int</code> value that the specified 6337 * character (Unicode code point) represents. For example, the character 6338 * <code>'\u216C'</code> (the Roman numeral fifty) will return 6339 * an <code>int</code> with a value of 50. 6340 * <p> 6341 * The letters A-Z in their uppercase (<code>'\u0041'</code> through 6342 * <code>'\u005A'</code>), lowercase 6343 * (<code>'\u0061'</code> through <code>'\u007A'</code>), and 6344 * full width variant (<code>'\uFF21'</code> through 6345 * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through 6346 * <code>'\uFF5A'</code>) forms have numeric values from 10 6347 * through 35. This is independent of the Unicode specification, 6348 * which does not assign numeric values to these <code>char</code> 6349 * values. 6350 * <p> 6351 * If the character does not have a numeric value, then -1 is returned. 
6352 * If the character has a numeric value that cannot be represented as a 6353 * nonnegative integer (for example, a fractional value), then -2 6354 * is returned. 6355 * 6356 * @param codePoint the character (Unicode code point) to be converted. 6357 * @return the numeric value of the character, as a nonnegative <code>int</code> 6358 * value; -2 if the character has a numeric value that is not a 6359 * nonnegative integer; -1 if the character has no numeric value. 6360 * @see Character#forDigit(int, int) 6361 * @see Character#isDigit(int) 6362 * @since 1.5 6363 */ 6364 public static int getNumericValue(int codePoint) { 6365 return CharacterData.of(codePoint).getNumericValue(codePoint); 6366 } 6367 6368 /** 6369 * Determines if the specified character is ISO-LATIN-1 white space. 6370 * This method returns <code>true</code> for the following five 6371 * characters only: 6372 * <table> 6373 * <tr><td><code>'\t'</code></td> <td><code>U+0009</code></td> 6374 * <td><code>HORIZONTAL TABULATION</code></td></tr> 6375 * <tr><td><code>'\n'</code></td> <td><code>U+000A</code></td> 6376 * <td><code>NEW LINE</code></td></tr> 6377 * <tr><td><code>'\f'</code></td> <td><code>U+000C</code></td> 6378 * <td><code>FORM FEED</code></td></tr> 6379 * <tr><td><code>'\r'</code></td> <td><code>U+000D</code></td> 6380 * <td><code>CARRIAGE RETURN</code></td></tr> 6381 * <tr><td><code>' '</code></td> <td><code>U+0020</code></td> 6382 * <td><code>SPACE</code></td></tr> 6383 * </table> 6384 * 6385 * @param ch the character to be tested. 6386 * @return <code>true</code> if the character is ISO-LATIN-1 white 6387 * space; <code>false</code> otherwise. 6388 * @see Character#isSpaceChar(char) 6389 * @see Character#isWhitespace(char) 6390 * @deprecated Replaced by isWhitespace(char). 
6391 */ 6392 @Deprecated 6393 public static boolean isSpace(char ch) { 6394 return (ch <= 0x0020) && 6395 (((((1L << 0x0009) | 6396 (1L << 0x000A) | 6397 (1L << 0x000C) | 6398 (1L << 0x000D) | 6399 (1L << 0x0020)) >> ch) & 1L) != 0); 6400 } 6401 6402 6403 /** 6404 * Determines if the specified character is a Unicode space character. 6405 * A character is considered to be a space character if and only if 6406 * it is specified to be a space character by the Unicode standard. This 6407 * method returns true if the character's general category type is any of 6408 * the following: 6409 * <ul> 6410 * <li> <code>SPACE_SEPARATOR</code> 6411 * <li> <code>LINE_SEPARATOR</code> 6412 * <li> <code>PARAGRAPH_SEPARATOR</code> 6413 * </ul> 6414 * 6415 * <p><b>Note:</b> This method cannot handle <a 6416 * href="#supplementary"> supplementary characters</a>. To support 6417 * all Unicode characters, including supplementary characters, use 6418 * the {@link #isSpaceChar(int)} method. 6419 * 6420 * @param ch the character to be tested. 6421 * @return <code>true</code> if the character is a space character; 6422 * <code>false</code> otherwise. 6423 * @see Character#isWhitespace(char) 6424 * @since 1.1 6425 */ 6426 public static boolean isSpaceChar(char ch) { 6427 return isSpaceChar((int)ch); 6428 } 6429 6430 /** 6431 * Determines if the specified character (Unicode code point) is a 6432 * Unicode space character. A character is considered to be a 6433 * space character if and only if it is specified to be a space 6434 * character by the Unicode standard. This method returns true if 6435 * the character's general category type is any of the following: 6436 * 6437 * <ul> 6438 * <li> {@link #SPACE_SEPARATOR} 6439 * <li> {@link #LINE_SEPARATOR} 6440 * <li> {@link #PARAGRAPH_SEPARATOR} 6441 * </ul> 6442 * 6443 * @param codePoint the character (Unicode code point) to be tested. 6444 * @return <code>true</code> if the character is a space character; 6445 * <code>false</code> otherwise. 
6446 * @see Character#isWhitespace(int) 6447 * @since 1.5 6448 */ 6449 public static boolean isSpaceChar(int codePoint) { 6450 return ((((1 << Character.SPACE_SEPARATOR) | 6451 (1 << Character.LINE_SEPARATOR) | 6452 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 6453 != 0; 6454 } 6455 6456 /** 6457 * Determines if the specified character is white space according to Java. 6458 * A character is a Java whitespace character if and only if it satisfies 6459 * one of the following criteria: 6460 * <ul> 6461 * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>, 6462 * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>) 6463 * but is not also a non-breaking space (<code>'\u00A0'</code>, 6464 * <code>'\u2007'</code>, <code>'\u202F'</code>). 6465 * <li> It is <code>'\t'</code>, U+0009 HORIZONTAL TABULATION. 6466 * <li> It is <code>'\n'</code>, U+000A LINE FEED. 6467 * <li> It is <code>'\u000B'</code>, U+000B VERTICAL TABULATION. 6468 * <li> It is <code>'\f'</code>, U+000C FORM FEED. 6469 * <li> It is <code>'\r'</code>, U+000D CARRIAGE RETURN. 6470 * <li> It is <code>'\u001C'</code>, U+001C FILE SEPARATOR. 6471 * <li> It is <code>'\u001D'</code>, U+001D GROUP SEPARATOR. 6472 * <li> It is <code>'\u001E'</code>, U+001E RECORD SEPARATOR. 6473 * <li> It is <code>'\u001F'</code>, U+001F UNIT SEPARATOR. 6474 * </ul> 6475 * 6476 * <p><b>Note:</b> This method cannot handle <a 6477 * href="#supplementary"> supplementary characters</a>. To support 6478 * all Unicode characters, including supplementary characters, use 6479 * the {@link #isWhitespace(int)} method. 6480 * 6481 * @param ch the character to be tested. 6482 * @return <code>true</code> if the character is a Java whitespace 6483 * character; <code>false</code> otherwise. 
6484 * @see Character#isSpaceChar(char) 6485 * @since 1.1 6486 */ 6487 public static boolean isWhitespace(char ch) { 6488 return isWhitespace((int)ch); 6489 } 6490 6491 /** 6492 * Determines if the specified character (Unicode code point) is 6493 * white space according to Java. A character is a Java 6494 * whitespace character if and only if it satisfies one of the 6495 * following criteria: 6496 * <ul> 6497 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 6498 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 6499 * but is not also a non-breaking space (<code>'\u00A0'</code>, 6500 * <code>'\u2007'</code>, <code>'\u202F'</code>). 6501 * <li> It is <code>'\t'</code>, U+0009 HORIZONTAL TABULATION. 6502 * <li> It is <code>'\n'</code>, U+000A LINE FEED. 6503 * <li> It is <code>'\u000B'</code>, U+000B VERTICAL TABULATION. 6504 * <li> It is <code>'\f'</code>, U+000C FORM FEED. 6505 * <li> It is <code>'\r'</code>, U+000D CARRIAGE RETURN. 6506 * <li> It is <code>'\u001C'</code>, U+001C FILE SEPARATOR. 6507 * <li> It is <code>'\u001D'</code>, U+001D GROUP SEPARATOR. 6508 * <li> It is <code>'\u001E'</code>, U+001E RECORD SEPARATOR. 6509 * <li> It is <code>'\u001F'</code>, U+001F UNIT SEPARATOR. 6510 * </ul> 6511 * <p> 6512 * 6513 * @param codePoint the character (Unicode code point) to be tested. 6514 * @return <code>true</code> if the character is a Java whitespace 6515 * character; <code>false</code> otherwise. 6516 * @see Character#isSpaceChar(int) 6517 * @since 1.5 6518 */ 6519 public static boolean isWhitespace(int codePoint) { 6520 return CharacterData.of(codePoint).isWhitespace(codePoint); 6521 } 6522 6523 /** 6524 * Determines if the specified character is an ISO control 6525 * character. A character is considered to be an ISO control 6526 * character if its code is in the range <code>'\u0000'</code> 6527 * through <code>'\u001F'</code> or in the range 6528 * <code>'\u007F'</code> through <code>'\u009F'</code>. 
6529 * 6530 * <p><b>Note:</b> This method cannot handle <a 6531 * href="#supplementary"> supplementary characters</a>. To support 6532 * all Unicode characters, including supplementary characters, use 6533 * the {@link #isISOControl(int)} method. 6534 * 6535 * @param ch the character to be tested. 6536 * @return <code>true</code> if the character is an ISO control character; 6537 * <code>false</code> otherwise. 6538 * 6539 * @see Character#isSpaceChar(char) 6540 * @see Character#isWhitespace(char) 6541 * @since 1.1 6542 */ 6543 public static boolean isISOControl(char ch) { 6544 return isISOControl((int)ch); 6545 } 6546 6547 /** 6548 * Determines if the referenced character (Unicode code point) is an ISO control 6549 * character. A character is considered to be an ISO control 6550 * character if its code is in the range <code>'\u0000'</code> 6551 * through <code>'\u001F'</code> or in the range 6552 * <code>'\u007F'</code> through <code>'\u009F'</code>. 6553 * 6554 * @param codePoint the character (Unicode code point) to be tested. 6555 * @return <code>true</code> if the character is an ISO control character; 6556 * <code>false</code> otherwise. 6557 * @see Character#isSpaceChar(int) 6558 * @see Character#isWhitespace(int) 6559 * @since 1.5 6560 */ 6561 public static boolean isISOControl(int codePoint) { 6562 // Optimized form of: 6563 // (codePoint >= 0x00 && codePoint <= 0x1F) || 6564 // (codePoint >= 0x7F && codePoint <= 0x9F); 6565 return codePoint <= 0x9F && 6566 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 6567 } 6568 6569 /** 6570 * Returns a value indicating a character's general category. 6571 * 6572 * <p><b>Note:</b> This method cannot handle <a 6573 * href="#supplementary"> supplementary characters</a>. To support 6574 * all Unicode characters, including supplementary characters, use 6575 * the {@link #getType(int)} method. 6576 * 6577 * @param ch the character to be tested. 
6578 * @return a value of type <code>int</code> representing the 6579 * character's general category. 6580 * @see Character#COMBINING_SPACING_MARK 6581 * @see Character#CONNECTOR_PUNCTUATION 6582 * @see Character#CONTROL 6583 * @see Character#CURRENCY_SYMBOL 6584 * @see Character#DASH_PUNCTUATION 6585 * @see Character#DECIMAL_DIGIT_NUMBER 6586 * @see Character#ENCLOSING_MARK 6587 * @see Character#END_PUNCTUATION 6588 * @see Character#FINAL_QUOTE_PUNCTUATION 6589 * @see Character#FORMAT 6590 * @see Character#INITIAL_QUOTE_PUNCTUATION 6591 * @see Character#LETTER_NUMBER 6592 * @see Character#LINE_SEPARATOR 6593 * @see Character#LOWERCASE_LETTER 6594 * @see Character#MATH_SYMBOL 6595 * @see Character#MODIFIER_LETTER 6596 * @see Character#MODIFIER_SYMBOL 6597 * @see Character#NON_SPACING_MARK 6598 * @see Character#OTHER_LETTER 6599 * @see Character#OTHER_NUMBER 6600 * @see Character#OTHER_PUNCTUATION 6601 * @see Character#OTHER_SYMBOL 6602 * @see Character#PARAGRAPH_SEPARATOR 6603 * @see Character#PRIVATE_USE 6604 * @see Character#SPACE_SEPARATOR 6605 * @see Character#START_PUNCTUATION 6606 * @see Character#SURROGATE 6607 * @see Character#TITLECASE_LETTER 6608 * @see Character#UNASSIGNED 6609 * @see Character#UPPERCASE_LETTER 6610 * @since 1.1 6611 */ 6612 public static int getType(char ch) { 6613 return getType((int)ch); 6614 } 6615 6616 /** 6617 * Returns a value indicating a character's general category. 6618 * 6619 * @param codePoint the character (Unicode code point) to be tested. 6620 * @return a value of type <code>int</code> representing the 6621 * character's general category. 
6622 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 6623 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 6624 * @see Character#CONTROL CONTROL 6625 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 6626 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 6627 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 6628 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 6629 * @see Character#END_PUNCTUATION END_PUNCTUATION 6630 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 6631 * @see Character#FORMAT FORMAT 6632 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 6633 * @see Character#LETTER_NUMBER LETTER_NUMBER 6634 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 6635 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 6636 * @see Character#MATH_SYMBOL MATH_SYMBOL 6637 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 6638 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 6639 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 6640 * @see Character#OTHER_LETTER OTHER_LETTER 6641 * @see Character#OTHER_NUMBER OTHER_NUMBER 6642 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 6643 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 6644 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 6645 * @see Character#PRIVATE_USE PRIVATE_USE 6646 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 6647 * @see Character#START_PUNCTUATION START_PUNCTUATION 6648 * @see Character#SURROGATE SURROGATE 6649 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 6650 * @see Character#UNASSIGNED UNASSIGNED 6651 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 6652 * @since 1.5 6653 */ 6654 public static int getType(int codePoint) { 6655 return CharacterData.of(codePoint).getType(codePoint); 6656 } 6657 6658 /** 6659 * Determines the character representation for a specific digit in 6660 * the specified radix. 
If the value of <code>radix</code> is not a 6661 * valid radix, or the value of <code>digit</code> is not a valid 6662 * digit in the specified radix, the null character 6663 * (<code>'\u0000'</code>) is returned. 6664 * <p> 6665 * The <code>radix</code> argument is valid if it is greater than or 6666 * equal to <code>MIN_RADIX</code> and less than or equal to 6667 * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if 6668 * <code>0 <=digit < radix</code>. 6669 * <p> 6670 * If the digit is less than 10, then 6671 * <code>'0' + digit</code> is returned. Otherwise, the value 6672 * <code>'a' + digit - 10</code> is returned. 6673 * 6674 * @param digit the number to convert to a character. 6675 * @param radix the radix. 6676 * @return the <code>char</code> representation of the specified digit 6677 * in the specified radix. 6678 * @see Character#MIN_RADIX 6679 * @see Character#MAX_RADIX 6680 * @see Character#digit(char, int) 6681 */ 6682 public static char forDigit(int digit, int radix) { 6683 if ((digit >= radix) || (digit < 0)) { 6684 return '\0'; 6685 } 6686 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 6687 return '\0'; 6688 } 6689 if (digit < 10) { 6690 return (char)('0' + digit); 6691 } 6692 return (char)('a' - 10 + digit); 6693 } 6694 6695 /** 6696 * Returns the Unicode directionality property for the given 6697 * character. Character directionality is used to calculate the 6698 * visual ordering of text. The directionality value of undefined 6699 * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>. 6700 * 6701 * <p><b>Note:</b> This method cannot handle <a 6702 * href="#supplementary"> supplementary characters</a>. To support 6703 * all Unicode characters, including supplementary characters, use 6704 * the {@link #getDirectionality(int)} method. 6705 * 6706 * @param ch <code>char</code> for which the directionality property 6707 * is requested. 
6708 * @return the directionality property of the <code>char</code> value. 6709 * 6710 * @see Character#DIRECTIONALITY_UNDEFINED 6711 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 6712 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 6713 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6714 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 6715 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6716 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6717 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 6718 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6719 * @see Character#DIRECTIONALITY_NONSPACING_MARK 6720 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 6721 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 6722 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 6723 * @see Character#DIRECTIONALITY_WHITESPACE 6724 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 6725 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6726 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6727 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6728 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6729 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6730 * @since 1.4 6731 */ 6732 public static byte getDirectionality(char ch) { 6733 return getDirectionality((int)ch); 6734 } 6735 6736 /** 6737 * Returns the Unicode directionality property for the given 6738 * character (Unicode code point). Character directionality is 6739 * used to calculate the visual ordering of text. The 6740 * directionality value of undefined character is {@link 6741 * #DIRECTIONALITY_UNDEFINED}. 6742 * 6743 * @param codePoint the character (Unicode code point) for which 6744 * the directionality property is requested. 6745 * @return the directionality property of the character. 
6746 * 6747 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 6748 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 6749 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 6750 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 6751 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 6752 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 6753 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 6754 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 6755 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 6756 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 6757 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 6758 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 6759 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 6760 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 6761 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 6762 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 6763 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 6764 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 6765 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 6766 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 6767 * @since 1.5 6768 */ 6769 public static byte getDirectionality(int codePoint) { 6770 return CharacterData.of(codePoint).getDirectionality(codePoint); 6771 } 6772 6773 /** 6774 * 
Determines whether the character is mirrored according to the 6775 * Unicode specification. Mirrored characters should have their 6776 * glyphs horizontally mirrored when displayed in text that is 6777 * right-to-left. For example, <code>'\u0028'</code> LEFT 6778 * PARENTHESIS is semantically defined to be an <i>opening 6779 * parenthesis</i>. This will appear as a "(" in text that is 6780 * left-to-right but as a ")" in text that is right-to-left. 6781 * 6782 * <p><b>Note:</b> This method cannot handle <a 6783 * href="#supplementary"> supplementary characters</a>. To support 6784 * all Unicode characters, including supplementary characters, use 6785 * the {@link #isMirrored(int)} method. 6786 * 6787 * @param ch <code>char</code> for which the mirrored property is requested 6788 * @return <code>true</code> if the char is mirrored, <code>false</code> 6789 * if the <code>char</code> is not mirrored or is not defined. 6790 * @since 1.4 6791 */ 6792 public static boolean isMirrored(char ch) { 6793 return isMirrored((int)ch); 6794 } 6795 6796 /** 6797 * Determines whether the specified character (Unicode code point) 6798 * is mirrored according to the Unicode specification. Mirrored 6799 * characters should have their glyphs horizontally mirrored when 6800 * displayed in text that is right-to-left. For example, 6801 * <code>'\u0028'</code> LEFT PARENTHESIS is semantically 6802 * defined to be an <i>opening parenthesis</i>. This will appear 6803 * as a "(" in text that is left-to-right but as a ")" in text 6804 * that is right-to-left. 6805 * 6806 * @param codePoint the character (Unicode code point) to be tested. 6807 * @return <code>true</code> if the character is mirrored, <code>false</code> 6808 * if the character is not mirrored or is not defined. 
6809 * @since 1.5 6810 */ 6811 public static boolean isMirrored(int codePoint) { 6812 return CharacterData.of(codePoint).isMirrored(codePoint); 6813 } 6814 6815 /** 6816 * Compares two <code>Character</code> objects numerically. 6817 * 6818 * @param anotherCharacter the <code>Character</code> to be compared. 6819 6820 * @return the value <code>0</code> if the argument <code>Character</code> 6821 * is equal to this <code>Character</code>; a value less than 6822 * <code>0</code> if this <code>Character</code> is numerically less 6823 * than the <code>Character</code> argument; and a value greater than 6824 * <code>0</code> if this <code>Character</code> is numerically greater 6825 * than the <code>Character</code> argument (unsigned comparison). 6826 * Note that this is strictly a numerical comparison; it is not 6827 * locale-dependent. 6828 * @since 1.2 6829 */ 6830 public int compareTo(Character anotherCharacter) { 6831 return compare(this.value, anotherCharacter.value); 6832 } 6833 6834 /** 6835 * Compares two {@code char} values numerically. 6836 * The value returned is identical to what would be returned by: 6837 * <pre> 6838 * Character.valueOf(x).compareTo(Character.valueOf(y)) 6839 * </pre> 6840 * 6841 * @param x the first {@code char} to compare 6842 * @param y the second {@code char} to compare 6843 * @return the value {@code 0} if {@code x == y}; 6844 * a value less than {@code 0} if {@code x < y}; and 6845 * a value greater than {@code 0} if {@code x > y} 6846 * @since 1.7 6847 */ 6848 public static int compare(char x, char y) { 6849 return x - y; 6850 } 6851 6852 /** 6853 * Converts the character (Unicode code point) argument to uppercase using 6854 * information from the UnicodeData file. 6855 * <p> 6856 * 6857 * @param codePoint the character (Unicode code point) to be converted. 
6858 * @return either the uppercase equivalent of the character, if 6859 * any, or an error flag (<code>Character.ERROR</code>) 6860 * that indicates that a 1:M <code>char</code> mapping exists. 6861 * @see Character#isLowerCase(char) 6862 * @see Character#isUpperCase(char) 6863 * @see Character#toLowerCase(char) 6864 * @see Character#toTitleCase(char) 6865 * @since 1.4 6866 */ 6867 static int toUpperCaseEx(int codePoint) { 6868 assert isValidCodePoint(codePoint); 6869 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 6870 } 6871 6872 /** 6873 * Converts the character (Unicode code point) argument to uppercase using case 6874 * mapping information from the SpecialCasing file in the Unicode 6875 * specification. If a character has no explicit uppercase 6876 * mapping, then the <code>char</code> itself is returned in the 6877 * <code>char[]</code>. 6878 * 6879 * @param codePoint the character (Unicode code point) to be converted. 6880 * @return a <code>char[]</code> with the uppercased character. 6881 * @since 1.4 6882 */ 6883 static char[] toUpperCaseCharArray(int codePoint) { 6884 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP. 6885 assert isBmpCodePoint(codePoint); 6886 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 6887 } 6888 6889 /** 6890 * The number of bits used to represent a <tt>char</tt> value in unsigned 6891 * binary form, constant {@code 16}. 6892 * 6893 * @since 1.5 6894 */ 6895 public static final int SIZE = 16; 6896 6897 /** 6898 * Returns the value obtained by reversing the order of the bytes in the 6899 * specified <tt>char</tt> value. 6900 * 6901 * @return the value obtained by reversing (or, equivalently, swapping) 6902 * the bytes in the specified <tt>char</tt> value. 
/**
 * Returns the value obtained by reversing the order of the bytes in
 * the specified {@code char} value.
 *
 * @param  ch the {@code char} whose two bytes are to be swapped.
 * @return the value obtained by reversing (or, equivalently, swapping)
 *         the bytes in the specified {@code char} value.
 * @since 1.5
 */
public static char reverseBytes(char ch) {
    // The high byte moves down; the low byte moves up. Bits shifted
    // above position 15 are discarded by the narrowing cast.
    return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
}

/**
 * Returns the Unicode name of the specified character
 * {@code codePoint}, or null if the code point is
 * {@link #UNASSIGNED unassigned}.
 *
 * <p>Note: if the specified character is not assigned a name by
 * the <i>UnicodeData</i> file (part of the Unicode Character
 * Database maintained by the Unicode Consortium), the returned
 * name is the same as the result of expression
 *
 * <blockquote><pre>{@code
 * Character.UnicodeBlock.of(codePoint)
 *                       .toString()
 *                       .replace('_', ' ')
 * + " "
 * + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
 * }</pre></blockquote>
 *
 * @param  codePoint the character (Unicode code point)
 *
 * @return the Unicode name of the specified character, or null if
 *         the code point is unassigned.
 *
 * @exception IllegalArgumentException if the specified
 *            {@code codePoint} is not a valid Unicode
 *            code point.
 *
 * @since 1.7
 */
public static String getName(int codePoint) {
    if (!isValidCodePoint(codePoint)) {
        // Report the offending value so the failure can be diagnosed.
        throw new IllegalArgumentException(
            String.format("Not a valid Unicode code point: 0x%X", codePoint));
    }
    String name = CharacterName.get(codePoint);
    if (name != null) {
        return name;
    }
    if (getType(codePoint) == UNASSIGNED) {
        return null;
    }
    // Fallback name: "<BLOCK NAME> <HEX CODE POINT>".
    final String hex = Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
    UnicodeBlock block = UnicodeBlock.of(codePoint);
    if (block != null) {
        return block.toString().replace('_', ' ') + " " + hex;
    }
    // should never come here
    return hex;
}
} // closes the enclosing class, whose header lies outside this chunk