23 * questions.
24 */
25
26 package java.lang;
27
28 import java.util.Arrays;
29 import java.util.Map;
30 import java.util.HashMap;
31 import java.util.Locale;
32
33 /**
34 * The {@code Character} class wraps a value of the primitive
35 * type {@code char} in an object. An object of type
36 * {@code Character} contains a single field whose type is
37 * {@code char}.
38 * <p>
39 * In addition, this class provides several methods for determining
40 * a character's category (lowercase letter, digit, etc.) and for converting
41 * characters from uppercase to lowercase and vice versa.
42 * <p>
43 * Character information is based on the Unicode Standard, version 6.2.0.
44 * <p>
45 * The methods and data of class {@code Character} are defined by
46 * the information in the <i>UnicodeData</i> file that is part of the
47 * Unicode Character Database maintained by the Unicode
48 * Consortium. This file specifies various properties including name
49 * and general category for every defined Unicode code point or
50 * character range.
51 * <p>
52 * The file and its description are available from the Unicode Consortium at:
53 * <ul>
54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
55 * </ul>
56 *
57 * <h3><a name="unicode">Unicode Character Representations</a></h3>
58 *
59 * <p>The {@code char} data type (and therefore the value that a
60 * {@code Character} object encapsulates) is based on the
61 * original Unicode specification, which defined characters as
62 * fixed-width 16-bit entities. The Unicode Standard has since been
63 * changed to allow for characters whose representation requires more
473
474 /**
475 * Strong bidirectional character type "RLE" (right-to-left embedding) in the Unicode specification.
476 * @since 1.4
477 */
478 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
479
480 /**
481 * Strong bidirectional character type "RLO" (right-to-left override) in the Unicode specification.
482 * @since 1.4
483 */
484 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
485
486 /**
487 * Weak bidirectional character type "PDF" (pop directional format) in the Unicode specification.
488 * @since 1.4
489 */
490 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
491
492 /**
493 * The minimum value of a
494 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
495 * Unicode high-surrogate code unit</a>
496 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
497 * A high-surrogate is also known as a <i>leading-surrogate</i>.
 * This is the lower bound of the high-surrogate range whose upper bound
 * is {@link #MAX_HIGH_SURROGATE}.
498 *
499 * @since 1.5
500 */
501 public static final char MIN_HIGH_SURROGATE = '\uD800';
502
503 /**
504 * The maximum value of a
505 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
506 * Unicode high-surrogate code unit</a>
507 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
508 * A high-surrogate is also known as a <i>leading-surrogate</i>.
 * This is the upper bound of the high-surrogate range whose lower bound
 * is {@link #MIN_HIGH_SURROGATE}.
509 *
510 * @since 1.5
511 */
512 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
2544 public static final UnicodeBlock TAKRI =
2545 new UnicodeBlock("TAKRI");
2546
2547 /**
2548 * Constant for the "Miao" Unicode character block.
 * The block object is constructed with the single identifier string
 * {@code "MIAO"}.
2549 * @since 1.8
2550 */
2551 public static final UnicodeBlock MIAO =
2552 new UnicodeBlock("MIAO");
2553
2554 /**
2555 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2556 * character block.
 * The block object is constructed with three spellings of its name: the
 * identifier form (underscores), a space-separated form and a form with
 * the separators removed.
2557 * @since 1.8
2558 */
 // NOTE(review): the extra strings are presumably registered as alternative
 // lookup names for this block - confirm against the UnicodeBlock constructor.
2559 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2560 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2561 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2562 "ARABICMATHEMATICALALPHABETICSYMBOLS");
2563
2564 private static final int blockStarts[] = {
2565 0x0000, // 0000..007F; Basic Latin
2566 0x0080, // 0080..00FF; Latin-1 Supplement
2567 0x0100, // 0100..017F; Latin Extended-A
2568 0x0180, // 0180..024F; Latin Extended-B
2569 0x0250, // 0250..02AF; IPA Extensions
2570 0x02B0, // 02B0..02FF; Spacing Modifier Letters
2571 0x0300, // 0300..036F; Combining Diacritical Marks
2572 0x0370, // 0370..03FF; Greek and Coptic
2573 0x0400, // 0400..04FF; Cyrillic
2574 0x0500, // 0500..052F; Cyrillic Supplement
2575 0x0530, // 0530..058F; Armenian
2576 0x0590, // 0590..05FF; Hebrew
2577 0x0600, // 0600..06FF; Arabic
2578 0x0700, // 0700..074F; Syriac
2579 0x0750, // 0750..077F; Arabic Supplement
2580 0x0780, // 0780..07BF; Thaana
2581 0x07C0, // 07C0..07FF; NKo
2582 0x0800, // 0800..083F; Samaritan
2583 0x0840, // 0840..085F; Mandaic
2601 0x1100, // 1100..11FF; Hangul Jamo
2602 0x1200, // 1200..137F; Ethiopic
2603 0x1380, // 1380..139F; Ethiopic Supplement
2604 0x13A0, // 13A0..13FF; Cherokee
2605 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
2606 0x1680, // 1680..169F; Ogham
2607 0x16A0, // 16A0..16FF; Runic
2608 0x1700, // 1700..171F; Tagalog
2609 0x1720, // 1720..173F; Hanunoo
2610 0x1740, // 1740..175F; Buhid
2611 0x1760, // 1760..177F; Tagbanwa
2612 0x1780, // 1780..17FF; Khmer
2613 0x1800, // 1800..18AF; Mongolian
2614 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2615 0x1900, // 1900..194F; Limbu
2616 0x1950, // 1950..197F; Tai Le
2617 0x1980, // 1980..19DF; New Tai Lue
2618 0x19E0, // 19E0..19FF; Khmer Symbols
2619 0x1A00, // 1A00..1A1F; Buginese
2620 0x1A20, // 1A20..1AAF; Tai Tham
2621 0x1AB0, // unassigned
2622 0x1B00, // 1B00..1B7F; Balinese
2623 0x1B80, // 1B80..1BBF; Sundanese
2624 0x1BC0, // 1BC0..1BFF; Batak
2625 0x1C00, // 1C00..1C4F; Lepcha
2626 0x1C50, // 1C50..1C7F; Ol Chiki
2627 0x1C80, // unassigned
2628 0x1CC0, // 1CC0..1CCF; Sundanese Supplement
2629 0x1CD0, // 1CD0..1CFF; Vedic Extensions
2630 0x1D00, // 1D00..1D7F; Phonetic Extensions
2631 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
2632 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
2633 0x1E00, // 1E00..1EFF; Latin Extended Additional
2634 0x1F00, // 1F00..1FFF; Greek Extended
2635 0x2000, // 2000..206F; General Punctuation
2636 0x2070, // 2070..209F; Superscripts and Subscripts
2637 0x20A0, // 20A0..20CF; Currency Symbols
2638 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
2639 0x2100, // 2100..214F; Letterlike Symbols
2640 0x2150, // 2150..218F; Number Forms
2641 0x2190, // 2190..21FF; Arrows
2682 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
2683 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
2684 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
2685 0xA000, // A000..A48F; Yi Syllables
2686 0xA490, // A490..A4CF; Yi Radicals
2687 0xA4D0, // A4D0..A4FF; Lisu
2688 0xA500, // A500..A63F; Vai
2689 0xA640, // A640..A69F; Cyrillic Extended-B
2690 0xA6A0, // A6A0..A6FF; Bamum
2691 0xA700, // A700..A71F; Modifier Tone Letters
2692 0xA720, // A720..A7FF; Latin Extended-D
2693 0xA800, // A800..A82F; Syloti Nagri
2694 0xA830, // A830..A83F; Common Indic Number Forms
2695 0xA840, // A840..A87F; Phags-pa
2696 0xA880, // A880..A8DF; Saurashtra
2697 0xA8E0, // A8E0..A8FF; Devanagari Extended
2698 0xA900, // A900..A92F; Kayah Li
2699 0xA930, // A930..A95F; Rejang
2700 0xA960, // A960..A97F; Hangul Jamo Extended-A
2701 0xA980, // A980..A9DF; Javanese
2702 0xA9E0, // unassigned
2703 0xAA00, // AA00..AA5F; Cham
2704 0xAA60, // AA60..AA7F; Myanmar Extended-A
2705 0xAA80, // AA80..AADF; Tai Viet
2706 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
2707 0xAB00, // AB00..AB2F; Ethiopic Extended-A
2708 0xAB30, // unassigned
2709 0xABC0, // ABC0..ABFF; Meetei Mayek
2710 0xAC00, // AC00..D7AF; Hangul Syllables
2711 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
2712 0xD800, // D800..DB7F; High Surrogates
2713 0xDB80, // DB80..DBFF; High Private Use Surrogates
2714 0xDC00, // DC00..DFFF; Low Surrogates
2715 0xE000, // E000..F8FF; Private Use Area
2716 0xF900, // F900..FAFF; CJK Compatibility Ideographs
2717 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
2718 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
2719 0xFE00, // FE00..FE0F; Variation Selectors
2720 0xFE10, // FE10..FE1F; Vertical Forms
2721 0xFE20, // FE20..FE2F; Combining Half Marks
2722 0xFE30, // FE30..FE4F; CJK Compatibility Forms
2723 0xFE50, // FE50..FE6F; Small Form Variants
2724 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
2725 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
2726 0xFFF0, // FFF0..FFFF; Specials
2727 0x10000, // 10000..1007F; Linear B Syllabary
2728 0x10080, // 10080..100FF; Linear B Ideograms
2729 0x10100, // 10100..1013F; Aegean Numbers
2730 0x10140, // 10140..1018F; Ancient Greek Numbers
2731 0x10190, // 10190..101CF; Ancient Symbols
2732 0x101D0, // 101D0..101FF; Phaistos Disc
2733 0x10200, // unassigned
2734 0x10280, // 10280..1029F; Lycian
2735 0x102A0, // 102A0..102DF; Carian
2736 0x102E0, // unassigned
2737 0x10300, // 10300..1032F; Old Italic
2738 0x10330, // 10330..1034F; Gothic
2739 0x10350, // unassigned
2740 0x10380, // 10380..1039F; Ugaritic
2741 0x103A0, // 103A0..103DF; Old Persian
2742 0x103E0, // unassigned
2743 0x10400, // 10400..1044F; Deseret
2744 0x10450, // 10450..1047F; Shavian
2745 0x10480, // 10480..104AF; Osmanya
2746 0x104B0, // unassigned
2747 0x10800, // 10800..1083F; Cypriot Syllabary
2748 0x10840, // 10840..1085F; Imperial Aramaic
2749 0x10860, // unassigned
2750 0x10900, // 10900..1091F; Phoenician
2751 0x10920, // 10920..1093F; Lydian
2752 0x10940, // unassigned
2753 0x10980, // 10980..1099F; Meroitic Hieroglyphs
2754 0x109A0, // 109A0..109FF; Meroitic Cursive
2755 0x10A00, // 10A00..10A5F; Kharoshthi
2756 0x10A60, // 10A60..10A7F; Old South Arabian
2757 0x10A80, // unassigned
2758 0x10B00, // 10B00..10B3F; Avestan
2759 0x10B40, // 10B40..10B5F; Inscriptional Parthian
2760 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
2761 0x10B80, // unassigned
2762 0x10C00, // 10C00..10C4F; Old Turkic
2763 0x10C50, // unassigned
2764 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
2765 0x10E80, // unassigned
2766 0x11000, // 11000..1107F; Brahmi
2767 0x11080, // 11080..110CF; Kaithi
2768 0x110D0, // 110D0..110FF; Sora Sompeng
2769 0x11100, // 11100..1114F; Chakma
2770 0x11150, // unassigned
2771 0x11180, // 11180..111DF; Sharada
2772 0x111E0, // unassigned
2773 0x11680, // 11680..116CF; Takri
2774 0x116D0, // unassigned
2775 0x12000, // 12000..123FF; Cuneiform
2776 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
2777 0x12480, // unassigned
2778 0x13000, // 13000..1342F; Egyptian Hieroglyphs
2779 0x13430, // unassigned
2780 0x16800, // 16800..16A3F; Bamum Supplement
2781 0x16A40, // unassigned
2782 0x16F00, // 16F00..16F9F; Miao
2783 0x16FA0, // unassigned
2784 0x1B000, // 1B000..1B0FF; Kana Supplement
2785 0x1B100, // unassigned
2786 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
2787 0x1D100, // 1D100..1D1FF; Musical Symbols
2788 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
2789 0x1D250, // unassigned
2790 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
2791 0x1D360, // 1D360..1D37F; Counting Rod Numerals
2792 0x1D380, // unassigned
2793 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2794 0x1D800, // unassigned
2795 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2796 0x1EF00, // unassigned
2797 0x1F000, // 1F000..1F02F; Mahjong Tiles
2798 0x1F030, // 1F030..1F09F; Domino Tiles
2799 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
2800 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2801 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
2802 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2803 0x1F600, // 1F600..1F64F; Emoticons
2804 0x1F650, // unassigned
2805 0x1F680, // 1F680..1F6FF; Transport And Map Symbols
2806 0x1F700, // 1F700..1F77F; Alchemical Symbols
2807 0x1F780, // unassigned
2808 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
2809 0x2A6E0, // unassigned
2810 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
2811 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
2812 0x2B820, // unassigned
2813 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2814 0x2FA20, // unassigned
2815 0xE0000, // E0000..E007F; Tags
2816 0xE0080, // unassigned
2817 0xE0100, // E0100..E01EF; Variation Selectors Supplement
2818 0xE01F0, // unassigned
2819 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
2820 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B
2821 };
2822
2823 private static final UnicodeBlock[] blocks = {
2824 BASIC_LATIN,
2825 LATIN_1_SUPPLEMENT,
2826 LATIN_EXTENDED_A,
2827 LATIN_EXTENDED_B,
2860 HANGUL_JAMO,
2861 ETHIOPIC,
2862 ETHIOPIC_SUPPLEMENT,
2863 CHEROKEE,
2864 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2865 OGHAM,
2866 RUNIC,
2867 TAGALOG,
2868 HANUNOO,
2869 BUHID,
2870 TAGBANWA,
2871 KHMER,
2872 MONGOLIAN,
2873 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2874 LIMBU,
2875 TAI_LE,
2876 NEW_TAI_LUE,
2877 KHMER_SYMBOLS,
2878 BUGINESE,
2879 TAI_THAM,
2880 null,
2881 BALINESE,
2882 SUNDANESE,
2883 BATAK,
2884 LEPCHA,
2885 OL_CHIKI,
2886 null,
2887 SUNDANESE_SUPPLEMENT,
2888 VEDIC_EXTENSIONS,
2889 PHONETIC_EXTENSIONS,
2890 PHONETIC_EXTENSIONS_SUPPLEMENT,
2891 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2892 LATIN_EXTENDED_ADDITIONAL,
2893 GREEK_EXTENDED,
2894 GENERAL_PUNCTUATION,
2895 SUPERSCRIPTS_AND_SUBSCRIPTS,
2896 CURRENCY_SYMBOLS,
2897 COMBINING_MARKS_FOR_SYMBOLS,
2898 LETTERLIKE_SYMBOLS,
2899 NUMBER_FORMS,
2900 ARROWS,
2941 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2942 YIJING_HEXAGRAM_SYMBOLS,
2943 CJK_UNIFIED_IDEOGRAPHS,
2944 YI_SYLLABLES,
2945 YI_RADICALS,
2946 LISU,
2947 VAI,
2948 CYRILLIC_EXTENDED_B,
2949 BAMUM,
2950 MODIFIER_TONE_LETTERS,
2951 LATIN_EXTENDED_D,
2952 SYLOTI_NAGRI,
2953 COMMON_INDIC_NUMBER_FORMS,
2954 PHAGS_PA,
2955 SAURASHTRA,
2956 DEVANAGARI_EXTENDED,
2957 KAYAH_LI,
2958 REJANG,
2959 HANGUL_JAMO_EXTENDED_A,
2960 JAVANESE,
2961 null,
2962 CHAM,
2963 MYANMAR_EXTENDED_A,
2964 TAI_VIET,
2965 MEETEI_MAYEK_EXTENSIONS,
2966 ETHIOPIC_EXTENDED_A,
2967 null,
2968 MEETEI_MAYEK,
2969 HANGUL_SYLLABLES,
2970 HANGUL_JAMO_EXTENDED_B,
2971 HIGH_SURROGATES,
2972 HIGH_PRIVATE_USE_SURROGATES,
2973 LOW_SURROGATES,
2974 PRIVATE_USE_AREA,
2975 CJK_COMPATIBILITY_IDEOGRAPHS,
2976 ALPHABETIC_PRESENTATION_FORMS,
2977 ARABIC_PRESENTATION_FORMS_A,
2978 VARIATION_SELECTORS,
2979 VERTICAL_FORMS,
2980 COMBINING_HALF_MARKS,
2981 CJK_COMPATIBILITY_FORMS,
2982 SMALL_FORM_VARIANTS,
2983 ARABIC_PRESENTATION_FORMS_B,
2984 HALFWIDTH_AND_FULLWIDTH_FORMS,
2985 SPECIALS,
2986 LINEAR_B_SYLLABARY,
2987 LINEAR_B_IDEOGRAMS,
2988 AEGEAN_NUMBERS,
2989 ANCIENT_GREEK_NUMBERS,
2990 ANCIENT_SYMBOLS,
2991 PHAISTOS_DISC,
2992 null,
2993 LYCIAN,
2994 CARIAN,
2995 null,
2996 OLD_ITALIC,
2997 GOTHIC,
2998 null,
2999 UGARITIC,
3000 OLD_PERSIAN,
3001 null,
3002 DESERET,
3003 SHAVIAN,
3004 OSMANYA,
3005 null,
3006 CYPRIOT_SYLLABARY,
3007 IMPERIAL_ARAMAIC,
3008 null,
3009 PHOENICIAN,
3010 LYDIAN,
3011 null,
3012 MEROITIC_HIEROGLYPHS,
3013 MEROITIC_CURSIVE,
3014 KHAROSHTHI,
3015 OLD_SOUTH_ARABIAN,
3016 null,
3017 AVESTAN,
3018 INSCRIPTIONAL_PARTHIAN,
3019 INSCRIPTIONAL_PAHLAVI,
3020 null,
3021 OLD_TURKIC,
3022 null,
3023 RUMI_NUMERAL_SYMBOLS,
3024 null,
3025 BRAHMI,
3026 KAITHI,
3027 SORA_SOMPENG,
3028 CHAKMA,
3029 null,
3030 SHARADA,
3031 null,
3032 TAKRI,
3033 null,
3034 CUNEIFORM,
3035 CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3036 null,
3037 EGYPTIAN_HIEROGLYPHS,
3038 null,
3039 BAMUM_SUPPLEMENT,
3040 null,
3041 MIAO,
3042 null,
3043 KANA_SUPPLEMENT,
3044 null,
3045 BYZANTINE_MUSICAL_SYMBOLS,
3046 MUSICAL_SYMBOLS,
3047 ANCIENT_GREEK_MUSICAL_NOTATION,
3048 null,
3049 TAI_XUAN_JING_SYMBOLS,
3050 COUNTING_ROD_NUMERALS,
3051 null,
3052 MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3053 null,
3054 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3055 null,
3056 MAHJONG_TILES,
3057 DOMINO_TILES,
3058 PLAYING_CARDS,
3059 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3060 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3061 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3062 EMOTICONS,
3063 null,
3064 TRANSPORT_AND_MAP_SYMBOLS,
3065 ALCHEMICAL_SYMBOLS,
3066 null,
3067 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3068 null,
3069 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3070 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3071 null,
3072 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3073 null,
3074 TAGS,
3075 null,
3076 VARIATION_SELECTORS_SUPPLEMENT,
3077 null,
3078 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3079 SUPPLEMENTARY_PRIVATE_USE_AREA_B
3080 };
3081
3082
3083 /**
3084 * Returns the object representing the Unicode block containing the
3085 * given character, or {@code null} if the character is not a
3658 */
3659 INSCRIPTIONAL_PAHLAVI,
3660
3661 /**
3662 * Unicode script "Old_Turkic".
3663 */
3664 OLD_TURKIC,
3665
3666 /**
3667 * Unicode script "Brahmi".
3668 */
3669 BRAHMI,
3670
3671 /**
3672 * Unicode script "Kaithi".
3673 */
3674 KAITHI,
3675
3676 /**
3677 * Unicode script "Meroitic_Hieroglyphs".
3678 */
3679 MEROITIC_HIEROGLYPHS,
3680
3681 /**
3682 * Unicode script "Meroitic_Cursive".
3683 */
3684 MEROITIC_CURSIVE,
3685
3686 /**
3687 * Unicode script "Sora_Sompeng".
3688 */
3689 SORA_SOMPENG,
3690
3691 /**
3692 * Unicode script "Chakma".
3693 */
3694 CHAKMA,
3695
3696 /**
3697 * Unicode script "Sharada".
3698 */
3699 SHARADA,
3700
3701 /**
3702 * Unicode script "Takri".
3703 */
3704 TAKRI,
3705
3706 /**
3707 * Unicode script "Miao".
3708 */
3709 MIAO,
3710
3711 /**
3712 * Unicode script "Unknown".
3713 */
3714 UNKNOWN;
3715
/*
 * Start code points of consecutive script ranges, listed in strictly
 * ascending order. The trailing comment on each entry gives the range that
 * starts at that code point and the name of the script assigned to it; the
 * scripts array declared after this one holds the matching UnicodeScript
 * constant at the same index.
 * NOTE(review): the code that searches this table is not visible here;
 * presumably it relies on the ascending order - confirm before reordering.
 */
3716 private static final int[] scriptStarts = {
3717 0x0000, // 0000..0040; COMMON
3718 0x0041, // 0041..005A; LATIN
3719 0x005B, // 005B..0060; COMMON
3720 0x0061, // 0061..007A; LATIN
3721 0x007B, // 007B..00A9; COMMON
3722 0x00AA, // 00AA..00AA; LATIN
3723 0x00AB, // 00AB..00B9; COMMON
3724 0x00BA, // 00BA..00BA; LATIN
3725 0x00BB, // 00BB..00BF; COMMON
3726 0x00C0, // 00C0..00D6; LATIN
3727 0x00D7, // 00D7..00D7; COMMON
3728 0x00D8, // 00D8..00F6; LATIN
3729 0x00F7, // 00F7..00F7; COMMON
3730 0x00F8, // 00F8..02B8; LATIN
3731 0x02B9, // 02B9..02DF; COMMON
3732 0x02E0, // 02E0..02E4; LATIN
3733 0x02E5, // 02E5..02E9; COMMON
3734 0x02EA, // 02EA..02EB; BOPOMOFO
3735 0x02EC, // 02EC..02FF; COMMON
3736 0x0300, // 0300..036F; INHERITED
3737 0x0370, // 0370..0373; GREEK
3738 0x0374, // 0374..0374; COMMON
3739 0x0375, // 0375..037D; GREEK
3740 0x037E, // 037E..0383; COMMON
3741 0x0384, // 0384..0384; GREEK
3742 0x0385, // 0385..0385; COMMON
3743 0x0386, // 0386..0386; GREEK
3744 0x0387, // 0387..0387; COMMON
3745 0x0388, // 0388..03E1; GREEK
3746 0x03E2, // 03E2..03EF; COPTIC
3747 0x03F0, // 03F0..03FF; GREEK
3748 0x0400, // 0400..0484; CYRILLIC
3749 0x0485, // 0485..0486; INHERITED
3750 0x0487, // 0487..0530; CYRILLIC
3751 0x0531, // 0531..0588; ARMENIAN
3752 0x0589, // 0589..0589; COMMON
3753 0x058A, // 058A..0590; ARMENIAN
3754 0x0591, // 0591..05FF; HEBREW
3755 0x0600, // 0600..060B; ARABIC
3756 0x060C, // 060C..060C; COMMON
3757 0x060D, // 060D..061A; ARABIC
3758 0x061B, // 061B..061D; COMMON
3759 0x061E, // 061E..061E; ARABIC
3760 0x061F, // 061F..061F; COMMON
3761 0x0620, // 0620..063F; ARABIC
3762 0x0640, // 0640..0640; COMMON
3763 0x0641, // 0641..064A; ARABIC
3764 0x064B, // 064B..0655; INHERITED
3765 0x0656, // 0656..065F; ARABIC
3766 0x0660, // 0660..0669; COMMON
3767 0x066A, // 066A..066F; ARABIC
3768 0x0670, // 0670..0670; INHERITED
3769 0x0671, // 0671..06DC; ARABIC
3770 0x06DD, // 06DD..06DD; COMMON
3771 0x06DE, // 06DE..06FF; ARABIC
3772 0x0700, // 0700..074F; SYRIAC
3773 0x0750, // 0750..077F; ARABIC
3774 0x0780, // 0780..07BF; THAANA
3775 0x07C0, // 07C0..07FF; NKO
3776 0x0800, // 0800..083F; SAMARITAN
3777 0x0840, // 0840..089F; MANDAIC
3778 0x08A0, // 08A0..08FF; ARABIC
3779 0x0900, // 0900..0950; DEVANAGARI
3780 0x0951, // 0951..0952; INHERITED
3781 0x0953, // 0953..0963; DEVANAGARI
3782 0x0964, // 0964..0965; COMMON
3783 0x0966, // 0966..0980; DEVANAGARI
3784 0x0981, // 0981..0A00; BENGALI
3785 0x0A01, // 0A01..0A80; GURMUKHI
3786 0x0A81, // 0A81..0B00; GUJARATI
3787 0x0B01, // 0B01..0B81; ORIYA
3788 0x0B82, // 0B82..0C00; TAMIL
3789 0x0C01, // 0C01..0C81; TELUGU
3790 0x0C82, // 0C82..0CF0; KANNADA
3791 0x0D02, // 0D02..0D81; MALAYALAM
3792 0x0D82, // 0D82..0E00; SINHALA
3793 0x0E01, // 0E01..0E3E; THAI
3794 0x0E3F, // 0E3F..0E3F; COMMON
3795 0x0E40, // 0E40..0E80; THAI
3796 0x0E81, // 0E81..0EFF; LAO
3797 0x0F00, // 0F00..0FD4; TIBETAN
3798 0x0FD5, // 0FD5..0FD8; COMMON
3799 0x0FD9, // 0FD9..0FFF; TIBETAN
3800 0x1000, // 1000..109F; MYANMAR
3801 0x10A0, // 10A0..10FA; GEORGIAN
3802 0x10FB, // 10FB..10FB; COMMON
3803 0x10FC, // 10FC..10FF; GEORGIAN
3804 0x1100, // 1100..11FF; HANGUL
3805 0x1200, // 1200..139F; ETHIOPIC
3806 0x13A0, // 13A0..13FF; CHEROKEE
3807 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
3808 0x1680, // 1680..169F; OGHAM
3809 0x16A0, // 16A0..16EA; RUNIC
3810 0x16EB, // 16EB..16ED; COMMON
3811 0x16EE, // 16EE..16FF; RUNIC
3812 0x1700, // 1700..171F; TAGALOG
3813 0x1720, // 1720..1734; HANUNOO
3814 0x1735, // 1735..173F; COMMON
3815 0x1740, // 1740..175F; BUHID
3816 0x1760, // 1760..177F; TAGBANWA
3817 0x1780, // 1780..17FF; KHMER
3818 0x1800, // 1800..1801; MONGOLIAN
3819 0x1802, // 1802..1803; COMMON
3820 0x1804, // 1804..1804; MONGOLIAN
3821 0x1805, // 1805..1805; COMMON
3822 0x1806, // 1806..18AF; MONGOLIAN
3823 0x18B0, // 18B0..18FF; CANADIAN_ABORIGINAL
3824 0x1900, // 1900..194F; LIMBU
3825 0x1950, // 1950..197F; TAI_LE
3826 0x1980, // 1980..19DF; NEW_TAI_LUE
3827 0x19E0, // 19E0..19FF; KHMER
3828 0x1A00, // 1A00..1A1F; BUGINESE
3829 0x1A20, // 1A20..1AFF; TAI_THAM
3830 0x1B00, // 1B00..1B7F; BALINESE
3831 0x1B80, // 1B80..1BBF; SUNDANESE
3832 0x1BC0, // 1BC0..1BFF; BATAK
3833 0x1C00, // 1C00..1C4F; LEPCHA
3834 0x1C50, // 1C50..1CBF; OL_CHIKI
3835 0x1CC0, // 1CC0..1CCF; SUNDANESE
3836 0x1CD0, // 1CD0..1CD2; INHERITED
3837 0x1CD3, // 1CD3..1CD3; COMMON
3838 0x1CD4, // 1CD4..1CE0; INHERITED
3839 0x1CE1, // 1CE1..1CE1; COMMON
3840 0x1CE2, // 1CE2..1CE8; INHERITED
3841 0x1CE9, // 1CE9..1CEC; COMMON
3842 0x1CED, // 1CED..1CED; INHERITED
3843 0x1CEE, // 1CEE..1CF3; COMMON
3844 0x1CF4, // 1CF4..1CF4; INHERITED
3845 0x1CF5, // 1CF5..1CFF; COMMON
3846 0x1D00, // 1D00..1D25; LATIN
3847 0x1D26, // 1D26..1D2A; GREEK
3848 0x1D2B, // 1D2B..1D2B; CYRILLIC
3849 0x1D2C, // 1D2C..1D5C; LATIN
3850 0x1D5D, // 1D5D..1D61; GREEK
3851 0x1D62, // 1D62..1D65; LATIN
3852 0x1D66, // 1D66..1D6A; GREEK
3853 0x1D6B, // 1D6B..1D77; LATIN
3854 0x1D78, // 1D78..1D78; CYRILLIC
3855 0x1D79, // 1D79..1DBE; LATIN
3856 0x1DBF, // 1DBF..1DBF; GREEK
3857 0x1DC0, // 1DC0..1DFF; INHERITED
3858 0x1E00, // 1E00..1EFF; LATIN
3859 0x1F00, // 1F00..1FFF; GREEK
3860 0x2000, // 2000..200B; COMMON
3861 0x200C, // 200C..200D; INHERITED
3862 0x200E, // 200E..2070; COMMON
3863 0x2071, // 2071..2073; LATIN
3864 0x2074, // 2074..207E; COMMON
3865 0x207F, // 207F..207F; LATIN
3866 0x2080, // 2080..208F; COMMON
3867 0x2090, // 2090..209F; LATIN
3868 0x20A0, // 20A0..20CF; COMMON
3869 0x20D0, // 20D0..20FF; INHERITED
3870 0x2100, // 2100..2125; COMMON
3871 0x2126, // 2126..2126; GREEK
3872 0x2127, // 2127..2129; COMMON
3873 0x212A, // 212A..212B; LATIN
3874 0x212C, // 212C..2131; COMMON
3875 0x2132, // 2132..2132; LATIN
3876 0x2133, // 2133..214D; COMMON
3877 0x214E, // 214E..214E; LATIN
3878 0x214F, // 214F..215F; COMMON
3879 0x2160, // 2160..2188; LATIN
3880 0x2189, // 2189..27FF; COMMON
3881 0x2800, // 2800..28FF; BRAILLE
3882 0x2900, // 2900..2BFF; COMMON
3883 0x2C00, // 2C00..2C5F; GLAGOLITIC
3884 0x2C60, // 2C60..2C7F; LATIN
3885 0x2C80, // 2C80..2CFF; COPTIC
3886 0x2D00, // 2D00..2D2F; GEORGIAN
3887 0x2D30, // 2D30..2D7F; TIFINAGH
3888 0x2D80, // 2D80..2DDF; ETHIOPIC
3889 0x2DE0, // 2DE0..2DFF; CYRILLIC
3890 0x2E00, // 2E00..2E7F; COMMON
3891 0x2E80, // 2E80..2FEF; HAN
3892 0x2FF0, // 2FF0..3004; COMMON
3893 0x3005, // 3005..3005; HAN
3894 0x3006, // 3006..3006; COMMON
3895 0x3007, // 3007..3007; HAN
3896 0x3008, // 3008..3020; COMMON
3897 0x3021, // 3021..3029; HAN
3898 0x302A, // 302A..302D; INHERITED
3899 0x302E, // 302E..302F; HANGUL
3900 0x3030, // 3030..3037; COMMON
3901 0x3038, // 3038..303B; HAN
3902 0x303C, // 303C..3040; COMMON
3903 0x3041, // 3041..3098; HIRAGANA
3904 0x3099, // 3099..309A; INHERITED
3905 0x309B, // 309B..309C; COMMON
3906 0x309D, // 309D..309F; HIRAGANA
3907 0x30A0, // 30A0..30A0; COMMON
3908 0x30A1, // 30A1..30FA; KATAKANA
3909 0x30FB, // 30FB..30FC; COMMON
3910 0x30FD, // 30FD..3104; KATAKANA
3911 0x3105, // 3105..3130; BOPOMOFO
3912 0x3131, // 3131..318F; HANGUL
3913 0x3190, // 3190..319F; COMMON
3914 0x31A0, // 31A0..31BF; BOPOMOFO
3915 0x31C0, // 31C0..31EF; COMMON
3916 0x31F0, // 31F0..31FF; KATAKANA
3917 0x3200, // 3200..321F; HANGUL
3918 0x3220, // 3220..325F; COMMON
3919 0x3260, // 3260..327E; HANGUL
3920 0x327F, // 327F..32CF; COMMON
3921 0x32D0, // 32D0..3357; KATAKANA
3922 0x3358, // 3358..33FF; COMMON
3923 0x3400, // 3400..4DBF; HAN
3924 0x4DC0, // 4DC0..4DFF; COMMON
3925 0x4E00, // 4E00..9FFF; HAN
3926 0xA000, // A000..A4CF; YI
3927 0xA4D0, // A4D0..A4FF; LISU
3928 0xA500, // A500..A63F; VAI
3929 0xA640, // A640..A69F; CYRILLIC
3930 0xA6A0, // A6A0..A6FF; BAMUM
3931 0xA700, // A700..A721; COMMON
3932 0xA722, // A722..A787; LATIN
3933 0xA788, // A788..A78A; COMMON
3934 0xA78B, // A78B..A7FF; LATIN
3935 0xA800, // A800..A82F; SYLOTI_NAGRI
3936 0xA830, // A830..A83F; COMMON
3937 0xA840, // A840..A87F; PHAGS_PA
3938 0xA880, // A880..A8DF; SAURASHTRA
3939 0xA8E0, // A8E0..A8FF; DEVANAGARI
3940 0xA900, // A900..A92F; KAYAH_LI
3941 0xA930, // A930..A95F; REJANG
3942 0xA960, // A960..A97F; HANGUL
3943 0xA980, // A980..A9FF; JAVANESE
3944 0xAA00, // AA00..AA5F; CHAM
3945 0xAA60, // AA60..AA7F; MYANMAR
3946 0xAA80, // AA80..AADF; TAI_VIET
3947 0xAAE0, // AAE0..AB00; MEETEI_MAYEK
3948 0xAB01, // AB01..ABBF; ETHIOPIC
3949 0xABC0, // ABC0..ABFF; MEETEI_MAYEK
3950 0xAC00, // AC00..D7FB; HANGUL
3951 0xD7FC, // D7FC..F8FF; UNKNOWN
3952 0xF900, // F900..FAFF; HAN
3953 0xFB00, // FB00..FB12; LATIN
3954 0xFB13, // FB13..FB1C; ARMENIAN
3955 0xFB1D, // FB1D..FB4F; HEBREW
3956 0xFB50, // FB50..FD3D; ARABIC
3957 0xFD3E, // FD3E..FD4F; COMMON
3958 0xFD50, // FD50..FDFC; ARABIC
3959 0xFDFD, // FDFD..FDFF; COMMON
3960 0xFE00, // FE00..FE0F; INHERITED
3961 0xFE10, // FE10..FE1F; COMMON
3962 0xFE20, // FE20..FE2F; INHERITED
3963 0xFE30, // FE30..FE6F; COMMON
3964 0xFE70, // FE70..FEFE; ARABIC
3965 0xFEFF, // FEFF..FF20; COMMON
3966 0xFF21, // FF21..FF3A; LATIN
3967 0xFF3B, // FF3B..FF40; COMMON
3968 0xFF41, // FF41..FF5A; LATIN
3969 0xFF5B, // FF5B..FF65; COMMON
3970 0xFF66, // FF66..FF6F; KATAKANA
3971 0xFF70, // FF70..FF70; COMMON
3972 0xFF71, // FF71..FF9D; KATAKANA
3973 0xFF9E, // FF9E..FF9F; COMMON
3974 0xFFA0, // FFA0..FFDF; HANGUL
3975 0xFFE0, // FFE0..FFFF; COMMON
3976 0x10000, // 10000..100FF; LINEAR_B
3977 0x10100, // 10100..1013F; COMMON
3978 0x10140, // 10140..1018F; GREEK
3979 0x10190, // 10190..101FC; COMMON
3980 0x101FD, // 101FD..1027F; INHERITED
3981 0x10280, // 10280..1029F; LYCIAN
3982 0x102A0, // 102A0..102FF; CARIAN
3983 0x10300, // 10300..1032F; OLD_ITALIC
3984 0x10330, // 10330..1037F; GOTHIC
3985 0x10380, // 10380..1039F; UGARITIC
3986 0x103A0, // 103A0..103FF; OLD_PERSIAN
3987 0x10400, // 10400..1044F; DESERET
3988 0x10450, // 10450..1047F; SHAVIAN
3989 0x10480, // 10480..107FF; OSMANYA
3990 0x10800, // 10800..1083F; CYPRIOT
3991 0x10840, // 10840..108FF; IMPERIAL_ARAMAIC
3992 0x10900, // 10900..1091F; PHOENICIAN
3993 0x10920, // 10920..1097F; LYDIAN
3994 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
3995 0x109A0, // 109A0..109FF; MEROITIC_CURSIVE
3996 0x10A00, // 10A00..10A5F; KHAROSHTHI
3997 0x10A60, // 10A60..10AFF; OLD_SOUTH_ARABIAN
3998 0x10B00, // 10B00..10B3F; AVESTAN
3999 0x10B40, // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4000 0x10B60, // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4001 0x10C00, // 10C00..10E5F; OLD_TURKIC
4002 0x10E60, // 10E60..10FFF; ARABIC
4003 0x11000, // 11000..1107F; BRAHMI
4004 0x11080, // 11080..110CF; KAITHI
4005 0x110D0, // 110D0..110FF; SORA_SOMPENG
4006 0x11100, // 11100..1117F; CHAKMA
4007 0x11180, // 11180..1167F; SHARADA
4008 0x11680, // 11680..116CF; TAKRI
4009 0x12000, // 12000..12FFF; CUNEIFORM
4010 0x13000, // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4011 0x16800, // 16800..16A38; BAMUM
4012 0x16F00, // 16F00..16F9F; MIAO
4013 0x1B000, // 1B000..1B000; KATAKANA
4014 0x1B001, // 1B001..1CFFF; HIRAGANA
4015 0x1D000, // 1D000..1D166; COMMON
4016 0x1D167, // 1D167..1D169; INHERITED
4017 0x1D16A, // 1D16A..1D17A; COMMON
4018 0x1D17B, // 1D17B..1D182; INHERITED
4019 0x1D183, // 1D183..1D184; COMMON
4020 0x1D185, // 1D185..1D18B; INHERITED
4021 0x1D18C, // 1D18C..1D1A9; COMMON
4022 0x1D1AA, // 1D1AA..1D1AD; INHERITED
4023 0x1D1AE, // 1D1AE..1D1FF; COMMON
4024 0x1D200, // 1D200..1D2FF; GREEK
4025 0x1D300, // 1D300..1EDFF; COMMON
4026 0x1EE00, // 1EE00..1EFFF; ARABIC
4027 0x1F000, // 1F000..1F1FF; COMMON
4028 0x1F200, // 1F200..1F200; HIRAGANA
4029 0x1F201, // 1F201..1FFFF; COMMON
4030 0x20000, // 20000..E0000; HAN
4031 0xE0001, // E0001..E00FF; COMMON
4032 0xE0100, // E0100..E01EF; INHERITED
4033 0xE01F0 // E01F0..10FFFF; UNKNOWN
4034
4035 };
4036
/*
 * Script constants parallel to the scriptStarts table: the element at
 * index i is the script named in the trailing comment of the i-th
 * scriptStarts entry, and both tables have the same number of entries.
 * Any entry added to or removed from one table needs the matching change
 * in the other so the indexes stay aligned.
 */
4037 private static final UnicodeScript[] scripts = {
4038 COMMON,
4039 LATIN,
4040 COMMON,
4041 LATIN,
4042 COMMON,
4043 LATIN,
4044 COMMON,
4045 LATIN,
4046 COMMON,
4047 LATIN,
4048 COMMON,
4049 LATIN,
4050 COMMON,
4051 LATIN,
4052 COMMON,
4053 LATIN,
4054 COMMON,
4055 BOPOMOFO,
4056 COMMON,
4057 INHERITED,
4058 GREEK,
4059 COMMON,
4060 GREEK,
4061 COMMON,
4062 GREEK,
4063 COMMON,
4064 GREEK,
4065 COMMON,
4066 GREEK,
4067 COPTIC,
4068 GREEK,
4069 CYRILLIC,
4070 INHERITED,
4071 CYRILLIC,
4072 ARMENIAN,
4073 COMMON,
4074 ARMENIAN,
4075 HEBREW,
4076 ARABIC,
4077 COMMON,
4078 ARABIC,
4079 COMMON,
4080 ARABIC,
4081 COMMON,
4082 ARABIC,
4083 COMMON,
4084 ARABIC,
4085 INHERITED,
4086 ARABIC,
4087 COMMON,
4088 ARABIC,
4089 INHERITED,
4090 ARABIC,
4091 COMMON,
4092 ARABIC,
4093 SYRIAC,
4094 ARABIC,
4095 THAANA,
4096 NKO,
4097 SAMARITAN,
4098 MANDAIC,
4099 ARABIC,
4100 DEVANAGARI,
4101 INHERITED,
4102 DEVANAGARI,
4103 COMMON,
4104 DEVANAGARI,
4105 BENGALI,
4106 GURMUKHI,
4107 GUJARATI,
4108 ORIYA,
4109 TAMIL,
4110 TELUGU,
4111 KANNADA,
4112 MALAYALAM,
4113 SINHALA,
4114 THAI,
4115 COMMON,
4116 THAI,
4117 LAO,
4118 TIBETAN,
4119 COMMON,
4120 TIBETAN,
4121 MYANMAR,
4122 GEORGIAN,
4123 COMMON,
4124 GEORGIAN,
4125 HANGUL,
4126 ETHIOPIC,
4127 CHEROKEE,
4128 CANADIAN_ABORIGINAL,
4129 OGHAM,
4130 RUNIC,
4131 COMMON,
4132 RUNIC,
4133 TAGALOG,
4134 HANUNOO,
4135 COMMON,
4136 BUHID,
4137 TAGBANWA,
4138 KHMER,
4139 MONGOLIAN,
4140 COMMON,
4141 MONGOLIAN,
4142 COMMON,
4143 MONGOLIAN,
4144 CANADIAN_ABORIGINAL,
4145 LIMBU,
4146 TAI_LE,
4147 NEW_TAI_LUE,
4148 KHMER,
4149 BUGINESE,
4150 TAI_THAM,
4151 BALINESE,
4152 SUNDANESE,
4153 BATAK,
4154 LEPCHA,
4155 OL_CHIKI,
4156 SUNDANESE,
4157 INHERITED,
4158 COMMON,
4159 INHERITED,
4160 COMMON,
4161 INHERITED,
4162 COMMON,
4163 INHERITED,
4164 COMMON,
4165 INHERITED,
4166 COMMON,
4167 LATIN,
4168 GREEK,
4169 CYRILLIC,
4170 LATIN,
4171 GREEK,
4172 LATIN,
4173 GREEK,
4174 LATIN,
4175 CYRILLIC,
4176 LATIN,
4177 GREEK,
4178 INHERITED,
4179 LATIN,
4180 GREEK,
4181 COMMON,
4182 INHERITED,
4183 COMMON,
4184 LATIN,
4185 COMMON,
4186 LATIN,
4187 COMMON,
4188 LATIN,
4189 COMMON,
4190 INHERITED,
4191 COMMON,
4192 GREEK,
4193 COMMON,
4194 LATIN,
4195 COMMON,
4196 LATIN,
4197 COMMON,
4198 LATIN,
4199 COMMON,
4200 LATIN,
4201 COMMON,
4202 BRAILLE,
4203 COMMON,
4204 GLAGOLITIC,
4205 LATIN,
4206 COPTIC,
4207 GEORGIAN,
4208 TIFINAGH,
4209 ETHIOPIC,
4210 CYRILLIC,
4211 COMMON,
4212 HAN,
4213 COMMON,
4214 HAN,
4215 COMMON,
4216 HAN,
4217 COMMON,
4218 HAN,
4219 INHERITED,
4220 HANGUL,
4221 COMMON,
4222 HAN,
4223 COMMON,
4224 HIRAGANA,
4225 INHERITED,
4226 COMMON,
4227 HIRAGANA,
4228 COMMON,
4229 KATAKANA,
4230 COMMON,
4231 KATAKANA,
4232 BOPOMOFO,
4233 HANGUL,
4234 COMMON,
4235 BOPOMOFO,
4236 COMMON,
4237 KATAKANA,
4238 HANGUL,
4239 COMMON,
4240 HANGUL,
4241 COMMON,
4242 KATAKANA,
4243 COMMON,
4244 HAN,
4245 COMMON,
4246 HAN,
4247 YI,
4248 LISU,
4249 VAI,
4250 CYRILLIC,
4251 BAMUM,
4252 COMMON,
4253 LATIN,
4254 COMMON,
4255 LATIN,
4256 SYLOTI_NAGRI,
4257 COMMON,
4258 PHAGS_PA,
4259 SAURASHTRA,
4260 DEVANAGARI,
4261 KAYAH_LI,
4262 REJANG,
4263 HANGUL,
4264 JAVANESE,
4265 CHAM,
4266 MYANMAR,
4267 TAI_VIET,
4268 MEETEI_MAYEK,
4269 ETHIOPIC,
4270 MEETEI_MAYEK,
4271 HANGUL,
4272 UNKNOWN ,
4273 HAN,
4274 LATIN,
4275 ARMENIAN,
4276 HEBREW,
4277 ARABIC,
4278 COMMON,
4279 ARABIC,
4280 COMMON,
4281 INHERITED,
4282 COMMON,
4283 INHERITED,
4284 COMMON,
4285 ARABIC,
4286 COMMON,
4287 LATIN,
4288 COMMON,
4289 LATIN,
4290 COMMON,
4291 KATAKANA,
4292 COMMON,
4293 KATAKANA,
4294 COMMON,
4295 HANGUL,
4296 COMMON,
4297 LINEAR_B,
4298 COMMON,
4299 GREEK,
4300 COMMON,
4301 INHERITED,
4302 LYCIAN,
4303 CARIAN,
4304 OLD_ITALIC,
4305 GOTHIC,
4306 UGARITIC,
4307 OLD_PERSIAN,
4308 DESERET,
4309 SHAVIAN,
4310 OSMANYA,
4311 CYPRIOT,
4312 IMPERIAL_ARAMAIC,
4313 PHOENICIAN,
4314 LYDIAN,
4315 MEROITIC_HIEROGLYPHS,
4316 MEROITIC_CURSIVE,
4317 KHAROSHTHI,
4318 OLD_SOUTH_ARABIAN,
4319 AVESTAN,
4320 INSCRIPTIONAL_PARTHIAN,
4321 INSCRIPTIONAL_PAHLAVI,
4322 OLD_TURKIC,
4323 ARABIC,
4324 BRAHMI,
4325 KAITHI,
4326 SORA_SOMPENG,
4327 CHAKMA,
4328 SHARADA,
4329 TAKRI,
4330 CUNEIFORM,
4331 EGYPTIAN_HIEROGLYPHS,
4332 BAMUM,
4333 MIAO,
4334 KATAKANA,
4335 HIRAGANA,
4336 COMMON,
4337 INHERITED,
4338 COMMON,
4339 INHERITED,
4340 COMMON,
4341 INHERITED,
4342 COMMON,
4343 INHERITED,
4344 COMMON,
4345 GREEK,
4346 COMMON,
4347 ARABIC,
4348 COMMON,
4349 HIRAGANA,
4350 COMMON,
4351 HAN,
4352 COMMON,
4353 INHERITED,
4354 UNKNOWN
4355 };
4356
4357 private static HashMap<String, Character.UnicodeScript> aliases;
4358 static {
4359 aliases = new HashMap<>(128);
4360 aliases.put("ARAB", ARABIC);
4361 aliases.put("ARMI", IMPERIAL_ARAMAIC);
4362 aliases.put("ARMN", ARMENIAN);
4363 aliases.put("AVST", AVESTAN);
4364 aliases.put("BALI", BALINESE);
4365 aliases.put("BAMU", BAMUM);
4366 aliases.put("BATK", BATAK);
4367 aliases.put("BENG", BENGALI);
4368 aliases.put("BOPO", BOPOMOFO);
4369 aliases.put("BRAI", BRAILLE);
4370 aliases.put("BRAH", BRAHMI);
4371 aliases.put("BUGI", BUGINESE);
4372 aliases.put("BUHD", BUHID);
4373 aliases.put("CAKM", CHAKMA);
4374 aliases.put("CANS", CANADIAN_ABORIGINAL);
4375 aliases.put("CARI", CARIAN);
4376 aliases.put("CHAM", CHAM);
4377 aliases.put("CHER", CHEROKEE);
4378 aliases.put("COPT", COPTIC);
4379 aliases.put("CPRT", CYPRIOT);
4380 aliases.put("CYRL", CYRILLIC);
4381 aliases.put("DEVA", DEVANAGARI);
4382 aliases.put("DSRT", DESERET);
4383 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4384 aliases.put("ETHI", ETHIOPIC);
4385 aliases.put("GEOR", GEORGIAN);
4386 aliases.put("GLAG", GLAGOLITIC);
4387 aliases.put("GOTH", GOTHIC);
4388 aliases.put("GREK", GREEK);
4389 aliases.put("GUJR", GUJARATI);
4390 aliases.put("GURU", GURMUKHI);
4391 aliases.put("HANG", HANGUL);
4392 aliases.put("HANI", HAN);
4393 aliases.put("HANO", HANUNOO);
4394 aliases.put("HEBR", HEBREW);
4395 aliases.put("HIRA", HIRAGANA);
4396 // it appears we don't have the KATAKANA_OR_HIRAGANA
4397 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4398 aliases.put("ITAL", OLD_ITALIC);
4399 aliases.put("JAVA", JAVANESE);
4400 aliases.put("KALI", KAYAH_LI);
4401 aliases.put("KANA", KATAKANA);
4402 aliases.put("KHAR", KHAROSHTHI);
4403 aliases.put("KHMR", KHMER);
4404 aliases.put("KNDA", KANNADA);
4405 aliases.put("KTHI", KAITHI);
4406 aliases.put("LANA", TAI_THAM);
4407 aliases.put("LAOO", LAO);
4408 aliases.put("LATN", LATIN);
4409 aliases.put("LEPC", LEPCHA);
4410 aliases.put("LIMB", LIMBU);
4411 aliases.put("LINB", LINEAR_B);
4412 aliases.put("LISU", LISU);
4413 aliases.put("LYCI", LYCIAN);
4414 aliases.put("LYDI", LYDIAN);
4415 aliases.put("MAND", MANDAIC);
4416 aliases.put("MERC", MEROITIC_CURSIVE);
4417 aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4418 aliases.put("MLYM", MALAYALAM);
4419 aliases.put("MONG", MONGOLIAN);
4420 aliases.put("MTEI", MEETEI_MAYEK);
4421 aliases.put("MYMR", MYANMAR);
4422 aliases.put("NKOO", NKO);
4423 aliases.put("OGAM", OGHAM);
4424 aliases.put("OLCK", OL_CHIKI);
4425 aliases.put("ORKH", OLD_TURKIC);
4426 aliases.put("ORYA", ORIYA);
4427 aliases.put("OSMA", OSMANYA);
4428 aliases.put("PHAG", PHAGS_PA);
4429 aliases.put("PLRD", MIAO);
4430 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4431 aliases.put("PHNX", PHOENICIAN);
4432 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4433 aliases.put("RJNG", REJANG);
4434 aliases.put("RUNR", RUNIC);
4435 aliases.put("SAMR", SAMARITAN);
4436 aliases.put("SARB", OLD_SOUTH_ARABIAN);
4437 aliases.put("SAUR", SAURASHTRA);
4438 aliases.put("SHAW", SHAVIAN);
4439 aliases.put("SHRD", SHARADA);
4440 aliases.put("SINH", SINHALA);
4441 aliases.put("SORA", SORA_SOMPENG);
4442 aliases.put("SUND", SUNDANESE);
4443 aliases.put("SYLO", SYLOTI_NAGRI);
4444 aliases.put("SYRC", SYRIAC);
4445 aliases.put("TAGB", TAGBANWA);
4446 aliases.put("TALE", TAI_LE);
4447 aliases.put("TAKR", TAKRI);
4448 aliases.put("TALU", NEW_TAI_LUE);
4449 aliases.put("TAML", TAMIL);
4450 aliases.put("TAVT", TAI_VIET);
4451 aliases.put("TELU", TELUGU);
4452 aliases.put("TFNG", TIFINAGH);
4453 aliases.put("TGLG", TAGALOG);
4454 aliases.put("THAA", THAANA);
4455 aliases.put("THAI", THAI);
4456 aliases.put("TIBT", TIBETAN);
4457 aliases.put("UGAR", UGARITIC);
4458 aliases.put("VAII", VAI);
4459 aliases.put("XPEO", OLD_PERSIAN);
4460 aliases.put("XSUX", CUNEIFORM);
4461 aliases.put("YIII", YI);
4462 aliases.put("ZINH", INHERITED);
4463 aliases.put("ZYYY", COMMON);
4464 aliases.put("ZZZZ", UNKNOWN);
4465 }
4466
4467 /**
 * Returns the enum constant representing the Unicode script to which
 * the given character (Unicode code point) is assigned.
4470 *
4471 * @param codePoint the character (Unicode code point) in question.
 * @return The {@code UnicodeScript} constant representing the
 *         Unicode script to which this character is assigned.
4474 *
4475 * @exception IllegalArgumentException if the specified
4476 * {@code codePoint} is an invalid Unicode code point.
4477 * @see Character#isValidCodePoint(int)
4478 *
6577 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6578 * full width variant ({@code '\u005CuFF21'} through
6579 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6580 * {@code '\u005CuFF5A'}) forms have numeric values from 10
6581 * through 35. This is independent of the Unicode specification,
6582 * which does not assign numeric values to these {@code char}
6583 * values.
6584 * <p>
6585 * If the character does not have a numeric value, then -1 is returned.
6586 * If the character has a numeric value that cannot be represented as a
6587 * nonnegative integer (for example, a fractional value), then -2
6588 * is returned.
6589 *
6590 * <p><b>Note:</b> This method cannot handle <a
6591 * href="#supplementary"> supplementary characters</a>. To support
6592 * all Unicode characters, including supplementary characters, use
6593 * the {@link #getNumericValue(int)} method.
6594 *
6595 * @param ch the character to be converted.
6596 * @return the numeric value of the character, as a nonnegative {@code int}
6597 * value; -2 if the character has a numeric value that is not a
6598 * nonnegative integer; -1 if the character has no numeric value.
6599 * @see Character#forDigit(int, int)
6600 * @see Character#isDigit(char)
6601 * @since 1.1
6602 */
6603 public static int getNumericValue(char ch) {
6604 return getNumericValue((int)ch);
6605 }
6606
6607 /**
6608 * Returns the {@code int} value that the specified
6609 * character (Unicode code point) represents. For example, the character
6610 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6611 * an {@code int} with a value of 50.
6612 * <p>
6613 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6614 * {@code '\u005Cu005A'}), lowercase
6615 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6616 * full width variant ({@code '\u005CuFF21'} through
6617 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6618 * {@code '\u005CuFF5A'}) forms have numeric values from 10
6619 * through 35. This is independent of the Unicode specification,
6620 * which does not assign numeric values to these {@code char}
6621 * values.
6622 * <p>
6623 * If the character does not have a numeric value, then -1 is returned.
6624 * If the character has a numeric value that cannot be represented as a
6625 * nonnegative integer (for example, a fractional value), then -2
6626 * is returned.
6627 *
6628 * @param codePoint the character (Unicode code point) to be converted.
6629 * @return the numeric value of the character, as a nonnegative {@code int}
6630 * value; -2 if the character has a numeric value that is not a
6631 * nonnegative integer; -1 if the character has no numeric value.
6632 * @see Character#forDigit(int, int)
6633 * @see Character#isDigit(int)
6634 * @since 1.5
6635 */
6636 public static int getNumericValue(int codePoint) {
6637 return CharacterData.of(codePoint).getNumericValue(codePoint);
6638 }
6639
6640 /**
6641 * Determines if the specified character is ISO-LATIN-1 white space.
6642 * This method returns {@code true} for the following five
6643 * characters only:
6644 * <table summary="truechars">
6645 * <tr><td>{@code '\t'}</td> <td>{@code U+0009}</td>
6646 * <td>{@code HORIZONTAL TABULATION}</td></tr>
6647 * <tr><td>{@code '\n'}</td> <td>{@code U+000A}</td>
6648 * <td>{@code NEW LINE}</td></tr>
6649 * <tr><td>{@code '\f'}</td> <td>{@code U+000C}</td>
6650 * <td>{@code FORM FEED}</td></tr>
6651 * <tr><td>{@code '\r'}</td> <td>{@code U+000D}</td>
6981 * @see Character#DIRECTIONALITY_UNDEFINED
6982 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6983 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6984 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6985 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6986 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6987 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6988 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6989 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6990 * @see Character#DIRECTIONALITY_NONSPACING_MARK
6991 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6992 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6993 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6994 * @see Character#DIRECTIONALITY_WHITESPACE
6995 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6996 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6997 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6998 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6999 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7000 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7001 * @since 1.4
7002 */
7003 public static byte getDirectionality(char ch) {
7004 return getDirectionality((int)ch);
7005 }
7006
7007 /**
7008 * Returns the Unicode directionality property for the given
7009 * character (Unicode code point). Character directionality is
7010 * used to calculate the visual ordering of text. The
7011 * directionality value of undefined character is {@link
7012 * #DIRECTIONALITY_UNDEFINED}.
7013 *
7014 * @param codePoint the character (Unicode code point) for which
7015 * the directionality property is requested.
7016 * @return the directionality property of the character.
7017 *
7018 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7019 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7020 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7021 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7022 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7023 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7024 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7025 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7026 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7027 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7028 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7029 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7030 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7031 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7032 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7033 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7034 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7035 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7036 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7037 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7038 * @since 1.5
7039 */
7040 public static byte getDirectionality(int codePoint) {
7041 return CharacterData.of(codePoint).getDirectionality(codePoint);
7042 }
7043
7044 /**
7045 * Determines whether the character is mirrored according to the
7046 * Unicode specification. Mirrored characters should have their
7047 * glyphs horizontally mirrored when displayed in text that is
7048 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
7049 * PARENTHESIS is semantically defined to be an <i>opening
7050 * parenthesis</i>. This will appear as a "(" in text that is
7051 * left-to-right but as a ")" in text that is right-to-left.
7052 *
7053 * <p><b>Note:</b> This method cannot handle <a
7054 * href="#supplementary"> supplementary characters</a>. To support
7055 * all Unicode characters, including supplementary characters, use
7056 * the {@link #isMirrored(int)} method.
7057 *
|
|