< prev index next >

jdk/src/java.base/share/classes/java/lang/Character.java

Print this page




  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.2.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
  60  * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more


 473 
 474     /**
 475      * Strong bidirectional character type "RLE" (right-to-left embedding) in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479 
 480     /**
 481      * Strong bidirectional character type "RLO" (right-to-left override) in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485 
 486     /**
 487      * Weak bidirectional character type "PDF" (pop directional format) in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
          * @see #MAX_HIGH_SURROGATE
 499      * @since 1.5
 500      */
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502 
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
          * @see #MIN_HIGH_SURROGATE
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';


2544         public static final UnicodeBlock TAKRI =
2545             new UnicodeBlock("TAKRI");
2546 
2547         /**
2548          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
2549          * @since 1.8
2550          */
2551         public static final UnicodeBlock MIAO =
2552             new UnicodeBlock("MIAO");
2553 
2554         /**
2555          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2556          * character block (U+1EE00..U+1EEFF).
2557          * @since 1.8
2558          */
2559         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2560             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2561                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2562                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2563 







































































































































































































































































2564         private static final int blockStarts[] = {
2565             0x0000,   // 0000..007F; Basic Latin
2566             0x0080,   // 0080..00FF; Latin-1 Supplement
2567             0x0100,   // 0100..017F; Latin Extended-A
2568             0x0180,   // 0180..024F; Latin Extended-B
2569             0x0250,   // 0250..02AF; IPA Extensions
2570             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2571             0x0300,   // 0300..036F; Combining Diacritical Marks
2572             0x0370,   // 0370..03FF; Greek and Coptic
2573             0x0400,   // 0400..04FF; Cyrillic
2574             0x0500,   // 0500..052F; Cyrillic Supplement
2575             0x0530,   // 0530..058F; Armenian
2576             0x0590,   // 0590..05FF; Hebrew
2577             0x0600,   // 0600..06FF; Arabic
2578             0x0700,   // 0700..074F; Syriac
2579             0x0750,   // 0750..077F; Arabic Supplement
2580             0x0780,   // 0780..07BF; Thaana
2581             0x07C0,   // 07C0..07FF; NKo
2582             0x0800,   // 0800..083F; Samaritan
2583             0x0840,   // 0840..085F; Mandaic


2601             0x1100,   // 1100..11FF; Hangul Jamo
2602             0x1200,   // 1200..137F; Ethiopic
2603             0x1380,   // 1380..139F; Ethiopic Supplement
2604             0x13A0,   // 13A0..13FF; Cherokee
2605             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2606             0x1680,   // 1680..169F; Ogham
2607             0x16A0,   // 16A0..16FF; Runic
2608             0x1700,   // 1700..171F; Tagalog
2609             0x1720,   // 1720..173F; Hanunoo
2610             0x1740,   // 1740..175F; Buhid
2611             0x1760,   // 1760..177F; Tagbanwa
2612             0x1780,   // 1780..17FF; Khmer
2613             0x1800,   // 1800..18AF; Mongolian
2614             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2615             0x1900,   // 1900..194F; Limbu
2616             0x1950,   // 1950..197F; Tai Le
2617             0x1980,   // 1980..19DF; New Tai Lue
2618             0x19E0,   // 19E0..19FF; Khmer Symbols
2619             0x1A00,   // 1A00..1A1F; Buginese
2620             0x1A20,   // 1A20..1AAF; Tai Tham
2621             0x1AB0,   //             unassigned
2622             0x1B00,   // 1B00..1B7F; Balinese
2623             0x1B80,   // 1B80..1BBF; Sundanese
2624             0x1BC0,   // 1BC0..1BFF; Batak
2625             0x1C00,   // 1C00..1C4F; Lepcha
2626             0x1C50,   // 1C50..1C7F; Ol Chiki
2627             0x1C80,   //             unassigned
2628             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2629             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2630             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2631             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2632             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2633             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2634             0x1F00,   // 1F00..1FFF; Greek Extended
2635             0x2000,   // 2000..206F; General Punctuation
2636             0x2070,   // 2070..209F; Superscripts and Subscripts
2637             0x20A0,   // 20A0..20CF; Currency Symbols
2638             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2639             0x2100,   // 2100..214F; Letterlike Symbols
2640             0x2150,   // 2150..218F; Number Forms
2641             0x2190,   // 2190..21FF; Arrows


2682             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2683             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2684             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2685             0xA000,   // A000..A48F; Yi Syllables
2686             0xA490,   // A490..A4CF; Yi Radicals
2687             0xA4D0,   // A4D0..A4FF; Lisu
2688             0xA500,   // A500..A63F; Vai
2689             0xA640,   // A640..A69F; Cyrillic Extended-B
2690             0xA6A0,   // A6A0..A6FF; Bamum
2691             0xA700,   // A700..A71F; Modifier Tone Letters
2692             0xA720,   // A720..A7FF; Latin Extended-D
2693             0xA800,   // A800..A82F; Syloti Nagri
2694             0xA830,   // A830..A83F; Common Indic Number Forms
2695             0xA840,   // A840..A87F; Phags-pa
2696             0xA880,   // A880..A8DF; Saurashtra
2697             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2698             0xA900,   // A900..A92F; Kayah Li
2699             0xA930,   // A930..A95F; Rejang
2700             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2701             0xA980,   // A980..A9DF; Javanese
2702             0xA9E0,   //             unassigned
2703             0xAA00,   // AA00..AA5F; Cham
2704             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2705             0xAA80,   // AA80..AADF; Tai Viet
2706             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2707             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2708             0xAB30,   //             unassigned

2709             0xABC0,   // ABC0..ABFF; Meetei Mayek
2710             0xAC00,   // AC00..D7AF; Hangul Syllables
2711             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2712             0xD800,   // D800..DB7F; High Surrogates
2713             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2714             0xDC00,   // DC00..DFFF; Low Surrogates
2715             0xE000,   // E000..F8FF; Private Use Area
2716             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2717             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2718             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2719             0xFE00,   // FE00..FE0F; Variation Selectors
2720             0xFE10,   // FE10..FE1F; Vertical Forms
2721             0xFE20,   // FE20..FE2F; Combining Half Marks
2722             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2723             0xFE50,   // FE50..FE6F; Small Form Variants
2724             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2725             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2726             0xFFF0,   // FFF0..FFFF; Specials
2727             0x10000,  // 10000..1007F; Linear B Syllabary
2728             0x10080,  // 10080..100FF; Linear B Ideograms
2729             0x10100,  // 10100..1013F; Aegean Numbers
2730             0x10140,  // 10140..1018F; Ancient Greek Numbers
2731             0x10190,  // 10190..101CF; Ancient Symbols
2732             0x101D0,  // 101D0..101FF; Phaistos Disc
2733             0x10200,  //               unassigned
2734             0x10280,  // 10280..1029F; Lycian
2735             0x102A0,  // 102A0..102DF; Carian
2736             0x102E0,  //               unassigned
2737             0x10300,  // 10300..1032F; Old Italic
2738             0x10330,  // 10330..1034F; Gothic
2739             0x10350,  //               unassigned
2740             0x10380,  // 10380..1039F; Ugaritic
2741             0x103A0,  // 103A0..103DF; Old Persian
2742             0x103E0,  //               unassigned
2743             0x10400,  // 10400..1044F; Deseret
2744             0x10450,  // 10450..1047F; Shavian
2745             0x10480,  // 10480..104AF; Osmanya
2746             0x104B0,  //               unassigned





2747             0x10800,  // 10800..1083F; Cypriot Syllabary
2748             0x10840,  // 10840..1085F; Imperial Aramaic
2749             0x10860,  //               unassigned


2750             0x10900,  // 10900..1091F; Phoenician
2751             0x10920,  // 10920..1093F; Lydian
2752             0x10940,  //               unassigned
2753             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2754             0x109A0,  // 109A0..109FF; Meroitic Cursive
2755             0x10A00,  // 10A00..10A5F; Kharoshthi
2756             0x10A60,  // 10A60..10A7F; Old South Arabian
2757             0x10A80,  //               unassigned


2758             0x10B00,  // 10B00..10B3F; Avestan
2759             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2760             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2761             0x10B80,  //               unassigned

2762             0x10C00,  // 10C00..10C4F; Old Turkic
2763             0x10C50,  //               unassigned
2764             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2765             0x10E80,  //               unassigned
2766             0x11000,  // 11000..1107F; Brahmi
2767             0x11080,  // 11080..110CF; Kaithi
2768             0x110D0,  // 110D0..110FF; Sora Sompeng
2769             0x11100,  // 11100..1114F; Chakma
2770             0x11150,  //               unassigned
2771             0x11180,  // 11180..111DF; Sharada
2772             0x111E0,  //               unassigned










2773             0x11680,  // 11680..116CF; Takri
2774             0x116D0,  //               unassigned




2775             0x12000,  // 12000..123FF; Cuneiform
2776             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2777             0x12480,  //               unassigned
2778             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2779             0x13430,  //               unassigned
2780             0x16800,  // 16800..16A3F; Bamum Supplement
2781             0x16A40,  //               unassigned




2782             0x16F00,  // 16F00..16F9F; Miao
2783             0x16FA0,  //               unassigned
2784             0x1B000,  // 1B000..1B0FF; Kana Supplement
2785             0x1B100,  //               unassigned



2786             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2787             0x1D100,  // 1D100..1D1FF; Musical Symbols
2788             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2789             0x1D250,  //               unassigned
2790             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2791             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2792             0x1D380,  //               unassigned
2793             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2794             0x1D800,  //               unassigned


2795             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2796             0x1EF00,  //               unassigned
2797             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2798             0x1F030,  // 1F030..1F09F; Domino Tiles
2799             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2800             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2801             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2802             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2803             0x1F600,  // 1F600..1F64F; Emoticons
2804             0x1F650,  //               unassigned
2805             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2806             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2807             0x1F780,  //               unassigned


2808             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2809             0x2A6E0,  //               unassigned
2810             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2811             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2812             0x2B820,  //               unassigned
2813             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2814             0x2FA20,  //               unassigned
2815             0xE0000,  // E0000..E007F; Tags
2816             0xE0080,  //               unassigned
2817             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2818             0xE01F0,  //               unassigned
2819             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2820             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2821         };
2822 
2823         private static final UnicodeBlock[] blocks = {
2824             BASIC_LATIN,
2825             LATIN_1_SUPPLEMENT,
2826             LATIN_EXTENDED_A,
2827             LATIN_EXTENDED_B,


2860             HANGUL_JAMO,
2861             ETHIOPIC,
2862             ETHIOPIC_SUPPLEMENT,
2863             CHEROKEE,
2864             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2865             OGHAM,
2866             RUNIC,
2867             TAGALOG,
2868             HANUNOO,
2869             BUHID,
2870             TAGBANWA,
2871             KHMER,
2872             MONGOLIAN,
2873             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2874             LIMBU,
2875             TAI_LE,
2876             NEW_TAI_LUE,
2877             KHMER_SYMBOLS,
2878             BUGINESE,
2879             TAI_THAM,
2880             null,
2881             BALINESE,
2882             SUNDANESE,
2883             BATAK,
2884             LEPCHA,
2885             OL_CHIKI,
2886             null,
2887             SUNDANESE_SUPPLEMENT,
2888             VEDIC_EXTENSIONS,
2889             PHONETIC_EXTENSIONS,
2890             PHONETIC_EXTENSIONS_SUPPLEMENT,
2891             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2892             LATIN_EXTENDED_ADDITIONAL,
2893             GREEK_EXTENDED,
2894             GENERAL_PUNCTUATION,
2895             SUPERSCRIPTS_AND_SUBSCRIPTS,
2896             CURRENCY_SYMBOLS,
2897             COMBINING_MARKS_FOR_SYMBOLS,
2898             LETTERLIKE_SYMBOLS,
2899             NUMBER_FORMS,
2900             ARROWS,


2941             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2942             YIJING_HEXAGRAM_SYMBOLS,
2943             CJK_UNIFIED_IDEOGRAPHS,
2944             YI_SYLLABLES,
2945             YI_RADICALS,
2946             LISU,
2947             VAI,
2948             CYRILLIC_EXTENDED_B,
2949             BAMUM,
2950             MODIFIER_TONE_LETTERS,
2951             LATIN_EXTENDED_D,
2952             SYLOTI_NAGRI,
2953             COMMON_INDIC_NUMBER_FORMS,
2954             PHAGS_PA,
2955             SAURASHTRA,
2956             DEVANAGARI_EXTENDED,
2957             KAYAH_LI,
2958             REJANG,
2959             HANGUL_JAMO_EXTENDED_A,
2960             JAVANESE,
2961             null,
2962             CHAM,
2963             MYANMAR_EXTENDED_A,
2964             TAI_VIET,
2965             MEETEI_MAYEK_EXTENSIONS,
2966             ETHIOPIC_EXTENDED_A,

2967             null,
2968             MEETEI_MAYEK,
2969             HANGUL_SYLLABLES,
2970             HANGUL_JAMO_EXTENDED_B,
2971             HIGH_SURROGATES,
2972             HIGH_PRIVATE_USE_SURROGATES,
2973             LOW_SURROGATES,
2974             PRIVATE_USE_AREA,
2975             CJK_COMPATIBILITY_IDEOGRAPHS,
2976             ALPHABETIC_PRESENTATION_FORMS,
2977             ARABIC_PRESENTATION_FORMS_A,
2978             VARIATION_SELECTORS,
2979             VERTICAL_FORMS,
2980             COMBINING_HALF_MARKS,
2981             CJK_COMPATIBILITY_FORMS,
2982             SMALL_FORM_VARIANTS,
2983             ARABIC_PRESENTATION_FORMS_B,
2984             HALFWIDTH_AND_FULLWIDTH_FORMS,
2985             SPECIALS,
2986             LINEAR_B_SYLLABARY,
2987             LINEAR_B_IDEOGRAMS,
2988             AEGEAN_NUMBERS,
2989             ANCIENT_GREEK_NUMBERS,
2990             ANCIENT_SYMBOLS,
2991             PHAISTOS_DISC,
2992             null,
2993             LYCIAN,
2994             CARIAN,
2995             null,
2996             OLD_ITALIC,
2997             GOTHIC,
2998             null,
2999             UGARITIC,
3000             OLD_PERSIAN,
3001             null,
3002             DESERET,
3003             SHAVIAN,
3004             OSMANYA,
3005             null,





3006             CYPRIOT_SYLLABARY,
3007             IMPERIAL_ARAMAIC,


3008             null,
3009             PHOENICIAN,
3010             LYDIAN,
3011             null,
3012             MEROITIC_HIEROGLYPHS,
3013             MEROITIC_CURSIVE,
3014             KHAROSHTHI,
3015             OLD_SOUTH_ARABIAN,

3016             null,

3017             AVESTAN,
3018             INSCRIPTIONAL_PARTHIAN,
3019             INSCRIPTIONAL_PAHLAVI,

3020             null,
3021             OLD_TURKIC,
3022             null,
3023             RUMI_NUMERAL_SYMBOLS,
3024             null,
3025             BRAHMI,
3026             KAITHI,
3027             SORA_SOMPENG,
3028             CHAKMA,
3029             null,
3030             SHARADA,










3031             null,
3032             TAKRI,
3033             null,




3034             CUNEIFORM,
3035             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3036             null,
3037             EGYPTIAN_HIEROGLYPHS,
3038             null,
3039             BAMUM_SUPPLEMENT,




3040             null,
3041             MIAO,
3042             null,
3043             KANA_SUPPLEMENT,
3044             null,



3045             BYZANTINE_MUSICAL_SYMBOLS,
3046             MUSICAL_SYMBOLS,
3047             ANCIENT_GREEK_MUSICAL_NOTATION,
3048             null,
3049             TAI_XUAN_JING_SYMBOLS,
3050             COUNTING_ROD_NUMERALS,
3051             null,
3052             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3053             null,


3054             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3055             null,
3056             MAHJONG_TILES,
3057             DOMINO_TILES,
3058             PLAYING_CARDS,
3059             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3060             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3061             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3062             EMOTICONS,
3063             null,
3064             TRANSPORT_AND_MAP_SYMBOLS,
3065             ALCHEMICAL_SYMBOLS,


3066             null,
3067             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3068             null,
3069             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3070             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3071             null,
3072             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3073             null,
3074             TAGS,
3075             null,
3076             VARIATION_SELECTORS_SUPPLEMENT,
3077             null,
3078             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3079             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3080         };
3081 
3082 
3083         /**
3084          * Returns the object representing the Unicode block containing the
3085          * given character, or {@code null} if the character is not a


3658          */
3659         INSCRIPTIONAL_PAHLAVI,
3660 
3661         /**
3662          * Unicode script "Old_Turkic".
3663          */
3664         OLD_TURKIC,
3665 
3666         /**
3667          * Unicode script "Brahmi".
3668          */
3669         BRAHMI,
3670 
3671         /**
3672          * Unicode script "Kaithi".
3673          */
3674         KAITHI,
3675 
3676         /**
3677          * Unicode script "Meroitic_Hieroglyphs".

3678          */
3679         MEROITIC_HIEROGLYPHS,
3680 
3681         /**
3682          * Unicode script "Meroitic_Cursive".

3683          */
3684         MEROITIC_CURSIVE,
3685 
3686         /**
3687          * Unicode script "Sora_Sompeng".

3688          */
3689         SORA_SOMPENG,
3690 
3691         /**
3692          * Unicode script "Chakma".

3693          */
3694         CHAKMA,
3695 
3696         /**
3697          * Unicode script "Sharada".

3698          */
3699         SHARADA,
3700 
3701         /**
3702          * Unicode script "Takri".

3703          */
3704         TAKRI,
3705 
3706         /**
3707          * Unicode script "Miao".

3708          */
3709         MIAO,
3710 
3711         /**










































































































































3712          * Unicode script "Unknown".
3713          */
3714         UNKNOWN;
3715 
3716         private static final int[] scriptStarts = {
3717             0x0000,   // 0000..0040; COMMON
3718             0x0041,   // 0041..005A; LATIN
3719             0x005B,   // 005B..0060; COMMON
3720             0x0061,   // 0061..007A; LATIN
3721             0x007B,   // 007B..00A9; COMMON
3722             0x00AA,   // 00AA..00AA; LATIN
3723             0x00AB,   // 00AB..00B9; COMMON
3724             0x00BA,   // 00BA..00BA; LATIN
3725             0x00BB,   // 00BB..00BF; COMMON
3726             0x00C0,   // 00C0..00D6; LATIN
3727             0x00D7,   // 00D7..00D7; COMMON
3728             0x00D8,   // 00D8..00F6; LATIN
3729             0x00F7,   // 00F7..00F7; COMMON
3730             0x00F8,   // 00F8..02B8; LATIN
3731             0x02B9,   // 02B9..02DF; COMMON
3732             0x02E0,   // 02E0..02E4; LATIN
3733             0x02E5,   // 02E5..02E9; COMMON
3734             0x02EA,   // 02EA..02EB; BOPOMOFO
3735             0x02EC,   // 02EC..02FF; COMMON
3736             0x0300,   // 0300..036F; INHERITED
3737             0x0370,   // 0370..0373; GREEK
3738             0x0374,   // 0374..0374; COMMON
3739             0x0375,   // 0375..037D; GREEK
3740             0x037E,   // 037E..0383; COMMON
3741             0x0384,   // 0384..0384; GREEK
3742             0x0385,   // 0385..0385; COMMON
3743             0x0386,   // 0386..0386; GREEK
3744             0x0387,   // 0387..0387; COMMON
3745             0x0388,   // 0388..03E1; GREEK










3746             0x03E2,   // 03E2..03EF; COPTIC
3747             0x03F0,   // 03F0..03FF; GREEK
3748             0x0400,   // 0400..0484; CYRILLIC
3749             0x0485,   // 0485..0486; INHERITED
3750             0x0487,   // 0487..0530; CYRILLIC
3751             0x0531,   // 0531..0588; ARMENIAN
3752             0x0589,   // 0589..0589; COMMON
3753             0x058A,   // 058A..0590; ARMENIAN
3754             0x0591,   // 0591..05FF; HEBREW
3755             0x0600,   // 0600..060B; ARABIC
3756             0x060C,   // 060C..060C; COMMON
















3757             0x060D,   // 060D..061A; ARABIC
3758             0x061B,   // 061B..061D; COMMON
3759             0x061E,   // 061E..061E; ARABIC
3760             0x061F,   // 061F..061F; COMMON

3761             0x0620,   // 0620..063F; ARABIC
3762             0x0640,   // 0640..0640; COMMON
3763             0x0641,   // 0641..064A; ARABIC
3764             0x064B,   // 064B..0655; INHERITED
3765             0x0656,   // 0656..065F; ARABIC
3766             0x0660,   // 0660..0669; COMMON
3767             0x066A,   // 066A..066F; ARABIC
3768             0x0670,   // 0670..0670; INHERITED
3769             0x0671,   // 0671..06DC; ARABIC
3770             0x06DD,   // 06DD..06DD; COMMON
3771             0x06DE,   // 06DE..06FF; ARABIC
3772             0x0700,   // 0700..074F; SYRIAC




3773             0x0750,   // 0750..077F; ARABIC
3774             0x0780,   // 0780..07BF; THAANA
3775             0x07C0,   // 07C0..07FF; NKO
3776             0x0800,   // 0800..083F; SAMARITAN
3777             0x0840,   // 0840..089F; MANDAIC
3778             0x08A0,   // 08A0..08FF; ARABIC










3779             0x0900,   // 0900..0950; DEVANAGARI
3780             0x0951,   // 0951..0952; INHERITED
3781             0x0953,   // 0953..0963; DEVANAGARI
3782             0x0964,   // 0964..0965; COMMON
3783             0x0966,   // 0966..0980; DEVANAGARI
3784             0x0981,   // 0981..0A00; BENGALI
3785             0x0A01,   // 0A01..0A80; GURMUKHI
3786             0x0A81,   // 0A81..0B00; GUJARATI
3787             0x0B01,   // 0B01..0B81; ORIYA
3788             0x0B82,   // 0B82..0C00; TAMIL
3789             0x0C01,   // 0C01..0C81; TELUGU
3790             0x0C82,   // 0C82..0CF0; KANNADA
3791             0x0D02,   // 0D02..0D81; MALAYALAM
3792             0x0D82,   // 0D82..0E00; SINHALA
3793             0x0E01,   // 0E01..0E3E; THAI
3794             0x0E3F,   // 0E3F..0E3F; COMMON
3795             0x0E40,   // 0E40..0E80; THAI
3796             0x0E81,   // 0E81..0EFF; LAO
3797             0x0F00,   // 0F00..0FD4; TIBETAN




























































































































































































































































































3798             0x0FD5,   // 0FD5..0FD8; COMMON
3799             0x0FD9,   // 0FD9..0FFF; TIBETAN

3800             0x1000,   // 1000..109F; MYANMAR
3801             0x10A0,   // 10A0..10FA; GEORGIAN
3802             0x10FB,   // 10FB..10FB; COMMON






3803             0x10FC,   // 10FC..10FF; GEORGIAN
3804             0x1100,   // 1100..11FF; HANGUL
3805             0x1200,   // 1200..139F; ETHIOPIC
3806             0x13A0,   // 13A0..13FF; CHEROKEE




































3807             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3808             0x1680,   // 1680..169F; OGHAM

3809             0x16A0,   // 16A0..16EA; RUNIC
3810             0x16EB,   // 16EB..16ED; COMMON
3811             0x16EE,   // 16EE..16FF; RUNIC
3812             0x1700,   // 1700..171F; TAGALOG




3813             0x1720,   // 1720..1734; HANUNOO
3814             0x1735,   // 1735..173F; COMMON
3815             0x1740,   // 1740..175F; BUHID
3816             0x1760,   // 1760..177F; TAGBANWA
3817             0x1780,   // 1780..17FF; KHMER












3818             0x1800,   // 1800..1801; MONGOLIAN
3819             0x1802,   // 1802..1803; COMMON
3820             0x1804,   // 1804..1804; MONGOLIAN
3821             0x1805,   // 1805..1805; COMMON
3822             0x1806,   // 1806..18AF; MONGOLIAN
3823             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3824             0x1900,   // 1900..194F; LIMBU
3825             0x1950,   // 1950..197F; TAI_LE
3826             0x1980,   // 1980..19DF; NEW_TAI_LUE

























3827             0x19E0,   // 19E0..19FF; KHMER
3828             0x1A00,   // 1A00..1A1F; BUGINESE
3829             0x1A20,   // 1A20..1AFF; TAI_THAM
3830             0x1B00,   // 1B00..1B7F; BALINESE
















3831             0x1B80,   // 1B80..1BBF; SUNDANESE
3832             0x1BC0,   // 1BC0..1BFF; BATAK
3833             0x1C00,   // 1C00..1C4F; LEPCHA
3834             0x1C50,   // 1C50..1CBF; OL_CHIKI
3835             0x1CC0,   // 1CC0..1CCF; SUNDANESE








3836             0x1CD0,   // 1CD0..1CD2; INHERITED
3837             0x1CD3,   // 1CD3..1CD3; COMMON
3838             0x1CD4,   // 1CD4..1CE0; INHERITED
3839             0x1CE1,   // 1CE1..1CE1; COMMON
3840             0x1CE2,   // 1CE2..1CE8; INHERITED
3841             0x1CE9,   // 1CE9..1CEC; COMMON
3842             0x1CED,   // 1CED..1CED; INHERITED
3843             0x1CEE,   // 1CEE..1CF3; COMMON
3844             0x1CF4,   // 1CF4..1CF4; INHERITED
3845             0x1CF5,   // 1CF5..1CFF; COMMON



3846             0x1D00,   // 1D00..1D25; LATIN
3847             0x1D26,   // 1D26..1D2A; GREEK
3848             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3849             0x1D2C,   // 1D2C..1D5C; LATIN
3850             0x1D5D,   // 1D5D..1D61; GREEK
3851             0x1D62,   // 1D62..1D65; LATIN
3852             0x1D66,   // 1D66..1D6A; GREEK
3853             0x1D6B,   // 1D6B..1D77; LATIN
3854             0x1D78,   // 1D78..1D78; CYRILLIC
3855             0x1D79,   // 1D79..1DBE; LATIN
3856             0x1DBF,   // 1DBF..1DBF; GREEK
3857             0x1DC0,   // 1DC0..1DFF; INHERITED


3858             0x1E00,   // 1E00..1EFF; LATIN
3859             0x1F00,   // 1F00..1FFF; GREEK































3860             0x2000,   // 2000..200B; COMMON
3861             0x200C,   // 200C..200D; INHERITED
3862             0x200E,   // 200E..2070; COMMON
3863             0x2071,   // 2071..2073; LATIN



3864             0x2074,   // 2074..207E; COMMON
3865             0x207F,   // 207F..207F; LATIN
3866             0x2080,   // 2080..208F; COMMON
3867             0x2090,   // 2090..209F; LATIN
3868             0x20A0,   // 20A0..20CF; COMMON
3869             0x20D0,   // 20D0..20FF; INHERITED




3870             0x2100,   // 2100..2125; COMMON
3871             0x2126,   // 2126..2126; GREEK
3872             0x2127,   // 2127..2129; COMMON
3873             0x212A,   // 212A..212B; LATIN
3874             0x212C,   // 212C..2131; COMMON
3875             0x2132,   // 2132..2132; LATIN
3876             0x2133,   // 2133..214D; COMMON
3877             0x214E,   // 214E..214E; LATIN
3878             0x214F,   // 214F..215F; COMMON
3879             0x2160,   // 2160..2188; LATIN
3880             0x2189,   // 2189..27FF; COMMON








3881             0x2800,   // 2800..28FF; BRAILLE
3882             0x2900,   // 2900..2BFF; COMMON
3883             0x2C00,   // 2C00..2C5F; GLAGOLITIC












3884             0x2C60,   // 2C60..2C7F; LATIN
3885             0x2C80,   // 2C80..2CFF; COPTIC
3886             0x2D00,   // 2D00..2D2F; GEORGIAN
3887             0x2D30,   // 2D30..2D7F; TIFINAGH
3888             0x2D80,   // 2D80..2DDF; ETHIOPIC




























3889             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3890             0x2E00,   // 2E00..2E7F; COMMON
3891             0x2E80,   // 2E80..2FEF; HAN
3892             0x2FF0,   // 2FF0..3004; COMMON
3893             0x3005,   // 3005..3005; HAN
3894             0x3006,   // 3006..3006; COMMON
3895             0x3007,   // 3007..3007; HAN








3896             0x3008,   // 3008..3020; COMMON
3897             0x3021,   // 3021..3029; HAN
3898             0x302A,   // 302A..302D; INHERITED
3899             0x302E,   // 302E..302F; HANGUL
3900             0x3030,   // 3030..3037; COMMON
3901             0x3038,   // 3038..303B; HAN
3902             0x303C,   // 303C..3040; COMMON
3903             0x3041,   // 3041..3098; HIRAGANA


3904             0x3099,   // 3099..309A; INHERITED
3905             0x309B,   // 309B..309C; COMMON
3906             0x309D,   // 309D..309F; HIRAGANA
3907             0x30A0,   // 30A0..30A0; COMMON
3908             0x30A1,   // 30A1..30FA; KATAKANA
3909             0x30FB,   // 30FB..30FC; COMMON
3910             0x30FD,   // 30FD..3104; KATAKANA
3911             0x3105,   // 3105..3130; BOPOMOFO
3912             0x3131,   // 3131..318F; HANGUL



3913             0x3190,   // 3190..319F; COMMON
3914             0x31A0,   // 31A0..31BF; BOPOMOFO
3915             0x31C0,   // 31C0..31EF; COMMON


3916             0x31F0,   // 31F0..31FF; KATAKANA
3917             0x3200,   // 3200..321F; HANGUL

3918             0x3220,   // 3220..325F; COMMON
3919             0x3260,   // 3260..327E; HANGUL
3920             0x327F,   // 327F..32CF; COMMON
3921             0x32D0,   // 32D0..3357; KATAKANA


3922             0x3358,   // 3358..33FF; COMMON
3923             0x3400,   // 3400..4DBF; HAN

3924             0x4DC0,   // 4DC0..4DFF; COMMON
3925             0x4E00,   // 4E00..9FFF; HAN
3926             0xA000,   // A000..A4CF; YI




3927             0xA4D0,   // A4D0..A4FF; LISU
3928             0xA500,   // A500..A63F; VAI
3929             0xA640,   // A640..A69F; CYRILLIC
3930             0xA6A0,   // A6A0..A6FF; BAMUM




3931             0xA700,   // A700..A721; COMMON
3932             0xA722,   // A722..A787; LATIN
3933             0xA788,   // A788..A78A; COMMON
3934             0xA78B,   // A78B..A7FF; LATIN
3935             0xA800,   // A800..A82F; SYLOTI_NAGRI
3936             0xA830,   // A830..A83F; COMMON
3937             0xA840,   // A840..A87F; PHAGS_PA
3938             0xA880,   // A880..A8DF; SAURASHTRA
3939             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3940             0xA900,   // A900..A92F; KAYAH_LI
3941             0xA930,   // A930..A95F; REJANG
3942             0xA960,   // A960..A97F; HANGUL
3943             0xA980,   // A980..A9FF; JAVANESE
3944             0xAA00,   // AA00..AA5F; CHAM































3945             0xAA60,   // AA60..AA7F; MYANMAR
3946             0xAA80,   // AA80..AADF; TAI_VIET
3947             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3948             0xAB01,   // AB01..ABBF; ETHIOPIC
3949             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3950             0xAC00,   // AC00..D7FB; HANGUL


























3951             0xD7FC,   // D7FC..F8FF; UNKNOWN
3952             0xF900,   // F900..FAFF; HAN
3953             0xFB00,   // FB00..FB12; LATIN
3954             0xFB13,   // FB13..FB1C; ARMENIAN
3955             0xFB1D,   // FB1D..FB4F; HEBREW
3956             0xFB50,   // FB50..FD3D; ARABIC
3957             0xFD3E,   // FD3E..FD4F; COMMON
3958             0xFD50,   // FD50..FDFC; ARABIC
3959             0xFDFD,   // FDFD..FDFF; COMMON






















3960             0xFE00,   // FE00..FE0F; INHERITED
3961             0xFE10,   // FE10..FE1F; COMMON
3962             0xFE20,   // FE20..FE2F; INHERITED
3963             0xFE30,   // FE30..FE6F; COMMON
3964             0xFE70,   // FE70..FEFE; ARABIC
3965             0xFEFF,   // FEFF..FF20; COMMON












3966             0xFF21,   // FF21..FF3A; LATIN
3967             0xFF3B,   // FF3B..FF40; COMMON
3968             0xFF41,   // FF41..FF5A; LATIN
3969             0xFF5B,   // FF5B..FF65; COMMON
3970             0xFF66,   // FF66..FF6F; KATAKANA
3971             0xFF70,   // FF70..FF70; COMMON
3972             0xFF71,   // FF71..FF9D; KATAKANA
3973             0xFF9E,   // FF9E..FF9F; COMMON
3974             0xFFA0,   // FFA0..FFDF; HANGUL
3975             0xFFE0,   // FFE0..FFFF; COMMON
3976             0x10000,  // 10000..100FF; LINEAR_B
3977             0x10100,  // 10100..1013F; COMMON
3978             0x10140,  // 10140..1018F; GREEK
3979             0x10190,  // 10190..101FC; COMMON
3980             0x101FD,  // 101FD..1027F; INHERITED
3981             0x10280,  // 10280..1029F; LYCIAN
3982             0x102A0,  // 102A0..102FF; CARIAN
3983             0x10300,  // 10300..1032F; OLD_ITALIC
3984             0x10330,  // 10330..1037F; GOTHIC
3985             0x10380,  // 10380..1039F; UGARITIC
3986             0x103A0,  // 103A0..103FF; OLD_PERSIAN



















































3987             0x10400,  // 10400..1044F; DESERET
3988             0x10450,  // 10450..1047F; SHAVIAN
3989             0x10480,  // 10480..107FF; OSMANYA
3990             0x10800,  // 10800..1083F; CYPRIOT
3991             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3992             0x10900,  // 10900..1091F; PHOENICIAN
3993             0x10920,  // 10920..1097F; LYDIAN





































3994             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3995             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3996             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3997             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3998             0x10B00,  // 10B00..10B3F; AVESTAN
3999             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4000             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4001             0x10C00,  // 10C00..10E5F; OLD_TURKIC
4002             0x10E60,  // 10E60..10FFF; ARABIC
4003             0x11000,  // 11000..1107F; BRAHMI
4004             0x11080,  // 11080..110CF; KAITHI
4005             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4006             0x11100,  // 11100..1117F; CHAKMA
4007             0x11180,  // 11180..1167F; SHARADA
4008             0x11680,  // 11680..116CF; TAKRI
4009             0x12000,  // 12000..12FFF; CUNEIFORM
4010             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS

























































































































4011             0x16800,  // 16800..16A38; BAMUM
4012             0x16F00,  // 16F00..16F9F; MIAO
4013             0x1B000,  // 1B000..1B000; KATAKANA
4014             0x1B001,  // 1B001..1CFFF; HIRAGANA
4015             0x1D000,  // 1D000..1D166; COMMON










































4016             0x1D167,  // 1D167..1D169; INHERITED
4017             0x1D16A,  // 1D16A..1D17A; COMMON
4018             0x1D17B,  // 1D17B..1D182; INHERITED
4019             0x1D183,  // 1D183..1D184; COMMON
4020             0x1D185,  // 1D185..1D18B; INHERITED
4021             0x1D18C,  // 1D18C..1D1A9; COMMON
4022             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4023             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4024             0x1D200,  // 1D200..1D2FF; GREEK
4025             0x1D300,  // 1D300..1EDFF; COMMON
4026             0x1EE00,  // 1EE00..1EFFF; ARABIC
4027             0x1F000,  // 1F000..1F1FF; COMMON
4028             0x1F200,  // 1F200..1F200; HIRAGANA
4029             0x1F201,  // 1F201..1FFFF; COMMON
4030             0x20000,  // 20000..E0000; HAN
4031             0xE0001,  // E0001..E00FF; COMMON

































































































































































































4032             0xE0100,  // E0100..E01EF; INHERITED
4033             0xE01F0   // E01F0..10FFFF; UNKNOWN
4034 
4035         };
4036 
4037         private static final UnicodeScript[] scripts = {
4038             COMMON,
4039             LATIN,
4040             COMMON,
4041             LATIN,
4042             COMMON,
4043             LATIN,
4044             COMMON,
4045             LATIN,
4046             COMMON,
4047             LATIN,
4048             COMMON,
4049             LATIN,
4050             COMMON,
4051             LATIN,
4052             COMMON,
4053             LATIN,
4054             COMMON,
4055             BOPOMOFO,
4056             COMMON,
4057             INHERITED,
4058             GREEK,
4059             COMMON,
4060             GREEK,
4061             COMMON,
4062             GREEK,
4063             COMMON,
4064             GREEK,
4065             COMMON,
4066             GREEK,
4067             COPTIC,
4068             GREEK,
4069             CYRILLIC,
4070             INHERITED,
4071             CYRILLIC,
4072             ARMENIAN,
4073             COMMON,
4074             ARMENIAN,
4075             HEBREW,
4076             ARABIC,
4077             COMMON,
4078             ARABIC,
4079             COMMON,
4080             ARABIC,
4081             COMMON,
4082             ARABIC,
4083             COMMON,
4084             ARABIC,
4085             INHERITED,
4086             ARABIC,
4087             COMMON,
4088             ARABIC,
4089             INHERITED,
4090             ARABIC,
4091             COMMON,
4092             ARABIC,
4093             SYRIAC,
4094             ARABIC,
4095             THAANA,
4096             NKO,
4097             SAMARITAN,
4098             MANDAIC,
4099             ARABIC,
4100             DEVANAGARI,
4101             INHERITED,
4102             DEVANAGARI,
4103             COMMON,
4104             DEVANAGARI,
4105             BENGALI,
4106             GURMUKHI,
4107             GUJARATI,
4108             ORIYA,
4109             TAMIL,
4110             TELUGU,
4111             KANNADA,
4112             MALAYALAM,
4113             SINHALA,
4114             THAI,
4115             COMMON,
4116             THAI,
4117             LAO,
4118             TIBETAN,
4119             COMMON,
4120             TIBETAN,
4121             MYANMAR,
4122             GEORGIAN,
4123             COMMON,
4124             GEORGIAN,
4125             HANGUL,
4126             ETHIOPIC,
4127             CHEROKEE,
4128             CANADIAN_ABORIGINAL,
4129             OGHAM,
4130             RUNIC,
4131             COMMON,
4132             RUNIC,
4133             TAGALOG,
4134             HANUNOO,
4135             COMMON,
4136             BUHID,
4137             TAGBANWA,
4138             KHMER,
4139             MONGOLIAN,
4140             COMMON,
4141             MONGOLIAN,
4142             COMMON,
4143             MONGOLIAN,
4144             CANADIAN_ABORIGINAL,
4145             LIMBU,
4146             TAI_LE,
4147             NEW_TAI_LUE,
4148             KHMER,
4149             BUGINESE,
4150             TAI_THAM,
4151             BALINESE,
4152             SUNDANESE,
4153             BATAK,
4154             LEPCHA,
4155             OL_CHIKI,
4156             SUNDANESE,
4157             INHERITED,
4158             COMMON,
4159             INHERITED,
4160             COMMON,
4161             INHERITED,
4162             COMMON,
4163             INHERITED,
4164             COMMON,
4165             INHERITED,
4166             COMMON,
4167             LATIN,
4168             GREEK,
4169             CYRILLIC,
4170             LATIN,
4171             GREEK,
4172             LATIN,
4173             GREEK,
4174             LATIN,
4175             CYRILLIC,
4176             LATIN,
4177             GREEK,
4178             INHERITED,
4179             LATIN,
4180             GREEK,
4181             COMMON,
4182             INHERITED,
4183             COMMON,
4184             LATIN,
4185             COMMON,
4186             LATIN,
4187             COMMON,
4188             LATIN,
4189             COMMON,
4190             INHERITED,
4191             COMMON,
4192             GREEK,
4193             COMMON,
4194             LATIN,
4195             COMMON,
4196             LATIN,
4197             COMMON,
4198             LATIN,
4199             COMMON,
4200             LATIN,
4201             COMMON,
4202             BRAILLE,
4203             COMMON,
4204             GLAGOLITIC,
4205             LATIN,
4206             COPTIC,
4207             GEORGIAN,
4208             TIFINAGH,
4209             ETHIOPIC,
4210             CYRILLIC,
4211             COMMON,
4212             HAN,
4213             COMMON,
4214             HAN,
4215             COMMON,
4216             HAN,
4217             COMMON,
4218             HAN,
4219             INHERITED,
4220             HANGUL,
4221             COMMON,
4222             HAN,
4223             COMMON,
4224             HIRAGANA,
4225             INHERITED,
4226             COMMON,
4227             HIRAGANA,
4228             COMMON,
4229             KATAKANA,
4230             COMMON,
4231             KATAKANA,
4232             BOPOMOFO,
4233             HANGUL,
4234             COMMON,
4235             BOPOMOFO,
4236             COMMON,
4237             KATAKANA,
4238             HANGUL,
4239             COMMON,
4240             HANGUL,
4241             COMMON,
4242             KATAKANA,
4243             COMMON,
4244             HAN,
4245             COMMON,
4246             HAN,
4247             YI,
4248             LISU,
4249             VAI,
4250             CYRILLIC,
4251             BAMUM,
4252             COMMON,
4253             LATIN,
4254             COMMON,
4255             LATIN,
4256             SYLOTI_NAGRI,
4257             COMMON,
4258             PHAGS_PA,
4259             SAURASHTRA,
4260             DEVANAGARI,
4261             KAYAH_LI,
4262             REJANG,
4263             HANGUL,
4264             JAVANESE,
4265             CHAM,
4266             MYANMAR,
4267             TAI_VIET,
4268             MEETEI_MAYEK,
4269             ETHIOPIC,
4270             MEETEI_MAYEK,
4271             HANGUL,
4272             UNKNOWN     ,
4273             HAN,
4274             LATIN,
4275             ARMENIAN,
4276             HEBREW,
4277             ARABIC,
4278             COMMON,
4279             ARABIC,
4280             COMMON,
4281             INHERITED,
4282             COMMON,
4283             INHERITED,
4284             COMMON,
4285             ARABIC,
4286             COMMON,
4287             LATIN,
4288             COMMON,
4289             LATIN,
4290             COMMON,
4291             KATAKANA,
4292             COMMON,
4293             KATAKANA,
4294             COMMON,
4295             HANGUL,
4296             COMMON,
4297             LINEAR_B,
4298             COMMON,
4299             GREEK,
4300             COMMON,
4301             INHERITED,
4302             LYCIAN,
4303             CARIAN,
4304             OLD_ITALIC,
4305             GOTHIC,
4306             UGARITIC,
4307             OLD_PERSIAN,
4308             DESERET,
4309             SHAVIAN,
4310             OSMANYA,
4311             CYPRIOT,
4312             IMPERIAL_ARAMAIC,
4313             PHOENICIAN,
4314             LYDIAN,
4315             MEROITIC_HIEROGLYPHS,
4316             MEROITIC_CURSIVE,
4317             KHAROSHTHI,
4318             OLD_SOUTH_ARABIAN,
4319             AVESTAN,
4320             INSCRIPTIONAL_PARTHIAN,
4321             INSCRIPTIONAL_PAHLAVI,
4322             OLD_TURKIC,
4323             ARABIC,
4324             BRAHMI,
4325             KAITHI,
4326             SORA_SOMPENG,
4327             CHAKMA,
4328             SHARADA,
4329             TAKRI,
4330             CUNEIFORM,
4331             EGYPTIAN_HIEROGLYPHS,
4332             BAMUM,
4333             MIAO,
4334             KATAKANA,
4335             HIRAGANA,
4336             COMMON,
4337             INHERITED,
4338             COMMON,
4339             INHERITED,
4340             COMMON,
4341             INHERITED,
4342             COMMON,
4343             INHERITED,
4344             COMMON,
4345             GREEK,
4346             COMMON,
4347             ARABIC,
4348             COMMON,
4349             HIRAGANA,
4350             COMMON,
4351             HAN,
4352             COMMON,
4353             INHERITED,
4354             UNKNOWN































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































4355         };
4356 
4357         private static HashMap<String, Character.UnicodeScript> aliases;
4358         static {
4359             aliases = new HashMap<>(128);

4360             aliases.put("ARAB", ARABIC);
4361             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4362             aliases.put("ARMN", ARMENIAN);
4363             aliases.put("AVST", AVESTAN);
4364             aliases.put("BALI", BALINESE);
4365             aliases.put("BAMU", BAMUM);

4366             aliases.put("BATK", BATAK);
4367             aliases.put("BENG", BENGALI);
4368             aliases.put("BOPO", BOPOMOFO);
4369             aliases.put("BRAI", BRAILLE);
4370             aliases.put("BRAH", BRAHMI);

4371             aliases.put("BUGI", BUGINESE);
4372             aliases.put("BUHD", BUHID);
4373             aliases.put("CAKM", CHAKMA);
4374             aliases.put("CANS", CANADIAN_ABORIGINAL);
4375             aliases.put("CARI", CARIAN);
4376             aliases.put("CHAM", CHAM);
4377             aliases.put("CHER", CHEROKEE);
4378             aliases.put("COPT", COPTIC);
4379             aliases.put("CPRT", CYPRIOT);
4380             aliases.put("CYRL", CYRILLIC);
4381             aliases.put("DEVA", DEVANAGARI);
4382             aliases.put("DSRT", DESERET);

4383             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);

4384             aliases.put("ETHI", ETHIOPIC);
4385             aliases.put("GEOR", GEORGIAN);
4386             aliases.put("GLAG", GLAGOLITIC);
4387             aliases.put("GOTH", GOTHIC);

4388             aliases.put("GREK", GREEK);
4389             aliases.put("GUJR", GUJARATI);
4390             aliases.put("GURU", GURMUKHI);
4391             aliases.put("HANG", HANGUL);
4392             aliases.put("HANI", HAN);
4393             aliases.put("HANO", HANUNOO);
4394             aliases.put("HEBR", HEBREW);
4395             aliases.put("HIRA", HIRAGANA);

4396             // it appears we don't have the KATAKANA_OR_HIRAGANA
4397             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4398             aliases.put("ITAL", OLD_ITALIC);
4399             aliases.put("JAVA", JAVANESE);
4400             aliases.put("KALI", KAYAH_LI);
4401             aliases.put("KANA", KATAKANA);
4402             aliases.put("KHAR", KHAROSHTHI);
4403             aliases.put("KHMR", KHMER);

4404             aliases.put("KNDA", KANNADA);
4405             aliases.put("KTHI", KAITHI);
4406             aliases.put("LANA", TAI_THAM);
4407             aliases.put("LAOO", LAO);
4408             aliases.put("LATN", LATIN);
4409             aliases.put("LEPC", LEPCHA);
4410             aliases.put("LIMB", LIMBU);

4411             aliases.put("LINB", LINEAR_B);
4412             aliases.put("LISU", LISU);
4413             aliases.put("LYCI", LYCIAN);
4414             aliases.put("LYDI", LYDIAN);

4415             aliases.put("MAND", MANDAIC);


4416             aliases.put("MERC", MEROITIC_CURSIVE);
4417             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4418             aliases.put("MLYM", MALAYALAM);

4419             aliases.put("MONG", MONGOLIAN);

4420             aliases.put("MTEI", MEETEI_MAYEK);
4421             aliases.put("MYMR", MYANMAR);


4422             aliases.put("NKOO", NKO);
4423             aliases.put("OGAM", OGHAM);
4424             aliases.put("OLCK", OL_CHIKI);
4425             aliases.put("ORKH", OLD_TURKIC);
4426             aliases.put("ORYA", ORIYA);
4427             aliases.put("OSMA", OSMANYA);



4428             aliases.put("PHAG", PHAGS_PA);
4429             aliases.put("PLRD", MIAO);
4430             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);

4431             aliases.put("PHNX", PHOENICIAN);

4432             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4433             aliases.put("RJNG", REJANG);
4434             aliases.put("RUNR", RUNIC);
4435             aliases.put("SAMR", SAMARITAN);
4436             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4437             aliases.put("SAUR", SAURASHTRA);
4438             aliases.put("SHAW", SHAVIAN);
4439             aliases.put("SHRD", SHARADA);


4440             aliases.put("SINH", SINHALA);
4441             aliases.put("SORA", SORA_SOMPENG);
4442             aliases.put("SUND", SUNDANESE);
4443             aliases.put("SYLO", SYLOTI_NAGRI);
4444             aliases.put("SYRC", SYRIAC);
4445             aliases.put("TAGB", TAGBANWA);
4446             aliases.put("TALE", TAI_LE);
4447             aliases.put("TAKR", TAKRI);

4448             aliases.put("TALU", NEW_TAI_LUE);
4449             aliases.put("TAML", TAMIL);
4450             aliases.put("TAVT", TAI_VIET);
4451             aliases.put("TELU", TELUGU);
4452             aliases.put("TFNG", TIFINAGH);
4453             aliases.put("TGLG", TAGALOG);
4454             aliases.put("THAA", THAANA);
4455             aliases.put("THAI", THAI);
4456             aliases.put("TIBT", TIBETAN);

4457             aliases.put("UGAR", UGARITIC);
4458             aliases.put("VAII", VAI);

4459             aliases.put("XPEO", OLD_PERSIAN);
4460             aliases.put("XSUX", CUNEIFORM);
4461             aliases.put("YIII", YI);
4462             aliases.put("ZINH", INHERITED);
4463             aliases.put("ZYYY", COMMON);
4464             aliases.put("ZZZZ", UNKNOWN);
4465         }
4466 
4467         /**
4468          * Returns the enum constant representing the Unicode script to which
4469          * the given character (Unicode code point) is assigned.
4470          *
4471          * @param   codePoint the character (Unicode code point) in question.
4472          * @return  The {@code UnicodeScript} constant representing the
4473          *          Unicode script to which this character is assigned.
4474          *
4475          * @exception IllegalArgumentException if the specified
4476          * {@code codePoint} is an invalid Unicode code point.
4477          * @see Character#isValidCodePoint(int)
4478          *


6577      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6578      * full width variant ({@code '\u005CuFF21'} through
6579      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6580      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6581      * through 35. This is independent of the Unicode specification,
6582      * which does not assign numeric values to these {@code char}
6583      * values.
6584      * <p>
6585      * If the character does not have a numeric value, then -1 is returned.
6586      * If the character has a numeric value that cannot be represented as a
6587      * nonnegative integer (for example, a fractional value), then -2
6588      * is returned.
6589      *
6590      * <p><b>Note:</b> This method cannot handle <a
6591      * href="#supplementary"> supplementary characters</a>. To support
6592      * all Unicode characters, including supplementary characters, use
6593      * the {@link #getNumericValue(int)} method.
6594      *
6595      * @param   ch      the character to be converted.
6596      * @return  the numeric value of the character, as a nonnegative {@code int}
6597      *           value; -2 if the character has a numeric value that is not a
6598      *          nonnegative integer; -1 if the character has no numeric value.

6599      * @see     Character#forDigit(int, int)
6600      * @see     Character#isDigit(char)
6601      * @since   1.1
6602      */
6603     public static int getNumericValue(char ch) {
6604         return getNumericValue((int)ch);
6605     }
6606 
6607     /**
6608      * Returns the {@code int} value that the specified
6609      * character (Unicode code point) represents. For example, the character
6610      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6611      * an {@code int} with a value of 50.
6612      * <p>
6613      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6614      * {@code '\u005Cu005A'}), lowercase
6615      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6616      * full width variant ({@code '\u005CuFF21'} through
6617      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6618      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6619      * through 35. This is independent of the Unicode specification,
6620      * which does not assign numeric values to these {@code char}
6621      * values.
6622      * <p>
6623      * If the character does not have a numeric value, then -1 is returned.
6624      * If the character has a numeric value that cannot be represented as a
6625      * nonnegative integer (for example, a fractional value), then -2
6626      * is returned.
6627      *
6628      * @param   codePoint the character (Unicode code point) to be converted.
6629      * @return  the numeric value of the character, as a nonnegative {@code int}
6630      *          value; -2 if the character has a numeric value that is not a
6631      *          nonnegative integer; -1 if the character has no numeric value.

6632      * @see     Character#forDigit(int, int)
6633      * @see     Character#isDigit(int)
6634      * @since   1.5
6635      */
6636     public static int getNumericValue(int codePoint) {
6637         return CharacterData.of(codePoint).getNumericValue(codePoint);
6638     }
6639 
6640     /**
6641      * Determines if the specified character is ISO-LATIN-1 white space.
6642      * This method returns {@code true} for the following five
6643      * characters only:
6644      * <table summary="truechars">
6645      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6646      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6647      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6648      *     <td>{@code NEW LINE}</td></tr>
6649      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6650      *     <td>{@code FORM FEED}</td></tr>
6651      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>


6981      * @see Character#DIRECTIONALITY_UNDEFINED
6982      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6983      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6984      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6985      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6986      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6987      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6988      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6989      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6990      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6991      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6992      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6993      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6994      * @see Character#DIRECTIONALITY_WHITESPACE
6995      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6996      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6997      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6998      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6999      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7000      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT




7001      * @since 1.4
7002      */
7003     public static byte getDirectionality(char ch) {
7004         return getDirectionality((int)ch);
7005     }
7006 
7007     /**
7008      * Returns the Unicode directionality property for the given
7009      * character (Unicode code point).  Character directionality is
7010      * used to calculate the visual ordering of text. The
7011      * directionality value of undefined character is {@link
7012      * #DIRECTIONALITY_UNDEFINED}.
7013      *
7014      * @param   codePoint the character (Unicode code point) for which
7015      *          the directionality property is requested.
7016      * @return the directionality property of the character.
7017      *
7018      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7019      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7020      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7021      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7022      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7023      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7024      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7025      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7026      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7027      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7028      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7029      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7030      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7031      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7032      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7033      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7034      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7035      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7036      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7037      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT




7038      * @since    1.5
7039      */
7040     public static byte getDirectionality(int codePoint) {
7041         return CharacterData.of(codePoint).getDirectionality(codePoint);
7042     }
7043 
7044     /**
7045      * Determines whether the character is mirrored according to the
7046      * Unicode specification.  Mirrored characters should have their
7047      * glyphs horizontally mirrored when displayed in text that is
7048      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7049      * PARENTHESIS is semantically defined to be an <i>opening
7050      * parenthesis</i>.  This will appear as a "(" in text that is
7051      * left-to-right but as a ")" in text that is right-to-left.
7052      *
7053      * <p><b>Note:</b> This method cannot handle <a
7054      * href="#supplementary"> supplementary characters</a>. To support
7055      * all Unicode characters, including supplementary characters, use
7056      * the {@link #isMirrored(int)} method.
7057      *


< prev index next >