src/share/classes/java/lang/Character.java

Print this page

        

@@ -22,10 +22,11 @@
  * CA 95054 USA or visit www.sun.com if you need additional information or
  * have any questions.
  */
 
 package java.lang;
+import java.util.Arrays;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Locale;
 
 /**

@@ -2545,10 +2546,1245 @@
         }
     }
 
 
     /**
+     * A family of character subsets representing the character scripts
+     * defined in the <a href="http://www.unicode.org/reports/tr24/">
+     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
+     * character is assigned to a single Unicode script, either a specific
+     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
+     * one of the following three special values,
+     * {@link Character.UnicodeScript#INHERITED Inherited},
+     * {@link Character.UnicodeScript#COMMON Common} or
+     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
+     *
+     * @since 1.7
+     */
+    public static enum UnicodeScript {
+        /**
+         * Unicode script "Common".
+         */
+        COMMON,
+
+        /**
+         * Unicode script "Latin".
+         */
+        LATIN,
+
+        /**
+         * Unicode script "Greek".
+         */
+        GREEK,
+
+        /**
+         * Unicode script "Cyrillic".
+         */
+        CYRILLIC,
+
+        /**
+         * Unicode script "Armenian".
+         */
+        ARMENIAN,
+
+        /**
+         * Unicode script "Hebrew".
+         */
+        HEBREW,
+
+        /**
+         * Unicode script "Arabic".
+         */
+        ARABIC,
+
+        /**
+         * Unicode script "Syriac".
+         */
+        SYRIAC,
+
+        /**
+         * Unicode script "Thaana".
+         */
+        THAANA,
+
+        /**
+         * Unicode script "Devanagari".
+         */
+        DEVANAGARI,
+
+        /**
+         * Unicode script "Bengali".
+         */
+        BENGALI,
+
+        /**
+         * Unicode script "Gurmukhi".
+         */
+        GURMUKHI,
+
+        /**
+         * Unicode script "Gujarati".
+         */
+        GUJARATI,
+
+        /**
+         * Unicode script "Oriya".
+         */
+        ORIYA,
+
+        /**
+         * Unicode script "Tamil".
+         */
+        TAMIL,
+
+        /**
+         * Unicode script "Telugu".
+         */
+        TELUGU,
+
+        /**
+         * Unicode script "Kannada".
+         */
+        KANNADA,
+
+        /**
+         * Unicode script "Malayalam".
+         */
+        MALAYALAM,
+
+        /**
+         * Unicode script "Sinhala".
+         */
+        SINHALA,
+
+        /**
+         * Unicode script "Thai".
+         */
+        THAI,
+
+        /**
+         * Unicode script "Lao".
+         */
+        LAO,
+
+        /**
+         * Unicode script "Tibetan".
+         */
+        TIBETAN,
+
+        /**
+         * Unicode script "Myanmar".
+         */
+        MYANMAR,
+
+        /**
+         * Unicode script "Georgian".
+         */
+        GEORGIAN,
+
+        /**
+         * Unicode script "Hangul".
+         */
+        HANGUL,
+
+        /**
+         * Unicode script "Ethiopic".
+         */
+        ETHIOPIC,
+
+        /**
+         * Unicode script "Cherokee".
+         */
+        CHEROKEE,
+
+        /**
+         * Unicode script "Canadian_Aboriginal".
+         */
+        CANADIAN_ABORIGINAL,
+
+        /**
+         * Unicode script "Ogham".
+         */
+        OGHAM,
+
+        /**
+         * Unicode script "Runic".
+         */
+        RUNIC,
+
+        /**
+         * Unicode script "Khmer".
+         */
+        KHMER,
+
+        /**
+         * Unicode script "Mongolian".
+         */
+        MONGOLIAN,
+
+        /**
+         * Unicode script "Hiragana".
+         */
+        HIRAGANA,
+
+        /**
+         * Unicode script "Katakana".
+         */
+        KATAKANA,
+
+        /**
+         * Unicode script "Bopomofo".
+         */
+        BOPOMOFO,
+
+        /**
+         * Unicode script "Han".
+         */
+        HAN,
+
+        /**
+         * Unicode script "Yi".
+         */
+        YI,
+
+        /**
+         * Unicode script "Old_Italic".
+         */
+        OLD_ITALIC,
+
+        /**
+         * Unicode script "Gothic".
+         */
+        GOTHIC,
+
+        /**
+         * Unicode script "Deseret".
+         */
+        DESERET,
+
+        /**
+         * Unicode script "Inherited".
+         */
+        INHERITED,
+
+        /**
+         * Unicode script "Tagalog".
+         */
+        TAGALOG,
+
+        /**
+         * Unicode script "Hanunoo".
+         */
+        HANUNOO,
+
+        /**
+         * Unicode script "Buhid".
+         */
+        BUHID,
+
+        /**
+         * Unicode script "Tagbanwa".
+         */
+        TAGBANWA,
+
+        /**
+         * Unicode script "Limbu".
+         */
+        LIMBU,
+
+        /**
+         * Unicode script "Tai_Le".
+         */
+        TAI_LE,
+
+        /**
+         * Unicode script "Linear_B".
+         */
+        LINEAR_B,
+
+        /**
+         * Unicode script "Ugaritic".
+         */
+        UGARITIC,
+
+        /**
+         * Unicode script "Shavian".
+         */
+        SHAVIAN,
+
+        /**
+         * Unicode script "Osmanya".
+         */
+        OSMANYA,
+
+        /**
+         * Unicode script "Cypriot".
+         */
+        CYPRIOT,
+
+        /**
+         * Unicode script "Braille".
+         */
+        BRAILLE,
+
+        /**
+         * Unicode script "Buginese".
+         */
+        BUGINESE,
+
+        /**
+         * Unicode script "Coptic".
+         */
+        COPTIC,
+
+        /**
+         * Unicode script "New_Tai_Lue".
+         */
+        NEW_TAI_LUE,
+
+        /**
+         * Unicode script "Glagolitic".
+         */
+        GLAGOLITIC,
+
+        /**
+         * Unicode script "Tifinagh".
+         */
+        TIFINAGH,
+
+        /**
+         * Unicode script "Syloti_Nagri".
+         */
+        SYLOTI_NAGRI,
+
+        /**
+         * Unicode script "Old_Persian".
+         */
+        OLD_PERSIAN,
+
+        /**
+         * Unicode script "Kharoshthi".
+         */
+        KHAROSHTHI,
+
+        /**
+         * Unicode script "Balinese".
+         */
+        BALINESE,
+
+        /**
+         * Unicode script "Cuneiform".
+         */
+        CUNEIFORM,
+
+        /**
+         * Unicode script "Phoenician".
+         */
+        PHOENICIAN,
+
+        /**
+         * Unicode script "Phags_Pa".
+         */
+        PHAGS_PA,
+
+        /**
+         * Unicode script "Nko".
+         */
+        NKO,
+
+        /**
+         * Unicode script "Sundanese".
+         */
+        SUNDANESE,
+
+        /**
+         * Unicode script "Lepcha".
+         */
+        LEPCHA,
+
+        /**
+         * Unicode script "Ol_Chiki".
+         */
+        OL_CHIKI,
+
+        /**
+         * Unicode script "Vai".
+         */
+        VAI,
+
+        /**
+         * Unicode script "Saurashtra".
+         */
+        SAURASHTRA,
+
+        /**
+         * Unicode script "Kayah_Li".
+         */
+        KAYAH_LI,
+
+        /**
+         * Unicode script "Rejang".
+         */
+        REJANG,
+
+        /**
+         * Unicode script "Lycian".
+         */
+        LYCIAN,
+
+        /**
+         * Unicode script "Carian".
+         */
+        CARIAN,
+
+        /**
+         * Unicode script "Lydian".
+         */
+        LYDIAN,
+
+        /**
+         * Unicode script "Cham".
+         */
+        CHAM,
+
+        /**
+         * Unicode script "Tai_Tham".
+         */
+        TAI_THAM,
+
+        /**
+         * Unicode script "Tai_Viet".
+         */
+        TAI_VIET,
+
+        /**
+         * Unicode script "Avestan".
+         */
+        AVESTAN,
+
+        /**
+         * Unicode script "Egyptian_Hieroglyphs".
+         */
+        EGYPTIAN_HIEROGLYPHS,
+
+        /**
+         * Unicode script "Samaritan".
+         */
+        SAMARITAN,
+
+        /**
+         * Unicode script "Lisu".
+         */
+        LISU,
+
+        /**
+         * Unicode script "Bamum".
+         */
+        BAMUM,
+
+        /**
+         * Unicode script "Javanese".
+         */
+        JAVANESE,
+
+        /**
+         * Unicode script "Meetei_Mayek".
+         */
+        MEETEI_MAYEK,
+
+        /**
+         * Unicode script "Imperial_Aramaic".
+         */
+        IMPERIAL_ARAMAIC,
+
+        /**
+         * Unicode script "Old_South_Arabian".
+         */
+        OLD_SOUTH_ARABIAN,
+
+        /**
+         * Unicode script "Inscriptional_Parthian".
+         */
+        INSCRIPTIONAL_PARTHIAN,
+
+        /**
+         * Unicode script "Inscriptional_Pahlavi".
+         */
+        INSCRIPTIONAL_PAHLAVI,
+
+        /**
+         * Unicode script "Old_Turkic".
+         */
+        OLD_TURKIC,
+
+        /**
+         * Unicode script "Kaithi".
+         */
+        KAITHI,
+
+        /**
+         * Unicode script "Unknown".
+         */
+        UNKNOWN;
+
+        private static final int[] scriptStarts = {
+            0x0000,   // 0000..0040; COMMON
+            0x0041,   // 0041..005A; LATIN
+            0x005B,   // 005B..0060; COMMON
+            0x0061,   // 0061..007A; LATIN
+            0x007B,   // 007B..00A9; COMMON
+            0x00AA,   // 00AA..00AA; LATIN
+            0x00AB,   // 00AB..00B9; COMMON
+            0x00BA,   // 00BA..00BA; LATIN
+            0x00BB,   // 00BB..00BF; COMMON
+            0x00C0,   // 00C0..00D6; LATIN
+            0x00D7,   // 00D7..00D7; COMMON
+            0x00D8,   // 00D8..00F6; LATIN
+            0x00F7,   // 00F7..00F7; COMMON
+            0x00F8,   // 00F8..02B8; LATIN
+            0x02B9,   // 02B9..02DF; COMMON
+            0x02E0,   // 02E0..02E4; LATIN
+            0x02E5,   // 02E5..02FF; COMMON
+            0x0300,   // 0300..036F; INHERITED
+            0x0370,   // 0370..0373; GREEK
+            0x0374,   // 0374..0374; COMMON
+            0x0375,   // 0375..037D; GREEK
+            0x037E,   // 037E..0383; COMMON
+            0x0384,   // 0384..0384; GREEK
+            0x0385,   // 0385..0385; COMMON
+            0x0386,   // 0386..0386; GREEK
+            0x0387,   // 0387..0387; COMMON
+            0x0388,   // 0388..03E1; GREEK
+            0x03E2,   // 03E2..03EF; COPTIC
+            0x03F0,   // 03F0..03FF; GREEK
+            0x0400,   // 0400..0484; CYRILLIC
+            0x0485,   // 0485..0486; INHERITED
+            0x0487,   // 0487..0530; CYRILLIC
+            0x0531,   // 0531..0588; ARMENIAN
+            0x0589,   // 0589..0589; COMMON
+            0x058A,   // 058A..0590; ARMENIAN
+            0x0591,   // 0591..05FF; HEBREW
+            0x0600,   // 0600..0605; COMMON
+            0x0606,   // 0606..060B; ARABIC
+            0x060C,   // 060C..060C; COMMON
+            0x060D,   // 060D..061A; ARABIC
+            0x061B,   // 061B..061D; COMMON
+            0x061E,   // 061E..061E; ARABIC
+            0x061F,   // 061F..0620; COMMON
+            0x0621,   // 0621..063F; ARABIC
+            0x0640,   // 0640..0640; COMMON
+            0x0641,   // 0641..064A; ARABIC
+            0x064B,   // 064B..0655; INHERITED
+            0x0656,   // 0656..065F; ARABIC
+            0x0660,   // 0660..0669; COMMON
+            0x066A,   // 066A..066F; ARABIC
+            0x0670,   // 0670..0670; INHERITED
+            0x0671,   // 0671..06DC; ARABIC
+            0x06DD,   // 06DD..06DD; COMMON
+            0x06DE,   // 06DE..06FF; ARABIC
+            0x0700,   // 0700..074F; SYRIAC
+            0x0750,   // 0750..077F; ARABIC
+            0x0780,   // 0780..07BF; THAANA
+            0x07C0,   // 07C0..07FF; NKO
+            0x0800,   // 0800..08FF; SAMARITAN
+            0x0900,   // 0900..0950; DEVANAGARI
+            0x0951,   // 0951..0952; INHERITED
+            0x0953,   // 0953..0963; DEVANAGARI
+            0x0964,   // 0964..0965; COMMON
+            0x0966,   // 0966..096F; DEVANAGARI
+            0x0970,   // 0970..0970; COMMON
+            0x0971,   // 0971..0980; DEVANAGARI
+            0x0981,   // 0981..0A00; BENGALI
+            0x0A01,   // 0A01..0A80; GURMUKHI
+            0x0A81,   // 0A81..0B00; GUJARATI
+            0x0B01,   // 0B01..0B81; ORIYA
+            0x0B82,   // 0B82..0C00; TAMIL
+            0x0C01,   // 0C01..0C81; TELUGU
+            0x0C82,   // 0C82..0CF0; KANNADA
+            0x0CF1,   // 0CF1..0D01; COMMON
+            0x0D02,   // 0D02..0D81; MALAYALAM
+            0x0D82,   // 0D82..0E00; SINHALA
+            0x0E01,   // 0E01..0E3E; THAI
+            0x0E3F,   // 0E3F..0E3F; COMMON
+            0x0E40,   // 0E40..0E80; THAI
+            0x0E81,   // 0E81..0EFF; LAO
+            0x0F00,   // 0F00..0FD4; TIBETAN
+            0x0FD5,   // 0FD5..0FFF; COMMON
+            0x1000,   // 1000..109F; MYANMAR
+            0x10A0,   // 10A0..10FA; GEORGIAN
+            0x10FB,   // 10FB..10FB; COMMON
+            0x10FC,   // 10FC..10FF; GEORGIAN
+            0x1100,   // 1100..11FF; HANGUL
+            0x1200,   // 1200..139F; ETHIOPIC
+            0x13A0,   // 13A0..13FF; CHEROKEE
+            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
+            0x1680,   // 1680..169F; OGHAM
+            0x16A0,   // 16A0..16EA; RUNIC
+            0x16EB,   // 16EB..16ED; COMMON
+            0x16EE,   // 16EE..16FF; RUNIC
+            0x1700,   // 1700..171F; TAGALOG
+            0x1720,   // 1720..1734; HANUNOO
+            0x1735,   // 1735..173F; COMMON
+            0x1740,   // 1740..175F; BUHID
+            0x1760,   // 1760..177F; TAGBANWA
+            0x1780,   // 1780..17FF; KHMER
+            0x1800,   // 1800..1801; MONGOLIAN
+            0x1802,   // 1802..1803; COMMON
+            0x1804,   // 1804..1804; MONGOLIAN
+            0x1805,   // 1805..1805; COMMON
+            0x1806,   // 1806..18AF; MONGOLIAN
+            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
+            0x1900,   // 1900..194F; LIMBU
+            0x1950,   // 1950..197F; TAI_LE
+            0x1980,   // 1980..19DF; NEW_TAI_LUE
+            0x19E0,   // 19E0..19FF; KHMER
+            0x1A00,   // 1A00..1A1F; BUGINESE
+            0x1A20,   // 1A20..1AFF; TAI_THAM
+            0x1B00,   // 1B00..1B7F; BALINESE
+            0x1B80,   // 1B80..1BFF; SUNDANESE
+            0x1C00,   // 1C00..1C4F; LEPCHA
+            0x1C50,   // 1C50..1CCF; OL_CHIKI
+            0x1CD0,   // 1CD0..1CD2; INHERITED
+            0x1CD3,   // 1CD3..1CD3; COMMON
+            0x1CD4,   // 1CD4..1CE0; INHERITED
+            0x1CE1,   // 1CE1..1CE1; COMMON
+            0x1CE2,   // 1CE2..1CE8; INHERITED
+            0x1CE9,   // 1CE9..1CEC; COMMON
+            0x1CED,   // 1CED..1CED; INHERITED
+            0x1CEE,   // 1CEE..1CFF; COMMON
+            0x1D00,   // 1D00..1D25; LATIN
+            0x1D26,   // 1D26..1D2A; GREEK
+            0x1D2B,   // 1D2B..1D2B; CYRILLIC
+            0x1D2C,   // 1D2C..1D5C; LATIN
+            0x1D5D,   // 1D5D..1D61; GREEK
+            0x1D62,   // 1D62..1D65; LATIN
+            0x1D66,   // 1D66..1D6A; GREEK
+            0x1D6B,   // 1D6B..1D77; LATIN
+            0x1D78,   // 1D78..1D78; CYRILLIC
+            0x1D79,   // 1D79..1DBE; LATIN
+            0x1DBF,   // 1DBF..1DBF; GREEK
+            0x1DC0,   // 1DC0..1DFF; INHERITED
+            0x1E00,   // 1E00..1EFF; LATIN
+            0x1F00,   // 1F00..1FFF; GREEK
+            0x2000,   // 2000..200B; COMMON
+            0x200C,   // 200C..200D; INHERITED
+            0x200E,   // 200E..2070; COMMON
+            0x2071,   // 2071..2073; LATIN
+            0x2074,   // 2074..207E; COMMON
+            0x207F,   // 207F..207F; LATIN
+            0x2080,   // 2080..208F; COMMON
+            0x2090,   // 2090..209F; LATIN
+            0x20A0,   // 20A0..20CF; COMMON
+            0x20D0,   // 20D0..20FF; INHERITED
+            0x2100,   // 2100..2125; COMMON
+            0x2126,   // 2126..2126; GREEK
+            0x2127,   // 2127..2129; COMMON
+            0x212A,   // 212A..212B; LATIN
+            0x212C,   // 212C..2131; COMMON
+            0x2132,   // 2132..2132; LATIN
+            0x2133,   // 2133..214D; COMMON
+            0x214E,   // 214E..214E; LATIN
+            0x214F,   // 214F..215F; COMMON
+            0x2160,   // 2160..2188; LATIN
+            0x2189,   // 2189..27FF; COMMON
+            0x2800,   // 2800..28FF; BRAILLE
+            0x2900,   // 2900..2BFF; COMMON
+            0x2C00,   // 2C00..2C5F; GLAGOLITIC
+            0x2C60,   // 2C60..2C7F; LATIN
+            0x2C80,   // 2C80..2CFF; COPTIC
+            0x2D00,   // 2D00..2D2F; GEORGIAN
+            0x2D30,   // 2D30..2D7F; TIFINAGH
+            0x2D80,   // 2D80..2DDF; ETHIOPIC
+            0x2DE0,   // 2DE0..2DFF; CYRILLIC
+            0x2E00,   // 2E00..2E7F; COMMON
+            0x2E80,   // 2E80..2FEF; HAN
+            0x2FF0,   // 2FF0..3004; COMMON
+            0x3005,   // 3005..3005; HAN
+            0x3006,   // 3006..3006; COMMON
+            0x3007,   // 3007..3007; HAN
+            0x3008,   // 3008..3020; COMMON
+            0x3021,   // 3021..3029; HAN
+            0x302A,   // 302A..302F; INHERITED
+            0x3030,   // 3030..3037; COMMON
+            0x3038,   // 3038..303B; HAN
+            0x303C,   // 303C..3040; COMMON
+            0x3041,   // 3041..3098; HIRAGANA
+            0x3099,   // 3099..309A; INHERITED
+            0x309B,   // 309B..309C; COMMON
+            0x309D,   // 309D..309F; HIRAGANA
+            0x30A0,   // 30A0..30A0; COMMON
+            0x30A1,   // 30A1..30FA; KATAKANA
+            0x30FB,   // 30FB..30FC; COMMON
+            0x30FD,   // 30FD..3104; KATAKANA
+            0x3105,   // 3105..3130; BOPOMOFO
+            0x3131,   // 3131..318F; HANGUL
+            0x3190,   // 3190..319F; COMMON
+            0x31A0,   // 31A0..31BF; BOPOMOFO
+            0x31C0,   // 31C0..31EF; COMMON
+            0x31F0,   // 31F0..31FF; KATAKANA
+            0x3200,   // 3200..321F; HANGUL
+            0x3220,   // 3220..325F; COMMON
+            0x3260,   // 3260..327E; HANGUL
+            0x327F,   // 327F..32CF; COMMON
+            0x32D0,   // 32D0..3357; KATAKANA
+            0x3358,   // 3358..33FF; COMMON
+            0x3400,   // 3400..4DBF; HAN
+            0x4DC0,   // 4DC0..4DFF; COMMON
+            0x4E00,   // 4E00..9FFF; HAN
+            0xA000,   // A000..A4CF; YI
+            0xA4D0,   // A4D0..A4FF; LISU
+            0xA500,   // A500..A63F; VAI
+            0xA640,   // A640..A69F; CYRILLIC
+            0xA6A0,   // A6A0..A6FF; BAMUM
+            0xA700,   // A700..A721; COMMON
+            0xA722,   // A722..A787; LATIN
+            0xA788,   // A788..A78A; COMMON
+            0xA78B,   // A78B..A7FF; LATIN
+            0xA800,   // A800..A82F; SYLOTI_NAGRI
+            0xA830,   // A830..A83F; COMMON
+            0xA840,   // A840..A87F; PHAGS_PA
+            0xA880,   // A880..A8DF; SAURASHTRA
+            0xA8E0,   // A8E0..A8FF; DEVANAGARI
+            0xA900,   // A900..A92F; KAYAH_LI
+            0xA930,   // A930..A95F; REJANG
+            0xA960,   // A960..A97F; HANGUL
+            0xA980,   // A980..A9FF; JAVANESE
+            0xAA00,   // AA00..AA5F; CHAM
+            0xAA60,   // AA60..AA7F; MYANMAR
+            0xAA80,   // AA80..ABBF; TAI_VIET
+            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
+            0xAC00,   // AC00..D7FB; HANGUL
+            0xD7FC,   // D7FC..F8FF; UNKNOWN
+            0xF900,   // F900..FAFF; HAN
+            0xFB00,   // FB00..FB12; LATIN
+            0xFB13,   // FB13..FB1C; ARMENIAN
+            0xFB1D,   // FB1D..FB4F; HEBREW
+            0xFB50,   // FB50..FD3D; ARABIC
+            0xFD3E,   // FD3E..FD4F; COMMON
+            0xFD50,   // FD50..FDFC; ARABIC
+            0xFDFD,   // FDFD..FDFF; COMMON
+            0xFE00,   // FE00..FE0F; INHERITED
+            0xFE10,   // FE10..FE1F; COMMON
+            0xFE20,   // FE20..FE2F; INHERITED
+            0xFE30,   // FE30..FE6F; COMMON
+            0xFE70,   // FE70..FEFE; ARABIC
+            0xFEFF,   // FEFF..FF20; COMMON
+            0xFF21,   // FF21..FF3A; LATIN
+            0xFF3B,   // FF3B..FF40; COMMON
+            0xFF41,   // FF41..FF5A; LATIN
+            0xFF5B,   // FF5B..FF65; COMMON
+            0xFF66,   // FF66..FF6F; KATAKANA
+            0xFF70,   // FF70..FF70; COMMON
+            0xFF71,   // FF71..FF9D; KATAKANA
+            0xFF9E,   // FF9E..FF9F; COMMON
+            0xFFA0,   // FFA0..FFDF; HANGUL
+            0xFFE0,   // FFE0..FFFF; COMMON
+            0x10000,  // 10000..100FF; LINEAR_B
+            0x10100,  // 10100..1013F; COMMON
+            0x10140,  // 10140..1018F; GREEK
+            0x10190,  // 10190..101FC; COMMON
+            0x101FD,  // 101FD..1027F; INHERITED
+            0x10280,  // 10280..1029F; LYCIAN
+            0x102A0,  // 102A0..102FF; CARIAN
+            0x10300,  // 10300..1032F; OLD_ITALIC
+            0x10330,  // 10330..1037F; GOTHIC
+            0x10380,  // 10380..1039F; UGARITIC
+            0x103A0,  // 103A0..103FF; OLD_PERSIAN
+            0x10400,  // 10400..1044F; DESERET
+            0x10450,  // 10450..1047F; SHAVIAN
+            0x10480,  // 10480..107FF; OSMANYA
+            0x10800,  // 10800..1083F; CYPRIOT
+            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
+            0x10900,  // 10900..1091F; PHOENICIAN
+            0x10920,  // 10920..109FF; LYDIAN
+            0x10A00,  // 10A00..10A5F; KHAROSHTHI
+            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
+            0x10B00,  // 10B00..10B3F; AVESTAN
+            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
+            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
+            0x10C00,  // 10C00..10E5F; OLD_TURKIC
+            0x10E60,  // 10E60..1107F; ARABIC
+            0x11080,  // 11080..11FFF; KAITHI
+            0x12000,  // 12000..12FFF; CUNEIFORM
+            0x13000,  // 13000..1CFFF; EGYPTIAN_HIEROGLYPHS
+            0x1D000,  // 1D000..1D166; COMMON
+            0x1D167,  // 1D167..1D169; INHERITED
+            0x1D16A,  // 1D16A..1D17A; COMMON
+            0x1D17B,  // 1D17B..1D182; INHERITED
+            0x1D183,  // 1D183..1D184; COMMON
+            0x1D185,  // 1D185..1D18B; INHERITED
+            0x1D18C,  // 1D18C..1D1A9; COMMON
+            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
+            0x1D1AE,  // 1D1AE..1D1FF; COMMON
+            0x1D200,  // 1D200..1D2FF; GREEK
+            0x1D300,  // 1D300..1F1FF; COMMON
+            0x1F200,  // 1F200..1F20F; HIRAGANA
+            0x1F210,  // 1F210..1FFFF; COMMON
+            0x20000,  // 20000..E0000; HAN
+            0xE0001,  // E0001..E00FF; COMMON
+            0xE0100,  // E0100..E01EF; INHERITED
+            0xE01F0   // E01F0..10FFFF; UNKNOWN
+
+        };
+
+        private static final UnicodeScript[] scripts = {
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            INHERITED,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COMMON,
+            GREEK,
+            COPTIC,
+            GREEK,
+            CYRILLIC,
+            INHERITED,
+            CYRILLIC,
+            ARMENIAN,
+            COMMON,
+            ARMENIAN,
+            HEBREW,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            INHERITED,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            INHERITED,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            SYRIAC,
+            ARABIC,
+            THAANA,
+            NKO,
+            SAMARITAN,
+            DEVANAGARI,
+            INHERITED,
+            DEVANAGARI,
+            COMMON,
+            DEVANAGARI,
+            COMMON,
+            DEVANAGARI,
+            BENGALI,
+            GURMUKHI,
+            GUJARATI,
+            ORIYA,
+            TAMIL,
+            TELUGU,
+            KANNADA,
+            COMMON,
+            MALAYALAM,
+            SINHALA,
+            THAI,
+            COMMON,
+            THAI,
+            LAO,
+            TIBETAN,
+            COMMON,
+            MYANMAR,
+            GEORGIAN,
+            COMMON,
+            GEORGIAN,
+            HANGUL,
+            ETHIOPIC,
+            CHEROKEE,
+            CANADIAN_ABORIGINAL,
+            OGHAM,
+            RUNIC,
+            COMMON,
+            RUNIC,
+            TAGALOG,
+            HANUNOO,
+            COMMON,
+            BUHID,
+            TAGBANWA,
+            KHMER,
+            MONGOLIAN,
+            COMMON,
+            MONGOLIAN,
+            COMMON,
+            MONGOLIAN,
+            CANADIAN_ABORIGINAL,
+            LIMBU,
+            TAI_LE,
+            NEW_TAI_LUE,
+            KHMER,
+            BUGINESE,
+            TAI_THAM,
+            BALINESE,
+            SUNDANESE,
+            LEPCHA,
+            OL_CHIKI,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            LATIN,
+            GREEK,
+            CYRILLIC,
+            LATIN,
+            GREEK,
+            LATIN,
+            GREEK,
+            LATIN,
+            CYRILLIC,
+            LATIN,
+            GREEK,
+            INHERITED,
+            LATIN,
+            GREEK,
+            COMMON,
+            INHERITED,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            INHERITED,
+            COMMON,
+            GREEK,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            BRAILLE,
+            COMMON,
+            GLAGOLITIC,
+            LATIN,
+            COPTIC,
+            GEORGIAN,
+            TIFINAGH,
+            ETHIOPIC,
+            CYRILLIC,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            INHERITED,
+            COMMON,
+            HAN,
+            COMMON,
+            HIRAGANA,
+            INHERITED,
+            COMMON,
+            HIRAGANA,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            KATAKANA,
+            BOPOMOFO,
+            HANGUL,
+            COMMON,
+            BOPOMOFO,
+            COMMON,
+            KATAKANA,
+            HANGUL,
+            COMMON,
+            HANGUL,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            HAN,
+            COMMON,
+            HAN,
+            YI,
+            LISU,
+            VAI,
+            CYRILLIC,
+            BAMUM,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            SYLOTI_NAGRI,
+            COMMON,
+            PHAGS_PA,
+            SAURASHTRA,
+            DEVANAGARI,
+            KAYAH_LI,
+            REJANG,
+            HANGUL,
+            JAVANESE,
+            CHAM,
+            MYANMAR,
+            TAI_VIET,
+            MEETEI_MAYEK,
+            HANGUL,
+            UNKNOWN,
+            HAN,
+            LATIN,
+            ARMENIAN,
+            HEBREW,
+            ARABIC,
+            COMMON,
+            ARABIC,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            ARABIC,
+            COMMON,
+            LATIN,
+            COMMON,
+            LATIN,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            KATAKANA,
+            COMMON,
+            HANGUL,
+            COMMON,
+            LINEAR_B,
+            COMMON,
+            GREEK,
+            COMMON,
+            INHERITED,
+            LYCIAN,
+            CARIAN,
+            OLD_ITALIC,
+            GOTHIC,
+            UGARITIC,
+            OLD_PERSIAN,
+            DESERET,
+            SHAVIAN,
+            OSMANYA,
+            CYPRIOT,
+            IMPERIAL_ARAMAIC,
+            PHOENICIAN,
+            LYDIAN,
+            KHAROSHTHI,
+            OLD_SOUTH_ARABIAN,
+            AVESTAN,
+            INSCRIPTIONAL_PARTHIAN,
+            INSCRIPTIONAL_PAHLAVI,
+            OLD_TURKIC,
+            ARABIC,
+            KAITHI,
+            CUNEIFORM,
+            EGYPTIAN_HIEROGLYPHS,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            INHERITED,
+            COMMON,
+            GREEK,
+            COMMON,
+            HIRAGANA,
+            COMMON,
+            HAN,
+            COMMON,
+            INHERITED,
+            UNKNOWN
+        };
+
+        private static HashMap<String, Character.UnicodeScript> aliases;
+        static {
+            aliases = new HashMap<String, UnicodeScript>();
+            aliases.put("ARAB", ARABIC);
+            aliases.put("ARMI", IMPERIAL_ARAMAIC);
+            aliases.put("ARMN", ARMENIAN);
+            aliases.put("AVST", AVESTAN);
+            aliases.put("BALI", BALINESE);
+            aliases.put("BAMU", BAMUM);
+            aliases.put("BENG", BENGALI);
+            aliases.put("BOPO", BOPOMOFO);
+            aliases.put("BRAI", BRAILLE);
+            aliases.put("BUGI", BUGINESE);
+            aliases.put("BUHD", BUHID);
+            aliases.put("CANS", CANADIAN_ABORIGINAL);
+            aliases.put("CARI", CARIAN);
+            aliases.put("CHAM", CHAM);
+            aliases.put("CHER", CHEROKEE);
+            aliases.put("COPT", COPTIC);
+            aliases.put("CPRT", CYPRIOT);
+            aliases.put("CYRL", CYRILLIC);
+            aliases.put("DEVA", DEVANAGARI);
+            aliases.put("DSRT", DESERET);
+            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
+            aliases.put("ETHI", ETHIOPIC);
+            aliases.put("GEOR", GEORGIAN);
+            aliases.put("GLAG", GLAGOLITIC);
+            aliases.put("GOTH", GOTHIC);
+            aliases.put("GREK", GREEK);
+            aliases.put("GUJR", GUJARATI);
+            aliases.put("GURU", GURMUKHI);
+            aliases.put("HANG", HANGUL);
+            aliases.put("HANI", HAN);
+            aliases.put("HANO", HANUNOO);
+            aliases.put("HEBR", HEBREW);
+            aliases.put("HIRA", HIRAGANA);
+            // it appears we don't have the KATAKANA_OR_HIRAGANA
+            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
+            aliases.put("ITAL", OLD_ITALIC);
+            aliases.put("JAVA", JAVANESE);
+            aliases.put("KALI", KAYAH_LI);
+            aliases.put("KANA", KATAKANA);
+            aliases.put("KHAR", KHAROSHTHI);
+            aliases.put("KHMR", KHMER);
+            aliases.put("KNDA", KANNADA);
+            aliases.put("KTHI", KAITHI);
+            aliases.put("LANA", TAI_THAM);
+            aliases.put("LAOO", LAO);
+            aliases.put("LATN", LATIN);
+            aliases.put("LEPC", LEPCHA);
+            aliases.put("LIMB", LIMBU);
+            aliases.put("LINB", LINEAR_B);
+            aliases.put("LISU", LISU);
+            aliases.put("LYCI", LYCIAN);
+            aliases.put("LYDI", LYDIAN);
+            aliases.put("MLYM", MALAYALAM);
+            aliases.put("MONG", MONGOLIAN);
+            aliases.put("MTEI", MEETEI_MAYEK);
+            aliases.put("MYMR", MYANMAR);
+            aliases.put("NKOO", NKO);
+            aliases.put("OGAM", OGHAM);
+            aliases.put("OLCK", OL_CHIKI);
+            aliases.put("ORKH", OLD_TURKIC);
+            aliases.put("ORYA", ORIYA);
+            aliases.put("OSMA", OSMANYA);
+            aliases.put("PHAG", PHAGS_PA);
+            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
+            aliases.put("PHNX", PHOENICIAN);
+            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
+            aliases.put("RJNG", REJANG);
+            aliases.put("RUNR", RUNIC);
+            aliases.put("SAMR", SAMARITAN);
+            aliases.put("SARB", OLD_SOUTH_ARABIAN);
+            aliases.put("SAUR", SAURASHTRA);
+            aliases.put("SHAW", SHAVIAN);
+            aliases.put("SINH", SINHALA);
+            aliases.put("SUND", SUNDANESE);
+            aliases.put("SYLO", SYLOTI_NAGRI);
+            aliases.put("SYRC", SYRIAC);
+            aliases.put("TAGB", TAGBANWA);
+            aliases.put("TALE", TAI_LE);
+            aliases.put("TALU", NEW_TAI_LUE);
+            aliases.put("TAML", TAMIL);
+            aliases.put("TAVT", TAI_VIET);
+            aliases.put("TELU", TELUGU);
+            aliases.put("TFNG", TIFINAGH);
+            aliases.put("TGLG", TAGALOG);
+            aliases.put("THAA", THAANA);
+            aliases.put("THAI", THAI);
+            aliases.put("TIBT", TIBETAN);
+            aliases.put("UGAR", UGARITIC);
+            aliases.put("VAII", VAI);
+            aliases.put("XPEO", OLD_PERSIAN);
+            aliases.put("XSUX", CUNEIFORM);
+            aliases.put("YIII", YI);
+            aliases.put("ZINH", INHERITED);
+            aliases.put("ZYYY", COMMON);
+            aliases.put("ZZZZ", UNKNOWN);
+        }
+
+        /**
+         * Returns the enum constant representing the Unicode script of which
+         * the given character (Unicode code point) is assigned to.
+         *
+         * @param   codePoint the character (Unicode code point) in question.
+         * @return  The <code>UnicodeScript</code> constant representing the
+         *          Unicode script of which this character is assigned to.
+         *
+         * @exception IllegalArgumentException if the specified
+         * <code>codePoint</code> is an invalid Unicode code point.
+         * @see Character#isValidCodePoint(int)
+         *
+         */
+        public static UnicodeScript of(int codePoint) {
+            if (!isValidCodePoint(codePoint))
+                throw new IllegalArgumentException();
+            int type = getType(codePoint);
+            // leave SURROGATE and PRIVATE_USE for table lookup
+            if (type == UNASSIGNED)
+                return UNKNOWN;
+            int index = Arrays.binarySearch(scriptStarts, codePoint);
+            if (index < 0)
+                index = -index - 2;
+            return scripts[index];
+        }
+
+        /**
+         * Returns the UnicodeScript constant with the given Unicode script
+         * name or the script name alias. Script names and their aliases are
+         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
+         * and PropertyValueAliases&lt;version&gt;.txt define script names
+         * and the script name aliases for a particular version of the
+         * standard. The {@link Character} class specifies the version of
+         * the standard that it supports.
+         * <p>
+         * Character case is ignored for all of the valid script names.
+         * The en_US locale's case mapping rules are used to provide
+         * case-insensitive string comparisons for script name validation.
+         * <p>
+         *
+         * @param scriptName A <code>UnicodeScript</code> name.
+         * @return The <code>UnicodeScript</code> constant identified
+         *         by <code>scriptName</code>
+         * @throws IllegalArgumentException if <code>scriptName</code> is an
+         *         invalid name
+         * @throws NullPointerException if <code>scriptName</code> is null
+         */
+        public static final UnicodeScript forName(String scriptName) {
+            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
+                                 //.replace(' ', '_'));
+            UnicodeScript sc = aliases.get(scriptName);
+            if (sc != null)
+                return sc;
+            return valueOf(scriptName);
+        }
+    }
+
+    /**
      * The value of the <code>Character</code>.
      *
      * @serial
      */
     private final char value;

@@ -5040,6 +6276,53 @@
      * @since 1.5
      */
     public static char reverseBytes(char ch) {
         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
     }
+
+    /**
+     * Returns the Unicode name of the specified character
+     * <code>codePoint</code>, or null if the code point is
+     * {@link #UNASSIGNED unassigned}.
+     * <p>
+     * Note: if the specified character is not assigned a name by
+     * the <i>UnicodeData</i> file (part of the Unicode Character
+     * Database maintained by the Unicode Consortium), the returned
+     * name is the same as the result of expression
+     *
+     * <blockquote><code>
+     *     Character.UnicodeBlock.of(codePoint)
+     *                           .toString()
+     *                           .replace('_', ' ')
+     *     + " "
+     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+     *
+     * </code></blockquote>
+     *
+     * @param  codePoint the character (Unicode code point)
+     *
+     * @return the Unicode name of the specified character, or null if
+     *         the code point is unassigned.
+     *
+     * @exception IllegalArgumentException if the specified
+     *            <code>codePoint</code> is not a valid Unicode
+     *            code point.
+     *
+     * @since 1.7
+     */
+    public static String getName(int codePoint) {
+        if (!isValidCodePoint(codePoint)) {
+            throw new IllegalArgumentException();
+        }
+        String name = CharacterName.get(codePoint);
+        if (name != null)
+            return name;
+        if (getType(codePoint) == UNASSIGNED)
+            return null;
+        UnicodeBlock block = UnicodeBlock.of(codePoint);
+        if (block != null)
+            return block.toString().replace('_', ' ') + " "
+                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+        // should never come here
+        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
+    }
 }