< prev index next >

make/data/unicodedata/PropertyValueAliases.txt

Print this page
rev 54996 : 8221431: Support for Unicode 12.1
Reviewed-by:
   1 # PropertyValueAliases-11.0.0.txt
   2 # Date: 2018-05-20, 09:03:12 GMT
   3 # Copyright (c) 2018 Unicode, Inc.
   4 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
   5 # For terms of use, see http://www.unicode.org/terms_of_use.html
   6 #
   7 # Unicode Character Database
   8 #   For documentation, see http://www.unicode.org/reports/tr44/
   9 #
  10 # This file contains aliases for property values used in the UCD.
  11 # These names can be used for XML formats of UCD data, for regular-expression
  12 # property tests, and other programmatic textual descriptions of Unicode data.
  13 #
  14 # The names may be translated in appropriate environments, and additional
  15 # aliases may be useful.
  16 #
  17 # FORMAT
  18 #
  19 # Each line describes a property value name.
  20 # This consists of three or more fields, separated by semicolons.
  21 #
  22 # First Field: The first field describes the property for which that
  23 # property value name is used.
  24 #
  25 # Second Field: The second field is an abbreviated name.


  26 #
  27 # Third Field: The third field is a long name.

  28 #
  29 # In the case of ccc, there are 4 fields. The second field is numeric, third
  30 # is abbreviated, and fourth is long.
  31 #
  32 # The above are the preferred aliases. Other aliases may be listed in additional fields.
  33 #
  34 # Loose matching should be applied to all property names and property values, with
  35 # the exception of String Property values. With loose matching of property names and
  36 # values, the case distinctions, whitespace, hyphens, and '_' are ignored.
  37 # For Numeric Property values, numeric equivalence is applied: thus "01.00"
  38 # is equivalent to "1".
  39 #
  40 # NOTE: Property value names are NOT unique across properties. For example:
  41 #
  42 #   AL means Arabic Letter for the Bidi_Class property, and
  43 #   AL means Above_Left for the Canonical_Combining_Class property, and
  44 #   AL means Alphabetic for the Line_Break property.
  45 #
  46 # In addition, some property names may be the same as some property value names.
  47 # For example:
  48 #
  49 #   sc means the Script property, and
  50 #   Sc means the General_Category property value Currency_Symbol (Sc)
  51 #
  52 # The combination of property value and property name is, however, unique.
  53 #
  54 # For more information, see UTS #18: Unicode Regular Expressions

  55 # ================================================
  56 
  57 
  58 # ASCII_Hex_Digit (AHex)
  59 
  60 AHex; N                               ; No                               ; F                                ; False
  61 AHex; Y                               ; Yes                              ; T                                ; True
  62 
  63 # Age (age)
  64 
  65 age; 1.1                              ; V1_1
  66 age; 2.0                              ; V2_0
  67 age; 2.1                              ; V2_1
  68 age; 3.0                              ; V3_0
  69 age; 3.1                              ; V3_1
  70 age; 3.2                              ; V3_2
  71 age; 4.0                              ; V4_0
  72 age; 4.1                              ; V4_1
  73 age; 5.0                              ; V5_0
  74 age; 5.1                              ; V5_1
  75 age; 5.2                              ; V5_2
  76 age; 6.0                              ; V6_0
  77 age; 6.1                              ; V6_1
  78 age; 6.2                              ; V6_2
  79 age; 6.3                              ; V6_3
  80 age; 7.0                              ; V7_0
  81 age; 8.0                              ; V8_0
  82 age; 9.0                              ; V9_0
  83 age; 10.0                             ; V10_0
  84 age; 11.0                             ; V11_0


  85 age; NA                               ; Unassigned
  86 
  87 # Alphabetic (Alpha)
  88 
  89 Alpha; N                              ; No                               ; F                                ; False
  90 Alpha; Y                              ; Yes                              ; T                                ; True
  91 
  92 # Bidi_Class (bc)
  93 
  94 bc ; AL                               ; Arabic_Letter
  95 bc ; AN                               ; Arabic_Number
  96 bc ; B                                ; Paragraph_Separator
  97 bc ; BN                               ; Boundary_Neutral
  98 bc ; CS                               ; Common_Separator
  99 bc ; EN                               ; European_Number
 100 bc ; ES                               ; European_Separator
 101 bc ; ET                               ; European_Terminator
 102 bc ; FSI                              ; First_Strong_Isolate
 103 bc ; L                                ; Left_To_Right
 104 bc ; LRE                              ; Left_To_Right_Embedding


 207 blk; Cuneiform_Numbers                ; Cuneiform_Numbers_And_Punctuation
 208 blk; Currency_Symbols                 ; Currency_Symbols
 209 blk; Cypriot_Syllabary                ; Cypriot_Syllabary
 210 blk; Cyrillic                         ; Cyrillic
 211 blk; Cyrillic_Ext_A                   ; Cyrillic_Extended_A
 212 blk; Cyrillic_Ext_B                   ; Cyrillic_Extended_B
 213 blk; Cyrillic_Ext_C                   ; Cyrillic_Extended_C
 214 blk; Cyrillic_Sup                     ; Cyrillic_Supplement              ; Cyrillic_Supplementary
 215 blk; Deseret                          ; Deseret
 216 blk; Devanagari                       ; Devanagari
 217 blk; Devanagari_Ext                   ; Devanagari_Extended
 218 blk; Diacriticals                     ; Combining_Diacritical_Marks
 219 blk; Diacriticals_Ext                 ; Combining_Diacritical_Marks_Extended
 220 blk; Diacriticals_For_Symbols         ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
 221 blk; Diacriticals_Sup                 ; Combining_Diacritical_Marks_Supplement
 222 blk; Dingbats                         ; Dingbats
 223 blk; Dogra                            ; Dogra
 224 blk; Domino                           ; Domino_Tiles
 225 blk; Duployan                         ; Duployan
 226 blk; Early_Dynastic_Cuneiform         ; Early_Dynastic_Cuneiform

 227 blk; Egyptian_Hieroglyphs             ; Egyptian_Hieroglyphs
 228 blk; Elbasan                          ; Elbasan

 229 blk; Emoticons                        ; Emoticons
 230 blk; Enclosed_Alphanum                ; Enclosed_Alphanumerics
 231 blk; Enclosed_Alphanum_Sup            ; Enclosed_Alphanumeric_Supplement
 232 blk; Enclosed_CJK                     ; Enclosed_CJK_Letters_And_Months
 233 blk; Enclosed_Ideographic_Sup         ; Enclosed_Ideographic_Supplement
 234 blk; Ethiopic                         ; Ethiopic
 235 blk; Ethiopic_Ext                     ; Ethiopic_Extended
 236 blk; Ethiopic_Ext_A                   ; Ethiopic_Extended_A
 237 blk; Ethiopic_Sup                     ; Ethiopic_Supplement
 238 blk; Geometric_Shapes                 ; Geometric_Shapes
 239 blk; Geometric_Shapes_Ext             ; Geometric_Shapes_Extended
 240 blk; Georgian                         ; Georgian
 241 blk; Georgian_Ext                     ; Georgian_Extended
 242 blk; Georgian_Sup                     ; Georgian_Supplement
 243 blk; Glagolitic                       ; Glagolitic
 244 blk; Glagolitic_Sup                   ; Glagolitic_Supplement
 245 blk; Gothic                           ; Gothic
 246 blk; Grantha                          ; Grantha
 247 blk; Greek                            ; Greek_And_Coptic
 248 blk; Greek_Ext                        ; Greek_Extended


 322 blk; Meroitic_Hieroglyphs             ; Meroitic_Hieroglyphs
 323 blk; Miao                             ; Miao
 324 blk; Misc_Arrows                      ; Miscellaneous_Symbols_And_Arrows
 325 blk; Misc_Math_Symbols_A              ; Miscellaneous_Mathematical_Symbols_A
 326 blk; Misc_Math_Symbols_B              ; Miscellaneous_Mathematical_Symbols_B
 327 blk; Misc_Pictographs                 ; Miscellaneous_Symbols_And_Pictographs
 328 blk; Misc_Symbols                     ; Miscellaneous_Symbols
 329 blk; Misc_Technical                   ; Miscellaneous_Technical
 330 blk; Modi                             ; Modi
 331 blk; Modifier_Letters                 ; Spacing_Modifier_Letters
 332 blk; Modifier_Tone_Letters            ; Modifier_Tone_Letters
 333 blk; Mongolian                        ; Mongolian
 334 blk; Mongolian_Sup                    ; Mongolian_Supplement
 335 blk; Mro                              ; Mro
 336 blk; Multani                          ; Multani
 337 blk; Music                            ; Musical_Symbols
 338 blk; Myanmar                          ; Myanmar
 339 blk; Myanmar_Ext_A                    ; Myanmar_Extended_A
 340 blk; Myanmar_Ext_B                    ; Myanmar_Extended_B
 341 blk; Nabataean                        ; Nabataean

 342 blk; NB                               ; No_Block
 343 blk; New_Tai_Lue                      ; New_Tai_Lue
 344 blk; Newa                             ; Newa
 345 blk; NKo                              ; NKo
 346 blk; Number_Forms                     ; Number_Forms
 347 blk; Nushu                            ; Nushu

 348 blk; OCR                              ; Optical_Character_Recognition
 349 blk; Ogham                            ; Ogham
 350 blk; Ol_Chiki                         ; Ol_Chiki
 351 blk; Old_Hungarian                    ; Old_Hungarian
 352 blk; Old_Italic                       ; Old_Italic
 353 blk; Old_North_Arabian                ; Old_North_Arabian
 354 blk; Old_Permic                       ; Old_Permic
 355 blk; Old_Persian                      ; Old_Persian
 356 blk; Old_Sogdian                      ; Old_Sogdian
 357 blk; Old_South_Arabian                ; Old_South_Arabian
 358 blk; Old_Turkic                       ; Old_Turkic
 359 blk; Oriya                            ; Oriya
 360 blk; Ornamental_Dingbats              ; Ornamental_Dingbats
 361 blk; Osage                            ; Osage
 362 blk; Osmanya                          ; Osmanya

 363 blk; Pahawh_Hmong                     ; Pahawh_Hmong
 364 blk; Palmyrene                        ; Palmyrene
 365 blk; Pau_Cin_Hau                      ; Pau_Cin_Hau
 366 blk; Phags_Pa                         ; Phags_Pa
 367 blk; Phaistos                         ; Phaistos_Disc
 368 blk; Phoenician                       ; Phoenician
 369 blk; Phonetic_Ext                     ; Phonetic_Extensions
 370 blk; Phonetic_Ext_Sup                 ; Phonetic_Extensions_Supplement
 371 blk; Playing_Cards                    ; Playing_Cards
 372 blk; Psalter_Pahlavi                  ; Psalter_Pahlavi
 373 blk; PUA                              ; Private_Use_Area                 ; Private_Use
 374 blk; Punctuation                      ; General_Punctuation
 375 blk; Rejang                           ; Rejang
 376 blk; Rumi                             ; Rumi_Numeral_Symbols
 377 blk; Runic                            ; Runic
 378 blk; Samaritan                        ; Samaritan
 379 blk; Saurashtra                       ; Saurashtra
 380 blk; Sharada                          ; Sharada
 381 blk; Shavian                          ; Shavian
 382 blk; Shorthand_Format_Controls        ; Shorthand_Format_Controls
 383 blk; Siddham                          ; Siddham
 384 blk; Sinhala                          ; Sinhala
 385 blk; Sinhala_Archaic_Numbers          ; Sinhala_Archaic_Numbers
 386 blk; Small_Forms                      ; Small_Form_Variants

 387 blk; Sogdian                          ; Sogdian
 388 blk; Sora_Sompeng                     ; Sora_Sompeng
 389 blk; Soyombo                          ; Soyombo
 390 blk; Specials                         ; Specials
 391 blk; Sundanese                        ; Sundanese
 392 blk; Sundanese_Sup                    ; Sundanese_Supplement
 393 blk; Sup_Arrows_A                     ; Supplemental_Arrows_A
 394 blk; Sup_Arrows_B                     ; Supplemental_Arrows_B
 395 blk; Sup_Arrows_C                     ; Supplemental_Arrows_C
 396 blk; Sup_Math_Operators               ; Supplemental_Mathematical_Operators
 397 blk; Sup_PUA_A                        ; Supplementary_Private_Use_Area_A
 398 blk; Sup_PUA_B                        ; Supplementary_Private_Use_Area_B
 399 blk; Sup_Punctuation                  ; Supplemental_Punctuation
 400 blk; Sup_Symbols_And_Pictographs      ; Supplemental_Symbols_And_Pictographs
 401 blk; Super_And_Sub                    ; Superscripts_And_Subscripts
 402 blk; Sutton_SignWriting               ; Sutton_SignWriting
 403 blk; Syloti_Nagri                     ; Syloti_Nagri

 404 blk; Syriac                           ; Syriac
 405 blk; Syriac_Sup                       ; Syriac_Supplement
 406 blk; Tagalog                          ; Tagalog
 407 blk; Tagbanwa                         ; Tagbanwa
 408 blk; Tags                             ; Tags
 409 blk; Tai_Le                           ; Tai_Le
 410 blk; Tai_Tham                         ; Tai_Tham
 411 blk; Tai_Viet                         ; Tai_Viet
 412 blk; Tai_Xuan_Jing                    ; Tai_Xuan_Jing_Symbols
 413 blk; Takri                            ; Takri
 414 blk; Tamil                            ; Tamil

 415 blk; Tangut                           ; Tangut
 416 blk; Tangut_Components                ; Tangut_Components
 417 blk; Telugu                           ; Telugu
 418 blk; Thaana                           ; Thaana
 419 blk; Thai                             ; Thai
 420 blk; Tibetan                          ; Tibetan
 421 blk; Tifinagh                         ; Tifinagh
 422 blk; Tirhuta                          ; Tirhuta
 423 blk; Transport_And_Map                ; Transport_And_Map_Symbols
 424 blk; UCAS                             ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
 425 blk; UCAS_Ext                         ; Unified_Canadian_Aboriginal_Syllabics_Extended
 426 blk; Ugaritic                         ; Ugaritic
 427 blk; Vai                              ; Vai
 428 blk; Vedic_Ext                        ; Vedic_Extensions
 429 blk; Vertical_Forms                   ; Vertical_Forms
 430 blk; VS                               ; Variation_Selectors
 431 blk; VS_Sup                           ; Variation_Selectors_Supplement

 432 blk; Warang_Citi                      ; Warang_Citi
 433 blk; Yi_Radicals                      ; Yi_Radicals
 434 blk; Yi_Syllables                     ; Yi_Syllables
 435 blk; Yijing                           ; Yijing_Hexagram_Symbols
 436 blk; Zanabazar_Square                 ; Zanabazar_Square
 437 
 438 # Canonical_Combining_Class (ccc)
 439 
 440 ccc;   0; NR                         ; Not_Reordered
 441 ccc;   1; OV                         ; Overlay
 442 ccc;   7; NK                         ; Nukta
 443 ccc;   8; KV                         ; Kana_Voicing
 444 ccc;   9; VR                         ; Virama
 445 ccc;  10; CCC10                      ; CCC10
 446 ccc;  11; CCC11                      ; CCC11
 447 ccc;  12; CCC12                      ; CCC12
 448 ccc;  13; CCC13                      ; CCC13
 449 ccc;  14; CCC14                      ; CCC14
 450 ccc;  15; CCC15                      ; CCC15
 451 ccc;  16; CCC16                      ; CCC16


1196 sc ; Bhks                             ; Bhaiksuki
1197 sc ; Bopo                             ; Bopomofo
1198 sc ; Brah                             ; Brahmi
1199 sc ; Brai                             ; Braille
1200 sc ; Bugi                             ; Buginese
1201 sc ; Buhd                             ; Buhid
1202 sc ; Cakm                             ; Chakma
1203 sc ; Cans                             ; Canadian_Aboriginal
1204 sc ; Cari                             ; Carian
1205 sc ; Cham                             ; Cham
1206 sc ; Cher                             ; Cherokee
1207 sc ; Copt                             ; Coptic                           ; Qaac
1208 sc ; Cprt                             ; Cypriot
1209 sc ; Cyrl                             ; Cyrillic
1210 sc ; Deva                             ; Devanagari
1211 sc ; Dogr                             ; Dogra
1212 sc ; Dsrt                             ; Deseret
1213 sc ; Dupl                             ; Duployan
1214 sc ; Egyp                             ; Egyptian_Hieroglyphs
1215 sc ; Elba                             ; Elbasan

1216 sc ; Ethi                             ; Ethiopic
1217 sc ; Geor                             ; Georgian
1218 sc ; Glag                             ; Glagolitic
1219 sc ; Gong                             ; Gunjala_Gondi
1220 sc ; Gonm                             ; Masaram_Gondi
1221 sc ; Goth                             ; Gothic
1222 sc ; Gran                             ; Grantha
1223 sc ; Grek                             ; Greek
1224 sc ; Gujr                             ; Gujarati
1225 sc ; Guru                             ; Gurmukhi
1226 sc ; Hang                             ; Hangul
1227 sc ; Hani                             ; Han
1228 sc ; Hano                             ; Hanunoo
1229 sc ; Hatr                             ; Hatran
1230 sc ; Hebr                             ; Hebrew
1231 sc ; Hira                             ; Hiragana
1232 sc ; Hluw                             ; Anatolian_Hieroglyphs
1233 sc ; Hmng                             ; Pahawh_Hmong

1234 sc ; Hrkt                             ; Katakana_Or_Hiragana
1235 sc ; Hung                             ; Old_Hungarian
1236 sc ; Ital                             ; Old_Italic
1237 sc ; Java                             ; Javanese
1238 sc ; Kali                             ; Kayah_Li
1239 sc ; Kana                             ; Katakana
1240 sc ; Khar                             ; Kharoshthi
1241 sc ; Khmr                             ; Khmer
1242 sc ; Khoj                             ; Khojki
1243 sc ; Knda                             ; Kannada
1244 sc ; Kthi                             ; Kaithi
1245 sc ; Lana                             ; Tai_Tham
1246 sc ; Laoo                             ; Lao
1247 sc ; Latn                             ; Latin
1248 sc ; Lepc                             ; Lepcha
1249 sc ; Limb                             ; Limbu
1250 sc ; Lina                             ; Linear_A
1251 sc ; Linb                             ; Linear_B
1252 sc ; Lisu                             ; Lisu
1253 sc ; Lyci                             ; Lycian
1254 sc ; Lydi                             ; Lydian
1255 sc ; Mahj                             ; Mahajani
1256 sc ; Maka                             ; Makasar
1257 sc ; Mand                             ; Mandaic
1258 sc ; Mani                             ; Manichaean
1259 sc ; Marc                             ; Marchen
1260 sc ; Medf                             ; Medefaidrin
1261 sc ; Mend                             ; Mende_Kikakui
1262 sc ; Merc                             ; Meroitic_Cursive
1263 sc ; Mero                             ; Meroitic_Hieroglyphs
1264 sc ; Mlym                             ; Malayalam
1265 sc ; Modi                             ; Modi
1266 sc ; Mong                             ; Mongolian
1267 sc ; Mroo                             ; Mro
1268 sc ; Mtei                             ; Meetei_Mayek
1269 sc ; Mult                             ; Multani
1270 sc ; Mymr                             ; Myanmar

1271 sc ; Narb                             ; Old_North_Arabian
1272 sc ; Nbat                             ; Nabataean
1273 sc ; Newa                             ; Newa
1274 sc ; Nkoo                             ; Nko
1275 sc ; Nshu                             ; Nushu
1276 sc ; Ogam                             ; Ogham
1277 sc ; Olck                             ; Ol_Chiki
1278 sc ; Orkh                             ; Old_Turkic
1279 sc ; Orya                             ; Oriya
1280 sc ; Osge                             ; Osage
1281 sc ; Osma                             ; Osmanya
1282 sc ; Palm                             ; Palmyrene
1283 sc ; Pauc                             ; Pau_Cin_Hau
1284 sc ; Perm                             ; Old_Permic
1285 sc ; Phag                             ; Phags_Pa
1286 sc ; Phli                             ; Inscriptional_Pahlavi
1287 sc ; Phlp                             ; Psalter_Pahlavi
1288 sc ; Phnx                             ; Phoenician
1289 sc ; Plrd                             ; Miao
1290 sc ; Prti                             ; Inscriptional_Parthian


1307 sc ; Sund                             ; Sundanese
1308 sc ; Sylo                             ; Syloti_Nagri
1309 sc ; Syrc                             ; Syriac
1310 sc ; Tagb                             ; Tagbanwa
1311 sc ; Takr                             ; Takri
1312 sc ; Tale                             ; Tai_Le
1313 sc ; Talu                             ; New_Tai_Lue
1314 sc ; Taml                             ; Tamil
1315 sc ; Tang                             ; Tangut
1316 sc ; Tavt                             ; Tai_Viet
1317 sc ; Telu                             ; Telugu
1318 sc ; Tfng                             ; Tifinagh
1319 sc ; Tglg                             ; Tagalog
1320 sc ; Thaa                             ; Thaana
1321 sc ; Thai                             ; Thai
1322 sc ; Tibt                             ; Tibetan
1323 sc ; Tirh                             ; Tirhuta
1324 sc ; Ugar                             ; Ugaritic
1325 sc ; Vaii                             ; Vai
1326 sc ; Wara                             ; Warang_Citi

1327 sc ; Xpeo                             ; Old_Persian
1328 sc ; Xsux                             ; Cuneiform
1329 sc ; Yiii                             ; Yi
1330 sc ; Zanb                             ; Zanabazar_Square
1331 sc ; Zinh                             ; Inherited                        ; Qaai
1332 sc ; Zyyy                             ; Common
1333 sc ; Zzzz                             ; Unknown
1334 
1335 # Script_Extensions (scx)
1336 
1337 # @missing: 0000..10FFFF; Script_Extensions; <script>
1338 
1339 # Sentence_Break (SB)
1340 
1341 SB ; AT                               ; ATerm
1342 SB ; CL                               ; Close
1343 SB ; CR                               ; CR
1344 SB ; EX                               ; Extend
1345 SB ; FO                               ; Format
1346 SB ; LE                               ; OLetter


   1 # PropertyValueAliases-12.1.0.txt
   2 # Date: 2019-03-10, 10:53:18 GMT
   3 # Copyright (c) 2019 Unicode, Inc.
   4 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
   5 # For terms of use, see http://www.unicode.org/terms_of_use.html
   6 #
   7 # Unicode Character Database
   8 #   For documentation, see http://www.unicode.org/reports/tr44/
   9 #
  10 # This file contains aliases for property values used in the UCD.
  11 # These names can be used for XML formats of UCD data, for regular-expression
  12 # property tests, and other programmatic textual descriptions of Unicode data.
  13 #
  14 # The names may be translated in appropriate environments, and additional
  15 # aliases may be useful.
  16 #
  17 # FORMAT
  18 #
  19 # Each line describes a property value name.
  20 # This consists of three or more fields, separated by semicolons.
  21 #
  22 # First Field: The first field describes the property for which that
  23 # property value name is used.
  24 #
  25 # Second Field: The second field is the short name for the property value.
  26 # It is typically an abbreviation, but in a number of cases it is simply
  27 # a duplicate of the "long name" in the third field.
  28 #
  29 # Third Field: The third field is the long name for the property value, 
  30 # typically the formal name used in documentation about the property value.
  31 #
  32 # In the case of Canonical_Combining_Class (ccc), there are 4 fields: 
  33 # The second field is numeric, the third is the short name, and the fourth is the long name.
  34 #
  35 # The above are the preferred aliases. Other aliases may be listed in additional fields.
  36 #
  37 # Loose matching should be applied to all property names and property values, with
  38 # the exception of String Property values. With loose matching of property names and
  39 # values, the case distinctions, whitespace, hyphens, and '_' are ignored.
  40 # For Numeric Property values, numeric equivalence is applied: thus "01.00"
  41 # is equivalent to "1".
  42 #
  43 # NOTE: Property value names are NOT unique across properties. For example:
  44 #
  45 #   AL means Arabic Letter for the Bidi_Class property, and
  46 #   AL means Above_Left for the Canonical_Combining_Class property, and
  47 #   AL means Alphabetic for the Line_Break property.
  48 #
  49 # In addition, some property names may be the same as some property value names.
  50 # For example:
  51 #
  52 #   sc means the Script property, and
  53 #   Sc means the General_Category property value Currency_Symbol (Sc)
  54 #
  55 # The combination of property value and property name is, however, unique.
  56 #
  57 # For more information, see UAX #44, Unicode Character Database, and
  58 # UTS #18, Unicode Regular Expressions.
  59 # ================================================
  60 
  61 
  62 # ASCII_Hex_Digit (AHex)
  63 
  64 AHex; N                               ; No                               ; F                                ; False
  65 AHex; Y                               ; Yes                              ; T                                ; True
  66 
  67 # Age (age)
  68 
  69 age; 1.1                              ; V1_1
  70 age; 2.0                              ; V2_0
  71 age; 2.1                              ; V2_1
  72 age; 3.0                              ; V3_0
  73 age; 3.1                              ; V3_1
  74 age; 3.2                              ; V3_2
  75 age; 4.0                              ; V4_0
  76 age; 4.1                              ; V4_1
  77 age; 5.0                              ; V5_0
  78 age; 5.1                              ; V5_1
  79 age; 5.2                              ; V5_2
  80 age; 6.0                              ; V6_0
  81 age; 6.1                              ; V6_1
  82 age; 6.2                              ; V6_2
  83 age; 6.3                              ; V6_3
  84 age; 7.0                              ; V7_0
  85 age; 8.0                              ; V8_0
  86 age; 9.0                              ; V9_0
  87 age; 10.0                             ; V10_0
  88 age; 11.0                             ; V11_0
  89 age; 12.0                             ; V12_0
  90 age; 12.1                             ; V12_1
  91 age; NA                               ; Unassigned
  92 
  93 # Alphabetic (Alpha)
  94 
  95 Alpha; N                              ; No                               ; F                                ; False
  96 Alpha; Y                              ; Yes                              ; T                                ; True
  97 
  98 # Bidi_Class (bc)
  99 
 100 bc ; AL                               ; Arabic_Letter
 101 bc ; AN                               ; Arabic_Number
 102 bc ; B                                ; Paragraph_Separator
 103 bc ; BN                               ; Boundary_Neutral
 104 bc ; CS                               ; Common_Separator
 105 bc ; EN                               ; European_Number
 106 bc ; ES                               ; European_Separator
 107 bc ; ET                               ; European_Terminator
 108 bc ; FSI                              ; First_Strong_Isolate
 109 bc ; L                                ; Left_To_Right
 110 bc ; LRE                              ; Left_To_Right_Embedding


 213 blk; Cuneiform_Numbers                ; Cuneiform_Numbers_And_Punctuation
 214 blk; Currency_Symbols                 ; Currency_Symbols
 215 blk; Cypriot_Syllabary                ; Cypriot_Syllabary
 216 blk; Cyrillic                         ; Cyrillic
 217 blk; Cyrillic_Ext_A                   ; Cyrillic_Extended_A
 218 blk; Cyrillic_Ext_B                   ; Cyrillic_Extended_B
 219 blk; Cyrillic_Ext_C                   ; Cyrillic_Extended_C
 220 blk; Cyrillic_Sup                     ; Cyrillic_Supplement              ; Cyrillic_Supplementary
 221 blk; Deseret                          ; Deseret
 222 blk; Devanagari                       ; Devanagari
 223 blk; Devanagari_Ext                   ; Devanagari_Extended
 224 blk; Diacriticals                     ; Combining_Diacritical_Marks
 225 blk; Diacriticals_Ext                 ; Combining_Diacritical_Marks_Extended
 226 blk; Diacriticals_For_Symbols         ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols
 227 blk; Diacriticals_Sup                 ; Combining_Diacritical_Marks_Supplement
 228 blk; Dingbats                         ; Dingbats
 229 blk; Dogra                            ; Dogra
 230 blk; Domino                           ; Domino_Tiles
 231 blk; Duployan                         ; Duployan
 232 blk; Early_Dynastic_Cuneiform         ; Early_Dynastic_Cuneiform
 233 blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls
 234 blk; Egyptian_Hieroglyphs             ; Egyptian_Hieroglyphs
 235 blk; Elbasan                          ; Elbasan
 236 blk; Elymaic                          ; Elymaic
 237 blk; Emoticons                        ; Emoticons
 238 blk; Enclosed_Alphanum                ; Enclosed_Alphanumerics
 239 blk; Enclosed_Alphanum_Sup            ; Enclosed_Alphanumeric_Supplement
 240 blk; Enclosed_CJK                     ; Enclosed_CJK_Letters_And_Months
 241 blk; Enclosed_Ideographic_Sup         ; Enclosed_Ideographic_Supplement
 242 blk; Ethiopic                         ; Ethiopic
 243 blk; Ethiopic_Ext                     ; Ethiopic_Extended
 244 blk; Ethiopic_Ext_A                   ; Ethiopic_Extended_A
 245 blk; Ethiopic_Sup                     ; Ethiopic_Supplement
 246 blk; Geometric_Shapes                 ; Geometric_Shapes
 247 blk; Geometric_Shapes_Ext             ; Geometric_Shapes_Extended
 248 blk; Georgian                         ; Georgian
 249 blk; Georgian_Ext                     ; Georgian_Extended
 250 blk; Georgian_Sup                     ; Georgian_Supplement
 251 blk; Glagolitic                       ; Glagolitic
 252 blk; Glagolitic_Sup                   ; Glagolitic_Supplement
 253 blk; Gothic                           ; Gothic
 254 blk; Grantha                          ; Grantha
 255 blk; Greek                            ; Greek_And_Coptic
 256 blk; Greek_Ext                        ; Greek_Extended


 330 blk; Meroitic_Hieroglyphs             ; Meroitic_Hieroglyphs
 331 blk; Miao                             ; Miao
 332 blk; Misc_Arrows                      ; Miscellaneous_Symbols_And_Arrows
 333 blk; Misc_Math_Symbols_A              ; Miscellaneous_Mathematical_Symbols_A
 334 blk; Misc_Math_Symbols_B              ; Miscellaneous_Mathematical_Symbols_B
 335 blk; Misc_Pictographs                 ; Miscellaneous_Symbols_And_Pictographs
 336 blk; Misc_Symbols                     ; Miscellaneous_Symbols
 337 blk; Misc_Technical                   ; Miscellaneous_Technical
 338 blk; Modi                             ; Modi
 339 blk; Modifier_Letters                 ; Spacing_Modifier_Letters
 340 blk; Modifier_Tone_Letters            ; Modifier_Tone_Letters
 341 blk; Mongolian                        ; Mongolian
 342 blk; Mongolian_Sup                    ; Mongolian_Supplement
 343 blk; Mro                              ; Mro
 344 blk; Multani                          ; Multani
 345 blk; Music                            ; Musical_Symbols
 346 blk; Myanmar                          ; Myanmar
 347 blk; Myanmar_Ext_A                    ; Myanmar_Extended_A
 348 blk; Myanmar_Ext_B                    ; Myanmar_Extended_B
 349 blk; Nabataean                        ; Nabataean
 350 blk; Nandinagari                      ; Nandinagari
 351 blk; NB                               ; No_Block
 352 blk; New_Tai_Lue                      ; New_Tai_Lue
 353 blk; Newa                             ; Newa
 354 blk; NKo                              ; NKo
 355 blk; Number_Forms                     ; Number_Forms
 356 blk; Nushu                            ; Nushu
 357 blk; Nyiakeng_Puachue_Hmong           ; Nyiakeng_Puachue_Hmong
 358 blk; OCR                              ; Optical_Character_Recognition
 359 blk; Ogham                            ; Ogham
 360 blk; Ol_Chiki                         ; Ol_Chiki
 361 blk; Old_Hungarian                    ; Old_Hungarian
 362 blk; Old_Italic                       ; Old_Italic
 363 blk; Old_North_Arabian                ; Old_North_Arabian
 364 blk; Old_Permic                       ; Old_Permic
 365 blk; Old_Persian                      ; Old_Persian
 366 blk; Old_Sogdian                      ; Old_Sogdian
 367 blk; Old_South_Arabian                ; Old_South_Arabian
 368 blk; Old_Turkic                       ; Old_Turkic
 369 blk; Oriya                            ; Oriya
 370 blk; Ornamental_Dingbats              ; Ornamental_Dingbats
 371 blk; Osage                            ; Osage
 372 blk; Osmanya                          ; Osmanya
 373 blk; Ottoman_Siyaq_Numbers            ; Ottoman_Siyaq_Numbers
 374 blk; Pahawh_Hmong                     ; Pahawh_Hmong
 375 blk; Palmyrene                        ; Palmyrene
 376 blk; Pau_Cin_Hau                      ; Pau_Cin_Hau
 377 blk; Phags_Pa                         ; Phags_Pa
 378 blk; Phaistos                         ; Phaistos_Disc
 379 blk; Phoenician                       ; Phoenician
 380 blk; Phonetic_Ext                     ; Phonetic_Extensions
 381 blk; Phonetic_Ext_Sup                 ; Phonetic_Extensions_Supplement
 382 blk; Playing_Cards                    ; Playing_Cards
 383 blk; Psalter_Pahlavi                  ; Psalter_Pahlavi
 384 blk; PUA                              ; Private_Use_Area                 ; Private_Use
 385 blk; Punctuation                      ; General_Punctuation
 386 blk; Rejang                           ; Rejang
 387 blk; Rumi                             ; Rumi_Numeral_Symbols
 388 blk; Runic                            ; Runic
 389 blk; Samaritan                        ; Samaritan
 390 blk; Saurashtra                       ; Saurashtra
 391 blk; Sharada                          ; Sharada
 392 blk; Shavian                          ; Shavian
 393 blk; Shorthand_Format_Controls        ; Shorthand_Format_Controls
 394 blk; Siddham                          ; Siddham
 395 blk; Sinhala                          ; Sinhala
 396 blk; Sinhala_Archaic_Numbers          ; Sinhala_Archaic_Numbers
 397 blk; Small_Forms                      ; Small_Form_Variants
 398 blk; Small_Kana_Ext                   ; Small_Kana_Extension
 399 blk; Sogdian                          ; Sogdian
 400 blk; Sora_Sompeng                     ; Sora_Sompeng
 401 blk; Soyombo                          ; Soyombo
 402 blk; Specials                         ; Specials
 403 blk; Sundanese                        ; Sundanese
 404 blk; Sundanese_Sup                    ; Sundanese_Supplement
 405 blk; Sup_Arrows_A                     ; Supplemental_Arrows_A
 406 blk; Sup_Arrows_B                     ; Supplemental_Arrows_B
 407 blk; Sup_Arrows_C                     ; Supplemental_Arrows_C
 408 blk; Sup_Math_Operators               ; Supplemental_Mathematical_Operators
 409 blk; Sup_PUA_A                        ; Supplementary_Private_Use_Area_A
 410 blk; Sup_PUA_B                        ; Supplementary_Private_Use_Area_B
 411 blk; Sup_Punctuation                  ; Supplemental_Punctuation
 412 blk; Sup_Symbols_And_Pictographs      ; Supplemental_Symbols_And_Pictographs
 413 blk; Super_And_Sub                    ; Superscripts_And_Subscripts
 414 blk; Sutton_SignWriting               ; Sutton_SignWriting
 415 blk; Syloti_Nagri                     ; Syloti_Nagri
 416 blk; Symbols_And_Pictographs_Ext_A    ; Symbols_And_Pictographs_Extended_A
 417 blk; Syriac                           ; Syriac
 418 blk; Syriac_Sup                       ; Syriac_Supplement
 419 blk; Tagalog                          ; Tagalog
 420 blk; Tagbanwa                         ; Tagbanwa
 421 blk; Tags                             ; Tags
 422 blk; Tai_Le                           ; Tai_Le
 423 blk; Tai_Tham                         ; Tai_Tham
 424 blk; Tai_Viet                         ; Tai_Viet
 425 blk; Tai_Xuan_Jing                    ; Tai_Xuan_Jing_Symbols
 426 blk; Takri                            ; Takri
 427 blk; Tamil                            ; Tamil
 428 blk; Tamil_Sup                        ; Tamil_Supplement
 429 blk; Tangut                           ; Tangut
 430 blk; Tangut_Components                ; Tangut_Components
 431 blk; Telugu                           ; Telugu
 432 blk; Thaana                           ; Thaana
 433 blk; Thai                             ; Thai
 434 blk; Tibetan                          ; Tibetan
 435 blk; Tifinagh                         ; Tifinagh
 436 blk; Tirhuta                          ; Tirhuta
 437 blk; Transport_And_Map                ; Transport_And_Map_Symbols
 438 blk; UCAS                             ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
 439 blk; UCAS_Ext                         ; Unified_Canadian_Aboriginal_Syllabics_Extended
 440 blk; Ugaritic                         ; Ugaritic
 441 blk; Vai                              ; Vai
 442 blk; Vedic_Ext                        ; Vedic_Extensions
 443 blk; Vertical_Forms                   ; Vertical_Forms
 444 blk; VS                               ; Variation_Selectors
 445 blk; VS_Sup                           ; Variation_Selectors_Supplement
 446 blk; Wancho                           ; Wancho
 447 blk; Warang_Citi                      ; Warang_Citi
 448 blk; Yi_Radicals                      ; Yi_Radicals
 449 blk; Yi_Syllables                     ; Yi_Syllables
 450 blk; Yijing                           ; Yijing_Hexagram_Symbols
 451 blk; Zanabazar_Square                 ; Zanabazar_Square
 452 
 453 # Canonical_Combining_Class (ccc)
 454 
 455 ccc;   0; NR                         ; Not_Reordered
 456 ccc;   1; OV                         ; Overlay
 457 ccc;   7; NK                         ; Nukta
 458 ccc;   8; KV                         ; Kana_Voicing
 459 ccc;   9; VR                         ; Virama
 460 ccc;  10; CCC10                      ; CCC10
 461 ccc;  11; CCC11                      ; CCC11
 462 ccc;  12; CCC12                      ; CCC12
 463 ccc;  13; CCC13                      ; CCC13
 464 ccc;  14; CCC14                      ; CCC14
 465 ccc;  15; CCC15                      ; CCC15
 466 ccc;  16; CCC16                      ; CCC16


1211 sc ; Bhks                             ; Bhaiksuki
1212 sc ; Bopo                             ; Bopomofo
1213 sc ; Brah                             ; Brahmi
1214 sc ; Brai                             ; Braille
1215 sc ; Bugi                             ; Buginese
1216 sc ; Buhd                             ; Buhid
1217 sc ; Cakm                             ; Chakma
1218 sc ; Cans                             ; Canadian_Aboriginal
1219 sc ; Cari                             ; Carian
1220 sc ; Cham                             ; Cham
1221 sc ; Cher                             ; Cherokee
1222 sc ; Copt                             ; Coptic                           ; Qaac
1223 sc ; Cprt                             ; Cypriot
1224 sc ; Cyrl                             ; Cyrillic
1225 sc ; Deva                             ; Devanagari
1226 sc ; Dogr                             ; Dogra
1227 sc ; Dsrt                             ; Deseret
1228 sc ; Dupl                             ; Duployan
1229 sc ; Egyp                             ; Egyptian_Hieroglyphs
1230 sc ; Elba                             ; Elbasan
1231 sc ; Elym                             ; Elymaic
1232 sc ; Ethi                             ; Ethiopic
1233 sc ; Geor                             ; Georgian
1234 sc ; Glag                             ; Glagolitic
1235 sc ; Gong                             ; Gunjala_Gondi
1236 sc ; Gonm                             ; Masaram_Gondi
1237 sc ; Goth                             ; Gothic
1238 sc ; Gran                             ; Grantha
1239 sc ; Grek                             ; Greek
1240 sc ; Gujr                             ; Gujarati
1241 sc ; Guru                             ; Gurmukhi
1242 sc ; Hang                             ; Hangul
1243 sc ; Hani                             ; Han
1244 sc ; Hano                             ; Hanunoo
1245 sc ; Hatr                             ; Hatran
1246 sc ; Hebr                             ; Hebrew
1247 sc ; Hira                             ; Hiragana
1248 sc ; Hluw                             ; Anatolian_Hieroglyphs
1249 sc ; Hmng                             ; Pahawh_Hmong
1250 sc ; Hmnp                             ; Nyiakeng_Puachue_Hmong
1251 sc ; Hrkt                             ; Katakana_Or_Hiragana
1252 sc ; Hung                             ; Old_Hungarian
1253 sc ; Ital                             ; Old_Italic
1254 sc ; Java                             ; Javanese
1255 sc ; Kali                             ; Kayah_Li
1256 sc ; Kana                             ; Katakana
1257 sc ; Khar                             ; Kharoshthi
1258 sc ; Khmr                             ; Khmer
1259 sc ; Khoj                             ; Khojki
1260 sc ; Knda                             ; Kannada
1261 sc ; Kthi                             ; Kaithi
1262 sc ; Lana                             ; Tai_Tham
1263 sc ; Laoo                             ; Lao
1264 sc ; Latn                             ; Latin
1265 sc ; Lepc                             ; Lepcha
1266 sc ; Limb                             ; Limbu
1267 sc ; Lina                             ; Linear_A
1268 sc ; Linb                             ; Linear_B
1269 sc ; Lisu                             ; Lisu
1270 sc ; Lyci                             ; Lycian
1271 sc ; Lydi                             ; Lydian
1272 sc ; Mahj                             ; Mahajani
1273 sc ; Maka                             ; Makasar
1274 sc ; Mand                             ; Mandaic
1275 sc ; Mani                             ; Manichaean
1276 sc ; Marc                             ; Marchen
1277 sc ; Medf                             ; Medefaidrin
1278 sc ; Mend                             ; Mende_Kikakui
1279 sc ; Merc                             ; Meroitic_Cursive
1280 sc ; Mero                             ; Meroitic_Hieroglyphs
1281 sc ; Mlym                             ; Malayalam
1282 sc ; Modi                             ; Modi
1283 sc ; Mong                             ; Mongolian
1284 sc ; Mroo                             ; Mro
1285 sc ; Mtei                             ; Meetei_Mayek
1286 sc ; Mult                             ; Multani
1287 sc ; Mymr                             ; Myanmar
1288 sc ; Nand                             ; Nandinagari
1289 sc ; Narb                             ; Old_North_Arabian
1290 sc ; Nbat                             ; Nabataean
1291 sc ; Newa                             ; Newa
1292 sc ; Nkoo                             ; Nko
1293 sc ; Nshu                             ; Nushu
1294 sc ; Ogam                             ; Ogham
1295 sc ; Olck                             ; Ol_Chiki
1296 sc ; Orkh                             ; Old_Turkic
1297 sc ; Orya                             ; Oriya
1298 sc ; Osge                             ; Osage
1299 sc ; Osma                             ; Osmanya
1300 sc ; Palm                             ; Palmyrene
1301 sc ; Pauc                             ; Pau_Cin_Hau
1302 sc ; Perm                             ; Old_Permic
1303 sc ; Phag                             ; Phags_Pa
1304 sc ; Phli                             ; Inscriptional_Pahlavi
1305 sc ; Phlp                             ; Psalter_Pahlavi
1306 sc ; Phnx                             ; Phoenician
1307 sc ; Plrd                             ; Miao
1308 sc ; Prti                             ; Inscriptional_Parthian


1325 sc ; Sund                             ; Sundanese
1326 sc ; Sylo                             ; Syloti_Nagri
1327 sc ; Syrc                             ; Syriac
1328 sc ; Tagb                             ; Tagbanwa
1329 sc ; Takr                             ; Takri
1330 sc ; Tale                             ; Tai_Le
1331 sc ; Talu                             ; New_Tai_Lue
1332 sc ; Taml                             ; Tamil
1333 sc ; Tang                             ; Tangut
1334 sc ; Tavt                             ; Tai_Viet
1335 sc ; Telu                             ; Telugu
1336 sc ; Tfng                             ; Tifinagh
1337 sc ; Tglg                             ; Tagalog
1338 sc ; Thaa                             ; Thaana
1339 sc ; Thai                             ; Thai
1340 sc ; Tibt                             ; Tibetan
1341 sc ; Tirh                             ; Tirhuta
1342 sc ; Ugar                             ; Ugaritic
1343 sc ; Vaii                             ; Vai
1344 sc ; Wara                             ; Warang_Citi
1345 sc ; Wcho                             ; Wancho
1346 sc ; Xpeo                             ; Old_Persian
1347 sc ; Xsux                             ; Cuneiform
1348 sc ; Yiii                             ; Yi
1349 sc ; Zanb                             ; Zanabazar_Square
1350 sc ; Zinh                             ; Inherited                        ; Qaai
1351 sc ; Zyyy                             ; Common
1352 sc ; Zzzz                             ; Unknown
1353 
1354 # Script_Extensions (scx)
1355 
1356 # @missing: 0000..10FFFF; Script_Extensions; <script>
1357 
1358 # Sentence_Break (SB)
1359 
1360 SB ; AT                               ; ATerm
1361 SB ; CL                               ; Close
1362 SB ; CR                               ; CR
1363 SB ; EX                               ; Extend
1364 SB ; FO                               ; Format
1365 SB ; LE                               ; OLetter


< prev index next >