< prev index next >

src/java.desktop/share/native/libfontmanager/harfbuzz/hb-unicode-private.hh

Print this page




  91 
  92   inline unsigned int decompose_compatibility (hb_codepoint_t  u,
  93                                                hb_codepoint_t *decomposed)
  94   {
         /* Forward to func.decompose_compatibility, then normalize its result:
          * the output sequence is always 0-terminated, and a result consisting
          * solely of u itself is reported as "no decomposition" (length 0).
          * Because decomposed[ret] is written below, the caller's buffer needs
          * room for one entry more than the callback returns. */
  95     unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility);
         /* The callback handed back just the input codepoint: treat as none. */
  96     if (ret == 1 && u == decomposed[0]) {
  97       decomposed[0] = 0;
  98       return 0;
  99     }
         /* Terminate the sequence and return its length. */
 100     decomposed[ret] = 0;
 101     return ret;
 102   }
 103 
 104 
 105   inline unsigned int
 106   modified_combining_class (hb_codepoint_t unicode)
 107   {
         /* Returns the combining class used for mark reordering: a handful of
          * codepoints get hard-coded values below, everything else goes through
          * the _hb_modified_combining_class table. */

 108     /* XXX This hack belongs to the Myanmar shaper: U+1037 is looked up as
          * if it were U+103A. */
 109     if (unlikely (unicode == 0x1037u)) unicode = 0x103Au;
 110 
 111     /* XXX This hack belongs to the SEA shaper (for Tai Tham):
 112      * Reorder SAKOT to ensure it comes after any tone marks. */
 113     if (unlikely (unicode == 0x1A60u)) return 254;
 114 
 115     /* XXX This hack belongs to the Tibetan shaper:
 116      * Reorder PADMA to ensure it comes after any vowel marks. */
 117     if (unlikely (unicode == 0x0FC6u)) return 254;
 118     /* Reorder TSA -PHRU to reorder before U+0F74 */
 119     if (unlikely (unicode == 0x0F39u)) return 127;
 120 
         /* Default: map the codepoint's combining class through the table. */
 121     return _hb_modified_combining_class[combining_class (unicode)];
 122   }
 123 
 124   static inline hb_bool_t
 125   is_variation_selector (hb_codepoint_t unicode)
 126   {
         /* True when unicode falls in one of the two variation-selector ranges
          * tested below (U+FE00..FE0F or U+E0100..E01EF). */

 127     /* U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the
 128      * Arabic shaper.  No need to match them here. */
 129     return unlikely (hb_in_ranges (unicode,
 130                                    0xFE00u, 0xFE0Fu, /* VARIATION SELECTOR-1..16 */
 131                                    0xE0100u, 0xE01EFu));  /* VARIATION SELECTOR-17..256 */
 132   }
 133 
 134   /* Default_Ignorable codepoints:
 135    *
 136    * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable,
 137    * we do NOT want to hide them, as the way Uniscribe has implemented them
 138    * is with regular spacing glyphs, and that's the way fonts are made to work.
 139    * As such, we make exceptions for those four.

 140    *
 141    * Unicode 7.0:
 142    * $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/'
 143    * 00AD          # Cf       SOFT HYPHEN
 144    * 034F          # Mn       COMBINING GRAPHEME JOINER
 145    * 061C          # Cf       ARABIC LETTER MARK
 146    * 115F..1160    # Lo   [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
 147    * 17B4..17B5    # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
 148    * 180B..180D    # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
 149    * 180E          # Cf       MONGOLIAN VOWEL SEPARATOR
 150    * 200B..200F    # Cf   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
 151    * 202A..202E    # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
 152    * 2060..2064    # Cf   [5] WORD JOINER..INVISIBLE PLUS
 153    * 2065          # Cn       <reserved-2065>
 154    * 2066..206F    # Cf  [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
 155    * 3164          # Lo       HANGUL FILLER
 156    * FE00..FE0F    # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
 157    * FEFF          # Cf       ZERO WIDTH NO-BREAK SPACE
 158    * FFA0          # Lo       HALFWIDTH HANGUL FILLER
 159    * FFF0..FFF8    # Cn   [9] <reserved-FFF0>..<reserved-FFF8>
 160    * 1BCA0..1BCA3  # Cf   [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
 161    * 1D173..1D17A  # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
 162    * E0000         # Cn       <reserved-E0000>
 163    * E0001         # Cf       LANGUAGE TAG
 164    * E0002..E001F  # Cn  [30] <reserved-E0002>..<reserved-E001F>
 165    * E0020..E007F  # Cf  [96] TAG SPACE..CANCEL TAG
 166    * E0080..E00FF  # Cn [128] <reserved-E0080>..<reserved-E00FF>
 167    * E0100..E01EF  # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 168    * E01F0..E0FFF  # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 169    */
 170   static inline hb_bool_t
 171   is_default_ignorable (hb_codepoint_t ch)
 172   {
         /* True for the codepoints listed above, minus the four Hangul filler
          * exceptions.  The test is split by plane (ch >> 16) and, inside the
          * BMP, by 256-codepoint page (ch >> 8); any plane or page without a
          * case label below is reported as not ignorable. */
 173     hb_codepoint_t plane = ch >> 16;
 174     if (likely (plane == 0))
 175     {
 176       /* BMP */
 177       hb_codepoint_t page = ch >> 8;
 178       switch (page) {
 179         case 0x00: return unlikely (ch == 0x00ADu);
 180         case 0x03: return unlikely (ch == 0x034Fu);
 181         case 0x06: return unlikely (ch == 0x061Cu);
 182         case 0x17: return hb_in_range (ch, 0x17B4u, 0x17B5u);
 183         case 0x18: return hb_in_range (ch, 0x180Bu, 0x180Eu); /* 180B..180D plus 180E */
 184         case 0x20: return hb_in_ranges (ch, 0x200Bu, 0x200Fu,
 185                                             0x202Au, 0x202Eu,
 186                                             0x2060u, 0x206Fu); /* 2060..2064, 2065 and 2066..206F */
 187         case 0xFE: return hb_in_range (ch, 0xFE00u, 0xFE0Fu) || ch == 0xFEFFu;
 188         case 0xFF: return hb_in_range (ch, 0xFFF0u, 0xFFF8u); /* U+FFA0 deliberately not matched */
 189         default: return false; /* includes pages 0x11 and 0x31 (Hangul fillers) */
 190       }
 191     }
 192     else
 193     {
 194       /* Other planes */
 195       switch (plane) {
 196         case 0x01: return hb_in_ranges (ch, 0x1BCA0u, 0x1BCA3u,
 197                                             0x1D173u, 0x1D17Au);
 198         case 0x0E: return hb_in_range (ch, 0xE0000u, 0xE0FFFu); /* whole E0000..E0FFF block */
 199         default: return false;
 200       }
 201     }
 202   }
 203 
 204   /* Space estimates based on:
 205    * http://www.unicode.org/charts/PDF/U2000.pdf
 206    * https://www.microsoft.com/typography/developers/fdsspec/spaces.aspx
 207    */
 208   enum space_t {
 209     NOT_SPACE = 0,
 210     SPACE_EM   = 1,
 211     SPACE_EM_2 = 2,
 212     SPACE_EM_3 = 3,
 213     SPACE_EM_4 = 4,
 214     SPACE_EM_5 = 5,
 215     SPACE_EM_6 = 6,
 216     SPACE_EM_16 = 16,
 217     SPACE_4_EM_18,      /* 4/18th of an EM! */
 218     SPACE,


 329  * a non-zero ccc.  That makes them reorder with the Halant that is
 330  * ccc=9.  Just zero them, we don't need them in our Indic shaper.
 331  */
 332 #define HB_MODIFIED_COMBINING_CLASS_CCC84 0 /* length mark */
 333 #define HB_MODIFIED_COMBINING_CLASS_CCC91 0 /* ai length mark */
 334 
 335 /* Thai
 336  *
 337  * Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9).
 338  * Assign 3, which is unassigned otherwise.
 339  * Uniscribe does this reordering too.
 340  */
 341 #define HB_MODIFIED_COMBINING_CLASS_CCC103 3 /* sara u / sara uu */
 342 #define HB_MODIFIED_COMBINING_CLASS_CCC107 107 /* mai *; value left unchanged */
 343 
 344 /* Lao: both classes below keep their original values. */
 345 #define HB_MODIFIED_COMBINING_CLASS_CCC118 118 /* sign u / sign uu */
 346 #define HB_MODIFIED_COMBINING_CLASS_CCC122 122 /* mai * */
 347 
 348 /* Tibetan
 349  * Modify U+0F74 (ccc=132) to reorder before ccc=130 marks.
      * The replacement value 128 is also lower than sign aa's 129.

 350  */
 351 #define HB_MODIFIED_COMBINING_CLASS_CCC129 129 /* sign aa */
 352 #define HB_MODIFIED_COMBINING_CLASS_CCC130 130 /* sign i */
 353 #define HB_MODIFIED_COMBINING_CLASS_CCC132 128 /* sign u */
 355 
 356 /* Misc */
 357 
     /* Non-zero when gen_cat is the spacing, enclosing or non-spacing mark
      * category.  NOTE(review): FLAG and FLAG_SAFE are defined elsewhere;
      * presumably each yields a single-bit mask for its argument -- confirm. */
 358 #define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \
 359         (FLAG_SAFE (gen_cat) & \
 360          (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 361           FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
 362           FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
 363 
     /* Non-zero when gen_cat is the spacing mark, non-spacing mark or modifier
      * symbol category; enclosing marks are not included in this mask. */
 364 #define HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK_OR_MODIFIER_SYMBOL(gen_cat) \
 365         (FLAG_SAFE (gen_cat) & \
 366          (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 367           FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
 368           FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL)))
 369 
 370 #endif /* HB_UNICODE_PRIVATE_HH */


  91 
  92   inline unsigned int decompose_compatibility (hb_codepoint_t  u,
  93                                                hb_codepoint_t *decomposed)
  94   {
         /* Forward to func.decompose_compatibility, then normalize its result:
          * the output sequence is always 0-terminated, and a result consisting
          * solely of u itself is reported as "no decomposition" (length 0).
          * Because decomposed[ret] is written below, the caller's buffer needs
          * room for one entry more than the callback returns. */
  95     unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility);
         /* The callback handed back just the input codepoint: treat as none. */
  96     if (ret == 1 && u == decomposed[0]) {
  97       decomposed[0] = 0;
  98       return 0;
  99     }
         /* Terminate the sequence and return its length. */
 100     decomposed[ret] = 0;
 101     return ret;
 102   }
 103 
 104 
 105   inline unsigned int
 106   modified_combining_class (hb_codepoint_t unicode)
 107   {
         /* Returns the combining class used for mark reordering: a handful of
          * codepoints get hard-coded values below, everything else goes through
          * the _hb_modified_combining_class table. */

 108     /* XXX This hack belongs to the Myanmar shaper: U+1037 is looked up as
          * if it were U+103A. */
 109     if (unlikely (unicode == 0x1037u)) unicode = 0x103Au;
 110 
 111     /* XXX This hack belongs to the USE shaper (for Tai Tham):
 112      * Reorder SAKOT to ensure it comes after any tone marks. */
 113     if (unlikely (unicode == 0x1A60u)) return 254;
 114 
 115     /* XXX This hack belongs to the Tibetan shaper:
 116      * Reorder PADMA to ensure it comes after any vowel marks. */
 117     if (unlikely (unicode == 0x0FC6u)) return 254;
 118     /* Reorder TSA -PHRU to reorder before U+0F74 */
 119     if (unlikely (unicode == 0x0F39u)) return 127;
 120 
         /* Default: map the codepoint's combining class through the table. */
 121     return _hb_modified_combining_class[combining_class (unicode)];
 122   }
 123 
 124   static inline hb_bool_t
 125   is_variation_selector (hb_codepoint_t unicode)
 126   {
         /* True when unicode falls in one of the two variation-selector ranges
          * tested below (U+FE00..FE0F or U+E0100..E01EF). */

 127     /* U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the
 128      * Arabic shaper.  No need to match them here. */
 129     return unlikely (hb_in_ranges<hb_codepoint_t> (unicode,
 130                                    0xFE00u, 0xFE0Fu, /* VARIATION SELECTOR-1..16 */
 131                                    0xE0100u, 0xE01EFu));  /* VARIATION SELECTOR-17..256 */
 132   }
 133 
 134   /* Default_Ignorable codepoints:
 135    *
 136    * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable,
 137    * we do NOT want to hide them, as the way Uniscribe has implemented them
 138    * is with regular spacing glyphs, and that's the way fonts are made to work.
 139    * As such, we make exceptions for those four.
 140    * Also ignoring U+1BCA0..1BCA3. https://github.com/behdad/harfbuzz/issues/503
 141    *
 142    * Unicode 7.0:
 143    * $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/'
 144    * 00AD          # Cf       SOFT HYPHEN
 145    * 034F          # Mn       COMBINING GRAPHEME JOINER
 146    * 061C          # Cf       ARABIC LETTER MARK
 147    * 115F..1160    # Lo   [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
 148    * 17B4..17B5    # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
 149    * 180B..180D    # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
 150    * 180E          # Cf       MONGOLIAN VOWEL SEPARATOR
 151    * 200B..200F    # Cf   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
 152    * 202A..202E    # Cf   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
 153    * 2060..2064    # Cf   [5] WORD JOINER..INVISIBLE PLUS
 154    * 2065          # Cn       <reserved-2065>
 155    * 2066..206F    # Cf  [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
 156    * 3164          # Lo       HANGUL FILLER
 157    * FE00..FE0F    # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
 158    * FEFF          # Cf       ZERO WIDTH NO-BREAK SPACE
 159    * FFA0          # Lo       HALFWIDTH HANGUL FILLER
 160    * FFF0..FFF8    # Cn   [9] <reserved-FFF0>..<reserved-FFF8>
 161    * 1BCA0..1BCA3  # Cf   [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
 162    * 1D173..1D17A  # Cf   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
 163    * E0000         # Cn       <reserved-E0000>
 164    * E0001         # Cf       LANGUAGE TAG
 165    * E0002..E001F  # Cn  [30] <reserved-E0002>..<reserved-E001F>
 166    * E0020..E007F  # Cf  [96] TAG SPACE..CANCEL TAG
 167    * E0080..E00FF  # Cn [128] <reserved-E0080>..<reserved-E00FF>
 168    * E0100..E01EF  # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 169    * E01F0..E0FFF  # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 170    */
 171   static inline hb_bool_t
 172   is_default_ignorable (hb_codepoint_t ch)
 173   {
         /* True for the codepoints listed above, minus the four Hangul filler
          * exceptions and U+1BCA0..1BCA3.  The test is split by plane
          * (ch >> 16) and, inside the BMP, by 256-codepoint page (ch >> 8);
          * any plane or page without a case label below is reported as not
          * ignorable. */
 174     hb_codepoint_t plane = ch >> 16;
 175     if (likely (plane == 0))
 176     {
 177       /* BMP */
 178       hb_codepoint_t page = ch >> 8;
 179       switch (page) {
 180         case 0x00: return unlikely (ch == 0x00ADu);
 181         case 0x03: return unlikely (ch == 0x034Fu);
 182         case 0x06: return unlikely (ch == 0x061Cu);
 183         case 0x17: return hb_in_range<hb_codepoint_t> (ch, 0x17B4u, 0x17B5u);
 184         case 0x18: return hb_in_range<hb_codepoint_t> (ch, 0x180Bu, 0x180Eu); /* 180B..180D plus 180E */
 185         case 0x20: return hb_in_ranges<hb_codepoint_t> (ch, 0x200Bu, 0x200Fu,
 186                                             0x202Au, 0x202Eu,
 187                                             0x2060u, 0x206Fu); /* 2060..2064, 2065 and 2066..206F */
 188         case 0xFE: return hb_in_range<hb_codepoint_t> (ch, 0xFE00u, 0xFE0Fu) || ch == 0xFEFFu;
 189         case 0xFF: return hb_in_range<hb_codepoint_t> (ch, 0xFFF0u, 0xFFF8u); /* U+FFA0 deliberately not matched */
 190         default: return false; /* includes pages 0x11 and 0x31 (Hangul fillers) */
 191       }
 192     }
 193     else
 194     {
 195       /* Other planes */
 196       switch (plane) {
 197         case 0x01: return hb_in_range<hb_codepoint_t> (ch, 0x1D173u, 0x1D17Au); /* U+1BCA0..1BCA3 not matched here */
 198         case 0x0E: return hb_in_range<hb_codepoint_t> (ch, 0xE0000u, 0xE0FFFu); /* whole E0000..E0FFF block */

 199         default: return false;
 200       }
 201     }
 202   }
 203 
 204   /* Space estimates based on:
 205    * http://www.unicode.org/charts/PDF/U2000.pdf
 206    * https://www.microsoft.com/typography/developers/fdsspec/spaces.aspx
 207    */
 208   enum space_t {
 209     NOT_SPACE = 0,
 210     SPACE_EM   = 1,
 211     SPACE_EM_2 = 2,
 212     SPACE_EM_3 = 3,
 213     SPACE_EM_4 = 4,
 214     SPACE_EM_5 = 5,
 215     SPACE_EM_6 = 6,
 216     SPACE_EM_16 = 16,
 217     SPACE_4_EM_18,      /* 4/18th of an EM! */
 218     SPACE,


 329  * a non-zero ccc.  That makes them reorder with the Halant that is
 330  * ccc=9.  Just zero them, we don't need them in our Indic shaper.
 331  */
 332 #define HB_MODIFIED_COMBINING_CLASS_CCC84 0 /* length mark */
 333 #define HB_MODIFIED_COMBINING_CLASS_CCC91 0 /* ai length mark */
 334 
 335 /* Thai
 336  *
 337  * Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9).
 338  * Assign 3, which is unassigned otherwise.
 339  * Uniscribe does this reordering too.
 340  */
 341 #define HB_MODIFIED_COMBINING_CLASS_CCC103 3 /* sara u / sara uu */
 342 #define HB_MODIFIED_COMBINING_CLASS_CCC107 107 /* mai *; value left unchanged */
 343 
 344 /* Lao: both classes below keep their original values. */
 345 #define HB_MODIFIED_COMBINING_CLASS_CCC118 118 /* sign u / sign uu */
 346 #define HB_MODIFIED_COMBINING_CLASS_CCC122 122 /* mai * */
 347 
 348 /* Tibetan
 349  *
 350  * In case of multiple vowel-signs, use u first (but after achung)
 351  * this allows Dzongkha multi-vowel shortcuts to render correctly
      *
      * Resulting values: sign aa 129 < sign u 131 < sign i 132.
 352  */
 353 #define HB_MODIFIED_COMBINING_CLASS_CCC129 129 /* sign aa */
 354 #define HB_MODIFIED_COMBINING_CLASS_CCC130 132 /* sign i */
 355 #define HB_MODIFIED_COMBINING_CLASS_CCC132 131 /* sign u */

 356 
 357 /* Misc */
 358 
     /* Non-zero when gen_cat is the spacing, enclosing or non-spacing mark
      * category.  NOTE(review): FLAG and FLAG_UNSAFE are defined elsewhere;
      * presumably each yields a single-bit mask for its argument -- confirm. */
 359 #define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \
 360         (FLAG_UNSAFE (gen_cat) & \
 361          (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 362           FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
 363           FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
 364 
     /* Non-zero when gen_cat is the spacing mark, non-spacing mark or modifier
      * symbol category; enclosing marks are not included in this mask. */
 365 #define HB_UNICODE_GENERAL_CATEGORY_IS_NON_ENCLOSING_MARK_OR_MODIFIER_SYMBOL(gen_cat) \
 366         (FLAG_UNSAFE (gen_cat) & \
 367          (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 368           FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
 369           FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL)))
 370 
 371 #endif /* HB_UNICODE_PRIVATE_HH */
< prev index next >