1 /* 2 * Copyright © 2011,2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #include "hb-ot-shape-complex-indic-private.hh" 28 #include "hb-ot-layout-private.hh" 29 30 /* buffer var allocations */ 31 #define indic_category() complex_var_u8_0() /* indic_category_t */ 32 #define indic_position() complex_var_u8_1() /* indic_position_t */ 33 34 35 /* 36 * Indic shaper. 37 */ 38 39 40 #define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) 41 42 #define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) 43 #define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) 44 #define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) 45 #define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) 46 #define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) 47 #define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) 48 #define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) 49 #define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) 50 #define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) 51 #define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) 52 #define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780u)) 53 54 55 #define MATRA_POS_LEFT(u) POS_PRE_M 56 #define MATRA_POS_RIGHT(u) ( \ 57 IS_DEVA(u) ? POS_AFTER_SUB : \ 58 IS_BENG(u) ? POS_AFTER_POST : \ 59 IS_GURU(u) ? POS_AFTER_POST : \ 60 IS_GUJR(u) ? POS_AFTER_POST : \ 61 IS_ORYA(u) ? POS_AFTER_POST : \ 62 IS_TAML(u) ? POS_AFTER_POST : \ 63 IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 64 IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 65 IS_MLYM(u) ? POS_AFTER_POST : \ 66 IS_SINH(u) ? POS_AFTER_SUB : \ 67 IS_KHMR(u) ? POS_AFTER_POST : \ 68 /*default*/ POS_AFTER_SUB \ 69 ) 70 #define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ 71 IS_DEVA(u) ? POS_AFTER_SUB : \ 72 IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ 73 IS_GUJR(u) ? POS_AFTER_SUB : \ 74 IS_ORYA(u) ? POS_AFTER_MAIN : \ 75 IS_TAML(u) ? POS_AFTER_SUB : \ 76 IS_TELU(u) ? POS_BEFORE_SUB : \ 77 IS_KNDA(u) ? POS_BEFORE_SUB : \ 78 IS_SINH(u) ? POS_AFTER_SUB : \ 79 IS_KHMR(u) ? POS_AFTER_POST : \ 80 /*default*/ POS_AFTER_SUB \ 81 ) 82 #define MATRA_POS_BOTTOM(u) ( \ 83 IS_DEVA(u) ? POS_AFTER_SUB : \ 84 IS_BENG(u) ? POS_AFTER_SUB : \ 85 IS_GURU(u) ? POS_AFTER_POST : \ 86 IS_GUJR(u) ? POS_AFTER_POST : \ 87 IS_ORYA(u) ? POS_AFTER_SUB : \ 88 IS_TAML(u) ? POS_AFTER_POST : \ 89 IS_TELU(u) ? POS_BEFORE_SUB : \ 90 IS_KNDA(u) ? POS_BEFORE_SUB : \ 91 IS_MLYM(u) ? POS_AFTER_POST : \ 92 IS_SINH(u) ? POS_AFTER_SUB : \ 93 IS_KHMR(u) ? POS_AFTER_POST : \ 94 /*default*/ POS_AFTER_SUB \ 95 ) 96 97 static inline indic_position_t 98 matra_position (hb_codepoint_t u, indic_position_t side) 99 { 100 switch ((int) side) 101 { 102 case POS_PRE_C: return MATRA_POS_LEFT (u); 103 case POS_POST_C: return MATRA_POS_RIGHT (u); 104 case POS_ABOVE_C: return MATRA_POS_TOP (u); 105 case POS_BELOW_C: return MATRA_POS_BOTTOM (u); 106 }; 107 return side; 108 } 109 110 /* XXX 111 * This is a hack for now. We should move this data into the main Indic table. 112 * Or completely remove it and just check in the tables. 113 */ 114 static const hb_codepoint_t ra_chars[] = { 115 0x0930u, /* Devanagari */ 116 0x09B0u, /* Bengali */ 117 0x09F0u, /* Bengali */ 118 0x0A30u, /* Gurmukhi */ /* No Reph */ 119 0x0AB0u, /* Gujarati */ 120 0x0B30u, /* Oriya */ 121 0x0BB0u, /* Tamil */ /* No Reph */ 122 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ 123 0x0CB0u, /* Kannada */ 124 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ 125 126 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ 127 128 0x179Au, /* Khmer */ /* No Reph, Visual Repha */ 129 }; 130 131 static inline bool 132 is_ra (hb_codepoint_t u) 133 { 134 for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) 135 if (u == ra_chars[i]) 136 return true; 137 return false; 138 } 139 140 static inline bool 141 is_one_of (const hb_glyph_info_t &info, unsigned int flags) 142 { 143 /* If it ligated, all bets are off. */ 144 if (_hb_glyph_info_ligated (&info)) return false; 145 return !!(FLAG_UNSAFE (info.indic_category()) & flags); 146 } 147 148 static inline bool 149 is_joiner (const hb_glyph_info_t &info) 150 { 151 return is_one_of (info, JOINER_FLAGS); 152 } 153 154 static inline bool 155 is_consonant (const hb_glyph_info_t &info) 156 { 157 return is_one_of (info, CONSONANT_FLAGS); 158 } 159 160 static inline bool 161 is_halant_or_coeng (const hb_glyph_info_t &info) 162 { 163 return is_one_of (info, HALANT_OR_COENG_FLAGS); 164 } 165 166 static inline void 167 set_indic_properties (hb_glyph_info_t &info) 168 { 169 hb_codepoint_t u = info.codepoint; 170 unsigned int type = hb_indic_get_categories (u); 171 indic_category_t cat = (indic_category_t) (type & 0x7Fu); 172 indic_position_t pos = (indic_position_t) (type >> 8); 173 174 175 /* 176 * Re-assign category 177 */ 178 179 /* The following act more like the Bindus. */ 180 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) 181 cat = OT_SM; 182 /* The following act like consonants. */ 183 else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, 184 0x1CF5u, 0x1CF6u))) 185 cat = OT_C; 186 /* TODO: The following should only be allowed after a Visarga. 187 * For now, just treat them like regular tone marks. */ 188 else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) 189 cat = OT_A; 190 /* TODO: The following should only be allowed after some of 191 * the nasalization marks, maybe only for U+1CE9..U+1CF1. 192 * For now, just treat them like tone marks. */ 193 else if (unlikely (u == 0x1CEDu)) 194 cat = OT_A; 195 /* The following take marks in standalone clusters, similar to Avagraha. */ 196 else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, 197 0x1CE9u, 0x1CECu, 198 0x1CEEu, 0x1CF1u))) 199 { 200 cat = OT_Symbol; 201 static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); 202 } 203 else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) || 204 u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ 205 { 206 /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. 207 * https://github.com/roozbehp/unicode-data/issues/5 */ 208 cat = OT_M; 209 pos = POS_ABOVE_C; 210 } 211 else if (unlikely (u == 0x0A51u)) 212 { 213 /* https://github.com/behdad/harfbuzz/issues/524 */ 214 cat = OT_M; 215 pos = POS_BELOW_C; 216 } 217 218 /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, 219 * so the Indic shaper needs to know their categories. */ 220 else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; 221 else if (unlikely (u == 0x1133cu)) cat = OT_N; 222 223 else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/behdad/harfbuzz/issues/552 */ 224 225 else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/behdad/harfbuzz/issues/538 */ 226 else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ 227 else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) 228 cat = OT_PLACEHOLDER; 229 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; 230 231 232 /* 233 * Re-assign position. 234 */ 235 236 if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) 237 { 238 pos = POS_BASE_C; 239 if (is_ra (u)) 240 cat = OT_Ra; 241 } 242 else if (cat == OT_M) 243 { 244 pos = matra_position (u, pos); 245 } 246 else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Symbol)))) 247 { 248 pos = POS_SMVD; 249 } 250 251 if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ 252 253 254 255 info.indic_category() = cat; 256 info.indic_position() = pos; 257 } 258 259 /* 260 * Things above this line should ideally be moved to the Indic table itself. 261 */ 262 263 264 /* 265 * Indic configurations. Note that we do not want to keep every single script-specific 266 * behavior in these tables necessarily. This should mainly be used for per-script 267 * properties that are cheaper keeping here, than in the code. Ie. if, say, one and 268 * only one script has an exception, that one script can be if'ed directly in the code, 269 * instead of adding a new flag in these structs. 270 */ 271 272 enum base_position_t { 273 BASE_POS_FIRST, 274 BASE_POS_LAST_SINHALA, 275 BASE_POS_LAST 276 }; 277 enum reph_position_t { 278 REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, 279 REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, 280 REPH_POS_AFTER_SUB = POS_AFTER_SUB, 281 REPH_POS_BEFORE_POST = POS_BEFORE_POST, 282 REPH_POS_AFTER_POST = POS_AFTER_POST, 283 REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH 284 }; 285 enum reph_mode_t { 286 REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */ 287 REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */ 288 REPH_MODE_VIS_REPHA, /* Encoded Repha character, no reordering needed. */ 289 REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */ 290 }; 291 enum blwf_mode_t { 292 BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */ 293 BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */ 294 }; 295 struct indic_config_t 296 { 297 hb_script_t script; 298 bool has_old_spec; 299 hb_codepoint_t virama; 300 base_position_t base_pos; 301 reph_position_t reph_pos; 302 reph_mode_t reph_mode; 303 blwf_mode_t blwf_mode; 304 }; 305 306 static const indic_config_t indic_configs[] = 307 { 308 /* Default. Should be first. */ 309 {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 310 {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 311 {HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 312 {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 313 {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 314 {HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 315 {HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, 316 {HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY}, 317 {HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY}, 318 {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST}, 319 {HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA, 320 REPH_POS_AFTER_MAIN, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST}, 321 {HB_SCRIPT_KHMER, false,0x17D2u,BASE_POS_FIRST,REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST}, 322 }; 323 324 325 326 /* 327 * Indic shaper. 328 */ 329 330 struct feature_list_t { 331 hb_tag_t tag; 332 hb_ot_map_feature_flags_t flags; 333 }; 334 335 static const feature_list_t 336 indic_features[] = 337 { 338 /* 339 * Basic features. 340 * These features are applied in order, one at a time, after initial_reordering. 341 */ 342 {HB_TAG('n','u','k','t'), F_GLOBAL}, 343 {HB_TAG('a','k','h','n'), F_GLOBAL}, 344 {HB_TAG('r','p','h','f'), F_NONE}, 345 {HB_TAG('r','k','r','f'), F_GLOBAL}, 346 {HB_TAG('p','r','e','f'), F_NONE}, 347 {HB_TAG('b','l','w','f'), F_NONE}, 348 {HB_TAG('a','b','v','f'), F_NONE}, 349 {HB_TAG('h','a','l','f'), F_NONE}, 350 {HB_TAG('p','s','t','f'), F_NONE}, 351 {HB_TAG('v','a','t','u'), F_GLOBAL}, 352 {HB_TAG('c','j','c','t'), F_GLOBAL}, 353 {HB_TAG('c','f','a','r'), F_NONE}, 354 /* 355 * Other features. 356 * These features are applied all at once, after final_reordering. 357 * Default Bengali font in Windows for example has intermixed 358 * lookups for init,pres,abvs,blws features. 359 */ 360 {HB_TAG('i','n','i','t'), F_NONE}, 361 {HB_TAG('p','r','e','s'), F_GLOBAL}, 362 {HB_TAG('a','b','v','s'), F_GLOBAL}, 363 {HB_TAG('b','l','w','s'), F_GLOBAL}, 364 {HB_TAG('p','s','t','s'), F_GLOBAL}, 365 {HB_TAG('h','a','l','n'), F_GLOBAL}, 366 /* Positioning features, though we don't care about the types. */ 367 {HB_TAG('d','i','s','t'), F_GLOBAL}, 368 {HB_TAG('a','b','v','m'), F_GLOBAL}, 369 {HB_TAG('b','l','w','m'), F_GLOBAL}, 370 }; 371 372 /* 373 * Must be in the same order as the indic_features array. 374 */ 375 enum { 376 _NUKT, 377 _AKHN, 378 RPHF, 379 _RKRF, 380 PREF, 381 BLWF, 382 ABVF, 383 HALF, 384 PSTF, 385 _VATU, 386 _CJCT, 387 CFAR, 388 389 INIT, 390 _PRES, 391 _ABVS, 392 _BLWS, 393 _PSTS, 394 _HALN, 395 _DIST, 396 _ABVM, 397 _BLWM, 398 399 INDIC_NUM_FEATURES, 400 INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */ 401 }; 402 403 static void 404 setup_syllables (const hb_ot_shape_plan_t *plan, 405 hb_font_t *font, 406 hb_buffer_t *buffer); 407 static void 408 initial_reordering (const hb_ot_shape_plan_t *plan, 409 hb_font_t *font, 410 hb_buffer_t *buffer); 411 static void 412 final_reordering (const hb_ot_shape_plan_t *plan, 413 hb_font_t *font, 414 hb_buffer_t *buffer); 415 static void 416 clear_syllables (const hb_ot_shape_plan_t *plan, 417 hb_font_t *font, 418 hb_buffer_t *buffer); 419 420 static void 421 collect_features_indic (hb_ot_shape_planner_t *plan) 422 { 423 hb_ot_map_builder_t *map = &plan->map; 424 425 /* Do this before any lookups have been applied. */ 426 map->add_gsub_pause (setup_syllables); 427 428 map->add_global_bool_feature (HB_TAG('l','o','c','l')); 429 /* The Indic specs do not require ccmp, but we apply it here since if 430 * there is a use of it, it's typically at the beginning. */ 431 map->add_global_bool_feature (HB_TAG('c','c','m','p')); 432 433 434 unsigned int i = 0; 435 map->add_gsub_pause (initial_reordering); 436 for (; i < INDIC_BASIC_FEATURES; i++) { 437 map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); 438 map->add_gsub_pause (nullptr); 439 } 440 map->add_gsub_pause (final_reordering); 441 for (; i < INDIC_NUM_FEATURES; i++) { 442 map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); 443 } 444 445 map->add_global_bool_feature (HB_TAG('c','a','l','t')); 446 map->add_global_bool_feature (HB_TAG('c','l','i','g')); 447 448 map->add_gsub_pause (clear_syllables); 449 } 450 451 static void 452 override_features_indic (hb_ot_shape_planner_t *plan) 453 { 454 /* Uniscribe does not apply 'kern' in Khmer. */ 455 if (hb_options ().uniscribe_bug_compatible) 456 { 457 switch ((hb_tag_t) plan->props.script) 458 { 459 case HB_SCRIPT_KHMER: 460 plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL); 461 break; 462 } 463 } 464 465 plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL); 466 } 467 468 469 struct would_substitute_feature_t 470 { 471 inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) 472 { 473 zero_context = zero_context_; 474 map->get_stage_lookups (0/*GSUB*/, 475 map->get_feature_stage (0/*GSUB*/, feature_tag), 476 &lookups, &count); 477 } 478 479 inline bool would_substitute (const hb_codepoint_t *glyphs, 480 unsigned int glyphs_count, 481 hb_face_t *face) const 482 { 483 for (unsigned int i = 0; i < count; i++) 484 if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context)) 485 return true; 486 return false; 487 } 488 489 private: 490 const hb_ot_map_t::lookup_map_t *lookups; 491 unsigned int count; 492 bool zero_context; 493 }; 494 495 struct indic_shape_plan_t 496 { 497 ASSERT_POD (); 498 499 inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const 500 { 501 hb_codepoint_t glyph = virama_glyph; 502 if (unlikely (virama_glyph == (hb_codepoint_t) -1)) 503 { 504 if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph)) 505 glyph = 0; 506 /* Technically speaking, the spec says we should apply 'locl' to virama too. 507 * Maybe one day... */ 508 509 /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph 510 * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */ 511 virama_glyph = glyph; 512 } 513 514 *pglyph = glyph; 515 return glyph != 0; 516 } 517 518 const indic_config_t *config; 519 520 bool is_old_spec; 521 mutable hb_codepoint_t virama_glyph; 522 523 would_substitute_feature_t rphf; 524 would_substitute_feature_t pref; 525 would_substitute_feature_t blwf; 526 would_substitute_feature_t pstf; 527 528 hb_mask_t mask_array[INDIC_NUM_FEATURES]; 529 }; 530 531 static void * 532 data_create_indic (const hb_ot_shape_plan_t *plan) 533 { 534 indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (indic_shape_plan_t)); 535 if (unlikely (!indic_plan)) 536 return nullptr; 537 538 indic_plan->config = &indic_configs[0]; 539 for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) 540 if (plan->props.script == indic_configs[i].script) { 541 indic_plan->config = &indic_configs[i]; 542 break; 543 } 544 545 indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); 546 indic_plan->virama_glyph = (hb_codepoint_t) -1; 547 548 /* Use zero-context would_substitute() matching for new-spec of the main 549 * Indic scripts, and scripts with one spec only, but not for old-specs. 550 * The new-spec for all dual-spec scripts says zero-context matching happens. 551 * 552 * However, testing with Malayalam shows that old and new spec both allow 553 * context. Testing with Bengali new-spec however shows that it doesn't. 554 * So, the heuristic here is the way it is. It should *only* be changed, 555 * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. 556 */ 557 bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM; 558 indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); 559 indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); 560 indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); 561 indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); 562 563 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) 564 indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? 565 0 : plan->map.get_1_mask (indic_features[i].tag); 566 567 return indic_plan; 568 } 569 570 static void 571 data_destroy_indic (void *data) 572 { 573 free (data); 574 } 575 576 static indic_position_t 577 consonant_position_from_face (const indic_shape_plan_t *indic_plan, 578 const hb_codepoint_t consonant, 579 const hb_codepoint_t virama, 580 hb_face_t *face) 581 { 582 /* For old-spec, the order of glyphs is Consonant,Virama, 583 * whereas for new-spec, it's Virama,Consonant. However, 584 * some broken fonts (like Free Sans) simply copied lookups 585 * from old-spec to new-spec without modification. 586 * And oddly enough, Uniscribe seems to respect those lookups. 587 * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds 588 * base at 0. The font however, only has lookups matching 589 * 930,94D in 'blwf', not the expected 94D,930 (with new-spec 590 * table). As such, we simply match both sequences. Seems 591 * to work. */ 592 hb_codepoint_t glyphs[3] = {virama, consonant, virama}; 593 if (indic_plan->blwf.would_substitute (glyphs , 2, face) || 594 indic_plan->blwf.would_substitute (glyphs+1, 2, face)) 595 return POS_BELOW_C; 596 if (indic_plan->pstf.would_substitute (glyphs , 2, face) || 597 indic_plan->pstf.would_substitute (glyphs+1, 2, face)) 598 return POS_POST_C; 599 if (indic_plan->pref.would_substitute (glyphs , 2, face) || 600 indic_plan->pref.would_substitute (glyphs+1, 2, face)) 601 return POS_POST_C; 602 return POS_BASE_C; 603 } 604 605 606 enum syllable_type_t { 607 consonant_syllable, 608 vowel_syllable, 609 standalone_cluster, 610 symbol_cluster, 611 broken_cluster, 612 non_indic_cluster, 613 }; 614 615 #include "hb-ot-shape-complex-indic-machine.hh" 616 617 618 static void 619 setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, 620 hb_buffer_t *buffer, 621 hb_font_t *font HB_UNUSED) 622 { 623 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); 624 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); 625 626 /* We cannot setup masks here. We save information about characters 627 * and setup masks later on in a pause-callback. */ 628 629 unsigned int count = buffer->len; 630 hb_glyph_info_t *info = buffer->info; 631 for (unsigned int i = 0; i < count; i++) 632 set_indic_properties (info[i]); 633 } 634 635 static void 636 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, 637 hb_font_t *font HB_UNUSED, 638 hb_buffer_t *buffer) 639 { 640 find_syllables (buffer); 641 foreach_syllable (buffer, start, end) 642 buffer->unsafe_to_break (start, end); 643 } 644 645 static int 646 compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) 647 { 648 int a = pa->indic_position(); 649 int b = pb->indic_position(); 650 651 return a < b ? -1 : a == b ? 0 : +1; 652 } 653 654 655 656 static void 657 update_consonant_positions (const hb_ot_shape_plan_t *plan, 658 hb_font_t *font, 659 hb_buffer_t *buffer) 660 { 661 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 662 663 if (indic_plan->config->base_pos != BASE_POS_LAST) 664 return; 665 666 hb_codepoint_t virama; 667 if (indic_plan->get_virama_glyph (font, &virama)) 668 { 669 hb_face_t *face = font->face; 670 unsigned int count = buffer->len; 671 hb_glyph_info_t *info = buffer->info; 672 for (unsigned int i = 0; i < count; i++) 673 if (info[i].indic_position() == POS_BASE_C) 674 { 675 hb_codepoint_t consonant = info[i].codepoint; 676 info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face); 677 } 678 } 679 } 680 681 682 /* Rules from: 683 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ 684 685 static void 686 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, 687 hb_face_t *face, 688 hb_buffer_t *buffer, 689 unsigned int start, unsigned int end) 690 { 691 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 692 hb_glyph_info_t *info = buffer->info; 693 694 /* https://github.com/behdad/harfbuzz/issues/435#issuecomment-335560167 695 * // For compatibility with legacy usage in Kannada, 696 * // Ra+h+ZWJ must behave like Ra+ZWJ+h... 697 */ 698 if (buffer->props.script == HB_SCRIPT_KANNADA && 699 start + 3 <= end && 700 is_one_of (info[start ], FLAG (OT_Ra)) && 701 is_one_of (info[start+1], FLAG (OT_H)) && 702 is_one_of (info[start+2], FLAG (OT_ZWJ))) 703 { 704 buffer->merge_clusters (start+1, start+3); 705 hb_glyph_info_t tmp = info[start+1]; 706 info[start+1] = info[start+2]; 707 info[start+2] = tmp; 708 } 709 710 /* 1. Find base consonant: 711 * 712 * The shaping engine finds the base consonant of the syllable, using the 713 * following algorithm: starting from the end of the syllable, move backwards 714 * until a consonant is found that does not have a below-base or post-base 715 * form (post-base forms have to follow below-base forms), or that is not a 716 * pre-base reordering Ra, or arrive at the first consonant. The consonant 717 * stopped at will be the base. 718 * 719 * o If the syllable starts with Ra + Halant (in a script that has Reph) 720 * and has more than one consonant, Ra is excluded from candidates for 721 * base consonants. 722 */ 723 724 unsigned int base = end; 725 bool has_reph = false; 726 727 { 728 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 729 * and has more than one consonant, Ra is excluded from candidates for 730 * base consonants. */ 731 unsigned int limit = start; 732 if (indic_plan->config->reph_pos != REPH_POS_DONT_CARE && 733 indic_plan->mask_array[RPHF] && 734 start + 3 <= end && 735 ( 736 (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) || 737 (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ) 738 )) 739 { 740 /* See if it matches the 'rphf' feature. */ 741 hb_codepoint_t glyphs[3] = {info[start].codepoint, 742 info[start + 1].codepoint, 743 indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ? 744 info[start + 2].codepoint : 0}; 745 if (indic_plan->rphf.would_substitute (glyphs, 2, face) || 746 (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && 747 indic_plan->rphf.would_substitute (glyphs, 3, face))) 748 { 749 limit += 2; 750 while (limit < end && is_joiner (info[limit])) 751 limit++; 752 base = start; 753 has_reph = true; 754 } 755 } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha) 756 { 757 limit += 1; 758 while (limit < end && is_joiner (info[limit])) 759 limit++; 760 base = start; 761 has_reph = true; 762 } 763 764 switch (indic_plan->config->base_pos) 765 { 766 case BASE_POS_LAST: 767 { 768 /* -> starting from the end of the syllable, move backwards */ 769 unsigned int i = end; 770 bool seen_below = false; 771 do { 772 i--; 773 /* -> until a consonant is found */ 774 if (is_consonant (info[i])) 775 { 776 /* -> that does not have a below-base or post-base form 777 * (post-base forms have to follow below-base forms), */ 778 if (info[i].indic_position() != POS_BELOW_C && 779 (info[i].indic_position() != POS_POST_C || seen_below)) 780 { 781 base = i; 782 break; 783 } 784 if (info[i].indic_position() == POS_BELOW_C) 785 seen_below = true; 786 787 /* -> or that is not a pre-base reordering Ra, 788 * 789 * IMPLEMENTATION NOTES: 790 * 791 * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped 792 * by the logic above already. 793 */ 794 795 /* -> or arrive at the first consonant. The consonant stopped at will 796 * be the base. */ 797 base = i; 798 } 799 else 800 { 801 /* A ZWJ after a Halant stops the base search, and requests an explicit 802 * half form. 803 * A ZWJ before a Halant, requests a subjoined form instead, and hence 804 * search continues. This is particularly important for Bengali 805 * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ 806 if (start < i && 807 info[i].indic_category() == OT_ZWJ && 808 info[i - 1].indic_category() == OT_H) 809 break; 810 } 811 } while (i > limit); 812 } 813 break; 814 815 case BASE_POS_LAST_SINHALA: 816 { 817 /* Sinhala base positioning is slightly different from main Indic, in that: 818 * 1. Its ZWJ behavior is different, 819 * 2. We don't need to look into the font for consonant positions. 820 */ 821 822 if (!has_reph) 823 base = limit; 824 825 /* Find the last base consonant that is not blocked by ZWJ. If there is 826 * a ZWJ right before a base consonant, that would request a subjoined form. */ 827 for (unsigned int i = limit; i < end; i++) 828 if (is_consonant (info[i])) 829 { 830 if (limit < i && info[i - 1].indic_category() == OT_ZWJ) 831 break; 832 else 833 base = i; 834 } 835 836 /* Mark all subsequent consonants as below. */ 837 for (unsigned int i = base + 1; i < end; i++) 838 if (is_consonant (info[i])) 839 info[i].indic_position() = POS_BELOW_C; 840 } 841 break; 842 843 case BASE_POS_FIRST: 844 { 845 /* The first consonant is always the base. */ 846 847 assert (indic_plan->config->reph_mode == REPH_MODE_VIS_REPHA); 848 assert (!has_reph); 849 850 base = start; 851 852 /* Mark all subsequent consonants as below. */ 853 for (unsigned int i = base + 1; i < end; i++) 854 if (is_consonant (info[i])) 855 info[i].indic_position() = POS_BELOW_C; 856 } 857 break; 858 } 859 860 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 861 * and has more than one consonant, Ra is excluded from candidates for 862 * base consonants. 863 * 864 * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */ 865 if (has_reph && base == start && limit - base <= 2) { 866 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 867 has_reph = false; 868 } 869 } 870 871 872 /* 2. Decompose and reorder Matras: 873 * 874 * Each matra and any syllable modifier sign in the syllable are moved to the 875 * appropriate position relative to the consonant(s) in the syllable. The 876 * shaping engine decomposes two- or three-part matras into their constituent 877 * parts before any repositioning. Matra characters are classified by which 878 * consonant in a conjunct they have affinity for and are reordered to the 879 * following positions: 880 * 881 * o Before first half form in the syllable 882 * o After subjoined consonants 883 * o After post-form consonant 884 * o After main consonant (for above marks) 885 * 886 * IMPLEMENTATION NOTES: 887 * 888 * The normalize() routine has already decomposed matras for us, so we don't 889 * need to worry about that. 890 */ 891 892 893 /* 3. Reorder marks to canonical order: 894 * 895 * Adjacent nukta and halant or nukta and vedic sign are always repositioned 896 * if necessary, so that the nukta is first. 897 * 898 * IMPLEMENTATION NOTES: 899 * 900 * We don't need to do this: the normalize() routine already did this for us. 901 */ 902 903 904 /* Reorder characters */ 905 906 for (unsigned int i = start; i < base; i++) 907 info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_position()); 908 909 if (base < end) 910 info[base].indic_position() = POS_BASE_C; 911 912 /* Mark final consonants. A final consonant is one appearing after a matra, 913 * like in Khmer. */ 914 for (unsigned int i = base + 1; i < end; i++) 915 if (info[i].indic_category() == OT_M) { 916 for (unsigned int j = i + 1; j < end; j++) 917 if (is_consonant (info[j])) { 918 info[j].indic_position() = POS_FINAL_C; 919 break; 920 } 921 break; 922 } 923 924 /* Handle beginning Ra */ 925 if (has_reph) 926 info[start].indic_position() = POS_RA_TO_BECOME_REPH; 927 928 /* For old-style Indic script tags, move the first post-base Halant after 929 * last consonant. 930 * 931 * Reports suggest that in some scripts Uniscribe does this only if there 932 * is *not* a Halant after last consonant already (eg. Kannada), while it 933 * does it unconditionally in other scripts (eg. Malayalam). We don't 934 * currently know about other scripts, so we single out Malayalam for now. 935 * 936 * Kannada test case: 937 * U+0C9A,U+0CCD,U+0C9A,U+0CCD 938 * With some versions of Lohit Kannada. 939 * https://bugs.freedesktop.org/show_bug.cgi?id=59118 940 * 941 * Malayalam test case: 942 * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D 943 * With lohit-ttf-20121122/Lohit-Malayalam.ttf 944 */ 945 if (indic_plan->is_old_spec) 946 { 947 bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM; 948 for (unsigned int i = base + 1; i < end; i++) 949 if (info[i].indic_category() == OT_H) 950 { 951 unsigned int j; 952 for (j = end - 1; j > i; j--) 953 if (is_consonant (info[j]) || 954 (disallow_double_halants && info[j].indic_category() == OT_H)) 955 break; 956 if (info[j].indic_category() != OT_H && j > i) { 957 /* Move Halant to after last consonant. */ 958 hb_glyph_info_t t = info[i]; 959 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); 960 info[j] = t; 961 } 962 break; 963 } 964 } 965 966 /* Attach misc marks to previous char to move with them. */ 967 { 968 indic_position_t last_pos = POS_START; 969 for (unsigned int i = start; i < end; i++) 970 { 971 if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS))) 972 { 973 info[i].indic_position() = last_pos; 974 if (unlikely (info[i].indic_category() == OT_H && 975 info[i].indic_position() == POS_PRE_M)) 976 { 977 /* 978 * Uniscribe doesn't move the Halant with Left Matra. 979 * TEST: U+092B,U+093F,U+094DE 980 * We follow. This is important for the Sinhala 981 * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA 982 * where U+0DD9 is a left matra and U+0DCA is the virama. 983 * We don't want to move the virama with the left matra. 984 * TEST: U+0D9A,U+0DDA 985 */ 986 for (unsigned int j = i; j > start; j--) 987 if (info[j - 1].indic_position() != POS_PRE_M) { 988 info[i].indic_position() = info[j - 1].indic_position(); 989 break; 990 } 991 } 992 } else if (info[i].indic_position() != POS_SMVD) { 993 last_pos = (indic_position_t) info[i].indic_position(); 994 } 995 } 996 } 997 /* For post-base consonants let them own anything before them 998 * since the last consonant or matra. */ 999 { 1000 unsigned int last = base; 1001 for (unsigned int i = base + 1; i < end; i++) 1002 if (is_consonant (info[i])) 1003 { 1004 for (unsigned int j = last + 1; j < i; j++) 1005 if (info[j].indic_position() < POS_SMVD) 1006 info[j].indic_position() = info[i].indic_position(); 1007 last = i; 1008 } else if (info[i].indic_category() == OT_M) 1009 last = i; 1010 } 1011 1012 1013 { 1014 /* Use syllable() for sort accounting temporarily. */ 1015 unsigned int syllable = info[start].syllable(); 1016 for (unsigned int i = start; i < end; i++) 1017 info[i].syllable() = i - start; 1018 1019 /* Sit tight, rock 'n roll! */ 1020 hb_stable_sort (info + start, end - start, compare_indic_order); 1021 /* Find base again */ 1022 base = end; 1023 for (unsigned int i = start; i < end; i++) 1024 if (info[i].indic_position() == POS_BASE_C) 1025 { 1026 base = i; 1027 break; 1028 } 1029 /* Things are out-of-control for post base positions, they may shuffle 1030 * around like crazy. In old-spec mode, we move halants around, so in 1031 * that case merge all clusters after base. Otherwise, check the sort 1032 * order and merge as needed. 1033 * For pre-base stuff, we handle cluster issues in final reordering. 1034 * 1035 * We could use buffer->sort() for this, if there was no special 1036 * reordering of pre-base stuff happening later... 1037 */ 1038 if (indic_plan->is_old_spec || end - base > 127) 1039 buffer->merge_clusters (base, end); 1040 else 1041 { 1042 /* Note! syllable() is a one-byte field. */ 1043 for (unsigned int i = base; i < end; i++) 1044 if (info[i].syllable() != 255) 1045 { 1046 unsigned int max = i; 1047 unsigned int j = start + info[i].syllable(); 1048 while (j != i) 1049 { 1050 max = MAX (max, j); 1051 unsigned int next = start + info[j].syllable(); 1052 info[j].syllable() = 255; /* So we don't process j later again. */ 1053 j = next; 1054 } 1055 if (i != max) 1056 buffer->merge_clusters (i, max + 1); 1057 } 1058 } 1059 1060 /* Put syllable back in. */ 1061 for (unsigned int i = start; i < end; i++) 1062 info[i].syllable() = syllable; 1063 } 1064 1065 /* Setup masks now */ 1066 1067 { 1068 hb_mask_t mask; 1069 1070 /* Reph */ 1071 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) 1072 info[i].mask |= indic_plan->mask_array[RPHF]; 1073 1074 /* Pre-base */ 1075 mask = indic_plan->mask_array[HALF]; 1076 if (!indic_plan->is_old_spec && 1077 indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST) 1078 mask |= indic_plan->mask_array[BLWF]; 1079 for (unsigned int i = start; i < base; i++) 1080 info[i].mask |= mask; 1081 /* Base */ 1082 mask = 0; 1083 if (base < end) 1084 info[base].mask |= mask; 1085 /* Post-base */ 1086 mask = indic_plan->mask_array[BLWF] | indic_plan->mask_array[ABVF] | indic_plan->mask_array[PSTF]; 1087 for (unsigned int i = base + 1; i < end; i++) 1088 info[i].mask |= mask; 1089 } 1090 1091 if (indic_plan->is_old_spec && 1092 buffer->props.script == HB_SCRIPT_DEVANAGARI) 1093 { 1094 /* Old-spec eye-lash Ra needs special handling. From the 1095 * spec: 1096 * 1097 * "The feature 'below-base form' is applied to consonants 1098 * having below-base forms and following the base consonant. 1099 * The exception is vattu, which may appear below half forms 1100 * as well as below the base glyph. The feature 'below-base 1101 * form' will be applied to all such occurrences of Ra as well." 1102 * 1103 * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 1104 * with Sanskrit 2003 font. 1105 * 1106 * However, note that Ra,Halant,ZWJ is the correct way to 1107 * request eyelash form of Ra, so we wouldbn't inhibit it 1108 * in that sequence. 1109 * 1110 * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 1111 */ 1112 for (unsigned int i = start; i + 1 < base; i++) 1113 if (info[i ].indic_category() == OT_Ra && 1114 info[i+1].indic_category() == OT_H && 1115 (i + 2 == base || 1116 info[i+2].indic_category() != OT_ZWJ)) 1117 { 1118 info[i ].mask |= indic_plan->mask_array[BLWF]; 1119 info[i+1].mask |= indic_plan->mask_array[BLWF]; 1120 } 1121 } 1122 1123 unsigned int pref_len = 2; 1124 if (indic_plan->mask_array[PREF] && base + pref_len < end) 1125 { 1126 /* Find a Halant,Ra sequence and mark it for pre-base reordering processing. */ 1127 for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) { 1128 hb_codepoint_t glyphs[2]; 1129 for (unsigned int j = 0; j < pref_len; j++) 1130 glyphs[j] = info[i + j].codepoint; 1131 if (indic_plan->pref.would_substitute (glyphs, pref_len, face)) 1132 { 1133 for (unsigned int j = 0; j < pref_len; j++) 1134 info[i++].mask |= indic_plan->mask_array[PREF]; 1135 1136 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. 1137 * Read the feature spec. 1138 * This allows distinguishing the following cases with MS Khmer fonts: 1139 * U+1784,U+17D2,U+179A,U+17D2,U+1782 1140 * U+1784,U+17D2,U+1782,U+17D2,U+179A 1141 */ 1142 if (indic_plan->mask_array[CFAR]) 1143 for (; i < end; i++) 1144 info[i].mask |= indic_plan->mask_array[CFAR]; 1145 1146 break; 1147 } 1148 } 1149 } 1150 1151 /* Apply ZWJ/ZWNJ effects */ 1152 for (unsigned int i = start + 1; i < end; i++) 1153 if (is_joiner (info[i])) { 1154 bool non_joiner = info[i].indic_category() == OT_ZWNJ; 1155 unsigned int j = i; 1156 1157 do { 1158 j--; 1159 1160 /* ZWJ/ZWNJ should disable CJCT. They do that by simply 1161 * being there, since we don't skip them for the CJCT 1162 * feature (ie. F_MANUAL_ZWJ) */ 1163 1164 /* A ZWNJ disables HALF. */ 1165 if (non_joiner) 1166 info[j].mask &= ~indic_plan->mask_array[HALF]; 1167 1168 } while (j > start && !is_consonant (info[j])); 1169 } 1170 } 1171 1172 static void 1173 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, 1174 hb_face_t *face, 1175 hb_buffer_t *buffer, 1176 unsigned int start, unsigned int end) 1177 { 1178 /* We treat placeholder/dotted-circle as if they are consonants, so we 1179 * should just chain. Only if not in compatibility mode that is... */ 1180 1181 if (hb_options ().uniscribe_bug_compatible) 1182 { 1183 /* For dotted-circle, this is what Uniscribe does: 1184 * If dotted-circle is the last glyph, it just does nothing. 1185 * Ie. It doesn't form Reph. */ 1186 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) 1187 return; 1188 } 1189 1190 initial_reordering_consonant_syllable (plan, face, buffer, start, end); 1191 } 1192 1193 static void 1194 initial_reordering_syllable (const hb_ot_shape_plan_t *plan, 1195 hb_face_t *face, 1196 hb_buffer_t *buffer, 1197 unsigned int start, unsigned int end) 1198 { 1199 syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); 1200 switch (syllable_type) 1201 { 1202 case vowel_syllable: /* We made the vowels look like consonants. So let's call the consonant logic! */ 1203 case consonant_syllable: 1204 initial_reordering_consonant_syllable (plan, face, buffer, start, end); 1205 break; 1206 1207 case broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */ 1208 case standalone_cluster: 1209 initial_reordering_standalone_cluster (plan, face, buffer, start, end); 1210 break; 1211 1212 case symbol_cluster: 1213 case non_indic_cluster: 1214 break; 1215 } 1216 } 1217 1218 static inline void 1219 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, 1220 hb_font_t *font, 1221 hb_buffer_t *buffer) 1222 { 1223 /* Note: This loop is extra overhead, but should not be measurable. */ 1224 bool has_broken_syllables = false; 1225 unsigned int count = buffer->len; 1226 hb_glyph_info_t *info = buffer->info; 1227 for (unsigned int i = 0; i < count; i++) 1228 if ((info[i].syllable() & 0x0F) == broken_cluster) 1229 { 1230 has_broken_syllables = true; 1231 break; 1232 } 1233 if (likely (!has_broken_syllables)) 1234 return; 1235 1236 1237 hb_codepoint_t dottedcircle_glyph; 1238 if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph)) 1239 return; 1240 1241 hb_glyph_info_t dottedcircle = {0}; 1242 dottedcircle.codepoint = 0x25CCu; 1243 set_indic_properties (dottedcircle); 1244 dottedcircle.codepoint = dottedcircle_glyph; 1245 1246 buffer->clear_output (); 1247 1248 buffer->idx = 0; 1249 unsigned int last_syllable = 0; 1250 while (buffer->idx < buffer->len && !buffer->in_error) 1251 { 1252 unsigned int syllable = buffer->cur().syllable(); 1253 syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); 1254 if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) 1255 { 1256 last_syllable = syllable; 1257 1258 hb_glyph_info_t ginfo = dottedcircle; 1259 ginfo.cluster = buffer->cur().cluster; 1260 ginfo.mask = buffer->cur().mask; 1261 ginfo.syllable() = buffer->cur().syllable(); 1262 /* TODO Set glyph_props? */ 1263 1264 /* Insert dottedcircle after possible Repha. */ 1265 while (buffer->idx < buffer->len && !buffer->in_error && 1266 last_syllable == buffer->cur().syllable() && 1267 buffer->cur().indic_category() == OT_Repha) 1268 buffer->next_glyph (); 1269 1270 buffer->output_info (ginfo); 1271 } 1272 else 1273 buffer->next_glyph (); 1274 } 1275 1276 buffer->swap_buffers (); 1277 } 1278 1279 static void 1280 initial_reordering (const hb_ot_shape_plan_t *plan, 1281 hb_font_t *font, 1282 hb_buffer_t *buffer) 1283 { 1284 update_consonant_positions (plan, font, buffer); 1285 insert_dotted_circles (plan, font, buffer); 1286 1287 foreach_syllable (buffer, start, end) 1288 initial_reordering_syllable (plan, font->face, buffer, start, end); 1289 } 1290 1291 static void 1292 final_reordering_syllable (const hb_ot_shape_plan_t *plan, 1293 hb_buffer_t *buffer, 1294 unsigned int start, unsigned int end) 1295 { 1296 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 1297 hb_glyph_info_t *info = buffer->info; 1298 1299 1300 /* This function relies heavily on halant glyphs. Lots of ligation 1301 * and possibly multiple substitutions happened prior to this 1302 * phase, and that might have messed up our properties. Recover 1303 * from a particular case of that where we're fairly sure that a 1304 * class of OT_H is desired but has been lost. */ 1305 if (indic_plan->virama_glyph) 1306 { 1307 unsigned int virama_glyph = indic_plan->virama_glyph; 1308 for (unsigned int i = start; i < end; i++) 1309 if (info[i].codepoint == virama_glyph && 1310 _hb_glyph_info_ligated (&info[i]) && 1311 _hb_glyph_info_multiplied (&info[i])) 1312 { 1313 /* This will make sure that this glyph passes is_halant_or_coeng() test. */ 1314 info[i].indic_category() = OT_H; 1315 _hb_glyph_info_clear_ligated_and_multiplied (&info[i]); 1316 } 1317 } 1318 1319 1320 /* 4. Final reordering: 1321 * 1322 * After the localized forms and basic shaping forms GSUB features have been 1323 * applied (see below), the shaping engine performs some final glyph 1324 * reordering before applying all the remaining font features to the entire 1325 * syllable. 1326 */ 1327 1328 bool try_pref = !!indic_plan->mask_array[PREF]; 1329 1330 /* Find base again */ 1331 unsigned int base; 1332 for (base = start; base < end; base++) 1333 if (info[base].indic_position() >= POS_BASE_C) 1334 { 1335 if (try_pref && base + 1 < end) 1336 { 1337 for (unsigned int i = base + 1; i < end; i++) 1338 if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) 1339 { 1340 if (!(_hb_glyph_info_substituted (&info[i]) && 1341 _hb_glyph_info_ligated_and_didnt_multiply (&info[i]))) 1342 { 1343 /* Ok, this was a 'pref' candidate but didn't form any. 1344 * Base is around here... */ 1345 base = i; 1346 while (base < end && is_halant_or_coeng (info[base])) 1347 base++; 1348 info[base].indic_position() = POS_BASE_C; 1349 1350 try_pref = false; 1351 } 1352 break; 1353 } 1354 } 1355 /* For Malayalam, skip over unformed below- (but NOT post-) forms. */ 1356 if (buffer->props.script == HB_SCRIPT_MALAYALAM) 1357 { 1358 for (unsigned int i = base + 1; i < end; i++) 1359 { 1360 while (i < end && is_joiner (info[i])) 1361 i++; 1362 if (i == end || !is_halant_or_coeng (info[i])) 1363 break; 1364 i++; /* Skip halant. */ 1365 while (i < end && is_joiner (info[i])) 1366 i++; 1367 if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C) 1368 { 1369 base = i; 1370 info[base].indic_position() = POS_BASE_C; 1371 } 1372 } 1373 } 1374 1375 if (start < base && info[base].indic_position() > POS_BASE_C) 1376 base--; 1377 break; 1378 } 1379 if (base == end && start < base && 1380 is_one_of (info[base - 1], FLAG (OT_ZWJ))) 1381 base--; 1382 if (base < end) 1383 while (start < base && 1384 is_one_of (info[base], (FLAG (OT_N) | HALANT_OR_COENG_FLAGS))) 1385 base--; 1386 1387 1388 /* o Reorder matras: 1389 * 1390 * If a pre-base matra character had been reordered before applying basic 1391 * features, the glyph can be moved closer to the main consonant based on 1392 * whether half-forms had been formed. Actual position for the matra is 1393 * defined as “after last standalone halant glyph, after initial matra 1394 * position and before the main consonant”. If ZWJ or ZWNJ follow this 1395 * halant, position is moved after it. 1396 */ 1397 1398 if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */ 1399 { 1400 /* If we lost track of base, alas, position before last thingy. */ 1401 unsigned int new_pos = base == end ? base - 2 : base - 1; 1402 1403 /* Malayalam / Tamil do not have "half" forms or explicit virama forms. 1404 * The glyphs formed by 'half' are Chillus or ligated explicit viramas. 1405 * We want to position matra after them. 1406 */ 1407 if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) 1408 { 1409 while (new_pos > start && 1410 !(is_one_of (info[new_pos], (FLAG (OT_M) | HALANT_OR_COENG_FLAGS)))) 1411 new_pos--; 1412 1413 /* If we found no Halant we are done. 1414 * Otherwise only proceed if the Halant does 1415 * not belong to the Matra itself! */ 1416 if (is_halant_or_coeng (info[new_pos]) && 1417 info[new_pos].indic_position() != POS_PRE_M) 1418 { 1419 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 1420 if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) 1421 new_pos++; 1422 } 1423 else 1424 new_pos = start; /* No move. */ 1425 } 1426 1427 if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M) 1428 { 1429 /* Now go see if there's actually any matras... */ 1430 for (unsigned int i = new_pos; i > start; i--) 1431 if (info[i - 1].indic_position () == POS_PRE_M) 1432 { 1433 unsigned int old_pos = i - 1; 1434 if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */ 1435 base--; 1436 1437 hb_glyph_info_t tmp = info[old_pos]; 1438 memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0])); 1439 info[new_pos] = tmp; 1440 1441 /* Note: this merge_clusters() is intentionally *after* the reordering. 1442 * Indic matra reordering is special and tricky... */ 1443 buffer->merge_clusters (new_pos, MIN (end, base + 1)); 1444 1445 new_pos--; 1446 } 1447 } else { 1448 for (unsigned int i = start; i < base; i++) 1449 if (info[i].indic_position () == POS_PRE_M) { 1450 buffer->merge_clusters (i, MIN (end, base + 1)); 1451 break; 1452 } 1453 } 1454 } 1455 1456 1457 /* o Reorder reph: 1458 * 1459 * Reph’s original position is always at the beginning of the syllable, 1460 * (i.e. it is not reordered at the character reordering stage). However, 1461 * it will be reordered according to the basic-forms shaping results. 1462 * Possible positions for reph, depending on the script, are; after main, 1463 * before post-base consonant forms, and after post-base consonant forms. 1464 */ 1465 1466 /* Two cases: 1467 * 1468 * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then 1469 * we should only move it if the sequence ligated to the repha form. 1470 * 1471 * - If repha is encoded separately and in the logical position, we should only 1472 * move it if it did NOT ligate. If it ligated, it's probably the font trying 1473 * to make it work without the reordering. 1474 */ 1475 if (start + 1 < end && 1476 info[start].indic_position() == POS_RA_TO_BECOME_REPH && 1477 ((info[start].indic_category() == OT_Repha) ^ 1478 _hb_glyph_info_ligated_and_didnt_multiply (&info[start]))) 1479 { 1480 unsigned int new_reph_pos; 1481 reph_position_t reph_pos = indic_plan->config->reph_pos; 1482 1483 assert (reph_pos != REPH_POS_DONT_CARE); 1484 1485 /* 1. If reph should be positioned after post-base consonant forms, 1486 * proceed to step 5. 1487 */ 1488 if (reph_pos == REPH_POS_AFTER_POST) 1489 { 1490 goto reph_step_5; 1491 } 1492 1493 /* 2. If the reph repositioning class is not after post-base: target 1494 * position is after the first explicit halant glyph between the 1495 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ 1496 * are following this halant, position is moved after it. If such 1497 * position is found, this is the target position. Otherwise, 1498 * proceed to the next step. 1499 * 1500 * Note: in old-implementation fonts, where classifications were 1501 * fixed in shaping engine, there was no case where reph position 1502 * will be found on this step. 1503 */ 1504 { 1505 new_reph_pos = start + 1; 1506 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 1507 new_reph_pos++; 1508 1509 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) 1510 { 1511 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 1512 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 1513 new_reph_pos++; 1514 goto reph_move; 1515 } 1516 } 1517 1518 /* 3. If reph should be repositioned after the main consonant: find the 1519 * first consonant not ligated with main, or find the first 1520 * consonant that is not a potential pre-base reordering Ra. 1521 */ 1522 if (reph_pos == REPH_POS_AFTER_MAIN) 1523 { 1524 new_reph_pos = base; 1525 while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN) 1526 new_reph_pos++; 1527 if (new_reph_pos < end) 1528 goto reph_move; 1529 } 1530 1531 /* 4. If reph should be positioned before post-base consonant, find 1532 * first post-base classified consonant not ligated with main. If no 1533 * consonant is found, the target position should be before the 1534 * first matra, syllable modifier sign or vedic sign. 1535 */ 1536 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ 1537 if (reph_pos == REPH_POS_AFTER_SUB) 1538 { 1539 new_reph_pos = base; 1540 while (new_reph_pos + 1 < end && 1541 !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) 1542 new_reph_pos++; 1543 if (new_reph_pos < end) 1544 goto reph_move; 1545 } 1546 1547 /* 5. If no consonant is found in steps 3 or 4, move reph to a position 1548 * immediately before the first post-base matra, syllable modifier 1549 * sign or vedic sign that has a reordering class after the intended 1550 * reph position. For example, if the reordering position for reph 1551 * is post-main, it will skip above-base matras that also have a 1552 * post-main position. 1553 */ 1554 reph_step_5: 1555 { 1556 /* Copied from step 2. */ 1557 new_reph_pos = start + 1; 1558 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 1559 new_reph_pos++; 1560 1561 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) 1562 { 1563 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 1564 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 1565 new_reph_pos++; 1566 goto reph_move; 1567 } 1568 } 1569 1570 /* 6. Otherwise, reorder reph to the end of the syllable. 1571 */ 1572 { 1573 new_reph_pos = end - 1; 1574 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) 1575 new_reph_pos--; 1576 1577 /* 1578 * If the Reph is to be ending up after a Matra,Halant sequence, 1579 * position it before that Halant so it can interact with the Matra. 1580 * However, if it's a plain Consonant,Halant we shouldn't do that. 1581 * Uniscribe doesn't do this. 1582 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D 1583 */ 1584 if (!hb_options ().uniscribe_bug_compatible && 1585 unlikely (is_halant_or_coeng (info[new_reph_pos]))) { 1586 for (unsigned int i = base + 1; i < new_reph_pos; i++) 1587 if (info[i].indic_category() == OT_M) { 1588 /* Ok, got it. */ 1589 new_reph_pos--; 1590 } 1591 } 1592 goto reph_move; 1593 } 1594 1595 reph_move: 1596 { 1597 /* Move */ 1598 buffer->merge_clusters (start, new_reph_pos + 1); 1599 hb_glyph_info_t reph = info[start]; 1600 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); 1601 info[new_reph_pos] = reph; 1602 1603 if (start < base && base <= new_reph_pos) 1604 base--; 1605 } 1606 } 1607 1608 1609 /* o Reorder pre-base reordering consonants: 1610 * 1611 * If a pre-base reordering consonant is found, reorder it according to 1612 * the following rules: 1613 */ 1614 1615 if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */ 1616 { 1617 for (unsigned int i = base + 1; i < end; i++) 1618 if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) 1619 { 1620 /* 1. Only reorder a glyph produced by substitution during application 1621 * of the <pref> feature. (Note that a font may shape a Ra consonant with 1622 * the feature generally but block it in certain contexts.) 1623 */ 1624 /* Note: We just check that something got substituted. We don't check that 1625 * the <pref> feature actually did it... 1626 * 1627 * Reorder pref only if it ligated. */ 1628 if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i])) 1629 { 1630 /* 1631 * 2. Try to find a target position the same way as for pre-base matra. 1632 * If it is found, reorder pre-base consonant glyph. 1633 * 1634 * 3. If position is not found, reorder immediately before main 1635 * consonant. 1636 */ 1637 1638 unsigned int new_pos = base; 1639 /* Malayalam / Tamil do not have "half" forms or explicit virama forms. 1640 * The glyphs formed by 'half' are Chillus or ligated explicit viramas. 1641 * We want to position matra after them. 1642 */ 1643 if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) 1644 { 1645 while (new_pos > start && 1646 !(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS))) 1647 new_pos--; 1648 1649 /* In Khmer coeng model, a H,Ra can go *after* matras. If it goes after a 1650 * split matra, it should be reordered to *before* the left part of such matra. */ 1651 if (new_pos > start && info[new_pos - 1].indic_category() == OT_M) 1652 { 1653 unsigned int old_pos = i; 1654 for (unsigned int j = base + 1; j < old_pos; j++) 1655 if (info[j].indic_category() == OT_M) 1656 { 1657 new_pos--; 1658 break; 1659 } 1660 } 1661 } 1662 1663 if (new_pos > start && is_halant_or_coeng (info[new_pos - 1])) 1664 { 1665 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 1666 if (new_pos < end && is_joiner (info[new_pos])) 1667 new_pos++; 1668 } 1669 1670 { 1671 unsigned int old_pos = i; 1672 1673 buffer->merge_clusters (new_pos, old_pos + 1); 1674 hb_glyph_info_t tmp = info[old_pos]; 1675 memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0])); 1676 info[new_pos] = tmp; 1677 1678 if (new_pos <= base && base < old_pos) 1679 base++; 1680 } 1681 } 1682 1683 break; 1684 } 1685 } 1686 1687 1688 /* Apply 'init' to the Left Matra if it's a word start. */ 1689 if (info[start].indic_position () == POS_PRE_M) 1690 { 1691 if (!start || 1692 !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) & 1693 FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) 1694 info[start].mask |= indic_plan->mask_array[INIT]; 1695 else 1696 buffer->unsafe_to_break (start - 1, start + 1); 1697 } 1698 1699 1700 /* 1701 * Finish off the clusters and go home! 1702 */ 1703 if (hb_options ().uniscribe_bug_compatible) 1704 { 1705 switch ((hb_tag_t) plan->props.script) 1706 { 1707 case HB_SCRIPT_TAMIL: 1708 case HB_SCRIPT_SINHALA: 1709 break; 1710 1711 default: 1712 /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala. 1713 * This means, half forms are submerged into the main consonant's cluster. 1714 * This is unnecessary, and makes cursor positioning harder, but that's what 1715 * Uniscribe does. */ 1716 buffer->merge_clusters (start, end); 1717 break; 1718 } 1719 } 1720 } 1721 1722 1723 static void 1724 final_reordering (const hb_ot_shape_plan_t *plan, 1725 hb_font_t *font HB_UNUSED, 1726 hb_buffer_t *buffer) 1727 { 1728 unsigned int count = buffer->len; 1729 if (unlikely (!count)) return; 1730 1731 foreach_syllable (buffer, start, end) 1732 final_reordering_syllable (plan, buffer, start, end); 1733 1734 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); 1735 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); 1736 } 1737 1738 1739 static void 1740 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, 1741 hb_font_t *font HB_UNUSED, 1742 hb_buffer_t *buffer) 1743 { 1744 hb_glyph_info_t *info = buffer->info; 1745 unsigned int count = buffer->len; 1746 for (unsigned int i = 0; i < count; i++) 1747 info[i].syllable() = 0; 1748 } 1749 1750 1751 static bool 1752 decompose_indic (const hb_ot_shape_normalize_context_t *c, 1753 hb_codepoint_t ab, 1754 hb_codepoint_t *a, 1755 hb_codepoint_t *b) 1756 { 1757 switch (ab) 1758 { 1759 /* Don't decompose these. */ 1760 case 0x0931u : return false; /* DEVANAGARI LETTER RRA */ 1761 case 0x0B94u : return false; /* TAMIL LETTER AU */ 1762 1763 1764 /* 1765 * Decompose split matras that don't have Unicode decompositions. 1766 */ 1767 1768 /* Khmer */ 1769 case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; 1770 case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; 1771 case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; 1772 case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; 1773 case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; 1774 1775 #if 0 1776 /* Gujarati */ 1777 /* This one has no decomposition in Unicode, but needs no decomposition either. */ 1778 /* case 0x0AC9u : return false; */ 1779 1780 /* Oriya */ 1781 case 0x0B57u : *a = no decomp, -> RIGHT; return true; 1782 #endif 1783 } 1784 1785 if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu))) 1786 { 1787 /* 1788 * Sinhala split matras... Let the fun begin. 1789 * 1790 * These four characters have Unicode decompositions. However, Uniscribe 1791 * decomposes them "Khmer-style", that is, it uses the character itself to 1792 * get the second half. The first half of all four decompositions is always 1793 * U+0DD9. 1794 * 1795 * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are 1796 * broken with Uniscribe. But we need to support them. As such, we only 1797 * do the Uniscribe-style decomposition if the character is transformed into 1798 * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to 1799 * Unicode decomposition. 1800 * 1801 * Note that we can't unconditionally use Unicode decomposition. That would 1802 * break some other fonts, that are designed to work with Uniscribe, and 1803 * don't have positioning features for the Unicode-style decomposition. 1804 * 1805 * Argh... 1806 * 1807 * The Uniscribe behavior is now documented in the newly published Sinhala 1808 * spec in 2012: 1809 * 1810 * http://www.microsoft.com/typography/OpenTypeDev/sinhala/intro.htm#shaping 1811 */ 1812 1813 const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; 1814 1815 hb_codepoint_t glyph; 1816 1817 if (hb_options ().uniscribe_bug_compatible || 1818 (c->font->get_nominal_glyph (ab, &glyph) && 1819 indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) 1820 { 1821 /* Ok, safe to use Uniscribe-style decomposition. */ 1822 *a = 0x0DD9u; 1823 *b = ab; 1824 return true; 1825 } 1826 } 1827 1828 return (bool) c->unicode->decompose (ab, a, b); 1829 } 1830 1831 static bool 1832 compose_indic (const hb_ot_shape_normalize_context_t *c, 1833 hb_codepoint_t a, 1834 hb_codepoint_t b, 1835 hb_codepoint_t *ab) 1836 { 1837 /* Avoid recomposing split matras. */ 1838 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) 1839 return false; 1840 1841 /* Composition-exclusion exceptions that we want to recompose. */ 1842 if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } 1843 1844 return (bool) c->unicode->compose (a, b, ab); 1845 } 1846 1847 1848 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = 1849 { 1850 collect_features_indic, 1851 override_features_indic, 1852 data_create_indic, 1853 data_destroy_indic, 1854 nullptr, /* preprocess_text */ 1855 nullptr, /* postprocess_glyphs */ 1856 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, 1857 decompose_indic, 1858 compose_indic, 1859 setup_masks_indic, 1860 nullptr, /* disable_otl */ 1861 nullptr, /* reorder_marks */ 1862 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 1863 false, /* fallback_position */ 1864 };