1 /* 2 * Copyright © 2015 Mozilla Foundation. 3 * Copyright © 2015 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Mozilla Author(s): Jonathan Kew 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29 #include "hb-ot-shape-complex-use.hh" 30 #include "hb-ot-shape-complex-arabic.hh" 31 #include "hb-ot-shape-complex-vowel-constraints.hh" 32 33 /* buffer var allocations */ 34 #define use_category() complex_var_u8_0() 35 36 37 /* 38 * Universal Shaping Engine. 39 * https://docs.microsoft.com/en-us/typography/script-development/use 40 */ 41 42 static const hb_tag_t 43 basic_features[] = 44 { 45 /* 46 * Basic features. 47 * These features are applied all at once, before reordering. 48 */ 49 HB_TAG('r','k','r','f'), 50 HB_TAG('a','b','v','f'), 51 HB_TAG('b','l','w','f'), 52 HB_TAG('h','a','l','f'), 53 HB_TAG('p','s','t','f'), 54 HB_TAG('v','a','t','u'), 55 HB_TAG('c','j','c','t'), 56 }; 57 static const hb_tag_t 58 arabic_features[] = 59 { 60 HB_TAG('i','s','o','l'), 61 HB_TAG('i','n','i','t'), 62 HB_TAG('m','e','d','i'), 63 HB_TAG('f','i','n','a'), 64 /* The spec doesn't specify these but we apply anyway, since our Arabic shaper 65 * does. These are only used in Syriac spec. */ 66 HB_TAG('m','e','d','2'), 67 HB_TAG('f','i','n','2'), 68 HB_TAG('f','i','n','3'), 69 }; 70 /* Same order as arabic_features. Don't need Syriac stuff.*/ 71 enum joining_form_t { 72 ISOL, 73 INIT, 74 MEDI, 75 FINA, 76 _NONE 77 }; 78 static const hb_tag_t 79 other_features[] = 80 { 81 /* 82 * Other features. 83 * These features are applied all at once, after reordering and 84 * clearing syllables. 85 */ 86 HB_TAG('a','b','v','s'), 87 HB_TAG('b','l','w','s'), 88 HB_TAG('h','a','l','n'), 89 HB_TAG('p','r','e','s'), 90 HB_TAG('p','s','t','s'), 91 }; 92 static const hb_tag_t 93 positioning_features[] = 94 { 95 /* 96 * Positioning features. 97 * We don't care about the types. 98 */ 99 HB_TAG('d','i','s','t'), 100 HB_TAG('a','b','v','m'), 101 HB_TAG('b','l','w','m'), 102 }; 103 104 static void 105 setup_syllables (const hb_ot_shape_plan_t *plan, 106 hb_font_t *font, 107 hb_buffer_t *buffer); 108 static void 109 clear_substitution_flags (const hb_ot_shape_plan_t *plan, 110 hb_font_t *font, 111 hb_buffer_t *buffer); 112 static void 113 record_rphf (const hb_ot_shape_plan_t *plan, 114 hb_font_t *font, 115 hb_buffer_t *buffer); 116 static void 117 record_pref (const hb_ot_shape_plan_t *plan, 118 hb_font_t *font, 119 hb_buffer_t *buffer); 120 static void 121 reorder (const hb_ot_shape_plan_t *plan, 122 hb_font_t *font, 123 hb_buffer_t *buffer); 124 static void 125 clear_syllables (const hb_ot_shape_plan_t *plan, 126 hb_font_t *font, 127 hb_buffer_t *buffer); 128 129 static void 130 collect_features_use (hb_ot_shape_planner_t *plan) 131 { 132 hb_ot_map_builder_t *map = &plan->map; 133 134 /* Do this before any lookups have been applied. */ 135 map->add_gsub_pause (setup_syllables); 136 137 /* "Default glyph pre-processing group" */ 138 map->enable_feature (HB_TAG('l','o','c','l')); 139 map->enable_feature (HB_TAG('c','c','m','p')); 140 map->enable_feature (HB_TAG('n','u','k','t')); 141 map->enable_feature (HB_TAG('a','k','h','n'), F_MANUAL_ZWJ); 142 143 /* "Reordering group" */ 144 map->add_gsub_pause (clear_substitution_flags); 145 map->add_feature (HB_TAG('r','p','h','f'), F_MANUAL_ZWJ); 146 map->add_gsub_pause (record_rphf); 147 map->add_gsub_pause (clear_substitution_flags); 148 map->enable_feature (HB_TAG('p','r','e','f'), F_MANUAL_ZWJ); 149 map->add_gsub_pause (record_pref); 150 151 /* "Orthographic unit shaping group" */ 152 for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++) 153 map->enable_feature (basic_features[i], F_MANUAL_ZWJ); 154 155 map->add_gsub_pause (reorder); 156 map->add_gsub_pause (clear_syllables); 157 158 /* "Topographical features" */ 159 for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++) 160 map->add_feature (arabic_features[i]); 161 map->add_gsub_pause (nullptr); 162 163 /* "Standard typographic presentation" */ 164 for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++) 165 map->enable_feature (other_features[i], F_MANUAL_ZWJ); 166 167 /* "Positional feature application" */ 168 for (unsigned int i = 0; i < ARRAY_LENGTH (positioning_features); i++) 169 map->enable_feature (positioning_features[i]); 170 } 171 172 struct use_shape_plan_t 173 { 174 hb_mask_t rphf_mask; 175 176 arabic_shape_plan_t *arabic_plan; 177 }; 178 179 static bool 180 has_arabic_joining (hb_script_t script) 181 { 182 /* List of scripts that have data in arabic-table. */ 183 switch ((int) script) 184 { 185 /* Unicode-1.1 additions */ 186 case HB_SCRIPT_ARABIC: 187 188 /* Unicode-3.0 additions */ 189 case HB_SCRIPT_MONGOLIAN: 190 case HB_SCRIPT_SYRIAC: 191 192 /* Unicode-5.0 additions */ 193 case HB_SCRIPT_NKO: 194 case HB_SCRIPT_PHAGS_PA: 195 196 /* Unicode-6.0 additions */ 197 case HB_SCRIPT_MANDAIC: 198 199 /* Unicode-7.0 additions */ 200 case HB_SCRIPT_MANICHAEAN: 201 case HB_SCRIPT_PSALTER_PAHLAVI: 202 203 /* Unicode-9.0 additions */ 204 case HB_SCRIPT_ADLAM: 205 206 return true; 207 208 default: 209 return false; 210 } 211 } 212 213 static void * 214 data_create_use (const hb_ot_shape_plan_t *plan) 215 { 216 use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t)); 217 if (unlikely (!use_plan)) 218 return nullptr; 219 220 use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f')); 221 222 if (has_arabic_joining (plan->props.script)) 223 { 224 use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan); 225 if (unlikely (!use_plan->arabic_plan)) 226 { 227 free (use_plan); 228 return nullptr; 229 } 230 } 231 232 return use_plan; 233 } 234 235 static void 236 data_destroy_use (void *data) 237 { 238 use_shape_plan_t *use_plan = (use_shape_plan_t *) data; 239 240 if (use_plan->arabic_plan) 241 data_destroy_arabic (use_plan->arabic_plan); 242 243 free (data); 244 } 245 246 enum syllable_type_t { 247 independent_cluster, 248 virama_terminated_cluster, 249 standard_cluster, 250 number_joiner_terminated_cluster, 251 numeral_cluster, 252 symbol_cluster, 253 broken_cluster, 254 non_cluster, 255 }; 256 257 #include "hb-ot-shape-complex-use-machine.hh" 258 259 260 static void 261 setup_masks_use (const hb_ot_shape_plan_t *plan, 262 hb_buffer_t *buffer, 263 hb_font_t *font HB_UNUSED) 264 { 265 const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; 266 267 /* Do this before allocating use_category(). */ 268 if (use_plan->arabic_plan) 269 { 270 setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script); 271 } 272 273 HB_BUFFER_ALLOCATE_VAR (buffer, use_category); 274 275 /* We cannot setup masks here. We save information about characters 276 * and setup masks later on in a pause-callback. */ 277 278 unsigned int count = buffer->len; 279 hb_glyph_info_t *info = buffer->info; 280 for (unsigned int i = 0; i < count; i++) 281 info[i].use_category() = hb_use_get_category (info[i].codepoint); 282 } 283 284 static void 285 setup_rphf_mask (const hb_ot_shape_plan_t *plan, 286 hb_buffer_t *buffer) 287 { 288 const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; 289 290 hb_mask_t mask = use_plan->rphf_mask; 291 if (!mask) return; 292 293 hb_glyph_info_t *info = buffer->info; 294 295 foreach_syllable (buffer, start, end) 296 { 297 unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start); 298 for (unsigned int i = start; i < start + limit; i++) 299 info[i].mask |= mask; 300 } 301 } 302 303 static void 304 setup_topographical_masks (const hb_ot_shape_plan_t *plan, 305 hb_buffer_t *buffer) 306 { 307 const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; 308 if (use_plan->arabic_plan) 309 return; 310 311 static_assert ((INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4), ""); 312 hb_mask_t masks[4], all_masks = 0; 313 for (unsigned int i = 0; i < 4; i++) 314 { 315 masks[i] = plan->map.get_1_mask (arabic_features[i]); 316 if (masks[i] == plan->map.get_global_mask ()) 317 masks[i] = 0; 318 all_masks |= masks[i]; 319 } 320 if (!all_masks) 321 return; 322 hb_mask_t other_masks = ~all_masks; 323 324 unsigned int last_start = 0; 325 joining_form_t last_form = _NONE; 326 hb_glyph_info_t *info = buffer->info; 327 foreach_syllable (buffer, start, end) 328 { 329 syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F); 330 switch (syllable_type) 331 { 332 case independent_cluster: 333 case symbol_cluster: 334 case non_cluster: 335 /* These don't join. Nothing to do. */ 336 last_form = _NONE; 337 break; 338 339 case virama_terminated_cluster: 340 case standard_cluster: 341 case number_joiner_terminated_cluster: 342 case numeral_cluster: 343 case broken_cluster: 344 345 bool join = last_form == FINA || last_form == ISOL; 346 347 if (join) 348 { 349 /* Fixup previous syllable's form. */ 350 last_form = last_form == FINA ? MEDI : INIT; 351 for (unsigned int i = last_start; i < start; i++) 352 info[i].mask = (info[i].mask & other_masks) | masks[last_form]; 353 } 354 355 /* Form for this syllable. */ 356 last_form = join ? FINA : ISOL; 357 for (unsigned int i = start; i < end; i++) 358 info[i].mask = (info[i].mask & other_masks) | masks[last_form]; 359 360 break; 361 } 362 363 last_start = start; 364 } 365 } 366 367 static void 368 setup_syllables (const hb_ot_shape_plan_t *plan, 369 hb_font_t *font HB_UNUSED, 370 hb_buffer_t *buffer) 371 { 372 find_syllables (buffer); 373 foreach_syllable (buffer, start, end) 374 buffer->unsafe_to_break (start, end); 375 setup_rphf_mask (plan, buffer); 376 setup_topographical_masks (plan, buffer); 377 } 378 379 static void 380 clear_substitution_flags (const hb_ot_shape_plan_t *plan HB_UNUSED, 381 hb_font_t *font HB_UNUSED, 382 hb_buffer_t *buffer) 383 { 384 hb_glyph_info_t *info = buffer->info; 385 unsigned int count = buffer->len; 386 for (unsigned int i = 0; i < count; i++) 387 _hb_glyph_info_clear_substituted (&info[i]); 388 } 389 390 static void 391 record_rphf (const hb_ot_shape_plan_t *plan, 392 hb_font_t *font HB_UNUSED, 393 hb_buffer_t *buffer) 394 { 395 const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; 396 397 hb_mask_t mask = use_plan->rphf_mask; 398 if (!mask) return; 399 hb_glyph_info_t *info = buffer->info; 400 401 foreach_syllable (buffer, start, end) 402 { 403 /* Mark a substituted repha as USE_R. */ 404 for (unsigned int i = start; i < end && (info[i].mask & mask); i++) 405 if (_hb_glyph_info_substituted (&info[i])) 406 { 407 info[i].use_category() = USE_R; 408 break; 409 } 410 } 411 } 412 413 static void 414 record_pref (const hb_ot_shape_plan_t *plan HB_UNUSED, 415 hb_font_t *font HB_UNUSED, 416 hb_buffer_t *buffer) 417 { 418 hb_glyph_info_t *info = buffer->info; 419 420 foreach_syllable (buffer, start, end) 421 { 422 /* Mark a substituted pref as VPre, as they behave the same way. */ 423 for (unsigned int i = start; i < end; i++) 424 if (_hb_glyph_info_substituted (&info[i])) 425 { 426 info[i].use_category() = USE_VPre; 427 break; 428 } 429 } 430 } 431 432 static inline bool 433 is_halant (const hb_glyph_info_t &info) 434 { 435 return (info.use_category() == USE_H || info.use_category() == USE_HVM) && 436 !_hb_glyph_info_ligated (&info); 437 } 438 439 static void 440 reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end) 441 { 442 syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); 443 /* Only a few syllable types need reordering. */ 444 if (unlikely (!(FLAG_UNSAFE (syllable_type) & 445 (FLAG (virama_terminated_cluster) | 446 FLAG (standard_cluster) | 447 FLAG (broken_cluster) | 448 0)))) 449 return; 450 451 hb_glyph_info_t *info = buffer->info; 452 453 #define POST_BASE_FLAGS64 (FLAG64 (USE_FM) | \ 454 FLAG64 (USE_FAbv) | \ 455 FLAG64 (USE_FBlw) | \ 456 FLAG64 (USE_FPst) | \ 457 FLAG64 (USE_MAbv) | \ 458 FLAG64 (USE_MBlw) | \ 459 FLAG64 (USE_MPst) | \ 460 FLAG64 (USE_MPre) | \ 461 FLAG64 (USE_VAbv) | \ 462 FLAG64 (USE_VBlw) | \ 463 FLAG64 (USE_VPst) | \ 464 FLAG64 (USE_VPre) | \ 465 FLAG64 (USE_VMAbv) | \ 466 FLAG64 (USE_VMBlw) | \ 467 FLAG64 (USE_VMPst) | \ 468 FLAG64 (USE_VMPre)) 469 470 /* Move things forward. */ 471 if (info[start].use_category() == USE_R && end - start > 1) 472 { 473 /* Got a repha. Reorder it towards the end, but before the first post-base 474 * glyph. */ 475 for (unsigned int i = start + 1; i < end; i++) 476 { 477 bool is_post_base_glyph = (FLAG64_UNSAFE (info[i].use_category()) & POST_BASE_FLAGS64) || 478 is_halant (info[i]); 479 if (is_post_base_glyph || i == end - 1) 480 { 481 /* If we hit a post-base glyph, move before it; otherwise move to the 482 * end. Shift things in between backward. */ 483 484 if (is_post_base_glyph) 485 i--; 486 487 buffer->merge_clusters (start, i + 1); 488 hb_glyph_info_t t = info[start]; 489 memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0])); 490 info[i] = t; 491 492 break; 493 } 494 } 495 } 496 497 /* Move things back. */ 498 unsigned int j = start; 499 for (unsigned int i = start; i < end; i++) 500 { 501 uint32_t flag = FLAG_UNSAFE (info[i].use_category()); 502 if (is_halant (info[i])) 503 { 504 /* If we hit a halant, move after it; otherwise move to the beginning, and 505 * shift things in between forward. */ 506 j = i + 1; 507 } 508 else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) && 509 /* Only move the first component of a MultipleSubst. */ 510 0 == _hb_glyph_info_get_lig_comp (&info[i]) && 511 j < i) 512 { 513 buffer->merge_clusters (j, i + 1); 514 hb_glyph_info_t t = info[i]; 515 memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0])); 516 info[j] = t; 517 } 518 } 519 } 520 521 static inline void 522 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, 523 hb_font_t *font, 524 hb_buffer_t *buffer) 525 { 526 /* Note: This loop is extra overhead, but should not be measurable. */ 527 bool has_broken_syllables = false; 528 unsigned int count = buffer->len; 529 hb_glyph_info_t *info = buffer->info; 530 for (unsigned int i = 0; i < count; i++) 531 if ((info[i].syllable() & 0x0F) == broken_cluster) 532 { 533 has_broken_syllables = true; 534 break; 535 } 536 if (likely (!has_broken_syllables)) 537 return; 538 539 hb_glyph_info_t dottedcircle = {0}; 540 if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint)) 541 return; 542 dottedcircle.use_category() = hb_use_get_category (0x25CC); 543 544 buffer->clear_output (); 545 546 buffer->idx = 0; 547 unsigned int last_syllable = 0; 548 while (buffer->idx < buffer->len && buffer->successful) 549 { 550 unsigned int syllable = buffer->cur().syllable(); 551 syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); 552 if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) 553 { 554 last_syllable = syllable; 555 556 hb_glyph_info_t ginfo = dottedcircle; 557 ginfo.cluster = buffer->cur().cluster; 558 ginfo.mask = buffer->cur().mask; 559 ginfo.syllable() = buffer->cur().syllable(); 560 /* TODO Set glyph_props? */ 561 562 /* Insert dottedcircle after possible Repha. */ 563 while (buffer->idx < buffer->len && buffer->successful && 564 last_syllable == buffer->cur().syllable() && 565 buffer->cur().use_category() == USE_R) 566 buffer->next_glyph (); 567 568 buffer->output_info (ginfo); 569 } 570 else 571 buffer->next_glyph (); 572 } 573 buffer->swap_buffers (); 574 } 575 576 static void 577 reorder (const hb_ot_shape_plan_t *plan, 578 hb_font_t *font, 579 hb_buffer_t *buffer) 580 { 581 insert_dotted_circles (plan, font, buffer); 582 583 foreach_syllable (buffer, start, end) 584 reorder_syllable (buffer, start, end); 585 586 HB_BUFFER_DEALLOCATE_VAR (buffer, use_category); 587 } 588 589 static void 590 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, 591 hb_font_t *font HB_UNUSED, 592 hb_buffer_t *buffer) 593 { 594 hb_glyph_info_t *info = buffer->info; 595 unsigned int count = buffer->len; 596 for (unsigned int i = 0; i < count; i++) 597 info[i].syllable() = 0; 598 } 599 600 601 static void 602 preprocess_text_use (const hb_ot_shape_plan_t *plan, 603 hb_buffer_t *buffer, 604 hb_font_t *font) 605 { 606 _hb_preprocess_text_vowel_constraints (plan, buffer, font); 607 } 608 609 static bool 610 compose_use (const hb_ot_shape_normalize_context_t *c, 611 hb_codepoint_t a, 612 hb_codepoint_t b, 613 hb_codepoint_t *ab) 614 { 615 /* Avoid recomposing split matras. */ 616 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) 617 return false; 618 619 return (bool)c->unicode->compose (a, b, ab); 620 } 621 622 623 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use = 624 { 625 collect_features_use, 626 nullptr, /* override_features */ 627 data_create_use, 628 data_destroy_use, 629 preprocess_text_use, 630 nullptr, /* postprocess_glyphs */ 631 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, 632 nullptr, /* decompose */ 633 compose_use, 634 setup_masks_use, 635 HB_TAG_NONE, /* gpos_tag */ 636 nullptr, /* reorder_marks */ 637 HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, 638 false, /* fallback_position */ 639 };