1 /*
   2  * Copyright © 2015  Mozilla Foundation.
   3  * Copyright © 2015  Google, Inc.
   4  *
   5  *  This is part of HarfBuzz, a text shaping library.
   6  *
   7  * Permission is hereby granted, without written agreement and without
   8  * license or royalty fees, to use, copy, modify, and distribute this
   9  * software and its documentation for any purpose, provided that the
  10  * above copyright notice and the following two paragraphs appear in
  11  * all copies of this software.
  12  *
  13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  17  * DAMAGE.
  18  *
  19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  24  *
  25  * Mozilla Author(s): Jonathan Kew
  26  * Google Author(s): Behdad Esfahbod
  27  */
  28 
  29 #include "hb-ot-shape-complex-use.hh"
  30 #include "hb-ot-shape-complex-arabic.hh"
  31 #include "hb-ot-shape-complex-vowel-constraints.hh"
  32 
  33 /* buffer var allocations */
  34 #define use_category() complex_var_u8_0()
  35 
  36 
  37 /*
  38  * Universal Shaping Engine.
  39  * https://docs.microsoft.com/en-us/typography/script-development/use
  40  */
  41 
/*
 * Feature tags that collect_features_use() enables, each with F_MANUAL_ZWJ,
 * in its "Orthographic unit shaping group" -- i.e. before the `reorder`
 * GSUB pause is registered.
 */
static const hb_tag_t
basic_features[] =
{
  /*
   * Basic features.
   * These features are applied all at once, before reordering.
   */
  HB_TAG('r','k','r','f'),
  HB_TAG('a','b','v','f'),
  HB_TAG('b','l','w','f'),
  HB_TAG('h','a','l','f'),
  HB_TAG('p','s','t','f'),
  HB_TAG('v','a','t','u'),
  HB_TAG('c','j','c','t'),
};
/*
 * Topographical (joining-form) feature tags.
 * collect_features_use() adds every entry with add_feature().
 * setup_topographical_masks() only looks up masks for the first four
 * entries and indexes them with joining_form_t values, so the order of
 * those four entries must not change.
 */
static const hb_tag_t
arabic_features[] =
{
  HB_TAG('i','s','o','l'),
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  /* The spec doesn't specify these but we apply them anyway, since our Arabic
   * shaper does.  These are only used in the Syriac spec. */
  HB_TAG('m','e','d','2'),
  HB_TAG('f','i','n','2'),
  HB_TAG('f','i','n','3'),
};
/* Joining forms tracked by setup_topographical_masks().
 * Same order as arabic_features, because ISOL..FINA are used there as
 * indices into a mask array filled from arabic_features[0..3].
 * Don't need Syriac stuff.
 * _NONE means "no joinable previous syllable"; it is only compared against
 * and assigned, never used as an index. */
enum joining_form_t {
  ISOL,
  INIT,
  MEDI,
  FINA,
  _NONE
};
/*
 * Feature tags that collect_features_use() enables, each with F_MANUAL_ZWJ,
 * in its "Standard typographic presentation" group -- after the `reorder`
 * and `clear_syllables` GSUB pauses have been registered.
 */
static const hb_tag_t
other_features[] =
{
  /*
   * Other features.
   * These features are applied all at once, after reordering and
   * clearing syllables.
   */
  HB_TAG('a','b','v','s'),
  HB_TAG('b','l','w','s'),
  HB_TAG('h','a','l','n'),
  HB_TAG('p','r','e','s'),
  HB_TAG('p','s','t','s'),
};
/*
 * Feature tags that collect_features_use() enables (with default flags) in
 * its final "Positional feature application" group.
 */
static const hb_tag_t
positioning_features[] =
{
  /*
   * Positioning features.
   * We don't care about the types.
   */
  HB_TAG('d','i','s','t'),
  HB_TAG('a','b','v','m'),
  HB_TAG('b','l','w','m'),
};
 103 
/*
 * Forward declarations of the GSUB pause callbacks.  They are defined further
 * down in this file but are registered by collect_features_use() via
 * add_gsub_pause(), so they have to be visible before it.  All of them share
 * the same (plan, font, buffer) signature.
 */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font,
                 hb_buffer_t *buffer);
static void
clear_substitution_flags (const hb_ot_shape_plan_t *plan,
                          hb_font_t *font,
                          hb_buffer_t *buffer);
static void
record_rphf (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
static void
record_pref (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
static void
reorder (const hb_ot_shape_plan_t *plan,
         hb_font_t *font,
         hb_buffer_t *buffer);
static void
clear_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font,
                 hb_buffer_t *buffer);
 128 
 129 static void
 130 collect_features_use (hb_ot_shape_planner_t *plan)
 131 {
 132   hb_ot_map_builder_t *map = &plan->map;
 133 
 134   /* Do this before any lookups have been applied. */
 135   map->add_gsub_pause (setup_syllables);
 136 
 137   /* "Default glyph pre-processing group" */
 138   map->enable_feature (HB_TAG('l','o','c','l'));
 139   map->enable_feature (HB_TAG('c','c','m','p'));
 140   map->enable_feature (HB_TAG('n','u','k','t'));
 141   map->enable_feature (HB_TAG('a','k','h','n'), F_MANUAL_ZWJ);
 142 
 143   /* "Reordering group" */
 144   map->add_gsub_pause (clear_substitution_flags);
 145   map->add_feature (HB_TAG('r','p','h','f'), F_MANUAL_ZWJ);
 146   map->add_gsub_pause (record_rphf);
 147   map->add_gsub_pause (clear_substitution_flags);
 148   map->enable_feature (HB_TAG('p','r','e','f'), F_MANUAL_ZWJ);
 149   map->add_gsub_pause (record_pref);
 150 
 151   /* "Orthographic unit shaping group" */
 152   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 153     map->enable_feature (basic_features[i], F_MANUAL_ZWJ);
 154 
 155   map->add_gsub_pause (reorder);
 156   map->add_gsub_pause (clear_syllables);
 157 
 158   /* "Topographical features" */
 159   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
 160     map->add_feature (arabic_features[i]);
 161   map->add_gsub_pause (nullptr);
 162 
 163   /* "Standard typographic presentation" */
 164   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 165     map->enable_feature (other_features[i], F_MANUAL_ZWJ);
 166 
 167   /* "Positional feature application" */
 168   for (unsigned int i = 0; i < ARRAY_LENGTH (positioning_features); i++)
 169     map->enable_feature (positioning_features[i]);
 170 }
 171 
/* Per-plan data of the USE shaper.  Allocated (zero-filled) by
 * data_create_use() and released by data_destroy_use(). */
struct use_shape_plan_t
{
  /* Mask of the 'rphf' feature as returned by map.get_1_mask(); users in this
   * file treat a zero mask as "nothing to do". */
  hb_mask_t rphf_mask;

  /* Arabic shaper plan.  Only created when has_arabic_joining() accepts the
   * plan's script; otherwise it stays at its zero-filled value. */
  arabic_shape_plan_t *arabic_plan;
};
 178 
 179 static bool
 180 has_arabic_joining (hb_script_t script)
 181 {
 182   /* List of scripts that have data in arabic-table. */
 183   switch ((int) script)
 184   {
 185     /* Unicode-1.1 additions */
 186     case HB_SCRIPT_ARABIC:
 187 
 188     /* Unicode-3.0 additions */
 189     case HB_SCRIPT_MONGOLIAN:
 190     case HB_SCRIPT_SYRIAC:
 191 
 192     /* Unicode-5.0 additions */
 193     case HB_SCRIPT_NKO:
 194     case HB_SCRIPT_PHAGS_PA:
 195 
 196     /* Unicode-6.0 additions */
 197     case HB_SCRIPT_MANDAIC:
 198 
 199     /* Unicode-7.0 additions */
 200     case HB_SCRIPT_MANICHAEAN:
 201     case HB_SCRIPT_PSALTER_PAHLAVI:
 202 
 203     /* Unicode-9.0 additions */
 204     case HB_SCRIPT_ADLAM:
 205 
 206       return true;
 207 
 208     default:
 209       return false;
 210   }
 211 }
 212 
 213 static void *
 214 data_create_use (const hb_ot_shape_plan_t *plan)
 215 {
 216   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
 217   if (unlikely (!use_plan))
 218     return nullptr;
 219 
 220   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
 221 
 222   if (has_arabic_joining (plan->props.script))
 223   {
 224     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
 225     if (unlikely (!use_plan->arabic_plan))
 226     {
 227       free (use_plan);
 228       return nullptr;
 229     }
 230   }
 231 
 232   return use_plan;
 233 }
 234 
 235 static void
 236 data_destroy_use (void *data)
 237 {
 238   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
 239 
 240   if (use_plan->arabic_plan)
 241     data_destroy_arabic (use_plan->arabic_plan);
 242 
 243   free (data);
 244 }
 245 
/* Kinds of syllable.  Code in this file recovers the kind of a glyph's
 * syllable from the low four bits of syllable() (`syllable() & 0x0F`), so the
 * enumerator values have to fit in four bits.
 * NOTE(review): the values are presumably written by the state machine
 * included right after this enum -- confirm there. */
enum syllable_type_t {
  independent_cluster,
  virama_terminated_cluster,
  standard_cluster,
  number_joiner_terminated_cluster,
  numeral_cluster,
  symbol_cluster,
  broken_cluster,
  non_cluster,
};
 256 
 257 #include "hb-ot-shape-complex-use-machine.hh"
 258 
 259 
 260 static void
 261 setup_masks_use (const hb_ot_shape_plan_t *plan,
 262                  hb_buffer_t              *buffer,
 263                  hb_font_t                *font HB_UNUSED)
 264 {
 265   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 266 
 267   /* Do this before allocating use_category(). */
 268   if (use_plan->arabic_plan)
 269   {
 270     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
 271   }
 272 
 273   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
 274 
 275   /* We cannot setup masks here.  We save information about characters
 276    * and setup masks later on in a pause-callback. */
 277 
 278   unsigned int count = buffer->len;
 279   hb_glyph_info_t *info = buffer->info;
 280   for (unsigned int i = 0; i < count; i++)
 281     info[i].use_category() = hb_use_get_category (info[i].codepoint);
 282 }
 283 
 284 static void
 285 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
 286                  hb_buffer_t *buffer)
 287 {
 288   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 289 
 290   hb_mask_t mask = use_plan->rphf_mask;
 291   if (!mask) return;
 292 
 293   hb_glyph_info_t *info = buffer->info;
 294 
 295   foreach_syllable (buffer, start, end)
 296   {
 297     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
 298     for (unsigned int i = start; i < start + limit; i++)
 299       info[i].mask |= mask;
 300   }
 301 }
 302 
 303 static void
 304 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
 305                            hb_buffer_t *buffer)
 306 {
 307   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 308   if (use_plan->arabic_plan)
 309     return;
 310 
 311   static_assert ((INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4), "");
 312   hb_mask_t masks[4], all_masks = 0;
 313   for (unsigned int i = 0; i < 4; i++)
 314   {
 315     masks[i] = plan->map.get_1_mask (arabic_features[i]);
 316     if (masks[i] == plan->map.get_global_mask ())
 317       masks[i] = 0;
 318     all_masks |= masks[i];
 319   }
 320   if (!all_masks)
 321     return;
 322   hb_mask_t other_masks = ~all_masks;
 323 
 324   unsigned int last_start = 0;
 325   joining_form_t last_form = _NONE;
 326   hb_glyph_info_t *info = buffer->info;
 327   foreach_syllable (buffer, start, end)
 328   {
 329     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
 330     switch (syllable_type)
 331     {
 332       case independent_cluster:
 333       case symbol_cluster:
 334       case non_cluster:
 335         /* These don't join.  Nothing to do. */
 336         last_form = _NONE;
 337         break;
 338 
 339       case virama_terminated_cluster:
 340       case standard_cluster:
 341       case number_joiner_terminated_cluster:
 342       case numeral_cluster:
 343       case broken_cluster:
 344 
 345         bool join = last_form == FINA || last_form == ISOL;
 346 
 347         if (join)
 348         {
 349           /* Fixup previous syllable's form. */
 350           last_form = last_form == FINA ? MEDI : INIT;
 351           for (unsigned int i = last_start; i < start; i++)
 352             info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 353         }
 354 
 355         /* Form for this syllable. */
 356         last_form = join ? FINA : ISOL;
 357         for (unsigned int i = start; i < end; i++)
 358           info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 359 
 360         break;
 361     }
 362 
 363     last_start = start;
 364   }
 365 }
 366 
/*
 * GSUB pause callback, registered first by collect_features_use().
 * Segments the buffer into syllables, flags every syllable range as unsafe
 * to break, and then sets up the 'rphf' and topographical masks, which both
 * iterate over those syllables.  `font` is unused.
 */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font HB_UNUSED,
                 hb_buffer_t *buffer)
{
  /* Must come first: everything below walks the syllables found here. */
  find_syllables (buffer);
  foreach_syllable (buffer, start, end)
    buffer->unsafe_to_break (start, end);
  setup_rphf_mask (plan, buffer);
  setup_topographical_masks (plan, buffer);
}
 378 
 379 static void
 380 clear_substitution_flags (const hb_ot_shape_plan_t *plan HB_UNUSED,
 381                           hb_font_t *font HB_UNUSED,
 382                           hb_buffer_t *buffer)
 383 {
 384   hb_glyph_info_t *info = buffer->info;
 385   unsigned int count = buffer->len;
 386   for (unsigned int i = 0; i < count; i++)
 387     _hb_glyph_info_clear_substituted (&info[i]);
 388 }
 389 
 390 static void
 391 record_rphf (const hb_ot_shape_plan_t *plan,
 392              hb_font_t *font HB_UNUSED,
 393              hb_buffer_t *buffer)
 394 {
 395   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 396 
 397   hb_mask_t mask = use_plan->rphf_mask;
 398   if (!mask) return;
 399   hb_glyph_info_t *info = buffer->info;
 400 
 401   foreach_syllable (buffer, start, end)
 402   {
 403     /* Mark a substituted repha as USE_R. */
 404     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
 405       if (_hb_glyph_info_substituted (&info[i]))
 406       {
 407         info[i].use_category() = USE_R;
 408         break;
 409       }
 410   }
 411 }
 412 
 413 static void
 414 record_pref (const hb_ot_shape_plan_t *plan HB_UNUSED,
 415              hb_font_t *font HB_UNUSED,
 416              hb_buffer_t *buffer)
 417 {
 418   hb_glyph_info_t *info = buffer->info;
 419 
 420   foreach_syllable (buffer, start, end)
 421   {
 422     /* Mark a substituted pref as VPre, as they behave the same way. */
 423     for (unsigned int i = start; i < end; i++)
 424       if (_hb_glyph_info_substituted (&info[i]))
 425       {
 426         info[i].use_category() = USE_VPre;
 427         break;
 428       }
 429   }
 430 }
 431 
 432 static inline bool
 433 is_halant (const hb_glyph_info_t &info)
 434 {
 435   return (info.use_category() == USE_H || info.use_category() == USE_HVM) &&
 436          !_hb_glyph_info_ligated (&info);
 437 }
 438 
/*
 * Reorders the glyphs of one syllable, [start, end), in place.
 *
 * Only virama-terminated, standard and broken clusters are touched.  Two
 * passes are made:
 *   1. a leading USE_R glyph is moved forward, to just before the first
 *      post-base glyph (or to the end of the syllable);
 *   2. USE_VPre / USE_VMPre glyphs are moved back, to the start of the
 *      syllable or to just after the most recent halant.
 * Clusters of the affected range are merged before each move.
 */
static void
reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
{
  /* The syllable type lives in the low four bits of syllable(). */
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
  /* Only a few syllable types need reordering. */
  if (unlikely (!(FLAG_UNSAFE (syllable_type) &
                  (FLAG (virama_terminated_cluster) |
                   FLAG (standard_cluster) |
                   FLAG (broken_cluster) |
                   0))))
    return;

  hb_glyph_info_t *info = buffer->info;

/* Categories that count as "post-base" for repha placement below.
 * Note: this macro is not #undef'ed and so stays defined for the rest of
 * the translation unit. */
#define POST_BASE_FLAGS64 (FLAG64 (USE_FM) | \
                           FLAG64 (USE_FAbv) | \
                           FLAG64 (USE_FBlw) | \
                           FLAG64 (USE_FPst) | \
                           FLAG64 (USE_MAbv) | \
                           FLAG64 (USE_MBlw) | \
                           FLAG64 (USE_MPst) | \
                           FLAG64 (USE_MPre) | \
                           FLAG64 (USE_VAbv) | \
                           FLAG64 (USE_VBlw) | \
                           FLAG64 (USE_VPst) | \
                           FLAG64 (USE_VPre) | \
                           FLAG64 (USE_VMAbv) | \
                           FLAG64 (USE_VMBlw) | \
                           FLAG64 (USE_VMPst) | \
                           FLAG64 (USE_VMPre))

  /* Move things forward. */
  if (info[start].use_category() == USE_R && end - start > 1)
  {
    /* Got a repha.  Reorder it towards the end, but before the first post-base
     * glyph. */
    for (unsigned int i = start + 1; i < end; i++)
    {
      /* An unligated halant stops the repha just like a post-base glyph. */
      bool is_post_base_glyph = (FLAG64_UNSAFE (info[i].use_category()) & POST_BASE_FLAGS64) ||
                                is_halant (info[i]);
      if (is_post_base_glyph || i == end - 1)
      {
        /* If we hit a post-base glyph, move before it; otherwise move to the
         * end. Shift things in between backward. */

        if (is_post_base_glyph)
          i--;

        /* i is now the repha's destination index.  Save the repha, slide
         * info[start+1..i] down by one slot, and drop it into slot i. */
        buffer->merge_clusters (start, i + 1);
        hb_glyph_info_t t = info[start];
        memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
        info[i] = t;

        break;
      }
    }
  }

  /* Move things back. */
  /* j is the insertion point: the syllable start, or the slot right after the
   * last halant seen so far. */
  unsigned int j = start;
  for (unsigned int i = start; i < end; i++)
  {
    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
    if (is_halant (info[i]))
    {
      /* If we hit a halant, move after it; otherwise move to the beginning, and
       * shift things in between forward. */
      j = i + 1;
    }
    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
             /* Only move the first component of a MultipleSubst. */
             0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
             j < i)
    {
      /* Save glyph i, slide info[j..i-1] up by one slot, and drop it into
       * slot j. */
      buffer->merge_clusters (j, i + 1);
      hb_glyph_info_t t = info[i];
      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
      info[j] = t;
    }
  }
}
 520 
/*
 * Inserts a dotted-circle glyph (U+25CC) into every broken_cluster syllable.
 *
 * Returns without touching the buffer when there is no broken syllable or
 * when the font has no nominal glyph for U+25CC.  Otherwise the buffer is
 * rebuilt through its output side: glyphs are copied with next_glyph(), and
 * one dotted circle per broken syllable is emitted with output_info(), after
 * any leading USE_R glyphs of that syllable.  `plan` is unused.
 */
static inline void
insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
                       hb_font_t *font,
                       hb_buffer_t *buffer)
{
  /* Note: This loop is extra overhead, but should not be measurable. */
  bool has_broken_syllables = false;
  unsigned int count = buffer->len;
  hb_glyph_info_t *info = buffer->info;
  for (unsigned int i = 0; i < count; i++)
    if ((info[i].syllable() & 0x0F) == broken_cluster)
    {
      has_broken_syllables = true;
      break;
    }
  if (likely (!has_broken_syllables))
    return;

  /* Template for the inserted glyph: zero-initialized, then given the font's
   * glyph for U+25CC and that character's USE category. */
  hb_glyph_info_t dottedcircle = {0};
  if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
    return;
  dottedcircle.use_category() = hb_use_get_category (0x25CC);

  buffer->clear_output ();

  buffer->idx = 0;
  /* Syllable value of the last broken syllable that already received its
   * dotted circle, so that each one gets exactly one. */
  unsigned int last_syllable = 0;
  while (buffer->idx < buffer->len && buffer->successful)
  {
    unsigned int syllable = buffer->cur().syllable();
    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
    {
      last_syllable = syllable;

      /* The new glyph takes cluster, mask and syllable from the first glyph
       * of the broken syllable. */
      hb_glyph_info_t ginfo = dottedcircle;
      ginfo.cluster = buffer->cur().cluster;
      ginfo.mask = buffer->cur().mask;
      ginfo.syllable() = buffer->cur().syllable();
      /* TODO Set glyph_props? */

      /* Insert dottedcircle after possible Repha. */
      while (buffer->idx < buffer->len && buffer->successful &&
             last_syllable == buffer->cur().syllable() &&
             buffer->cur().use_category() == USE_R)
        buffer->next_glyph ();

      buffer->output_info (ginfo);
    }
    else
      buffer->next_glyph ();
  }
  /* Make the rebuilt output the buffer's content. */
  buffer->swap_buffers ();
}
 575 
/*
 * GSUB pause callback, registered after the basic features.
 * Inserts dotted circles into broken syllables, reorders every syllable, and
 * finally releases the use_category() buffer var that setup_masks_use()
 * allocated -- no code may read use_category() after this point.
 */
static void
reorder (const hb_ot_shape_plan_t *plan,
         hb_font_t *font,
         hb_buffer_t *buffer)
{
  /* Done first, so that the inserted glyphs take part in reordering. */
  insert_dotted_circles (plan, font, buffer);

  foreach_syllable (buffer, start, end)
    reorder_syllable (buffer, start, end);

  HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
}
 588 
 589 static void
 590 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 591                  hb_font_t *font HB_UNUSED,
 592                  hb_buffer_t *buffer)
 593 {
 594   hb_glyph_info_t *info = buffer->info;
 595   unsigned int count = buffer->len;
 596   for (unsigned int i = 0; i < count; i++)
 597     info[i].syllable() = 0;
 598 }
 599 
 600 
/*
 * Text-preprocessing hook of the USE shaper: forwards unchanged to the
 * shared vowel-constraints preprocessing.
 */
static void
preprocess_text_use (const hb_ot_shape_plan_t *plan,
                     hb_buffer_t              *buffer,
                     hb_font_t                *font)
{
  _hb_preprocess_text_vowel_constraints (plan, buffer, font);
}
 608 
 609 static bool
 610 compose_use (const hb_ot_shape_normalize_context_t *c,
 611              hb_codepoint_t  a,
 612              hb_codepoint_t  b,
 613              hb_codepoint_t *ab)
 614 {
 615   /* Avoid recomposing split matras. */
 616   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 617     return false;
 618 
 619   return (bool)c->unicode->compose (a, b, ab);
 620 }
 621 
 622 
/*
 * Shaper descriptor of the Universal Shaping Engine.
 * The initializer is positional: entries must stay in the member order of
 * hb_ot_complex_shaper_t.  Slots that are left empty carry a trailing
 * comment naming the hook they stand for.
 */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
{
  collect_features_use,
  nullptr, /* override_features */
  data_create_use,
  data_destroy_use,
  preprocess_text_use,
  nullptr, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  nullptr, /* decompose */
  compose_use,
  setup_masks_use,
  HB_TAG_NONE, /* gpos_tag */
  nullptr, /* reorder_marks */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
  false, /* fallback_position */
};