1 /*
   2  * Copyright © 2015  Mozilla Foundation.
   3  * Copyright © 2015  Google, Inc.
   4  *
   5  *  This is part of HarfBuzz, a text shaping library.
   6  *
   7  * Permission is hereby granted, without written agreement and without
   8  * license or royalty fees, to use, copy, modify, and distribute this
   9  * software and its documentation for any purpose, provided that the
  10  * above copyright notice and the following two paragraphs appear in
  11  * all copies of this software.
  12  *
  13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  17  * DAMAGE.
  18  *
  19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  24  *
  25  * Mozilla Author(s): Jonathan Kew
  26  * Google Author(s): Behdad Esfahbod
  27  */
  28 
  29 #include "hb-ot-shape-complex-use-private.hh"
  30 #include "hb-ot-shape-complex-arabic-private.hh"
  31 
/* buffer var allocations */
/* use_category() is this file's name for the complex_var_u8_0() glyph-info
 * slot.  The slot is reserved with HB_BUFFER_ALLOCATE_VAR in
 * setup_masks_use() and released with HB_BUFFER_DEALLOCATE_VAR in reorder(). */
#define use_category() complex_var_u8_0()
  34 
  35 
  36 /*
  37  * Universal Shaping Engine.
  38  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
  39  */
  40 
/* Feature tags that collect_features_use() registers, each with
 * F_GLOBAL | F_MANUAL_ZWJ, right before the `reorder` pause. */
static const hb_tag_t
basic_features[] =
{
  /*
   * Basic features.
   * These features are applied all at once, before reordering.
   */
  HB_TAG('r','k','r','f'),
  HB_TAG('a','b','v','f'),
  HB_TAG('b','l','w','f'),
  HB_TAG('h','a','l','f'),
  HB_TAG('p','s','t','f'),
  HB_TAG('v','a','t','u'),
  HB_TAG('c','j','c','t'),
};
/* Topographical (joining-form) feature tags.  collect_features_use()
 * registers every entry with F_NONE; setup_topographical_masks() looks up
 * the first four by index, so the leading entries must stay in the same
 * order as joining_form_t. */
static const hb_tag_t
arabic_features[] =
{
  HB_TAG('i','s','o','l'),
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
   * does.  These are only used in Syriac spec. */
  HB_TAG('m','e','d','2'),
  HB_TAG('f','i','n','2'),
  HB_TAG('f','i','n','3'),
};
/* Same order as arabic_features.  Don't need Syriac stuff.
 *
 * ISOL..FINA are used as indices into the four-entry masks[] array in
 * setup_topographical_masks() (a static_assert there checks they are < 4).
 * _NONE is the "previous syllable does not join" state and is never used
 * as an index. */
enum joining_form_t {
  ISOL,
  INIT,
  MEDI,
  FINA,
  _NONE
};
/* Feature tags that collect_features_use() registers last, each with
 * F_GLOBAL | F_MANUAL_ZWJ, after the topographical features and their
 * trailing pause. */
static const hb_tag_t
other_features[] =
{
  /*
   * Other features.
   * These features are applied all at once, after reordering.
   */
  HB_TAG('a','b','v','s'),
  HB_TAG('b','l','w','s'),
  HB_TAG('h','a','l','n'),
  HB_TAG('p','r','e','s'),
  HB_TAG('p','s','t','s'),
  /* Positioning features, though we don't care about the types. */
  HB_TAG('d','i','s','t'),
  HB_TAG('a','b','v','m'),
  HB_TAG('b','l','w','m'),
};
  94 
/* Forward declarations of the GSUB pause callbacks.  All five are handed to
 * add_gsub_pause() in collect_features_use() and defined further down in
 * this file. */

/* Finds syllables and sets up the per-syllable masks. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font,
                 hb_buffer_t *buffer);
/* Clears the "substituted" flag on every glyph in the buffer. */
static void
clear_substitution_flags (const hb_ot_shape_plan_t *plan,
                          hb_font_t *font,
                          hb_buffer_t *buffer);
/* Re-categorises a glyph substituted by 'rphf' as USE_R. */
static void
record_rphf (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
/* Re-categorises a glyph substituted by 'pref' as USE_VPre. */
static void
record_pref (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
/* Inserts dotted circles, reorders each syllable, then releases use_category. */
static void
reorder (const hb_ot_shape_plan_t *plan,
         hb_font_t *font,
         hb_buffer_t *buffer);
 115 
 116 static void
 117 collect_features_use (hb_ot_shape_planner_t *plan)
 118 {
 119   hb_ot_map_builder_t *map = &plan->map;
 120 
 121   /* Do this before any lookups have been applied. */
 122   map->add_gsub_pause (setup_syllables);
 123 
 124   /* "Default glyph pre-processing group" */
 125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 126   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 127   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
 128   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
 129 
 130   /* "Reordering group" */
 131   map->add_gsub_pause (clear_substitution_flags);
 132   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
 133   map->add_gsub_pause (record_rphf);
 134   map->add_gsub_pause (clear_substitution_flags);
 135   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
 136   map->add_gsub_pause (record_pref);
 137 
 138   /* "Orthographic unit shaping group" */
 139   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 140     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 141 
 142   map->add_gsub_pause (reorder);
 143 
 144   /* "Topographical features" */
 145   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
 146     map->add_feature (arabic_features[i], 1, F_NONE);
 147   map->add_gsub_pause (nullptr);
 148 
 149   /* "Standard typographic presentation" and "Positional feature application" */
 150   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 151     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 152 }
 153 
/* Shaper-private data attached to a shape plan.  It is calloc()ed by
 * data_create_use() (so all fields start out zero) and released by
 * data_destroy_use(). */
struct use_shape_plan_t
{
  ASSERT_POD ();

  /* Result of plan->map.get_1_mask() for 'rphf'.  setup_rphf_mask() and
   * record_rphf() return immediately when this is zero. */
  hb_mask_t rphf_mask;

  /* Arabic shaper plan data, created only when has_arabic_joining() accepts
   * the plan's script; null otherwise. */
  arabic_shape_plan_t *arabic_plan;
};
 162 
 163 static bool
 164 has_arabic_joining (hb_script_t script)
 165 {
 166   /* List of scripts that have data in arabic-table. */
 167   switch ((int) script)
 168   {
 169     /* Unicode-1.1 additions */
 170     case HB_SCRIPT_ARABIC:
 171 
 172     /* Unicode-3.0 additions */
 173     case HB_SCRIPT_MONGOLIAN:
 174     case HB_SCRIPT_SYRIAC:
 175 
 176     /* Unicode-5.0 additions */
 177     case HB_SCRIPT_NKO:
 178     case HB_SCRIPT_PHAGS_PA:
 179 
 180     /* Unicode-6.0 additions */
 181     case HB_SCRIPT_MANDAIC:
 182 
 183     /* Unicode-7.0 additions */
 184     case HB_SCRIPT_MANICHAEAN:
 185     case HB_SCRIPT_PSALTER_PAHLAVI:
 186 
 187     /* Unicode-9.0 additions */
 188     case HB_SCRIPT_ADLAM:
 189 
 190       return true;
 191 
 192     default:
 193       return false;
 194   }
 195 }
 196 
 197 static void *
 198 data_create_use (const hb_ot_shape_plan_t *plan)
 199 {
 200   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
 201   if (unlikely (!use_plan))
 202     return nullptr;
 203 
 204   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
 205 
 206   if (has_arabic_joining (plan->props.script))
 207   {
 208     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
 209     if (unlikely (!use_plan->arabic_plan))
 210     {
 211       free (use_plan);
 212       return nullptr;
 213     }
 214   }
 215 
 216   return use_plan;
 217 }
 218 
 219 static void
 220 data_destroy_use (void *data)
 221 {
 222   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
 223 
 224   if (use_plan->arabic_plan)
 225     data_destroy_arabic (use_plan->arabic_plan);
 226 
 227   free (data);
 228 }
 229 
/* Syllable classes.  This file reads the class back from the low four bits
 * of a glyph's syllable() value (`syllable() & 0x0F`).
 * NOTE(review): the values are presumably written by find_syllables() from
 * the machine header included right below -- confirm there before
 * reordering or inserting enumerators. */
enum syllable_type_t {
  independent_cluster,
  virama_terminated_cluster,
  standard_cluster,
  number_joiner_terminated_cluster,
  numeral_cluster,
  symbol_cluster,
  broken_cluster,
  non_cluster,
};
 240 
 241 #include "hb-ot-shape-complex-use-machine.hh"
 242 
 243 
 244 static void
 245 setup_masks_use (const hb_ot_shape_plan_t *plan,
 246                  hb_buffer_t              *buffer,
 247                  hb_font_t                *font HB_UNUSED)
 248 {
 249   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 250 
 251   /* Do this before allocating use_category(). */
 252   if (use_plan->arabic_plan)
 253   {
 254     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
 255   }
 256 
 257   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
 258 
 259   /* We cannot setup masks here.  We save information about characters
 260    * and setup masks later on in a pause-callback. */
 261 
 262   unsigned int count = buffer->len;
 263   hb_glyph_info_t *info = buffer->info;
 264   for (unsigned int i = 0; i < count; i++)
 265     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
 266 }
 267 
 268 static void
 269 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
 270                  hb_buffer_t *buffer)
 271 {
 272   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 273 
 274   hb_mask_t mask = use_plan->rphf_mask;
 275   if (!mask) return;
 276 
 277   hb_glyph_info_t *info = buffer->info;
 278 
 279   foreach_syllable (buffer, start, end)
 280   {
 281     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
 282     for (unsigned int i = start; i < start + limit; i++)
 283       info[i].mask |= mask;
 284   }
 285 }
 286 
 287 static void
 288 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
 289                            hb_buffer_t *buffer)
 290 {
 291   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 292   if (use_plan->arabic_plan)
 293     return;
 294 
 295   static_assert ((INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4), "");
 296   hb_mask_t masks[4], all_masks = 0;
 297   for (unsigned int i = 0; i < 4; i++)
 298   {
 299     masks[i] = plan->map.get_1_mask (arabic_features[i]);
 300     if (masks[i] == plan->map.get_global_mask ())
 301       masks[i] = 0;
 302     all_masks |= masks[i];
 303   }
 304   if (!all_masks)
 305     return;
 306   hb_mask_t other_masks = ~all_masks;
 307 
 308   unsigned int last_start = 0;
 309   joining_form_t last_form = _NONE;
 310   hb_glyph_info_t *info = buffer->info;
 311   foreach_syllable (buffer, start, end)
 312   {
 313     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
 314     switch (syllable_type)
 315     {
 316       case independent_cluster:
 317       case symbol_cluster:
 318       case non_cluster:
 319         /* These don't join.  Nothing to do. */
 320         last_form = _NONE;
 321         break;
 322 
 323       case virama_terminated_cluster:
 324       case standard_cluster:
 325       case number_joiner_terminated_cluster:
 326       case numeral_cluster:
 327       case broken_cluster:
 328 
 329         bool join = last_form == FINA || last_form == ISOL;
 330 
 331         if (join)
 332         {
 333           /* Fixup previous syllable's form. */
 334           last_form = last_form == FINA ? MEDI : INIT;
 335           for (unsigned int i = last_start; i < start; i++)
 336             info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 337         }
 338 
 339         /* Form for this syllable. */
 340         last_form = join ? FINA : ISOL;
 341         for (unsigned int i = start; i < end; i++)
 342           info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 343 
 344         break;
 345     }
 346 
 347     last_start = start;
 348   }
 349 }
 350 
 351 static void
 352 setup_syllables (const hb_ot_shape_plan_t *plan,
 353                  hb_font_t *font HB_UNUSED,
 354                  hb_buffer_t *buffer)
 355 {
 356   find_syllables (buffer);
 357   foreach_syllable (buffer, start, end)
 358     buffer->unsafe_to_break (start, end);
 359   setup_rphf_mask (plan, buffer);
 360   setup_topographical_masks (plan, buffer);
 361 }
 362 
 363 static void
 364 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 365                           hb_font_t *font HB_UNUSED,
 366                           hb_buffer_t *buffer)
 367 {
 368   hb_glyph_info_t *info = buffer->info;
 369   unsigned int count = buffer->len;
 370   for (unsigned int i = 0; i < count; i++)
 371     _hb_glyph_info_clear_substituted (&info[i]);
 372 }
 373 
 374 static void
 375 record_rphf (const hb_ot_shape_plan_t *plan,
 376              hb_font_t *font,
 377              hb_buffer_t *buffer)
 378 {
 379   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 380 
 381   hb_mask_t mask = use_plan->rphf_mask;
 382   if (!mask) return;
 383   hb_glyph_info_t *info = buffer->info;
 384 
 385   foreach_syllable (buffer, start, end)
 386   {
 387     /* Mark a substituted repha as USE_R. */
 388     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
 389       if (_hb_glyph_info_substituted (&info[i]))
 390       {
 391         info[i].use_category() = USE_R;
 392         break;
 393       }
 394   }
 395 }
 396 
 397 static void
 398 record_pref (const hb_ot_shape_plan_t *plan,
 399              hb_font_t *font,
 400              hb_buffer_t *buffer)
 401 {
 402   hb_glyph_info_t *info = buffer->info;
 403 
 404   foreach_syllable (buffer, start, end)
 405   {
 406     /* Mark a substituted pref as VPre, as they behave the same way. */
 407     for (unsigned int i = start; i < end; i++)
 408       if (_hb_glyph_info_substituted (&info[i]))
 409       {
 410         info[i].use_category() = USE_VPre;
 411         break;
 412       }
 413   }
 414 }
 415 
 416 static inline bool
 417 is_halant (const hb_glyph_info_t &info)
 418 {
 419   return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
 420 }
 421 
 422 static void
 423 reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
 424 {
 425   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 426   /* Only a few syllable types need reordering. */
 427   if (unlikely (!(FLAG_UNSAFE (syllable_type) &
 428                   (FLAG (virama_terminated_cluster) |
 429                    FLAG (standard_cluster) |
 430                    FLAG (broken_cluster) |
 431                    0))))
 432     return;
 433 
 434   hb_glyph_info_t *info = buffer->info;
 435 
 436 #define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB))
 437 
 438   /* Move things forward. */
 439   if (info[start].use_category() == USE_R && end - start > 1)
 440   {
 441     /* Got a repha.  Reorder it to after first base, before first halant. */
 442     for (unsigned int i = start + 1; i < end; i++)
 443       if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
 444       {
 445         /* If we hit a halant, move before it; otherwise it's a base: move to it's
 446          * place, and shift things in between backward. */
 447 
 448         if (is_halant (info[i]))
 449           i--;
 450 
 451         buffer->merge_clusters (start, i + 1);
 452         hb_glyph_info_t t = info[start];
 453         memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
 454         info[i] = t;
 455 
 456         break;
 457       }
 458   }
 459 
 460   /* Move things back. */
 461   unsigned int j = end;
 462   for (unsigned int i = start; i < end; i++)
 463   {
 464     uint32_t flag = FLAG_UNSAFE (info[i].use_category());
 465     if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
 466     {
 467       /* If we hit a halant, move after it; otherwise it's a base: move to it's
 468        * place, and shift things in between backward. */
 469       if (is_halant (info[i]))
 470         j = i + 1;
 471       else
 472         j = i;
 473     }
 474     else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
 475              /* Only move the first component of a MultipleSubst. */
 476              0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
 477              j < i)
 478     {
 479       buffer->merge_clusters (j, i + 1);
 480       hb_glyph_info_t t = info[i];
 481       memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
 482       info[j] = t;
 483     }
 484   }
 485 }
 486 
 487 static inline void
 488 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 489                        hb_font_t *font,
 490                        hb_buffer_t *buffer)
 491 {
 492   /* Note: This loop is extra overhead, but should not be measurable. */
 493   bool has_broken_syllables = false;
 494   unsigned int count = buffer->len;
 495   hb_glyph_info_t *info = buffer->info;
 496   for (unsigned int i = 0; i < count; i++)
 497     if ((info[i].syllable() & 0x0F) == broken_cluster)
 498     {
 499       has_broken_syllables = true;
 500       break;
 501     }
 502   if (likely (!has_broken_syllables))
 503     return;
 504 
 505   hb_glyph_info_t dottedcircle = {0};
 506   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
 507     return;
 508   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
 509 
 510   buffer->clear_output ();
 511 
 512   buffer->idx = 0;
 513   unsigned int last_syllable = 0;
 514   while (buffer->idx < buffer->len && !buffer->in_error)
 515   {
 516     unsigned int syllable = buffer->cur().syllable();
 517     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 518     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 519     {
 520       last_syllable = syllable;
 521 
 522       hb_glyph_info_t ginfo = dottedcircle;
 523       ginfo.cluster = buffer->cur().cluster;
 524       ginfo.mask = buffer->cur().mask;
 525       ginfo.syllable() = buffer->cur().syllable();
 526       /* TODO Set glyph_props? */
 527 
 528       /* Insert dottedcircle after possible Repha. */
 529       while (buffer->idx < buffer->len && !buffer->in_error &&
 530              last_syllable == buffer->cur().syllable() &&
 531              buffer->cur().use_category() == USE_R)
 532         buffer->next_glyph ();
 533 
 534       buffer->output_info (ginfo);
 535     }
 536     else
 537       buffer->next_glyph ();
 538   }
 539 
 540   buffer->swap_buffers ();
 541 }
 542 
 543 static void
 544 reorder (const hb_ot_shape_plan_t *plan,
 545          hb_font_t *font,
 546          hb_buffer_t *buffer)
 547 {
 548   insert_dotted_circles (plan, font, buffer);
 549 
 550   hb_glyph_info_t *info = buffer->info;
 551 
 552   foreach_syllable (buffer, start, end)
 553     reorder_syllable (buffer, start, end);
 554 
 555   /* Zero syllables now... */
 556   unsigned int count = buffer->len;
 557   for (unsigned int i = 0; i < count; i++)
 558     info[i].syllable() = 0;
 559 
 560   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
 561 }
 562 
 563 static bool
 564 decompose_use (const hb_ot_shape_normalize_context_t *c,
 565                 hb_codepoint_t  ab,
 566                 hb_codepoint_t *a,
 567                 hb_codepoint_t *b)
 568 {
 569   switch (ab)
 570   {
 571     /* Chakma:
 572      * Special case where the Unicode decomp gives matras in the wrong order
 573      * for cluster validation.
 574      */
 575     case 0x1112Eu : *a = 0x11127u; *b= 0x11131u; return true;
 576     case 0x1112Fu : *a = 0x11127u; *b= 0x11132u; return true;
 577   }
 578 
 579   return (bool) c->unicode->decompose (ab, a, b);
 580 }
 581 
 582 static bool
 583 compose_use (const hb_ot_shape_normalize_context_t *c,
 584              hb_codepoint_t  a,
 585              hb_codepoint_t  b,
 586              hb_codepoint_t *ab)
 587 {
 588   /* Avoid recomposing split matras. */
 589   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 590     return false;
 591 
 592   return (bool)c->unicode->compose (a, b, ab);
 593 }
 594 
 595 
/* Entry table of the USE shaper.  The initializer is positional; the
 * trailing comments name the slots for which this shaper supplies no hook
 * (nullptr) or a plain value. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
{
  collect_features_use,
  nullptr, /* override_features */
  data_create_use,
  data_destroy_use,
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  decompose_use,
  compose_use,
  setup_masks_use,
  nullptr, /* disable_otl */
  nullptr, /* reorder_marks */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
  false, /* fallback_position */
};