1 /*
   2  * Copyright © 2015  Mozilla Foundation.
   3  * Copyright © 2015  Google, Inc.
   4  *
   5  *  This is part of HarfBuzz, a text shaping library.
   6  *
   7  * Permission is hereby granted, without written agreement and without
   8  * license or royalty fees, to use, copy, modify, and distribute this
   9  * software and its documentation for any purpose, provided that the
  10  * above copyright notice and the following two paragraphs appear in
  11  * all copies of this software.
  12  *
  13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  17  * DAMAGE.
  18  *
  19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  24  *
  25  * Mozilla Author(s): Jonathan Kew
  26  * Google Author(s): Behdad Esfahbod
  27  */
  28 
  29 #include "hb-ot-shape-complex-use-private.hh"
  30 #include "hb-ot-shape-complex-arabic-private.hh"
  31 
  32 /* buffer var allocations */
  33 #define use_category() complex_var_u8_0()
  34 
  35 
  36 /*
  37  * Universal Shaping Engine.
  38  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
  39  */
  40 
  41 static const hb_tag_t
  42 basic_features[] =
  43 {
  44   /*
  45    * Basic features.
  46    * These features are applied all at once, before reordering.
  47    */
  48   HB_TAG('r','k','r','f'),
  49   HB_TAG('a','b','v','f'),
  50   HB_TAG('b','l','w','f'),
  51   HB_TAG('h','a','l','f'),
  52   HB_TAG('p','s','t','f'),
  53   HB_TAG('v','a','t','u'),
  54   HB_TAG('c','j','c','t'),
  55 };
  56 static const hb_tag_t
  57 arabic_features[] =
  58 {
  59   HB_TAG('i','s','o','l'),
  60   HB_TAG('i','n','i','t'),
  61   HB_TAG('m','e','d','i'),
  62   HB_TAG('f','i','n','a'),
  63   /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
  64    * does.  These are only used in Syriac spec. */
  65   HB_TAG('m','e','d','2'),
  66   HB_TAG('f','i','n','2'),
  67   HB_TAG('f','i','n','3'),
  68 };
  69 /* Same order as arabic_features.  Don't need Syriac stuff.*/
  70 enum joining_form_t {
  71   ISOL,
  72   INIT,
  73   MEDI,
  74   FINA,
  75   _NONE
  76 };
  77 static const hb_tag_t
  78 other_features[] =
  79 {
  80   /*
  81    * Other features.
  82    * These features are applied all at once, after reordering.
  83    */
  84   HB_TAG('a','b','v','s'),
  85   HB_TAG('b','l','w','s'),
  86   HB_TAG('h','a','l','n'),
  87   HB_TAG('p','r','e','s'),
  88   HB_TAG('p','s','t','s'),
  89   /* Positioning features, though we don't care about the types. */
  90   HB_TAG('d','i','s','t'),
  91   HB_TAG('a','b','v','m'),
  92   HB_TAG('b','l','w','m'),
  93 };
  94 
  95 static void
  96 setup_syllables (const hb_ot_shape_plan_t *plan,
  97                  hb_font_t *font,
  98                  hb_buffer_t *buffer);
  99 static void
 100 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 101                           hb_font_t *font,
 102                           hb_buffer_t *buffer);
 103 static void
 104 record_rphf (const hb_ot_shape_plan_t *plan,
 105              hb_font_t *font,
 106              hb_buffer_t *buffer);
 107 static void
 108 record_pref (const hb_ot_shape_plan_t *plan,
 109              hb_font_t *font,
 110              hb_buffer_t *buffer);
 111 static void
 112 reorder (const hb_ot_shape_plan_t *plan,
 113          hb_font_t *font,
 114          hb_buffer_t *buffer);
 115 
 116 static void
 117 collect_features_use (hb_ot_shape_planner_t *plan)
 118 {
 119   hb_ot_map_builder_t *map = &plan->map;
 120 
 121   /* Do this before any lookups have been applied. */
 122   map->add_gsub_pause (setup_syllables);
 123 
 124   /* "Default glyph pre-processing group" */
 125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 126   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 127   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
 128   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
 129 
 130   /* "Reordering group" */
 131   map->add_gsub_pause (clear_substitution_flags);
 132   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
 133   map->add_gsub_pause (record_rphf);
 134   map->add_gsub_pause (clear_substitution_flags);
 135   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
 136   map->add_gsub_pause (record_pref);
 137 
 138   /* "Orthographic unit shaping group" */
 139   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 140     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 141 
 142   map->add_gsub_pause (reorder);
 143 
 144   /* "Topographical features" */
 145   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
 146     map->add_feature (arabic_features[i], 1, F_NONE);
 147   map->add_gsub_pause (NULL);
 148 
 149   /* "Standard typographic presentation" and "Positional feature application" */
 150   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 151     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 152 }
 153 
 154 struct use_shape_plan_t
 155 {
 156   ASSERT_POD ();
 157 
 158   hb_mask_t rphf_mask;
 159 
 160   arabic_shape_plan_t *arabic_plan;
 161 };
 162 
 163 static bool
 164 has_arabic_joining (hb_script_t script)
 165 {
 166   /* List of scripts that have data in arabic-table. */
 167   switch ((int) script)
 168   {
 169     /* Unicode-1.1 additions */
 170     case HB_SCRIPT_ARABIC:
 171 
 172     /* Unicode-3.0 additions */
 173     case HB_SCRIPT_MONGOLIAN:
 174     case HB_SCRIPT_SYRIAC:
 175 
 176     /* Unicode-5.0 additions */
 177     case HB_SCRIPT_NKO:
 178     case HB_SCRIPT_PHAGS_PA:
 179 
 180     /* Unicode-6.0 additions */
 181     case HB_SCRIPT_MANDAIC:
 182 
 183     /* Unicode-7.0 additions */
 184     case HB_SCRIPT_MANICHAEAN:
 185     case HB_SCRIPT_PSALTER_PAHLAVI:
 186 
 187     /* Unicode-9.0 additions */
 188     case HB_SCRIPT_ADLAM:
 189 
 190       return true;
 191 
 192     default:
 193       return false;
 194   }
 195 }
 196 
 197 static void *
 198 data_create_use (const hb_ot_shape_plan_t *plan)
 199 {
 200   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
 201   if (unlikely (!use_plan))
 202     return NULL;
 203 
 204   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
 205 
 206   if (has_arabic_joining (plan->props.script))
 207   {
 208     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
 209     if (unlikely (!use_plan->arabic_plan))
 210     {
 211       free (use_plan);
 212       return NULL;
 213     }
 214   }
 215 
 216   return use_plan;
 217 }
 218 
 219 static void
 220 data_destroy_use (void *data)
 221 {
 222   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
 223 
 224   if (use_plan->arabic_plan)
 225     data_destroy_arabic (use_plan->arabic_plan);
 226 
 227   free (data);
 228 }
 229 
 230 enum syllable_type_t {
 231   independent_cluster,
 232   virama_terminated_cluster,
 233   standard_cluster,
 234   number_joiner_terminated_cluster,
 235   numeral_cluster,
 236   symbol_cluster,
 237   broken_cluster,
 238   non_cluster,
 239 };
 240 
 241 #include "hb-ot-shape-complex-use-machine.hh"
 242 
 243 
 244 static void
 245 setup_masks_use (const hb_ot_shape_plan_t *plan,
 246                  hb_buffer_t              *buffer,
 247                  hb_font_t                *font HB_UNUSED)
 248 {
 249   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 250 
 251   /* Do this before allocating use_category(). */
 252   if (use_plan->arabic_plan)
 253   {
 254     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
 255   }
 256 
 257   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
 258 
 259   /* We cannot setup masks here.  We save information about characters
 260    * and setup masks later on in a pause-callback. */
 261 
 262   unsigned int count = buffer->len;
 263   hb_glyph_info_t *info = buffer->info;
 264   for (unsigned int i = 0; i < count; i++)
 265     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
 266 }
 267 
 268 static void
 269 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
 270                  hb_buffer_t *buffer)
 271 {
 272   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 273 
 274   hb_mask_t mask = use_plan->rphf_mask;
 275   if (!mask) return;
 276 
 277   hb_glyph_info_t *info = buffer->info;
 278 
 279   foreach_syllable (buffer, start, end)
 280   {
 281     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
 282     for (unsigned int i = start; i < start + limit; i++)
 283       info[i].mask |= mask;
 284   }
 285 }
 286 
 287 static void
 288 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
 289                            hb_buffer_t *buffer)
 290 {
 291   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 292   if (use_plan->arabic_plan)
 293     return;
 294 
 295   ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
 296   hb_mask_t masks[4], all_masks = 0;
 297   for (unsigned int i = 0; i < 4; i++)
 298   {
 299     masks[i] = plan->map.get_1_mask (arabic_features[i]);
 300     if (masks[i] == plan->map.get_global_mask ())
 301       masks[i] = 0;
 302     all_masks |= masks[i];
 303   }
 304   if (!all_masks)
 305     return;
 306   hb_mask_t other_masks = ~all_masks;
 307 
 308   unsigned int last_start = 0;
 309   joining_form_t last_form = _NONE;
 310   hb_glyph_info_t *info = buffer->info;
 311   foreach_syllable (buffer, start, end)
 312   {
 313     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
 314     switch (syllable_type)
 315     {
 316       case independent_cluster:
 317       case symbol_cluster:
 318       case non_cluster:
 319         /* These don't join.  Nothing to do. */
 320         last_form = _NONE;
 321         break;
 322 
 323       case virama_terminated_cluster:
 324       case standard_cluster:
 325       case number_joiner_terminated_cluster:
 326       case numeral_cluster:
 327       case broken_cluster:
 328 
 329         bool join = last_form == FINA || last_form == ISOL;
 330 
 331         if (join)
 332         {
 333           /* Fixup previous syllable's form. */
 334           last_form = last_form == FINA ? MEDI : INIT;
 335           for (unsigned int i = last_start; i < start; i++)
 336             info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 337         }
 338 
 339         /* Form for this syllable. */
 340         last_form = join ? FINA : ISOL;
 341         for (unsigned int i = start; i < end; i++)
 342           info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 343 
 344         break;
 345     }
 346 
 347     last_start = start;
 348   }
 349 }
 350 
 351 static void
 352 setup_syllables (const hb_ot_shape_plan_t *plan,
 353                  hb_font_t *font HB_UNUSED,
 354                  hb_buffer_t *buffer)
 355 {
 356   find_syllables (buffer);
 357   setup_rphf_mask (plan, buffer);
 358   setup_topographical_masks (plan, buffer);
 359 }
 360 
 361 static void
 362 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 363                           hb_font_t *font HB_UNUSED,
 364                           hb_buffer_t *buffer)
 365 {
 366   hb_glyph_info_t *info = buffer->info;
 367   unsigned int count = buffer->len;
 368   for (unsigned int i = 0; i < count; i++)
 369     _hb_glyph_info_clear_substituted (&info[i]);
 370 }
 371 
 372 static void
 373 record_rphf (const hb_ot_shape_plan_t *plan,
 374              hb_font_t *font,
 375              hb_buffer_t *buffer)
 376 {
 377   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 378 
 379   hb_mask_t mask = use_plan->rphf_mask;
 380   if (!mask) return;
 381   hb_glyph_info_t *info = buffer->info;
 382 
 383   foreach_syllable (buffer, start, end)
 384   {
 385     /* Mark a substituted repha as USE_R. */
 386     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
 387       if (_hb_glyph_info_substituted (&info[i]))
 388       {
 389         info[i].use_category() = USE_R;
 390         break;
 391       }
 392   }
 393 }
 394 
 395 static void
 396 record_pref (const hb_ot_shape_plan_t *plan,
 397              hb_font_t *font,
 398              hb_buffer_t *buffer)
 399 {
 400   hb_glyph_info_t *info = buffer->info;
 401 
 402   foreach_syllable (buffer, start, end)
 403   {
 404     /* Mark a substituted pref as VPre, as they behave the same way. */
 405     for (unsigned int i = start; i < end; i++)
 406       if (_hb_glyph_info_substituted (&info[i]))
 407       {
 408         info[i].use_category() = USE_VPre;
 409         break;
 410       }
 411   }
 412 }
 413 
 414 static inline bool
 415 is_halant (const hb_glyph_info_t &info)
 416 {
 417   return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
 418 }
 419 
 420 static void
 421 reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
 422 {
 423   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 424   /* Only a few syllable types need reordering. */
 425   if (unlikely (!(FLAG_SAFE (syllable_type) &
 426                   (FLAG (virama_terminated_cluster) |
 427                    FLAG (standard_cluster) |
 428                    FLAG (broken_cluster) |
 429                    0))))
 430     return;
 431 
 432   hb_glyph_info_t *info = buffer->info;
 433 
 434 #define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB))
 435 
 436   /* Move things forward. */
 437   if (info[start].use_category() == USE_R && end - start > 1)
 438   {
 439     /* Got a repha.  Reorder it to after first base, before first halant. */
 440     for (unsigned int i = start + 1; i < end; i++)
 441       if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
 442       {
 443         /* If we hit a halant, move before it; otherwise it's a base: move to it's
 444          * place, and shift things in between backward. */
 445 
 446         if (is_halant (info[i]))
 447           i--;
 448 
 449         buffer->merge_clusters (start, i + 1);
 450         hb_glyph_info_t t = info[start];
 451         memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
 452         info[i] = t;
 453 
 454         break;
 455       }
 456   }
 457 
 458   /* Move things back. */
 459   unsigned int j = end;
 460   for (unsigned int i = start; i < end; i++)
 461   {
 462     uint32_t flag = FLAG_UNSAFE (info[i].use_category());
 463     if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
 464     {
 465       /* If we hit a halant, move after it; otherwise it's a base: move to it's
 466        * place, and shift things in between backward. */
 467       if (is_halant (info[i]))
 468         j = i + 1;
 469       else
 470         j = i;
 471     }
 472     else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
 473              /* Only move the first component of a MultipleSubst. */
 474              0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
 475              j < i)
 476     {
 477       buffer->merge_clusters (j, i + 1);
 478       hb_glyph_info_t t = info[i];
 479       memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
 480       info[j] = t;
 481     }
 482   }
 483 }
 484 
 485 static inline void
 486 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 487                        hb_font_t *font,
 488                        hb_buffer_t *buffer)
 489 {
 490   /* Note: This loop is extra overhead, but should not be measurable. */
 491   bool has_broken_syllables = false;
 492   unsigned int count = buffer->len;
 493   hb_glyph_info_t *info = buffer->info;
 494   for (unsigned int i = 0; i < count; i++)
 495     if ((info[i].syllable() & 0x0F) == broken_cluster)
 496     {
 497       has_broken_syllables = true;
 498       break;
 499     }
 500   if (likely (!has_broken_syllables))
 501     return;
 502 
 503   hb_glyph_info_t dottedcircle = {0};
 504   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
 505     return;
 506   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
 507 
 508   buffer->clear_output ();
 509 
 510   buffer->idx = 0;
 511   unsigned int last_syllable = 0;
 512   while (buffer->idx < buffer->len && !buffer->in_error)
 513   {
 514     unsigned int syllable = buffer->cur().syllable();
 515     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 516     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 517     {
 518       last_syllable = syllable;
 519 
 520       hb_glyph_info_t ginfo = dottedcircle;
 521       ginfo.cluster = buffer->cur().cluster;
 522       ginfo.mask = buffer->cur().mask;
 523       ginfo.syllable() = buffer->cur().syllable();
 524       /* TODO Set glyph_props? */
 525 
 526       /* Insert dottedcircle after possible Repha. */
 527       while (buffer->idx < buffer->len && !buffer->in_error &&
 528              last_syllable == buffer->cur().syllable() &&
 529              buffer->cur().use_category() == USE_R)
 530         buffer->next_glyph ();
 531 
 532       buffer->output_info (ginfo);
 533     }
 534     else
 535       buffer->next_glyph ();
 536   }
 537 
 538   buffer->swap_buffers ();
 539 }
 540 
 541 static void
 542 reorder (const hb_ot_shape_plan_t *plan,
 543          hb_font_t *font,
 544          hb_buffer_t *buffer)
 545 {
 546   insert_dotted_circles (plan, font, buffer);
 547 
 548   hb_glyph_info_t *info = buffer->info;
 549 
 550   foreach_syllable (buffer, start, end)
 551     reorder_syllable (buffer, start, end);
 552 
 553   /* Zero syllables now... */
 554   unsigned int count = buffer->len;
 555   for (unsigned int i = 0; i < count; i++)
 556     info[i].syllable() = 0;
 557 
 558   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
 559 }
 560 
 561 static bool
 562 decompose_use (const hb_ot_shape_normalize_context_t *c,
 563                 hb_codepoint_t  ab,
 564                 hb_codepoint_t *a,
 565                 hb_codepoint_t *b)
 566 {
 567   switch (ab)
 568   {
 569     /* Chakma:
 570      * Special case where the Unicode decomp gives matras in the wrong order
 571      * for cluster validation.
 572      */
 573     case 0x1112Eu : *a = 0x11127u; *b= 0x11131u; return true;
 574     case 0x1112Fu : *a = 0x11127u; *b= 0x11132u; return true;
 575 
 576     /*
 577      * Decompose split matras that don't have Unicode decompositions.
 578      */
 579 
 580     /* Limbu */
 581     case 0x1925u  : *a = 0x1920u; *b= 0x1923u; return true;
 582     case 0x1926u  : *a = 0x1920u; *b= 0x1924u; return true;
 583 
 584     /* Balinese */
 585     case 0x1B3Cu  : *a = 0x1B42u; *b= 0x1B3Cu; return true;
 586 
 587 #if 0
 588     /* Lepcha */
 589     case 0x1C29u  : *a = no decomp, -> LEFT; return true;
 590 
 591     /* Javanese */
 592     case 0xA9C0u  : *a = no decomp, -> RIGHT; return true;
 593 
 594     /* Sharada */
 595     case 0x111BFu  : *a = no decomp, -> ABOVE; return true;
 596 #endif
 597   }
 598 
 599   return (bool) c->unicode->decompose (ab, a, b);
 600 }
 601 
 602 static bool
 603 compose_use (const hb_ot_shape_normalize_context_t *c,
 604              hb_codepoint_t  a,
 605              hb_codepoint_t  b,
 606              hb_codepoint_t *ab)
 607 {
 608   /* Avoid recomposing split matras. */
 609   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 610     return false;
 611 
 612   return (bool)c->unicode->compose (a, b, ab);
 613 }
 614 
 615 
 616 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
 617 {
 618   "use",
 619   collect_features_use,
 620   NULL, /* override_features */
 621   data_create_use,
 622   data_destroy_use,
 623   NULL, /* preprocess_text */
 624   NULL, /* postprocess_glyphs */
 625   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 626   decompose_use,
 627   compose_use,
 628   setup_masks_use,
 629   NULL, /* disable_otl */
 630   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
 631   false, /* fallback_position */
 632 };