1 /*
   2  * Copyright © 2015  Mozilla Foundation.
   3  * Copyright © 2015  Google, Inc.
   4  *
   5  *  This is part of HarfBuzz, a text shaping library.
   6  *
   7  * Permission is hereby granted, without written agreement and without
   8  * license or royalty fees, to use, copy, modify, and distribute this
   9  * software and its documentation for any purpose, provided that the
  10  * above copyright notice and the following two paragraphs appear in
  11  * all copies of this software.
  12  *
  13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  17  * DAMAGE.
  18  *
  19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  24  *
  25  * Mozilla Author(s): Jonathan Kew
  26  * Google Author(s): Behdad Esfahbod
  27  */
  28 
  29 #include "hb-ot-shape-complex-use-private.hh"
  30 #include "hb-ot-shape-complex-arabic-private.hh"
  31 
/* buffer var allocations */
/* use_category() names the per-glyph complex_var_u8_0() buffer variable.  In
 * this file it is allocated in setup_masks_use() and released in reorder(). */
#define use_category() complex_var_u8_0()
  34 
  35 
  36 /*
  37  * Universal Shaping Engine.
  38  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
  39  */
  40 
/* Feature tags that collect_features_use() registers for the "Orthographic
 * unit shaping group"; each one is added with F_GLOBAL | F_MANUAL_ZWJ, ahead
 * of the reorder pause. */
static const hb_tag_t
basic_features[] =
{
  /*
   * Basic features.
   * These features are applied all at once, before reordering.
   */
  HB_TAG('r','k','r','f'),
  HB_TAG('a','b','v','f'),
  HB_TAG('b','l','w','f'),
  HB_TAG('h','a','l','f'),
  HB_TAG('p','s','t','f'),
  HB_TAG('v','a','t','u'),
  HB_TAG('c','j','c','t'),
};
/* Topographical (joining-form) feature tags, registered by
 * collect_features_use() with F_NONE after the reorder pause.
 * setup_topographical_masks() looks up the first four entries by index and
 * then selects among them with a joining_form_t value, so the order of those
 * four entries must stay in step with that enum. */
static const hb_tag_t
arabic_features[] =
{
  HB_TAG('i','s','o','l'),
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
   * does.  These are only used in Syriac spec. */
  HB_TAG('m','e','d','2'),
  HB_TAG('f','i','n','2'),
  HB_TAG('f','i','n','3'),
};
  69 /* Same order as arabic_features.  Don't need Syriac stuff.*/
  70 enum joining_form_t {
  71   ISOL,
  72   INIT,
  73   MEDI,
  74   FINA,
  75   _NONE
  76 };
/* Feature tags that collect_features_use() registers last, each with
 * F_GLOBAL | F_MANUAL_ZWJ, for the "Standard typographic presentation" and
 * "Positional feature application" groups. */
static const hb_tag_t
other_features[] =
{
  /*
   * Other features.
   * These features are applied all at once, after reordering.
   */
  HB_TAG('a','b','v','s'),
  HB_TAG('b','l','w','s'),
  HB_TAG('h','a','l','n'),
  HB_TAG('p','r','e','s'),
  HB_TAG('p','s','t','s'),
  /* Positioning features, though we don't care about the types. */
  HB_TAG('d','i','s','t'),
  HB_TAG('a','b','v','m'),
  HB_TAG('b','l','w','m'),
};
  94 
/* Forward declarations of the GSUB pause callbacks that collect_features_use()
 * registers; their definitions appear further down in this file. */

/* Finds syllables, then sets up the rphf and topographical masks. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font,
                 hb_buffer_t *buffer);
/* Clears the substituted/ligated/multiplied flags on every glyph. */
static void
clear_substitution_flags (const hb_ot_shape_plan_t *plan,
                          hb_font_t *font,
                          hb_buffer_t *buffer);
/* Marks a glyph substituted under the rphf mask as USE_R. */
static void
record_rphf (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
/* Marks the first substituted glyph of each syllable as USE_VPre. */
static void
record_pref (const hb_ot_shape_plan_t *plan,
             hb_font_t *font,
             hb_buffer_t *buffer);
/* Inserts dotted circles, reorders each syllable, then clears syllable data. */
static void
reorder (const hb_ot_shape_plan_t *plan,
         hb_font_t *font,
         hb_buffer_t *buffer);
 115 
 116 static void
 117 collect_features_use (hb_ot_shape_planner_t *plan)
 118 {
 119   hb_ot_map_builder_t *map = &plan->map;
 120 
 121   /* Do this before any lookups have been applied. */
 122   map->add_gsub_pause (setup_syllables);
 123 
 124   /* "Default glyph pre-processing group" */
 125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 126   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 127   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
 128   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
 129 
 130   /* "Reordering group" */
 131   map->add_gsub_pause (clear_substitution_flags);
 132   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
 133   map->add_gsub_pause (record_rphf);
 134   map->add_gsub_pause (clear_substitution_flags);
 135   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
 136   map->add_gsub_pause (record_pref);
 137 
 138   /* "Orthographic unit shaping group" */
 139   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 140     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 141 
 142   map->add_gsub_pause (reorder);
 143 
 144   /* "Topographical features" */
 145   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
 146     map->add_feature (arabic_features[i], 1, F_NONE);
 147   map->add_gsub_pause (NULL);
 148 
 149   /* "Standard typographic presentation" and "Positional feature application" */
 150   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 151     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 152 }
 153 
/* Per-shape-plan data for the USE shaper.  Instances are created zero-filled
 * by data_create_use() and released by data_destroy_use(). */
struct use_shape_plan_t
{
  ASSERT_POD ();

  /* Mask of the 'rphf' feature as returned by the plan's map; zero makes
   * setup_rphf_mask() and record_rphf() return without doing anything. */
  hb_mask_t rphf_mask;

  /* Arabic shaper plan, set only for scripts accepted by has_arabic_joining();
   * NULL otherwise.  Owned by this struct. */
  arabic_shape_plan_t *arabic_plan;
};
 162 
 163 static bool
 164 has_arabic_joining (hb_script_t script)
 165 {
 166   /* List of scripts that have data in arabic-table. */
 167   switch ((int) script)
 168   {
 169     /* Unicode-1.1 additions */
 170     case HB_SCRIPT_ARABIC:
 171 
 172     /* Unicode-3.0 additions */
 173     case HB_SCRIPT_MONGOLIAN:
 174     case HB_SCRIPT_SYRIAC:
 175 
 176     /* Unicode-5.0 additions */
 177     case HB_SCRIPT_NKO:
 178     case HB_SCRIPT_PHAGS_PA:
 179 
 180     /* Unicode-6.0 additions */
 181     case HB_SCRIPT_MANDAIC:
 182 
 183     /* Unicode-7.0 additions */
 184     case HB_SCRIPT_MANICHAEAN:
 185     case HB_SCRIPT_PSALTER_PAHLAVI:
 186 
 187       return true;
 188 
 189     default:
 190       return false;
 191   }
 192 }
 193 
 194 static void *
 195 data_create_use (const hb_ot_shape_plan_t *plan)
 196 {
 197   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
 198   if (unlikely (!use_plan))
 199     return NULL;
 200 
 201   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
 202 
 203   if (has_arabic_joining (plan->props.script))
 204   {
 205     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
 206     if (unlikely (!use_plan->arabic_plan))
 207     {
 208       free (use_plan);
 209       return NULL;
 210     }
 211   }
 212 
 213   return use_plan;
 214 }
 215 
 216 static void
 217 data_destroy_use (void *data)
 218 {
 219   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
 220 
 221   if (use_plan->arabic_plan)
 222     data_destroy_arabic (use_plan->arabic_plan);
 223 
 224   free (data);
 225 }
 226 
 227 enum syllable_type_t {
 228   independent_cluster,
 229   virama_terminated_cluster,
 230   consonant_cluster,
 231   vowel_cluster,
 232   number_joiner_terminated_cluster,
 233   numeral_cluster,
 234   symbol_cluster,
 235   broken_cluster,
 236 };
 237 
 238 #include "hb-ot-shape-complex-use-machine.hh"
 239 
 240 
 241 static void
 242 setup_masks_use (const hb_ot_shape_plan_t *plan,
 243                  hb_buffer_t              *buffer,
 244                  hb_font_t                *font HB_UNUSED)
 245 {
 246   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 247 
 248   /* Do this before allocating use_category(). */
 249   if (use_plan->arabic_plan)
 250   {
 251     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
 252   }
 253 
 254   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
 255 
 256   /* We cannot setup masks here.  We save information about characters
 257    * and setup masks later on in a pause-callback. */
 258 
 259   unsigned int count = buffer->len;
 260   hb_glyph_info_t *info = buffer->info;
 261   for (unsigned int i = 0; i < count; i++)
 262     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
 263 }
 264 
 265 static void
 266 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
 267                  hb_buffer_t *buffer)
 268 {
 269   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 270 
 271   hb_mask_t mask = use_plan->rphf_mask;
 272   if (!mask) return;
 273 
 274   hb_glyph_info_t *info = buffer->info;
 275 
 276   foreach_syllable (buffer, start, end)
 277   {
 278     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
 279     for (unsigned int i = start; i < start + limit; i++)
 280       info[i].mask |= mask;
 281   }
 282 }
 283 
 284 static void
 285 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
 286                            hb_buffer_t *buffer)
 287 {
 288 
 289   ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
 290   hb_mask_t masks[4], all_masks = 0;
 291   for (unsigned int i = 0; i < 4; i++)
 292   {
 293     masks[i] = plan->map.get_1_mask (arabic_features[i]);
 294     if (masks[i] == plan->map.get_global_mask ())
 295       masks[i] = 0;
 296     all_masks |= masks[i];
 297   }
 298   if (!all_masks)
 299     return;
 300   hb_mask_t other_masks = ~all_masks;
 301 
 302   unsigned int last_start = 0;
 303   joining_form_t last_form = _NONE;
 304   hb_glyph_info_t *info = buffer->info;
 305   foreach_syllable (buffer, start, end)
 306   {
 307     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
 308     switch (syllable_type)
 309     {
 310       case independent_cluster:
 311       case symbol_cluster:
 312         /* These don't join.  Nothing to do. */
 313         last_form = _NONE;
 314         break;
 315 
 316       case virama_terminated_cluster:
 317       case consonant_cluster:
 318       case vowel_cluster:
 319       case number_joiner_terminated_cluster:
 320       case numeral_cluster:
 321       case broken_cluster:
 322 
 323         bool join = last_form == FINA || last_form == ISOL;
 324 
 325         if (join)
 326         {
 327           /* Fixup previous syllable's form. */
 328           last_form = last_form == FINA ? MEDI : INIT;
 329           for (unsigned int i = last_start; i < start; i++)
 330             info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 331         }
 332 
 333         /* Form for this syllable. */
 334         last_form = join ? FINA : ISOL;
 335         for (unsigned int i = start; i < end; i++)
 336           info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 337 
 338         break;
 339     }
 340 
 341     last_start = start;
 342   }
 343 }
 344 
/* GSUB pause callback registered first by collect_features_use(), so it runs
 * before any lookups have been applied.  It segments the buffer into
 * syllables and then sets up the rphf and topographical masks, both of which
 * iterate over those syllables. */
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
                 hb_font_t *font HB_UNUSED,
                 hb_buffer_t *buffer)
{
  find_syllables (buffer);
  setup_rphf_mask (plan, buffer);
  setup_topographical_masks (plan, buffer);
}
 354 
 355 static void
 356 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 357                           hb_font_t *font HB_UNUSED,
 358                           hb_buffer_t *buffer)
 359 {
 360   hb_glyph_info_t *info = buffer->info;
 361   unsigned int count = buffer->len;
 362   for (unsigned int i = 0; i < count; i++)
 363     _hb_glyph_info_clear_substituted_and_ligated_and_multiplied (&info[i]);
 364 }
 365 
 366 static void
 367 record_rphf (const hb_ot_shape_plan_t *plan,
 368              hb_font_t *font,
 369              hb_buffer_t *buffer)
 370 {
 371   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 372 
 373   hb_mask_t mask = use_plan->rphf_mask;
 374   if (!mask) return;
 375   hb_glyph_info_t *info = buffer->info;
 376 
 377   foreach_syllable (buffer, start, end)
 378   {
 379     /* Mark a substituted repha as USE_R. */
 380     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
 381       if (_hb_glyph_info_substituted (&info[i]))
 382       {
 383         info[i].use_category() = USE_R;
 384         break;
 385       }
 386   }
 387 }
 388 
 389 static void
 390 record_pref (const hb_ot_shape_plan_t *plan,
 391              hb_font_t *font,
 392              hb_buffer_t *buffer)
 393 {
 394   hb_glyph_info_t *info = buffer->info;
 395 
 396   foreach_syllable (buffer, start, end)
 397   {
 398     /* Mark a substituted pref as VPre, as they behave the same way. */
 399     for (unsigned int i = start; i < end; i++)
 400       if (_hb_glyph_info_substituted (&info[i]))
 401       {
 402         info[i].use_category() = USE_VPre;
 403         break;
 404       }
 405   }
 406 }
 407 
/* Reorders the glyphs of one syllable, info[start..end), in place.
 *
 * Two passes are made, and only for virama-terminated, consonant, vowel and
 * broken clusters:
 *   1. "Move things forward": a leading USE_R glyph (repha) is moved to the
 *      position of the first base, or to just before the first halant,
 *      whichever is found first.
 *   2. "Move things back": each USE_VPre / USE_VMPre glyph with lig-comp 0 is
 *      moved back to the most recently recorded insertion point `j` (at a
 *      base, or just after a halant).
 * Clusters spanning a moved range are merged before the move. */
static void
reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
{
  /* The syllable type lives in the low four bits of syllable(). */
  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
  /* Only a few syllable types need reordering. */
  if (unlikely (!(FLAG_SAFE (syllable_type) &
                  (FLAG (virama_terminated_cluster) |
                   FLAG (consonant_cluster) |
                   FLAG (vowel_cluster) |
                   FLAG (broken_cluster) |
                   0))))
    return;

  hb_glyph_info_t *info = buffer->info;

#define HALANT_FLAGS FLAG(USE_H)
#define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB) | FLAG (USE_IV))

  /* Move things forward. */
  if (info[start].use_category() == USE_R && end - start > 1)
  {
    /* Got a repha.  Reorder it to after first base, before first halant. */
    for (unsigned int i = start + 1; i < end; i++)
      if (FLAG_UNSAFE (info[i].use_category()) & (HALANT_FLAGS | BASE_FLAGS))
      {
        /* If we hit a halant, move before it; otherwise it's a base: move to its
         * place, and shift things in between backward. */

        if (info[i].use_category() == USE_H)
          i--;

        /* Rotate info[start..i] left by one: the repha ends up at index i. */
        buffer->merge_clusters (start, i + 1);
        hb_glyph_info_t t = info[start];
        memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
        info[i] = t;

        break;
      }
  }

  /* Move things back. */
  /* j is the current insertion point; j == end means none recorded yet, in
   * which case the `j < i` test below keeps anything from moving. */
  unsigned int j = end;
  for (unsigned int i = start; i < end; i++)
  {
    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
    if (flag & (HALANT_FLAGS | BASE_FLAGS))
    {
      /* If we hit a halant, move after it; otherwise it's a base: move to its
       * place, and shift things in between forward. */
      if (info[i].use_category() == USE_H)
        j = i + 1;
      else
        j = i;
    }
    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
             /* Only move the first component of a MultipleSubst. */
             0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
             j < i)
    {
      /* Rotate info[j..i] right by one: the pre-base glyph ends up at index j. */
      buffer->merge_clusters (j, i + 1);
      hb_glyph_info_t t = info[i];
      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
      info[j] = t;
    }
  }
}
 474 
 475 static inline void
 476 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 477                        hb_font_t *font,
 478                        hb_buffer_t *buffer)
 479 {
 480   /* Note: This loop is extra overhead, but should not be measurable. */
 481   bool has_broken_syllables = false;
 482   unsigned int count = buffer->len;
 483   hb_glyph_info_t *info = buffer->info;
 484   for (unsigned int i = 0; i < count; i++)
 485     if ((info[i].syllable() & 0x0F) == broken_cluster)
 486     {
 487       has_broken_syllables = true;
 488       break;
 489     }
 490   if (likely (!has_broken_syllables))
 491     return;
 492 
 493 
 494   hb_codepoint_t dottedcircle_glyph;
 495   if (!font->get_glyph (0x25CCu, 0, &dottedcircle_glyph))
 496     return;
 497 
 498   hb_glyph_info_t dottedcircle = {0};
 499   if (!font->get_glyph (0x25CCu, 0, &dottedcircle.codepoint))
 500     return;
 501   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
 502 
 503   buffer->clear_output ();
 504 
 505   buffer->idx = 0;
 506 
 507   unsigned int last_syllable = 0;
 508   while (buffer->idx < buffer->len)
 509   {
 510     unsigned int syllable = buffer->cur().syllable();
 511     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 512     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 513     {
 514       last_syllable = syllable;
 515 
 516       hb_glyph_info_t info = dottedcircle;
 517       info.cluster = buffer->cur().cluster;
 518       info.mask = buffer->cur().mask;
 519       info.syllable() = buffer->cur().syllable();
 520       /* TODO Set glyph_props? */
 521 
 522       /* Insert dottedcircle after possible Repha. */
 523       while (buffer->idx < buffer->len &&
 524              last_syllable == buffer->cur().syllable() &&
 525              buffer->cur().use_category() == USE_R)
 526         buffer->next_glyph ();
 527 
 528       buffer->output_info (info);
 529     }
 530     else
 531       buffer->next_glyph ();
 532   }
 533 
 534   buffer->swap_buffers ();
 535 }
 536 
 537 static void
 538 reorder (const hb_ot_shape_plan_t *plan,
 539          hb_font_t *font,
 540          hb_buffer_t *buffer)
 541 {
 542   insert_dotted_circles (plan, font, buffer);
 543 
 544   hb_glyph_info_t *info = buffer->info;
 545 
 546   foreach_syllable (buffer, start, end)
 547     reorder_syllable (buffer, start, end);
 548 
 549   /* Zero syllables now... */
 550   unsigned int count = buffer->len;
 551   for (unsigned int i = 0; i < count; i++)
 552     info[i].syllable() = 0;
 553 
 554   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
 555 }
 556 
 557 static bool
 558 compose_use (const hb_ot_shape_normalize_context_t *c,
 559              hb_codepoint_t  a,
 560              hb_codepoint_t  b,
 561              hb_codepoint_t *ab)
 562 {
 563   /* Avoid recomposing split matras. */
 564   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 565     return false;
 566 
 567   return c->unicode->compose (a, b, ab);
 568 }
 569 
 570 
/* Shaper descriptor for the Universal Shaping Engine.  The initializer is
 * positional: it wires in the callbacks defined in this file and leaves the
 * hooks this shaper does not implement as NULL. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
{
  "use",
  collect_features_use,
  NULL, /* override_features */
  data_create_use,
  data_destroy_use,
  NULL, /* preprocess_text */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  NULL, /* decompose */
  compose_use,
  setup_masks_use,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  false, /* fallback_position */
};