1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2015  Mozilla Foundation.
  32  * Copyright © 2015  Google, Inc.
  33  *
  34  *  This is part of HarfBuzz, a text shaping library.
  35  *
  36  * Permission is hereby granted, without written agreement and without
  37  * license or royalty fees, to use, copy, modify, and distribute this
  38  * software and its documentation for any purpose, provided that the
  39  * above copyright notice and the following two paragraphs appear in
  40  * all copies of this software.
  41  *
  42  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  43  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  44  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  45  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  46  * DAMAGE.
  47  *
  48  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  49  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  50  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  51  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  52  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  53  *
  54  * Mozilla Author(s): Jonathan Kew
  55  * Google Author(s): Behdad Esfahbod
  56  */
  57 
  58 #include "hb-ot-shape-complex-use-private.hh"
  59 #include "hb-ot-shape-complex-arabic-private.hh"
  60 
  61 /* buffer var allocations */
  62 #define use_category() complex_var_u8_0()
  63 
  64 
  65 /*
  66  * Universal Shaping Engine.
  67  * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
  68  */
  69 
  70 static const hb_tag_t
  71 basic_features[] =
  72 {
  73   /*
  74    * Basic features.
  75    * These features are applied all at once, before reordering.
  76    */
  77   HB_TAG('r','k','r','f'),
  78   HB_TAG('a','b','v','f'),
  79   HB_TAG('b','l','w','f'),
  80   HB_TAG('h','a','l','f'),
  81   HB_TAG('p','s','t','f'),
  82   HB_TAG('v','a','t','u'),
  83   HB_TAG('c','j','c','t'),
  84 };
  85 static const hb_tag_t
  86 arabic_features[] =
  87 {
  88   HB_TAG('i','s','o','l'),
  89   HB_TAG('i','n','i','t'),
  90   HB_TAG('m','e','d','i'),
  91   HB_TAG('f','i','n','a'),
  92   /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
  93    * does.  These are only used in Syriac spec. */
  94   HB_TAG('m','e','d','2'),
  95   HB_TAG('f','i','n','2'),
  96   HB_TAG('f','i','n','3'),
  97 };
  98 /* Same order as arabic_features.  Don't need Syriac stuff.*/
  99 enum joining_form_t {
 100   ISOL,
 101   INIT,
 102   MEDI,
 103   FINA,
 104   _NONE
 105 };
 106 static const hb_tag_t
 107 other_features[] =
 108 {
 109   /*
 110    * Other features.
 111    * These features are applied all at once, after reordering.
 112    */
 113   HB_TAG('a','b','v','s'),
 114   HB_TAG('b','l','w','s'),
 115   HB_TAG('h','a','l','n'),
 116   HB_TAG('p','r','e','s'),
 117   HB_TAG('p','s','t','s'),
 118   /* Positioning features, though we don't care about the types. */
 119   HB_TAG('d','i','s','t'),
 120   HB_TAG('a','b','v','m'),
 121   HB_TAG('b','l','w','m'),
 122 };
 123 
 124 static void
 125 setup_syllables (const hb_ot_shape_plan_t *plan,
 126                  hb_font_t *font,
 127                  hb_buffer_t *buffer);
 128 static void
 129 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 130                           hb_font_t *font,
 131                           hb_buffer_t *buffer);
 132 static void
 133 record_rphf (const hb_ot_shape_plan_t *plan,
 134              hb_font_t *font,
 135              hb_buffer_t *buffer);
 136 static void
 137 record_pref (const hb_ot_shape_plan_t *plan,
 138              hb_font_t *font,
 139              hb_buffer_t *buffer);
 140 static void
 141 reorder (const hb_ot_shape_plan_t *plan,
 142          hb_font_t *font,
 143          hb_buffer_t *buffer);
 144 
 145 static void
 146 collect_features_use (hb_ot_shape_planner_t *plan)
 147 {
 148   hb_ot_map_builder_t *map = &plan->map;
 149 
 150   /* Do this before any lookups have been applied. */
 151   map->add_gsub_pause (setup_syllables);
 152 
 153   /* "Default glyph pre-processing group" */
 154   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 155   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 156   map->add_global_bool_feature (HB_TAG('n','u','k','t'));
 157   map->add_global_bool_feature (HB_TAG('a','k','h','n'));
 158 
 159   /* "Reordering group" */
 160   map->add_gsub_pause (clear_substitution_flags);
 161   map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
 162   map->add_gsub_pause (record_rphf);
 163   map->add_gsub_pause (clear_substitution_flags);
 164   map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
 165   map->add_gsub_pause (record_pref);
 166 
 167   /* "Orthographic unit shaping group" */
 168   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 169     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 170 
 171   map->add_gsub_pause (reorder);
 172 
 173   /* "Topographical features" */
 174   for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
 175     map->add_feature (arabic_features[i], 1, F_NONE);
 176   map->add_gsub_pause (NULL);
 177 
 178   /* "Standard typographic presentation" and "Positional feature application" */
 179   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 180     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 181 }
 182 
 183 struct use_shape_plan_t
 184 {
 185   ASSERT_POD ();
 186 
 187   hb_mask_t rphf_mask;
 188 
 189   arabic_shape_plan_t *arabic_plan;
 190 };
 191 
 192 static bool
 193 has_arabic_joining (hb_script_t script)
 194 {
 195   /* List of scripts that have data in arabic-table. */
 196   switch ((int) script)
 197   {
 198     /* Unicode-1.1 additions */
 199     case HB_SCRIPT_ARABIC:
 200 
 201     /* Unicode-3.0 additions */
 202     case HB_SCRIPT_MONGOLIAN:
 203     case HB_SCRIPT_SYRIAC:
 204 
 205     /* Unicode-5.0 additions */
 206     case HB_SCRIPT_NKO:
 207     case HB_SCRIPT_PHAGS_PA:
 208 
 209     /* Unicode-6.0 additions */
 210     case HB_SCRIPT_MANDAIC:
 211 
 212     /* Unicode-7.0 additions */
 213     case HB_SCRIPT_MANICHAEAN:
 214     case HB_SCRIPT_PSALTER_PAHLAVI:
 215 
 216       return true;
 217 
 218     default:
 219       return false;
 220   }
 221 }
 222 
 223 static void *
 224 data_create_use (const hb_ot_shape_plan_t *plan)
 225 {
 226   use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
 227   if (unlikely (!use_plan))
 228     return NULL;
 229 
 230   use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
 231 
 232   if (has_arabic_joining (plan->props.script))
 233   {
 234     use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
 235     if (unlikely (!use_plan->arabic_plan))
 236     {
 237       free (use_plan);
 238       return NULL;
 239     }
 240   }
 241 
 242   return use_plan;
 243 }
 244 
 245 static void
 246 data_destroy_use (void *data)
 247 {
 248   use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
 249 
 250   if (use_plan->arabic_plan)
 251     data_destroy_arabic (use_plan->arabic_plan);
 252 
 253   free (data);
 254 }
 255 
 256 enum syllable_type_t {
 257   independent_cluster,
 258   virama_terminated_cluster,
 259   consonant_cluster,
 260   vowel_cluster,
 261   number_joiner_terminated_cluster,
 262   numeral_cluster,
 263   symbol_cluster,
 264   broken_cluster,
 265 };
 266 
 267 #include "hb-ot-shape-complex-use-machine.hh"
 268 
 269 
 270 static void
 271 setup_masks_use (const hb_ot_shape_plan_t *plan,
 272                  hb_buffer_t              *buffer,
 273                  hb_font_t                *font HB_UNUSED)
 274 {
 275   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 276 
 277   /* Do this before allocating use_category(). */
 278   if (use_plan->arabic_plan)
 279   {
 280     setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
 281   }
 282 
 283   HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
 284 
 285   /* We cannot setup masks here.  We save information about characters
 286    * and setup masks later on in a pause-callback. */
 287 
 288   unsigned int count = buffer->len;
 289   hb_glyph_info_t *info = buffer->info;
 290   for (unsigned int i = 0; i < count; i++)
 291     info[i].use_category() = hb_use_get_categories (info[i].codepoint);
 292 }
 293 
 294 static void
 295 setup_rphf_mask (const hb_ot_shape_plan_t *plan,
 296                  hb_buffer_t *buffer)
 297 {
 298   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 299 
 300   hb_mask_t mask = use_plan->rphf_mask;
 301   if (!mask) return;
 302 
 303   hb_glyph_info_t *info = buffer->info;
 304 
 305   foreach_syllable (buffer, start, end)
 306   {
 307     unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
 308     for (unsigned int i = start; i < start + limit; i++)
 309       info[i].mask |= mask;
 310   }
 311 }
 312 
 313 static void
 314 setup_topographical_masks (const hb_ot_shape_plan_t *plan,
 315                            hb_buffer_t *buffer)
 316 {
 317 
 318   ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
 319   hb_mask_t masks[4], all_masks = 0;
 320   for (unsigned int i = 0; i < 4; i++)
 321   {
 322     masks[i] = plan->map.get_1_mask (arabic_features[i]);
 323     if (masks[i] == plan->map.get_global_mask ())
 324       masks[i] = 0;
 325     all_masks |= masks[i];
 326   }
 327   if (!all_masks)
 328     return;
 329   hb_mask_t other_masks = ~all_masks;
 330 
 331   unsigned int last_start = 0;
 332   joining_form_t last_form = _NONE;
 333   hb_glyph_info_t *info = buffer->info;
 334   foreach_syllable (buffer, start, end)
 335   {
 336     syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
 337     switch (syllable_type)
 338     {
 339       case independent_cluster:
 340       case symbol_cluster:
 341         /* These don't join.  Nothing to do. */
 342         last_form = _NONE;
 343         break;
 344 
 345       case virama_terminated_cluster:
 346       case consonant_cluster:
 347       case vowel_cluster:
 348       case number_joiner_terminated_cluster:
 349       case numeral_cluster:
 350       case broken_cluster:
 351 
 352         bool join = last_form == FINA || last_form == ISOL;
 353 
 354         if (join)
 355         {
 356           /* Fixup previous syllable's form. */
 357           last_form = last_form == FINA ? MEDI : INIT;
 358           for (unsigned int i = last_start; i < start; i++)
 359             info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 360         }
 361 
 362         /* Form for this syllable. */
 363         last_form = join ? FINA : ISOL;
 364         for (unsigned int i = start; i < end; i++)
 365           info[i].mask = (info[i].mask & other_masks) | masks[last_form];
 366 
 367         break;
 368     }
 369 
 370     last_start = start;
 371   }
 372 }
 373 
 374 static void
 375 setup_syllables (const hb_ot_shape_plan_t *plan,
 376                  hb_font_t *font HB_UNUSED,
 377                  hb_buffer_t *buffer)
 378 {
 379   find_syllables (buffer);
 380   setup_rphf_mask (plan, buffer);
 381   setup_topographical_masks (plan, buffer);
 382 }
 383 
 384 static void
 385 clear_substitution_flags (const hb_ot_shape_plan_t *plan,
 386                           hb_font_t *font HB_UNUSED,
 387                           hb_buffer_t *buffer)
 388 {
 389   hb_glyph_info_t *info = buffer->info;
 390   unsigned int count = buffer->len;
 391   for (unsigned int i = 0; i < count; i++)
 392     _hb_glyph_info_clear_substituted_and_ligated_and_multiplied (&info[i]);
 393 }
 394 
 395 static void
 396 record_rphf (const hb_ot_shape_plan_t *plan,
 397              hb_font_t *font,
 398              hb_buffer_t *buffer)
 399 {
 400   const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
 401 
 402   hb_mask_t mask = use_plan->rphf_mask;
 403   if (!mask) return;
 404   hb_glyph_info_t *info = buffer->info;
 405 
 406   foreach_syllable (buffer, start, end)
 407   {
 408     /* Mark a substituted repha as USE_R. */
 409     for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
 410       if (_hb_glyph_info_substituted (&info[i]))
 411       {
 412         info[i].use_category() = USE_R;
 413         break;
 414       }
 415   }
 416 }
 417 
 418 static void
 419 record_pref (const hb_ot_shape_plan_t *plan,
 420              hb_font_t *font,
 421              hb_buffer_t *buffer)
 422 {
 423   hb_glyph_info_t *info = buffer->info;
 424 
 425   foreach_syllable (buffer, start, end)
 426   {
 427     /* Mark a substituted pref as VPre, as they behave the same way. */
 428     for (unsigned int i = start; i < end; i++)
 429       if (_hb_glyph_info_substituted (&info[i]))
 430       {
 431         info[i].use_category() = USE_VPre;
 432         break;
 433       }
 434   }
 435 }
 436 
 437 static void
 438 reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
 439 {
 440   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 441   /* Only a few syllable types need reordering. */
 442   if (unlikely (!(FLAG_SAFE (syllable_type) &
 443                   (FLAG (virama_terminated_cluster) |
 444                    FLAG (consonant_cluster) |
 445                    FLAG (vowel_cluster) |
 446                    FLAG (broken_cluster) |
 447                    0))))
 448     return;
 449 
 450   hb_glyph_info_t *info = buffer->info;
 451 
 452 #define HALANT_FLAGS FLAG(USE_H)
 453 #define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB) | FLAG (USE_IV))
 454 
 455   /* Move things forward. */
 456   if (info[start].use_category() == USE_R && end - start > 1)
 457   {
 458     /* Got a repha.  Reorder it to after first base, before first halant. */
 459     for (unsigned int i = start + 1; i < end; i++)
 460       if (FLAG_UNSAFE (info[i].use_category()) & (HALANT_FLAGS | BASE_FLAGS))
 461       {
 462         /* If we hit a halant, move before it; otherwise it's a base: move to it's
 463          * place, and shift things in between backward. */
 464 
 465         if (info[i].use_category() == USE_H)
 466           i--;
 467 
 468         buffer->merge_clusters (start, i + 1);
 469         hb_glyph_info_t t = info[start];
 470         memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
 471         info[i] = t;
 472 
 473         break;
 474       }
 475   }
 476 
 477   /* Move things back. */
 478   unsigned int j = end;
 479   for (unsigned int i = start; i < end; i++)
 480   {
 481     uint32_t flag = FLAG_UNSAFE (info[i].use_category());
 482     if (flag & (HALANT_FLAGS | BASE_FLAGS))
 483     {
 484       /* If we hit a halant, move before it; otherwise it's a base: move to it's
 485        * place, and shift things in between backward. */
 486       if (info[i].use_category() == USE_H)
 487         j = i + 1;
 488       else
 489         j = i;
 490     }
 491     else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
 492              /* Only move the first component of a MultipleSubst. */
 493              0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
 494              j < i)
 495     {
 496       buffer->merge_clusters (j, i + 1);
 497       hb_glyph_info_t t = info[i];
 498       memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
 499       info[j] = t;
 500     }
 501   }
 502 }
 503 
 504 static inline void
 505 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 506                        hb_font_t *font,
 507                        hb_buffer_t *buffer)
 508 {
 509   /* Note: This loop is extra overhead, but should not be measurable. */
 510   bool has_broken_syllables = false;
 511   unsigned int count = buffer->len;
 512   hb_glyph_info_t *info = buffer->info;
 513   for (unsigned int i = 0; i < count; i++)
 514     if ((info[i].syllable() & 0x0F) == broken_cluster)
 515     {
 516       has_broken_syllables = true;
 517       break;
 518     }
 519   if (likely (!has_broken_syllables))
 520     return;
 521 
 522 
 523   hb_codepoint_t dottedcircle_glyph;
 524   if (!font->get_glyph (0x25CCu, 0, &dottedcircle_glyph))
 525     return;
 526 
 527   hb_glyph_info_t dottedcircle = {0};
 528   if (!font->get_glyph (0x25CCu, 0, &dottedcircle.codepoint))
 529     return;
 530   dottedcircle.use_category() = hb_use_get_categories (0x25CC);
 531 
 532   buffer->clear_output ();
 533 
 534   buffer->idx = 0;
 535 
 536   unsigned int last_syllable = 0;
 537   while (buffer->idx < buffer->len)
 538   {
 539     unsigned int syllable = buffer->cur().syllable();
 540     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 541     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 542     {
 543       last_syllable = syllable;
 544 
 545       hb_glyph_info_t info = dottedcircle;
 546       info.cluster = buffer->cur().cluster;
 547       info.mask = buffer->cur().mask;
 548       info.syllable() = buffer->cur().syllable();
 549       /* TODO Set glyph_props? */
 550 
 551       /* Insert dottedcircle after possible Repha. */
 552       while (buffer->idx < buffer->len &&
 553              last_syllable == buffer->cur().syllable() &&
 554              buffer->cur().use_category() == USE_R)
 555         buffer->next_glyph ();
 556 
 557       buffer->output_info (info);
 558     }
 559     else
 560       buffer->next_glyph ();
 561   }
 562 
 563   buffer->swap_buffers ();
 564 }
 565 
 566 static void
 567 reorder (const hb_ot_shape_plan_t *plan,
 568          hb_font_t *font,
 569          hb_buffer_t *buffer)
 570 {
 571   insert_dotted_circles (plan, font, buffer);
 572 
 573   hb_glyph_info_t *info = buffer->info;
 574 
 575   foreach_syllable (buffer, start, end)
 576     reorder_syllable (buffer, start, end);
 577 
 578   /* Zero syllables now... */
 579   unsigned int count = buffer->len;
 580   for (unsigned int i = 0; i < count; i++)
 581     info[i].syllable() = 0;
 582 
 583   HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
 584 }
 585 
 586 static bool
 587 compose_use (const hb_ot_shape_normalize_context_t *c,
 588              hb_codepoint_t  a,
 589              hb_codepoint_t  b,
 590              hb_codepoint_t *ab)
 591 {
 592   /* Avoid recomposing split matras. */
 593   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 594     return false;
 595 
 596   return c->unicode->compose (a, b, ab);
 597 }
 598 
 599 
 600 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
 601 {
 602   "use",
 603   collect_features_use,
 604   NULL, /* override_features */
 605   data_create_use,
 606   data_destroy_use,
 607   NULL, /* preprocess_text */
 608   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 609   NULL, /* decompose */
 610   compose_use,
 611   setup_masks_use,
 612   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
 613   false, /* fallback_position */
 614 };