1 /*
   2  * Copyright © 2011,2012,2013  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #include "hb-ot-shape-complex-indic-private.hh"
  28 
  29 /* buffer var allocations */
  30 #define myanmar_category() complex_var_u8_0() /* myanmar_category_t */
  31 #define myanmar_position() complex_var_u8_1() /* myanmar_position_t */
  32 
  33 
  34 /*
  35  * Myanmar shaper.
  36  */
  37 
  38 static const hb_tag_t
  39 basic_features[] =
  40 {
  41   /*
  42    * Basic features.
  43    * These features are applied in order, one at a time, after initial_reordering.
  44    */
  45   HB_TAG('r','p','h','f'),
  46   HB_TAG('p','r','e','f'),
  47   HB_TAG('b','l','w','f'),
  48   HB_TAG('p','s','t','f'),
  49 };
  50 static const hb_tag_t
  51 other_features[] =
  52 {
  53   /*
  54    * Other features.
  55    * These features are applied all at once, after final_reordering.
  56    */
  57   HB_TAG('p','r','e','s'),
  58   HB_TAG('a','b','v','s'),
  59   HB_TAG('b','l','w','s'),
  60   HB_TAG('p','s','t','s'),
  61   /* Positioning features, though we don't care about the types. */
  62   HB_TAG('d','i','s','t'),
  63   /* Pre-release version of Windows 8 Myanmar font had abvm,blwm
  64    * features.  The released Windows 8 version of the font (as well
  65    * as the released spec) used 'mark' instead.  The Windows 8
  66    * shaper however didn't apply 'mark' but did apply 'mkmk'.
  67    * Perhaps it applied abvm/blwm.  This was fixed in a Windows 8
  68    * update, so now it applies mark/mkmk.  We are guessing that
  69    * it still applies abvm/blwm too.
  70    */
  71   HB_TAG('a','b','v','m'),
  72   HB_TAG('b','l','w','m'),
  73 };
  74 
  75 static void
  76 setup_syllables (const hb_ot_shape_plan_t *plan,
  77                  hb_font_t *font,
  78                  hb_buffer_t *buffer);
  79 static void
  80 initial_reordering (const hb_ot_shape_plan_t *plan,
  81                     hb_font_t *font,
  82                     hb_buffer_t *buffer);
  83 static void
  84 final_reordering (const hb_ot_shape_plan_t *plan,
  85                   hb_font_t *font,
  86                   hb_buffer_t *buffer);
  87 
  88 static void
  89 collect_features_myanmar (hb_ot_shape_planner_t *plan)
  90 {
  91   hb_ot_map_builder_t *map = &plan->map;
  92 
  93   /* Do this before any lookups have been applied. */
  94   map->add_gsub_pause (setup_syllables);
  95 
  96   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
  97   /* The Indic specs do not require ccmp, but we apply it here since if
  98    * there is a use of it, it's typically at the beginning. */
  99   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 100 
 101 
 102   map->add_gsub_pause (initial_reordering);
 103   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 104   {
 105     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 106     map->add_gsub_pause (NULL);
 107   }
 108   map->add_gsub_pause (final_reordering);
 109   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 110     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 111 }
 112 
 113 static void
 114 override_features_myanmar (hb_ot_shape_planner_t *plan)
 115 {
 116   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
 117 }
 118 
 119 
 120 enum syllable_type_t {
 121   consonant_syllable,
 122   punctuation_cluster,
 123   broken_cluster,
 124   non_myanmar_cluster,
 125 };
 126 
 127 #include "hb-ot-shape-complex-myanmar-machine.hh"
 128 
 129 
 130 /* Note: This enum is duplicated in the -machine.rl source file.
 131  * Not sure how to avoid duplication. */
 132 enum myanmar_category_t {
 133   OT_As  = 18, /* Asat */
 134   OT_D   = 19, /* Digits except zero */
 135   OT_D0  = 20, /* Digit zero */
 136   OT_DB  = OT_N, /* Dot below */
 137   OT_GB  = OT_PLACEHOLDER,
 138   OT_MH  = 21, /* Various consonant medial types */
 139   OT_MR  = 22, /* Various consonant medial types */
 140   OT_MW  = 23, /* Various consonant medial types */
 141   OT_MY  = 24, /* Various consonant medial types */
 142   OT_PT  = 25, /* Pwo and other tones */
 143   OT_VAbv = 26,
 144   OT_VBlw = 27,
 145   OT_VPre = 28,
 146   OT_VPst = 29,
 147   OT_VS   = 30, /* Variation selectors */
 148   OT_P    = 31  /* Punctuation */
 149 };
 150 
 151 
 152 static inline bool
 153 is_one_of (const hb_glyph_info_t &info, unsigned int flags)
 154 {
 155   /* If it ligated, all bets are off. */
 156   if (_hb_glyph_info_ligated (&info)) return false;
 157   return !!(FLAG_SAFE (info.myanmar_category()) & flags);
 158 }
 159 
 160 static inline bool
 161 is_consonant (const hb_glyph_info_t &info)
 162 {
 163   return is_one_of (info, CONSONANT_FLAGS);
 164 }
 165 
 166 
 167 static inline void
 168 set_myanmar_properties (hb_glyph_info_t &info)
 169 {
 170   hb_codepoint_t u = info.codepoint;
 171   unsigned int type = hb_indic_get_categories (u);
 172   indic_category_t cat = (indic_category_t) (type & 0x7Fu);
 173   indic_position_t pos = (indic_position_t) (type >> 8);
 174 
 175   /* Myanmar
 176    * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
 177    */
 178   if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu)))
 179     cat = (indic_category_t) OT_VS;
 180 
 181   switch (u)
 182   {
 183     case 0x104Eu:
 184       cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */
 185       break;
 186 
 187     case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u:
 188     case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u:
 189     case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu:
 190     case 0x25FEu:
 191       cat = (indic_category_t) OT_GB;
 192       break;
 193 
 194     case 0x1004u: case 0x101Bu: case 0x105Au:
 195       cat = (indic_category_t) OT_Ra;
 196       break;
 197 
 198     case 0x1032u: case 0x1036u:
 199       cat = (indic_category_t) OT_A;
 200       break;
 201 
 202     case 0x1039u:
 203       cat = (indic_category_t) OT_H;
 204       break;
 205 
 206     case 0x103Au:
 207       cat = (indic_category_t) OT_As;
 208       break;
 209 
 210     case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u:
 211     case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u:
 212     case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u:
 213     case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u:
 214     case 0x1097u: case 0x1098u: case 0x1099u:
 215       cat = (indic_category_t) OT_D;
 216       break;
 217 
 218     case 0x1040u:
 219       cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
 220       break;
 221 
 222     case 0x103Eu: case 0x1060u:
 223       cat = (indic_category_t) OT_MH;
 224       break;
 225 
 226     case 0x103Cu:
 227       cat = (indic_category_t) OT_MR;
 228       break;
 229 
 230     case 0x103Du: case 0x1082u:
 231       cat = (indic_category_t) OT_MW;
 232       break;
 233 
 234     case 0x103Bu: case 0x105Eu: case 0x105Fu:
 235       cat = (indic_category_t) OT_MY;
 236       break;
 237 
 238     case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au:
 239     case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu:
 240       cat = (indic_category_t) OT_PT;
 241       break;
 242 
 243     case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u:
 244     case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du:
 245     case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu:
 246       cat = (indic_category_t) OT_SM;
 247       break;
 248 
 249     case 0x104Au: case 0x104Bu:
 250       cat = (indic_category_t) OT_P;
 251       break;
 252 
 253     case 0xAA74u: case 0xAA75u: case 0xAA76u:
 254       /* https://github.com/roozbehp/unicode-data/issues/3 */
 255       cat = (indic_category_t) OT_C;
 256       break;
 257   }
 258 
 259   if (cat == OT_M)
 260   {
 261     switch ((int) pos)
 262     {
 263       case POS_PRE_C:   cat = (indic_category_t) OT_VPre;
 264                         pos = POS_PRE_M;                  break;
 265       case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break;
 266       case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break;
 267       case POS_POST_C:  cat = (indic_category_t) OT_VPst; break;
 268     }
 269   }
 270 
 271   info.myanmar_category() = (myanmar_category_t) cat;
 272   info.myanmar_position() = pos;
 273 }
 274 
 275 
 276 
 277 static void
 278 setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
 279                    hb_buffer_t              *buffer,
 280                    hb_font_t                *font HB_UNUSED)
 281 {
 282   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
 283   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
 284 
 285   /* We cannot setup masks here.  We save information about characters
 286    * and setup masks later on in a pause-callback. */
 287 
 288   unsigned int count = buffer->len;
 289   hb_glyph_info_t *info = buffer->info;
 290   for (unsigned int i = 0; i < count; i++)
 291     set_myanmar_properties (info[i]);
 292 }
 293 
 294 static void
 295 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 296                  hb_font_t *font HB_UNUSED,
 297                  hb_buffer_t *buffer)
 298 {
 299   find_syllables (buffer);
 300 }
 301 
 302 static int
 303 compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 304 {
 305   int a = pa->myanmar_position();
 306   int b = pb->myanmar_position();
 307 
 308   return a < b ? -1 : a == b ? 0 : +1;
 309 }
 310 
 311 
 312 /* Rules from:
 313  * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */
 314 
 315 static void
 316 initial_reordering_consonant_syllable (hb_buffer_t *buffer,
 317                                        unsigned int start, unsigned int end)
 318 {
 319   hb_glyph_info_t *info = buffer->info;
 320 
 321   unsigned int base = end;
 322   bool has_reph = false;
 323 
 324   {
 325     unsigned int limit = start;
 326     if (start + 3 <= end &&
 327         info[start  ].myanmar_category() == OT_Ra &&
 328         info[start+1].myanmar_category() == OT_As &&
 329         info[start+2].myanmar_category() == OT_H)
 330     {
 331       limit += 3;
 332       base = start;
 333       has_reph = true;
 334     }
 335 
 336     {
 337       if (!has_reph)
 338         base = limit;
 339 
 340       for (unsigned int i = limit; i < end; i++)
 341         if (is_consonant (info[i]))
 342         {
 343           base = i;
 344           break;
 345         }
 346     }
 347   }
 348 
 349   /* Reorder! */
 350   {
 351     unsigned int i = start;
 352     for (; i < start + (has_reph ? 3 : 0); i++)
 353       info[i].myanmar_position() = POS_AFTER_MAIN;
 354     for (; i < base; i++)
 355       info[i].myanmar_position() = POS_PRE_C;
 356     if (i < end)
 357     {
 358       info[i].myanmar_position() = POS_BASE_C;
 359       i++;
 360     }
 361     indic_position_t pos = POS_AFTER_MAIN;
 362     /* The following loop may be ugly, but it implements all of
 363      * Myanmar reordering! */
 364     for (; i < end; i++)
 365     {
 366       if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
 367       {
 368         info[i].myanmar_position() = POS_PRE_C;
 369         continue;
 370       }
 371       if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */
 372       {
 373         continue;
 374       }
 375 
 376       if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
 377       {
 378         pos = POS_BELOW_C;
 379         info[i].myanmar_position() = pos;
 380         continue;
 381       }
 382 
 383       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
 384       {
 385         info[i].myanmar_position() = POS_BEFORE_SUB;
 386         continue;
 387       }
 388       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
 389       {
 390         info[i].myanmar_position() = pos;
 391         continue;
 392       }
 393       if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
 394       {
 395         pos = POS_AFTER_SUB;
 396         info[i].myanmar_position() = pos;
 397         continue;
 398       }
 399       info[i].myanmar_position() = pos;
 400     }
 401   }
 402 
 403   /* Sit tight, rock 'n roll! */
 404   buffer->sort (start, end, compare_myanmar_order);
 405 }
 406 
 407 static void
 408 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 409                              hb_face_t *face,
 410                              hb_buffer_t *buffer,
 411                              unsigned int start, unsigned int end)
 412 {
 413   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 414   switch (syllable_type) {
 415 
 416     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
 417     case consonant_syllable:
 418       initial_reordering_consonant_syllable  (buffer, start, end);
 419       break;
 420 
 421     case punctuation_cluster:
 422     case non_myanmar_cluster:
 423       break;
 424   }
 425 }
 426 
 427 static inline void
 428 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 429                        hb_font_t *font,
 430                        hb_buffer_t *buffer)
 431 {
 432   /* Note: This loop is extra overhead, but should not be measurable. */
 433   bool has_broken_syllables = false;
 434   unsigned int count = buffer->len;
 435   hb_glyph_info_t *info = buffer->info;
 436   for (unsigned int i = 0; i < count; i++)
 437     if ((info[i].syllable() & 0x0F) == broken_cluster)
 438     {
 439       has_broken_syllables = true;
 440       break;
 441     }
 442   if (likely (!has_broken_syllables))
 443     return;
 444 
 445 
 446   hb_codepoint_t dottedcircle_glyph;
 447   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
 448     return;
 449 
 450   hb_glyph_info_t dottedcircle = {0};
 451   dottedcircle.codepoint = 0x25CCu;
 452   set_myanmar_properties (dottedcircle);
 453   dottedcircle.codepoint = dottedcircle_glyph;
 454 
 455   buffer->clear_output ();
 456 
 457   buffer->idx = 0;
 458   unsigned int last_syllable = 0;
 459   while (buffer->idx < buffer->len && !buffer->in_error)
 460   {
 461     unsigned int syllable = buffer->cur().syllable();
 462     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 463     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 464     {
 465       last_syllable = syllable;
 466 
 467       hb_glyph_info_t ginfo = dottedcircle;
 468       ginfo.cluster = buffer->cur().cluster;
 469       ginfo.mask = buffer->cur().mask;
 470       ginfo.syllable() = buffer->cur().syllable();
 471 
 472       buffer->output_info (ginfo);
 473     }
 474     else
 475       buffer->next_glyph ();
 476   }
 477 
 478   buffer->swap_buffers ();
 479 }
 480 
 481 static void
 482 initial_reordering (const hb_ot_shape_plan_t *plan,
 483                     hb_font_t *font,
 484                     hb_buffer_t *buffer)
 485 {
 486   insert_dotted_circles (plan, font, buffer);
 487 
 488   foreach_syllable (buffer, start, end)
 489     initial_reordering_syllable (plan, font->face, buffer, start, end);
 490 }
 491 
 492 static void
 493 final_reordering (const hb_ot_shape_plan_t *plan,
 494                   hb_font_t *font HB_UNUSED,
 495                   hb_buffer_t *buffer)
 496 {
 497   hb_glyph_info_t *info = buffer->info;
 498   unsigned int count = buffer->len;
 499 
 500   /* Zero syllables now... */
 501   for (unsigned int i = 0; i < count; i++)
 502     info[i].syllable() = 0;
 503 
 504   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
 505   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
 506 }
 507 
 508 
 509 /* Uniscribe seems to have a shaper for 'mymr' that is like the
 510  * generic shaper, except that it zeros mark advances GDEF_LATE. */
 511 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old =
 512 {
 513   "default",
 514   NULL, /* collect_features */
 515   NULL, /* override_features */
 516   NULL, /* data_create */
 517   NULL, /* data_destroy */
 518   NULL, /* preprocess_text */
 519   NULL, /* postprocess_glyphs */
 520   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
 521   NULL, /* decompose */
 522   NULL, /* compose */
 523   NULL, /* setup_masks */
 524   NULL, /* disable_otl */
 525   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
 526   true, /* fallback_position */
 527 };
 528 
 529 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
 530 {
 531   "myanmar",
 532   collect_features_myanmar,
 533   override_features_myanmar,
 534   NULL, /* data_create */
 535   NULL, /* data_destroy */
 536   NULL, /* preprocess_text */
 537   NULL, /* postprocess_glyphs */
 538   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 539   NULL, /* decompose */
 540   NULL, /* compose */
 541   setup_masks_myanmar,
 542   NULL, /* disable_otl */
 543   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
 544   false, /* fallback_position */
 545 };