1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2011,2012,2013  Google, Inc.
  32  *
  33  *  This is part of HarfBuzz, a text shaping library.
  34  *
  35  * Permission is hereby granted, without written agreement and without
  36  * license or royalty fees, to use, copy, modify, and distribute this
  37  * software and its documentation for any purpose, provided that the
  38  * above copyright notice and the following two paragraphs appear in
  39  * all copies of this software.
  40  *
  41  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  42  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  43  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  44  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  45  * DAMAGE.
  46  *
  47  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  48  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  49  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  50  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  51  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  52  *
  53  * Google Author(s): Behdad Esfahbod
  54  */
  55 
  56 #include "hb-ot-shape-complex-indic-private.hh"
  57 
/* buffer var allocations
 *
 * Both accessors are used on hb_glyph_info_t objects in this file and are
 * reserved/released with HB_BUFFER_ALLOCATE_VAR / HB_BUFFER_DEALLOCATE_VAR. */
#define myanmar_category() complex_var_u8_0() /* myanmar_category_t */
#define myanmar_position() complex_var_u8_1() /* myanmar_position_t */
  61 
  62 
  63 /*
  64  * Myanmar shaper.
  65  */
  66 
  67 static const hb_tag_t
  68 basic_features[] =
  69 {
  70   /*
  71    * Basic features.
  72    * These features are applied in order, one at a time, after initial_reordering.
  73    */
  74   HB_TAG('r','p','h','f'),
  75   HB_TAG('p','r','e','f'),
  76   HB_TAG('b','l','w','f'),
  77   HB_TAG('p','s','t','f'),
  78 };
  79 static const hb_tag_t
  80 other_features[] =
  81 {
  82   /*
  83    * Other features.
  84    * These features are applied all at once, after final_reordering.
  85    */
  86   HB_TAG('p','r','e','s'),
  87   HB_TAG('a','b','v','s'),
  88   HB_TAG('b','l','w','s'),
  89   HB_TAG('p','s','t','s'),
  90   /* Positioning features, though we don't care about the types. */
  91   HB_TAG('d','i','s','t'),
  92   /* Pre-release version of Windows 8 Myanmar font had abvm,blwm
  93    * features.  The released Windows 8 version of the font (as well
  94    * as the released spec) used 'mark' instead.  The Windows 8
  95    * shaper however didn't apply 'mark' but did apply 'mkmk'.
  96    * Perhaps it applied abvm/blwm.  This was fixed in a Windows 8
  97    * update, so now it applies mark/mkmk.  We are guessing that
  98    * it still applies abvm/blwm too.
  99    */
 100   HB_TAG('a','b','v','m'),
 101   HB_TAG('b','l','w','m'),
 102 };
 103 
 104 static void
 105 setup_syllables (const hb_ot_shape_plan_t *plan,
 106                  hb_font_t *font,
 107                  hb_buffer_t *buffer);
 108 static void
 109 initial_reordering (const hb_ot_shape_plan_t *plan,
 110                     hb_font_t *font,
 111                     hb_buffer_t *buffer);
 112 static void
 113 final_reordering (const hb_ot_shape_plan_t *plan,
 114                   hb_font_t *font,
 115                   hb_buffer_t *buffer);
 116 
 117 static void
 118 collect_features_myanmar (hb_ot_shape_planner_t *plan)
 119 {
 120   hb_ot_map_builder_t *map = &plan->map;
 121 
 122   /* Do this before any lookups have been applied. */
 123   map->add_gsub_pause (setup_syllables);
 124 
 125   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 126   /* The Indic specs do not require ccmp, but we apply it here since if
 127    * there is a use of it, it's typically at the beginning. */
 128   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 129 
 130 
 131   map->add_gsub_pause (initial_reordering);
 132   for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
 133   {
 134     map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 135     map->add_gsub_pause (NULL);
 136   }
 137   map->add_gsub_pause (final_reordering);
 138   for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
 139     map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
 140 }
 141 
 142 static void
 143 override_features_myanmar (hb_ot_shape_planner_t *plan)
 144 {
 145   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
 146 }
 147 
 148 
 149 enum syllable_type_t {
 150   consonant_syllable,
 151   punctuation_cluster,
 152   broken_cluster,
 153   non_myanmar_cluster,
 154 };
 155 
 156 #include "hb-ot-shape-complex-myanmar-machine.hh"
 157 
 158 
 159 /* Note: This enum is duplicated in the -machine.rl source file.
 160  * Not sure how to avoid duplication. */
 161 enum myanmar_category_t {
 162   OT_As  = 18, /* Asat */
 163   OT_D   = 19, /* Digits except zero */
 164   OT_D0  = 20, /* Digit zero */
 165   OT_DB  = OT_N, /* Dot below */
 166   OT_GB  = OT_PLACEHOLDER,
 167   OT_MH  = 21, /* Various consonant medial types */
 168   OT_MR  = 22, /* Various consonant medial types */
 169   OT_MW  = 23, /* Various consonant medial types */
 170   OT_MY  = 24, /* Various consonant medial types */
 171   OT_PT  = 25, /* Pwo and other tones */
 172   OT_VAbv = 26,
 173   OT_VBlw = 27,
 174   OT_VPre = 28,
 175   OT_VPst = 29,
 176   OT_VS   = 30, /* Variation selectors */
 177   OT_P    = 31  /* Punctuation */
 178 };
 179 
 180 
 181 static inline bool
 182 is_one_of (const hb_glyph_info_t &info, unsigned int flags)
 183 {
 184   /* If it ligated, all bets are off. */
 185   if (_hb_glyph_info_ligated (&info)) return false;
 186   return !!(FLAG_SAFE (info.myanmar_category()) & flags);
 187 }
 188 
 189 static inline bool
 190 is_consonant (const hb_glyph_info_t &info)
 191 {
 192   return is_one_of (info, CONSONANT_FLAGS);
 193 }
 194 
 195 
 196 static inline void
 197 set_myanmar_properties (hb_glyph_info_t &info)
 198 {
 199   hb_codepoint_t u = info.codepoint;
 200   unsigned int type = hb_indic_get_categories (u);
 201   indic_category_t cat = (indic_category_t) (type & 0x7Fu);
 202   indic_position_t pos = (indic_position_t) (type >> 8);
 203 
 204   /* Myanmar
 205    * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
 206    */
 207   if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu)))
 208     cat = (indic_category_t) OT_VS;
 209 
 210   switch (u)
 211   {
 212     case 0x104Eu:
 213       cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */
 214       break;
 215 
 216     case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u:
 217     case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u:
 218     case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu:
 219     case 0x25FEu:
 220       cat = (indic_category_t) OT_GB;
 221       break;
 222 
 223     case 0x1004u: case 0x101Bu: case 0x105Au:
 224       cat = (indic_category_t) OT_Ra;
 225       break;
 226 
 227     case 0x1032u: case 0x1036u:
 228       cat = (indic_category_t) OT_A;
 229       break;
 230 
 231     case 0x103Au:
 232       cat = (indic_category_t) OT_As;
 233       break;
 234 
 235     case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u:
 236     case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u:
 237     case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u:
 238     case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u:
 239     case 0x1097u: case 0x1098u: case 0x1099u:
 240       cat = (indic_category_t) OT_D;
 241       break;
 242 
 243     case 0x1040u:
 244       cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
 245       break;
 246 
 247     case 0x103Eu: case 0x1060u:
 248       cat = (indic_category_t) OT_MH;
 249       break;
 250 
 251     case 0x103Cu:
 252       cat = (indic_category_t) OT_MR;
 253       break;
 254 
 255     case 0x103Du: case 0x1082u:
 256       cat = (indic_category_t) OT_MW;
 257       break;
 258 
 259     case 0x103Bu: case 0x105Eu: case 0x105Fu:
 260       cat = (indic_category_t) OT_MY;
 261       break;
 262 
 263     case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au:
 264     case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu:
 265       cat = (indic_category_t) OT_PT;
 266       break;
 267 
 268     case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u:
 269     case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du:
 270     case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu:
 271       cat = (indic_category_t) OT_SM;
 272       break;
 273 
 274     case 0x104Au: case 0x104Bu:
 275       cat = (indic_category_t) OT_P;
 276       break;
 277   }
 278 
 279   if (cat == OT_M)
 280   {
 281     switch ((int) pos)
 282     {
 283       case POS_PRE_C:   cat = (indic_category_t) OT_VPre;
 284                         pos = POS_PRE_M;                  break;
 285       case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break;
 286       case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break;
 287       case POS_POST_C:  cat = (indic_category_t) OT_VPst; break;
 288     }
 289   }
 290 
 291   info.myanmar_category() = (myanmar_category_t) cat;
 292   info.myanmar_position() = pos;
 293 }
 294 
 295 
 296 
 297 static void
 298 setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
 299                    hb_buffer_t              *buffer,
 300                    hb_font_t                *font HB_UNUSED)
 301 {
 302   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
 303   HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
 304 
 305   /* We cannot setup masks here.  We save information about characters
 306    * and setup masks later on in a pause-callback. */
 307 
 308   unsigned int count = buffer->len;
 309   hb_glyph_info_t *info = buffer->info;
 310   for (unsigned int i = 0; i < count; i++)
 311     set_myanmar_properties (info[i]);
 312 }
 313 
 314 static void
 315 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 316                  hb_font_t *font HB_UNUSED,
 317                  hb_buffer_t *buffer)
 318 {
 319   find_syllables (buffer);
 320 }
 321 
 322 static int
 323 compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 324 {
 325   int a = pa->myanmar_position();
 326   int b = pb->myanmar_position();
 327 
 328   return a < b ? -1 : a == b ? 0 : +1;
 329 }
 330 
 331 
 332 /* Rules from:
 333  * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */
 334 
 335 static void
 336 initial_reordering_consonant_syllable (hb_buffer_t *buffer,
 337                                        unsigned int start, unsigned int end)
 338 {
 339   hb_glyph_info_t *info = buffer->info;
 340 
 341   unsigned int base = end;
 342   bool has_reph = false;
 343 
 344   {
 345     unsigned int limit = start;
 346     if (start + 3 <= end &&
 347         info[start  ].myanmar_category() == OT_Ra &&
 348         info[start+1].myanmar_category() == OT_As &&
 349         info[start+2].myanmar_category() == OT_H)
 350     {
 351       limit += 3;
 352       base = start;
 353       has_reph = true;
 354     }
 355 
 356     {
 357       if (!has_reph)
 358         base = limit;
 359 
 360       for (unsigned int i = limit; i < end; i++)
 361         if (is_consonant (info[i]))
 362         {
 363           base = i;
 364           break;
 365         }
 366     }
 367   }
 368 
 369   /* Reorder! */
 370   {
 371     unsigned int i = start;
 372     for (; i < start + (has_reph ? 3 : 0); i++)
 373       info[i].myanmar_position() = POS_AFTER_MAIN;
 374     for (; i < base; i++)
 375       info[i].myanmar_position() = POS_PRE_C;
 376     if (i < end)
 377     {
 378       info[i].myanmar_position() = POS_BASE_C;
 379       i++;
 380     }
 381     indic_position_t pos = POS_AFTER_MAIN;
 382     /* The following loop may be ugly, but it implements all of
 383      * Myanmar reordering! */
 384     for (; i < end; i++)
 385     {
 386       if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
 387       {
 388         info[i].myanmar_position() = POS_PRE_C;
 389         continue;
 390       }
 391       if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */
 392       {
 393         continue;
 394       }
 395 
 396       if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
 397       {
 398         pos = POS_BELOW_C;
 399         info[i].myanmar_position() = pos;
 400         continue;
 401       }
 402 
 403       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
 404       {
 405         info[i].myanmar_position() = POS_BEFORE_SUB;
 406         continue;
 407       }
 408       if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
 409       {
 410         info[i].myanmar_position() = pos;
 411         continue;
 412       }
 413       if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
 414       {
 415         pos = POS_AFTER_SUB;
 416         info[i].myanmar_position() = pos;
 417         continue;
 418       }
 419       info[i].myanmar_position() = pos;
 420     }
 421   }
 422 
 423   /* Sit tight, rock 'n roll! */
 424   buffer->sort (start, end, compare_myanmar_order);
 425 }
 426 
 427 static void
 428 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 429                              hb_face_t *face,
 430                              hb_buffer_t *buffer,
 431                              unsigned int start, unsigned int end)
 432 {
 433   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 434   switch (syllable_type) {
 435 
 436     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
 437     case consonant_syllable:
 438       initial_reordering_consonant_syllable  (buffer, start, end);
 439       break;
 440 
 441     case punctuation_cluster:
 442     case non_myanmar_cluster:
 443       break;
 444   }
 445 }
 446 
 447 static inline void
 448 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 449                        hb_font_t *font,
 450                        hb_buffer_t *buffer)
 451 {
 452   /* Note: This loop is extra overhead, but should not be measurable. */
 453   bool has_broken_syllables = false;
 454   unsigned int count = buffer->len;
 455   hb_glyph_info_t *info = buffer->info;
 456   for (unsigned int i = 0; i < count; i++)
 457     if ((info[i].syllable() & 0x0F) == broken_cluster)
 458     {
 459       has_broken_syllables = true;
 460       break;
 461     }
 462   if (likely (!has_broken_syllables))
 463     return;
 464 
 465 
 466   hb_codepoint_t dottedcircle_glyph;
 467   if (!font->get_glyph (0x25CCu, 0, &dottedcircle_glyph))
 468     return;
 469 
 470   hb_glyph_info_t dottedcircle = {0};
 471   dottedcircle.codepoint = 0x25CCu;
 472   set_myanmar_properties (dottedcircle);
 473   dottedcircle.codepoint = dottedcircle_glyph;
 474 
 475   buffer->clear_output ();
 476 
 477   buffer->idx = 0;
 478   unsigned int last_syllable = 0;
 479   while (buffer->idx < buffer->len)
 480   {
 481     unsigned int syllable = buffer->cur().syllable();
 482     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 483     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 484     {
 485       last_syllable = syllable;
 486 
 487       hb_glyph_info_t info = dottedcircle;
 488       info.cluster = buffer->cur().cluster;
 489       info.mask = buffer->cur().mask;
 490       info.syllable() = buffer->cur().syllable();
 491 
 492       buffer->output_info (info);
 493     }
 494     else
 495       buffer->next_glyph ();
 496   }
 497 
 498   buffer->swap_buffers ();
 499 }
 500 
 501 static void
 502 initial_reordering (const hb_ot_shape_plan_t *plan,
 503                     hb_font_t *font,
 504                     hb_buffer_t *buffer)
 505 {
 506   insert_dotted_circles (plan, font, buffer);
 507 
 508   foreach_syllable (buffer, start, end)
 509     initial_reordering_syllable (plan, font->face, buffer, start, end);
 510 }
 511 
 512 static void
 513 final_reordering (const hb_ot_shape_plan_t *plan,
 514                   hb_font_t *font HB_UNUSED,
 515                   hb_buffer_t *buffer)
 516 {
 517   hb_glyph_info_t *info = buffer->info;
 518   unsigned int count = buffer->len;
 519 
 520   /* Zero syllables now... */
 521   for (unsigned int i = 0; i < count; i++)
 522     info[i].syllable() = 0;
 523 
 524   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
 525   HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
 526 }
 527 
 528 
 529 /* Uniscribe seems to have a shaper for 'mymr' that is like the
 530  * generic shaper, except that it zeros mark advances GDEF_LATE. */
 531 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old =
 532 {
 533   "default",
 534   NULL, /* collect_features */
 535   NULL, /* override_features */
 536   NULL, /* data_create */
 537   NULL, /* data_destroy */
 538   NULL, /* preprocess_text */
 539   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
 540   NULL, /* decompose */
 541   NULL, /* compose */
 542   NULL, /* setup_masks */
 543   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
 544   true, /* fallback_position */
 545 };
 546 
 547 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
 548 {
 549   "myanmar",
 550   collect_features_myanmar,
 551   override_features_myanmar,
 552   NULL, /* data_create */
 553   NULL, /* data_destroy */
 554   NULL, /* preprocess_text */
 555   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 556   NULL, /* decompose */
 557   NULL, /* compose */
 558   setup_masks_myanmar,
 559   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
 560   false, /* fallback_position */
 561 };