1 /*
   2  * Copyright © 2011,2012  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #include "hb-ot-shape-complex-khmer-private.hh"
  28 #include "hb-ot-layout-private.hh"
  29 
  30 
  31 /*
  32  * Khmer shaper.
  33  */
  34 
  35 struct feature_list_t {
  36   hb_tag_t tag;
  37   hb_ot_map_feature_flags_t flags;
  38 };
  39 
  40 static const feature_list_t
  41 khmer_features[] =
  42 {
  43   /*
  44    * Basic features.
  45    * These features are applied in order, one at a time, after initial_reordering.
  46    */
  47   {HB_TAG('p','r','e','f'), F_NONE},
  48   {HB_TAG('b','l','w','f'), F_NONE},
  49   {HB_TAG('a','b','v','f'), F_NONE},
  50   {HB_TAG('p','s','t','f'), F_NONE},
  51   {HB_TAG('c','f','a','r'), F_NONE},
  52   /*
  53    * Other features.
  54    * These features are applied all at once, after final_reordering.
  55    * Default Bengali font in Windows for example has intermixed
  56    * lookups for init,pres,abvs,blws features.
  57    */
  58   {HB_TAG('p','r','e','s'), F_GLOBAL},
  59   {HB_TAG('a','b','v','s'), F_GLOBAL},
  60   {HB_TAG('b','l','w','s'), F_GLOBAL},
  61   {HB_TAG('p','s','t','s'), F_GLOBAL},
  62   /* Positioning features, though we don't care about the types. */
  63   {HB_TAG('d','i','s','t'), F_GLOBAL},
  64   {HB_TAG('a','b','v','m'), F_GLOBAL},
  65   {HB_TAG('b','l','w','m'), F_GLOBAL},
  66 };
  67 
  68 /*
  69  * Must be in the same order as the khmer_features array.
  70  */
  71 enum {
  72   PREF,
  73   BLWF,
  74   ABVF,
  75   PSTF,
  76   CFAR,
  77 
  78   _PRES,
  79   _ABVS,
  80   _BLWS,
  81   _PSTS,
  82   _DIST,
  83   _ABVM,
  84   _BLWM,
  85 
  86   KHMER_NUM_FEATURES,
  87   KHMER_BASIC_FEATURES = _PRES /* Don't forget to update this! */
  88 };
  89 
  90 static void
  91 setup_syllables (const hb_ot_shape_plan_t *plan,
  92                  hb_font_t *font,
  93                  hb_buffer_t *buffer);
  94 static void
  95 initial_reordering (const hb_ot_shape_plan_t *plan,
  96                     hb_font_t *font,
  97                     hb_buffer_t *buffer);
  98 static void
  99 final_reordering (const hb_ot_shape_plan_t *plan,
 100                   hb_font_t *font,
 101                   hb_buffer_t *buffer);
 102 static void
 103 clear_syllables (const hb_ot_shape_plan_t *plan,
 104                  hb_font_t *font,
 105                  hb_buffer_t *buffer);
 106 
 107 static void
 108 collect_features_khmer (hb_ot_shape_planner_t *plan)
 109 {
 110   hb_ot_map_builder_t *map = &plan->map;
 111 
 112   /* Do this before any lookups have been applied. */
 113   map->add_gsub_pause (setup_syllables);
 114 
 115   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 116   /* The Indic specs do not require ccmp, but we apply it here since if
 117    * there is a use of it, it's typically at the beginning. */
 118   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 119 
 120 
 121   unsigned int i = 0;
 122   map->add_gsub_pause (initial_reordering);
 123   for (; i < KHMER_BASIC_FEATURES; i++) {
 124     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
 125     map->add_gsub_pause (nullptr);
 126   }
 127   map->add_gsub_pause (final_reordering);
 128   for (; i < KHMER_NUM_FEATURES; i++) {
 129     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
 130   }
 131 
 132   map->add_global_bool_feature (HB_TAG('c','a','l','t'));
 133   map->add_global_bool_feature (HB_TAG('c','l','i','g'));
 134 
 135   map->add_gsub_pause (clear_syllables);
 136 }
 137 
 138 static void
 139 override_features_khmer (hb_ot_shape_planner_t *plan)
 140 {
 141   /* Uniscribe does not apply 'kern' in Khmer. */
 142   if (hb_options ().uniscribe_bug_compatible)
 143   {
 144     plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);
 145   }
 146 
 147   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
 148 }
 149 
 150 
 151 struct would_substitute_feature_t
 152 {
 153   inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
 154   {
 155     zero_context = zero_context_;
 156     map->get_stage_lookups (0/*GSUB*/,
 157                             map->get_feature_stage (0/*GSUB*/, feature_tag),
 158                             &lookups, &count);
 159   }
 160 
 161   inline bool would_substitute (const hb_codepoint_t *glyphs,
 162                                 unsigned int          glyphs_count,
 163                                 hb_face_t            *face) const
 164   {
 165     for (unsigned int i = 0; i < count; i++)
 166       if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
 167         return true;
 168     return false;
 169   }
 170 
 171   private:
 172   const hb_ot_map_t::lookup_map_t *lookups;
 173   unsigned int count;
 174   bool zero_context;
 175 };
 176 
 177 struct khmer_shape_plan_t
 178 {
 179   ASSERT_POD ();
 180 
 181   inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
 182   {
 183     hb_codepoint_t glyph = virama_glyph;
 184     if (unlikely (virama_glyph == (hb_codepoint_t) -1))
 185     {
 186       if (!font->get_nominal_glyph (0x17D2u, &glyph))
 187         glyph = 0;
 188       /* Technically speaking, the spec says we should apply 'locl' to virama too.
 189        * Maybe one day... */
 190 
 191       /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
 192        * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
 193       virama_glyph = glyph;
 194     }
 195 
 196     *pglyph = glyph;
 197     return glyph != 0;
 198   }
 199 
 200   mutable hb_codepoint_t virama_glyph;
 201 
 202   would_substitute_feature_t pref;
 203 
 204   hb_mask_t mask_array[KHMER_NUM_FEATURES];
 205 };
 206 
 207 static void *
 208 data_create_khmer (const hb_ot_shape_plan_t *plan)
 209 {
 210   khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
 211   if (unlikely (!khmer_plan))
 212     return nullptr;
 213 
 214   khmer_plan->virama_glyph = (hb_codepoint_t) -1;
 215 
 216   khmer_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), true);
 217 
 218   for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
 219     khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
 220                                  0 : plan->map.get_1_mask (khmer_features[i].tag);
 221 
 222   return khmer_plan;
 223 }
 224 
 225 static void
 226 data_destroy_khmer (void *data)
 227 {
 228   free (data);
 229 }
 230 
 231 
 232 enum syllable_type_t {
 233   consonant_syllable,
 234   broken_cluster,
 235   non_khmer_cluster,
 236 };
 237 
 238 #include "hb-ot-shape-complex-khmer-machine.hh"
 239 
 240 static void
 241 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
 242                    hb_buffer_t              *buffer,
 243                    hb_font_t                *font HB_UNUSED)
 244 {
 245   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
 246   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
 247 
 248   /* We cannot setup masks here.  We save information about characters
 249    * and setup masks later on in a pause-callback. */
 250 
 251   unsigned int count = buffer->len;
 252   hb_glyph_info_t *info = buffer->info;
 253   for (unsigned int i = 0; i < count; i++)
 254     set_khmer_properties (info[i]);
 255 }
 256 
 257 static void
 258 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 259                  hb_font_t *font HB_UNUSED,
 260                  hb_buffer_t *buffer)
 261 {
 262   find_syllables (buffer);
 263   foreach_syllable (buffer, start, end)
 264     buffer->unsafe_to_break (start, end);
 265 }
 266 
 267 static int
 268 compare_khmer_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 269 {
 270   int a = pa->khmer_position();
 271   int b = pb->khmer_position();
 272 
 273   return a < b ? -1 : a == b ? 0 : +1;
 274 }
 275 
 276 
 277 /* Rules from:
 278  * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
 279 
 280 static void
 281 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
 282                                        hb_face_t *face,
 283                                        hb_buffer_t *buffer,
 284                                        unsigned int start, unsigned int end)
 285 {
 286   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
 287   hb_glyph_info_t *info = buffer->info;
 288 
 289   /* 1. Khmer shaping assumes that a syllable will begin with a Cons, IndV, or Number. */
 290 
 291   /* The first consonant is always the base. */
 292   unsigned int base = start;
 293   info[base].khmer_position() = POS_BASE_C;
 294 
 295   /* Mark all subsequent consonants as below. */
 296   for (unsigned int i = base + 1; i < end; i++)
 297     if (is_consonant_or_vowel (info[i]))
 298       info[i].khmer_position() = POS_BELOW_C;
 299 
 300   /* Mark final consonants.  A final consonant is one appearing after a matra,
 301    * like in Khmer. */
 302   for (unsigned int i = base + 1; i < end; i++)
 303     if (info[i].khmer_category() == OT_M) {
 304       for (unsigned int j = i + 1; j < end; j++)
 305         if (is_consonant_or_vowel (info[j])) {
 306           info[j].khmer_position() = POS_FINAL_C;
 307           break;
 308         }
 309       break;
 310     }
 311 
 312   /* Attach misc marks to previous char to move with them. */
 313   {
 314     khmer_position_t last_pos = POS_START;
 315     for (unsigned int i = start; i < end; i++)
 316     {
 317       if ((FLAG_UNSAFE (info[i].khmer_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_Coeng))))
 318       {
 319         info[i].khmer_position() = last_pos;
 320         if (unlikely (info[i].khmer_category() == OT_Coeng &&
 321                       info[i].khmer_position() == POS_PRE_M))
 322         {
 323           /*
 324            * Uniscribe doesn't move the Halant with Left Matra.
 325            * TEST: U+092B,U+093F,U+094DE
 326            * We follow.  This is important for the Sinhala
 327            * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
 328            * where U+0DD9 is a left matra and U+0DCA is the virama.
 329            * We don't want to move the virama with the left matra.
 330            * TEST: U+0D9A,U+0DDA
 331            */
 332           for (unsigned int j = i; j > start; j--)
 333             if (info[j - 1].khmer_position() != POS_PRE_M) {
 334               info[i].khmer_position() = info[j - 1].khmer_position();
 335               break;
 336             }
 337         }
 338       } else if (info[i].khmer_position() != POS_SMVD) {
 339         last_pos = (khmer_position_t) info[i].khmer_position();
 340       }
 341     }
 342   }
 343   /* For post-base consonants let them own anything before them
 344    * since the last consonant or matra. */
 345   {
 346     unsigned int last = base;
 347     for (unsigned int i = base + 1; i < end; i++)
 348       if (is_consonant_or_vowel (info[i]))
 349       {
 350         for (unsigned int j = last + 1; j < i; j++)
 351           if (info[j].khmer_position() < POS_SMVD)
 352             info[j].khmer_position() = info[i].khmer_position();
 353         last = i;
 354       } else if (info[i].khmer_category() == OT_M)
 355         last = i;
 356   }
 357 
 358   {
 359     /* Use syllable() for sort accounting temporarily. */
 360     unsigned int syllable = info[start].syllable();
 361     for (unsigned int i = start; i < end; i++)
 362       info[i].syllable() = i - start;
 363 
 364     /* Sit tight, rock 'n roll! */
 365     hb_stable_sort (info + start, end - start, compare_khmer_order);
 366     /* Find base again */
 367     base = end;
 368     for (unsigned int i = start; i < end; i++)
 369       if (info[i].khmer_position() == POS_BASE_C)
 370       {
 371         base = i;
 372         break;
 373       }
 374 
 375     /* Note!  syllable() is a one-byte field. */
 376     for (unsigned int i = base; i < end; i++)
 377       if (info[i].syllable() != 255)
 378       {
 379         unsigned int max = i;
 380         unsigned int j = start + info[i].syllable();
 381         while (j != i)
 382         {
 383           max = MAX (max, j);
 384           unsigned int next = start + info[j].syllable();
 385           info[j].syllable() = 255; /* So we don't process j later again. */
 386           j = next;
 387         }
 388         if (i != max)
 389           buffer->merge_clusters (i, max + 1);
 390       }
 391 
 392     /* Put syllable back in. */
 393     for (unsigned int i = start; i < end; i++)
 394       info[i].syllable() = syllable;
 395   }
 396 
 397   /* Setup masks now */
 398 
 399   {
 400     hb_mask_t mask;
 401 
 402     /* Post-base */
 403     mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
 404     for (unsigned int i = base + 1; i < end; i++)
 405       info[i].mask  |= mask;
 406   }
 407 
 408   unsigned int pref_len = 2;
 409   if (khmer_plan->mask_array[PREF] && base + pref_len < end)
 410   {
 411     /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
 412     for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
 413       hb_codepoint_t glyphs[2];
 414       for (unsigned int j = 0; j < pref_len; j++)
 415         glyphs[j] = info[i + j].codepoint;
 416       if (khmer_plan->pref.would_substitute (glyphs, pref_len, face))
 417       {
 418         for (unsigned int j = 0; j < pref_len; j++)
 419           info[i++].mask |= khmer_plan->mask_array[PREF];
 420 
 421         /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
 422          * Read the feature spec.
 423          * This allows distinguishing the following cases with MS Khmer fonts:
 424          * U+1784,U+17D2,U+179A,U+17D2,U+1782
 425          * U+1784,U+17D2,U+1782,U+17D2,U+179A
 426          */
 427         if (khmer_plan->mask_array[CFAR])
 428           for (; i < end; i++)
 429             info[i].mask |= khmer_plan->mask_array[CFAR];
 430 
 431         break;
 432       }
 433     }
 434   }
 435 }
 436 
 437 static void
 438 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 439                              hb_face_t *face,
 440                              hb_buffer_t *buffer,
 441                              unsigned int start, unsigned int end)
 442 {
 443   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 444   switch (syllable_type)
 445   {
 446     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
 447     case consonant_syllable:
 448      initial_reordering_consonant_syllable (plan, face, buffer, start, end);
 449      break;
 450 
 451     case non_khmer_cluster:
 452       break;
 453   }
 454 }
 455 
 456 static inline void
 457 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 458                        hb_font_t *font,
 459                        hb_buffer_t *buffer)
 460 {
 461   /* Note: This loop is extra overhead, but should not be measurable. */
 462   bool has_broken_syllables = false;
 463   unsigned int count = buffer->len;
 464   hb_glyph_info_t *info = buffer->info;
 465   for (unsigned int i = 0; i < count; i++)
 466     if ((info[i].syllable() & 0x0F) == broken_cluster)
 467     {
 468       has_broken_syllables = true;
 469       break;
 470     }
 471   if (likely (!has_broken_syllables))
 472     return;
 473 
 474 
 475   hb_codepoint_t dottedcircle_glyph;
 476   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
 477     return;
 478 
 479   hb_glyph_info_t dottedcircle = {0};
 480   dottedcircle.codepoint = 0x25CCu;
 481   set_khmer_properties (dottedcircle);
 482   dottedcircle.codepoint = dottedcircle_glyph;
 483 
 484   buffer->clear_output ();
 485 
 486   buffer->idx = 0;
 487   unsigned int last_syllable = 0;
 488   while (buffer->idx < buffer->len && buffer->successful)
 489   {
 490     unsigned int syllable = buffer->cur().syllable();
 491     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 492     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 493     {
 494       last_syllable = syllable;
 495 
 496       hb_glyph_info_t ginfo = dottedcircle;
 497       ginfo.cluster = buffer->cur().cluster;
 498       ginfo.mask = buffer->cur().mask;
 499       ginfo.syllable() = buffer->cur().syllable();
 500       /* TODO Set glyph_props? */
 501 
 502       /* Insert dottedcircle after possible Repha. */
 503       while (buffer->idx < buffer->len && buffer->successful &&
 504              last_syllable == buffer->cur().syllable() &&
 505              buffer->cur().khmer_category() == OT_Repha)
 506         buffer->next_glyph ();
 507 
 508       buffer->output_info (ginfo);
 509     }
 510     else
 511       buffer->next_glyph ();
 512   }
 513 
 514   buffer->swap_buffers ();
 515 }
 516 
 517 static void
 518 initial_reordering (const hb_ot_shape_plan_t *plan,
 519                     hb_font_t *font,
 520                     hb_buffer_t *buffer)
 521 {
 522   insert_dotted_circles (plan, font, buffer);
 523 
 524   foreach_syllable (buffer, start, end)
 525     initial_reordering_syllable (plan, font->face, buffer, start, end);
 526 }
 527 
 528 static void
 529 final_reordering_syllable (const hb_ot_shape_plan_t *plan,
 530                            hb_buffer_t *buffer,
 531                            unsigned int start, unsigned int end)
 532 {
 533   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
 534   hb_glyph_info_t *info = buffer->info;
 535 
 536 
 537   /* This function relies heavily on halant glyphs.  Lots of ligation
 538    * and possibly multiple substitutions happened prior to this
 539    * phase, and that might have messed up our properties.  Recover
 540    * from a particular case of that where we're fairly sure that a
 541    * class of OT_Coeng is desired but has been lost. */
 542   if (khmer_plan->virama_glyph)
 543   {
 544     unsigned int virama_glyph = khmer_plan->virama_glyph;
 545     for (unsigned int i = start; i < end; i++)
 546       if (info[i].codepoint == virama_glyph &&
 547           _hb_glyph_info_ligated (&info[i]) &&
 548           _hb_glyph_info_multiplied (&info[i]))
 549       {
 550         /* This will make sure that this glyph passes is_coeng() test. */
 551         info[i].khmer_category() = OT_Coeng;
 552         _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
 553       }
 554   }
 555 
 556 
 557   /* 4. Final reordering:
 558    *
 559    * After the localized forms and basic shaping forms GSUB features have been
 560    * applied (see below), the shaping engine performs some final glyph
 561    * reordering before applying all the remaining font features to the entire
 562    * syllable.
 563    */
 564 
 565   bool try_pref = !!khmer_plan->mask_array[PREF];
 566 
 567   /* Find base again */
 568   unsigned int base;
 569   for (base = start; base < end; base++)
 570     if (info[base].khmer_position() >= POS_BASE_C)
 571     {
 572       if (try_pref && base + 1 < end)
 573       {
 574         for (unsigned int i = base + 1; i < end; i++)
 575           if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
 576           {
 577             if (!(_hb_glyph_info_substituted (&info[i]) &&
 578                   _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
 579             {
 580               /* Ok, this was a 'pref' candidate but didn't form any.
 581                * Base is around here... */
 582               base = i;
 583               while (base < end && is_coeng (info[base]))
 584                 base++;
 585               info[base].khmer_position() = POS_BASE_C;
 586 
 587               try_pref = false;
 588             }
 589             break;
 590           }
 591       }
 592 
 593       if (start < base && info[base].khmer_position() > POS_BASE_C)
 594         base--;
 595       break;
 596     }
 597   if (base == end && start < base &&
 598       is_one_of (info[base - 1], FLAG (OT_ZWJ)))
 599     base--;
 600   if (base < end)
 601     while (start < base &&
 602            is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_Coeng))))
 603       base--;
 604 
 605 
 606   /*   o Reorder matras:
 607    *
 608    *     If a pre-base matra character had been reordered before applying basic
 609    *     features, the glyph can be moved closer to the main consonant based on
 610    *     whether half-forms had been formed. Actual position for the matra is
 611    *     defined as “after last standalone halant glyph, after initial matra
 612    *     position and before the main consonant”. If ZWJ or ZWNJ follow this
 613    *     halant, position is moved after it.
 614    */
 615 
 616   if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
 617   {
 618     /* If we lost track of base, alas, position before last thingy. */
 619     unsigned int new_pos = base == end ? base - 2 : base - 1;
 620 
 621     while (new_pos > start &&
 622            !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_Coeng)))))
 623       new_pos--;
 624 
 625     /* If we found no Halant we are done.
 626      * Otherwise only proceed if the Halant does
 627      * not belong to the Matra itself! */
 628     if (is_coeng (info[new_pos]) &&
 629         info[new_pos].khmer_position() != POS_PRE_M)
 630     {
 631       /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 632       if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
 633         new_pos++;
 634     }
 635     else
 636       new_pos = start; /* No move. */
 637 
 638     if (start < new_pos && info[new_pos].khmer_position () != POS_PRE_M)
 639     {
 640       /* Now go see if there's actually any matras... */
 641       for (unsigned int i = new_pos; i > start; i--)
 642         if (info[i - 1].khmer_position () == POS_PRE_M)
 643         {
 644           unsigned int old_pos = i - 1;
 645           if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
 646             base--;
 647 
 648           hb_glyph_info_t tmp = info[old_pos];
 649           memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
 650           info[new_pos] = tmp;
 651 
 652           /* Note: this merge_clusters() is intentionally *after* the reordering.
 653            * Indic matra reordering is special and tricky... */
 654           buffer->merge_clusters (new_pos, MIN (end, base + 1));
 655 
 656           new_pos--;
 657         }
 658     } else {
 659       for (unsigned int i = start; i < base; i++)
 660         if (info[i].khmer_position () == POS_PRE_M) {
 661           buffer->merge_clusters (i, MIN (end, base + 1));
 662           break;
 663         }
 664     }
 665   }
 666 
 667 
 668   /*   o Reorder pre-base-reordering consonants:
 669    *
 670    *     If a pre-base-reordering consonant is found, reorder it according to
 671    *     the following rules:
 672    */
 673 
 674   if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
 675   {
 676     for (unsigned int i = base + 1; i < end; i++)
 677       if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
 678       {
 679         /*       1. Only reorder a glyph produced by substitution during application
 680          *          of the <pref> feature. (Note that a font may shape a Ra consonant with
 681          *          the feature generally but block it in certain contexts.)
 682          */
 683         /* Note: We just check that something got substituted.  We don't check that
 684          * the <pref> feature actually did it...
 685          *
 686          * Reorder pref only if it ligated. */
 687         if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
 688         {
 689           /*
 690            *       2. Try to find a target position the same way as for pre-base matra.
 691            *          If it is found, reorder pre-base consonant glyph.
 692            *
 693            *       3. If position is not found, reorder immediately before main
 694            *          consonant.
 695            */
 696 
 697           unsigned int new_pos = base;
 698           while (new_pos > start &&
 699                  !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_Coeng))))
 700             new_pos--;
 701 
 702           /* In Khmer coeng model, a H,Ra can go *after* matras.  If it goes after a
 703            * split matra, it should be reordered to *before* the left part of such matra. */
 704           if (new_pos > start && info[new_pos - 1].khmer_category() == OT_M)
 705           {
 706             unsigned int old_pos = i;
 707             for (unsigned int j = base + 1; j < old_pos; j++)
 708               if (info[j].khmer_category() == OT_M)
 709               {
 710                 new_pos--;
 711                 break;
 712               }
 713           }
 714 
 715           if (new_pos > start && is_coeng (info[new_pos - 1]))
 716           {
 717             /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 718             if (new_pos < end && is_joiner (info[new_pos]))
 719               new_pos++;
 720           }
 721 
 722           {
 723             unsigned int old_pos = i;
 724 
 725             buffer->merge_clusters (new_pos, old_pos + 1);
 726             hb_glyph_info_t tmp = info[old_pos];
 727             memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
 728             info[new_pos] = tmp;
 729 
 730             if (new_pos <= base && base < old_pos)
 731               base++;
 732           }
 733         }
 734 
 735         break;
 736       }
 737   }
 738 
 739 
 740   /*
 741    * Finish off the clusters and go home!
 742    */
 743   if (hb_options ().uniscribe_bug_compatible)
 744   {
 745     /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
 746      * This means, half forms are submerged into the main consonant's cluster.
 747      * This is unnecessary, and makes cursor positioning harder, but that's what
 748      * Uniscribe does. */
 749     buffer->merge_clusters (start, end);
 750   }
 751 }
 752 
 753 
 754 static void
 755 final_reordering (const hb_ot_shape_plan_t *plan,
 756                   hb_font_t *font HB_UNUSED,
 757                   hb_buffer_t *buffer)
 758 {
 759   unsigned int count = buffer->len;
 760   if (unlikely (!count)) return;
 761 
 762   foreach_syllable (buffer, start, end)
 763     final_reordering_syllable (plan, buffer, start, end);
 764 
 765   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
 766   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
 767 }
 768 
 769 
 770 static void
 771 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 772                  hb_font_t *font HB_UNUSED,
 773                  hb_buffer_t *buffer)
 774 {
 775   hb_glyph_info_t *info = buffer->info;
 776   unsigned int count = buffer->len;
 777   for (unsigned int i = 0; i < count; i++)
 778     info[i].syllable() = 0;
 779 }
 780 
 781 
 782 static bool
 783 decompose_khmer (const hb_ot_shape_normalize_context_t *c,
 784                  hb_codepoint_t  ab,
 785                  hb_codepoint_t *a,
 786                  hb_codepoint_t *b)
 787 {
 788   switch (ab)
 789   {
 790     /*
 791      * Decompose split matras that don't have Unicode decompositions.
 792      */
 793 
 794     /* Khmer */
 795     case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
 796     case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
 797     case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
 798     case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
 799     case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
 800   }
 801 
 802   return (bool) c->unicode->decompose (ab, a, b);
 803 }
 804 
 805 static bool
 806 compose_khmer (const hb_ot_shape_normalize_context_t *c,
 807                hb_codepoint_t  a,
 808                hb_codepoint_t  b,
 809                hb_codepoint_t *ab)
 810 {
 811   /* Avoid recomposing split matras. */
 812   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 813     return false;
 814 
 815   return (bool) c->unicode->compose (a, b, ab);
 816 }
 817 
 818 
 819 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
 820 {
 821   collect_features_khmer,
 822   override_features_khmer,
 823   data_create_khmer,
 824   data_destroy_khmer,
 825   nullptr, /* preprocess_text */
 826   nullptr, /* postprocess_glyphs */
 827   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 828   decompose_khmer,
 829   compose_khmer,
 830   setup_masks_khmer,
 831   nullptr, /* disable_otl */
 832   nullptr, /* reorder_marks */
 833   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
 834   false, /* fallback_position */
 835 };