1 /*
   2  * Copyright © 2011,2012  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #include "hb-ot-shape-complex-khmer-private.hh"
  28 #include "hb-ot-layout-private.hh"
  29 
  30 
  31 /*
  32  * Khmer shaper.
  33  */
  34 
  35 struct feature_list_t {
  36   hb_tag_t tag;
  37   hb_ot_map_feature_flags_t flags;
  38 };
  39 
  40 static const feature_list_t
  41 khmer_features[] =
  42 {
  43   /*
  44    * Basic features.
  45    * These features are applied in order, one at a time, after initial_reordering.
  46    */
  47   {HB_TAG('p','r','e','f'), F_NONE},
  48   {HB_TAG('b','l','w','f'), F_NONE},
  49   {HB_TAG('a','b','v','f'), F_NONE},
  50   {HB_TAG('p','s','t','f'), F_NONE},
  51   {HB_TAG('c','f','a','r'), F_NONE},
  52   /*
  53    * Other features.
  54    * These features are applied all at once, after final_reordering.
  55    * Default Bengali font in Windows for example has intermixed
  56    * lookups for init,pres,abvs,blws features.
  57    */
  58   {HB_TAG('p','r','e','s'), F_GLOBAL},
  59   {HB_TAG('a','b','v','s'), F_GLOBAL},
  60   {HB_TAG('b','l','w','s'), F_GLOBAL},
  61   {HB_TAG('p','s','t','s'), F_GLOBAL},
  62   /* Positioning features, though we don't care about the types. */
  63   {HB_TAG('d','i','s','t'), F_GLOBAL},
  64   {HB_TAG('a','b','v','m'), F_GLOBAL},
  65   {HB_TAG('b','l','w','m'), F_GLOBAL},
  66 };
  67 
  68 /*
  69  * Must be in the same order as the khmer_features array.
  70  */
  71 enum {
  72   PREF,
  73   BLWF,
  74   ABVF,
  75   PSTF,
  76   CFAR,
  77 
  78   _PRES,
  79   _ABVS,
  80   _BLWS,
  81   _PSTS,
  82   _DIST,
  83   _ABVM,
  84   _BLWM,
  85 
  86   KHMER_NUM_FEATURES,
  87   KHMER_BASIC_FEATURES = _PRES /* Don't forget to update this! */
  88 };
  89 
  90 static void
  91 setup_syllables (const hb_ot_shape_plan_t *plan,
  92                  hb_font_t *font,
  93                  hb_buffer_t *buffer);
  94 static void
  95 initial_reordering (const hb_ot_shape_plan_t *plan,
  96                     hb_font_t *font,
  97                     hb_buffer_t *buffer);
  98 static void
  99 final_reordering (const hb_ot_shape_plan_t *plan,
 100                   hb_font_t *font,
 101                   hb_buffer_t *buffer);
 102 static void
 103 clear_syllables (const hb_ot_shape_plan_t *plan,
 104                  hb_font_t *font,
 105                  hb_buffer_t *buffer);
 106 
 107 static void
 108 collect_features_khmer (hb_ot_shape_planner_t *plan)
 109 {
 110   hb_ot_map_builder_t *map = &plan->map;
 111 
 112   /* Do this before any lookups have been applied. */
 113   map->add_gsub_pause (setup_syllables);
 114 
 115   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 116   /* The Indic specs do not require ccmp, but we apply it here since if
 117    * there is a use of it, it's typically at the beginning. */
 118   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
 119 
 120 
 121   unsigned int i = 0;
 122   map->add_gsub_pause (initial_reordering);
 123   for (; i < KHMER_BASIC_FEATURES; i++) {
 124     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
 125     map->add_gsub_pause (nullptr);
 126   }
 127   map->add_gsub_pause (final_reordering);
 128   for (; i < KHMER_NUM_FEATURES; i++) {
 129     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
 130   }
 131 
 132   map->add_global_bool_feature (HB_TAG('c','a','l','t'));
 133   map->add_global_bool_feature (HB_TAG('c','l','i','g'));
 134 
 135   map->add_gsub_pause (clear_syllables);
 136 }
 137 
 138 static void
 139 override_features_khmer (hb_ot_shape_planner_t *plan)
 140 {
 141   /* Uniscribe does not apply 'kern' in Khmer. */
 142   if (hb_options ().uniscribe_bug_compatible)
 143   {
 144     plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);
 145   }
 146 
 147   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
 148 }
 149 
 150 
 151 struct would_substitute_feature_t
 152 {
 153   inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
 154   {
 155     zero_context = zero_context_;
 156     map->get_stage_lookups (0/*GSUB*/,
 157                             map->get_feature_stage (0/*GSUB*/, feature_tag),
 158                             &lookups, &count);
 159   }
 160 
 161   inline bool would_substitute (const hb_codepoint_t *glyphs,
 162                                 unsigned int          glyphs_count,
 163                                 hb_face_t            *face) const
 164   {
 165     for (unsigned int i = 0; i < count; i++)
 166       if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
 167         return true;
 168     return false;
 169   }
 170 
 171   private:
 172   const hb_ot_map_t::lookup_map_t *lookups;
 173   unsigned int count;
 174   bool zero_context;
 175 };
 176 
 177 struct khmer_shape_plan_t
 178 {
 179   ASSERT_POD ();
 180 
 181   inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
 182   {
 183     hb_codepoint_t glyph = virama_glyph;
 184     if (unlikely (virama_glyph == (hb_codepoint_t) -1))
 185     {
 186       if (!font->get_nominal_glyph (0x17D2u, &glyph))
 187         glyph = 0;
 188       /* Technically speaking, the spec says we should apply 'locl' to virama too.
 189        * Maybe one day... */
 190 
 191       /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
 192        * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
 193       virama_glyph = glyph;
 194     }
 195 
 196     *pglyph = glyph;
 197     return glyph != 0;
 198   }
 199 
 200   mutable hb_codepoint_t virama_glyph;
 201 
 202   would_substitute_feature_t pref;
 203 
 204   hb_mask_t mask_array[KHMER_NUM_FEATURES];
 205 };
 206 
 207 static void *
 208 data_create_khmer (const hb_ot_shape_plan_t *plan)
 209 {
 210   khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
 211   if (unlikely (!khmer_plan))
 212     return nullptr;
 213 
 214   khmer_plan->virama_glyph = (hb_codepoint_t) -1;
 215 
 216   khmer_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), true);
 217 
 218   for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
 219     khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
 220                                  0 : plan->map.get_1_mask (khmer_features[i].tag);
 221 
 222   return khmer_plan;
 223 }
 224 
 225 static void
 226 data_destroy_khmer (void *data)
 227 {
 228   free (data);
 229 }
 230 
 231 
 232 enum syllable_type_t {
 233   consonant_syllable,
 234   broken_cluster,
 235   non_khmer_cluster,
 236 };
 237 
 238 #include "hb-ot-shape-complex-khmer-machine.hh"
 239 
 240 static void
 241 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
 242                    hb_buffer_t              *buffer,
 243                    hb_font_t                *font HB_UNUSED)
 244 {
 245   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
 246   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
 247 
 248   /* We cannot setup masks here.  We save information about characters
 249    * and setup masks later on in a pause-callback. */
 250 
 251   unsigned int count = buffer->len;
 252   hb_glyph_info_t *info = buffer->info;
 253   for (unsigned int i = 0; i < count; i++)
 254     set_khmer_properties (info[i]);
 255 }
 256 
 257 static void
 258 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 259                  hb_font_t *font HB_UNUSED,
 260                  hb_buffer_t *buffer)
 261 {
 262   find_syllables (buffer);
 263   foreach_syllable (buffer, start, end)
 264     buffer->unsafe_to_break (start, end);
 265 }
 266 
 267 static int
 268 compare_khmer_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
 269 {
 270   int a = pa->khmer_position();
 271   int b = pb->khmer_position();
 272 
 273   return a < b ? -1 : a == b ? 0 : +1;
 274 }
 275 
 276 
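/* Rules from (Devanagari specification, whose reordering steps the code
 * below follows):
 * https://docs.microsoft.com/en-us/typography/script-development/devanagari
 * See also the Khmer specification:
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */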
 279 
 280 static void
 281 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
 282                                        hb_face_t *face,
 283                                        hb_buffer_t *buffer,
 284                                        unsigned int start, unsigned int end)
 285 {
 286   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
 287   hb_glyph_info_t *info = buffer->info;
 288 
 289   /* 1. Khmer shaping assumes that a syllable will begin with a Cons, IndV, or Number. */
 290 
 291   /* The first consonant is always the base. */
 292   unsigned int base = start;
 293   info[base].khmer_position() = POS_BASE_C;
 294 
 295   /* Mark all subsequent consonants as below. */
 296   for (unsigned int i = base + 1; i < end; i++)
 297     if (is_consonant_or_vowel (info[i]))
 298       info[i].khmer_position() = POS_BELOW_C;
 299 
 300   /* Mark final consonants.  A final consonant is one appearing after a matra,
 301    * like in Khmer. */
 302   for (unsigned int i = base + 1; i < end; i++)
 303     if (info[i].khmer_category() == OT_M) {
 304       for (unsigned int j = i + 1; j < end; j++)
 305         if (is_consonant_or_vowel (info[j])) {
 306           info[j].khmer_position() = POS_FINAL_C;
 307           break;
 308         }
 309       break;
 310     }
 311 
 312   /* Attach misc marks to previous char to move with them. */
 313   {
 314     khmer_position_t last_pos = POS_START;
 315     for (unsigned int i = start; i < end; i++)
 316     {
 317       if ((FLAG_UNSAFE (info[i].khmer_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_Coeng))))
 318       {
 319         info[i].khmer_position() = last_pos;
 320         if (unlikely (info[i].khmer_category() == OT_Coeng &&
 321                       info[i].khmer_position() == POS_PRE_M))
 322         {
 323           /*
 324            * Uniscribe doesn't move the Halant with Left Matra.
 325            * TEST: U+092B,U+093F,U+094DE
 326            * We follow.  This is important for the Sinhala
 327            * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
 328            * where U+0DD9 is a left matra and U+0DCA is the virama.
 329            * We don't want to move the virama with the left matra.
 330            * TEST: U+0D9A,U+0DDA
 331            */
 332           for (unsigned int j = i; j > start; j--)
 333             if (info[j - 1].khmer_position() != POS_PRE_M) {
 334               info[i].khmer_position() = info[j - 1].khmer_position();
 335               break;
 336             }
 337         }
 338       } else if (info[i].khmer_position() != POS_SMVD) {
 339         last_pos = (khmer_position_t) info[i].khmer_position();
 340       }
 341     }
 342   }
 343   /* For post-base consonants let them own anything before them
 344    * since the last consonant or matra. */
 345   {
 346     unsigned int last = base;
 347     for (unsigned int i = base + 1; i < end; i++)
 348       if (is_consonant_or_vowel (info[i]))
 349       {
 350         for (unsigned int j = last + 1; j < i; j++)
 351           if (info[j].khmer_position() < POS_SMVD)
 352             info[j].khmer_position() = info[i].khmer_position();
 353         last = i;
 354       } else if (info[i].khmer_category() == OT_M)
 355         last = i;
 356   }
 357 
 358   {
 359     /* Use syllable() for sort accounting temporarily. */
 360     unsigned int syllable = info[start].syllable();
 361     for (unsigned int i = start; i < end; i++)
 362       info[i].syllable() = i - start;
 363 
 364     /* Sit tight, rock 'n roll! */
 365     hb_stable_sort (info + start, end - start, compare_khmer_order);
 366     /* Find base again */
 367     base = end;
 368     for (unsigned int i = start; i < end; i++)
 369       if (info[i].khmer_position() == POS_BASE_C)
 370       {
 371         base = i;
 372         break;
 373       }
 374 
 375     if (unlikely (end - start >= 127))
 376       buffer->merge_clusters (start, end);
 377     else
 378       /* Note!  syllable() is a one-byte field. */
 379       for (unsigned int i = base; i < end; i++)
 380         if (info[i].syllable() != 255)
 381         {
 382           unsigned int max = i;
 383           unsigned int j = start + info[i].syllable();
 384           while (j != i)
 385           {
 386             max = MAX (max, j);
 387             unsigned int next = start + info[j].syllable();
 388             info[j].syllable() = 255; /* So we don't process j later again. */
 389             j = next;
 390           }
 391           if (i != max)
 392             buffer->merge_clusters (i, max + 1);
 393         }
 394 
 395     /* Put syllable back in. */
 396     for (unsigned int i = start; i < end; i++)
 397       info[i].syllable() = syllable;
 398   }
 399 
 400   /* Setup masks now */
 401 
 402   {
 403     hb_mask_t mask;
 404 
 405     /* Post-base */
 406     mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
 407     for (unsigned int i = base + 1; i < end; i++)
 408       info[i].mask  |= mask;
 409   }
 410 
 411   unsigned int pref_len = 2;
 412   if (khmer_plan->mask_array[PREF] && base + pref_len < end)
 413   {
 414     /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
 415     for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
 416       hb_codepoint_t glyphs[2];
 417       for (unsigned int j = 0; j < pref_len; j++)
 418         glyphs[j] = info[i + j].codepoint;
 419       if (khmer_plan->pref.would_substitute (glyphs, pref_len, face))
 420       {
 421         for (unsigned int j = 0; j < pref_len; j++)
 422           info[i++].mask |= khmer_plan->mask_array[PREF];
 423 
 424         /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
 425          * Read the feature spec.
 426          * This allows distinguishing the following cases with MS Khmer fonts:
 427          * U+1784,U+17D2,U+179A,U+17D2,U+1782
 428          * U+1784,U+17D2,U+1782,U+17D2,U+179A
 429          */
 430         if (khmer_plan->mask_array[CFAR])
 431           for (; i < end; i++)
 432             info[i].mask |= khmer_plan->mask_array[CFAR];
 433 
 434         break;
 435       }
 436     }
 437   }
 438 }
 439 
 440 static void
 441 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
 442                              hb_face_t *face,
 443                              hb_buffer_t *buffer,
 444                              unsigned int start, unsigned int end)
 445 {
 446   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
 447   switch (syllable_type)
 448   {
 449     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
 450     case consonant_syllable:
 451      initial_reordering_consonant_syllable (plan, face, buffer, start, end);
 452      break;
 453 
 454     case non_khmer_cluster:
 455       break;
 456   }
 457 }
 458 
 459 static inline void
 460 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
 461                        hb_font_t *font,
 462                        hb_buffer_t *buffer)
 463 {
 464   /* Note: This loop is extra overhead, but should not be measurable. */
 465   bool has_broken_syllables = false;
 466   unsigned int count = buffer->len;
 467   hb_glyph_info_t *info = buffer->info;
 468   for (unsigned int i = 0; i < count; i++)
 469     if ((info[i].syllable() & 0x0F) == broken_cluster)
 470     {
 471       has_broken_syllables = true;
 472       break;
 473     }
 474   if (likely (!has_broken_syllables))
 475     return;
 476 
 477 
 478   hb_codepoint_t dottedcircle_glyph;
 479   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
 480     return;
 481 
 482   hb_glyph_info_t dottedcircle = {0};
 483   dottedcircle.codepoint = 0x25CCu;
 484   set_khmer_properties (dottedcircle);
 485   dottedcircle.codepoint = dottedcircle_glyph;
 486 
 487   buffer->clear_output ();
 488 
 489   buffer->idx = 0;
 490   unsigned int last_syllable = 0;
 491   while (buffer->idx < buffer->len && buffer->successful)
 492   {
 493     unsigned int syllable = buffer->cur().syllable();
 494     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
 495     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
 496     {
 497       last_syllable = syllable;
 498 
 499       hb_glyph_info_t ginfo = dottedcircle;
 500       ginfo.cluster = buffer->cur().cluster;
 501       ginfo.mask = buffer->cur().mask;
 502       ginfo.syllable() = buffer->cur().syllable();
 503       /* TODO Set glyph_props? */
 504 
 505       /* Insert dottedcircle after possible Repha. */
 506       while (buffer->idx < buffer->len && buffer->successful &&
 507              last_syllable == buffer->cur().syllable() &&
 508              buffer->cur().khmer_category() == OT_Repha)
 509         buffer->next_glyph ();
 510 
 511       buffer->output_info (ginfo);
 512     }
 513     else
 514       buffer->next_glyph ();
 515   }
 516 
 517   buffer->swap_buffers ();
 518 }
 519 
 520 static void
 521 initial_reordering (const hb_ot_shape_plan_t *plan,
 522                     hb_font_t *font,
 523                     hb_buffer_t *buffer)
 524 {
 525   insert_dotted_circles (plan, font, buffer);
 526 
 527   foreach_syllable (buffer, start, end)
 528     initial_reordering_syllable (plan, font->face, buffer, start, end);
 529 }
 530 
 531 static void
 532 final_reordering_syllable (const hb_ot_shape_plan_t *plan,
 533                            hb_buffer_t *buffer,
 534                            unsigned int start, unsigned int end)
 535 {
 536   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
 537   hb_glyph_info_t *info = buffer->info;
 538 
 539 
 540   /* This function relies heavily on halant glyphs.  Lots of ligation
 541    * and possibly multiple substitutions happened prior to this
 542    * phase, and that might have messed up our properties.  Recover
 543    * from a particular case of that where we're fairly sure that a
 544    * class of OT_Coeng is desired but has been lost. */
 545   if (khmer_plan->virama_glyph)
 546   {
 547     unsigned int virama_glyph = khmer_plan->virama_glyph;
 548     for (unsigned int i = start; i < end; i++)
 549       if (info[i].codepoint == virama_glyph &&
 550           _hb_glyph_info_ligated (&info[i]) &&
 551           _hb_glyph_info_multiplied (&info[i]))
 552       {
 553         /* This will make sure that this glyph passes is_coeng() test. */
 554         info[i].khmer_category() = OT_Coeng;
 555         _hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
 556       }
 557   }
 558 
 559 
 560   /* 4. Final reordering:
 561    *
 562    * After the localized forms and basic shaping forms GSUB features have been
 563    * applied (see below), the shaping engine performs some final glyph
 564    * reordering before applying all the remaining font features to the entire
 565    * syllable.
 566    */
 567 
 568   bool try_pref = !!khmer_plan->mask_array[PREF];
 569 
 570   /* Find base again */
 571   unsigned int base;
 572   for (base = start; base < end; base++)
 573     if (info[base].khmer_position() >= POS_BASE_C)
 574     {
 575       if (try_pref && base + 1 < end)
 576       {
 577         for (unsigned int i = base + 1; i < end; i++)
 578           if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
 579           {
 580             if (!(_hb_glyph_info_substituted (&info[i]) &&
 581                   _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
 582             {
 583               /* Ok, this was a 'pref' candidate but didn't form any.
 584                * Base is around here... */
 585               base = i;
 586               while (base < end && is_coeng (info[base]))
 587                 base++;
 588               info[base].khmer_position() = POS_BASE_C;
 589 
 590               try_pref = false;
 591             }
 592             break;
 593           }
 594       }
 595 
 596       if (start < base && info[base].khmer_position() > POS_BASE_C)
 597         base--;
 598       break;
 599     }
 600   if (base == end && start < base &&
 601       is_one_of (info[base - 1], FLAG (OT_ZWJ)))
 602     base--;
 603   if (base < end)
 604     while (start < base &&
 605            is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_Coeng))))
 606       base--;
 607 
 608 
 609   /*   o Reorder matras:
 610    *
 611    *     If a pre-base matra character had been reordered before applying basic
 612    *     features, the glyph can be moved closer to the main consonant based on
 613    *     whether half-forms had been formed. Actual position for the matra is
 614    *     defined as “after last standalone halant glyph, after initial matra
 615    *     position and before the main consonant”. If ZWJ or ZWNJ follow this
 616    *     halant, position is moved after it.
 617    */
 618 
 619   if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
 620   {
 621     /* If we lost track of base, alas, position before last thingy. */
 622     unsigned int new_pos = base == end ? base - 2 : base - 1;
 623 
 624     while (new_pos > start &&
 625            !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_Coeng)))))
 626       new_pos--;
 627 
 628     /* If we found no Halant we are done.
 629      * Otherwise only proceed if the Halant does
 630      * not belong to the Matra itself! */
 631     if (is_coeng (info[new_pos]) &&
 632         info[new_pos].khmer_position() != POS_PRE_M)
 633     {
 634       /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 635       if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
 636         new_pos++;
 637     }
 638     else
 639       new_pos = start; /* No move. */
 640 
 641     if (start < new_pos && info[new_pos].khmer_position () != POS_PRE_M)
 642     {
 643       /* Now go see if there's actually any matras... */
 644       for (unsigned int i = new_pos; i > start; i--)
 645         if (info[i - 1].khmer_position () == POS_PRE_M)
 646         {
 647           unsigned int old_pos = i - 1;
 648           if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
 649             base--;
 650 
 651           hb_glyph_info_t tmp = info[old_pos];
 652           memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
 653           info[new_pos] = tmp;
 654 
 655           /* Note: this merge_clusters() is intentionally *after* the reordering.
 656            * Indic matra reordering is special and tricky... */
 657           buffer->merge_clusters (new_pos, MIN (end, base + 1));
 658 
 659           new_pos--;
 660         }
 661     } else {
 662       for (unsigned int i = start; i < base; i++)
 663         if (info[i].khmer_position () == POS_PRE_M) {
 664           buffer->merge_clusters (i, MIN (end, base + 1));
 665           break;
 666         }
 667     }
 668   }
 669 
 670 
 671   /*   o Reorder pre-base-reordering consonants:
 672    *
 673    *     If a pre-base-reordering consonant is found, reorder it according to
 674    *     the following rules:
 675    */
 676 
 677   if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
 678   {
 679     for (unsigned int i = base + 1; i < end; i++)
 680       if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
 681       {
 682         /*       1. Only reorder a glyph produced by substitution during application
 683          *          of the <pref> feature. (Note that a font may shape a Ra consonant with
 684          *          the feature generally but block it in certain contexts.)
 685          */
 686         /* Note: We just check that something got substituted.  We don't check that
 687          * the <pref> feature actually did it...
 688          *
 689          * Reorder pref only if it ligated. */
 690         if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
 691         {
 692           /*
 693            *       2. Try to find a target position the same way as for pre-base matra.
 694            *          If it is found, reorder pre-base consonant glyph.
 695            *
 696            *       3. If position is not found, reorder immediately before main
 697            *          consonant.
 698            */
 699 
 700           unsigned int new_pos = base;
 701           while (new_pos > start &&
 702                  !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_Coeng))))
 703             new_pos--;
 704 
 705           /* In Khmer coeng model, a H,Ra can go *after* matras.  If it goes after a
 706            * split matra, it should be reordered to *before* the left part of such matra. */
 707           if (new_pos > start && info[new_pos - 1].khmer_category() == OT_M)
 708           {
 709             unsigned int old_pos = i;
 710             for (unsigned int j = base + 1; j < old_pos; j++)
 711               if (info[j].khmer_category() == OT_M)
 712               {
 713                 new_pos--;
 714                 break;
 715               }
 716           }
 717 
 718           if (new_pos > start && is_coeng (info[new_pos - 1]))
 719           {
 720             /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
 721             if (new_pos < end && is_joiner (info[new_pos]))
 722               new_pos++;
 723           }
 724 
 725           {
 726             unsigned int old_pos = i;
 727 
 728             buffer->merge_clusters (new_pos, old_pos + 1);
 729             hb_glyph_info_t tmp = info[old_pos];
 730             memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
 731             info[new_pos] = tmp;
 732 
 733             if (new_pos <= base && base < old_pos)
 734               base++;
 735           }
 736         }
 737 
 738         break;
 739       }
 740   }
 741 
 742 
 743   /*
 744    * Finish off the clusters and go home!
 745    */
 746   if (hb_options ().uniscribe_bug_compatible)
 747   {
 748     /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
 749      * This means, half forms are submerged into the main consonant's cluster.
 750      * This is unnecessary, and makes cursor positioning harder, but that's what
 751      * Uniscribe does. */
 752     buffer->merge_clusters (start, end);
 753   }
 754 }
 755 
 756 
 757 static void
 758 final_reordering (const hb_ot_shape_plan_t *plan,
 759                   hb_font_t *font HB_UNUSED,
 760                   hb_buffer_t *buffer)
 761 {
 762   unsigned int count = buffer->len;
 763   if (unlikely (!count)) return;
 764 
 765   foreach_syllable (buffer, start, end)
 766     final_reordering_syllable (plan, buffer, start, end);
 767 
 768   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
 769   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
 770 }
 771 
 772 
 773 static void
 774 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
 775                  hb_font_t *font HB_UNUSED,
 776                  hb_buffer_t *buffer)
 777 {
 778   hb_glyph_info_t *info = buffer->info;
 779   unsigned int count = buffer->len;
 780   for (unsigned int i = 0; i < count; i++)
 781     info[i].syllable() = 0;
 782 }
 783 
 784 
 785 static bool
 786 decompose_khmer (const hb_ot_shape_normalize_context_t *c,
 787                  hb_codepoint_t  ab,
 788                  hb_codepoint_t *a,
 789                  hb_codepoint_t *b)
 790 {
 791   switch (ab)
 792   {
 793     /*
 794      * Decompose split matras that don't have Unicode decompositions.
 795      */
 796 
 797     /* Khmer */
 798     case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
 799     case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
 800     case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
 801     case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
 802     case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
 803   }
 804 
 805   return (bool) c->unicode->decompose (ab, a, b);
 806 }
 807 
 808 static bool
 809 compose_khmer (const hb_ot_shape_normalize_context_t *c,
 810                hb_codepoint_t  a,
 811                hb_codepoint_t  b,
 812                hb_codepoint_t *ab)
 813 {
 814   /* Avoid recomposing split matras. */
 815   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
 816     return false;
 817 
 818   return (bool) c->unicode->compose (a, b, ab);
 819 }
 820 
 821 
 822 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
 823 {
 824   collect_features_khmer,
 825   override_features_khmer,
 826   data_create_khmer,
 827   data_destroy_khmer,
 828   nullptr, /* preprocess_text */
 829   nullptr, /* postprocess_glyphs */
 830   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
 831   decompose_khmer,
 832   compose_khmer,
 833   setup_masks_khmer,
 834   nullptr, /* disable_otl */
 835   nullptr, /* reorder_marks */
 836   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
 837   false, /* fallback_position */
 838 };