< prev index next >

src/java.desktop/share/native/libfontmanager/harfbuzz/hb-ot-shape-complex-indic.cc

Print this page

        

@@ -22,12 +22,13 @@
  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Google Author(s): Behdad Esfahbod
  */
 
-#include "hb-ot-shape-complex-indic-private.hh"
-#include "hb-ot-layout-private.hh"
+#include "hb-ot-shape-complex-indic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-layout.hh"
 
 
 /*
  * Indic shaper.
  */

@@ -93,46 +94,45 @@
 
 /*
  * Indic shaper.
  */
 
-struct feature_list_t {
-  hb_tag_t tag;
-  hb_ot_map_feature_flags_t flags;
-};
-
-static const feature_list_t
+static const hb_ot_map_feature_t
 indic_features[] =
 {
   /*
    * Basic features.
    * These features are applied in order, one at a time, after initial_reordering.
    */
-  {HB_TAG('n','u','k','t'), F_GLOBAL},
-  {HB_TAG('a','k','h','n'), F_GLOBAL},
-  {HB_TAG('r','p','h','f'), F_NONE},
-  {HB_TAG('r','k','r','f'), F_GLOBAL},
-  {HB_TAG('p','r','e','f'), F_NONE},
-  {HB_TAG('b','l','w','f'), F_NONE},
-  {HB_TAG('a','b','v','f'), F_NONE},
-  {HB_TAG('h','a','l','f'), F_NONE},
-  {HB_TAG('p','s','t','f'), F_NONE},
-  {HB_TAG('v','a','t','u'), F_GLOBAL},
-  {HB_TAG('c','j','c','t'), F_GLOBAL},
+  {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('r','p','h','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('p','r','e','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('b','l','w','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('a','b','v','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('h','a','l','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('p','s','t','f'),        F_MANUAL_JOINERS},
+  {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
   /*
    * Other features.
-   * These features are applied all at once, after final_reordering.
+   * These features are applied all at once, after final_reordering
+   * but before clearing syllables.
    * Default Bengali font in Windows for example has intermixed
    * lookups for init,pres,abvs,blws features.
    */
-  {HB_TAG('i','n','i','t'), F_NONE},
-  {HB_TAG('p','r','e','s'), F_GLOBAL},
-  {HB_TAG('a','b','v','s'), F_GLOBAL},
-  {HB_TAG('b','l','w','s'), F_GLOBAL},
-  {HB_TAG('p','s','t','s'), F_GLOBAL},
-  {HB_TAG('h','a','l','n'), F_GLOBAL},
-  /* Positioning features, though we don't care about the types. */
+  {HB_TAG('i','n','i','t'),        F_MANUAL_JOINERS},
+  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
+  {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+  /*
+   * Positioning features.
+   * We don't care about the types.
+   */
   {HB_TAG('d','i','s','t'), F_GLOBAL},
   {HB_TAG('a','b','v','m'), F_GLOBAL},
   {HB_TAG('b','l','w','m'), F_GLOBAL},
 };
 

@@ -156,16 +156,17 @@
   _PRES,
   _ABVS,
   _BLWS,
   _PSTS,
   _HALN,
+
   _DIST,
   _ABVM,
   _BLWM,
 
   INDIC_NUM_FEATURES,
-  INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
+  INDIC_BASIC_FEATURES = INIT, /* Don't forget to update this! */
 };
 
 static void
 setup_syllables (const hb_ot_shape_plan_t *plan,
                  hb_font_t *font,

@@ -189,51 +190,53 @@
   hb_ot_map_builder_t *map = &plan->map;
 
   /* Do this before any lookups have been applied. */
   map->add_gsub_pause (setup_syllables);
 
-  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
+  map->enable_feature (HB_TAG('l','o','c','l'));
   /* The Indic specs do not require ccmp, but we apply it here since if
    * there is a use of it, it's typically at the beginning. */
-  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
+  map->enable_feature (HB_TAG('c','c','m','p'));
 
 
   unsigned int i = 0;
   map->add_gsub_pause (initial_reordering);
+
   for (; i < INDIC_BASIC_FEATURES; i++) {
-    map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
+    map->add_feature (indic_features[i]);
     map->add_gsub_pause (nullptr);
   }
+
   map->add_gsub_pause (final_reordering);
-  for (; i < INDIC_NUM_FEATURES; i++) {
-    map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
-  }
 
-  map->add_global_bool_feature (HB_TAG('c','a','l','t'));
-  map->add_global_bool_feature (HB_TAG('c','l','i','g'));
+  for (; i < INDIC_NUM_FEATURES; i++)
+    map->add_feature (indic_features[i]);
+
+  map->enable_feature (HB_TAG('c','a','l','t'));
+  map->enable_feature (HB_TAG('c','l','i','g'));
 
   map->add_gsub_pause (clear_syllables);
 }
 
 static void
 override_features_indic (hb_ot_shape_planner_t *plan)
 {
-  plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
+  plan->map.disable_feature (HB_TAG('l','i','g','a'));
 }
 
 
 struct would_substitute_feature_t
 {
-  inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+  void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
   {
     zero_context = zero_context_;
     map->get_stage_lookups (0/*GSUB*/,
                             map->get_feature_stage (0/*GSUB*/, feature_tag),
                             &lookups, &count);
   }
 
-  inline bool would_substitute (const hb_codepoint_t *glyphs,
+  bool would_substitute (const hb_codepoint_t *glyphs,
                                 unsigned int          glyphs_count,
                                 hb_face_t            *face) const
   {
     for (unsigned int i = 0; i < count; i++)
       if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))

@@ -247,35 +250,34 @@
   bool zero_context;
 };
 
 struct indic_shape_plan_t
 {
-  ASSERT_POD ();
-
-  inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
+  bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
   {
-    hb_codepoint_t glyph = virama_glyph;
-    if (unlikely (virama_glyph == (hb_codepoint_t) -1))
+    hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+    if (unlikely (glyph == (hb_codepoint_t) -1))
     {
       if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
         glyph = 0;
       /* Technically speaking, the spec says we should apply 'locl' to virama too.
        * Maybe one day... */
 
       /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
-       * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
-      virama_glyph = glyph;
+       * during shape planning...  Instead, overwrite it here. */
+      virama_glyph.set_relaxed ((int) glyph);
     }
 
     *pglyph = glyph;
     return glyph != 0;
   }
 
   const indic_config_t *config;
 
   bool is_old_spec;
-  mutable hb_codepoint_t virama_glyph;
+  bool uniscribe_bug_compatible;
+  mutable hb_atomic_int_t virama_glyph;
 
   would_substitute_feature_t rphf;
   would_substitute_feature_t pref;
   would_substitute_feature_t blwf;
   would_substitute_feature_t pstf;

@@ -296,11 +298,12 @@
       indic_plan->config = &indic_configs[i];
       break;
     }
 
   indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
-  indic_plan->virama_glyph = (hb_codepoint_t) -1;
+  indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
+  indic_plan->virama_glyph.set_relaxed (-1);
 
   /* Use zero-context would_substitute() matching for new-spec of the main
    * Indic scripts, and scripts with one spec only, but not for old-specs.
    * The new-spec for all dual-spec scripts says zero-context matching happens.
    *

@@ -417,11 +420,11 @@
 
   if (indic_plan->config->base_pos != BASE_POS_LAST)
     return;
 
   hb_codepoint_t virama;
-  if (indic_plan->get_virama_glyph (font, &virama))
+  if (indic_plan->load_virama_glyph (font, &virama))
   {
     hb_face_t *face = font->face;
     unsigned int count = buffer->len;
     hb_glyph_info_t *info = buffer->info;
     for (unsigned int i = 0; i < count; i++)

@@ -665,33 +668,37 @@
 
   /* For old-style Indic script tags, move the first post-base Halant after
    * last consonant.
    *
    * Reports suggest that in some scripts Uniscribe does this only if there
-   * is *not* a Halant after last consonant already (eg. Kannada), while it
-   * does it unconditionally in other scripts (eg. Malayalam, Bengali).  We
-   * don't currently know about other scripts, so we whitelist Malayalam and
-   * Bengali for now.
+   * is *not* a Halant after last consonant already.  We know that is the
+   * case for Kannada, while it reorders unconditionally in other scripts,
+   * eg. Malayalam, Bengali, and Devanagari.  We don't currently know about
+   * other scripts, so we blacklist Kannada.
    *
    * Kannada test case:
    * U+0C9A,U+0CCD,U+0C9A,U+0CCD
    * With some versions of Lohit Kannada.
    * https://bugs.freedesktop.org/show_bug.cgi?id=59118
    *
    * Malayalam test case:
    * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
    * With lohit-ttf-20121122/Lohit-Malayalam.ttf
    *
-   * Bengali test case
+   * Bengali test case:
    * U+0998,U+09CD,U+09AF,U+09CD
    * With Windows XP vrinda.ttf
    * https://github.com/harfbuzz/harfbuzz/issues/1073
+   *
+   * Devanagari test case:
+   * U+091F,U+094D,U+0930,U+094D
+   * With chandas.ttf
+   * https://github.com/harfbuzz/harfbuzz/issues/1071
    */
   if (indic_plan->is_old_spec)
   {
-    bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM &&
-                                   buffer->props.script != HB_SCRIPT_BENGALI;
+    bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
     for (unsigned int i = base + 1; i < end; i++)
       if (info[i].indic_category() == OT_H)
       {
         unsigned int j;
         for (j = end - 1; j > i; j--)

@@ -711,11 +718,11 @@
   /* Attach misc marks to previous char to move with them. */
   {
     indic_position_t last_pos = POS_START;
     for (unsigned int i = start; i < end; i++)
     {
-      if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+      if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
       {
         info[i].indic_position() = last_pos;
         if (unlikely (info[i].indic_category() == OT_H &&
                       info[i].indic_position() == POS_PRE_M))
         {

@@ -777,12 +784,14 @@
      * order and merge as needed.
      * For pre-base stuff, we handle cluster issues in final reordering.
      *
      * We could use buffer->sort() for this, if there was no special
      * reordering of pre-base stuff happening later...
+     * We don't want to merge_clusters all of that, which buffer->sort()
+     * would.
      */
-    if (indic_plan->is_old_spec || end - base > 127)
+    if (indic_plan->is_old_spec || end - start > 127)
       buffer->merge_clusters (base, end);
     else
     {
       /* Note!  syllable() is a one-byte field. */
       for (unsigned int i = base; i < end; i++)

@@ -907,14 +916,16 @@
 initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
                                        hb_face_t *face,
                                        hb_buffer_t *buffer,
                                        unsigned int start, unsigned int end)
 {
+  const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+
   /* We treat placeholder/dotted-circle as if they are consonants, so we
    * should just chain.  Only if not in compatibility mode that is... */
 
-  if (hb_options ().uniscribe_bug_compatible)
+  if (indic_plan->uniscribe_bug_compatible)
   {
     /* For dotted-circle, this is what Uniscribe does:
      * If dotted-circle is the last glyph, it just does nothing.
      * Ie. It doesn't form Reph. */
     if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)

@@ -952,11 +963,12 @@
 static inline void
 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
                        hb_font_t *font,
                        hb_buffer_t *buffer)
 {
-  /* Note: This loop is extra overhead, but should not be measurable. */
+  /* Note: This loop is extra overhead, but should not be measurable.
+   * TODO Use a buffer scratch flag to remove the loop. */
   bool has_broken_syllables = false;
   unsigned int count = buffer->len;
   hb_glyph_info_t *info = buffer->info;
   for (unsigned int i = 0; i < count; i++)
     if ((info[i].syllable() & 0x0F) == broken_cluster)

@@ -1004,11 +1016,10 @@
       buffer->output_info (ginfo);
     }
     else
       buffer->next_glyph ();
   }
-
   buffer->swap_buffers ();
 }
 
 static void
 initial_reordering (const hb_ot_shape_plan_t *plan,

@@ -1034,13 +1045,15 @@
   /* This function relies heavily on halant glyphs.  Lots of ligation
    * and possibly multiple substitutions happened prior to this
    * phase, and that might have messed up our properties.  Recover
    * from a particular case of that where we're fairly sure that a
    * class of OT_H is desired but has been lost. */
-  if (indic_plan->virama_glyph)
+  /* We don't call load_virama_glyph(), since we know it's already
+   * loaded. */
+  hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+  if (virama_glyph)
   {
-    unsigned int virama_glyph = indic_plan->virama_glyph;
     for (unsigned int i = start; i < end; i++)
       if (info[i].codepoint == virama_glyph &&
           _hb_glyph_info_ligated (&info[i]) &&
           _hb_glyph_info_multiplied (&info[i]))
       {

@@ -1125,10 +1138,28 @@
    *     features, the glyph can be moved closer to the main consonant based on
    *     whether half-forms had been formed. Actual position for the matra is
    *     defined as “after last standalone halant glyph, after initial matra
    *     position and before the main consonant”. If ZWJ or ZWNJ follow this
    *     halant, position is moved after it.
+   *
+   * IMPLEMENTATION NOTES:
+   *
+   * It looks like the last sentence is wrong.  Testing, with Windows 7 Uniscribe
+   * and Devanagari shows that the behavior is best described as:
+   *
+   * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
+   *  If ZWNJ follows this halant, position is moved after it."
+   *
+   * Test case, with Adobe Devanagari or Nirmala UI:
+   *
+   *   U+091F,U+094D,U+200C,U+092F,U+093F
+   *   (Matra moves to the middle, after ZWNJ.)
+   *
+   *   U+091F,U+094D,U+200D,U+092F,U+093F
+   *   (Matra does NOT move, stays to the left.)
+   *
+   * https://github.com/harfbuzz/harfbuzz/issues/1070
    */
 
   if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
   {
     /* If we lost track of base, alas, position before last thingy. */

@@ -1138,23 +1169,42 @@
      * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
      * We want to position matra after them.
      */
     if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
     {
+    search:
       while (new_pos > start &&
              !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
         new_pos--;
 
       /* If we found no Halant we are done.
        * Otherwise only proceed if the Halant does
        * not belong to the Matra itself! */
       if (is_halant (info[new_pos]) &&
           info[new_pos].indic_position() != POS_PRE_M)
       {
+#if 0 // See comment above
         /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
         if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
           new_pos++;
+#endif
+        if (new_pos + 1 < end)
+        {
+          /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
+          if (info[new_pos + 1].indic_category() == OT_ZWJ)
+          {
+            /* Keep searching. */
+            if (new_pos > start)
+            {
+              new_pos--;
+              goto search;
+            }
+          }
+          /* -> If ZWNJ follows this halant, position is moved after it. */
+          if (info[new_pos + 1].indic_category() == OT_ZWNJ)
+            new_pos++;
+        }
       }
       else
         new_pos = start; /* No move. */
     }
 

@@ -1311,11 +1361,11 @@
        * position it before that Halant so it can interact with the Matra.
        * However, if it's a plain Consonant,Halant we shouldn't do that.
        * Uniscribe doesn't do this.
        * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
        */
-      if (!hb_options ().uniscribe_bug_compatible &&
+      if (!indic_plan->uniscribe_bug_compatible &&
           unlikely (is_halant (info[new_reph_pos]))) {
         for (unsigned int i = base + 1; i < new_reph_pos; i++)
           if (info[i].indic_category() == OT_M) {
             /* Ok, got it. */
             new_reph_pos--;

@@ -1417,11 +1467,11 @@
 
 
   /*
    * Finish off the clusters and go home!
    */
-  if (hb_options ().uniscribe_bug_compatible)
+  if (indic_plan->uniscribe_bug_compatible)
   {
     switch ((hb_tag_t) plan->props.script)
     {
       case HB_SCRIPT_TAMIL:
       case HB_SCRIPT_SINHALA:

@@ -1465,10 +1515,18 @@
   for (unsigned int i = 0; i < count; i++)
     info[i].syllable() = 0;
 }
 
 
+static void
+preprocess_text_indic (const hb_ot_shape_plan_t *plan,
+                       hb_buffer_t              *buffer,
+                       hb_font_t                *font)
+{
+  _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+}
+
 static bool
 decompose_indic (const hb_ot_shape_normalize_context_t *c,
                  hb_codepoint_t  ab,
                  hb_codepoint_t *a,
                  hb_codepoint_t *b)

@@ -1564,16 +1622,16 @@
 {
   collect_features_indic,
   override_features_indic,
   data_create_indic,
   data_destroy_indic,
-  nullptr, /* preprocess_text */
+  preprocess_text_indic,
   nullptr, /* postprocess_glyphs */
   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
   decompose_indic,
   compose_indic,
   setup_masks_indic,
-  nullptr, /* disable_otl */
+  HB_TAG_NONE, /* gpos_tag */
   nullptr, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   false, /* fallback_position */
 };
< prev index next >