< prev index next >
src/java.desktop/share/native/libfontmanager/harfbuzz/hb-ot-shape-complex-indic.cc
Print this page
@@ -22,12 +22,13 @@
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
-#include "hb-ot-shape-complex-indic-private.hh"
-#include "hb-ot-layout-private.hh"
+#include "hb-ot-shape-complex-indic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
+#include "hb-ot-layout.hh"
/*
* Indic shaper.
*/
@@ -93,46 +94,45 @@
/*
* Indic shaper.
*/
-struct feature_list_t {
- hb_tag_t tag;
- hb_ot_map_feature_flags_t flags;
-};
-
-static const feature_list_t
+static const hb_ot_map_feature_t
indic_features[] =
{
/*
* Basic features.
* These features are applied in order, one at a time, after initial_reordering.
*/
- {HB_TAG('n','u','k','t'), F_GLOBAL},
- {HB_TAG('a','k','h','n'), F_GLOBAL},
- {HB_TAG('r','p','h','f'), F_NONE},
- {HB_TAG('r','k','r','f'), F_GLOBAL},
- {HB_TAG('p','r','e','f'), F_NONE},
- {HB_TAG('b','l','w','f'), F_NONE},
- {HB_TAG('a','b','v','f'), F_NONE},
- {HB_TAG('h','a','l','f'), F_NONE},
- {HB_TAG('p','s','t','f'), F_NONE},
- {HB_TAG('v','a','t','u'), F_GLOBAL},
- {HB_TAG('c','j','c','t'), F_GLOBAL},
+ {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS},
+ {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
+ {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS},
/*
* Other features.
- * These features are applied all at once, after final_reordering.
+ * These features are applied all at once, after final_reordering
+ * but before clearing syllables.
* Default Bengali font in Windows for example has intermixed
* lookups for init,pres,abvs,blws features.
*/
- {HB_TAG('i','n','i','t'), F_NONE},
- {HB_TAG('p','r','e','s'), F_GLOBAL},
- {HB_TAG('a','b','v','s'), F_GLOBAL},
- {HB_TAG('b','l','w','s'), F_GLOBAL},
- {HB_TAG('p','s','t','s'), F_GLOBAL},
- {HB_TAG('h','a','l','n'), F_GLOBAL},
- /* Positioning features, though we don't care about the types. */
+ {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS},
+ {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
+ {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS},
+ /*
+ * Positioning features.
+ * We don't care about the types.
+ */
{HB_TAG('d','i','s','t'), F_GLOBAL},
{HB_TAG('a','b','v','m'), F_GLOBAL},
{HB_TAG('b','l','w','m'), F_GLOBAL},
};
@@ -156,16 +156,17 @@
_PRES,
_ABVS,
_BLWS,
_PSTS,
_HALN,
+
_DIST,
_ABVM,
_BLWM,
INDIC_NUM_FEATURES,
- INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
+ INDIC_BASIC_FEATURES = INIT, /* Don't forget to update this! */
};
static void
setup_syllables (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
@@ -189,51 +190,53 @@
hb_ot_map_builder_t *map = &plan->map;
/* Do this before any lookups have been applied. */
map->add_gsub_pause (setup_syllables);
- map->add_global_bool_feature (HB_TAG('l','o','c','l'));
+ map->enable_feature (HB_TAG('l','o','c','l'));
/* The Indic specs do not require ccmp, but we apply it here since if
* there is a use of it, it's typically at the beginning. */
- map->add_global_bool_feature (HB_TAG('c','c','m','p'));
+ map->enable_feature (HB_TAG('c','c','m','p'));
unsigned int i = 0;
map->add_gsub_pause (initial_reordering);
+
for (; i < INDIC_BASIC_FEATURES; i++) {
- map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
+ map->add_feature (indic_features[i]);
map->add_gsub_pause (nullptr);
}
+
map->add_gsub_pause (final_reordering);
- for (; i < INDIC_NUM_FEATURES; i++) {
- map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
- }
- map->add_global_bool_feature (HB_TAG('c','a','l','t'));
- map->add_global_bool_feature (HB_TAG('c','l','i','g'));
+ for (; i < INDIC_NUM_FEATURES; i++)
+ map->add_feature (indic_features[i]);
+
+ map->enable_feature (HB_TAG('c','a','l','t'));
+ map->enable_feature (HB_TAG('c','l','i','g'));
map->add_gsub_pause (clear_syllables);
}
static void
override_features_indic (hb_ot_shape_planner_t *plan)
{
- plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
+ plan->map.disable_feature (HB_TAG('l','i','g','a'));
}
struct would_substitute_feature_t
{
- inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
+ void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
{
zero_context = zero_context_;
map->get_stage_lookups (0/*GSUB*/,
map->get_feature_stage (0/*GSUB*/, feature_tag),
&lookups, &count);
}
- inline bool would_substitute (const hb_codepoint_t *glyphs,
+ bool would_substitute (const hb_codepoint_t *glyphs,
unsigned int glyphs_count,
hb_face_t *face) const
{
for (unsigned int i = 0; i < count; i++)
if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
@@ -247,35 +250,34 @@
bool zero_context;
};
struct indic_shape_plan_t
{
- ASSERT_POD ();
-
- inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
+ bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
{
- hb_codepoint_t glyph = virama_glyph;
- if (unlikely (virama_glyph == (hb_codepoint_t) -1))
+ hb_codepoint_t glyph = virama_glyph.get_relaxed ();
+ if (unlikely (glyph == (hb_codepoint_t) -1))
{
if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph))
glyph = 0;
/* Technically speaking, the spec says we should apply 'locl' to virama too.
* Maybe one day... */
/* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
- * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */
- virama_glyph = glyph;
+ * during shape planning... Instead, overwrite it here. */
+ virama_glyph.set_relaxed ((int) glyph);
}
*pglyph = glyph;
return glyph != 0;
}
const indic_config_t *config;
bool is_old_spec;
- mutable hb_codepoint_t virama_glyph;
+ bool uniscribe_bug_compatible;
+ mutable hb_atomic_int_t virama_glyph;
would_substitute_feature_t rphf;
would_substitute_feature_t pref;
would_substitute_feature_t blwf;
would_substitute_feature_t pstf;
@@ -296,11 +298,12 @@
indic_plan->config = &indic_configs[i];
break;
}
indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2');
- indic_plan->virama_glyph = (hb_codepoint_t) -1;
+ indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible;
+ indic_plan->virama_glyph.set_relaxed (-1);
/* Use zero-context would_substitute() matching for new-spec of the main
* Indic scripts, and scripts with one spec only, but not for old-specs.
* The new-spec for all dual-spec scripts says zero-context matching happens.
*
@@ -417,11 +420,11 @@
if (indic_plan->config->base_pos != BASE_POS_LAST)
return;
hb_codepoint_t virama;
- if (indic_plan->get_virama_glyph (font, &virama))
+ if (indic_plan->load_virama_glyph (font, &virama))
{
hb_face_t *face = font->face;
unsigned int count = buffer->len;
hb_glyph_info_t *info = buffer->info;
for (unsigned int i = 0; i < count; i++)
@@ -665,33 +668,37 @@
/* For old-style Indic script tags, move the first post-base Halant after
* last consonant.
*
* Reports suggest that in some scripts Uniscribe does this only if there
- * is *not* a Halant after last consonant already (eg. Kannada), while it
- * does it unconditionally in other scripts (eg. Malayalam, Bengali). We
- * don't currently know about other scripts, so we whitelist Malayalam and
- * Bengali for now.
+ * is *not* a Halant after last consonant already. We know that is the
+ * case for Kannada, while it reorders unconditionally in other scripts,
+ * eg. Malayalam, Bengali, and Devanagari. We don't currently know about
+ * other scripts, so we blacklist Kannada.
*
* Kannada test case:
* U+0C9A,U+0CCD,U+0C9A,U+0CCD
* With some versions of Lohit Kannada.
* https://bugs.freedesktop.org/show_bug.cgi?id=59118
*
* Malayalam test case:
* U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
* With lohit-ttf-20121122/Lohit-Malayalam.ttf
*
- * Bengali test case
+ * Bengali test case:
* U+0998,U+09CD,U+09AF,U+09CD
* With Windows XP vrinda.ttf
* https://github.com/harfbuzz/harfbuzz/issues/1073
+ *
+ * Devanagari test case:
+ * U+091F,U+094D,U+0930,U+094D
+ * With chandas.ttf
+ * https://github.com/harfbuzz/harfbuzz/issues/1071
*/
if (indic_plan->is_old_spec)
{
- bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM &&
- buffer->props.script != HB_SCRIPT_BENGALI;
+ bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
for (unsigned int i = base + 1; i < end; i++)
if (info[i].indic_category() == OT_H)
{
unsigned int j;
for (j = end - 1; j > i; j--)
@@ -711,11 +718,11 @@
/* Attach misc marks to previous char to move with them. */
{
indic_position_t last_pos = POS_START;
for (unsigned int i = start; i < end; i++)
{
- if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H))))
+ if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H))))
{
info[i].indic_position() = last_pos;
if (unlikely (info[i].indic_category() == OT_H &&
info[i].indic_position() == POS_PRE_M))
{
@@ -777,12 +784,14 @@
* order and merge as needed.
* For pre-base stuff, we handle cluster issues in final reordering.
*
* We could use buffer->sort() for this, if there was no special
* reordering of pre-base stuff happening later...
+ * We don't want to merge_clusters all of that, which buffer->sort()
+ * would.
*/
- if (indic_plan->is_old_spec || end - base > 127)
+ if (indic_plan->is_old_spec || end - start > 127)
buffer->merge_clusters (base, end);
else
{
/* Note! syllable() is a one-byte field. */
for (unsigned int i = base; i < end; i++)
@@ -907,14 +916,16 @@
initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
hb_face_t *face,
hb_buffer_t *buffer,
unsigned int start, unsigned int end)
{
+ const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
+
/* We treat placeholder/dotted-circle as if they are consonants, so we
* should just chain. Only if not in compatibility mode that is... */
- if (hb_options ().uniscribe_bug_compatible)
+ if (indic_plan->uniscribe_bug_compatible)
{
/* For dotted-circle, this is what Uniscribe does:
* If dotted-circle is the last glyph, it just does nothing.
* Ie. It doesn't form Reph. */
if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
@@ -952,11 +963,12 @@
static inline void
insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
hb_font_t *font,
hb_buffer_t *buffer)
{
- /* Note: This loop is extra overhead, but should not be measurable. */
+ /* Note: This loop is extra overhead, but should not be measurable.
+ * TODO Use a buffer scratch flag to remove the loop. */
bool has_broken_syllables = false;
unsigned int count = buffer->len;
hb_glyph_info_t *info = buffer->info;
for (unsigned int i = 0; i < count; i++)
if ((info[i].syllable() & 0x0F) == broken_cluster)
@@ -1004,11 +1016,10 @@
buffer->output_info (ginfo);
}
else
buffer->next_glyph ();
}
-
buffer->swap_buffers ();
}
static void
initial_reordering (const hb_ot_shape_plan_t *plan,
@@ -1034,13 +1045,15 @@
/* This function relies heavily on halant glyphs. Lots of ligation
* and possibly multiple substitutions happened prior to this
* phase, and that might have messed up our properties. Recover
* from a particular case of that where we're fairly sure that a
* class of OT_H is desired but has been lost. */
- if (indic_plan->virama_glyph)
+ /* We don't call load_virama_glyph(), since we know it's already
+ * loaded. */
+ hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
+ if (virama_glyph)
{
- unsigned int virama_glyph = indic_plan->virama_glyph;
for (unsigned int i = start; i < end; i++)
if (info[i].codepoint == virama_glyph &&
_hb_glyph_info_ligated (&info[i]) &&
_hb_glyph_info_multiplied (&info[i]))
{
@@ -1125,10 +1138,28 @@
* features, the glyph can be moved closer to the main consonant based on
* whether half-forms had been formed. Actual position for the matra is
* defined as “after last standalone halant glyph, after initial matra
* position and before the main consonant”. If ZWJ or ZWNJ follow this
* halant, position is moved after it.
+ *
+ * IMPLEMENTATION NOTES:
+ *
+ * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
+ * and Devanagari shows that the behavior is best described as:
+ *
+ * "If ZWJ follows this halant, matra is NOT repositioned after this halant.
+ * If ZWNJ follows this halant, position is moved after it."
+ *
+ * Test case, with Adobe Devanagari or Nirmala UI:
+ *
+ * U+091F,U+094D,U+200C,U+092F,U+093F
+ * (Matra moves to the middle, after ZWNJ.)
+ *
+ * U+091F,U+094D,U+200D,U+092F,U+093F
+ * (Matra does NOT move, stays to the left.)
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1070
*/
if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
{
/* If we lost track of base, alas, position before last thingy. */
@@ -1138,23 +1169,42 @@
* The glyphs formed by 'half' are Chillus or ligated explicit viramas.
* We want to position matra after them.
*/
if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
{
+ search:
while (new_pos > start &&
!(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H)))))
new_pos--;
/* If we found no Halant we are done.
* Otherwise only proceed if the Halant does
* not belong to the Matra itself! */
if (is_halant (info[new_pos]) &&
info[new_pos].indic_position() != POS_PRE_M)
{
+#if 0 // See comment above
/* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
new_pos++;
+#endif
+ if (new_pos + 1 < end)
+ {
+ /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */
+ if (info[new_pos + 1].indic_category() == OT_ZWJ)
+ {
+ /* Keep searching. */
+ if (new_pos > start)
+ {
+ new_pos--;
+ goto search;
+ }
+ }
+ /* -> If ZWNJ follows this halant, position is moved after it. */
+ if (info[new_pos + 1].indic_category() == OT_ZWNJ)
+ new_pos++;
+ }
}
else
new_pos = start; /* No move. */
}
@@ -1311,11 +1361,11 @@
* position it before that Halant so it can interact with the Matra.
* However, if it's a plain Consonant,Halant we shouldn't do that.
* Uniscribe doesn't do this.
* TEST: U+0930,U+094D,U+0915,U+094B,U+094D
*/
- if (!hb_options ().uniscribe_bug_compatible &&
+ if (!indic_plan->uniscribe_bug_compatible &&
unlikely (is_halant (info[new_reph_pos]))) {
for (unsigned int i = base + 1; i < new_reph_pos; i++)
if (info[i].indic_category() == OT_M) {
/* Ok, got it. */
new_reph_pos--;
@@ -1417,11 +1467,11 @@
/*
* Finish off the clusters and go home!
*/
- if (hb_options ().uniscribe_bug_compatible)
+ if (indic_plan->uniscribe_bug_compatible)
{
switch ((hb_tag_t) plan->props.script)
{
case HB_SCRIPT_TAMIL:
case HB_SCRIPT_SINHALA:
@@ -1465,10 +1515,18 @@
for (unsigned int i = 0; i < count; i++)
info[i].syllable() = 0;
}
+static void
+preprocess_text_indic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+{
+ _hb_preprocess_text_vowel_constraints (plan, buffer, font);
+}
+
static bool
decompose_indic (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t ab,
hb_codepoint_t *a,
hb_codepoint_t *b)
@@ -1564,16 +1622,16 @@
{
collect_features_indic,
override_features_indic,
data_create_indic,
data_destroy_indic,
- nullptr, /* preprocess_text */
+ preprocess_text_indic,
nullptr, /* postprocess_glyphs */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
decompose_indic,
compose_indic,
setup_masks_indic,
- nullptr, /* disable_otl */
+ HB_TAG_NONE, /* gpos_tag */
nullptr, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */
};
< prev index next >