--- old/src/java.desktop/share/native/libfontmanager/harfbuzz/hb-ot-shape-complex-indic.cc 2019-02-28 12:04:04.174502874 -0800 +++ new/src/java.desktop/share/native/libfontmanager/harfbuzz/hb-ot-shape-complex-indic.cc 2019-02-28 12:04:04.030502877 -0800 @@ -24,8 +24,9 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-complex-indic-private.hh" -#include "hb-ot-layout-private.hh" +#include "hb-ot-shape-complex-indic.hh" +#include "hb-ot-shape-complex-vowel-constraints.hh" +#include "hb-ot-layout.hh" /* @@ -95,42 +96,41 @@ * Indic shaper. */ -struct feature_list_t { - hb_tag_t tag; - hb_ot_map_feature_flags_t flags; -}; - -static const feature_list_t +static const hb_ot_map_feature_t indic_features[] = { /* * Basic features. * These features are applied in order, one at a time, after initial_reordering. */ - {HB_TAG('n','u','k','t'), F_GLOBAL}, - {HB_TAG('a','k','h','n'), F_GLOBAL}, - {HB_TAG('r','p','h','f'), F_NONE}, - {HB_TAG('r','k','r','f'), F_GLOBAL}, - {HB_TAG('p','r','e','f'), F_NONE}, - {HB_TAG('b','l','w','f'), F_NONE}, - {HB_TAG('a','b','v','f'), F_NONE}, - {HB_TAG('h','a','l','f'), F_NONE}, - {HB_TAG('p','s','t','f'), F_NONE}, - {HB_TAG('v','a','t','u'), F_GLOBAL}, - {HB_TAG('c','j','c','t'), F_GLOBAL}, + {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS}, + {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS}, + {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS}, + {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS}, + {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS}, + {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS}, + {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS}, /* * Other features. - * These features are applied all at once, after final_reordering. + * These features are applied all at once, after final_reordering + * but before clearing syllables. * Default Bengali font in Windows for example has intermixed * lookups for init,pres,abvs,blws features. */ - {HB_TAG('i','n','i','t'), F_NONE}, - {HB_TAG('p','r','e','s'), F_GLOBAL}, - {HB_TAG('a','b','v','s'), F_GLOBAL}, - {HB_TAG('b','l','w','s'), F_GLOBAL}, - {HB_TAG('p','s','t','s'), F_GLOBAL}, - {HB_TAG('h','a','l','n'), F_GLOBAL}, - /* Positioning features, though we don't care about the types. */ + {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS}, + {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS}, + /* + * Positioning features. + * We don't care about the types. + */ {HB_TAG('d','i','s','t'), F_GLOBAL}, {HB_TAG('a','b','v','m'), F_GLOBAL}, {HB_TAG('b','l','w','m'), F_GLOBAL}, @@ -158,12 +158,13 @@ _BLWS, _PSTS, _HALN, + _DIST, _ABVM, _BLWM, INDIC_NUM_FEATURES, - INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */ + INDIC_BASIC_FEATURES = INIT, /* Don't forget to update this! */ }; static void @@ -191,25 +192,27 @@ /* Do this before any lookups have been applied. */ map->add_gsub_pause (setup_syllables); - map->add_global_bool_feature (HB_TAG('l','o','c','l')); + map->enable_feature (HB_TAG('l','o','c','l')); /* The Indic specs do not require ccmp, but we apply it here since if * there is a use of it, it's typically at the beginning. */ - map->add_global_bool_feature (HB_TAG('c','c','m','p')); + map->enable_feature (HB_TAG('c','c','m','p')); unsigned int i = 0; map->add_gsub_pause (initial_reordering); + for (; i < INDIC_BASIC_FEATURES; i++) { - map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); + map->add_feature (indic_features[i]); map->add_gsub_pause (nullptr); } + map->add_gsub_pause (final_reordering); - for (; i < INDIC_NUM_FEATURES; i++) { - map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); - } - map->add_global_bool_feature (HB_TAG('c','a','l','t')); - map->add_global_bool_feature (HB_TAG('c','l','i','g')); + for (; i < INDIC_NUM_FEATURES; i++) + map->add_feature (indic_features[i]); + + map->enable_feature (HB_TAG('c','a','l','t')); + map->enable_feature (HB_TAG('c','l','i','g')); map->add_gsub_pause (clear_syllables); } @@ -217,13 +220,13 @@ static void override_features_indic (hb_ot_shape_planner_t *plan) { - plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL); + plan->map.disable_feature (HB_TAG('l','i','g','a')); } struct would_substitute_feature_t { - inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) + void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) { zero_context = zero_context_; map->get_stage_lookups (0/*GSUB*/, @@ -231,9 +234,9 @@ &lookups, &count); } - inline bool would_substitute (const hb_codepoint_t *glyphs, - unsigned int glyphs_count, - hb_face_t *face) const + bool would_substitute (const hb_codepoint_t *glyphs, + unsigned int glyphs_count, + hb_face_t *face) const { for (unsigned int i = 0; i < count; i++) if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context)) @@ -249,12 +252,10 @@ struct indic_shape_plan_t { - ASSERT_POD (); - - inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const + bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const { - hb_codepoint_t glyph = virama_glyph; - if (unlikely (virama_glyph == (hb_codepoint_t) -1)) + hb_codepoint_t glyph = virama_glyph.get_relaxed (); + if (unlikely (glyph == (hb_codepoint_t) -1)) { if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph)) glyph = 0; @@ -262,8 +263,8 @@ * Maybe one day... */ /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph - * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */ - virama_glyph = glyph; + * during shape planning... Instead, overwrite it here. */ + virama_glyph.set_relaxed ((int) glyph); } *pglyph = glyph; @@ -273,7 +274,8 @@ const indic_config_t *config; bool is_old_spec; - mutable hb_codepoint_t virama_glyph; + bool uniscribe_bug_compatible; + mutable hb_atomic_int_t virama_glyph; would_substitute_feature_t rphf; would_substitute_feature_t pref; @@ -298,7 +300,8 @@ } indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); - indic_plan->virama_glyph = (hb_codepoint_t) -1; + indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible; + indic_plan->virama_glyph.set_relaxed (-1); /* Use zero-context would_substitute() matching for new-spec of the main * Indic scripts, and scripts with one spec only, but not for old-specs. @@ -419,7 +422,7 @@ return; hb_codepoint_t virama; - if (indic_plan->get_virama_glyph (font, &virama)) + if (indic_plan->load_virama_glyph (font, &virama)) { hb_face_t *face = font->face; unsigned int count = buffer->len; @@ -667,10 +670,10 @@ * last consonant. * * Reports suggest that in some scripts Uniscribe does this only if there - * is *not* a Halant after last consonant already (eg. Kannada), while it - * does it unconditionally in other scripts (eg. Malayalam, Bengali). We - * don't currently know about other scripts, so we whitelist Malayalam and - * Bengali for now. + * is *not* a Halant after last consonant already. We know that is the + * case for Kannada, while it reorders unconditionally in other scripts, + * eg. Malayalam, Bengali, and Devanagari. We don't currently know about + * other scripts, so we blacklist Kannada. * * Kannada test case: * U+0C9A,U+0CCD,U+0C9A,U+0CCD @@ -681,15 +684,19 @@ * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D * With lohit-ttf-20121122/Lohit-Malayalam.ttf * - * Bengali test case + * Bengali test case: * U+0998,U+09CD,U+09AF,U+09CD * With Windows XP vrinda.ttf * https://github.com/harfbuzz/harfbuzz/issues/1073 + * + * Devanagari test case: + * U+091F,U+094D,U+0930,U+094D + * With chandas.ttf + * https://github.com/harfbuzz/harfbuzz/issues/1071 */ if (indic_plan->is_old_spec) { - bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM && - buffer->props.script != HB_SCRIPT_BENGALI; + bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA; for (unsigned int i = base + 1; i < end; i++) if (info[i].indic_category() == OT_H) { @@ -713,7 +720,7 @@ indic_position_t last_pos = POS_START; for (unsigned int i = start; i < end; i++) { - if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H)))) + if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) { info[i].indic_position() = last_pos; if (unlikely (info[i].indic_category() == OT_H && @@ -779,8 +786,10 @@ * * We could use buffer->sort() for this, if there was no special * reordering of pre-base stuff happening later... + * We don't want to merge_clusters all of that, which buffer->sort() + * would. */ - if (indic_plan->is_old_spec || end - base > 127) + if (indic_plan->is_old_spec || end - start > 127) buffer->merge_clusters (base, end); else { @@ -909,10 +918,12 @@ hb_buffer_t *buffer, unsigned int start, unsigned int end) { + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; + /* We treat placeholder/dotted-circle as if they are consonants, so we * should just chain. Only if not in compatibility mode that is... */ - if (hb_options ().uniscribe_bug_compatible) + if (indic_plan->uniscribe_bug_compatible) { /* For dotted-circle, this is what Uniscribe does: * If dotted-circle is the last glyph, it just does nothing. @@ -954,7 +965,8 @@ hb_font_t *font, hb_buffer_t *buffer) { - /* Note: This loop is extra overhead, but should not be measurable. */ + /* Note: This loop is extra overhead, but should not be measurable. + * TODO Use a buffer scratch flag to remove the loop. */ bool has_broken_syllables = false; unsigned int count = buffer->len; hb_glyph_info_t *info = buffer->info; @@ -1006,7 +1018,6 @@ else buffer->next_glyph (); } - buffer->swap_buffers (); } @@ -1036,9 +1047,11 @@ * phase, and that might have messed up our properties. Recover * from a particular case of that where we're fairly sure that a * class of OT_H is desired but has been lost. */ - if (indic_plan->virama_glyph) + /* We don't call load_virama_glyph(), since we know it's already + * loaded. */ + hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed (); + if (virama_glyph) { - unsigned int virama_glyph = indic_plan->virama_glyph; for (unsigned int i = start; i < end; i++) if (info[i].codepoint == virama_glyph && _hb_glyph_info_ligated (&info[i]) && @@ -1127,6 +1140,24 @@ * defined as “after last standalone halant glyph, after initial matra * position and before the main consonant”. If ZWJ or ZWNJ follow this * halant, position is moved after it. + * + * IMPLEMENTATION NOTES: + * + * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe + * and Devanagari shows that the behavior is best described as: + * + * "If ZWJ follows this halant, matra is NOT repositioned after this halant. + * If ZWNJ follows this halant, position is moved after it." + * + * Test case, with Adobe Devanagari or Nirmala UI: + * + * U+091F,U+094D,U+200C,U+092F,U+093F + * (Matra moves to the middle, after ZWNJ.) + * + * U+091F,U+094D,U+200D,U+092F,U+093F + * (Matra does NOT move, stays to the left.) + * + * https://github.com/harfbuzz/harfbuzz/issues/1070 */ if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */ @@ -1140,6 +1171,7 @@ */ if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) { + search: while (new_pos > start && !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H))))) new_pos--; @@ -1150,9 +1182,27 @@ if (is_halant (info[new_pos]) && info[new_pos].indic_position() != POS_PRE_M) { +#if 0 // See comment above /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) new_pos++; +#endif + if (new_pos + 1 < end) + { + /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */ + if (info[new_pos + 1].indic_category() == OT_ZWJ) + { + /* Keep searching. */ + if (new_pos > start) + { + new_pos--; + goto search; + } + } + /* -> If ZWNJ follows this halant, position is moved after it. */ + if (info[new_pos + 1].indic_category() == OT_ZWNJ) + new_pos++; + } } else new_pos = start; /* No move. */ @@ -1313,7 +1363,7 @@ * Uniscribe doesn't do this. * TEST: U+0930,U+094D,U+0915,U+094B,U+094D */ - if (!hb_options ().uniscribe_bug_compatible && + if (!indic_plan->uniscribe_bug_compatible && unlikely (is_halant (info[new_reph_pos]))) { for (unsigned int i = base + 1; i < new_reph_pos; i++) if (info[i].indic_category() == OT_M) { @@ -1419,7 +1469,7 @@ /* * Finish off the clusters and go home! */ - if (hb_options ().uniscribe_bug_compatible) + if (indic_plan->uniscribe_bug_compatible) { switch ((hb_tag_t) plan->props.script) { @@ -1467,6 +1517,14 @@ } +static void +preprocess_text_indic (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + _hb_preprocess_text_vowel_constraints (plan, buffer, font); +} + static bool decompose_indic (const hb_ot_shape_normalize_context_t *c, hb_codepoint_t ab, @@ -1566,13 +1624,13 @@ override_features_indic, data_create_indic, data_destroy_indic, - nullptr, /* preprocess_text */ + preprocess_text_indic, nullptr, /* postprocess_glyphs */ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, decompose_indic, compose_indic, setup_masks_indic, - nullptr, /* disable_otl */ + HB_TAG_NONE, /* gpos_tag */ nullptr, /* reorder_marks */ HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, false, /* fallback_position */