1 /*
   2  * Copyright © 2012  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH
  28 #define HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH
  29 
  30 #include "hb-private.hh"
  31 
  32 #include "hb-ot-shape-private.hh"
  33 #include "hb-ot-layout-gsub-table.hh"
  34 
  35 
/* Feature tags for which fallback lookups are synthesized.
 *
 * Features ordered the same as the entries in shaping_table rows,
 * followed by rlig.  Don't change.
 *
 * The position of a tag in this array is the feature_index handed to
 * arabic_fallback_synthesize_lookup(): positions 0..3 are used as the
 * column index into shaping_table, and the remaining position ('rlig')
 * selects the ligature lookup. */
static const hb_tag_t arabic_fallback_features[] =
{
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  HB_TAG('i','s','o','l'),
  HB_TAG('r','l','i','g'),
};
  46 
  47 static OT::SubstLookup *
  48 arabic_fallback_synthesize_lookup_single (const hb_ot_shape_plan_t *plan HB_UNUSED,
  49                                           hb_font_t *font,
  50                                           unsigned int feature_index)
  51 {
  52   OT::GlyphID glyphs[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
  53   OT::GlyphID substitutes[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
  54   unsigned int num_glyphs = 0;
  55 
  56   /* Populate arrays */
  57   for (hb_codepoint_t u = SHAPING_TABLE_FIRST; u < SHAPING_TABLE_LAST + 1; u++)
  58   {
  59     hb_codepoint_t s = shaping_table[u - SHAPING_TABLE_FIRST][feature_index];
  60     hb_codepoint_t u_glyph, s_glyph;
  61 
  62     if (!s ||
  63         !hb_font_get_glyph (font, u, 0, &u_glyph) ||
  64         !hb_font_get_glyph (font, s, 0, &s_glyph) ||
  65         u_glyph == s_glyph ||
  66         u_glyph > 0xFFFFu || s_glyph > 0xFFFFu)
  67       continue;
  68 
  69     glyphs[num_glyphs].set (u_glyph);
  70     substitutes[num_glyphs].set (s_glyph);
  71 
  72     num_glyphs++;
  73   }
  74 
  75   if (!num_glyphs)
  76     return nullptr;
  77 
  78   /* Bubble-sort or something equally good!
  79    * May not be good-enough for presidential candidate interviews, but good-enough for us... */
  80 
  81 #if defined(_AIX)
  82   /* workaround AIX xlC 12 compilation issues caused by the 2 cmp in the IntType template */
  83   hb_stable_sort (&glyphs[0], num_glyphs, (int(*)(const OT::GlyphID*, const OT::GlyphID *)) OT::GlyphID::cmp, &substitutes[0]);
  84 #else
  85   hb_stable_sort (&glyphs[0], num_glyphs, OT::GlyphID::cmp, &substitutes[0]);
  86 #endif
  87 
  88   OT::Supplier<OT::GlyphID> glyphs_supplier      (glyphs, num_glyphs);
  89   OT::Supplier<OT::GlyphID> substitutes_supplier (substitutes, num_glyphs);
  90 
  91   /* Each glyph takes four bytes max, and there's some overhead. */
  92   char buf[(SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1) * 4 + 128];
  93   OT::hb_serialize_context_t c (buf, sizeof (buf));
  94   OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
  95   bool ret = lookup->serialize_single (&c,
  96                                        OT::LookupFlag::IgnoreMarks,
  97                                        glyphs_supplier,
  98                                        substitutes_supplier,
  99                                        num_glyphs);
 100   c.end_serialize ();
 101   /* TODO sanitize the results? */
 102 
 103   return ret ? c.copy<OT::SubstLookup> () : nullptr;
 104 }
 105 
 106 static OT::SubstLookup *
 107 arabic_fallback_synthesize_lookup_ligature (const hb_ot_shape_plan_t *plan HB_UNUSED,
 108                                             hb_font_t *font)
 109 {
 110   OT::GlyphID first_glyphs[ARRAY_LENGTH_CONST (ligature_table)];
 111   unsigned int first_glyphs_indirection[ARRAY_LENGTH_CONST (ligature_table)];
 112   unsigned int ligature_per_first_glyph_count_list[ARRAY_LENGTH_CONST (first_glyphs)];
 113   unsigned int num_first_glyphs = 0;
 114 
 115   /* We know that all our ligatures are 2-component */
 116   OT::GlyphID ligature_list[ARRAY_LENGTH_CONST (first_glyphs) * ARRAY_LENGTH_CONST(ligature_table[0].ligatures)];
 117   unsigned int component_count_list[ARRAY_LENGTH_CONST (ligature_list)];
 118   OT::GlyphID component_list[ARRAY_LENGTH_CONST (ligature_list) * 1/* One extra component per ligature */];
 119   unsigned int num_ligatures = 0;
 120 
 121   /* Populate arrays */
 122 
 123   /* Sort out the first-glyphs */
 124   for (unsigned int first_glyph_idx = 0; first_glyph_idx < ARRAY_LENGTH (first_glyphs); first_glyph_idx++)
 125   {
 126     hb_codepoint_t first_u = ligature_table[first_glyph_idx].first;
 127     hb_codepoint_t first_glyph;
 128     if (!hb_font_get_glyph (font, first_u, 0, &first_glyph))
 129       continue;
 130     first_glyphs[num_first_glyphs].set (first_glyph);
 131     ligature_per_first_glyph_count_list[num_first_glyphs] = 0;
 132     first_glyphs_indirection[num_first_glyphs] = first_glyph_idx;
 133     num_first_glyphs++;
 134   }
 135 
 136 #if defined(_AIX)
 137   /* workaround AIX xlC 12 compilation issues caused by the 2 cmp in the IntType template */
 138   hb_stable_sort (&first_glyphs[0], num_first_glyphs, (int(*)(const OT::GlyphID *, const OT::GlyphID *)) OT::GlyphID::cmp, &first_glyphs_indirection[0]);
 139 #else
 140   hb_stable_sort (&first_glyphs[0], num_first_glyphs, OT::GlyphID::cmp, &first_glyphs_indirection[0]);
 141 #endif
 142 
 143   /* Now that the first-glyphs are sorted, walk again, populate ligatures. */
 144   for (unsigned int i = 0; i < num_first_glyphs; i++)
 145   {
 146     unsigned int first_glyph_idx = first_glyphs_indirection[i];
 147 
 148     for (unsigned int second_glyph_idx = 0; second_glyph_idx < ARRAY_LENGTH (ligature_table[0].ligatures); second_glyph_idx++)
 149     {
 150       hb_codepoint_t second_u   = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].second;
 151       hb_codepoint_t ligature_u = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].ligature;
 152       hb_codepoint_t second_glyph, ligature_glyph;
 153       if (!second_u ||
 154           !hb_font_get_glyph (font, second_u,   0, &second_glyph) ||
 155           !hb_font_get_glyph (font, ligature_u, 0, &ligature_glyph))
 156         continue;
 157 
 158       ligature_per_first_glyph_count_list[i]++;
 159 
 160       ligature_list[num_ligatures].set (ligature_glyph);
 161       component_count_list[num_ligatures] = 2;
 162       component_list[num_ligatures].set (second_glyph);
 163       num_ligatures++;
 164     }
 165   }
 166 
 167   if (!num_ligatures)
 168     return nullptr;
 169 
 170   OT::Supplier<OT::GlyphID>   first_glyphs_supplier                      (first_glyphs, num_first_glyphs);
 171   OT::Supplier<unsigned int > ligature_per_first_glyph_count_supplier    (ligature_per_first_glyph_count_list, num_first_glyphs);
 172   OT::Supplier<OT::GlyphID>   ligatures_supplier                         (ligature_list, num_ligatures);
 173   OT::Supplier<unsigned int > component_count_supplier                   (component_count_list, num_ligatures);
 174   OT::Supplier<OT::GlyphID>   component_supplier                         (component_list, num_ligatures);
 175 
 176   /* 16 bytes per ligature ought to be enough... */
 177   char buf[ARRAY_LENGTH_CONST (ligature_list) * 16 + 128];
 178   OT::hb_serialize_context_t c (buf, sizeof (buf));
 179   OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
 180   bool ret = lookup->serialize_ligature (&c,
 181                                          OT::LookupFlag::IgnoreMarks,
 182                                          first_glyphs_supplier,
 183                                          ligature_per_first_glyph_count_supplier,
 184                                          num_first_glyphs,
 185                                          ligatures_supplier,
 186                                          component_count_supplier,
 187                                          component_supplier);
 188 
 189   c.end_serialize ();
 190   /* TODO sanitize the results? */
 191 
 192   return ret ? c.copy<OT::SubstLookup> () : nullptr;
 193 }
 194 
 195 static OT::SubstLookup *
 196 arabic_fallback_synthesize_lookup (const hb_ot_shape_plan_t *plan,
 197                                    hb_font_t *font,
 198                                    unsigned int feature_index)
 199 {
 200   if (feature_index < 4)
 201     return arabic_fallback_synthesize_lookup_single (plan, font, feature_index);
 202   else
 203     return arabic_fallback_synthesize_lookup_ligature (plan, font);
 204 }
 205 
/* Capacity of the per-plan arrays below.  The init functions static_assert
 * that their lookup sources do not exceed it. */
#define ARABIC_FALLBACK_MAX_LOOKUPS 5

/* A set of fallback substitution lookups together with the feature mask
 * and accelerator that belong to each of them. */
struct arabic_fallback_plan_t
{
  ASSERT_POD ();

  /* Number of leading entries of the arrays below that are in use. */
  unsigned int num_lookups;
  /* Whether arabic_fallback_plan_destroy() must free() the lookups. */
  bool free_lookups;

  /* Entry i of each array describes lookup i: the mask obtained from
   * plan->map.get_1_mask(), the lookup itself, and its accelerator. */
  hb_mask_t mask_array[ARABIC_FALLBACK_MAX_LOOKUPS];
  OT::SubstLookup *lookup_array[ARABIC_FALLBACK_MAX_LOOKUPS];
  hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_FALLBACK_MAX_LOOKUPS];
};

/* Shared empty plan.  arabic_fallback_plan_create() returns its address when
 * no plan could be built, and arabic_fallback_plan_destroy() ignores it. */
static const arabic_fallback_plan_t arabic_fallback_plan_nil = {};
 221 
 222 #if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(HB_NO_WIN1256)
 223 #define HB_WITH_WIN1256
 224 #endif
 225 
 226 #ifdef HB_WITH_WIN1256
 227 #include "hb-ot-shape-complex-arabic-win1256.hh"
 228 #endif
 229 
/* One manifest entry: a feature tag and the offset of the lookup that
 * implements it.  arabic_fallback_plan_init_win1256() resolves the offset
 * relative to the start of the manifest. */
struct ManifestLookup {
  OT::Tag tag;
  OT::OffsetTo<OT::SubstLookup> lookupOffset;
};
/* The manifest itself: an array of ManifestLookup entries. */
typedef OT::ArrayOf<ManifestLookup> Manifest;
 235 
 236 static bool
 237 arabic_fallback_plan_init_win1256 (arabic_fallback_plan_t *fallback_plan,
 238                                    const hb_ot_shape_plan_t *plan,
 239                                    hb_font_t *font)
 240 {
 241 #ifdef HB_WITH_WIN1256
 242   /* Does this font look like it's Windows-1256-encoded? */
 243   hb_codepoint_t g;
 244   if (!(hb_font_get_glyph (font, 0x0627u, 0, &g) && g == 199 /* ALEF */ &&
 245         hb_font_get_glyph (font, 0x0644u, 0, &g) && g == 225 /* LAM */ &&
 246         hb_font_get_glyph (font, 0x0649u, 0, &g) && g == 236 /* ALEF MAKSURA */ &&
 247         hb_font_get_glyph (font, 0x064Au, 0, &g) && g == 237 /* YEH */ &&
 248         hb_font_get_glyph (font, 0x0652u, 0, &g) && g == 250 /* SUKUN */))
 249     return false;
 250 
 251   const Manifest &manifest = reinterpret_cast<const Manifest&> (arabic_win1256_gsub_lookups.manifest);
 252   static_assert (sizeof (arabic_win1256_gsub_lookups.manifestData) / sizeof (ManifestLookup)
 253                  <= ARABIC_FALLBACK_MAX_LOOKUPS, "");
 254   /* TODO sanitize the table? */
 255 
 256   unsigned j = 0;
 257   unsigned int count = manifest.len;
 258   for (unsigned int i = 0; i < count; i++)
 259   {
 260     fallback_plan->mask_array[j] = plan->map.get_1_mask (manifest[i].tag);
 261     if (fallback_plan->mask_array[j])
 262     {
 263       fallback_plan->lookup_array[j] = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset));
 264       if (fallback_plan->lookup_array[j])
 265       {
 266         fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
 267         j++;
 268       }
 269     }
 270   }
 271 
 272   fallback_plan->num_lookups = j;
 273   fallback_plan->free_lookups = false;
 274 
 275   return j > 0;
 276 #else
 277   return false;
 278 #endif
 279 }
 280 
 281 static bool
 282 arabic_fallback_plan_init_unicode (arabic_fallback_plan_t *fallback_plan,
 283                                    const hb_ot_shape_plan_t *plan,
 284                                    hb_font_t *font)
 285 {
 286   static_assert ((ARRAY_LENGTH_CONST(arabic_fallback_features) <= ARABIC_FALLBACK_MAX_LOOKUPS), "");
 287   unsigned int j = 0;
 288   for (unsigned int i = 0; i < ARRAY_LENGTH(arabic_fallback_features) ; i++)
 289   {
 290     fallback_plan->mask_array[j] = plan->map.get_1_mask (arabic_fallback_features[i]);
 291     if (fallback_plan->mask_array[j])
 292     {
 293       fallback_plan->lookup_array[j] = arabic_fallback_synthesize_lookup (plan, font, i);
 294       if (fallback_plan->lookup_array[j])
 295       {
 296         fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
 297         j++;
 298       }
 299     }
 300   }
 301 
 302   fallback_plan->num_lookups = j;
 303   fallback_plan->free_lookups = true;
 304 
 305   return j > 0;
 306 }
 307 
 308 static arabic_fallback_plan_t *
 309 arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan,
 310                              hb_font_t *font)
 311 {
 312   arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) calloc (1, sizeof (arabic_fallback_plan_t));
 313   if (unlikely (!fallback_plan))
 314     return const_cast<arabic_fallback_plan_t *> (&arabic_fallback_plan_nil);
 315 
 316   fallback_plan->num_lookups = 0;
 317   fallback_plan->free_lookups = false;
 318 
 319   /* Try synthesizing GSUB table using Unicode Arabic Presentation Forms,
 320    * in case the font has cmap entries for the presentation-forms characters. */
 321   if (arabic_fallback_plan_init_unicode (fallback_plan, plan, font))
 322     return fallback_plan;
 323 
 324   /* See if this looks like a Windows-1256-encoded font.  If it does, use a
 325    * hand-coded GSUB table. */
 326   if (arabic_fallback_plan_init_win1256 (fallback_plan, plan, font))
 327     return fallback_plan;
 328 
 329   free (fallback_plan);
 330   return const_cast<arabic_fallback_plan_t *> (&arabic_fallback_plan_nil);
 331 }
 332 
 333 static void
 334 arabic_fallback_plan_destroy (arabic_fallback_plan_t *fallback_plan)
 335 {
 336   if (!fallback_plan || fallback_plan == &arabic_fallback_plan_nil)
 337     return;
 338 
 339   for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
 340     if (fallback_plan->lookup_array[i])
 341     {
 342       fallback_plan->accel_array[i].fini ();
 343       if (fallback_plan->free_lookups)
 344         free (fallback_plan->lookup_array[i]);
 345     }
 346 
 347   free (fallback_plan);
 348 }
 349 
 350 static void
 351 arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
 352                             hb_font_t *font,
 353                             hb_buffer_t *buffer)
 354 {
 355   OT::hb_apply_context_t c (0, font, buffer);
 356   for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
 357     if (fallback_plan->lookup_array[i]) {
 358       c.set_lookup_mask (fallback_plan->mask_array[i]);
 359       hb_ot_layout_substitute_lookup (&c,
 360                                       *fallback_plan->lookup_array[i],
 361                                       fallback_plan->accel_array[i]);
 362     }
 363 }
 364 
 365 
 366 #endif /* HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH */