1 /*
   2  * Copyright © 2014  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_OT_CMAP_TABLE_HH
  28 #define HB_OT_CMAP_TABLE_HH
  29 
  30 #include "hb-open-type.hh"
  31 #include "hb-set.hh"
  32 
  33 /*
  34  * cmap -- Character to Glyph Index Mapping
  35  * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
  36  */
  37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
  38 
  39 namespace OT {
  40 
  41 
  42 struct CmapSubtableFormat0
  43 {
  44   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  45   {
  46     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
  47     if (!gid)
  48       return false;
  49     *glyph = gid;
  50     return true;
  51   }
  52   void collect_unicodes (hb_set_t *out) const
  53   {
  54     for (unsigned int i = 0; i < 256; i++)
  55       if (glyphIdArray[i])
  56         out->add (i);
  57   }
  58 
  59   bool sanitize (hb_sanitize_context_t *c) const
  60   {
  61     TRACE_SANITIZE (this);
  62     return_trace (c->check_struct (this));
  63   }
  64 
  65   protected:
  66   HBUINT16      format;         /* Format number is set to 0. */
  67   HBUINT16      length;         /* Byte length of this subtable. */
  68   HBUINT16      language;       /* Ignore. */
  69   HBUINT8       glyphIdArray[256];/* An array that maps character
  70                                  * code to glyph index values. */
  71   public:
  72   DEFINE_SIZE_STATIC (6 + 256);
  73 };
  74 
  75 struct CmapSubtableFormat4
  76 {
  77   struct segment_plan
  78   {
  79     HBUINT16 start_code;
  80     HBUINT16 end_code;
  81     bool use_delta;
  82   };
  83 
  84   bool serialize (hb_serialize_context_t *c,
  85                   const hb_subset_plan_t *plan,
  86                   const hb_vector_t<segment_plan> &segments)
  87   {
  88     TRACE_SERIALIZE (this);
  89 
  90     if (unlikely (!c->extend_min (*this))) return_trace (false);
  91 
  92     this->format.set (4);
  93     this->length.set (get_sub_table_size (segments));
  94 
  95     this->segCountX2.set (segments.length * 2);
  96     this->entrySelector.set (MAX (1u, hb_bit_storage (segments.length)) - 1);
  97     this->searchRange.set (2 * (1u << this->entrySelector));
  98     this->rangeShift.set (segments.length * 2 > this->searchRange
  99                           ? 2 * segments.length - this->searchRange
 100                           : 0);
 101 
 102     HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length);
 103     c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding.
 104     HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length);
 105     HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.length);
 106     HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length);
 107 
 108     if (id_range_offset == nullptr)
 109       return_trace (false);
 110 
 111     for (unsigned int i = 0; i < segments.length; i++)
 112     {
 113       end_count[i].set (segments[i].end_code);
 114       start_count[i].set (segments[i].start_code);
 115       if (segments[i].use_delta)
 116       {
 117         hb_codepoint_t cp = segments[i].start_code;
 118         hb_codepoint_t start_gid = 0;
 119         if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF))
 120           return_trace (false);
 121         id_delta[i].set (start_gid - segments[i].start_code);
 122       } else {
 123         id_delta[i].set (0);
 124         unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1;
 125         HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints);
 126         if (glyph_id_array == nullptr)
 127           return_trace (false);
 128         // From the cmap spec:
 129         //
 130         // id_range_offset[i]/2
 131         // + (cp - segments[i].start_code)
 132         // + (id_range_offset + i)
 133         // =
 134         // glyph_id_array + (cp - segments[i].start_code)
 135         //
 136         // So, solve for id_range_offset[i]:
 137         //
 138         // id_range_offset[i]
 139         // =
 140         // 2 * (glyph_id_array - id_range_offset - i)
 141         id_range_offset[i].set (2 * (
 142             glyph_id_array - id_range_offset - i));
 143         for (unsigned int j = 0; j < num_codepoints; j++)
 144         {
 145           hb_codepoint_t cp = segments[i].start_code + j;
 146           hb_codepoint_t new_gid;
 147           if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
 148             return_trace (false);
 149           glyph_id_array[j].set (new_gid);
 150         }
 151       }
 152     }
 153 
 154     return_trace (true);
 155   }
 156 
 157   static size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments)
 158   {
 159     size_t segment_size = 0;
 160     for (unsigned int i = 0; i < segments.length; i++)
 161     {
 162       // Parallel array entries
 163       segment_size +=
 164             2  // end count
 165           + 2  // start count
 166           + 2  // delta
 167           + 2; // range offset
 168 
 169       if (!segments[i].use_delta)
 170         // Add bytes for the glyph index array entries for this segment.
 171         segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2;
 172     }
 173 
 174     return min_size
 175         + 2 // Padding
 176         + segment_size;
 177   }
 178 
 179   static bool create_sub_table_plan (const hb_subset_plan_t *plan,
 180                                      hb_vector_t<segment_plan> *segments)
 181   {
 182     segment_plan *segment = nullptr;
 183     hb_codepoint_t last_gid = 0;
 184 
 185     hb_codepoint_t cp = HB_SET_VALUE_INVALID;
 186     while (plan->unicodes->next (&cp)) {
 187       hb_codepoint_t new_gid;
 188       if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
 189       {
 190         DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
 191         return false;
 192       }
 193 
 194       /* Stop adding to cmap if we are now outside of unicode BMP. */
 195       if (cp > 0xFFFF) break;
 196 
 197       if (!segment ||
 198           cp != segment->end_code + 1u)
 199       {
 200         segment = segments->push ();
 201         segment->start_code.set (cp);
 202         segment->end_code.set (cp);
 203         segment->use_delta = true;
 204       } else {
 205         segment->end_code.set (cp);
 206         if (last_gid + 1u != new_gid)
 207           // gid's are not consecutive in this segment so delta
 208           // cannot be used.
 209           segment->use_delta = false;
 210       }
 211 
 212       last_gid = new_gid;
 213     }
 214 
 215     // There must be a final entry with end_code == 0xFFFF. Check if we need to add one.
 216     if (segment == nullptr || segment->end_code != 0xFFFF)
 217     {
 218       segment = segments->push ();
 219       segment->start_code.set (0xFFFF);
 220       segment->end_code.set (0xFFFF);
 221       segment->use_delta = true;
 222     }
 223 
 224     return true;
 225   }
 226 
 227   struct accelerator_t
 228   {
 229     accelerator_t () {}
 230     accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
 231     ~accelerator_t () { fini (); }
 232 
 233     void init (const CmapSubtableFormat4 *subtable)
 234     {
 235       segCount = subtable->segCountX2 / 2;
 236       endCount = subtable->values.arrayZ;
 237       startCount = endCount + segCount + 1;
 238       idDelta = startCount + segCount;
 239       idRangeOffset = idDelta + segCount;
 240       glyphIdArray = idRangeOffset + segCount;
 241       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
 242     }
 243     void fini () {}
 244 
 245     bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 246     {
 247       /* Custom two-array bsearch. */
 248       int min = 0, max = (int) this->segCount - 1;
 249       const HBUINT16 *startCount = this->startCount;
 250       const HBUINT16 *endCount = this->endCount;
 251       unsigned int i;
 252       while (min <= max)
 253       {
 254         int mid = ((unsigned int) min + (unsigned int) max) / 2;
 255         if (codepoint < startCount[mid])
 256           max = mid - 1;
 257         else if (codepoint > endCount[mid])
 258           min = mid + 1;
 259         else
 260         {
 261           i = mid;
 262           goto found;
 263         }
 264       }
 265       return false;
 266 
 267     found:
 268       hb_codepoint_t gid;
 269       unsigned int rangeOffset = this->idRangeOffset[i];
 270       if (rangeOffset == 0)
 271         gid = codepoint + this->idDelta[i];
 272       else
 273       {
 274         /* Somebody has been smoking... */
 275         unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
 276         if (unlikely (index >= this->glyphIdArrayLength))
 277           return false;
 278         gid = this->glyphIdArray[index];
 279         if (unlikely (!gid))
 280           return false;
 281         gid += this->idDelta[i];
 282       }
 283       gid &= 0xFFFFu;
 284       if (!gid)
 285         return false;
 286       *glyph = gid;
 287       return true;
 288     }
 289     static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
 290     {
 291       return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph);
 292     }
 293     void collect_unicodes (hb_set_t *out) const
 294     {
 295       unsigned int count = this->segCount;
 296       if (count && this->startCount[count - 1] == 0xFFFFu)
 297         count--; /* Skip sentinel segment. */
 298       for (unsigned int i = 0; i < count; i++)
 299       {
 300         unsigned int rangeOffset = this->idRangeOffset[i];
 301         if (rangeOffset == 0)
 302           out->add_range (this->startCount[i], this->endCount[i]);
 303         else
 304         {
 305           for (hb_codepoint_t codepoint = this->startCount[i];
 306                codepoint <= this->endCount[i];
 307                codepoint++)
 308           {
 309             unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
 310             if (unlikely (index >= this->glyphIdArrayLength))
 311               break;
 312             hb_codepoint_t gid = this->glyphIdArray[index];
 313             if (unlikely (!gid))
 314               continue;
 315             out->add (codepoint);
 316           }
 317         }
 318       }
 319     }
 320 
 321     const HBUINT16 *endCount;
 322     const HBUINT16 *startCount;
 323     const HBUINT16 *idDelta;
 324     const HBUINT16 *idRangeOffset;
 325     const HBUINT16 *glyphIdArray;
 326     unsigned int segCount;
 327     unsigned int glyphIdArrayLength;
 328   };
 329 
 330   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 331   {
 332     accelerator_t accel (this);
 333     return accel.get_glyph_func (&accel, codepoint, glyph);
 334   }
 335   void collect_unicodes (hb_set_t *out) const
 336   {
 337     accelerator_t accel (this);
 338     accel.collect_unicodes (out);
 339   }
 340 
 341   bool sanitize (hb_sanitize_context_t *c) const
 342   {
 343     TRACE_SANITIZE (this);
 344     if (unlikely (!c->check_struct (this)))
 345       return_trace (false);
 346 
 347     if (unlikely (!c->check_range (this, length)))
 348     {
 349       /* Some broken fonts have too long of a "length" value.
 350        * If that is the case, just change the value to truncate
 351        * the subtable at the end of the blob. */
 352       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
 353                                             (uintptr_t) (c->end -
 354                                                          (char *) this));
 355       if (!c->try_set (&length, new_length))
 356         return_trace (false);
 357     }
 358 
 359     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
 360   }
 361 
 362 
 363 
 364   protected:
 365   HBUINT16      format;         /* Format number is set to 4. */
 366   HBUINT16      length;         /* This is the length in bytes of the
 367                                  * subtable. */
 368   HBUINT16      language;       /* Ignore. */
 369   HBUINT16      segCountX2;     /* 2 x segCount. */
 370   HBUINT16      searchRange;    /* 2 * (2**floor(log2(segCount))) */
 371   HBUINT16      entrySelector;  /* log2(searchRange/2) */
 372   HBUINT16      rangeShift;     /* 2 x segCount - searchRange */
 373 
 374   UnsizedArrayOf<HBUINT16>
 375                 values;
 376 #if 0
 377   HBUINT16      endCount[segCount];     /* End characterCode for each segment,
 378                                          * last=0xFFFFu. */
 379   HBUINT16      reservedPad;            /* Set to 0. */
 380   HBUINT16      startCount[segCount];   /* Start character code for each segment. */
 381   HBINT16               idDelta[segCount];      /* Delta for all character codes in segment. */
 382   HBUINT16      idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
 383   UnsizedArrayOf<HBUINT16>
 384                 glyphIdArray;   /* Glyph index array (arbitrary length) */
 385 #endif
 386 
 387   public:
 388   DEFINE_SIZE_ARRAY (14, values);
 389 };
 390 
 391 struct CmapSubtableLongGroup
 392 {
 393   friend struct CmapSubtableFormat12;
 394   friend struct CmapSubtableFormat13;
 395   template<typename U>
 396   friend struct CmapSubtableLongSegmented;
 397   friend struct cmap;
 398 
 399   int cmp (hb_codepoint_t codepoint) const
 400   {
 401     if (codepoint < startCharCode) return -1;
 402     if (codepoint > endCharCode)   return +1;
 403     return 0;
 404   }
 405 
 406   bool sanitize (hb_sanitize_context_t *c) const
 407   {
 408     TRACE_SANITIZE (this);
 409     return_trace (c->check_struct (this));
 410   }
 411 
 412   private:
 413   HBUINT32              startCharCode;  /* First character code in this group. */
 414   HBUINT32              endCharCode;    /* Last character code in this group. */
 415   HBUINT32              glyphID;        /* Glyph index; interpretation depends on
 416                                          * subtable format. */
 417   public:
 418   DEFINE_SIZE_STATIC (12);
 419 };
 420 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
 421 
 422 template <typename UINT>
 423 struct CmapSubtableTrimmed
 424 {
 425   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 426   {
 427     /* Rely on our implicit array bound-checking. */
 428     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
 429     if (!gid)
 430       return false;
 431     *glyph = gid;
 432     return true;
 433   }
 434   void collect_unicodes (hb_set_t *out) const
 435   {
 436     hb_codepoint_t start = startCharCode;
 437     unsigned int count = glyphIdArray.len;
 438     for (unsigned int i = 0; i < count; i++)
 439       if (glyphIdArray[i])
 440         out->add (start + i);
 441   }
 442 
 443   bool sanitize (hb_sanitize_context_t *c) const
 444   {
 445     TRACE_SANITIZE (this);
 446     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
 447   }
 448 
 449   protected:
 450   UINT          formatReserved; /* Subtable format and (maybe) padding. */
 451   UINT          length;         /* Byte length of this subtable. */
 452   UINT          language;       /* Ignore. */
 453   UINT          startCharCode;  /* First character code covered. */
 454   ArrayOf<GlyphID, UINT>
 455                 glyphIdArray;   /* Array of glyph index values for character
 456                                  * codes in the range. */
 457   public:
 458   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
 459 };
 460 
 461 struct CmapSubtableFormat6  : CmapSubtableTrimmed<HBUINT16> {};
 462 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
 463 
 464 template <typename T>
 465 struct CmapSubtableLongSegmented
 466 {
 467   friend struct cmap;
 468 
 469   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 470   {
 471     hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
 472     if (!gid)
 473       return false;
 474     *glyph = gid;
 475     return true;
 476   }
 477 
 478   void collect_unicodes (hb_set_t *out) const
 479   {
 480     for (unsigned int i = 0; i < this->groups.len; i++) {
 481       out->add_range (this->groups[i].startCharCode,
 482                       MIN ((hb_codepoint_t) this->groups[i].endCharCode,
 483                            (hb_codepoint_t) HB_UNICODE_MAX));
 484     }
 485   }
 486 
 487   bool sanitize (hb_sanitize_context_t *c) const
 488   {
 489     TRACE_SANITIZE (this);
 490     return_trace (c->check_struct (this) && groups.sanitize (c));
 491   }
 492 
 493   bool serialize (hb_serialize_context_t *c,
 494                   const hb_vector_t<CmapSubtableLongGroup> &group_data)
 495   {
 496     TRACE_SERIALIZE (this);
 497     if (unlikely (!c->extend_min (*this))) return_trace (false);
 498     if (unlikely (!groups.serialize (c, group_data.as_array ()))) return_trace (false);
 499     return true;
 500   }
 501 
 502   protected:
 503   HBUINT16      format;         /* Subtable format; set to 12. */
 504   HBUINT16      reserved;       /* Reserved; set to 0. */
 505   HBUINT32      length;         /* Byte length of this subtable. */
 506   HBUINT32      language;       /* Ignore. */
 507   SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
 508                 groups;         /* Groupings. */
 509   public:
 510   DEFINE_SIZE_ARRAY (16, groups);
 511 };
 512 
 513 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
 514 {
 515   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 516                                          hb_codepoint_t u)
 517   { return likely (group.startCharCode <= group.endCharCode) ?
 518            group.glyphID + (u - group.startCharCode) : 0; }
 519 
 520 
 521   bool serialize (hb_serialize_context_t *c,
 522                   const hb_vector_t<CmapSubtableLongGroup> &groups)
 523   {
 524     if (unlikely (!c->extend_min (*this))) return false;
 525 
 526     this->format.set (12);
 527     this->reserved.set (0);
 528     this->length.set (get_sub_table_size (groups));
 529 
 530     return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups);
 531   }
 532 
 533   static size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups)
 534   {
 535     return 16 + 12 * groups.length;
 536   }
 537 
 538   static bool create_sub_table_plan (const hb_subset_plan_t *plan,
 539                                      hb_vector_t<CmapSubtableLongGroup> *groups)
 540   {
 541     CmapSubtableLongGroup *group = nullptr;
 542 
 543     hb_codepoint_t cp = HB_SET_VALUE_INVALID;
 544     while (plan->unicodes->next (&cp)) {
 545       hb_codepoint_t new_gid;
 546       if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
 547       {
 548         DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
 549         return false;
 550       }
 551 
 552       if (!group || !_is_gid_consecutive (group, cp, new_gid))
 553       {
 554         group = groups->push ();
 555         group->startCharCode.set (cp);
 556         group->endCharCode.set (cp);
 557         group->glyphID.set (new_gid);
 558       }
 559       else group->endCharCode.set (cp);
 560     }
 561 
 562     DEBUG_MSG(SUBSET, nullptr, "cmap");
 563     for (unsigned int i = 0; i < groups->length; i++) {
 564       CmapSubtableLongGroup& group = (*groups)[i];
 565       DEBUG_MSG(SUBSET, nullptr, "  %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode));
 566     }
 567 
 568     return true;
 569   }
 570 
 571  private:
 572   static bool _is_gid_consecutive (CmapSubtableLongGroup *group,
 573                                    hb_codepoint_t cp,
 574                                    hb_codepoint_t new_gid)
 575   {
 576     return (cp - 1 == group->endCharCode) &&
 577         new_gid == group->glyphID + (cp - group->startCharCode);
 578   }
 579 
 580 };
 581 
 582 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
 583 {
 584   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 585                                          hb_codepoint_t u HB_UNUSED)
 586   { return group.glyphID; }
 587 };
 588 
 589 typedef enum
 590 {
 591   GLYPH_VARIANT_NOT_FOUND = 0,
 592   GLYPH_VARIANT_FOUND = 1,
 593   GLYPH_VARIANT_USE_DEFAULT = 2
 594 } glyph_variant_t;
 595 
 596 struct UnicodeValueRange
 597 {
 598   int cmp (const hb_codepoint_t &codepoint) const
 599   {
 600     if (codepoint < startUnicodeValue) return -1;
 601     if (codepoint > startUnicodeValue + additionalCount) return +1;
 602     return 0;
 603   }
 604 
 605   bool sanitize (hb_sanitize_context_t *c) const
 606   {
 607     TRACE_SANITIZE (this);
 608     return_trace (c->check_struct (this));
 609   }
 610 
 611   HBUINT24      startUnicodeValue;      /* First value in this range. */
 612   HBUINT8       additionalCount;        /* Number of additional values in this
 613                                          * range. */
 614   public:
 615   DEFINE_SIZE_STATIC (4);
 616 };
 617 
 618 struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
 619 {
 620   void collect_unicodes (hb_set_t *out) const
 621   {
 622     unsigned int count = len;
 623     for (unsigned int i = 0; i < count; i++)
 624     {
 625       hb_codepoint_t first = arrayZ[i].startUnicodeValue;
 626       hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
 627                                  (hb_codepoint_t) HB_UNICODE_MAX);
 628       out->add_range (first, last);
 629     }
 630   }
 631 
 632   public:
 633   DEFINE_SIZE_ARRAY (4, *this);
 634 };
 635 
 636 struct UVSMapping
 637 {
 638   int cmp (const hb_codepoint_t &codepoint) const
 639   {
 640     return unicodeValue.cmp (codepoint);
 641   }
 642 
 643   bool sanitize (hb_sanitize_context_t *c) const
 644   {
 645     TRACE_SANITIZE (this);
 646     return_trace (c->check_struct (this));
 647   }
 648 
 649   HBUINT24      unicodeValue;   /* Base Unicode value of the UVS */
 650   GlyphID       glyphID;        /* Glyph ID of the UVS */
 651   public:
 652   DEFINE_SIZE_STATIC (5);
 653 };
 654 
 655 struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
 656 {
 657   void collect_unicodes (hb_set_t *out) const
 658   {
 659     unsigned int count = len;
 660     for (unsigned int i = 0; i < count; i++)
 661       out->add (arrayZ[i].glyphID);
 662   }
 663 
 664   public:
 665   DEFINE_SIZE_ARRAY (4, *this);
 666 };
 667 
 668 struct VariationSelectorRecord
 669 {
 670   glyph_variant_t get_glyph (hb_codepoint_t codepoint,
 671                              hb_codepoint_t *glyph,
 672                              const void *base) const
 673   {
 674     if ((base+defaultUVS).bfind (codepoint))
 675       return GLYPH_VARIANT_USE_DEFAULT;
 676     const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint);
 677     if (nonDefault.glyphID)
 678     {
 679       *glyph = nonDefault.glyphID;
 680        return GLYPH_VARIANT_FOUND;
 681     }
 682     return GLYPH_VARIANT_NOT_FOUND;
 683   }
 684 
 685   void collect_unicodes (hb_set_t *out, const void *base) const
 686   {
 687     (base+defaultUVS).collect_unicodes (out);
 688     (base+nonDefaultUVS).collect_unicodes (out);
 689   }
 690 
 691   int cmp (const hb_codepoint_t &variation_selector) const
 692   {
 693     return varSelector.cmp (variation_selector);
 694   }
 695 
 696   bool sanitize (hb_sanitize_context_t *c, const void *base) const
 697   {
 698     TRACE_SANITIZE (this);
 699     return_trace (c->check_struct (this) &&
 700                   defaultUVS.sanitize (c, base) &&
 701                   nonDefaultUVS.sanitize (c, base));
 702   }
 703 
 704   HBUINT24      varSelector;    /* Variation selector. */
 705   LOffsetTo<DefaultUVS>
 706                 defaultUVS;     /* Offset to Default UVS Table.  May be 0. */
 707   LOffsetTo<NonDefaultUVS>
 708                 nonDefaultUVS;  /* Offset to Non-Default UVS Table.  May be 0. */
 709   public:
 710   DEFINE_SIZE_STATIC (11);
 711 };
 712 
 713 struct CmapSubtableFormat14
 714 {
 715   glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 716                                      hb_codepoint_t variation_selector,
 717                                      hb_codepoint_t *glyph) const
 718   {
 719     return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this);
 720   }
 721 
 722   void collect_variation_selectors (hb_set_t *out) const
 723   {
 724     unsigned int count = record.len;
 725     for (unsigned int i = 0; i < count; i++)
 726       out->add (record.arrayZ[i].varSelector);
 727   }
 728   void collect_variation_unicodes (hb_codepoint_t variation_selector,
 729                                    hb_set_t *out) const
 730   {
 731     record.bsearch (variation_selector).collect_unicodes (out, this);
 732   }
 733 
 734   bool sanitize (hb_sanitize_context_t *c) const
 735   {
 736     TRACE_SANITIZE (this);
 737     return_trace (c->check_struct (this) &&
 738                   record.sanitize (c, this));
 739   }
 740 
 741   protected:
 742   HBUINT16      format;         /* Format number is set to 14. */
 743   HBUINT32      length;         /* Byte length of this subtable. */
 744   SortedArrayOf<VariationSelectorRecord, HBUINT32>
 745                 record;         /* Variation selector records; sorted
 746                                  * in increasing order of `varSelector'. */
 747   public:
 748   DEFINE_SIZE_ARRAY (10, record);
 749 };
 750 
 751 struct CmapSubtable
 752 {
 753   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
 754 
 755   bool get_glyph (hb_codepoint_t codepoint,
 756                   hb_codepoint_t *glyph) const
 757   {
 758     switch (u.format) {
 759     case  0: return u.format0 .get_glyph (codepoint, glyph);
 760     case  4: return u.format4 .get_glyph (codepoint, glyph);
 761     case  6: return u.format6 .get_glyph (codepoint, glyph);
 762     case 10: return u.format10.get_glyph (codepoint, glyph);
 763     case 12: return u.format12.get_glyph (codepoint, glyph);
 764     case 13: return u.format13.get_glyph (codepoint, glyph);
 765     case 14:
 766     default: return false;
 767     }
 768   }
 769   void collect_unicodes (hb_set_t *out) const
 770   {
 771     switch (u.format) {
 772     case  0: u.format0 .collect_unicodes (out); return;
 773     case  4: u.format4 .collect_unicodes (out); return;
 774     case  6: u.format6 .collect_unicodes (out); return;
 775     case 10: u.format10.collect_unicodes (out); return;
 776     case 12: u.format12.collect_unicodes (out); return;
 777     case 13: u.format13.collect_unicodes (out); return;
 778     case 14:
 779     default: return;
 780     }
 781   }
 782 
 783   bool sanitize (hb_sanitize_context_t *c) const
 784   {
 785     TRACE_SANITIZE (this);
 786     if (!u.format.sanitize (c)) return_trace (false);
 787     switch (u.format) {
 788     case  0: return_trace (u.format0 .sanitize (c));
 789     case  4: return_trace (u.format4 .sanitize (c));
 790     case  6: return_trace (u.format6 .sanitize (c));
 791     case 10: return_trace (u.format10.sanitize (c));
 792     case 12: return_trace (u.format12.sanitize (c));
 793     case 13: return_trace (u.format13.sanitize (c));
 794     case 14: return_trace (u.format14.sanitize (c));
 795     default:return_trace (true);
 796     }
 797   }
 798 
 799   public:
 800   union {
 801   HBUINT16              format;         /* Format identifier */
 802   CmapSubtableFormat0   format0;
 803   CmapSubtableFormat4   format4;
 804   CmapSubtableFormat6   format6;
 805   CmapSubtableFormat10  format10;
 806   CmapSubtableFormat12  format12;
 807   CmapSubtableFormat13  format13;
 808   CmapSubtableFormat14  format14;
 809   } u;
 810   public:
 811   DEFINE_SIZE_UNION (2, format);
 812 };
 813 
 814 
 815 struct EncodingRecord
 816 {
 817   int cmp (const EncodingRecord &other) const
 818   {
 819     int ret;
 820     ret = platformID.cmp (other.platformID);
 821     if (ret) return ret;
 822     ret = encodingID.cmp (other.encodingID);
 823     if (ret) return ret;
 824     return 0;
 825   }
 826 
 827   bool sanitize (hb_sanitize_context_t *c, const void *base) const
 828   {
 829     TRACE_SANITIZE (this);
 830     return_trace (c->check_struct (this) &&
 831                   subtable.sanitize (c, base));
 832   }
 833 
 834   HBUINT16      platformID;     /* Platform ID. */
 835   HBUINT16      encodingID;     /* Platform-specific encoding ID. */
 836   LOffsetTo<CmapSubtable>
 837                 subtable;       /* Byte offset from beginning of table to the subtable for this encoding. */
 838   public:
 839   DEFINE_SIZE_STATIC (8);
 840 };
 841 
 842 struct cmap
 843 {
 844   static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
 845 
 846   struct subset_plan
 847   {
 848     size_t final_size () const
 849     {
 850       return 4 // header
 851           +  8 * 3 // 3 EncodingRecord
 852           +  CmapSubtableFormat4::get_sub_table_size (this->format4_segments)
 853           +  CmapSubtableFormat12::get_sub_table_size (this->format12_groups);
 854     }
 855 
 856     hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments;
 857     hb_vector_t<CmapSubtableLongGroup> format12_groups;
 858   };
 859 
 860   bool _create_plan (const hb_subset_plan_t *plan,
 861                      subset_plan *cmap_plan) const
 862   {
 863     if (unlikely (!CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments)))
 864       return false;
 865 
 866     return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups);
 867   }
 868 
 869   bool _subset (const hb_subset_plan_t *plan,
 870                 const subset_plan &cmap_subset_plan,
 871                 size_t dest_sz,
 872                 void *dest) const
 873   {
 874     hb_serialize_context_t c (dest, dest_sz);
 875 
 876     cmap *table = c.start_serialize<cmap> ();
 877     if (unlikely (!c.extend_min (*table)))
 878     {
 879       return false;
 880     }
 881 
 882     table->version.set (0);
 883 
 884     if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3)))
 885       return false;
 886 
 887     // TODO(grieger): Convert the below to a for loop
 888 
 889     // Format 4, Plat 0 Encoding Record
 890     EncodingRecord &format4_plat0_rec = table->encodingRecord[0];
 891     format4_plat0_rec.platformID.set (0); // Unicode
 892     format4_plat0_rec.encodingID.set (3);
 893 
 894     // Format 4, Plat 3 Encoding Record
 895     EncodingRecord &format4_plat3_rec = table->encodingRecord[1];
 896     format4_plat3_rec.platformID.set (3); // Windows
 897     format4_plat3_rec.encodingID.set (1); // Unicode BMP
 898 
 899     // Format 12 Encoding Record
 900     EncodingRecord &format12_rec = table->encodingRecord[2];
 901     format12_rec.platformID.set (3); // Windows
 902     format12_rec.encodingID.set (10); // Unicode UCS-4
 903 
 904     // Write out format 4 sub table
 905     {
 906       CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table);
 907       format4_plat3_rec.subtable.set (format4_plat0_rec.subtable);
 908       subtable.u.format.set (4);
 909 
 910       CmapSubtableFormat4 &format4 = subtable.u.format4;
 911       if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments)))
 912         return false;
 913     }
 914 
 915     // Write out format 12 sub table.
 916     {
 917       CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table);
 918       subtable.u.format.set (12);
 919 
 920       CmapSubtableFormat12 &format12 = subtable.u.format12;
 921       if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups)))
 922         return false;
 923     }
 924 
 925     c.end_serialize ();
 926 
 927     return true;
 928   }
 929 
 930   bool subset (hb_subset_plan_t *plan) const
 931   {
 932     subset_plan cmap_subset_plan;
 933 
 934     if (unlikely (!_create_plan (plan, &cmap_subset_plan)))
 935     {
 936       DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan.");
 937       return false;
 938     }
 939 
 940     // We now know how big our blob needs to be
 941     size_t dest_sz = cmap_subset_plan.final_size ();
 942     void *dest = malloc (dest_sz);
 943     if (unlikely (!dest)) {
 944       DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz);
 945       return false;
 946     }
 947 
 948     if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest)))
 949     {
 950       DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap.");
 951       free (dest);
 952       return false;
 953     }
 954 
 955     // all done, write the blob into dest
 956     hb_blob_t *cmap_prime = hb_blob_create ((const char *) dest,
 957                                             dest_sz,
 958                                             HB_MEMORY_MODE_READONLY,
 959                                             dest,
 960                                             free);
 961     bool result =  plan->add_table (HB_OT_TAG_cmap, cmap_prime);
 962     hb_blob_destroy (cmap_prime);
 963     return result;
 964   }
 965 
 966   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
 967   {
 968     if (symbol) *symbol = false;
 969 
 970     const CmapSubtable *subtable;
 971 
 972     /* 32-bit subtables. */
 973     if ((subtable = this->find_subtable (3, 10))) return subtable;
 974     if ((subtable = this->find_subtable (0, 6))) return subtable;
 975     if ((subtable = this->find_subtable (0, 4))) return subtable;
 976 
 977     /* 16-bit subtables. */
 978     if ((subtable = this->find_subtable (3, 1))) return subtable;
 979     if ((subtable = this->find_subtable (0, 3))) return subtable;
 980     if ((subtable = this->find_subtable (0, 2))) return subtable;
 981     if ((subtable = this->find_subtable (0, 1))) return subtable;
 982     if ((subtable = this->find_subtable (0, 0))) return subtable;
 983 
 984     /* Symbol subtable. */
 985     if ((subtable = this->find_subtable (3, 0)))
 986     {
 987       if (symbol) *symbol = true;
 988       return subtable;
 989     }
 990 
 991     /* Meh. */
 992     return &Null (CmapSubtable);
 993   }
 994 
 995   struct accelerator_t
 996   {
 997     void init (hb_face_t *face)
 998     {
 999       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
1000       bool symbol;
1001       this->subtable = table->find_best_subtable (&symbol);
1002       this->subtable_uvs = &Null (CmapSubtableFormat14);
1003       {
1004         const CmapSubtable *st = table->find_subtable (0, 5);
1005         if (st && st->u.format == 14)
1006           subtable_uvs = &st->u.format14;
1007       }
1008 
1009       this->get_glyph_data = subtable;
1010       if (unlikely (symbol))
1011       {
1012         this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
1013       } else {
1014         switch (subtable->u.format) {
1015         /* Accelerate format 4 and format 12. */
1016         default:
1017           this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
1018           break;
1019         case 12:
1020           this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
1021           break;
1022         case  4:
1023           {
1024             this->format4_accel.init (&subtable->u.format4);
1025             this->get_glyph_data = &this->format4_accel;
1026             this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
1027           }
1028           break;
1029         }
1030       }
1031     }
1032 
1033     void fini () { this->table.destroy (); }
1034 
1035     bool get_nominal_glyph (hb_codepoint_t  unicode,
1036                                    hb_codepoint_t *glyph) const
1037     {
1038       if (unlikely (!this->get_glyph_funcZ)) return false;
1039       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
1040     }
1041     unsigned int get_nominal_glyphs (unsigned int count,
1042                                      const hb_codepoint_t *first_unicode,
1043                                      unsigned int unicode_stride,
1044                                      hb_codepoint_t *first_glyph,
1045                                      unsigned int glyph_stride) const
1046     {
1047       if (unlikely (!this->get_glyph_funcZ)) return 0;
1048 
1049       hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
1050       const void *get_glyph_data = this->get_glyph_data;
1051 
1052       unsigned int done;
1053       for (done = 0;
1054            done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
1055            done++)
1056       {
1057         first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
1058         first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
1059       }
1060       return done;
1061     }
1062 
1063     bool get_variation_glyph (hb_codepoint_t  unicode,
1064                               hb_codepoint_t  variation_selector,
1065                               hb_codepoint_t *glyph) const
1066     {
1067       switch (this->subtable_uvs->get_glyph_variant (unicode,
1068                                                      variation_selector,
1069                                                      glyph))
1070       {
1071         case GLYPH_VARIANT_NOT_FOUND:   return false;
1072         case GLYPH_VARIANT_FOUND:       return true;
1073         case GLYPH_VARIANT_USE_DEFAULT: break;
1074       }
1075 
1076       return get_nominal_glyph (unicode, glyph);
1077     }
1078 
1079     void collect_unicodes (hb_set_t *out) const
1080     {
1081       subtable->collect_unicodes (out);
1082     }
1083     void collect_variation_selectors (hb_set_t *out) const
1084     {
1085       subtable_uvs->collect_variation_selectors (out);
1086     }
1087     void collect_variation_unicodes (hb_codepoint_t variation_selector,
1088                                      hb_set_t *out) const
1089     {
1090       subtable_uvs->collect_variation_unicodes (variation_selector, out);
1091     }
1092 
1093     protected:
1094     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1095                                               hb_codepoint_t codepoint,
1096                                               hb_codepoint_t *glyph);
1097 
1098     template <typename Type>
1099     static bool get_glyph_from (const void *obj,
1100                                 hb_codepoint_t codepoint,
1101                                 hb_codepoint_t *glyph)
1102     {
1103       const Type *typed_obj = (const Type *) obj;
1104       return typed_obj->get_glyph (codepoint, glyph);
1105     }
1106 
1107     template <typename Type>
1108     static bool get_glyph_from_symbol (const void *obj,
1109                                               hb_codepoint_t codepoint,
1110                                               hb_codepoint_t *glyph)
1111     {
1112       const Type *typed_obj = (const Type *) obj;
1113       if (likely (typed_obj->get_glyph (codepoint, glyph)))
1114         return true;
1115 
1116       if (codepoint <= 0x00FFu)
1117       {
1118         /* For symbol-encoded OpenType fonts, we duplicate the
1119          * U+F000..F0FF range at U+0000..U+00FF.  That's what
1120          * Windows seems to do, and that's hinted about at:
1121          * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1122          * under "Non-Standard (Symbol) Fonts". */
1123         return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1124       }
1125 
1126       return false;
1127     }
1128 
1129     private:
1130     hb_nonnull_ptr_t<const CmapSubtable> subtable;
1131     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
1132 
1133     hb_cmap_get_glyph_func_t get_glyph_funcZ;
1134     const void *get_glyph_data;
1135 
1136     CmapSubtableFormat4::accelerator_t format4_accel;
1137 
1138     hb_blob_ptr_t<cmap> table;
1139   };
1140 
1141   protected:
1142 
1143   const CmapSubtable *find_subtable (unsigned int platform_id,
1144                                      unsigned int encoding_id) const
1145   {
1146     EncodingRecord key;
1147     key.platformID.set (platform_id);
1148     key.encodingID.set (encoding_id);
1149 
1150     const EncodingRecord &result = encodingRecord.bsearch (key);
1151     if (!result.subtable)
1152       return nullptr;
1153 
1154     return &(this+result.subtable);
1155   }
1156 
1157   public:
1158 
1159   bool sanitize (hb_sanitize_context_t *c) const
1160   {
1161     TRACE_SANITIZE (this);
1162     return_trace (c->check_struct (this) &&
1163                   likely (version == 0) &&
1164                   encodingRecord.sanitize (c, this));
1165   }
1166 
1167   protected:
1168   HBUINT16              version;        /* Table version number (0). */
1169   SortedArrayOf<EncodingRecord>
1170                         encodingRecord; /* Encoding tables. */
1171   public:
1172   DEFINE_SIZE_ARRAY (4, encodingRecord);
1173 };
1174 
1175 struct cmap_accelerator_t : cmap::accelerator_t {};
1176 
1177 } /* namespace OT */
1178 
1179 
1180 #endif /* HB_OT_CMAP_TABLE_HH */