1 /*
   2  * Copyright © 2014  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_OT_CMAP_TABLE_HH
  28 #define HB_OT_CMAP_TABLE_HH
  29 
  30 #include "hb-open-type-private.hh"
  31 
  32 
  33 namespace OT {
  34 
  35 
  36 /*
  37  * cmap -- Character To Glyph Index Mapping Table
  38  */
  39 
  40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
  41 
  42 
  43 struct CmapSubtableFormat0
  44 {
  45   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  46   {
  47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
  48     if (!gid)
  49       return false;
  50     *glyph = gid;
  51     return true;
  52   }
  53 
  54   inline bool sanitize (hb_sanitize_context_t *c) const
  55   {
  56     TRACE_SANITIZE (this);
  57     return_trace (c->check_struct (this));
  58   }
  59 
  60   protected:
  61   USHORT        format;         /* Format number is set to 0. */
  62   USHORT        lengthZ;        /* Byte length of this subtable. */
  63   USHORT        languageZ;      /* Ignore. */
  64   BYTE          glyphIdArray[256];/* An array that maps character
  65                                  * code to glyph index values. */
  66   public:
  67   DEFINE_SIZE_STATIC (6 + 256);
  68 };
  69 
  70 struct CmapSubtableFormat4
  71 {
  72   struct accelerator_t
  73   {
  74     inline void init (const CmapSubtableFormat4 *subtable)
  75     {
  76       segCount = subtable->segCountX2 / 2;
  77       endCount = subtable->values;
  78       startCount = endCount + segCount + 1;
  79       idDelta = startCount + segCount;
  80       idRangeOffset = idDelta + segCount;
  81       glyphIdArray = idRangeOffset + segCount;
  82       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
  83     }
  84 
  85     static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
  86     {
  87       const accelerator_t *thiz = (const accelerator_t *) obj;
  88 
  89       /* Custom two-array bsearch. */
  90       int min = 0, max = (int) thiz->segCount - 1;
  91       const USHORT *startCount = thiz->startCount;
  92       const USHORT *endCount = thiz->endCount;
  93       unsigned int i;
  94       while (min <= max)
  95       {
  96         int mid = (min + max) / 2;
  97         if (codepoint < startCount[mid])
  98           max = mid - 1;
  99         else if (codepoint > endCount[mid])
 100           min = mid + 1;
 101         else
 102         {
 103           i = mid;
 104           goto found;
 105         }
 106       }
 107       return false;
 108 
 109     found:
 110       hb_codepoint_t gid;
 111       unsigned int rangeOffset = thiz->idRangeOffset[i];
 112       if (rangeOffset == 0)
 113         gid = codepoint + thiz->idDelta[i];
 114       else
 115       {
 116         /* Somebody has been smoking... */
 117         unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
 118         if (unlikely (index >= thiz->glyphIdArrayLength))
 119           return false;
 120         gid = thiz->glyphIdArray[index];
 121         if (unlikely (!gid))
 122           return false;
 123         gid += thiz->idDelta[i];
 124       }
 125 
 126       *glyph = gid & 0xFFFFu;
 127       return true;
 128     }
 129 
 130     const USHORT *endCount;
 131     const USHORT *startCount;
 132     const USHORT *idDelta;
 133     const USHORT *idRangeOffset;
 134     const USHORT *glyphIdArray;
 135     unsigned int segCount;
 136     unsigned int glyphIdArrayLength;
 137   };
 138 
 139   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 140   {
 141     accelerator_t accel;
 142     accel.init (this);
 143     return accel.get_glyph_func (&accel, codepoint, glyph);
 144   }
 145 
 146   inline bool sanitize (hb_sanitize_context_t *c) const
 147   {
 148     TRACE_SANITIZE (this);
 149     if (unlikely (!c->check_struct (this)))
 150       return_trace (false);
 151 
 152     if (unlikely (!c->check_range (this, length)))
 153     {
 154       /* Some broken fonts have too long of a "length" value.
 155        * If that is the case, just change the value to truncate
 156        * the subtable at the end of the blob. */
 157       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
 158                                             (uintptr_t) (c->end -
 159                                                          (char *) this));
 160       if (!c->try_set (&length, new_length))
 161         return_trace (false);
 162     }
 163 
 164     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
 165   }
 166 
 167   protected:
 168   USHORT        format;         /* Format number is set to 4. */
 169   USHORT        length;         /* This is the length in bytes of the
 170                                  * subtable. */
 171   USHORT        languageZ;      /* Ignore. */
 172   USHORT        segCountX2;     /* 2 x segCount. */
 173   USHORT        searchRangeZ;   /* 2 * (2**floor(log2(segCount))) */
 174   USHORT        entrySelectorZ; /* log2(searchRange/2) */
 175   USHORT        rangeShiftZ;    /* 2 x segCount - searchRange */
 176 
 177   USHORT        values[VAR];
 178 #if 0
 179   USHORT        endCount[segCount];     /* End characterCode for each segment,
 180                                          * last=0xFFFFu. */
 181   USHORT        reservedPad;            /* Set to 0. */
 182   USHORT        startCount[segCount];   /* Start character code for each segment. */
 183   SHORT         idDelta[segCount];      /* Delta for all character codes in segment. */
 184   USHORT        idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
 185   USHORT        glyphIdArray[VAR];      /* Glyph index array (arbitrary length) */
 186 #endif
 187 
 188   public:
 189   DEFINE_SIZE_ARRAY (14, values);
 190 };
 191 
 192 struct CmapSubtableLongGroup
 193 {
 194   friend struct CmapSubtableFormat12;
 195   friend struct CmapSubtableFormat13;
 196 
 197   int cmp (hb_codepoint_t codepoint) const
 198   {
 199     if (codepoint < startCharCode) return -1;
 200     if (codepoint > endCharCode)   return +1;
 201     return 0;
 202   }
 203 
 204   inline bool sanitize (hb_sanitize_context_t *c) const
 205   {
 206     TRACE_SANITIZE (this);
 207     return_trace (c->check_struct (this));
 208   }
 209 
 210   private:
 211   ULONG         startCharCode;  /* First character code in this group. */
 212   ULONG         endCharCode;    /* Last character code in this group. */
 213   ULONG         glyphID;        /* Glyph index; interpretation depends on
 214                                  * subtable format. */
 215   public:
 216   DEFINE_SIZE_STATIC (12);
 217 };
 218 
 219 template <typename UINT>
 220 struct CmapSubtableTrimmed
 221 {
 222   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 223   {
 224     /* Rely on our implicit array bound-checking. */
 225     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
 226     if (!gid)
 227       return false;
 228     *glyph = gid;
 229     return true;
 230   }
 231 
 232   inline bool sanitize (hb_sanitize_context_t *c) const
 233   {
 234     TRACE_SANITIZE (this);
 235     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
 236   }
 237 
 238   protected:
 239   UINT          formatReserved; /* Subtable format and (maybe) padding. */
 240   UINT          lengthZ;        /* Byte length of this subtable. */
 241   UINT          languageZ;      /* Ignore. */
 242   UINT          startCharCode;  /* First character code covered. */
 243   ArrayOf<GlyphID, UINT>
 244                 glyphIdArray;   /* Array of glyph index values for character
 245                                  * codes in the range. */
 246   public:
 247   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
 248 };
 249 
 250 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
 251 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
 252 
 253 template <typename T>
 254 struct CmapSubtableLongSegmented
 255 {
 256   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 257   {
 258     int i = groups.bsearch (codepoint);
 259     if (i == -1)
 260       return false;
 261     *glyph = T::group_get_glyph (groups[i], codepoint);
 262     return true;
 263   }
 264 
 265   inline bool sanitize (hb_sanitize_context_t *c) const
 266   {
 267     TRACE_SANITIZE (this);
 268     return_trace (c->check_struct (this) && groups.sanitize (c));
 269   }
 270 
 271   protected:
 272   USHORT        format;         /* Subtable format; set to 12. */
 273   USHORT        reservedZ;      /* Reserved; set to 0. */
 274   ULONG         lengthZ;        /* Byte length of this subtable. */
 275   ULONG         languageZ;      /* Ignore. */
 276   SortedArrayOf<CmapSubtableLongGroup, ULONG>
 277                 groups;         /* Groupings. */
 278   public:
 279   DEFINE_SIZE_ARRAY (16, groups);
 280 };
 281 
 282 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
 283 {
 284   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 285                                                 hb_codepoint_t u)
 286   { return group.glyphID + (u - group.startCharCode); }
 287 };
 288 
 289 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
 290 {
 291   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 292                                                 hb_codepoint_t u HB_UNUSED)
 293   { return group.glyphID; }
 294 };
 295 
 /* Outcome of looking up a (codepoint, variation selector) pair. */
 enum glyph_variant_t
 {
   /* The pair is not listed at all. */
   GLYPH_VARIANT_NOT_FOUND   = 0,
   /* The pair has its own glyph, which was written to the output. */
   GLYPH_VARIANT_FOUND       = 1,
   /* The pair is listed as using the default glyph of the codepoint. */
   GLYPH_VARIANT_USE_DEFAULT = 2
 };
 302 
 303 struct UnicodeValueRange
 304 {
 305   inline int cmp (const hb_codepoint_t &codepoint) const
 306   {
 307     if (codepoint < startUnicodeValue) return -1;
 308     if (codepoint > startUnicodeValue + additionalCount) return +1;
 309     return 0;
 310   }
 311 
 312   inline bool sanitize (hb_sanitize_context_t *c) const
 313   {
 314     TRACE_SANITIZE (this);
 315     return_trace (c->check_struct (this));
 316   }
 317 
 318   UINT24        startUnicodeValue;      /* First value in this range. */
 319   BYTE          additionalCount;        /* Number of additional values in this
 320                                          * range. */
 321   public:
 322   DEFINE_SIZE_STATIC (4);
 323 };
 324 
 325 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
 326 
 327 struct UVSMapping
 328 {
 329   inline int cmp (const hb_codepoint_t &codepoint) const
 330   {
 331     return unicodeValue.cmp (codepoint);
 332   }
 333 
 334   inline bool sanitize (hb_sanitize_context_t *c) const
 335   {
 336     TRACE_SANITIZE (this);
 337     return_trace (c->check_struct (this));
 338   }
 339 
 340   UINT24        unicodeValue;   /* Base Unicode value of the UVS */
 341   GlyphID       glyphID;        /* Glyph ID of the UVS */
 342   public:
 343   DEFINE_SIZE_STATIC (5);
 344 };
 345 
 346 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
 347 
 348 struct VariationSelectorRecord
 349 {
 350   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
 351                                     hb_codepoint_t *glyph,
 352                                     const void *base) const
 353   {
 354     int i;
 355     const DefaultUVS &defaults = base+defaultUVS;
 356     i = defaults.bsearch (codepoint);
 357     if (i != -1)
 358       return GLYPH_VARIANT_USE_DEFAULT;
 359     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
 360     i = nonDefaults.bsearch (codepoint);
 361     if (i != -1)
 362     {
 363       *glyph = nonDefaults[i].glyphID;
 364        return GLYPH_VARIANT_FOUND;
 365     }
 366     return GLYPH_VARIANT_NOT_FOUND;
 367   }
 368 
 369   inline int cmp (const hb_codepoint_t &variation_selector) const
 370   {
 371     return varSelector.cmp (variation_selector);
 372   }
 373 
 374   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 375   {
 376     TRACE_SANITIZE (this);
 377     return_trace (c->check_struct (this) &&
 378                   defaultUVS.sanitize (c, base) &&
 379                   nonDefaultUVS.sanitize (c, base));
 380   }
 381 
 382   UINT24        varSelector;    /* Variation selector. */
 383   LOffsetTo<DefaultUVS>
 384                 defaultUVS;     /* Offset to Default UVS Table. May be 0. */
 385   LOffsetTo<NonDefaultUVS>
 386                 nonDefaultUVS;  /* Offset to Non-Default UVS Table. May be 0. */
 387   public:
 388   DEFINE_SIZE_STATIC (11);
 389 };
 390 
 391 struct CmapSubtableFormat14
 392 {
 393   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 394                                             hb_codepoint_t variation_selector,
 395                                             hb_codepoint_t *glyph) const
 396   {
 397     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
 398   }
 399 
 400   inline bool sanitize (hb_sanitize_context_t *c) const
 401   {
 402     TRACE_SANITIZE (this);
 403     return_trace (c->check_struct (this) &&
 404                   record.sanitize (c, this));
 405   }
 406 
 407   protected:
 408   USHORT        format;         /* Format number is set to 14. */
 409   ULONG         lengthZ;        /* Byte length of this subtable. */
 410   SortedArrayOf<VariationSelectorRecord, ULONG>
 411                 record;         /* Variation selector records; sorted
 412                                  * in increasing order of `varSelector'. */
 413   public:
 414   DEFINE_SIZE_ARRAY (10, record);
 415 };
 416 
 417 struct CmapSubtable
 418 {
 419   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
 420 
 421   inline bool get_glyph (hb_codepoint_t codepoint,
 422                          hb_codepoint_t *glyph) const
 423   {
 424     switch (u.format) {
 425     case  0: return u.format0 .get_glyph(codepoint, glyph);
 426     case  4: return u.format4 .get_glyph(codepoint, glyph);
 427     case  6: return u.format6 .get_glyph(codepoint, glyph);
 428     case 10: return u.format10.get_glyph(codepoint, glyph);
 429     case 12: return u.format12.get_glyph(codepoint, glyph);
 430     case 13: return u.format13.get_glyph(codepoint, glyph);
 431     case 14:
 432     default: return false;
 433     }
 434   }
 435 
 436   inline bool sanitize (hb_sanitize_context_t *c) const
 437   {
 438     TRACE_SANITIZE (this);
 439     if (!u.format.sanitize (c)) return_trace (false);
 440     switch (u.format) {
 441     case  0: return_trace (u.format0 .sanitize (c));
 442     case  4: return_trace (u.format4 .sanitize (c));
 443     case  6: return_trace (u.format6 .sanitize (c));
 444     case 10: return_trace (u.format10.sanitize (c));
 445     case 12: return_trace (u.format12.sanitize (c));
 446     case 13: return_trace (u.format13.sanitize (c));
 447     case 14: return_trace (u.format14.sanitize (c));
 448     default:return_trace (true);
 449     }
 450   }
 451 
 452   public:
 453   union {
 454   USHORT                format;         /* Format identifier */
 455   CmapSubtableFormat0   format0;
 456   CmapSubtableFormat4   format4;
 457   CmapSubtableFormat6   format6;
 458   CmapSubtableFormat10  format10;
 459   CmapSubtableFormat12  format12;
 460   CmapSubtableFormat13  format13;
 461   CmapSubtableFormat14  format14;
 462   } u;
 463   public:
 464   DEFINE_SIZE_UNION (2, format);
 465 };
 466 
 467 
 468 struct EncodingRecord
 469 {
 470   inline int cmp (const EncodingRecord &other) const
 471   {
 472     int ret;
 473     ret = platformID.cmp (other.platformID);
 474     if (ret) return ret;
 475     ret = encodingID.cmp (other.encodingID);
 476     if (ret) return ret;
 477     return 0;
 478   }
 479 
 480   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 481   {
 482     TRACE_SANITIZE (this);
 483     return_trace (c->check_struct (this) &&
 484                   subtable.sanitize (c, base));
 485   }
 486 
 487   USHORT        platformID;     /* Platform ID. */
 488   USHORT        encodingID;     /* Platform-specific encoding ID. */
 489   LOffsetTo<CmapSubtable>
 490                 subtable;       /* Byte offset from beginning of table to the subtable for this encoding. */
 491   public:
 492   DEFINE_SIZE_STATIC (8);
 493 };
 494 
 495 struct cmap
 496 {
 497   static const hb_tag_t tableTag        = HB_OT_TAG_cmap;
 498 
 499   inline const CmapSubtable *find_subtable (unsigned int platform_id,
 500                                             unsigned int encoding_id) const
 501   {
 502     EncodingRecord key;
 503     key.platformID.set (platform_id);
 504     key.encodingID.set (encoding_id);
 505 
 506     /* Note: We can use bsearch, but since it has no performance
 507      * implications, we use lsearch and as such accept fonts with
 508      * unsorted subtable list. */
 509     int result = encodingRecord./*bsearch*/lsearch (key);
 510     if (result == -1 || !encodingRecord[result].subtable)
 511       return nullptr;
 512 
 513     return &(this+encodingRecord[result].subtable);
 514   }
 515 
 516   inline bool sanitize (hb_sanitize_context_t *c) const
 517   {
 518     TRACE_SANITIZE (this);
 519     return_trace (c->check_struct (this) &&
 520                   likely (version == 0) &&
 521                   encodingRecord.sanitize (c, this));
 522   }
 523 
 524   USHORT                version;        /* Table version number (0). */
 525   SortedArrayOf<EncodingRecord>
 526                         encodingRecord; /* Encoding tables. */
 527   public:
 528   DEFINE_SIZE_ARRAY (4, encodingRecord);
 529 };
 530 
 531 
 532 } /* namespace OT */
 533 
 534 
 535 #endif /* HB_OT_CMAP_TABLE_HH */