1 /*
   2  * Copyright © 2014  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_OT_CMAP_TABLE_HH
  28 #define HB_OT_CMAP_TABLE_HH
  29 
  30 #include "hb-open-type-private.hh"
  31 
  32 
  33 namespace OT {
  34 
  35 
  36 /*
  37  * cmap -- Character To Glyph Index Mapping Table
  38  */
  39 
  40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
  41 
  42 
  43 struct CmapSubtableFormat0
  44 {
  45   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  46   {
  47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
  48     if (!gid)
  49       return false;
  50     *glyph = gid;
  51     return true;
  52   }
  53 
  54   inline bool sanitize (hb_sanitize_context_t *c) const
  55   {
  56     TRACE_SANITIZE (this);
  57     return_trace (c->check_struct (this));
  58   }
  59 
  60   protected:
  61   USHORT        format;         /* Format number is set to 0. */
  62   USHORT        lengthZ;        /* Byte length of this subtable. */
  63   USHORT        languageZ;      /* Ignore. */
  64   BYTE          glyphIdArray[256];/* An array that maps character
  65                                  * code to glyph index values. */
  66   public:
  67   DEFINE_SIZE_STATIC (6 + 256);
  68 };
  69 
  70 struct CmapSubtableFormat4
  71 {
  72   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  73   {
  74     unsigned int segCount;
  75     const USHORT *endCount;
  76     const USHORT *startCount;
  77     const USHORT *idDelta;
  78     const USHORT *idRangeOffset;
  79     const USHORT *glyphIdArray;
  80     unsigned int glyphIdArrayLength;
  81 
  82     segCount = this->segCountX2 / 2;
  83     endCount = this->values;
  84     startCount = endCount + segCount + 1;
  85     idDelta = startCount + segCount;
  86     idRangeOffset = idDelta + segCount;
  87     glyphIdArray = idRangeOffset + segCount;
  88     glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
  89 
  90     /* Custom two-array bsearch. */
  91     int min = 0, max = (int) segCount - 1;
  92     unsigned int i;
  93     while (min <= max)
  94     {
  95       int mid = (min + max) / 2;
  96       if (codepoint < startCount[mid])
  97         max = mid - 1;
  98       else if (codepoint > endCount[mid])
  99         min = mid + 1;
 100       else
 101       {
 102         i = mid;
 103         goto found;
 104       }
 105     }
 106     return false;
 107 
 108   found:
 109     hb_codepoint_t gid;
 110     unsigned int rangeOffset = idRangeOffset[i];
 111     if (rangeOffset == 0)
 112       gid = codepoint + idDelta[i];
 113     else
 114     {
 115       /* Somebody has been smoking... */
 116       unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
 117       if (unlikely (index >= glyphIdArrayLength))
 118         return false;
 119       gid = glyphIdArray[index];
 120       if (unlikely (!gid))
 121         return false;
 122       gid += idDelta[i];
 123     }
 124 
 125     *glyph = gid & 0xFFFFu;
 126     return true;
 127   }
 128 
 129   inline bool sanitize (hb_sanitize_context_t *c) const
 130   {
 131     TRACE_SANITIZE (this);
 132     if (unlikely (!c->check_struct (this)))
 133       return_trace (false);
 134 
 135     if (unlikely (!c->check_range (this, length)))
 136     {
 137       /* Some broken fonts have too long of a "length" value.
 138        * If that is the case, just change the value to truncate
 139        * the subtable at the end of the blob. */
 140       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
 141                                             (uintptr_t) (c->end -
 142                                                          (char *) this));
 143       if (!c->try_set (&length, new_length))
 144         return_trace (false);
 145     }
 146 
 147     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
 148   }
 149 
 150   protected:
 151   USHORT        format;         /* Format number is set to 4. */
 152   USHORT        length;         /* This is the length in bytes of the
 153                                  * subtable. */
 154   USHORT        languageZ;      /* Ignore. */
 155   USHORT        segCountX2;     /* 2 x segCount. */
 156   USHORT        searchRangeZ;   /* 2 * (2**floor(log2(segCount))) */
 157   USHORT        entrySelectorZ; /* log2(searchRange/2) */
 158   USHORT        rangeShiftZ;    /* 2 x segCount - searchRange */
 159 
 160   USHORT        values[VAR];
 161 #if 0
 162   USHORT        endCount[segCount];     /* End characterCode for each segment,
 163                                          * last=0xFFFFu. */
 164   USHORT        reservedPad;            /* Set to 0. */
 165   USHORT        startCount[segCount];   /* Start character code for each segment. */
 166   SHORT         idDelta[segCount];      /* Delta for all character codes in segment. */
 167   USHORT        idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
 168   USHORT        glyphIdArray[VAR];      /* Glyph index array (arbitrary length) */
 169 #endif
 170 
 171   public:
 172   DEFINE_SIZE_ARRAY (14, values);
 173 };
 174 
 175 struct CmapSubtableLongGroup
 176 {
 177   friend struct CmapSubtableFormat12;
 178   friend struct CmapSubtableFormat13;
 179 
 180   int cmp (hb_codepoint_t codepoint) const
 181   {
 182     if (codepoint < startCharCode) return -1;
 183     if (codepoint > endCharCode)   return +1;
 184     return 0;
 185   }
 186 
 187   inline bool sanitize (hb_sanitize_context_t *c) const
 188   {
 189     TRACE_SANITIZE (this);
 190     return_trace (c->check_struct (this));
 191   }
 192 
 193   private:
 194   ULONG         startCharCode;  /* First character code in this group. */
 195   ULONG         endCharCode;    /* Last character code in this group. */
 196   ULONG         glyphID;        /* Glyph index; interpretation depends on
 197                                  * subtable format. */
 198   public:
 199   DEFINE_SIZE_STATIC (12);
 200 };
 201 
 202 template <typename UINT>
 203 struct CmapSubtableTrimmed
 204 {
 205   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 206   {
 207     /* Rely on our implicit array bound-checking. */
 208     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
 209     if (!gid)
 210       return false;
 211     *glyph = gid;
 212     return true;
 213   }
 214 
 215   inline bool sanitize (hb_sanitize_context_t *c) const
 216   {
 217     TRACE_SANITIZE (this);
 218     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
 219   }
 220 
 221   protected:
 222   UINT          formatReserved; /* Subtable format and (maybe) padding. */
 223   UINT          lengthZ;        /* Byte length of this subtable. */
 224   UINT          languageZ;      /* Ignore. */
 225   UINT          startCharCode;  /* First character code covered. */
 226   ArrayOf<GlyphID, UINT>
 227                 glyphIdArray;   /* Array of glyph index values for character
 228                                  * codes in the range. */
 229   public:
 230   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
 231 };
 232 
 233 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
 234 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
 235 
 236 template <typename T>
 237 struct CmapSubtableLongSegmented
 238 {
 239   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 240   {
 241     int i = groups.bsearch (codepoint);
 242     if (i == -1)
 243       return false;
 244     *glyph = T::group_get_glyph (groups[i], codepoint);
 245     return true;
 246   }
 247 
 248   inline bool sanitize (hb_sanitize_context_t *c) const
 249   {
 250     TRACE_SANITIZE (this);
 251     return_trace (c->check_struct (this) && groups.sanitize (c));
 252   }
 253 
 254   protected:
 255   USHORT        format;         /* Subtable format; set to 12. */
 256   USHORT        reservedZ;      /* Reserved; set to 0. */
 257   ULONG         lengthZ;        /* Byte length of this subtable. */
 258   ULONG         languageZ;      /* Ignore. */
 259   SortedArrayOf<CmapSubtableLongGroup, ULONG>
 260                 groups;         /* Groupings. */
 261   public:
 262   DEFINE_SIZE_ARRAY (16, groups);
 263 };
 264 
 265 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
 266 {
 267   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 268                                                 hb_codepoint_t u)
 269   { return group.glyphID + (u - group.startCharCode); }
 270 };
 271 
 272 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
 273 {
 274   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 275                                                 hb_codepoint_t u HB_UNUSED)
 276   { return group.glyphID; }
 277 };
 278 
 279 typedef enum
 280 {
 281   GLYPH_VARIANT_NOT_FOUND = 0,
 282   GLYPH_VARIANT_FOUND = 1,
 283   GLYPH_VARIANT_USE_DEFAULT = 2
 284 } glyph_variant_t;
 285 
 286 struct UnicodeValueRange
 287 {
 288   inline int cmp (const hb_codepoint_t &codepoint) const
 289   {
 290     if (codepoint < startUnicodeValue) return -1;
 291     if (codepoint > startUnicodeValue + additionalCount) return +1;
 292     return 0;
 293   }
 294 
 295   inline bool sanitize (hb_sanitize_context_t *c) const
 296   {
 297     TRACE_SANITIZE (this);
 298     return_trace (c->check_struct (this));
 299   }
 300 
 301   UINT24        startUnicodeValue;      /* First value in this range. */
 302   BYTE          additionalCount;        /* Number of additional values in this
 303                                          * range. */
 304   public:
 305   DEFINE_SIZE_STATIC (4);
 306 };
 307 
 308 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
 309 
 310 struct UVSMapping
 311 {
 312   inline int cmp (const hb_codepoint_t &codepoint) const
 313   {
 314     return unicodeValue.cmp (codepoint);
 315   }
 316 
 317   inline bool sanitize (hb_sanitize_context_t *c) const
 318   {
 319     TRACE_SANITIZE (this);
 320     return_trace (c->check_struct (this));
 321   }
 322 
 323   UINT24        unicodeValue;   /* Base Unicode value of the UVS */
 324   GlyphID       glyphID;        /* Glyph ID of the UVS */
 325   public:
 326   DEFINE_SIZE_STATIC (5);
 327 };
 328 
 329 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
 330 
 331 struct VariationSelectorRecord
 332 {
 333   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
 334                                     hb_codepoint_t *glyph,
 335                                     const void *base) const
 336   {
 337     int i;
 338     const DefaultUVS &defaults = base+defaultUVS;
 339     i = defaults.bsearch (codepoint);
 340     if (i != -1)
 341       return GLYPH_VARIANT_USE_DEFAULT;
 342     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
 343     i = nonDefaults.bsearch (codepoint);
 344     if (i != -1)
 345     {
 346       *glyph = nonDefaults[i].glyphID;
 347        return GLYPH_VARIANT_FOUND;
 348     }
 349     return GLYPH_VARIANT_NOT_FOUND;
 350   }
 351 
 352   inline int cmp (const hb_codepoint_t &variation_selector) const
 353   {
 354     return varSelector.cmp (variation_selector);
 355   }
 356 
 357   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 358   {
 359     TRACE_SANITIZE (this);
 360     return_trace (c->check_struct (this) &&
 361                   defaultUVS.sanitize (c, base) &&
 362                   nonDefaultUVS.sanitize (c, base));
 363   }
 364 
 365   UINT24        varSelector;    /* Variation selector. */
 366   OffsetTo<DefaultUVS, ULONG>
 367                 defaultUVS;     /* Offset to Default UVS Table. May be 0. */
 368   OffsetTo<NonDefaultUVS, ULONG>
 369                 nonDefaultUVS;  /* Offset to Non-Default UVS Table. May be 0. */
 370   public:
 371   DEFINE_SIZE_STATIC (11);
 372 };
 373 
 374 struct CmapSubtableFormat14
 375 {
 376   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 377                                             hb_codepoint_t variation_selector,
 378                                             hb_codepoint_t *glyph) const
 379   {
 380     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
 381   }
 382 
 383   inline bool sanitize (hb_sanitize_context_t *c) const
 384   {
 385     TRACE_SANITIZE (this);
 386     return_trace (c->check_struct (this) &&
 387                   record.sanitize (c, this));
 388   }
 389 
 390   protected:
 391   USHORT        format;         /* Format number is set to 0. */
 392   ULONG         lengthZ;        /* Byte length of this subtable. */
 393   SortedArrayOf<VariationSelectorRecord, ULONG>
 394                 record;         /* Variation selector records; sorted
 395                                  * in increasing order of `varSelector'. */
 396   public:
 397   DEFINE_SIZE_ARRAY (10, record);
 398 };
 399 
 400 struct CmapSubtable
 401 {
 402   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
 403 
 404   inline bool get_glyph (hb_codepoint_t codepoint,
 405                          hb_codepoint_t *glyph) const
 406   {
 407     switch (u.format) {
 408     case  0: return u.format0 .get_glyph(codepoint, glyph);
 409     case  4: return u.format4 .get_glyph(codepoint, glyph);
 410     case  6: return u.format6 .get_glyph(codepoint, glyph);
 411     case 10: return u.format10.get_glyph(codepoint, glyph);
 412     case 12: return u.format12.get_glyph(codepoint, glyph);
 413     case 13: return u.format13.get_glyph(codepoint, glyph);
 414     case 14:
 415     default: return false;
 416     }
 417   }
 418 
 419   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 420                                             hb_codepoint_t variation_selector,
 421                                             hb_codepoint_t *glyph) const
 422   {
 423     switch (u.format) {
 424     case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
 425     default: return GLYPH_VARIANT_NOT_FOUND;
 426     }
 427   }
 428 
 429   inline bool sanitize (hb_sanitize_context_t *c) const
 430   {
 431     TRACE_SANITIZE (this);
 432     if (!u.format.sanitize (c)) return_trace (false);
 433     switch (u.format) {
 434     case  0: return_trace (u.format0 .sanitize (c));
 435     case  4: return_trace (u.format4 .sanitize (c));
 436     case  6: return_trace (u.format6 .sanitize (c));
 437     case 10: return_trace (u.format10.sanitize (c));
 438     case 12: return_trace (u.format12.sanitize (c));
 439     case 13: return_trace (u.format13.sanitize (c));
 440     case 14: return_trace (u.format14.sanitize (c));
 441     default:return_trace (true);
 442     }
 443   }
 444 
 445   protected:
 446   union {
 447   USHORT                format;         /* Format identifier */
 448   CmapSubtableFormat0   format0;
 449   CmapSubtableFormat4   format4;
 450   CmapSubtableFormat6   format6;
 451   CmapSubtableFormat10  format10;
 452   CmapSubtableFormat12  format12;
 453   CmapSubtableFormat13  format13;
 454   CmapSubtableFormat14  format14;
 455   } u;
 456   public:
 457   DEFINE_SIZE_UNION (2, format);
 458 };
 459 
 460 
 461 struct EncodingRecord
 462 {
 463   inline int cmp (const EncodingRecord &other) const
 464   {
 465     int ret;
 466     ret = platformID.cmp (other.platformID);
 467     if (ret) return ret;
 468     ret = encodingID.cmp (other.encodingID);
 469     if (ret) return ret;
 470     return 0;
 471   }
 472 
 473   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 474   {
 475     TRACE_SANITIZE (this);
 476     return_trace (c->check_struct (this) &&
 477                   subtable.sanitize (c, base));
 478   }
 479 
 480   USHORT        platformID;     /* Platform ID. */
 481   USHORT        encodingID;     /* Platform-specific encoding ID. */
 482   OffsetTo<CmapSubtable, ULONG>
 483                 subtable;       /* Byte offset from beginning of table to the subtable for this encoding. */
 484   public:
 485   DEFINE_SIZE_STATIC (8);
 486 };
 487 
 488 struct cmap
 489 {
 490   static const hb_tag_t tableTag        = HB_OT_TAG_cmap;
 491 
 492   inline const CmapSubtable *find_subtable (unsigned int platform_id,
 493                                             unsigned int encoding_id) const
 494   {
 495     EncodingRecord key;
 496     key.platformID.set (platform_id);
 497     key.encodingID.set (encoding_id);
 498 
 499     /* Note: We can use bsearch, but since it has no performance
 500      * implications, we use lsearch and as such accept fonts with
 501      * unsorted subtable list. */
 502     int result = encodingRecord./*bsearch*/lsearch (key);
 503     if (result == -1 || !encodingRecord[result].subtable)
 504       return NULL;
 505 
 506     return &(this+encodingRecord[result].subtable);
 507   }
 508 
 509   inline bool sanitize (hb_sanitize_context_t *c) const
 510   {
 511     TRACE_SANITIZE (this);
 512     return_trace (c->check_struct (this) &&
 513                   likely (version == 0) &&
 514                   encodingRecord.sanitize (c, this));
 515   }
 516 
 517   USHORT                version;        /* Table version number (0). */
 518   SortedArrayOf<EncodingRecord>
 519                         encodingRecord; /* Encoding tables. */
 520   public:
 521   DEFINE_SIZE_ARRAY (4, encodingRecord);
 522 };
 523 
 524 
 525 } /* namespace OT */
 526 
 527 
 528 #endif /* HB_OT_CMAP_TABLE_HH */