1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2014  Google, Inc.
  32  *
  33  *  This is part of HarfBuzz, a text shaping library.
  34  *
  35  * Permission is hereby granted, without written agreement and without
  36  * license or royalty fees, to use, copy, modify, and distribute this
  37  * software and its documentation for any purpose, provided that the
  38  * above copyright notice and the following two paragraphs appear in
  39  * all copies of this software.
  40  *
  41  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  42  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  43  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  44  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  45  * DAMAGE.
  46  *
  47  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  48  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  49  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  50  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  51  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  52  *
  53  * Google Author(s): Behdad Esfahbod
  54  */
  55 
  56 #ifndef HB_OT_CMAP_TABLE_HH
  57 #define HB_OT_CMAP_TABLE_HH
  58 
  59 #include "hb-open-type-private.hh"
  60 
  61 
  62 namespace OT {
  63 
  64 
  65 /*
  66  * cmap -- Character To Glyph Index Mapping Table
  67  */
  68 
  69 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
  70 
  71 
  72 struct CmapSubtableFormat0
  73 {
  74   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  75   {
  76     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
  77     if (!gid)
  78       return false;
  79     *glyph = gid;
  80     return true;
  81   }
  82 
  83   inline bool sanitize (hb_sanitize_context_t *c) const
  84   {
  85     TRACE_SANITIZE (this);
  86     return_trace (c->check_struct (this));
  87   }
  88 
  89   protected:
  90   USHORT        format;         /* Format number is set to 0. */
  91   USHORT        lengthZ;        /* Byte length of this subtable. */
  92   USHORT        languageZ;      /* Ignore. */
  93   BYTE          glyphIdArray[256];/* An array that maps character
  94                                  * code to glyph index values. */
  95   public:
  96   DEFINE_SIZE_STATIC (6 + 256);
  97 };
  98 
  99 struct CmapSubtableFormat4
 100 {
 101   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 102   {
 103     unsigned int segCount;
 104     const USHORT *endCount;
 105     const USHORT *startCount;
 106     const USHORT *idDelta;
 107     const USHORT *idRangeOffset;
 108     const USHORT *glyphIdArray;
 109     unsigned int glyphIdArrayLength;
 110 
 111     segCount = this->segCountX2 / 2;
 112     endCount = this->values;
 113     startCount = endCount + segCount + 1;
 114     idDelta = startCount + segCount;
 115     idRangeOffset = idDelta + segCount;
 116     glyphIdArray = idRangeOffset + segCount;
 117     glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
 118 
 119     /* Custom two-array bsearch. */
 120     int min = 0, max = (int) segCount - 1;
 121     unsigned int i;
 122     while (min <= max)
 123     {
 124       int mid = (min + max) / 2;
 125       if (codepoint < startCount[mid])
 126         max = mid - 1;
 127       else if (codepoint > endCount[mid])
 128         min = mid + 1;
 129       else
 130       {
 131         i = mid;
 132         goto found;
 133       }
 134     }
 135     return false;
 136 
 137   found:
 138     hb_codepoint_t gid;
 139     unsigned int rangeOffset = idRangeOffset[i];
 140     if (rangeOffset == 0)
 141       gid = codepoint + idDelta[i];
 142     else
 143     {
 144       /* Somebody has been smoking... */
 145       unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
 146       if (unlikely (index >= glyphIdArrayLength))
 147         return false;
 148       gid = glyphIdArray[index];
 149       if (unlikely (!gid))
 150         return false;
 151       gid += idDelta[i];
 152     }
 153 
 154     *glyph = gid & 0xFFFFu;
 155     return true;
 156   }
 157 
 158   inline bool sanitize (hb_sanitize_context_t *c) const
 159   {
 160     TRACE_SANITIZE (this);
 161     if (unlikely (!c->check_struct (this)))
 162       return_trace (false);
 163 
 164     if (unlikely (!c->check_range (this, length)))
 165     {
 166       /* Some broken fonts have too long of a "length" value.
 167        * If that is the case, just change the value to truncate
 168        * the subtable at the end of the blob. */
 169       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
 170                                             (uintptr_t) (c->end -
 171                                                          (char *) this));
 172       if (!c->try_set (&length, new_length))
 173         return_trace (false);
 174     }
 175 
 176     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
 177   }
 178 
 179   protected:
 180   USHORT        format;         /* Format number is set to 4. */
 181   USHORT        length;         /* This is the length in bytes of the
 182                                  * subtable. */
 183   USHORT        languageZ;      /* Ignore. */
 184   USHORT        segCountX2;     /* 2 x segCount. */
 185   USHORT        searchRangeZ;   /* 2 * (2**floor(log2(segCount))) */
 186   USHORT        entrySelectorZ; /* log2(searchRange/2) */
 187   USHORT        rangeShiftZ;    /* 2 x segCount - searchRange */
 188 
 189   USHORT        values[VAR];
 190 #if 0
 191   USHORT        endCount[segCount];     /* End characterCode for each segment,
 192                                          * last=0xFFFFu. */
 193   USHORT        reservedPad;            /* Set to 0. */
 194   USHORT        startCount[segCount];   /* Start character code for each segment. */
 195   SHORT         idDelta[segCount];      /* Delta for all character codes in segment. */
 196   USHORT        idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
 197   USHORT        glyphIdArray[VAR];      /* Glyph index array (arbitrary length) */
 198 #endif
 199 
 200   public:
 201   DEFINE_SIZE_ARRAY (14, values);
 202 };
 203 
 204 struct CmapSubtableLongGroup
 205 {
 206   friend struct CmapSubtableFormat12;
 207   friend struct CmapSubtableFormat13;
 208 
 209   int cmp (hb_codepoint_t codepoint) const
 210   {
 211     if (codepoint < startCharCode) return -1;
 212     if (codepoint > endCharCode)   return +1;
 213     return 0;
 214   }
 215 
 216   inline bool sanitize (hb_sanitize_context_t *c) const
 217   {
 218     TRACE_SANITIZE (this);
 219     return_trace (c->check_struct (this));
 220   }
 221 
 222   private:
 223   ULONG         startCharCode;  /* First character code in this group. */
 224   ULONG         endCharCode;    /* Last character code in this group. */
 225   ULONG         glyphID;        /* Glyph index; interpretation depends on
 226                                  * subtable format. */
 227   public:
 228   DEFINE_SIZE_STATIC (12);
 229 };
 230 
 231 template <typename UINT>
 232 struct CmapSubtableTrimmed
 233 {
 234   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 235   {
 236     /* Rely on our implicit array bound-checking. */
 237     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
 238     if (!gid)
 239       return false;
 240     *glyph = gid;
 241     return true;
 242   }
 243 
 244   inline bool sanitize (hb_sanitize_context_t *c) const
 245   {
 246     TRACE_SANITIZE (this);
 247     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
 248   }
 249 
 250   protected:
 251   UINT          formatReserved; /* Subtable format and (maybe) padding. */
 252   UINT          lengthZ;        /* Byte length of this subtable. */
 253   UINT          languageZ;      /* Ignore. */
 254   UINT          startCharCode;  /* First character code covered. */
 255   ArrayOf<GlyphID, UINT>
 256                 glyphIdArray;   /* Array of glyph index values for character
 257                                  * codes in the range. */
 258   public:
 259   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
 260 };
 261 
 262 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
 263 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
 264 
 265 template <typename T>
 266 struct CmapSubtableLongSegmented
 267 {
 268   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
 269   {
 270     int i = groups.bsearch (codepoint);
 271     if (i == -1)
 272       return false;
 273     *glyph = T::group_get_glyph (groups[i], codepoint);
 274     return true;
 275   }
 276 
 277   inline bool sanitize (hb_sanitize_context_t *c) const
 278   {
 279     TRACE_SANITIZE (this);
 280     return_trace (c->check_struct (this) && groups.sanitize (c));
 281   }
 282 
 283   protected:
 284   USHORT        format;         /* Subtable format; set to 12. */
 285   USHORT        reservedZ;      /* Reserved; set to 0. */
 286   ULONG         lengthZ;        /* Byte length of this subtable. */
 287   ULONG         languageZ;      /* Ignore. */
 288   SortedArrayOf<CmapSubtableLongGroup, ULONG>
 289                 groups;         /* Groupings. */
 290   public:
 291   DEFINE_SIZE_ARRAY (16, groups);
 292 };
 293 
 294 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
 295 {
 296   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 297                                                 hb_codepoint_t u)
 298   { return group.glyphID + (u - group.startCharCode); }
 299 };
 300 
 301 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
 302 {
 303   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
 304                                                 hb_codepoint_t u HB_UNUSED)
 305   { return group.glyphID; }
 306 };
 307 
 308 typedef enum
 309 {
 310   GLYPH_VARIANT_NOT_FOUND = 0,
 311   GLYPH_VARIANT_FOUND = 1,
 312   GLYPH_VARIANT_USE_DEFAULT = 2
 313 } glyph_variant_t;
 314 
 315 struct UnicodeValueRange
 316 {
 317   inline int cmp (const hb_codepoint_t &codepoint) const
 318   {
 319     if (codepoint < startUnicodeValue) return -1;
 320     if (codepoint > startUnicodeValue + additionalCount) return +1;
 321     return 0;
 322   }
 323 
 324   inline bool sanitize (hb_sanitize_context_t *c) const
 325   {
 326     TRACE_SANITIZE (this);
 327     return_trace (c->check_struct (this));
 328   }
 329 
 330   UINT24        startUnicodeValue;      /* First value in this range. */
 331   BYTE          additionalCount;        /* Number of additional values in this
 332                                          * range. */
 333   public:
 334   DEFINE_SIZE_STATIC (4);
 335 };
 336 
 337 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
 338 
 339 struct UVSMapping
 340 {
 341   inline int cmp (const hb_codepoint_t &codepoint) const
 342   {
 343     return unicodeValue.cmp (codepoint);
 344   }
 345 
 346   inline bool sanitize (hb_sanitize_context_t *c) const
 347   {
 348     TRACE_SANITIZE (this);
 349     return_trace (c->check_struct (this));
 350   }
 351 
 352   UINT24        unicodeValue;   /* Base Unicode value of the UVS */
 353   GlyphID       glyphID;        /* Glyph ID of the UVS */
 354   public:
 355   DEFINE_SIZE_STATIC (5);
 356 };
 357 
 358 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
 359 
 360 struct VariationSelectorRecord
 361 {
 362   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
 363                                     hb_codepoint_t *glyph,
 364                                     const void *base) const
 365   {
 366     int i;
 367     const DefaultUVS &defaults = base+defaultUVS;
 368     i = defaults.bsearch (codepoint);
 369     if (i != -1)
 370       return GLYPH_VARIANT_USE_DEFAULT;
 371     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
 372     i = nonDefaults.bsearch (codepoint);
 373     if (i != -1)
 374     {
 375       *glyph = nonDefaults[i].glyphID;
 376        return GLYPH_VARIANT_FOUND;
 377     }
 378     return GLYPH_VARIANT_NOT_FOUND;
 379   }
 380 
 381   inline int cmp (const hb_codepoint_t &variation_selector) const
 382   {
 383     return varSelector.cmp (variation_selector);
 384   }
 385 
 386   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 387   {
 388     TRACE_SANITIZE (this);
 389     return_trace (c->check_struct (this) &&
 390                   defaultUVS.sanitize (c, base) &&
 391                   nonDefaultUVS.sanitize (c, base));
 392   }
 393 
 394   UINT24        varSelector;    /* Variation selector. */
 395   OffsetTo<DefaultUVS, ULONG>
 396                 defaultUVS;     /* Offset to Default UVS Table. May be 0. */
 397   OffsetTo<NonDefaultUVS, ULONG>
 398                 nonDefaultUVS;  /* Offset to Non-Default UVS Table. May be 0. */
 399   public:
 400   DEFINE_SIZE_STATIC (11);
 401 };
 402 
 403 struct CmapSubtableFormat14
 404 {
 405   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 406                                             hb_codepoint_t variation_selector,
 407                                             hb_codepoint_t *glyph) const
 408   {
 409     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
 410   }
 411 
 412   inline bool sanitize (hb_sanitize_context_t *c) const
 413   {
 414     TRACE_SANITIZE (this);
 415     return_trace (c->check_struct (this) &&
 416                   record.sanitize (c, this));
 417   }
 418 
 419   protected:
 420   USHORT        format;         /* Format number is set to 0. */
 421   ULONG         lengthZ;        /* Byte length of this subtable. */
 422   SortedArrayOf<VariationSelectorRecord, ULONG>
 423                 record;         /* Variation selector records; sorted
 424                                  * in increasing order of `varSelector'. */
 425   public:
 426   DEFINE_SIZE_ARRAY (10, record);
 427 };
 428 
 429 struct CmapSubtable
 430 {
 431   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
 432 
 433   inline bool get_glyph (hb_codepoint_t codepoint,
 434                          hb_codepoint_t *glyph) const
 435   {
 436     switch (u.format) {
 437     case  0: return u.format0 .get_glyph(codepoint, glyph);
 438     case  4: return u.format4 .get_glyph(codepoint, glyph);
 439     case  6: return u.format6 .get_glyph(codepoint, glyph);
 440     case 10: return u.format10.get_glyph(codepoint, glyph);
 441     case 12: return u.format12.get_glyph(codepoint, glyph);
 442     case 13: return u.format13.get_glyph(codepoint, glyph);
 443     case 14:
 444     default: return false;
 445     }
 446   }
 447 
 448   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
 449                                             hb_codepoint_t variation_selector,
 450                                             hb_codepoint_t *glyph) const
 451   {
 452     switch (u.format) {
 453     case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
 454     default: return GLYPH_VARIANT_NOT_FOUND;
 455     }
 456   }
 457 
 458   inline bool sanitize (hb_sanitize_context_t *c) const
 459   {
 460     TRACE_SANITIZE (this);
 461     if (!u.format.sanitize (c)) return_trace (false);
 462     switch (u.format) {
 463     case  0: return_trace (u.format0 .sanitize (c));
 464     case  4: return_trace (u.format4 .sanitize (c));
 465     case  6: return_trace (u.format6 .sanitize (c));
 466     case 10: return_trace (u.format10.sanitize (c));
 467     case 12: return_trace (u.format12.sanitize (c));
 468     case 13: return_trace (u.format13.sanitize (c));
 469     case 14: return_trace (u.format14.sanitize (c));
 470     default:return_trace (true);
 471     }
 472   }
 473 
 474   protected:
 475   union {
 476   USHORT                format;         /* Format identifier */
 477   CmapSubtableFormat0   format0;
 478   CmapSubtableFormat4   format4;
 479   CmapSubtableFormat6   format6;
 480   CmapSubtableFormat10  format10;
 481   CmapSubtableFormat12  format12;
 482   CmapSubtableFormat13  format13;
 483   CmapSubtableFormat14  format14;
 484   } u;
 485   public:
 486   DEFINE_SIZE_UNION (2, format);
 487 };
 488 
 489 
 490 struct EncodingRecord
 491 {
 492   inline int cmp (const EncodingRecord &other) const
 493   {
 494     int ret;
 495     ret = platformID.cmp (other.platformID);
 496     if (ret) return ret;
 497     ret = encodingID.cmp (other.encodingID);
 498     if (ret) return ret;
 499     return 0;
 500   }
 501 
 502   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
 503   {
 504     TRACE_SANITIZE (this);
 505     return_trace (c->check_struct (this) &&
 506                   subtable.sanitize (c, base));
 507   }
 508 
 509   USHORT        platformID;     /* Platform ID. */
 510   USHORT        encodingID;     /* Platform-specific encoding ID. */
 511   OffsetTo<CmapSubtable, ULONG>
 512                 subtable;       /* Byte offset from beginning of table to the subtable for this encoding. */
 513   public:
 514   DEFINE_SIZE_STATIC (8);
 515 };
 516 
 517 struct cmap
 518 {
 519   static const hb_tag_t tableTag        = HB_OT_TAG_cmap;
 520 
 521   inline const CmapSubtable *find_subtable (unsigned int platform_id,
 522                                             unsigned int encoding_id) const
 523   {
 524     EncodingRecord key;
 525     key.platformID.set (platform_id);
 526     key.encodingID.set (encoding_id);
 527 
 528     /* Note: We can use bsearch, but since it has no performance
 529      * implications, we use lsearch and as such accept fonts with
 530      * unsorted subtable list. */
 531     int result = encodingRecord./*bsearch*/lsearch (key);
 532     if (result == -1 || !encodingRecord[result].subtable)
 533       return NULL;
 534 
 535     return &(this+encodingRecord[result].subtable);
 536   }
 537 
 538   inline bool sanitize (hb_sanitize_context_t *c) const
 539   {
 540     TRACE_SANITIZE (this);
 541     return_trace (c->check_struct (this) &&
 542                   likely (version == 0) &&
 543                   encodingRecord.sanitize (c, this));
 544   }
 545 
 546   USHORT                version;        /* Table version number (0). */
 547   SortedArrayOf<EncodingRecord>
 548                         encodingRecord; /* Encoding tables. */
 549   public:
 550   DEFINE_SIZE_ARRAY (4, encodingRecord);
 551 };
 552 
 553 
 554 } /* namespace OT */
 555 
 556 
 557 #endif /* HB_OT_CMAP_TABLE_HH */