1 /*
   2  * Copyright © 2012  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_SET_PRIVATE_HH
  28 #define HB_SET_PRIVATE_HH
  29 
  30 #include "hb-private.hh"
  31 #include "hb-object-private.hh"
  32 
  33 
  34 /*
  35  * The set digests here implement various "filters" that support
  36  * "approximate member query".  Conceptually these are like Bloom
  37  * Filter and Quotient Filter, however, much smaller, faster, and
  38  * designed to fit the requirements of our uses for glyph coverage
  39  * queries.
  40  *
  41  * Our filters are highly accurate if the lookup covers fairly local
  42  * set of glyphs, but fully flooded and ineffective if coverage is
  43  * all over the place.
  44  *
  45  * The frozen-set can be used instead of a digest, to trade more
  46  * memory for 100% accuracy, but in practice, that doesn't look like
  47  * an attractive trade-off.
  48  */
  49 
  50 template <typename mask_t, unsigned int shift>
  51 struct hb_set_digest_lowest_bits_t
  52 {
  53   ASSERT_POD ();
  54 
  55   static const unsigned int mask_bytes = sizeof (mask_t);
  56   static const unsigned int mask_bits = sizeof (mask_t) * 8;
  57   static const unsigned int num_bits = 0
  58                                      + (mask_bytes >= 1 ? 3 : 0)
  59                                      + (mask_bytes >= 2 ? 1 : 0)
  60                                      + (mask_bytes >= 4 ? 1 : 0)
  61                                      + (mask_bytes >= 8 ? 1 : 0)
  62                                      + (mask_bytes >= 16? 1 : 0)
  63                                      + 0;
  64 
  65   ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
  66   ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
  67 
  68   inline void init (void) {
  69     mask = 0;
  70   }
  71 
  72   inline void add (hb_codepoint_t g) {
  73     mask |= mask_for (g);
  74   }
  75 
  76   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
  77     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
  78       mask = (mask_t) -1;
  79     else {
  80       mask_t ma = mask_for (a);
  81       mask_t mb = mask_for (b);
  82       mask |= mb + (mb - ma) - (mb < ma);
  83     }
  84   }
  85 
  86   inline bool may_have (hb_codepoint_t g) const {
  87     return !!(mask & mask_for (g));
  88   }
  89 
  90   private:
  91 
  92   static inline mask_t mask_for (hb_codepoint_t g) {
  93     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
  94   }
  95   mask_t mask;
  96 };
  97 
  98 template <typename head_t, typename tail_t>
  99 struct hb_set_digest_combiner_t
 100 {
 101   ASSERT_POD ();
 102 
 103   inline void init (void) {
 104     head.init ();
 105     tail.init ();
 106   }
 107 
 108   inline void add (hb_codepoint_t g) {
 109     head.add (g);
 110     tail.add (g);
 111   }
 112 
 113   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
 114     head.add_range (a, b);
 115     tail.add_range (a, b);
 116   }
 117 
 118   inline bool may_have (hb_codepoint_t g) const {
 119     return head.may_have (g) && tail.may_have (g);
 120   }
 121 
 122   private:
 123   head_t head;
 124   tail_t tail;
 125 };
 126 
 127 
 128 /*
 129  * hb_set_digest_t
 130  *
 131  * This is a combination of digests that performs "best".
 132  * There is not much science to this: it's a result of intuition
 133  * and testing.
 134  */
 135 typedef hb_set_digest_combiner_t
 136 <
 137   hb_set_digest_lowest_bits_t<unsigned long, 4>,
 138   hb_set_digest_combiner_t
 139   <
 140     hb_set_digest_lowest_bits_t<unsigned long, 0>,
 141     hb_set_digest_lowest_bits_t<unsigned long, 9>
 142   >
 143 > hb_set_digest_t;
 144 
 145 
 146 
 147 /*
 148  * hb_set_t
 149  */
 150 
 151 
 152 /* TODO Make this faster and memmory efficient. */
 153 
 154 struct hb_set_t
 155 {
 156   friend struct hb_frozen_set_t;
 157 
 158   hb_object_header_t header;
 159   ASSERT_POD ();
 160   bool in_error;
 161 
 162   inline void init (void) {
 163     hb_object_init (this);
 164     clear ();
 165   }
 166   inline void fini (void) {
 167   }
 168   inline void clear (void) {
 169     if (unlikely (hb_object_is_inert (this)))
 170       return;
 171     in_error = false;
 172     memset (elts, 0, sizeof elts);
 173   }
 174   inline bool is_empty (void) const {
 175     for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
 176       if (elts[i])
 177         return false;
 178     return true;
 179   }
 180   inline void add (hb_codepoint_t g)
 181   {
 182     if (unlikely (in_error)) return;
 183     if (unlikely (g == INVALID)) return;
 184     if (unlikely (g > MAX_G)) return;
 185     elt (g) |= mask (g);
 186   }
 187   inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
 188   {
 189     if (unlikely (in_error)) return;
 190     /* TODO Speedup */
 191     for (unsigned int i = a; i < b + 1; i++)
 192       add (i);
 193   }
 194   inline void del (hb_codepoint_t g)
 195   {
 196     if (unlikely (in_error)) return;
 197     if (unlikely (g > MAX_G)) return;
 198     elt (g) &= ~mask (g);
 199   }
 200   inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
 201   {
 202     if (unlikely (in_error)) return;
 203     /* TODO Speedup */
 204     for (unsigned int i = a; i < b + 1; i++)
 205       del (i);
 206   }
 207   inline bool has (hb_codepoint_t g) const
 208   {
 209     if (unlikely (g > MAX_G)) return false;
 210     return !!(elt (g) & mask (g));
 211   }
 212   inline bool intersects (hb_codepoint_t first,
 213                           hb_codepoint_t last) const
 214   {
 215     if (unlikely (first > MAX_G)) return false;
 216     if (unlikely (last  > MAX_G)) last = MAX_G;
 217     unsigned int end = last + 1;
 218     for (hb_codepoint_t i = first; i < end; i++)
 219       if (has (i))
 220         return true;
 221     return false;
 222   }
 223   inline bool is_equal (const hb_set_t *other) const
 224   {
 225     for (unsigned int i = 0; i < ELTS; i++)
 226       if (elts[i] != other->elts[i])
 227         return false;
 228     return true;
 229   }
 230   inline void set (const hb_set_t *other)
 231   {
 232     if (unlikely (in_error)) return;
 233     for (unsigned int i = 0; i < ELTS; i++)
 234       elts[i] = other->elts[i];
 235   }
 236   inline void union_ (const hb_set_t *other)
 237   {
 238     if (unlikely (in_error)) return;
 239     for (unsigned int i = 0; i < ELTS; i++)
 240       elts[i] |= other->elts[i];
 241   }
 242   inline void intersect (const hb_set_t *other)
 243   {
 244     if (unlikely (in_error)) return;
 245     for (unsigned int i = 0; i < ELTS; i++)
 246       elts[i] &= other->elts[i];
 247   }
 248   inline void subtract (const hb_set_t *other)
 249   {
 250     if (unlikely (in_error)) return;
 251     for (unsigned int i = 0; i < ELTS; i++)
 252       elts[i] &= ~other->elts[i];
 253   }
 254   inline void symmetric_difference (const hb_set_t *other)
 255   {
 256     if (unlikely (in_error)) return;
 257     for (unsigned int i = 0; i < ELTS; i++)
 258       elts[i] ^= other->elts[i];
 259   }
 260   inline void invert (void)
 261   {
 262     if (unlikely (in_error)) return;
 263     for (unsigned int i = 0; i < ELTS; i++)
 264       elts[i] = ~elts[i];
 265   }
 266   inline bool next (hb_codepoint_t *codepoint) const
 267   {
 268     if (unlikely (*codepoint == INVALID)) {
 269       hb_codepoint_t i = get_min ();
 270       if (i != INVALID) {
 271         *codepoint = i;
 272         return true;
 273       } else {
 274         *codepoint = INVALID;
 275         return false;
 276       }
 277     }
 278     for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
 279       if (has (i)) {
 280         *codepoint = i;
 281         return true;
 282       }
 283     *codepoint = INVALID;
 284     return false;
 285   }
 286   inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
 287   {
 288     hb_codepoint_t i;
 289 
 290     i = *last;
 291     if (!next (&i))
 292     {
 293       *last = *first = INVALID;
 294       return false;
 295     }
 296 
 297     *last = *first = i;
 298     while (next (&i) && i == *last + 1)
 299       (*last)++;
 300 
 301     return true;
 302   }
 303 
 304   inline unsigned int get_population (void) const
 305   {
 306     unsigned int count = 0;
 307     for (unsigned int i = 0; i < ELTS; i++)
 308       count += _hb_popcount32 (elts[i]);
 309     return count;
 310   }
 311   inline hb_codepoint_t get_min (void) const
 312   {
 313     for (unsigned int i = 0; i < ELTS; i++)
 314       if (elts[i])
 315         for (unsigned int j = 0; j < BITS; j++)
 316           if (elts[i] & (1 << j))
 317             return i * BITS + j;
 318     return INVALID;
 319   }
 320   inline hb_codepoint_t get_max (void) const
 321   {
 322     for (unsigned int i = ELTS; i; i--)
 323       if (elts[i - 1])
 324         for (unsigned int j = BITS; j; j--)
 325           if (elts[i - 1] & (1 << (j - 1)))
 326             return (i - 1) * BITS + (j - 1);
 327     return INVALID;
 328   }
 329 
 330   typedef uint32_t elt_t;
 331   static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
 332   static const unsigned int SHIFT = 5;
 333   static const unsigned int BITS = (1 << SHIFT);
 334   static const unsigned int MASK = BITS - 1;
 335   static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
 336   static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
 337 
 338   elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
 339   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
 340   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
 341 
 342   elt_t elts[ELTS]; /* XXX 8kb */
 343 
 344   ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
 345   ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
 346 };
 347 
 348 struct hb_frozen_set_t
 349 {
 350   static const unsigned int SHIFT = hb_set_t::SHIFT;
 351   static const unsigned int BITS = hb_set_t::BITS;
 352   static const unsigned int MASK = hb_set_t::MASK;
 353   typedef hb_set_t::elt_t elt_t;
 354 
 355   inline void init (const hb_set_t &set)
 356   {
 357     start = count = 0;
 358     elts = NULL;
 359 
 360     unsigned int max = set.get_max ();
 361     if (max == set.INVALID)
 362       return;
 363     unsigned int min = set.get_min ();
 364     const elt_t &min_elt = set.elt (min);
 365 
 366     start = min & ~MASK;
 367     count = max - start + 1;
 368     unsigned int num_elts = (count + BITS - 1) / BITS;
 369     unsigned int elts_size = num_elts * sizeof (elt_t);
 370     elts = (elt_t *) malloc (elts_size);
 371     if (unlikely (!elts))
 372     {
 373       start = count = 0;
 374       return;
 375     }
 376     memcpy (elts, &min_elt, elts_size);
 377   }
 378 
 379   inline void fini (void)
 380   {
 381     if (elts)
 382       free (elts);
 383   }
 384 
 385   inline bool has (hb_codepoint_t g) const
 386   {
 387     /* hb_codepoint_t is unsigned. */
 388     g -= start;
 389     if (unlikely (g > count)) return false;
 390     return !!(elt (g) & mask (g));
 391   }
 392 
 393   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
 394   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
 395 
 396   private:
 397   hb_codepoint_t start, count;
 398   elt_t *elts;
 399 };
 400 
 401 
 402 #endif /* HB_SET_PRIVATE_HH */