/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// This file is available under and governed by the GNU General Public
// License version 2 only, as published by the Free Software Foundation.
// However, the following notice accompanied the original version of this
// file:
//
/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_PRIVATE_HH
#define HB_SET_PRIVATE_HH

#include "hb-private.hh"
#include "hb-object-private.hh"


/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like a Bloom
 * filter or a quotient filter, but much smaller, faster, and designed
 * to fit the requirements of our uses for glyph coverage queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but they become fully flooded and ineffective if the
 * coverage is spread all over the place.
 *
 * The frozen set can be used instead of a digest, to trade more
 * memory for 100% accuracy, but in practice that doesn't look like
 * an attractive trade-off.
 */

template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  ASSERT_POD ();

  static const unsigned int mask_bytes = sizeof (mask_t);
  static const unsigned int mask_bits = sizeof (mask_t) * 8;
  static const unsigned int num_bits = 0
                                     + (mask_bytes >= 1 ? 3 : 0)
                                     + (mask_bytes >= 2 ? 1 : 0)
                                     + (mask_bytes >= 4 ? 1 : 0)
                                     + (mask_bytes >= 8 ? 1 : 0)
                                     + (mask_bytes >= 16 ? 1 : 0)
                                     + 0;

  ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
  ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);

  inline void init (void) {
    mask = 0;
  }

  inline void add (hb_codepoint_t g) {
    mask |= mask_for (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
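    /* The range covers at least mask_bits consecutive (g >> shift) values,
     * so every bit position gets hit; just saturate the mask. */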
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
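      /* Set every bit from a's position up to b's position, inclusive:
       * mb + (mb - ma) == 2*mb - ma does that when b's bit is at or above
       * a's bit; subtracting (mb < ma) fixes up the wrapped case so both
       * the high and the low end of the mask get covered. */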
      mask |= mb + (mb - ma) - (mb < ma);
    }
  }

  inline bool may_have (hb_codepoint_t g) const {
    return !!(mask & mask_for (g));
  }

  private:

  static inline mask_t mask_for (hb_codepoint_t g) {
    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
  }
  mask_t mask;
};
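
/*
 * Illustrative sketch (not part of this header): how a lowest-bits
 * filter answers "approximate member queries".  With mask_t = uint32_t
 * and shift = 0, a glyph g maps to bit (g & 31), so false positives can
 * occur but false negatives cannot:
 *
 *   hb_set_digest_lowest_bits_t<uint32_t, 0> digest;
 *   digest.init ();
 *   digest.add (3);
 *   digest.may_have (3);   // true
 *   digest.may_have (35);  // also true: 35 & 31 == 3 (false positive)
 *   digest.may_have (4);   // false: bit 4 was never set
 */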

template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  ASSERT_POD ();

  inline void init (void) {
    head.init ();
    tail.init ();
  }

  inline void add (hb_codepoint_t g) {
    head.add (g);
    tail.add (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    head.add_range (a, b);
    tail.add_range (a, b);
  }

  inline bool may_have (hb_codepoint_t g) const {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;
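
/*
 * Illustrative sketch (not part of the API defined here): a digest is
 * typically used as a cheap pre-filter in front of an exact coverage
 * check.  coverage_contains (), apply_lookup (), and the glyph variables
 * below are hypothetical stand-ins for whatever exact lookup and action
 * the caller has:
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add_range (first_glyph, last_glyph); // glyphs the lookup may touch
 *
 *   if (digest.may_have (g) && coverage_contains (g))
 *     apply_lookup (g);  // the exact check runs only if the filter passes
 */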



/*
 * hb_set_t
 */


/* TODO Make this faster and more memory efficient. */

struct hb_set_t
{
  friend struct hb_frozen_set_t;

  hb_object_header_t header;
  ASSERT_POD ();
  bool in_error;

  inline void init (void) {
    hb_object_init (this);
    clear ();
  }
  inline void fini (void) {
  }
  inline void clear (void) {
    if (unlikely (hb_object_is_inert (this)))
      return;
    in_error = false;
    memset (elts, 0, sizeof elts);
  }
  inline bool is_empty (void) const {
    for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
      if (elts[i])
        return false;
    return true;
  }
  inline void add (hb_codepoint_t g)
  {
    if (unlikely (in_error)) return;
    if (unlikely (g == INVALID)) return;
    if (unlikely (g > MAX_G)) return;
    elt (g) |= mask (g);
  }
  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if (unlikely (in_error)) return;
    /* TODO Speedup */
    for (unsigned int i = a; i < b + 1; i++)
      add (i);
  }
  inline void del (hb_codepoint_t g)
  {
    if (unlikely (in_error)) return;
    if (unlikely (g > MAX_G)) return;
    elt (g) &= ~mask (g);
  }
  inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if (unlikely (in_error)) return;
    /* TODO Speedup */
    for (unsigned int i = a; i < b + 1; i++)
      del (i);
  }
  inline bool has (hb_codepoint_t g) const
  {
    if (unlikely (g > MAX_G)) return false;
    return !!(elt (g) & mask (g));
  }
  inline bool intersects (hb_codepoint_t first,
                          hb_codepoint_t last) const
  {
    if (unlikely (first > MAX_G)) return false;
    if (unlikely (last  > MAX_G)) last = MAX_G;
    unsigned int end = last + 1;
    for (hb_codepoint_t i = first; i < end; i++)
      if (has (i))
        return true;
    return false;
  }
  inline bool is_equal (const hb_set_t *other) const
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i] != other->elts[i])
        return false;
    return true;
  }
  inline void set (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = other->elts[i];
  }
  inline void union_ (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] |= other->elts[i];
  }
  inline void intersect (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= other->elts[i];
  }
  inline void subtract (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= ~other->elts[i];
  }
  inline void symmetric_difference (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] ^= other->elts[i];
  }
  inline void invert (void)
  {
    if (unlikely (in_error)) return;
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = ~elts[i];
  }
  inline bool next (hb_codepoint_t *codepoint) const
  {
    if (unlikely (*codepoint == INVALID)) {
      hb_codepoint_t i = get_min ();
      if (i != INVALID) {
        *codepoint = i;
        return true;
      } else {
        *codepoint = INVALID;
        return false;
      }
    }
    for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
      if (has (i)) {
        *codepoint = i;
        return true;
      }
    *codepoint = INVALID;
    return false;
  }
  inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
  {
    hb_codepoint_t i;

    i = *last;
    if (!next (&i))
    {
      *last = *first = INVALID;
      return false;
    }

    *last = *first = i;
    while (next (&i) && i == *last + 1)
      (*last)++;

    return true;
  }

  inline unsigned int get_population (void) const
  {
    unsigned int count = 0;
    for (unsigned int i = 0; i < ELTS; i++)
      count += _hb_popcount32 (elts[i]);
    return count;
  }
  inline hb_codepoint_t get_min (void) const
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i])
        for (unsigned int j = 0; j < BITS; j++)
          if (elts[i] & (elt_t (1) << j))
            return i * BITS + j;
    return INVALID;
  }
  inline hb_codepoint_t get_max (void) const
  {
    for (unsigned int i = ELTS; i; i--)
      if (elts[i - 1])
        for (unsigned int j = BITS; j; j--)
          if (elts[i - 1] & (elt_t (1) << (j - 1)))
            return (i - 1) * BITS + (j - 1);
    return INVALID;
  }

  typedef uint32_t elt_t;
  static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
  static const unsigned int SHIFT = 5;
  static const unsigned int BITS = (1 << SHIFT);
  static const unsigned int MASK = BITS - 1;
  static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
  static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;

  elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

  elt_t elts[ELTS]; /* XXX 8kb */

  ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
  ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
};
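
/*
 * Illustrative sketch (not part of this header): exercising hb_set_t
 * directly through its inline methods.  Real callers normally go through
 * the public hb_set_create () / hb_set_destroy () API; this only shows
 * what the methods above do.
 *
 *   hb_set_t set;
 *   set.init ();
 *   set.add (10);
 *   set.add_range (20, 29);
 *   set.has (25);           // true
 *   set.get_population ();  // 11
 *
 *   hb_codepoint_t first = HB_SET_VALUE_INVALID, last = HB_SET_VALUE_INVALID;
 *   while (set.next_range (&first, &last))
 *     ;                     // visits [10,10], then [20,29]
 *   set.fini ();
 */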

struct hb_frozen_set_t
{
  static const unsigned int SHIFT = hb_set_t::SHIFT;
  static const unsigned int BITS = hb_set_t::BITS;
  static const unsigned int MASK = hb_set_t::MASK;
  typedef hb_set_t::elt_t elt_t;

  inline void init (const hb_set_t &set)
  {
    start = count = 0;
    elts = NULL;

    unsigned int max = set.get_max ();
    if (max == set.INVALID)
      return;
    unsigned int min = set.get_min ();
    const elt_t &min_elt = set.elt (min);

    start = min & ~MASK;
    count = max - start + 1;
    unsigned int num_elts = (count + BITS - 1) / BITS;
    unsigned int elts_size = num_elts * sizeof (elt_t);
    elts = (elt_t *) malloc (elts_size);
    if (unlikely (!elts))
    {
      start = count = 0;
      return;
    }
    memcpy (elts, &min_elt, elts_size);
  }

  inline void fini (void)
  {
    if (elts)
      free (elts);
  }

  inline bool has (hb_codepoint_t g) const
  {
    /* hb_codepoint_t is unsigned; valid offsets are [0, count). */
    g -= start;
    if (unlikely (g >= count)) return false;
    return !!(elt (g) & mask (g));
  }

  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

  private:
  hb_codepoint_t start, count;
  elt_t *elts;
};
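
/*
 * Illustrative sketch (not part of this header): freezing an already
 * populated hb_set_t (named `set` here) into the compact, read-only
 * hb_frozen_set_t for repeated membership tests.
 *
 *   hb_frozen_set_t frozen;
 *   frozen.init (set);   // copies only the words spanning set's min..max
 *   frozen.has (25);     // same answer as set.has (25)
 *   frozen.fini ();      // releases the copied bits
 */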


#endif /* HB_SET_PRIVATE_HH */