1 /*
   2  * Copyright © 2012,2017  Google, Inc.
   3  *
   4  *  This is part of HarfBuzz, a text shaping library.
   5  *
   6  * Permission is hereby granted, without written agreement and without
   7  * license or royalty fees, to use, copy, modify, and distribute this
   8  * software and its documentation for any purpose, provided that the
   9  * above copyright notice and the following two paragraphs appear in
  10  * all copies of this software.
  11  *
  12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  16  * DAMAGE.
  17  *
  18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  23  *
  24  * Google Author(s): Behdad Esfahbod
  25  */
  26 
  27 #ifndef HB_SET_HH
  28 #define HB_SET_HH
  29 
  30 #include "hb.hh"
  31 
  32 
  33 /*
  34  * hb_set_t
  35  */
  36 
  37 /* TODO Keep a free-list so we can free pages that are completely zeroed.  At that
  38  * point maybe also use a sentinel value for "all-1" pages? */
  39 
  40 struct hb_set_t
  41 {
  42   HB_NO_COPY_ASSIGN (hb_set_t);
  43   hb_set_t ()  { init (); }
  44   ~hb_set_t () { fini (); }
  45 
  46   struct page_map_t
  47   {
  48     int cmp (const page_map_t &o) const { return (int) o.major - (int) major; }
  49 
  50     uint32_t major;
  51     uint32_t index;
  52   };
  53 
  54   struct page_t
  55   {
  56     void init0 () { v.clear (); }
  57     void init1 () { v.clear (0xFF); }
  58 
  59     unsigned int len () const
  60     { return ARRAY_LENGTH_CONST (v); }
  61 
  62     bool is_empty () const
  63     {
  64       for (unsigned int i = 0; i < len (); i++)
  65         if (v[i])
  66           return false;
  67       return true;
  68     }
  69 
  70     void add (hb_codepoint_t g) { elt (g) |= mask (g); }
  71     void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
  72     bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }
  73 
  74     void add_range (hb_codepoint_t a, hb_codepoint_t b)
  75     {
  76       elt_t *la = &elt (a);
  77       elt_t *lb = &elt (b);
  78       if (la == lb)
  79         *la |= (mask (b) << 1) - mask(a);
  80       else
  81       {
  82         *la |= ~(mask (a) - 1);
  83         la++;
  84 
  85         memset (la, 0xff, (char *) lb - (char *) la);
  86 
  87         *lb |= ((mask (b) << 1) - 1);
  88       }
  89     }
  90 
  91     bool is_equal (const page_t *other) const
  92     {
  93       return 0 == hb_memcmp (&v, &other->v, sizeof (v));
  94     }
  95 
  96     unsigned int get_population () const
  97     {
  98       unsigned int pop = 0;
  99       for (unsigned int i = 0; i < len (); i++)
 100         pop += hb_popcount (v[i]);
 101       return pop;
 102     }
 103 
 104     bool next (hb_codepoint_t *codepoint) const
 105     {
 106       unsigned int m = (*codepoint + 1) & MASK;
 107       if (!m)
 108       {
 109         *codepoint = INVALID;
 110         return false;
 111       }
 112       unsigned int i = m / ELT_BITS;
 113       unsigned int j = m & ELT_MASK;
 114 
 115       const elt_t vv = v[i] & ~((elt_t (1) << j) - 1);
 116       for (const elt_t *p = &vv; i < len (); p = &v[++i])
 117         if (*p)
 118         {
 119           *codepoint = i * ELT_BITS + elt_get_min (*p);
 120           return true;
 121         }
 122 
 123       *codepoint = INVALID;
 124       return false;
 125     }
 126     bool previous (hb_codepoint_t *codepoint) const
 127     {
 128       unsigned int m = (*codepoint - 1) & MASK;
 129       if (m == MASK)
 130       {
 131         *codepoint = INVALID;
 132         return false;
 133       }
 134       unsigned int i = m / ELT_BITS;
 135       unsigned int j = m & ELT_MASK;
 136 
 137       const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1);
 138       for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i])
 139         if (*p)
 140         {
 141           *codepoint = i * ELT_BITS + elt_get_max (*p);
 142           return true;
 143         }
 144 
 145       *codepoint = INVALID;
 146       return false;
 147     }
 148     hb_codepoint_t get_min () const
 149     {
 150       for (unsigned int i = 0; i < len (); i++)
 151         if (v[i])
 152           return i * ELT_BITS + elt_get_min (v[i]);
 153       return INVALID;
 154     }
 155     hb_codepoint_t get_max () const
 156     {
 157       for (int i = len () - 1; i >= 0; i--)
 158         if (v[i])
 159           return i * ELT_BITS + elt_get_max (v[i]);
 160       return 0;
 161     }
 162 
 163     typedef unsigned long long elt_t;
 164     static constexpr unsigned PAGE_BITS = 512;
 165     static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
 166 
 167     static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
 168     static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
 169 
 170     typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
 171 
 172     static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8;
 173     static constexpr unsigned ELT_MASK = ELT_BITS - 1;
 174     static constexpr unsigned BITS = sizeof (vector_t) * 8;
 175     static constexpr unsigned MASK = BITS - 1;
 176     static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");
 177 
 178     elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
 179     elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
 180     elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
 181 
 182     vector_t v;
 183   };
 184   static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
 185 
 186   hb_object_header_t header;
 187   bool successful; /* Allocations successful */
 188   mutable unsigned int population;
 189   hb_vector_t<page_map_t> page_map;
 190   hb_vector_t<page_t> pages;
 191 
 192   void init_shallow ()
 193   {
 194     successful = true;
 195     population = 0;
 196     page_map.init ();
 197     pages.init ();
 198   }
 199   void init ()
 200   {
 201     hb_object_init (this);
 202     init_shallow ();
 203   }
 204   void fini_shallow ()
 205   {
 206     population = 0;
 207     page_map.fini ();
 208     pages.fini ();
 209   }
 210   void fini ()
 211   {
 212     hb_object_fini (this);
 213     fini_shallow ();
 214   }
 215 
 216   bool in_error () const { return !successful; }
 217 
 218   bool resize (unsigned int count)
 219   {
 220     if (unlikely (!successful)) return false;
 221     if (!pages.resize (count) || !page_map.resize (count))
 222     {
 223       pages.resize (page_map.length);
 224       successful = false;
 225       return false;
 226     }
 227     return true;
 228   }
 229 
 230   void clear ()
 231   {
 232     if (unlikely (hb_object_is_immutable (this)))
 233       return;
 234     successful = true;
 235     population = 0;
 236     page_map.resize (0);
 237     pages.resize (0);
 238   }
 239   bool is_empty () const
 240   {
 241     unsigned int count = pages.length;
 242     for (unsigned int i = 0; i < count; i++)
 243       if (!pages[i].is_empty ())
 244         return false;
 245     return true;
 246   }
 247 
 248   void dirty () { population = (unsigned int) -1; }
 249 
 250   void add (hb_codepoint_t g)
 251   {
 252     if (unlikely (!successful)) return;
 253     if (unlikely (g == INVALID)) return;
 254     dirty ();
 255     page_t *page = page_for_insert (g); if (unlikely (!page)) return;
 256     page->add (g);
 257   }
 258   bool add_range (hb_codepoint_t a, hb_codepoint_t b)
 259   {
 260     if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
 261     if (unlikely (a > b || a == INVALID || b == INVALID)) return false;
 262     dirty ();
 263     unsigned int ma = get_major (a);
 264     unsigned int mb = get_major (b);
 265     if (ma == mb)
 266     {
 267       page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
 268       page->add_range (a, b);
 269     }
 270     else
 271     {
 272       page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
 273       page->add_range (a, major_start (ma + 1) - 1);
 274 
 275       for (unsigned int m = ma + 1; m < mb; m++)
 276       {
 277         page = page_for_insert (major_start (m)); if (unlikely (!page)) return false;
 278         page->init1 ();
 279       }
 280 
 281       page = page_for_insert (b); if (unlikely (!page)) return false;
 282       page->add_range (major_start (mb), b);
 283     }
 284     return true;
 285   }
 286 
 287   template <typename T>
 288   void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
 289   {
 290     if (unlikely (!successful)) return;
 291     if (!count) return;
 292     dirty ();
 293     hb_codepoint_t g = *array;
 294     while (count)
 295     {
 296       unsigned int m = get_major (g);
 297       page_t *page = page_for_insert (g); if (unlikely (!page)) return;
 298       unsigned int start = major_start (m);
 299       unsigned int end = major_start (m + 1);
 300       do
 301       {
 302         page->add (g);
 303 
 304         array = (const T *) ((const char *) array + stride);
 305         count--;
 306       }
 307       while (count && (g = *array, start <= g && g < end));
 308     }
 309   }
 310 
 311   /* Might return false if array looks unsorted.
 312    * Used for faster rejection of corrupt data. */
 313   template <typename T>
 314   bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
 315   {
 316     if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
 317     if (!count) return true;
 318     dirty ();
 319     hb_codepoint_t g = *array;
 320     hb_codepoint_t last_g = g;
 321     while (count)
 322     {
 323       unsigned int m = get_major (g);
 324       page_t *page = page_for_insert (g); if (unlikely (!page)) return false;
 325       unsigned int end = major_start (m + 1);
 326       do
 327       {
 328         /* If we try harder we can change the following comparison to <=;
 329          * Not sure if it's worth it. */
 330         if (g < last_g) return false;
 331         last_g = g;
 332         page->add (g);
 333 
 334         array = (const T *) ((const char *) array + stride);
 335         count--;
 336       }
 337       while (count && (g = *array, g < end));
 338     }
 339     return true;
 340   }
 341 
 342   void del (hb_codepoint_t g)
 343   {
 344     /* TODO perform op even if !successful. */
 345     if (unlikely (!successful)) return;
 346     page_t *page = page_for (g);
 347     if (!page)
 348       return;
 349     dirty ();
 350     page->del (g);
 351   }
 352   void del_range (hb_codepoint_t a, hb_codepoint_t b)
 353   {
 354     /* TODO perform op even if !successful. */
 355     /* TODO Optimize, like add_range(). */
 356     if (unlikely (!successful)) return;
 357     for (unsigned int i = a; i < b + 1; i++)
 358       del (i);
 359   }
 360   bool has (hb_codepoint_t g) const
 361   {
 362     const page_t *page = page_for (g);
 363     if (!page)
 364       return false;
 365     return page->has (g);
 366   }
 367   bool intersects (hb_codepoint_t first,
 368                           hb_codepoint_t last) const
 369   {
 370     hb_codepoint_t c = first - 1;
 371     return next (&c) && c <= last;
 372   }
 373   void set (const hb_set_t *other)
 374   {
 375     if (unlikely (!successful)) return;
 376     unsigned int count = other->pages.length;
 377     if (!resize (count))
 378       return;
 379     population = other->population;
 380     memcpy ((void *) pages, (const void *) other->pages, count * pages.item_size);
 381     memcpy ((void *) page_map, (const void *) other->page_map, count * page_map.item_size);
 382   }
 383 
 384   bool is_equal (const hb_set_t *other) const
 385   {
 386     if (get_population () != other->get_population ())
 387       return false;
 388 
 389     unsigned int na = pages.length;
 390     unsigned int nb = other->pages.length;
 391 
 392     unsigned int a = 0, b = 0;
 393     for (; a < na && b < nb; )
 394     {
 395       if (page_at (a).is_empty ()) { a++; continue; }
 396       if (other->page_at (b).is_empty ()) { b++; continue; }
 397       if (page_map[a].major != other->page_map[b].major ||
 398           !page_at (a).is_equal (&other->page_at (b)))
 399         return false;
 400       a++;
 401       b++;
 402     }
 403     for (; a < na; a++)
 404       if (!page_at (a).is_empty ()) { return false; }
 405     for (; b < nb; b++)
 406       if (!other->page_at (b).is_empty ()) { return false; }
 407 
 408     return true;
 409   }
 410 
 411   bool is_subset (const hb_set_t *larger_set) const
 412   {
 413     if (get_population () > larger_set->get_population ())
 414       return false;
 415 
 416     /* TODO Optimize to use pages. */
 417     hb_codepoint_t c = INVALID;
 418     while (next (&c))
 419       if (!larger_set->has (c))
 420         return false;
 421 
 422     return true;
 423   }
 424 
 425   template <class Op>
 426   void process (const hb_set_t *other)
 427   {
 428     if (unlikely (!successful)) return;
 429 
 430     dirty ();
 431 
 432     unsigned int na = pages.length;
 433     unsigned int nb = other->pages.length;
 434     unsigned int next_page = na;
 435 
 436     unsigned int count = 0, newCount = 0;
 437     unsigned int a = 0, b = 0;
 438     for (; a < na && b < nb; )
 439     {
 440       if (page_map[a].major == other->page_map[b].major)
 441       {
 442         count++;
 443         a++;
 444         b++;
 445       }
 446       else if (page_map[a].major < other->page_map[b].major)
 447       {
 448         if (Op::passthru_left)
 449           count++;
 450         a++;
 451       }
 452       else
 453       {
 454         if (Op::passthru_right)
 455           count++;
 456         b++;
 457       }
 458     }
 459     if (Op::passthru_left)
 460       count += na - a;
 461     if (Op::passthru_right)
 462       count += nb - b;
 463 
 464     if (count > pages.length)
 465       if (!resize (count))
 466         return;
 467     newCount = count;
 468 
 469     /* Process in-place backward. */
 470     a = na;
 471     b = nb;
 472     for (; a && b; )
 473     {
 474       if (page_map[a - 1].major == other->page_map[b - 1].major)
 475       {
 476         a--;
 477         b--;
 478         count--;
 479         page_map[count] = page_map[a];
 480         Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v);
 481       }
 482       else if (page_map[a - 1].major > other->page_map[b - 1].major)
 483       {
 484         a--;
 485         if (Op::passthru_left)
 486         {
 487           count--;
 488           page_map[count] = page_map[a];
 489         }
 490       }
 491       else
 492       {
 493         b--;
 494         if (Op::passthru_right)
 495         {
 496           count--;
 497           page_map[count].major = other->page_map[b].major;
 498           page_map[count].index = next_page++;
 499           page_at (count).v = other->page_at (b).v;
 500         }
 501       }
 502     }
 503     if (Op::passthru_left)
 504       while (a)
 505       {
 506         a--;
 507         count--;
 508         page_map[count] = page_map [a];
 509       }
 510     if (Op::passthru_right)
 511       while (b)
 512       {
 513         b--;
 514         count--;
 515         page_map[count].major = other->page_map[b].major;
 516         page_map[count].index = next_page++;
 517         page_at (count).v = other->page_at (b).v;
 518       }
 519     assert (!count);
 520     if (pages.length > newCount)
 521       resize (newCount);
 522   }
 523 
 524   void union_ (const hb_set_t *other)
 525   {
 526     process<HbOpOr> (other);
 527   }
 528   void intersect (const hb_set_t *other)
 529   {
 530     process<HbOpAnd> (other);
 531   }
 532   void subtract (const hb_set_t *other)
 533   {
 534     process<HbOpMinus> (other);
 535   }
 536   void symmetric_difference (const hb_set_t *other)
 537   {
 538     process<HbOpXor> (other);
 539   }
 540   bool next (hb_codepoint_t *codepoint) const
 541   {
 542     if (unlikely (*codepoint == INVALID)) {
 543       *codepoint = get_min ();
 544       return *codepoint != INVALID;
 545     }
 546 
 547     page_map_t map = {get_major (*codepoint), 0};
 548     unsigned int i;
 549     page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
 550     if (i < page_map.length && page_map[i].major == map.major)
 551     {
 552       if (pages[page_map[i].index].next (codepoint))
 553       {
 554         *codepoint += page_map[i].major * page_t::PAGE_BITS;
 555         return true;
 556       }
 557       i++;
 558     }
 559     for (; i < page_map.length; i++)
 560     {
 561       hb_codepoint_t m = pages[page_map[i].index].get_min ();
 562       if (m != INVALID)
 563       {
 564         *codepoint = page_map[i].major * page_t::PAGE_BITS + m;
 565         return true;
 566       }
 567     }
 568     *codepoint = INVALID;
 569     return false;
 570   }
 571   bool previous (hb_codepoint_t *codepoint) const
 572   {
 573     if (unlikely (*codepoint == INVALID)) {
 574       *codepoint = get_max ();
 575       return *codepoint != INVALID;
 576     }
 577 
 578     page_map_t map = {get_major (*codepoint), 0};
 579     unsigned int i;
 580     page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST);
 581     if (i < page_map.length && page_map[i].major == map.major)
 582     {
 583       if (pages[page_map[i].index].previous (codepoint))
 584       {
 585         *codepoint += page_map[i].major * page_t::PAGE_BITS;
 586         return true;
 587       }
 588     }
 589     i--;
 590     for (; (int) i >= 0; i--)
 591     {
 592       hb_codepoint_t m = pages[page_map[i].index].get_max ();
 593       if (m != INVALID)
 594       {
 595         *codepoint = page_map[i].major * page_t::PAGE_BITS + m;
 596         return true;
 597       }
 598     }
 599     *codepoint = INVALID;
 600     return false;
 601   }
 602   bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
 603   {
 604     hb_codepoint_t i;
 605 
 606     i = *last;
 607     if (!next (&i))
 608     {
 609       *last = *first = INVALID;
 610       return false;
 611     }
 612 
 613     /* TODO Speed up. */
 614     *last = *first = i;
 615     while (next (&i) && i == *last + 1)
 616       (*last)++;
 617 
 618     return true;
 619   }
 620   bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const
 621   {
 622     hb_codepoint_t i;
 623 
 624     i = *first;
 625     if (!previous (&i))
 626     {
 627       *last = *first = INVALID;
 628       return false;
 629     }
 630 
 631     /* TODO Speed up. */
 632     *last = *first = i;
 633     while (previous (&i) && i == *first - 1)
 634       (*first)--;
 635 
 636     return true;
 637   }
 638 
 639   unsigned int get_population () const
 640   {
 641     if (population != (unsigned int) -1)
 642       return population;
 643 
 644     unsigned int pop = 0;
 645     unsigned int count = pages.length;
 646     for (unsigned int i = 0; i < count; i++)
 647       pop += pages[i].get_population ();
 648 
 649     population = pop;
 650     return pop;
 651   }
 652   hb_codepoint_t get_min () const
 653   {
 654     unsigned int count = pages.length;
 655     for (unsigned int i = 0; i < count; i++)
 656       if (!page_at (i).is_empty ())
 657         return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min ();
 658     return INVALID;
 659   }
 660   hb_codepoint_t get_max () const
 661   {
 662     unsigned int count = pages.length;
 663     for (int i = count - 1; i >= 0; i++)
 664       if (!page_at (i).is_empty ())
 665         return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max ();
 666     return INVALID;
 667   }
 668 
 669   static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
 670 
 671   /*
 672    * Iterator implementation.
 673    */
 674   struct const_iter_t : hb_sorted_iter_t<const_iter_t, const hb_codepoint_t>
 675   {
 676     const_iter_t (const hb_set_t &s_) :
 677       s (s_), v (INVALID), l (s.get_population () + 1) { __next__ (); }
 678 
 679     typedef hb_codepoint_t __item_type__;
 680     hb_codepoint_t __item__ () const { return v; }
 681     bool __more__ () const { return v != INVALID; }
 682     void __next__ () { s.next (&v); if (l) l--; }
 683     void __prev__ () { s.previous (&v); }
 684     unsigned __len__ () { return l; }
 685 
 686     protected:
 687     const hb_set_t &s;
 688     hb_codepoint_t v;
 689     unsigned l;
 690   };
 691   const_iter_t const_iter () const { return const_iter_t (*this); }
 692   operator const_iter_t () const { return const_iter (); }
 693   typedef const_iter_t iter_t;
 694   iter_t iter () const { return const_iter (); }
 695 
 696   protected:
 697 
 698   page_t *page_for_insert (hb_codepoint_t g)
 699   {
 700     page_map_t map = {get_major (g), pages.length};
 701     unsigned int i;
 702     if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST))
 703     {
 704       if (!resize (pages.length + 1))
 705         return nullptr;
 706 
 707       pages[map.index].init0 ();
 708       memmove (page_map + i + 1,
 709                page_map + i,
 710                (page_map.length - 1 - i) * page_map.item_size);
 711       page_map[i] = map;
 712     }
 713     return &pages[page_map[i].index];
 714   }
 715   page_t *page_for (hb_codepoint_t g)
 716   {
 717     page_map_t key = {get_major (g)};
 718     const page_map_t *found = page_map.bsearch (key);
 719     if (found)
 720       return &pages[found->index];
 721     return nullptr;
 722   }
 723   const page_t *page_for (hb_codepoint_t g) const
 724   {
 725     page_map_t key = {get_major (g)};
 726     const page_map_t *found = page_map.bsearch (key);
 727     if (found)
 728       return &pages[found->index];
 729     return nullptr;
 730   }
 731   page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
 732   const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
 733   unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
 734   hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; }
 735 };
 736 
 737 
 738 #endif /* HB_SET_HH */