7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_PRIVATE_HH 28 #define HB_SET_PRIVATE_HH 29 30 #include "hb-private.hh" 31 #include "hb-object-private.hh" 32 33 34 /* 35 * hb_set_t 36 */ 37 38 /* TODO Keep a free-list so we can free pages that are completely zeroed. At that 39 * point maybe also use a sentinel value for "all-1" pages? */ 40 41 struct hb_set_t 42 { 43 struct page_map_t 44 { 45 inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; } 46 47 uint32_t major; 48 uint32_t index; 49 }; 50 51 struct page_t 52 { 53 inline void init0 (void) { memset (&v, 0, sizeof (v)); } 54 inline void init1 (void) { memset (&v, 0xff, sizeof (v)); } 55 56 inline unsigned int len (void) const 57 { return ARRAY_LENGTH_CONST (v); } 58 59 inline bool is_empty (void) const 60 { 61 for (unsigned int i = 0; i < len (); i++) 62 if (v[i]) 63 return false; 64 return true; 65 } 66 67 inline void add (hb_codepoint_t g) { elt (g) |= mask (g); } 68 inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } 69 inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } 70 71 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 72 { 73 elt_t *la = &elt (a); 74 elt_t *lb = &elt (b); 75 if (la == lb) 76 *la |= (mask (b) << 1) - mask(a); 77 else 78 { 79 *la |= ~(mask (a) - 1); 80 la++; 81 82 memset (la, 0xff, (char *) lb - (char *) la); 83 84 *lb |= ((mask (b) << 1) - 1); 85 } 86 } 87 88 inline bool is_equal (const page_t *other) const 89 { 90 return 0 == memcmp (&v, &other->v, sizeof (v)); 91 } 92 93 inline unsigned int get_population (void) const 94 { 95 unsigned int pop = 0; 96 for (unsigned int i = 0; i < len (); i++) 97 pop += _hb_popcount (v[i]); 98 return pop; 99 } 100 101 inline bool next (hb_codepoint_t *codepoint) const 102 { 103 unsigned int m = (*codepoint + 1) & MASK; 104 if (!m) 105 { 106 *codepoint = INVALID; 107 return false; 108 } 109 unsigned int i = m / ELT_BITS; 110 unsigned int j = m & ELT_MASK; 111 112 const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); 113 for (const elt_t *p = &vv; i < len (); p = &v[++i]) 114 if (*p) 115 { 116 *codepoint = i * ELT_BITS + elt_get_min (*p); 117 return true; 118 } 119 120 *codepoint = INVALID; 121 return false; 122 } 123 inline bool previous (hb_codepoint_t *codepoint) const 124 { 125 unsigned int m = (*codepoint - 1) & MASK; 126 if (m == MASK) 127 { 128 *codepoint = INVALID; 129 return false; 130 } 131 unsigned int i = m / ELT_BITS; 132 unsigned int j = m & ELT_MASK; 133 134 const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1); 135 for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i]) 136 if (*p) 137 { 138 *codepoint = i * ELT_BITS + elt_get_max (*p); 139 return true; 140 } 141 142 *codepoint = INVALID; 143 return false; 144 } 145 inline hb_codepoint_t get_min (void) const 146 { 147 for (unsigned int i = 0; i < len (); i++) 148 if (v[i]) 149 return i * ELT_BITS + elt_get_min (v[i]); 150 return INVALID; 151 } 152 inline hb_codepoint_t get_max (void) const 153 { 154 for (int i = len () - 1; i >= 0; i--) 155 if (v[i]) 156 return i * ELT_BITS + elt_get_max (v[i]); 157 return 0; 158 } 159 160 typedef unsigned long long elt_t; 161 static const unsigned int PAGE_BITS = 512; 162 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); 163 164 static inline unsigned int elt_get_min (const elt_t &elt) { return _hb_ctz (elt); } 165 static inline unsigned int elt_get_max (const elt_t &elt) { return _hb_bit_storage (elt) - 1; } 166 167 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; 168 169 static const unsigned int ELT_BITS = sizeof (elt_t) * 8; 170 static const unsigned int ELT_MASK = ELT_BITS - 1; 171 static const unsigned int BITS = sizeof (vector_t) * 8; 172 static const unsigned int MASK = BITS - 1; 173 static_assert (PAGE_BITS == BITS, ""); 174 175 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } 176 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } 177 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } 178 179 vector_t v; 180 }; 181 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); 182 183 hb_object_header_t header; 184 bool successful; /* Allocations successful */ 185 mutable unsigned int population; 186 hb_vector_t<page_map_t, 1> page_map; 187 hb_vector_t<page_t, 1> pages; 188 189 inline void init_shallow (void) 190 { 191 successful = true; 192 population = 0; 193 page_map.init (); 194 pages.init (); 195 } 196 inline void init (void) 197 { 198 hb_object_init (this); 199 init_shallow (); 200 } 201 inline void fini_shallow (void) 202 { 203 page_map.fini (); 204 pages.fini (); 205 } 206 inline void fini (void) 207 { 208 hb_object_fini (this); 209 fini_shallow (); 210 } 211 212 inline bool resize (unsigned int count) 213 { 214 if (unlikely (!successful)) return false; 215 if (!pages.resize (count) || !page_map.resize (count)) 216 { 217 pages.resize (page_map.len); 218 successful = false; 219 return false; 220 } 221 return true; 222 } 223 224 inline void clear (void) { 225 if (unlikely (hb_object_is_inert (this))) 226 return; 227 successful = true; 228 population = 0; 229 page_map.resize (0); 230 pages.resize (0); 231 } 232 inline bool is_empty (void) const { 233 unsigned int count = pages.len; 234 for (unsigned int i = 0; i < count; i++) 235 if (!pages[i].is_empty ()) 236 return false; 237 return true; 238 } 239 240 inline void dirty (void) { population = (unsigned int) -1; } 241 242 inline void add (hb_codepoint_t g) 243 { 244 if (unlikely (!successful)) return; 245 if (unlikely (g == INVALID)) return; 246 dirty (); 247 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 248 page->add (g); 249 } 250 inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) 251 { 252 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 253 if (unlikely (a > b || a == INVALID || b == INVALID)) return false; 254 dirty (); 255 unsigned int ma = get_major (a); 256 unsigned int mb = get_major (b); 257 if (ma == mb) 258 { 259 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 260 page->add_range (a, b); 261 } 262 else 263 { 264 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 265 page->add_range (a, major_start (ma + 1) - 1); 266 267 for (unsigned int m = ma + 1; m < mb; m++) 268 { 269 page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; 270 page->init1 (); 271 } 272 273 page = page_for_insert (b); if (unlikely (!page)) return false; 274 page->add_range (major_start (mb), b); 275 } 276 return true; 277 } 278 279 template <typename T> 280 inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 281 { 282 if (unlikely (!successful)) return; 283 if (!count) return; 284 dirty (); 285 hb_codepoint_t g = *array; 286 while (count) 287 { 288 unsigned int m = get_major (g); 289 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 290 unsigned int start = major_start (m); 291 unsigned int end = major_start (m + 1); 292 do 293 { 294 page->add (g); 295 296 array = (const T *) ((const char *) array + stride); 297 count--; 298 } 299 while (count && (g = *array, start <= g && g < end)); 300 } 301 } 302 303 /* Might return false if array looks unsorted. 304 * Used for faster rejection of corrupt data. */ 305 template <typename T> 306 inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 307 { 308 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 309 if (!count) return true; 310 dirty (); 311 hb_codepoint_t g = *array; 312 hb_codepoint_t last_g = g; 313 while (count) 314 { 315 unsigned int m = get_major (g); 316 page_t *page = page_for_insert (g); if (unlikely (!page)) return false; 317 unsigned int end = major_start (m + 1); 318 do 319 { 320 /* If we try harder we can change the following comparison to <=; 321 * Not sure if it's worth it. */ 322 if (g < last_g) return false; 323 last_g = g; 324 page->add (g); 325 326 array = (const T *) ((const char *) array + stride); 327 count--; 328 } 329 while (count && (g = *array, g < end)); 330 } 331 return true; 332 } 333 334 inline void del (hb_codepoint_t g) 335 { 336 /* TODO perform op even if !successful. */ 337 if (unlikely (!successful)) return; 338 page_t *p = page_for (g); 339 if (!p) 340 return; 341 dirty (); 342 p->del (g); 343 } 344 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 345 { 346 /* TODO perform op even if !successful. */ 347 /* TODO Optimize, like add_range(). */ 348 if (unlikely (!successful)) return; 349 for (unsigned int i = a; i < b + 1; i++) 350 del (i); 351 } 352 inline bool has (hb_codepoint_t g) const 353 { 354 const page_t *p = page_for (g); 355 if (!p) 356 return false; 357 return p->has (g); 358 } 359 inline bool intersects (hb_codepoint_t first, 360 hb_codepoint_t last) const 361 { 362 hb_codepoint_t c = first - 1; 363 return next (&c) && c <= last; 364 } 365 inline void set (const hb_set_t *other) 366 { 367 if (unlikely (!successful)) return; 368 unsigned int count = other->pages.len; 369 if (!resize (count)) 370 return; 371 population = other->population; 372 memcpy (pages.arrayZ, other->pages.arrayZ, count * sizeof (pages.arrayZ[0])); 373 memcpy (page_map.arrayZ, other->page_map.arrayZ, count * sizeof (page_map.arrayZ[0])); 374 } 375 376 inline bool is_equal (const hb_set_t *other) const 377 { 378 if (get_population () != other->get_population ()) 379 return false; 380 381 unsigned int na = pages.len; 382 unsigned int nb = other->pages.len; 383 384 unsigned int a = 0, b = 0; 385 for (; a < na && b < nb; ) 386 { 387 if (page_at (a).is_empty ()) { a++; continue; } 388 if (other->page_at (b).is_empty ()) { b++; continue; } 389 if (page_map[a].major != other->page_map[b].major || 390 !page_at (a).is_equal (&other->page_at (b))) 391 return false; 392 a++; 393 b++; 394 } 395 for (; a < na; a++) 396 if (!page_at (a).is_empty ()) { return false; } 397 for (; b < nb; b++) 398 if (!other->page_at (b).is_empty ()) { return false; } 399 400 return true; 401 } 402 403 inline bool is_subset (const hb_set_t *larger_set) const 404 { 405 if (get_population () > larger_set->get_population ()) 406 return false; 407 408 hb_codepoint_t c = INVALID; 409 while (next (&c)) 410 if (!larger_set->has (c)) 411 return false; 412 413 return true; 414 } 415 416 template <class Op> 417 inline void process (const hb_set_t *other) 418 { 419 if (unlikely (!successful)) return; 420 421 dirty (); 422 423 unsigned int na = pages.len; 424 unsigned int nb = other->pages.len; 425 unsigned int next_page = na; 426 427 unsigned int count = 0, newCount = 0; 428 unsigned int a = 0, b = 0; 429 for (; a < na && b < nb; ) 430 { 431 if (page_map[a].major == other->page_map[b].major) 432 { 433 count++; 434 a++; 435 b++; 436 } 437 else if (page_map[a].major < other->page_map[b].major) 438 { 439 if (Op::passthru_left) 440 count++; 441 a++; 442 } 443 else 444 { 445 if (Op::passthru_right) 446 count++; 447 b++; 448 } 449 } 450 if (Op::passthru_left) 451 count += na - a; 452 if (Op::passthru_right) 453 count += nb - b; 454 455 if (count > pages.len) 456 if (!resize (count)) 457 return; 458 newCount = count; 459 460 /* Process in-place backward. */ 461 a = na; 462 b = nb; 463 for (; a && b; ) 464 { 465 if (page_map[a - 1].major == other->page_map[b - 1].major) 466 { 467 a--; 468 b--; 469 count--; 470 page_map[count] = page_map[a]; 471 Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v); 472 } 473 else if (page_map[a - 1].major > other->page_map[b - 1].major) 474 { 475 a--; 491 } 492 } 493 } 494 if (Op::passthru_left) 495 while (a) 496 { 497 a--; 498 count--; 499 page_map[count] = page_map [a]; 500 } 501 if (Op::passthru_right) 502 while (b) 503 { 504 b--; 505 count--; 506 page_map[count].major = other->page_map[b].major; 507 page_map[count].index = next_page++; 508 page_at (count).v = other->page_at (b).v; 509 } 510 assert (!count); 511 if (pages.len > newCount) 512 resize (newCount); 513 } 514 515 inline void union_ (const hb_set_t *other) 516 { 517 process<HbOpOr> (other); 518 } 519 inline void intersect (const hb_set_t *other) 520 { 521 process<HbOpAnd> (other); 522 } 523 inline void subtract (const hb_set_t *other) 524 { 525 process<HbOpMinus> (other); 526 } 527 inline void symmetric_difference (const hb_set_t *other) 528 { 529 process<HbOpXor> (other); 530 } 531 inline bool next (hb_codepoint_t *codepoint) const 532 { 533 if (unlikely (*codepoint == INVALID)) { 534 *codepoint = get_min (); 535 return *codepoint != INVALID; 536 } 537 538 page_map_t map = {get_major (*codepoint), 0}; 539 unsigned int i; 540 page_map.bfind (map, &i); 541 if (i < page_map.len && page_map[i].major == map.major) 542 { 543 if (pages[page_map[i].index].next (codepoint)) 544 { 545 *codepoint += page_map[i].major * page_t::PAGE_BITS; 546 return true; 547 } 548 i++; 549 } 550 for (; i < page_map.len; i++) 551 { 552 hb_codepoint_t m = pages[page_map[i].index].get_min (); 553 if (m != INVALID) 554 { 555 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 556 return true; 557 } 558 } 559 *codepoint = INVALID; 560 return false; 561 } 562 inline bool previous (hb_codepoint_t *codepoint) const 563 { 564 if (unlikely (*codepoint == INVALID)) { 565 *codepoint = get_max (); 566 return *codepoint != INVALID; 567 } 568 569 page_map_t map = {get_major (*codepoint), 0}; 570 unsigned int i; 571 page_map.bfind (map, &i); 572 if (i < page_map.len && page_map[i].major == map.major) 573 { 574 if (pages[page_map[i].index].previous (codepoint)) 575 { 576 *codepoint += page_map[i].major * page_t::PAGE_BITS; 577 return true; 578 } 579 } 580 i--; 581 for (; (int) i >= 0; i--) 582 { 583 hb_codepoint_t m = pages[page_map[i].index].get_max (); 584 if (m != INVALID) 585 { 586 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 587 return true; 588 } 589 } 590 *codepoint = INVALID; 591 return false; 592 } 593 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 594 { 595 hb_codepoint_t i; 596 597 i = *last; 598 if (!next (&i)) 599 { 600 *last = *first = INVALID; 601 return false; 602 } 603 604 /* TODO Speed up. */ 605 *last = *first = i; 606 while (next (&i) && i == *last + 1) 607 (*last)++; 608 609 return true; 610 } 611 inline bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const 612 { 613 hb_codepoint_t i; 614 615 i = *first; 616 if (!previous (&i)) 617 { 618 *last = *first = INVALID; 619 return false; 620 } 621 622 /* TODO Speed up. */ 623 *last = *first = i; 624 while (previous (&i) && i == *first - 1) 625 (*first)--; 626 627 return true; 628 } 629 630 inline unsigned int get_population (void) const 631 { 632 if (population != (unsigned int) -1) 633 return population; 634 635 unsigned int pop = 0; 636 unsigned int count = pages.len; 637 for (unsigned int i = 0; i < count; i++) 638 pop += pages[i].get_population (); 639 640 population = pop; 641 return pop; 642 } 643 inline hb_codepoint_t get_min (void) const 644 { 645 unsigned int count = pages.len; 646 for (unsigned int i = 0; i < count; i++) 647 if (!page_at (i).is_empty ()) 648 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); 649 return INVALID; 650 } 651 inline hb_codepoint_t get_max (void) const 652 { 653 unsigned int count = pages.len; 654 for (int i = count - 1; i >= 0; i++) 655 if (!page_at (i).is_empty ()) 656 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max (); 657 return INVALID; 658 } 659 660 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 661 662 inline page_t *page_for_insert (hb_codepoint_t g) 663 { 664 page_map_t map = {get_major (g), pages.len}; 665 unsigned int i; 666 if (!page_map.bfind (map, &i)) 667 { 668 if (!resize (pages.len + 1)) 669 return nullptr; 670 671 pages[map.index].init0 (); 672 memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0])); 673 page_map[i] = map; 674 } 675 return &pages[page_map[i].index]; 676 } 677 inline page_t *page_for (hb_codepoint_t g) 678 { 679 page_map_t key = {get_major (g)}; 680 const page_map_t *found = page_map.bsearch (key); 681 if (found) 682 return &pages[found->index]; 683 return nullptr; 684 } 685 inline const page_t *page_for (hb_codepoint_t g) const 686 { 687 page_map_t key = {get_major (g)}; 688 const page_map_t *found = page_map.bsearch (key); 689 if (found) 690 return &pages[found->index]; 691 return nullptr; 692 } 693 inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } 694 inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } 695 inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } 696 inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } 697 }; 698 699 700 #endif /* HB_SET_PRIVATE_HH */ | 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_SET_HH 28 #define HB_SET_HH 29 30 #include "hb.hh" 31 32 33 /* 34 * hb_set_t 35 */ 36 37 /* TODO Keep a free-list so we can free pages that are completely zeroed. At that 38 * point maybe also use a sentinel value for "all-1" pages? */ 39 40 struct hb_set_t 41 { 42 HB_NO_COPY_ASSIGN (hb_set_t); 43 hb_set_t () { init (); } 44 ~hb_set_t () { fini (); } 45 46 struct page_map_t 47 { 48 int cmp (const page_map_t &o) const { return (int) o.major - (int) major; } 49 50 uint32_t major; 51 uint32_t index; 52 }; 53 54 struct page_t 55 { 56 void init0 () { v.clear (); } 57 void init1 () { v.clear (0xFF); } 58 59 unsigned int len () const 60 { return ARRAY_LENGTH_CONST (v); } 61 62 bool is_empty () const 63 { 64 for (unsigned int i = 0; i < len (); i++) 65 if (v[i]) 66 return false; 67 return true; 68 } 69 70 void add (hb_codepoint_t g) { elt (g) |= mask (g); } 71 void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } 72 bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } 73 74 void add_range (hb_codepoint_t a, hb_codepoint_t b) 75 { 76 elt_t *la = &elt (a); 77 elt_t *lb = &elt (b); 78 if (la == lb) 79 *la |= (mask (b) << 1) - mask(a); 80 else 81 { 82 *la |= ~(mask (a) - 1); 83 la++; 84 85 memset (la, 0xff, (char *) lb - (char *) la); 86 87 *lb |= ((mask (b) << 1) - 1); 88 } 89 } 90 91 bool is_equal (const page_t *other) const 92 { 93 return 0 == hb_memcmp (&v, &other->v, sizeof (v)); 94 } 95 96 unsigned int get_population () const 97 { 98 unsigned int pop = 0; 99 for (unsigned int i = 0; i < len (); i++) 100 pop += hb_popcount (v[i]); 101 return pop; 102 } 103 104 bool next (hb_codepoint_t *codepoint) const 105 { 106 unsigned int m = (*codepoint + 1) & MASK; 107 if (!m) 108 { 109 *codepoint = INVALID; 110 return false; 111 } 112 unsigned int i = m / ELT_BITS; 113 unsigned int j = m & ELT_MASK; 114 115 const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); 116 for (const elt_t *p = &vv; i < len (); p = &v[++i]) 117 if (*p) 118 { 119 *codepoint = i * ELT_BITS + elt_get_min (*p); 120 return true; 121 } 122 123 *codepoint = INVALID; 124 return false; 125 } 126 bool previous (hb_codepoint_t *codepoint) const 127 { 128 unsigned int m = (*codepoint - 1) & MASK; 129 if (m == MASK) 130 { 131 *codepoint = INVALID; 132 return false; 133 } 134 unsigned int i = m / ELT_BITS; 135 unsigned int j = m & ELT_MASK; 136 137 const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1); 138 for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i]) 139 if (*p) 140 { 141 *codepoint = i * ELT_BITS + elt_get_max (*p); 142 return true; 143 } 144 145 *codepoint = INVALID; 146 return false; 147 } 148 hb_codepoint_t get_min () const 149 { 150 for (unsigned int i = 0; i < len (); i++) 151 if (v[i]) 152 return i * ELT_BITS + elt_get_min (v[i]); 153 return INVALID; 154 } 155 hb_codepoint_t get_max () const 156 { 157 for (int i = len () - 1; i >= 0; i--) 158 if (v[i]) 159 return i * ELT_BITS + elt_get_max (v[i]); 160 return 0; 161 } 162 163 typedef unsigned long long elt_t; 164 static constexpr unsigned PAGE_BITS = 512; 165 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); 166 167 static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); } 168 static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; } 169 170 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; 171 172 static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8; 173 static constexpr unsigned ELT_MASK = ELT_BITS - 1; 174 static constexpr unsigned BITS = sizeof (vector_t) * 8; 175 static constexpr unsigned MASK = BITS - 1; 176 static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, ""); 177 178 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } 179 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } 180 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } 181 182 vector_t v; 183 }; 184 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); 185 186 hb_object_header_t header; 187 bool successful; /* Allocations successful */ 188 mutable unsigned int population; 189 hb_vector_t<page_map_t> page_map; 190 hb_vector_t<page_t> pages; 191 192 void init_shallow () 193 { 194 successful = true; 195 population = 0; 196 page_map.init (); 197 pages.init (); 198 } 199 void init () 200 { 201 hb_object_init (this); 202 init_shallow (); 203 } 204 void fini_shallow () 205 { 206 population = 0; 207 page_map.fini (); 208 pages.fini (); 209 } 210 void fini () 211 { 212 hb_object_fini (this); 213 fini_shallow (); 214 } 215 216 bool in_error () const { return !successful; } 217 218 bool resize (unsigned int count) 219 { 220 if (unlikely (!successful)) return false; 221 if (!pages.resize (count) || !page_map.resize (count)) 222 { 223 pages.resize (page_map.length); 224 successful = false; 225 return false; 226 } 227 return true; 228 } 229 230 void clear () 231 { 232 if (unlikely (hb_object_is_immutable (this))) 233 return; 234 successful = true; 235 population = 0; 236 page_map.resize (0); 237 pages.resize (0); 238 } 239 bool is_empty () const 240 { 241 unsigned int count = pages.length; 242 for (unsigned int i = 0; i < count; i++) 243 if (!pages[i].is_empty ()) 244 return false; 245 return true; 246 } 247 248 void dirty () { population = (unsigned int) -1; } 249 250 void add (hb_codepoint_t g) 251 { 252 if (unlikely (!successful)) return; 253 if (unlikely (g == INVALID)) return; 254 dirty (); 255 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 256 page->add (g); 257 } 258 bool add_range (hb_codepoint_t a, hb_codepoint_t b) 259 { 260 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 261 if (unlikely (a > b || a == INVALID || b == INVALID)) return false; 262 dirty (); 263 unsigned int ma = get_major (a); 264 unsigned int mb = get_major (b); 265 if (ma == mb) 266 { 267 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 268 page->add_range (a, b); 269 } 270 else 271 { 272 page_t *page = page_for_insert (a); if (unlikely (!page)) return false; 273 page->add_range (a, major_start (ma + 1) - 1); 274 275 for (unsigned int m = ma + 1; m < mb; m++) 276 { 277 page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; 278 page->init1 (); 279 } 280 281 page = page_for_insert (b); if (unlikely (!page)) return false; 282 page->add_range (major_start (mb), b); 283 } 284 return true; 285 } 286 287 template <typename T> 288 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 289 { 290 if (unlikely (!successful)) return; 291 if (!count) return; 292 dirty (); 293 hb_codepoint_t g = *array; 294 while (count) 295 { 296 unsigned int m = get_major (g); 297 page_t *page = page_for_insert (g); if (unlikely (!page)) return; 298 unsigned int start = major_start (m); 299 unsigned int end = major_start (m + 1); 300 do 301 { 302 page->add (g); 303 304 array = (const T *) ((const char *) array + stride); 305 count--; 306 } 307 while (count && (g = *array, start <= g && g < end)); 308 } 309 } 310 311 /* Might return false if array looks unsorted. 312 * Used for faster rejection of corrupt data. */ 313 template <typename T> 314 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) 315 { 316 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ 317 if (!count) return true; 318 dirty (); 319 hb_codepoint_t g = *array; 320 hb_codepoint_t last_g = g; 321 while (count) 322 { 323 unsigned int m = get_major (g); 324 page_t *page = page_for_insert (g); if (unlikely (!page)) return false; 325 unsigned int end = major_start (m + 1); 326 do 327 { 328 /* If we try harder we can change the following comparison to <=; 329 * Not sure if it's worth it. */ 330 if (g < last_g) return false; 331 last_g = g; 332 page->add (g); 333 334 array = (const T *) ((const char *) array + stride); 335 count--; 336 } 337 while (count && (g = *array, g < end)); 338 } 339 return true; 340 } 341 342 void del (hb_codepoint_t g) 343 { 344 /* TODO perform op even if !successful. */ 345 if (unlikely (!successful)) return; 346 page_t *page = page_for (g); 347 if (!page) 348 return; 349 dirty (); 350 page->del (g); 351 } 352 void del_range (hb_codepoint_t a, hb_codepoint_t b) 353 { 354 /* TODO perform op even if !successful. */ 355 /* TODO Optimize, like add_range(). */ 356 if (unlikely (!successful)) return; 357 for (unsigned int i = a; i < b + 1; i++) 358 del (i); 359 } 360 bool has (hb_codepoint_t g) const 361 { 362 const page_t *page = page_for (g); 363 if (!page) 364 return false; 365 return page->has (g); 366 } 367 bool intersects (hb_codepoint_t first, 368 hb_codepoint_t last) const 369 { 370 hb_codepoint_t c = first - 1; 371 return next (&c) && c <= last; 372 } 373 void set (const hb_set_t *other) 374 { 375 if (unlikely (!successful)) return; 376 unsigned int count = other->pages.length; 377 if (!resize (count)) 378 return; 379 population = other->population; 380 memcpy ((void *) pages, (const void *) other->pages, count * pages.item_size); 381 memcpy ((void *) page_map, (const void *) other->page_map, count * page_map.item_size); 382 } 383 384 bool is_equal (const hb_set_t *other) const 385 { 386 if (get_population () != other->get_population ()) 387 return false; 388 389 unsigned int na = pages.length; 390 unsigned int nb = other->pages.length; 391 392 unsigned int a = 0, b = 0; 393 for (; a < na && b < nb; ) 394 { 395 if (page_at (a).is_empty ()) { a++; continue; } 396 if (other->page_at (b).is_empty ()) { b++; continue; } 397 if (page_map[a].major != other->page_map[b].major || 398 !page_at (a).is_equal (&other->page_at (b))) 399 return false; 400 a++; 401 b++; 402 } 403 for (; a < na; a++) 404 if (!page_at (a).is_empty ()) { return false; } 405 for (; b < nb; b++) 406 if (!other->page_at (b).is_empty ()) { return false; } 407 408 return true; 409 } 410 411 bool is_subset (const hb_set_t *larger_set) const 412 { 413 if (get_population () > larger_set->get_population ()) 414 return false; 415 416 /* TODO Optimize to use pages. */ 417 hb_codepoint_t c = INVALID; 418 while (next (&c)) 419 if (!larger_set->has (c)) 420 return false; 421 422 return true; 423 } 424 425 template <class Op> 426 void process (const hb_set_t *other) 427 { 428 if (unlikely (!successful)) return; 429 430 dirty (); 431 432 unsigned int na = pages.length; 433 unsigned int nb = other->pages.length; 434 unsigned int next_page = na; 435 436 unsigned int count = 0, newCount = 0; 437 unsigned int a = 0, b = 0; 438 for (; a < na && b < nb; ) 439 { 440 if (page_map[a].major == other->page_map[b].major) 441 { 442 count++; 443 a++; 444 b++; 445 } 446 else if (page_map[a].major < other->page_map[b].major) 447 { 448 if (Op::passthru_left) 449 count++; 450 a++; 451 } 452 else 453 { 454 if (Op::passthru_right) 455 count++; 456 b++; 457 } 458 } 459 if (Op::passthru_left) 460 count += na - a; 461 if (Op::passthru_right) 462 count += nb - b; 463 464 if (count > pages.length) 465 if (!resize (count)) 466 return; 467 newCount = count; 468 469 /* Process in-place backward. */ 470 a = na; 471 b = nb; 472 for (; a && b; ) 473 { 474 if (page_map[a - 1].major == other->page_map[b - 1].major) 475 { 476 a--; 477 b--; 478 count--; 479 page_map[count] = page_map[a]; 480 Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v); 481 } 482 else if (page_map[a - 1].major > other->page_map[b - 1].major) 483 { 484 a--; 500 } 501 } 502 } 503 if (Op::passthru_left) 504 while (a) 505 { 506 a--; 507 count--; 508 page_map[count] = page_map [a]; 509 } 510 if (Op::passthru_right) 511 while (b) 512 { 513 b--; 514 count--; 515 page_map[count].major = other->page_map[b].major; 516 page_map[count].index = next_page++; 517 page_at (count).v = other->page_at (b).v; 518 } 519 assert (!count); 520 if (pages.length > newCount) 521 resize (newCount); 522 } 523 524 void union_ (const hb_set_t *other) 525 { 526 process<HbOpOr> (other); 527 } 528 void intersect (const hb_set_t *other) 529 { 530 process<HbOpAnd> (other); 531 } 532 void subtract (const hb_set_t *other) 533 { 534 process<HbOpMinus> (other); 535 } 536 void symmetric_difference (const hb_set_t *other) 537 { 538 process<HbOpXor> (other); 539 } 540 bool next (hb_codepoint_t *codepoint) const 541 { 542 if (unlikely (*codepoint == INVALID)) { 543 *codepoint = get_min (); 544 return *codepoint != INVALID; 545 } 546 547 page_map_t map = {get_major (*codepoint), 0}; 548 unsigned int i; 549 page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); 550 if (i < page_map.length && page_map[i].major == map.major) 551 { 552 if (pages[page_map[i].index].next (codepoint)) 553 { 554 *codepoint += page_map[i].major * page_t::PAGE_BITS; 555 return true; 556 } 557 i++; 558 } 559 for (; i < page_map.length; i++) 560 { 561 hb_codepoint_t m = pages[page_map[i].index].get_min (); 562 if (m != INVALID) 563 { 564 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 565 return true; 566 } 567 } 568 *codepoint = INVALID; 569 return false; 570 } 571 bool previous (hb_codepoint_t *codepoint) const 572 { 573 if (unlikely (*codepoint == INVALID)) { 574 *codepoint = get_max (); 575 return *codepoint != INVALID; 576 } 577 578 page_map_t map = {get_major (*codepoint), 0}; 579 unsigned int i; 580 page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); 581 if (i < page_map.length && page_map[i].major == map.major) 582 { 583 if (pages[page_map[i].index].previous (codepoint)) 584 { 585 *codepoint += page_map[i].major * page_t::PAGE_BITS; 586 return true; 587 } 588 } 589 i--; 590 for (; (int) i >= 0; i--) 591 { 592 hb_codepoint_t m = pages[page_map[i].index].get_max (); 593 if (m != INVALID) 594 { 595 *codepoint = page_map[i].major * page_t::PAGE_BITS + m; 596 return true; 597 } 598 } 599 *codepoint = INVALID; 600 return false; 601 } 602 bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 603 { 604 hb_codepoint_t i; 605 606 i = *last; 607 if (!next (&i)) 608 { 609 *last = *first = INVALID; 610 return false; 611 } 612 613 /* TODO Speed up. */ 614 *last = *first = i; 615 while (next (&i) && i == *last + 1) 616 (*last)++; 617 618 return true; 619 } 620 bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const 621 { 622 hb_codepoint_t i; 623 624 i = *first; 625 if (!previous (&i)) 626 { 627 *last = *first = INVALID; 628 return false; 629 } 630 631 /* TODO Speed up. */ 632 *last = *first = i; 633 while (previous (&i) && i == *first - 1) 634 (*first)--; 635 636 return true; 637 } 638 639 unsigned int get_population () const 640 { 641 if (population != (unsigned int) -1) 642 return population; 643 644 unsigned int pop = 0; 645 unsigned int count = pages.length; 646 for (unsigned int i = 0; i < count; i++) 647 pop += pages[i].get_population (); 648 649 population = pop; 650 return pop; 651 } 652 hb_codepoint_t get_min () const 653 { 654 unsigned int count = pages.length; 655 for (unsigned int i = 0; i < count; i++) 656 if (!page_at (i).is_empty ()) 657 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); 658 return INVALID; 659 } 660 hb_codepoint_t get_max () const 661 { 662 unsigned int count = pages.length; 663 for (int i = count - 1; i >= 0; i++) 664 if (!page_at (i).is_empty ()) 665 return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max (); 666 return INVALID; 667 } 668 669 static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 670 671 /* 672 * Iterator implementation. 673 */ 674 struct const_iter_t : hb_sorted_iter_t<const_iter_t, const hb_codepoint_t> 675 { 676 const_iter_t (const hb_set_t &s_) : 677 s (s_), v (INVALID), l (s.get_population () + 1) { __next__ (); } 678 679 typedef hb_codepoint_t __item_type__; 680 hb_codepoint_t __item__ () const { return v; } 681 bool __more__ () const { return v != INVALID; } 682 void __next__ () { s.next (&v); if (l) l--; } 683 void __prev__ () { s.previous (&v); } 684 unsigned __len__ () { return l; } 685 686 protected: 687 const hb_set_t &s; 688 hb_codepoint_t v; 689 unsigned l; 690 }; 691 const_iter_t const_iter () const { return const_iter_t (*this); } 692 operator const_iter_t () const { return const_iter (); } 693 typedef const_iter_t iter_t; 694 iter_t iter () const { return const_iter (); } 695 696 protected: 697 698 page_t *page_for_insert (hb_codepoint_t g) 699 { 700 page_map_t map = {get_major (g), pages.length}; 701 unsigned int i; 702 if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST)) 703 { 704 if (!resize (pages.length + 1)) 705 return nullptr; 706 707 pages[map.index].init0 (); 708 memmove (page_map + i + 1, 709 page_map + i, 710 (page_map.length - 1 - i) * page_map.item_size); 711 page_map[i] = map; 712 } 713 return &pages[page_map[i].index]; 714 } 715 page_t *page_for (hb_codepoint_t g) 716 { 717 page_map_t key = {get_major (g)}; 718 const page_map_t *found = page_map.bsearch (key); 719 if (found) 720 return &pages[found->index]; 721 return nullptr; 722 } 723 const page_t *page_for (hb_codepoint_t g) const 724 { 725 page_map_t key = {get_major (g)}; 726 const page_map_t *found = page_map.bsearch (key); 727 if (found) 728 return &pages[found->index]; 729 return nullptr; 730 } 731 page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } 732 const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } 733 unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } 734 hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } 735 }; 736 737 738 #endif /* HB_SET_HH */ |