1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. Oracle designates this 7 * particular file as subject to the "Classpath" exception as provided 8 * by Oracle in the LICENSE file that accompanied this code. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 */ 24 25 // This file is available under and governed by the GNU General Public 26 // License version 2 only, as published by the Free Software Foundation. 27 // However, the following notice accompanied the original version of this 28 // file: 29 // 30 /* 31 * Copyright © 2012 Google, Inc. 32 * 33 * This is part of HarfBuzz, a text shaping library. 34 * 35 * Permission is hereby granted, without written agreement and without 36 * license or royalty fees, to use, copy, modify, and distribute this 37 * software and its documentation for any purpose, provided that the 38 * above copyright notice and the following two paragraphs appear in 39 * all copies of this software. 
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_PRIVATE_HH
#define HB_SET_PRIVATE_HH

#include "hb-private.hh"
#include "hb-object-private.hh"


/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The frozen-set can be used instead of a digest, to trade more
 * memory for 100% accuracy, but in practice, that doesn't look like
 * an attractive trade-off.
 */

/* Single-word filter: codepoint g maps to bit ((g >> shift) & (mask_bits - 1))
 * of `mask`.  Bits only ever accumulate, so may_have() can return false
 * positives but never a false negative for a codepoint that was added. */
template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  ASSERT_POD ();

  static const unsigned int mask_bytes = sizeof (mask_t);
  static const unsigned int mask_bits = sizeof (mask_t) * 8;
  /* num_bits == log2 (mask_bits), derived from the byte size of mask_t
   * (e.g. 4 bytes -> 3+1+1 == 5 == log2 (32)). */
  static const unsigned int num_bits = 0
    + (mask_bytes >= 1 ? 3 : 0)
    + (mask_bytes >= 2 ? 1 : 0)
    + (mask_bytes >= 4 ? 1 : 0)
    + (mask_bytes >= 8 ? 1 : 0)
    + (mask_bytes >= 16? 1 : 0)
    + 0;

  /* The bit field consumed from the codepoint must fit inside it. */
  ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
  ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);

  /* Reset to the empty filter. */
  inline void init (void) {
    mask = 0;
  }

  /* Record one codepoint. */
  inline void add (hb_codepoint_t g) {
    mask |= mask_for (g);
  }

  /* Record the inclusive codepoint range [a, b]. */
  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      /* Range spans every bucket: flood the whole filter. */
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
      /* Sets every bit from bit(a) through bit(b) inclusive, wrapping
       * around the word when bit(b) < bit(a).  Relies on unsigned
       * modular arithmetic: (mb - ma) yields the bits strictly between
       * the two, + mb adds the top bit, - (mb < ma) repairs the borrow
       * on wrap-around. */
      mask |= mb + (mb - ma) - (mb < ma);
    }
  }

  /* Approximate membership: false positives possible, false negatives not. */
  inline bool may_have (hb_codepoint_t g) const {
    return !!(mask & mask_for (g));
  }

  private:

  static inline mask_t mask_for (hb_codepoint_t g) {
    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
  }
  mask_t mask;
};

/* Chains two digests: a codepoint "may be present" only when both
 * sub-filters agree, which lowers the combined false-positive rate. */
template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  ASSERT_POD ();

  inline void init (void) {
    head.init ();
    tail.init ();
  }

  /* Feed both sub-filters. */
  inline void add (hb_codepoint_t g) {
    head.add (g);
    tail.add (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
    head.add_range (a, b);
    tail.add_range (a, b);
  }

  /* Hit only if both sub-filters report a hit. */
  inline bool may_have (hb_codepoint_t g) const {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;



/*
 * hb_set_t
 */


/* TODO Make this faster and memory efficient.
*/ 182 183 struct hb_set_t 184 { 185 friend struct hb_frozen_set_t; 186 187 hb_object_header_t header; 188 ASSERT_POD (); 189 bool in_error; 190 191 inline void init (void) { 192 hb_object_init (this); 193 clear (); 194 } 195 inline void fini (void) { 196 } 197 inline void clear (void) { 198 if (unlikely (hb_object_is_inert (this))) 199 return; 200 in_error = false; 201 memset (elts, 0, sizeof elts); 202 } 203 inline bool is_empty (void) const { 204 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++) 205 if (elts[i]) 206 return false; 207 return true; 208 } 209 inline void add (hb_codepoint_t g) 210 { 211 if (unlikely (in_error)) return; 212 if (unlikely (g == INVALID)) return; 213 if (unlikely (g > MAX_G)) return; 214 elt (g) |= mask (g); 215 } 216 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) 217 { 218 if (unlikely (in_error)) return; 219 /* TODO Speedup */ 220 for (unsigned int i = a; i < b + 1; i++) 221 add (i); 222 } 223 inline void del (hb_codepoint_t g) 224 { 225 if (unlikely (in_error)) return; 226 if (unlikely (g > MAX_G)) return; 227 elt (g) &= ~mask (g); 228 } 229 inline void del_range (hb_codepoint_t a, hb_codepoint_t b) 230 { 231 if (unlikely (in_error)) return; 232 /* TODO Speedup */ 233 for (unsigned int i = a; i < b + 1; i++) 234 del (i); 235 } 236 inline bool has (hb_codepoint_t g) const 237 { 238 if (unlikely (g > MAX_G)) return false; 239 return !!(elt (g) & mask (g)); 240 } 241 inline bool intersects (hb_codepoint_t first, 242 hb_codepoint_t last) const 243 { 244 if (unlikely (first > MAX_G)) return false; 245 if (unlikely (last > MAX_G)) last = MAX_G; 246 unsigned int end = last + 1; 247 for (hb_codepoint_t i = first; i < end; i++) 248 if (has (i)) 249 return true; 250 return false; 251 } 252 inline bool is_equal (const hb_set_t *other) const 253 { 254 for (unsigned int i = 0; i < ELTS; i++) 255 if (elts[i] != other->elts[i]) 256 return false; 257 return true; 258 } 259 inline void set (const hb_set_t *other) 260 { 261 if 
(unlikely (in_error)) return; 262 for (unsigned int i = 0; i < ELTS; i++) 263 elts[i] = other->elts[i]; 264 } 265 inline void union_ (const hb_set_t *other) 266 { 267 if (unlikely (in_error)) return; 268 for (unsigned int i = 0; i < ELTS; i++) 269 elts[i] |= other->elts[i]; 270 } 271 inline void intersect (const hb_set_t *other) 272 { 273 if (unlikely (in_error)) return; 274 for (unsigned int i = 0; i < ELTS; i++) 275 elts[i] &= other->elts[i]; 276 } 277 inline void subtract (const hb_set_t *other) 278 { 279 if (unlikely (in_error)) return; 280 for (unsigned int i = 0; i < ELTS; i++) 281 elts[i] &= ~other->elts[i]; 282 } 283 inline void symmetric_difference (const hb_set_t *other) 284 { 285 if (unlikely (in_error)) return; 286 for (unsigned int i = 0; i < ELTS; i++) 287 elts[i] ^= other->elts[i]; 288 } 289 inline void invert (void) 290 { 291 if (unlikely (in_error)) return; 292 for (unsigned int i = 0; i < ELTS; i++) 293 elts[i] = ~elts[i]; 294 } 295 inline bool next (hb_codepoint_t *codepoint) const 296 { 297 if (unlikely (*codepoint == INVALID)) { 298 hb_codepoint_t i = get_min (); 299 if (i != INVALID) { 300 *codepoint = i; 301 return true; 302 } else { 303 *codepoint = INVALID; 304 return false; 305 } 306 } 307 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++) 308 if (has (i)) { 309 *codepoint = i; 310 return true; 311 } 312 *codepoint = INVALID; 313 return false; 314 } 315 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const 316 { 317 hb_codepoint_t i; 318 319 i = *last; 320 if (!next (&i)) 321 { 322 *last = *first = INVALID; 323 return false; 324 } 325 326 *last = *first = i; 327 while (next (&i) && i == *last + 1) 328 (*last)++; 329 330 return true; 331 } 332 333 inline unsigned int get_population (void) const 334 { 335 unsigned int count = 0; 336 for (unsigned int i = 0; i < ELTS; i++) 337 count += _hb_popcount32 (elts[i]); 338 return count; 339 } 340 inline hb_codepoint_t get_min (void) const 341 { 342 for (unsigned int i = 
0; i < ELTS; i++) 343 if (elts[i]) 344 for (unsigned int j = 0; j < BITS; j++) 345 if (elts[i] & (1 << j)) 346 return i * BITS + j; 347 return INVALID; 348 } 349 inline hb_codepoint_t get_max (void) const 350 { 351 for (unsigned int i = ELTS; i; i--) 352 if (elts[i - 1]) 353 for (unsigned int j = BITS; j; j--) 354 if (elts[i - 1] & (1 << (j - 1))) 355 return (i - 1) * BITS + (j - 1); 356 return INVALID; 357 } 358 359 typedef uint32_t elt_t; 360 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */ 361 static const unsigned int SHIFT = 5; 362 static const unsigned int BITS = (1 << SHIFT); 363 static const unsigned int MASK = BITS - 1; 364 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS; 365 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; 366 367 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; } 368 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } 369 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 370 371 elt_t elts[ELTS]; /* XXX 8kb */ 372 373 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS); 374 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G); 375 }; 376 377 struct hb_frozen_set_t 378 { 379 static const unsigned int SHIFT = hb_set_t::SHIFT; 380 static const unsigned int BITS = hb_set_t::BITS; 381 static const unsigned int MASK = hb_set_t::MASK; 382 typedef hb_set_t::elt_t elt_t; 383 384 inline void init (const hb_set_t &set) 385 { 386 start = count = 0; 387 elts = NULL; 388 389 unsigned int max = set.get_max (); 390 if (max == set.INVALID) 391 return; 392 unsigned int min = set.get_min (); 393 const elt_t &min_elt = set.elt (min); 394 395 start = min & ~MASK; 396 count = max - start + 1; 397 unsigned int num_elts = (count + BITS - 1) / BITS; 398 unsigned int elts_size = num_elts * sizeof (elt_t); 399 elts = (elt_t *) malloc (elts_size); 400 if (unlikely (!elts)) 401 { 402 start = count = 0; 403 return; 404 } 405 memcpy (elts, &min_elt, elts_size); 406 } 407 408 inline void 
fini (void) 409 { 410 if (elts) 411 free (elts); 412 } 413 414 inline bool has (hb_codepoint_t g) const 415 { 416 /* hb_codepoint_t is unsigned. */ 417 g -= start; 418 if (unlikely (g > count)) return false; 419 return !!(elt (g) & mask (g)); 420 } 421 422 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; } 423 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); } 424 425 private: 426 hb_codepoint_t start, count; 427 elt_t *elts; 428 }; 429 430 431 #endif /* HB_SET_PRIVATE_HH */