1 /*
   2  * Copyright © 2009,2010  Red Hat, Inc.
   3  * Copyright © 2011,2012  Google, Inc.
   4  *
   5  *  This is part of HarfBuzz, a text shaping library.
   6  *
   7  * Permission is hereby granted, without written agreement and without
   8  * license or royalty fees, to use, copy, modify, and distribute this
   9  * software and its documentation for any purpose, provided that the
  10  * above copyright notice and the following two paragraphs appear in
  11  * all copies of this software.
  12  *
  13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  17  * DAMAGE.
  18  *
  19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  24  *
  25  * Red Hat Author(s): Behdad Esfahbod
  26  * Google Author(s): Behdad Esfahbod
  27  */
  28 
  29 #include "hb-private.hh"
  30 
  31 #include "hb-mutex-private.hh"
  32 #include "hb-object-private.hh"
  33 
  34 #include <locale.h>
  35 
  36 
  37 /* hb_options_t */
  38 
  39 hb_options_union_t _hb_options;
  40 
  41 void
  42 _hb_options_init (void)
  43 {
  44   hb_options_union_t u;
  45   u.i = 0;
  46   u.opts.initialized = 1;
  47 
  48   char *c = getenv ("HB_OPTIONS");
  49   u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
  50 
  51   /* This is idempotent and threadsafe. */
  52   _hb_options = u;
  53 }
  54 
  55 
  56 /* hb_tag_t */
  57 
  58 /**
  59  * hb_tag_from_string:
  60  * @str: (array length=len) (element-type uint8_t): 
  61  * @len: 
  62  *
  63  * 
  64  *
  65  * Return value: 
  66  *
  67  * Since: 0.9.2
  68  **/
  69 hb_tag_t
  70 hb_tag_from_string (const char *str, int len)
  71 {
  72   char tag[4];
  73   unsigned int i;
  74 
  75   if (!str || !len || !*str)
  76     return HB_TAG_NONE;
  77 
  78   if (len < 0 || len > 4)
  79     len = 4;
  80   for (i = 0; i < (unsigned) len && str[i]; i++)
  81     tag[i] = str[i];
  82   for (; i < 4; i++)
  83     tag[i] = ' ';
  84 
  85   return HB_TAG_CHAR4 (tag);
  86 }
  87 
  88 /**
  89  * hb_tag_to_string:
  90  * @tag: 
  91  * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t): 
  92  *
  93  * 
  94  *
  95  * Since: 0.9.5
  96  **/
  97 void
  98 hb_tag_to_string (hb_tag_t tag, char *buf)
  99 {
 100   buf[0] = (char) (uint8_t) (tag >> 24);
 101   buf[1] = (char) (uint8_t) (tag >> 16);
 102   buf[2] = (char) (uint8_t) (tag >>  8);
 103   buf[3] = (char) (uint8_t) (tag >>  0);
 104 }
 105 
 106 
 107 /* hb_direction_t */
 108 
 109 const char direction_strings[][4] = {
 110   "ltr",
 111   "rtl",
 112   "ttb",
 113   "btt"
 114 };
 115 
 116 /**
 117  * hb_direction_from_string:
 118  * @str: (array length=len) (element-type uint8_t): 
 119  * @len: 
 120  *
 121  * 
 122  *
 123  * Return value: 
 124  *
 125  * Since: 0.9.2
 126  **/
 127 hb_direction_t
 128 hb_direction_from_string (const char *str, int len)
 129 {
 130   if (unlikely (!str || !len || !*str))
 131     return HB_DIRECTION_INVALID;
 132 
 133   /* Lets match loosely: just match the first letter, such that
 134    * all of "ltr", "left-to-right", etc work!
 135    */
 136   char c = TOLOWER (str[0]);
 137   for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
 138     if (c == direction_strings[i][0])
 139       return (hb_direction_t) (HB_DIRECTION_LTR + i);
 140 
 141   return HB_DIRECTION_INVALID;
 142 }
 143 
 144 /**
 145  * hb_direction_to_string:
 146  * @direction: 
 147  *
 148  * 
 149  *
 150  * Return value: (transfer none): 
 151  *
 152  * Since: 0.9.2
 153  **/
 154 const char *
 155 hb_direction_to_string (hb_direction_t direction)
 156 {
 157   if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
 158               < ARRAY_LENGTH (direction_strings)))
 159     return direction_strings[direction - HB_DIRECTION_LTR];
 160 
 161   return "invalid";
 162 }
 163 
 164 
 165 /* hb_language_t */
 166 
/* View of a language handle as a string: in this file hb_language_t values
 * are produced by casting a heap-allocated NUL-terminated string, and
 * hb_language_to_string() reads it back through @s.  The array is declared
 * with one element but the string extends past it. */
struct hb_language_impl_t {
  const char s[1];
};
 170 
/* Canonicalization table for language strings, indexed by byte value.
 *  - ASCII letters map to their lower-case form;
 *  - ASCII digits map to themselves;
 *  - '-', '@' and '_' all map to '-';
 *  - every other byte maps to 0 (bytes 0x80..0xFF are not listed and are
 *    therefore zero-initialized), which ends a canonical string. */
static const char canon_map[256] = {
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
 181 
 182 static bool
 183 lang_equal (hb_language_t  v1,
 184             const void    *v2)
 185 {
 186   const unsigned char *p1 = (const unsigned char *) v1;
 187   const unsigned char *p2 = (const unsigned char *) v2;
 188 
 189   while (*p1 && *p1 == canon_map[*p2])
 190     p1++, p2++;
 191 
 192   return *p1 == canon_map[*p2];
 193 }
 194 
#if 0
/* Disabled (compiled out by the surrounding #if 0).
 * Hashes the canonicalized bytes of @key with h = h * 31 + byte, stopping
 * at the first byte that canon_map sends to 0. */
static unsigned int
lang_hash (const void *key)
{
  const unsigned char *p = key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
 210 
 211 
 212 struct hb_language_item_t {
 213 
 214   struct hb_language_item_t *next;
 215   hb_language_t lang;
 216 
 217   inline bool operator == (const char *s) const {
 218     return lang_equal (lang, s);
 219   }
 220 
 221   inline hb_language_item_t & operator = (const char *s) {
 222     lang = (hb_language_t) strdup (s);
 223     for (unsigned char *p = (unsigned char *) lang; *p; p++)
 224       *p = canon_map[*p];
 225 
 226     return *this;
 227   }
 228 
 229   void finish (void) { free ((void *) lang); }
 230 };
 231 
 232 
/* Thread-safe lock-free language list */
/* Head of the list of known languages.  lang_find_or_insert() reads it
 * with hb_atomic_ptr_get() and prepends new items with
 * hb_atomic_ptr_cmpexch(). */

static hb_language_item_t *langs;
 236 
 237 #ifdef HB_USE_ATEXIT
 238 static
 239 void free_langs (void)
 240 {
 241   while (langs) {
 242     hb_language_item_t *next = langs->next;
 243     langs->finish ();
 244     free (langs);
 245     langs = next;
 246   }
 247 }
 248 #endif
 249 
 250 static hb_language_item_t *
 251 lang_find_or_insert (const char *key)
 252 {
 253 retry:
 254   hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
 255 
 256   for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
 257     if (*lang == key)
 258       return lang;
 259 
 260   /* Not found; allocate one. */
 261   hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
 262   if (unlikely (!lang))
 263     return NULL;
 264   lang->next = first_lang;
 265   *lang = key;
 266 
 267   if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
 268     lang->finish ();
 269     free (lang);
 270     goto retry;
 271   }
 272 
 273 #ifdef HB_USE_ATEXIT
 274   if (!first_lang)
 275     atexit (free_langs); /* First person registers atexit() callback. */
 276 #endif
 277 
 278   return lang;
 279 }
 280 
 281 
 282 /**
 283  * hb_language_from_string:
 284  * @str: (array length=len) (element-type uint8_t): a string representing
 285  *       ISO 639 language code
 286  * @len: length of the @str, or -1 if it is %NULL-terminated.
 287  *
 288  * Converts @str representing an ISO 639 language code to the corresponding
 289  * #hb_language_t.
 290  *
 291  * Return value: (transfer none):
 292  * The #hb_language_t corresponding to the ISO 639 language code.
 293  *
 294  * Since: 0.9.2
 295  **/
 296 hb_language_t
 297 hb_language_from_string (const char *str, int len)
 298 {
 299   if (!str || !len || !*str)
 300     return HB_LANGUAGE_INVALID;
 301 
 302   hb_language_item_t *item = NULL;
 303   if (len >= 0)
 304   {
 305     /* NUL-terminate it. */
 306     char strbuf[64];
 307     len = MIN (len, (int) sizeof (strbuf) - 1);
 308     memcpy (strbuf, str, len);
 309     strbuf[len] = '\0';
 310     item = lang_find_or_insert (strbuf);
 311   }
 312   else
 313     item = lang_find_or_insert (str);
 314 
 315   return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
 316 }
 317 
 318 /**
 319  * hb_language_to_string:
 320  * @language: an #hb_language_t to convert.
 321  *
 322  * See hb_language_from_string().
 323  *
 324  * Return value: (transfer none):
 325  * A %NULL-terminated string representing the @language. Must not be freed by
 326  * the caller.
 327  *
 328  * Since: 0.9.2
 329  **/
 330 const char *
 331 hb_language_to_string (hb_language_t language)
 332 {
 333   /* This is actually NULL-safe! */
 334   return language->s;
 335 }
 336 
 337 /**
 338  * hb_language_get_default:
 339  *
 340  * 
 341  *
 342  * Return value: (transfer none):
 343  *
 344  * Since: 0.9.2
 345  **/
 346 hb_language_t
 347 hb_language_get_default (void)
 348 {
 349   static hb_language_t default_language = HB_LANGUAGE_INVALID;
 350 
 351   hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
 352   if (unlikely (language == HB_LANGUAGE_INVALID)) {
 353     language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
 354     (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
 355   }
 356 
 357   return default_language;
 358 }
 359 
 360 
 361 /* hb_script_t */
 362 
 363 /**
 364  * hb_script_from_iso15924_tag:
 365  * @tag: an #hb_tag_t representing an ISO 15924 tag.
 366  *
 367  * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
 368  *
 369  * Return value: 
 370  * An #hb_script_t corresponding to the ISO 15924 tag.
 371  *
 372  * Since: 0.9.2
 373  **/
 374 hb_script_t
 375 hb_script_from_iso15924_tag (hb_tag_t tag)
 376 {
 377   if (unlikely (tag == HB_TAG_NONE))
 378     return HB_SCRIPT_INVALID;
 379 
 380   /* Be lenient, adjust case (one capital letter followed by three small letters) */
 381   tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
 382 
 383   switch (tag) {
 384 
 385     /* These graduated from the 'Q' private-area codes, but
 386      * the old code is still aliased by Unicode, and the Qaai
 387      * one in use by ICU. */
 388     case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
 389     case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
 390 
 391     /* Script variants from http://unicode.org/iso15924/ */
 392     case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
 393     case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
 394     case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
 395     case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
 396     case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
 397     case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
 398   }
 399 
 400   /* If it looks right, just use the tag as a script */
 401   if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
 402     return (hb_script_t) tag;
 403 
 404   /* Otherwise, return unknown */
 405   return HB_SCRIPT_UNKNOWN;
 406 }
 407 
 408 /**
 409  * hb_script_from_string:
 410  * @str: (array length=len) (element-type uint8_t): a string representing an
 411  *       ISO 15924 tag.
 412  * @len: length of the @str, or -1 if it is %NULL-terminated.
 413  *
 414  * Converts a string @str representing an ISO 15924 script tag to a
 415  * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
 416  * hb_script_from_iso15924_tag().
 417  *
 418  * Return value: 
 419  * An #hb_script_t corresponding to the ISO 15924 tag.
 420  *
 421  * Since: 0.9.2
 422  **/
 423 hb_script_t
 424 hb_script_from_string (const char *str, int len)
 425 {
 426   return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
 427 }
 428 
 429 /**
 430  * hb_script_to_iso15924_tag:
 431  * @script: an #hb_script_ to convert.
 432  *
 433  * See hb_script_from_iso15924_tag().
 434  *
 435  * Return value:
 436  * An #hb_tag_t representing an ISO 15924 script tag.
 437  *
 438  * Since: 0.9.2
 439  **/
 440 hb_tag_t
 441 hb_script_to_iso15924_tag (hb_script_t script)
 442 {
 443   return (hb_tag_t) script;
 444 }
 445 
 446 /**
 447  * hb_script_get_horizontal_direction:
 448  * @script: 
 449  *
 450  * 
 451  *
 452  * Return value: 
 453  *
 454  * Since: 0.9.2
 455  **/
 456 hb_direction_t
 457 hb_script_get_horizontal_direction (hb_script_t script)
 458 {
 459   /* http://goo.gl/x9ilM */
 460   switch ((hb_tag_t) script)
 461   {
 462     /* Unicode-1.1 additions */
 463     case HB_SCRIPT_ARABIC:
 464     case HB_SCRIPT_HEBREW:
 465 
 466     /* Unicode-3.0 additions */
 467     case HB_SCRIPT_SYRIAC:
 468     case HB_SCRIPT_THAANA:
 469 
 470     /* Unicode-4.0 additions */
 471     case HB_SCRIPT_CYPRIOT:
 472 
 473     /* Unicode-4.1 additions */
 474     case HB_SCRIPT_KHAROSHTHI:
 475 
 476     /* Unicode-5.0 additions */
 477     case HB_SCRIPT_PHOENICIAN:
 478     case HB_SCRIPT_NKO:
 479 
 480     /* Unicode-5.1 additions */
 481     case HB_SCRIPT_LYDIAN:
 482 
 483     /* Unicode-5.2 additions */
 484     case HB_SCRIPT_AVESTAN:
 485     case HB_SCRIPT_IMPERIAL_ARAMAIC:
 486     case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
 487     case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
 488     case HB_SCRIPT_OLD_SOUTH_ARABIAN:
 489     case HB_SCRIPT_OLD_TURKIC:
 490     case HB_SCRIPT_SAMARITAN:
 491 
 492     /* Unicode-6.0 additions */
 493     case HB_SCRIPT_MANDAIC:
 494 
 495     /* Unicode-6.1 additions */
 496     case HB_SCRIPT_MEROITIC_CURSIVE:
 497     case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
 498 
 499     /* Unicode-7.0 additions */
 500     case HB_SCRIPT_MANICHAEAN:
 501     case HB_SCRIPT_MENDE_KIKAKUI:
 502     case HB_SCRIPT_NABATAEAN:
 503     case HB_SCRIPT_OLD_NORTH_ARABIAN:
 504     case HB_SCRIPT_PALMYRENE:
 505     case HB_SCRIPT_PSALTER_PAHLAVI:
 506 
 507     /* Unicode-8.0 additions */
 508     case HB_SCRIPT_OLD_HUNGARIAN:
 509 
 510     /* Unicode-9.0 additions */
 511     case HB_SCRIPT_ADLAM:
 512 
 513       return HB_DIRECTION_RTL;
 514   }
 515 
 516   return HB_DIRECTION_LTR;
 517 }
 518 
 519 
 520 /* hb_user_data_array_t */
 521 
 522 bool
 523 hb_user_data_array_t::set (hb_user_data_key_t *key,
 524                            void *              data,
 525                            hb_destroy_func_t   destroy,
 526                            hb_bool_t           replace)
 527 {
 528   if (!key)
 529     return false;
 530 
 531   if (replace) {
 532     if (!data && !destroy) {
 533       items.remove (key, lock);
 534       return true;
 535     }
 536   }
 537   hb_user_data_item_t item = {key, data, destroy};
 538   bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
 539 
 540   return ret;
 541 }
 542 
 543 void *
 544 hb_user_data_array_t::get (hb_user_data_key_t *key)
 545 {
 546   hb_user_data_item_t item = {NULL, NULL, NULL};
 547 
 548   return items.find (key, &item, lock) ? item.data : NULL;
 549 }
 550 
 551 
 552 /* hb_version */
 553 
 554 /**
 555  * hb_version:
 556  * @major: (out): Library major version component.
 557  * @minor: (out): Library minor version component.
 558  * @micro: (out): Library micro version component.
 559  *
 560  * Returns library version as three integer components.
 561  *
 562  * Since: 0.9.2
 563  **/
 564 void
 565 hb_version (unsigned int *major,
 566             unsigned int *minor,
 567             unsigned int *micro)
 568 {
 569   *major = HB_VERSION_MAJOR;
 570   *minor = HB_VERSION_MINOR;
 571   *micro = HB_VERSION_MICRO;
 572 }
 573 
 574 /**
 575  * hb_version_string:
 576  *
 577  * Returns library version as a string with three components.
 578  *
 579  * Return value: library version string.
 580  *
 581  * Since: 0.9.2
 582  **/
 583 const char *
 584 hb_version_string (void)
 585 {
 586   return HB_VERSION_STRING;
 587 }
 588 
 589 /**
 590  * hb_version_atleast:
 591  * @major: 
 592  * @minor: 
 593  * @micro: 
 594  *
 595  * 
 596  *
 597  * Return value: 
 598  *
 599  * Since: 0.9.30
 600  **/
 601 hb_bool_t
 602 hb_version_atleast (unsigned int major,
 603                     unsigned int minor,
 604                     unsigned int micro)
 605 {
 606   return HB_VERSION_ATLEAST (major, minor, micro);
 607 }