1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2009,2010  Red Hat, Inc.
  32  * Copyright © 2011,2012  Google, Inc.
  33  *
  34  *  This is part of HarfBuzz, a text shaping library.
  35  *
  36  * Permission is hereby granted, without written agreement and without
  37  * license or royalty fees, to use, copy, modify, and distribute this
  38  * software and its documentation for any purpose, provided that the
  39  * above copyright notice and the following two paragraphs appear in
  40  * all copies of this software.
  41  *
  42  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  43  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  44  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  45  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  46  * DAMAGE.
  47  *
  48  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  49  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  50  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  51  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  52  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  53  *
  54  * Red Hat Author(s): Behdad Esfahbod
  55  * Google Author(s): Behdad Esfahbod
  56  */
  57 
  58 #include "hb-private.hh"
  59 
  60 #include "hb-mutex-private.hh"
  61 #include "hb-object-private.hh"
  62 
  63 #include <locale.h>
  64 
  65 
  66 /* hb_options_t */
  67 
  68 hb_options_union_t _hb_options;
  69 
  70 void
  71 _hb_options_init (void)
  72 {
  73   hb_options_union_t u;
  74   u.i = 0;
  75   u.opts.initialized = 1;
  76 
  77   char *c = getenv ("HB_OPTIONS");
  78   u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
  79 
  80   /* This is idempotent and threadsafe. */
  81   _hb_options = u;
  82 }
  83 
  84 
  85 /* hb_tag_t */
  86 
  87 /**
  88  * hb_tag_from_string:
  89  * @str: (array length=len) (element-type uint8_t):
  90  * @len:
  91  *
  92  *
  93  *
  94  * Return value:
  95  *
  96  * Since: 0.9.2
  97  **/
  98 hb_tag_t
  99 hb_tag_from_string (const char *str, int len)
 100 {
 101   char tag[4];
 102   unsigned int i;
 103 
 104   if (!str || !len || !*str)
 105     return HB_TAG_NONE;
 106 
 107   if (len < 0 || len > 4)
 108     len = 4;
 109   for (i = 0; i < (unsigned) len && str[i]; i++)
 110     tag[i] = str[i];
 111   for (; i < 4; i++)
 112     tag[i] = ' ';
 113 
 114   return HB_TAG_CHAR4 (tag);
 115 }
 116 
 117 /**
 118  * hb_tag_to_string:
 119  * @tag:
 120  * @buf: (array fixed-size=4):
 121  *
 122  *
 123  *
 124  * Since: 0.9.5
 125  **/
 126 void
 127 hb_tag_to_string (hb_tag_t tag, char *buf)
 128 {
 129   buf[0] = (char) (uint8_t) (tag >> 24);
 130   buf[1] = (char) (uint8_t) (tag >> 16);
 131   buf[2] = (char) (uint8_t) (tag >>  8);
 132   buf[3] = (char) (uint8_t) (tag >>  0);
 133 }
 134 
 135 
 136 /* hb_direction_t */
 137 
 138 const char direction_strings[][4] = {
 139   "ltr",
 140   "rtl",
 141   "ttb",
 142   "btt"
 143 };
 144 
 145 /**
 146  * hb_direction_from_string:
 147  * @str: (array length=len) (element-type uint8_t):
 148  * @len:
 149  *
 150  *
 151  *
 152  * Return value:
 153  *
 154  * Since: 0.9.2
 155  **/
 156 hb_direction_t
 157 hb_direction_from_string (const char *str, int len)
 158 {
 159   if (unlikely (!str || !len || !*str))
 160     return HB_DIRECTION_INVALID;
 161 
 162   /* Lets match loosely: just match the first letter, such that
 163    * all of "ltr", "left-to-right", etc work!
 164    */
 165   char c = TOLOWER (str[0]);
 166   for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
 167     if (c == direction_strings[i][0])
 168       return (hb_direction_t) (HB_DIRECTION_LTR + i);
 169 
 170   return HB_DIRECTION_INVALID;
 171 }
 172 
 173 /**
 174  * hb_direction_to_string:
 175  * @direction:
 176  *
 177  *
 178  *
 179  * Return value: (transfer none):
 180  *
 181  * Since: 0.9.2
 182  **/
 183 const char *
 184 hb_direction_to_string (hb_direction_t direction)
 185 {
 186   if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
 187               < ARRAY_LENGTH (direction_strings)))
 188     return direction_strings[direction - HB_DIRECTION_LTR];
 189 
 190   return "invalid";
 191 }
 192 
 193 
 194 /* hb_language_t */
 195 
 196 struct hb_language_impl_t {
 197   const char s[1];
 198 };
 199 
 200 static const char canon_map[256] = {
 201    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 202    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
 203    0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
 204   '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
 205   '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 206   'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
 207    0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 208   'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
 209 };
 210 
 211 static bool
 212 lang_equal (hb_language_t  v1,
 213             const void    *v2)
 214 {
 215   const unsigned char *p1 = (const unsigned char *) v1;
 216   const unsigned char *p2 = (const unsigned char *) v2;
 217 
 218   while (*p1 && *p1 == canon_map[*p2])
 219     p1++, p2++;
 220 
 221   return *p1 == canon_map[*p2];
 222 }
 223 
 224 #if 0
 225 static unsigned int
 226 lang_hash (const void *key)
 227 {
 228   const unsigned char *p = key;
 229   unsigned int h = 0;
 230   while (canon_map[*p])
 231     {
 232       h = (h << 5) - h + canon_map[*p];
 233       p++;
 234     }
 235 
 236   return h;
 237 }
 238 #endif
 239 
 240 
 241 struct hb_language_item_t {
 242 
 243   struct hb_language_item_t *next;
 244   hb_language_t lang;
 245 
 246   inline bool operator == (const char *s) const {
 247     return lang_equal (lang, s);
 248   }
 249 
 250   inline hb_language_item_t & operator = (const char *s) {
 251     lang = (hb_language_t) strdup (s);
 252     for (unsigned char *p = (unsigned char *) lang; *p; p++)
 253       *p = canon_map[*p];
 254 
 255     return *this;
 256   }
 257 
 258   void finish (void) { free ((void *) lang); }
 259 };
 260 
 261 
 262 /* Thread-safe lock-free language list */
 263 
 264 static hb_language_item_t *langs;
 265 
 266 #ifdef HB_USE_ATEXIT
 267 static
 268 void free_langs (void)
 269 {
 270   while (langs) {
 271     hb_language_item_t *next = langs->next;
 272     langs->finish ();
 273     free (langs);
 274     langs = next;
 275   }
 276 }
 277 #endif
 278 
 279 static hb_language_item_t *
 280 lang_find_or_insert (const char *key)
 281 {
 282 retry:
 283   hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
 284 
 285   for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
 286     if (*lang == key)
 287       return lang;
 288 
 289   /* Not found; allocate one. */
 290   hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
 291   if (unlikely (!lang))
 292     return NULL;
 293   lang->next = first_lang;
 294   *lang = key;
 295 
 296   if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
 297     lang->finish ();
 298     free (lang);
 299     goto retry;
 300   }
 301 
 302 #ifdef HB_USE_ATEXIT
 303   if (!first_lang)
 304     atexit (free_langs); /* First person registers atexit() callback. */
 305 #endif
 306 
 307   return lang;
 308 }
 309 
 310 
 311 /**
 312  * hb_language_from_string:
 313  * @str: (array length=len) (element-type uint8_t):
 314  * @len:
 315  *
 316  *
 317  *
 318  * Return value: (transfer none):
 319  *
 320  * Since: 0.9.2
 321  **/
 322 hb_language_t
 323 hb_language_from_string (const char *str, int len)
 324 {
 325   if (!str || !len || !*str)
 326     return HB_LANGUAGE_INVALID;
 327 
 328   hb_language_item_t *item = NULL;
 329   if (len >= 0)
 330   {
 331     /* NUL-terminate it. */
 332     char strbuf[64];
 333     len = MIN (len, (int) sizeof (strbuf) - 1);
 334     memcpy (strbuf, str, len);
 335     strbuf[len] = '\0';
 336     item = lang_find_or_insert (strbuf);
 337   }
 338   else
 339     item = lang_find_or_insert (str);
 340 
 341   return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
 342 }
 343 
 344 /**
 345  * hb_language_to_string:
 346  * @language:
 347  *
 348  *
 349  *
 350  * Return value: (transfer none):
 351  *
 352  * Since: 0.9.2
 353  **/
 354 const char *
 355 hb_language_to_string (hb_language_t language)
 356 {
 357   /* This is actually NULL-safe! */
 358   return language->s;
 359 }
 360 
 361 /**
 362  * hb_language_get_default:
 363  *
 364  *
 365  *
 366  * Return value: (transfer none):
 367  *
 368  * Since: 0.9.2
 369  **/
 370 hb_language_t
 371 hb_language_get_default (void)
 372 {
 373   static hb_language_t default_language = HB_LANGUAGE_INVALID;
 374 
 375   hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
 376   if (unlikely (language == HB_LANGUAGE_INVALID)) {
 377     language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
 378     (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
 379   }
 380 
 381   return default_language;
 382 }
 383 
 384 
 385 /* hb_script_t */
 386 
 387 /**
 388  * hb_script_from_iso15924_tag:
 389  * @tag:
 390  *
 391  *
 392  *
 393  * Return value:
 394  *
 395  * Since: 0.9.2
 396  **/
 397 hb_script_t
 398 hb_script_from_iso15924_tag (hb_tag_t tag)
 399 {
 400   if (unlikely (tag == HB_TAG_NONE))
 401     return HB_SCRIPT_INVALID;
 402 
 403   /* Be lenient, adjust case (one capital letter followed by three small letters) */
 404   tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
 405 
 406   switch (tag) {
 407 
 408     /* These graduated from the 'Q' private-area codes, but
 409      * the old code is still aliased by Unicode, and the Qaai
 410      * one in use by ICU. */
 411     case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
 412     case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
 413 
 414     /* Script variants from http://unicode.org/iso15924/ */
 415     case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
 416     case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
 417     case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
 418     case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
 419     case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
 420     case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
 421   }
 422 
 423   /* If it looks right, just use the tag as a script */
 424   if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
 425     return (hb_script_t) tag;
 426 
 427   /* Otherwise, return unknown */
 428   return HB_SCRIPT_UNKNOWN;
 429 }
 430 
 431 /**
 432  * hb_script_from_string:
 433  * @s: (array length=len) (element-type uint8_t):
 434  * @len:
 435  *
 436  *
 437  *
 438  * Return value:
 439  *
 440  * Since: 0.9.2
 441  **/
 442 hb_script_t
 443 hb_script_from_string (const char *s, int len)
 444 {
 445   return hb_script_from_iso15924_tag (hb_tag_from_string (s, len));
 446 }
 447 
 448 /**
 449  * hb_script_to_iso15924_tag:
 450  * @script:
 451  *
 452  *
 453  *
 454  * Return value:
 455  *
 456  * Since: 0.9.2
 457  **/
 458 hb_tag_t
 459 hb_script_to_iso15924_tag (hb_script_t script)
 460 {
 461   return (hb_tag_t) script;
 462 }
 463 
 464 /**
 465  * hb_script_get_horizontal_direction:
 466  * @script:
 467  *
 468  *
 469  *
 470  * Return value:
 471  *
 472  * Since: 0.9.2
 473  **/
 474 hb_direction_t
 475 hb_script_get_horizontal_direction (hb_script_t script)
 476 {
 477   /* http://goo.gl/x9ilM */
 478   switch ((hb_tag_t) script)
 479   {
 480     /* Unicode-1.1 additions */
 481     case HB_SCRIPT_ARABIC:
 482     case HB_SCRIPT_HEBREW:
 483 
 484     /* Unicode-3.0 additions */
 485     case HB_SCRIPT_SYRIAC:
 486     case HB_SCRIPT_THAANA:
 487 
 488     /* Unicode-4.0 additions */
 489     case HB_SCRIPT_CYPRIOT:
 490 
 491     /* Unicode-4.1 additions */
 492     case HB_SCRIPT_KHAROSHTHI:
 493 
 494     /* Unicode-5.0 additions */
 495     case HB_SCRIPT_PHOENICIAN:
 496     case HB_SCRIPT_NKO:
 497 
 498     /* Unicode-5.1 additions */
 499     case HB_SCRIPT_LYDIAN:
 500 
 501     /* Unicode-5.2 additions */
 502     case HB_SCRIPT_AVESTAN:
 503     case HB_SCRIPT_IMPERIAL_ARAMAIC:
 504     case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
 505     case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
 506     case HB_SCRIPT_OLD_SOUTH_ARABIAN:
 507     case HB_SCRIPT_OLD_TURKIC:
 508     case HB_SCRIPT_SAMARITAN:
 509 
 510     /* Unicode-6.0 additions */
 511     case HB_SCRIPT_MANDAIC:
 512 
 513     /* Unicode-6.1 additions */
 514     case HB_SCRIPT_MEROITIC_CURSIVE:
 515     case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
 516 
 517     /* Unicode-7.0 additions */
 518     case HB_SCRIPT_MANICHAEAN:
 519     case HB_SCRIPT_MENDE_KIKAKUI:
 520     case HB_SCRIPT_NABATAEAN:
 521     case HB_SCRIPT_OLD_NORTH_ARABIAN:
 522     case HB_SCRIPT_PALMYRENE:
 523     case HB_SCRIPT_PSALTER_PAHLAVI:
 524 
 525     /* Unicode-8.0 additions */
 526     case HB_SCRIPT_OLD_HUNGARIAN:
 527 
 528       return HB_DIRECTION_RTL;
 529   }
 530 
 531   return HB_DIRECTION_LTR;
 532 }
 533 
 534 
 535 /* hb_user_data_array_t */
 536 
 537 bool
 538 hb_user_data_array_t::set (hb_user_data_key_t *key,
 539                            void *              data,
 540                            hb_destroy_func_t   destroy,
 541                            hb_bool_t           replace)
 542 {
 543   if (!key)
 544     return false;
 545 
 546   if (replace) {
 547     if (!data && !destroy) {
 548       items.remove (key, lock);
 549       return true;
 550     }
 551   }
 552   hb_user_data_item_t item = {key, data, destroy};
 553   bool ret = !!items.replace_or_insert (item, lock, replace);
 554 
 555   return ret;
 556 }
 557 
 558 void *
 559 hb_user_data_array_t::get (hb_user_data_key_t *key)
 560 {
 561   hb_user_data_item_t item = {NULL };
 562 
 563   return items.find (key, &item, lock) ? item.data : NULL;
 564 }
 565 
 566 
 567 /* hb_version */
 568 
 569 /**
 570  * hb_version:
 571  * @major: (out): Library major version component.
 572  * @minor: (out): Library minor version component.
 573  * @micro: (out): Library micro version component.
 574  *
 575  * Returns library version as three integer components.
 576  *
 577  * Since: 0.9.2
 578  **/
 579 void
 580 hb_version (unsigned int *major,
 581             unsigned int *minor,
 582             unsigned int *micro)
 583 {
 584   *major = HB_VERSION_MAJOR;
 585   *minor = HB_VERSION_MINOR;
 586   *micro = HB_VERSION_MICRO;
 587 }
 588 
 589 /**
 590  * hb_version_string:
 591  *
 592  * Returns library version as a string with three components.
 593  *
 594  * Return value: library version string.
 595  *
 596  * Since: 0.9.2
 597  **/
 598 const char *
 599 hb_version_string (void)
 600 {
 601   return HB_VERSION_STRING;
 602 }
 603 
 604 /**
 605  * hb_version_atleast:
 606  * @major:
 607  * @minor:
 608  * @micro:
 609  *
 610  *
 611  *
 612  * Return value:
 613  *
 614  * Since: 0.9.30
 615  **/
 616 hb_bool_t
 617 hb_version_atleast (unsigned int major,
 618                     unsigned int minor,
 619                     unsigned int micro)
 620 {
 621   return HB_VERSION_ATLEAST (major, minor, micro);
 622 }