/*
 * Copyright © 2009  Red Hat, Inc.
 * Copyright © 2011  Codethink Limited
 * Copyright © 2011,2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Codethink Author(s): Ryan Lortie
 * Google Author(s): Behdad Esfahbod
 */
  30 
  31 #ifndef HB_H_IN
  32 #error "Include <hb.h> instead."
  33 #endif
  34 
  35 #ifndef HB_UNICODE_H
  36 #define HB_UNICODE_H
  37 
  38 #include "hb-common.h"
  39 
  40 HB_BEGIN_DECLS
  41 
  42 
  43 /* hb_unicode_general_category_t */
  44 
  45 /* Unicode Character Database property: General_Category (gc) */
  46 typedef enum
  47 {
  48   HB_UNICODE_GENERAL_CATEGORY_CONTROL,                  /* Cc */
  49   HB_UNICODE_GENERAL_CATEGORY_FORMAT,                   /* Cf */
  50   HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,               /* Cn */
  51   HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,              /* Co */
  52   HB_UNICODE_GENERAL_CATEGORY_SURROGATE,                /* Cs */
  53   HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,         /* Ll */
  54   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,          /* Lm */
  55   HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,             /* Lo */
  56   HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,         /* Lt */
  57   HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,         /* Lu */
  58   HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,             /* Mc */
  59   HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,           /* Me */
  60   HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,         /* Mn */
  61   HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,           /* Nd */
  62   HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,            /* Nl */
  63   HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,             /* No */
  64   HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,      /* Pc */
  65   HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,         /* Pd */
  66   HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,        /* Pe */
  67   HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,        /* Pf */
  68   HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,      /* Pi */
  69   HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,        /* Po */
  70   HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,         /* Ps */
  71   HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,          /* Sc */
  72   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,          /* Sk */
  73   HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,              /* Sm */
  74   HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,             /* So */
  75   HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,           /* Zl */
  76   HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,      /* Zp */
  77   HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR           /* Zs */
  78 } hb_unicode_general_category_t;
  79 
  80 /* hb_unicode_combining_class_t */
  81 
  82 /* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
  83  * any value in the 0..254 range being returned from hb_unicode_combining_class().
  84  */
  85 
  86 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */
  87 typedef enum
  88 {
  89   HB_UNICODE_COMBINING_CLASS_NOT_REORDERED      = 0,
  90   HB_UNICODE_COMBINING_CLASS_OVERLAY            = 1,
  91   HB_UNICODE_COMBINING_CLASS_NUKTA              = 7,
  92   HB_UNICODE_COMBINING_CLASS_KANA_VOICING       = 8,
  93   HB_UNICODE_COMBINING_CLASS_VIRAMA             = 9,
  94 
  95   /* Hebrew */
  96   HB_UNICODE_COMBINING_CLASS_CCC10      =  10,
  97   HB_UNICODE_COMBINING_CLASS_CCC11      =  11,
  98   HB_UNICODE_COMBINING_CLASS_CCC12      =  12,
  99   HB_UNICODE_COMBINING_CLASS_CCC13      =  13,
 100   HB_UNICODE_COMBINING_CLASS_CCC14      =  14,
 101   HB_UNICODE_COMBINING_CLASS_CCC15      =  15,
 102   HB_UNICODE_COMBINING_CLASS_CCC16      =  16,
 103   HB_UNICODE_COMBINING_CLASS_CCC17      =  17,
 104   HB_UNICODE_COMBINING_CLASS_CCC18      =  18,
 105   HB_UNICODE_COMBINING_CLASS_CCC19      =  19,
 106   HB_UNICODE_COMBINING_CLASS_CCC20      =  20,
 107   HB_UNICODE_COMBINING_CLASS_CCC21      =  21,
 108   HB_UNICODE_COMBINING_CLASS_CCC22      =  22,
 109   HB_UNICODE_COMBINING_CLASS_CCC23      =  23,
 110   HB_UNICODE_COMBINING_CLASS_CCC24      =  24,
 111   HB_UNICODE_COMBINING_CLASS_CCC25      =  25,
 112   HB_UNICODE_COMBINING_CLASS_CCC26      =  26,
 113 
 114   /* Arabic */
 115   HB_UNICODE_COMBINING_CLASS_CCC27      =  27,
 116   HB_UNICODE_COMBINING_CLASS_CCC28      =  28,
 117   HB_UNICODE_COMBINING_CLASS_CCC29      =  29,
 118   HB_UNICODE_COMBINING_CLASS_CCC30      =  30,
 119   HB_UNICODE_COMBINING_CLASS_CCC31      =  31,
 120   HB_UNICODE_COMBINING_CLASS_CCC32      =  32,
 121   HB_UNICODE_COMBINING_CLASS_CCC33      =  33,
 122   HB_UNICODE_COMBINING_CLASS_CCC34      =  34,
 123   HB_UNICODE_COMBINING_CLASS_CCC35      =  35,
 124 
 125   /* Syriac */
 126   HB_UNICODE_COMBINING_CLASS_CCC36      =  36,
 127 
 128   /* Telugu */
 129   HB_UNICODE_COMBINING_CLASS_CCC84      =  84,
 130   HB_UNICODE_COMBINING_CLASS_CCC91      =  91,
 131 
 132   /* Thai */
 133   HB_UNICODE_COMBINING_CLASS_CCC103     = 103,
 134   HB_UNICODE_COMBINING_CLASS_CCC107     = 107,
 135 
 136   /* Lao */
 137   HB_UNICODE_COMBINING_CLASS_CCC118     = 118,
 138   HB_UNICODE_COMBINING_CLASS_CCC122     = 122,
 139 
 140   /* Tibetan */
 141   HB_UNICODE_COMBINING_CLASS_CCC129     = 129,
 142   HB_UNICODE_COMBINING_CLASS_CCC130     = 130,
 143   HB_UNICODE_COMBINING_CLASS_CCC133     = 132,
 144 
 145 
 146   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT        = 200,
 147   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW             = 202,
 148   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE             = 214,
 149   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT       = 216,
 150   HB_UNICODE_COMBINING_CLASS_BELOW_LEFT                 = 218,
 151   HB_UNICODE_COMBINING_CLASS_BELOW                      = 220,
 152   HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT                = 222,
 153   HB_UNICODE_COMBINING_CLASS_LEFT                       = 224,
 154   HB_UNICODE_COMBINING_CLASS_RIGHT                      = 226,
 155   HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT                 = 228,
 156   HB_UNICODE_COMBINING_CLASS_ABOVE                      = 230,
 157   HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT                = 232,
 158   HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW               = 233,
 159   HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE               = 234,
 160 
 161   HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT             = 240,
 162 
 163   HB_UNICODE_COMBINING_CLASS_INVALID    = 255
 164 } hb_unicode_combining_class_t;
 165 
 166 
 167 /*
 168  * hb_unicode_funcs_t
 169  */
 170 
 171 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
 172 
 173 
 174 /*
 175  * just give me the best implementation you've got there.
 176  */
 177 hb_unicode_funcs_t *
 178 hb_unicode_funcs_get_default (void);
 179 
 180 
 181 hb_unicode_funcs_t *
 182 hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
 183 
 184 hb_unicode_funcs_t *
 185 hb_unicode_funcs_get_empty (void);
 186 
 187 hb_unicode_funcs_t *
 188 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
 189 
 190 void
 191 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
 192 
 193 hb_bool_t
 194 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
 195                                 hb_user_data_key_t *key,
 196                                 void *              data,
 197                                 hb_destroy_func_t   destroy,
 198                                 hb_bool_t           replace);
 199 
 200 
 201 void *
 202 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
 203                                 hb_user_data_key_t *key);
 204 
 205 
 206 void
 207 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
 208 
 209 hb_bool_t
 210 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
 211 
 212 hb_unicode_funcs_t *
 213 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
 214 
 215 
 216 /*
 217  * funcs
 218  */
 219 
 220 /* typedefs */
 221 
 222 typedef hb_unicode_combining_class_t    (*hb_unicode_combining_class_func_t)    (hb_unicode_funcs_t *ufuncs,
 223                                                                                  hb_codepoint_t      unicode,
 224                                                                                  void               *user_data);
 225 typedef unsigned int                    (*hb_unicode_eastasian_width_func_t)    (hb_unicode_funcs_t *ufuncs,
 226                                                                                  hb_codepoint_t      unicode,
 227                                                                                  void               *user_data);
 228 typedef hb_unicode_general_category_t   (*hb_unicode_general_category_func_t)   (hb_unicode_funcs_t *ufuncs,
 229                                                                                  hb_codepoint_t      unicode,
 230                                                                                  void               *user_data);
 231 typedef hb_codepoint_t                  (*hb_unicode_mirroring_func_t)          (hb_unicode_funcs_t *ufuncs,
 232                                                                                  hb_codepoint_t      unicode,
 233                                                                                  void               *user_data);
 234 typedef hb_script_t                     (*hb_unicode_script_func_t)             (hb_unicode_funcs_t *ufuncs,
 235                                                                                  hb_codepoint_t      unicode,
 236                                                                                  void               *user_data);
 237 
 238 typedef hb_bool_t                       (*hb_unicode_compose_func_t)            (hb_unicode_funcs_t *ufuncs,
 239                                                                                  hb_codepoint_t      a,
 240                                                                                  hb_codepoint_t      b,
 241                                                                                  hb_codepoint_t     *ab,
 242                                                                                  void               *user_data);
 243 typedef hb_bool_t                       (*hb_unicode_decompose_func_t)          (hb_unicode_funcs_t *ufuncs,
 244                                                                                  hb_codepoint_t      ab,
 245                                                                                  hb_codepoint_t     *a,
 246                                                                                  hb_codepoint_t     *b,
 247                                                                                  void               *user_data);
 248 
 249 /**
 250  * hb_unicode_decompose_compatibility_func_t:
 251  * @ufuncs: a Unicode function structure
 252  * @u: codepoint to decompose
 253  * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
 254  * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
 255  *
 256  * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
 257  * The complete length of the decomposition will be returned.
 258  *
 259  * If @u has no compatibility decomposition, zero should be returned.
 260  *
 261  * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
 262  * compatibility decomposition plus an terminating value of 0.  Consequently, @decompose must be allocated by the caller to be at least this length.  Implementations
 263  * of this function type must ensure that they do not write past the provided array.
 264  *
 265  * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
 266  */
 267 typedef unsigned int                    (*hb_unicode_decompose_compatibility_func_t)    (hb_unicode_funcs_t *ufuncs,
 268                                                                                          hb_codepoint_t      u,
 269                                                                                          hb_codepoint_t     *decomposed,
 270                                                                                          void               *user_data);
 271 
 272 /* See Unicode 6.1 for details on the maximum decomposition length. */
 273 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
 274 
 275 /* setters */
 276 
 277 /**
 278  * hb_unicode_funcs_set_combining_class_func:
 279  * @ufuncs: a Unicode function structure
 280  * @func: (closure user_data) (destroy destroy) (scope notified):
 281  * @user_data:
 282  * @destroy:
 283  *
 284  *
 285  *
 286  * Since: 0.9.2
 287  **/
 288 void
 289 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
 290                                            hb_unicode_combining_class_func_t func,
 291                                            void *user_data, hb_destroy_func_t destroy);
 292 
 293 /**
 294  * hb_unicode_funcs_set_eastasian_width_func:
 295  * @ufuncs: a Unicode function structure
 296  * @func: (closure user_data) (destroy destroy) (scope notified):
 297  * @user_data:
 298  * @destroy:
 299  *
 300  *
 301  *
 302  * Since: 0.9.2
 303  **/
 304 void
 305 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
 306                                            hb_unicode_eastasian_width_func_t func,
 307                                            void *user_data, hb_destroy_func_t destroy);
 308 
 309 /**
 310  * hb_unicode_funcs_set_general_category_func:
 311  * @ufuncs: a Unicode function structure
 312  * @func: (closure user_data) (destroy destroy) (scope notified):
 313  * @user_data:
 314  * @destroy:
 315  *
 316  *
 317  *
 318  * Since: 0.9.2
 319  **/
 320 void
 321 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
 322                                             hb_unicode_general_category_func_t func,
 323                                             void *user_data, hb_destroy_func_t destroy);
 324 
 325 /**
 326  * hb_unicode_funcs_set_mirroring_func:
 327  * @ufuncs: a Unicode function structure
 328  * @func: (closure user_data) (destroy destroy) (scope notified):
 329  * @user_data:
 330  * @destroy:
 331  *
 332  *
 333  *
 334  * Since: 0.9.2
 335  **/
 336 void
 337 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
 338                                      hb_unicode_mirroring_func_t func,
 339                                      void *user_data, hb_destroy_func_t destroy);
 340 
 341 /**
 342  * hb_unicode_funcs_set_script_func:
 343  * @ufuncs: a Unicode function structure
 344  * @func: (closure user_data) (destroy destroy) (scope notified):
 345  * @user_data:
 346  * @destroy:
 347  *
 348  *
 349  *
 350  * Since: 0.9.2
 351  **/
 352 void
 353 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
 354                                   hb_unicode_script_func_t func,
 355                                   void *user_data, hb_destroy_func_t destroy);
 356 
 357 /**
 358  * hb_unicode_funcs_set_compose_func:
 359  * @ufuncs: a Unicode function structure
 360  * @func: (closure user_data) (destroy destroy) (scope notified):
 361  * @user_data:
 362  * @destroy:
 363  *
 364  *
 365  *
 366  * Since: 0.9.2
 367  **/
 368 void
 369 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
 370                                    hb_unicode_compose_func_t func,
 371                                    void *user_data, hb_destroy_func_t destroy);
 372 
 373 /**
 374  * hb_unicode_funcs_set_decompose_func:
 375  * @ufuncs: a Unicode function structure
 376  * @func: (closure user_data) (destroy destroy) (scope notified):
 377  * @user_data:
 378  * @destroy:
 379  *
 380  *
 381  *
 382  * Since: 0.9.2
 383  **/
 384 void
 385 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
 386                                      hb_unicode_decompose_func_t func,
 387                                      void *user_data, hb_destroy_func_t destroy);
 388 
 389 /**
 390  * hb_unicode_funcs_set_decompose_compatibility_func:
 391  * @ufuncs: a Unicode function structure
 392  * @func: (closure user_data) (destroy destroy) (scope notified):
 393  * @user_data:
 394  * @destroy:
 395  *
 396  *
 397  *
 398  * Since: 0.9.2
 399  **/
 400 void
 401 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
 402                                                    hb_unicode_decompose_compatibility_func_t func,
 403                                                    void *user_data, hb_destroy_func_t destroy);
 404 
 405 /* accessors */
 406 
 407 /**
 408  * Since: 0.9.2
 409  **/
 410 hb_unicode_combining_class_t
 411 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
 412                             hb_codepoint_t unicode);
 413 
 414 /**
 415  * Since: 0.9.2
 416  **/
 417 unsigned int
 418 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
 419                             hb_codepoint_t unicode);
 420 
 421 /**
 422  * Since: 0.9.2
 423  **/
 424 hb_unicode_general_category_t
 425 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
 426                              hb_codepoint_t unicode);
 427 
 428 /**
 429  * Since: 0.9.2
 430  **/
 431 hb_codepoint_t
 432 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
 433                       hb_codepoint_t unicode);
 434 
 435 /**
 436  * Since: 0.9.2
 437  **/
 438 hb_script_t
 439 hb_unicode_script (hb_unicode_funcs_t *ufuncs,
 440                    hb_codepoint_t unicode);
 441 
 442 /**
 443  * Since: 0.9.2
 444  **/
 445 hb_bool_t
 446 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
 447                     hb_codepoint_t      a,
 448                     hb_codepoint_t      b,
 449                     hb_codepoint_t     *ab);
 450 
 451 /**
 452  * Since: 0.9.2
 453  **/
 454 hb_bool_t
 455 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
 456                       hb_codepoint_t      ab,
 457                       hb_codepoint_t     *a,
 458                       hb_codepoint_t     *b);
 459 
 460 /**
 461  * Since: 0.9.2
 462  **/
 463 unsigned int
 464 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
 465                                     hb_codepoint_t      u,
 466                                     hb_codepoint_t     *decomposed);
 467 
 468 HB_END_DECLS
 469 
 470 #endif /* HB_UNICODE_H */