1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 // This file is available under and governed by the GNU General Public
  26 // License version 2 only, as published by the Free Software Foundation.
  27 // However, the following notice accompanied the original version of this
  28 // file:
  29 //
  30 /*
  31  * Copyright © 2009  Red Hat, Inc.
  32  * Copyright © 2011  Codethink Limited
  33  * Copyright © 2011,2012  Google, Inc.
  34  *
  35  *  This is part of HarfBuzz, a text shaping library.
  36  *
  37  * Permission is hereby granted, without written agreement and without
  38  * license or royalty fees, to use, copy, modify, and distribute this
  39  * software and its documentation for any purpose, provided that the
  40  * above copyright notice and the following two paragraphs appear in
  41  * all copies of this software.
  42  *
  43  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
  44  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  45  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
  46  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
  47  * DAMAGE.
  48  *
  49  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  50  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  51  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  52  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  53  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  54  *
  55  * Red Hat Author(s): Behdad Esfahbod
  56  * Codethink Author(s): Ryan Lortie
  57  * Google Author(s): Behdad Esfahbod
  58  */
  59 
  60 #ifndef HB_H_IN
  61 #error "Include <hb.h> instead."
  62 #endif
  63 
  64 #ifndef HB_UNICODE_H
  65 #define HB_UNICODE_H
  66 
  67 #include "hb-common.h"
  68 
  69 HB_BEGIN_DECLS
  70 
  71 
  72 /* hb_unicode_general_category_t */
  73 
  74 /* Unicode Character Database property: General_Category (gc) */
  75 typedef enum
  76 {
  77   HB_UNICODE_GENERAL_CATEGORY_CONTROL,                  /* Cc */
  78   HB_UNICODE_GENERAL_CATEGORY_FORMAT,                   /* Cf */
  79   HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,               /* Cn */
  80   HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,              /* Co */
  81   HB_UNICODE_GENERAL_CATEGORY_SURROGATE,                /* Cs */
  82   HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,         /* Ll */
  83   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,          /* Lm */
  84   HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,             /* Lo */
  85   HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,         /* Lt */
  86   HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,         /* Lu */
  87   HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,             /* Mc */
  88   HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,           /* Me */
  89   HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,         /* Mn */
  90   HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,           /* Nd */
  91   HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,            /* Nl */
  92   HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,             /* No */
  93   HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,      /* Pc */
  94   HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,         /* Pd */
  95   HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,        /* Pe */
  96   HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,        /* Pf */
  97   HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,      /* Pi */
  98   HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,        /* Po */
  99   HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,         /* Ps */
 100   HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,          /* Sc */
 101   HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,          /* Sk */
 102   HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,              /* Sm */
 103   HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,             /* So */
 104   HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,           /* Zl */
 105   HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,      /* Zp */
 106   HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR           /* Zs */
 107 } hb_unicode_general_category_t;
 108 
 109 /* hb_unicode_combining_class_t */
 110 
 111 /* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
 112  * any value in the 0..254 range being returned from hb_unicode_combining_class().
 113  */
 114 
 115 /* Unicode Character Database property: Canonical_Combining_Class (ccc) */
 116 typedef enum
 117 {
 118   HB_UNICODE_COMBINING_CLASS_NOT_REORDERED      = 0,
 119   HB_UNICODE_COMBINING_CLASS_OVERLAY            = 1,
 120   HB_UNICODE_COMBINING_CLASS_NUKTA              = 7,
 121   HB_UNICODE_COMBINING_CLASS_KANA_VOICING       = 8,
 122   HB_UNICODE_COMBINING_CLASS_VIRAMA             = 9,
 123 
 124   /* Hebrew */
 125   HB_UNICODE_COMBINING_CLASS_CCC10      =  10,
 126   HB_UNICODE_COMBINING_CLASS_CCC11      =  11,
 127   HB_UNICODE_COMBINING_CLASS_CCC12      =  12,
 128   HB_UNICODE_COMBINING_CLASS_CCC13      =  13,
 129   HB_UNICODE_COMBINING_CLASS_CCC14      =  14,
 130   HB_UNICODE_COMBINING_CLASS_CCC15      =  15,
 131   HB_UNICODE_COMBINING_CLASS_CCC16      =  16,
 132   HB_UNICODE_COMBINING_CLASS_CCC17      =  17,
 133   HB_UNICODE_COMBINING_CLASS_CCC18      =  18,
 134   HB_UNICODE_COMBINING_CLASS_CCC19      =  19,
 135   HB_UNICODE_COMBINING_CLASS_CCC20      =  20,
 136   HB_UNICODE_COMBINING_CLASS_CCC21      =  21,
 137   HB_UNICODE_COMBINING_CLASS_CCC22      =  22,
 138   HB_UNICODE_COMBINING_CLASS_CCC23      =  23,
 139   HB_UNICODE_COMBINING_CLASS_CCC24      =  24,
 140   HB_UNICODE_COMBINING_CLASS_CCC25      =  25,
 141   HB_UNICODE_COMBINING_CLASS_CCC26      =  26,
 142 
 143   /* Arabic */
 144   HB_UNICODE_COMBINING_CLASS_CCC27      =  27,
 145   HB_UNICODE_COMBINING_CLASS_CCC28      =  28,
 146   HB_UNICODE_COMBINING_CLASS_CCC29      =  29,
 147   HB_UNICODE_COMBINING_CLASS_CCC30      =  30,
 148   HB_UNICODE_COMBINING_CLASS_CCC31      =  31,
 149   HB_UNICODE_COMBINING_CLASS_CCC32      =  32,
 150   HB_UNICODE_COMBINING_CLASS_CCC33      =  33,
 151   HB_UNICODE_COMBINING_CLASS_CCC34      =  34,
 152   HB_UNICODE_COMBINING_CLASS_CCC35      =  35,
 153 
 154   /* Syriac */
 155   HB_UNICODE_COMBINING_CLASS_CCC36      =  36,
 156 
 157   /* Telugu */
 158   HB_UNICODE_COMBINING_CLASS_CCC84      =  84,
 159   HB_UNICODE_COMBINING_CLASS_CCC91      =  91,
 160 
 161   /* Thai */
 162   HB_UNICODE_COMBINING_CLASS_CCC103     = 103,
 163   HB_UNICODE_COMBINING_CLASS_CCC107     = 107,
 164 
 165   /* Lao */
 166   HB_UNICODE_COMBINING_CLASS_CCC118     = 118,
 167   HB_UNICODE_COMBINING_CLASS_CCC122     = 122,
 168 
 169   /* Tibetan */
 170   HB_UNICODE_COMBINING_CLASS_CCC129     = 129,
 171   HB_UNICODE_COMBINING_CLASS_CCC130     = 130,
 172   HB_UNICODE_COMBINING_CLASS_CCC133     = 132,
 173 
 174 
 175   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT        = 200,
 176   HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW             = 202,
 177   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE             = 214,
 178   HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT       = 216,
 179   HB_UNICODE_COMBINING_CLASS_BELOW_LEFT                 = 218,
 180   HB_UNICODE_COMBINING_CLASS_BELOW                      = 220,
 181   HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT                = 222,
 182   HB_UNICODE_COMBINING_CLASS_LEFT                       = 224,
 183   HB_UNICODE_COMBINING_CLASS_RIGHT                      = 226,
 184   HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT                 = 228,
 185   HB_UNICODE_COMBINING_CLASS_ABOVE                      = 230,
 186   HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT                = 232,
 187   HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW               = 233,
 188   HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE               = 234,
 189 
 190   HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT             = 240,
 191 
 192   HB_UNICODE_COMBINING_CLASS_INVALID    = 255
 193 } hb_unicode_combining_class_t;
 194 
 195 
 196 /*
 197  * hb_unicode_funcs_t
 198  */
 199 
 200 typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
 201 
 202 
 203 /*
 204  * just give me the best implementation you've got there.
 205  */
 206 hb_unicode_funcs_t *
 207 hb_unicode_funcs_get_default (void);
 208 
 209 
 210 hb_unicode_funcs_t *
 211 hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
 212 
 213 hb_unicode_funcs_t *
 214 hb_unicode_funcs_get_empty (void);
 215 
 216 hb_unicode_funcs_t *
 217 hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
 218 
 219 void
 220 hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
 221 
 222 hb_bool_t
 223 hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
 224                                 hb_user_data_key_t *key,
 225                                 void *              data,
 226                                 hb_destroy_func_t   destroy,
 227                                 hb_bool_t           replace);
 228 
 229 
 230 void *
 231 hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
 232                                 hb_user_data_key_t *key);
 233 
 234 
 235 void
 236 hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
 237 
 238 hb_bool_t
 239 hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
 240 
 241 hb_unicode_funcs_t *
 242 hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
 243 
 244 
 245 /*
 246  * funcs
 247  */
 248 
 249 /* typedefs */
 250 
 251 typedef hb_unicode_combining_class_t    (*hb_unicode_combining_class_func_t)    (hb_unicode_funcs_t *ufuncs,
 252                                                                                  hb_codepoint_t      unicode,
 253                                                                                  void               *user_data);
 254 typedef unsigned int                    (*hb_unicode_eastasian_width_func_t)    (hb_unicode_funcs_t *ufuncs,
 255                                                                                  hb_codepoint_t      unicode,
 256                                                                                  void               *user_data);
 257 typedef hb_unicode_general_category_t   (*hb_unicode_general_category_func_t)   (hb_unicode_funcs_t *ufuncs,
 258                                                                                  hb_codepoint_t      unicode,
 259                                                                                  void               *user_data);
 260 typedef hb_codepoint_t                  (*hb_unicode_mirroring_func_t)          (hb_unicode_funcs_t *ufuncs,
 261                                                                                  hb_codepoint_t      unicode,
 262                                                                                  void               *user_data);
 263 typedef hb_script_t                     (*hb_unicode_script_func_t)             (hb_unicode_funcs_t *ufuncs,
 264                                                                                  hb_codepoint_t      unicode,
 265                                                                                  void               *user_data);
 266 
 267 typedef hb_bool_t                       (*hb_unicode_compose_func_t)            (hb_unicode_funcs_t *ufuncs,
 268                                                                                  hb_codepoint_t      a,
 269                                                                                  hb_codepoint_t      b,
 270                                                                                  hb_codepoint_t     *ab,
 271                                                                                  void               *user_data);
 272 typedef hb_bool_t                       (*hb_unicode_decompose_func_t)          (hb_unicode_funcs_t *ufuncs,
 273                                                                                  hb_codepoint_t      ab,
 274                                                                                  hb_codepoint_t     *a,
 275                                                                                  hb_codepoint_t     *b,
 276                                                                                  void               *user_data);
 277 
 278 /**
 279  * hb_unicode_decompose_compatibility_func_t:
 280  * @ufuncs: a Unicode function structure
 281  * @u: codepoint to decompose
 282  * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
 283  * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
 284  *
 285  * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
 286  * The complete length of the decomposition will be returned.
 287  *
 288  * If @u has no compatibility decomposition, zero should be returned.
 289  *
 290  * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
 291  * compatibility decomposition plus an terminating value of 0.  Consequently, @decompose must be allocated by the caller to be at least this length.  Implementations
 292  * of this function type must ensure that they do not write past the provided array.
 293  *
 294  * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
 295  */
 296 typedef unsigned int                    (*hb_unicode_decompose_compatibility_func_t)    (hb_unicode_funcs_t *ufuncs,
 297                                                                                          hb_codepoint_t      u,
 298                                                                                          hb_codepoint_t     *decomposed,
 299                                                                                          void               *user_data);
 300 
 301 /* See Unicode 6.1 for details on the maximum decomposition length. */
 302 #define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
 303 
 304 /* setters */
 305 
 306 /**
 307  * hb_unicode_funcs_set_combining_class_func:
 308  * @ufuncs: a Unicode function structure
 309  * @func: (closure user_data) (destroy destroy) (scope notified):
 310  * @user_data:
 311  * @destroy:
 312  *
 313  *
 314  *
 315  * Since: 0.9.2
 316  **/
 317 void
 318 hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
 319                                            hb_unicode_combining_class_func_t func,
 320                                            void *user_data, hb_destroy_func_t destroy);
 321 
 322 /**
 323  * hb_unicode_funcs_set_eastasian_width_func:
 324  * @ufuncs: a Unicode function structure
 325  * @func: (closure user_data) (destroy destroy) (scope notified):
 326  * @user_data:
 327  * @destroy:
 328  *
 329  *
 330  *
 331  * Since: 0.9.2
 332  **/
 333 void
 334 hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
 335                                            hb_unicode_eastasian_width_func_t func,
 336                                            void *user_data, hb_destroy_func_t destroy);
 337 
 338 /**
 339  * hb_unicode_funcs_set_general_category_func:
 340  * @ufuncs: a Unicode function structure
 341  * @func: (closure user_data) (destroy destroy) (scope notified):
 342  * @user_data:
 343  * @destroy:
 344  *
 345  *
 346  *
 347  * Since: 0.9.2
 348  **/
 349 void
 350 hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
 351                                             hb_unicode_general_category_func_t func,
 352                                             void *user_data, hb_destroy_func_t destroy);
 353 
 354 /**
 355  * hb_unicode_funcs_set_mirroring_func:
 356  * @ufuncs: a Unicode function structure
 357  * @func: (closure user_data) (destroy destroy) (scope notified):
 358  * @user_data:
 359  * @destroy:
 360  *
 361  *
 362  *
 363  * Since: 0.9.2
 364  **/
 365 void
 366 hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
 367                                      hb_unicode_mirroring_func_t func,
 368                                      void *user_data, hb_destroy_func_t destroy);
 369 
 370 /**
 371  * hb_unicode_funcs_set_script_func:
 372  * @ufuncs: a Unicode function structure
 373  * @func: (closure user_data) (destroy destroy) (scope notified):
 374  * @user_data:
 375  * @destroy:
 376  *
 377  *
 378  *
 379  * Since: 0.9.2
 380  **/
 381 void
 382 hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
 383                                   hb_unicode_script_func_t func,
 384                                   void *user_data, hb_destroy_func_t destroy);
 385 
 386 /**
 387  * hb_unicode_funcs_set_compose_func:
 388  * @ufuncs: a Unicode function structure
 389  * @func: (closure user_data) (destroy destroy) (scope notified):
 390  * @user_data:
 391  * @destroy:
 392  *
 393  *
 394  *
 395  * Since: 0.9.2
 396  **/
 397 void
 398 hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
 399                                    hb_unicode_compose_func_t func,
 400                                    void *user_data, hb_destroy_func_t destroy);
 401 
 402 /**
 403  * hb_unicode_funcs_set_decompose_func:
 404  * @ufuncs: a Unicode function structure
 405  * @func: (closure user_data) (destroy destroy) (scope notified):
 406  * @user_data:
 407  * @destroy:
 408  *
 409  *
 410  *
 411  * Since: 0.9.2
 412  **/
 413 void
 414 hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
 415                                      hb_unicode_decompose_func_t func,
 416                                      void *user_data, hb_destroy_func_t destroy);
 417 
 418 /**
 419  * hb_unicode_funcs_set_decompose_compatibility_func:
 420  * @ufuncs: a Unicode function structure
 421  * @func: (closure user_data) (destroy destroy) (scope notified):
 422  * @user_data:
 423  * @destroy:
 424  *
 425  *
 426  *
 427  * Since: 0.9.2
 428  **/
 429 void
 430 hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
 431                                                    hb_unicode_decompose_compatibility_func_t func,
 432                                                    void *user_data, hb_destroy_func_t destroy);
 433 
 434 /* accessors */
 435 
 436 /**
 437  * Since: 0.9.2
 438  **/
 439 hb_unicode_combining_class_t
 440 hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
 441                             hb_codepoint_t unicode);
 442 
 443 /**
 444  * Since: 0.9.2
 445  **/
 446 unsigned int
 447 hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
 448                             hb_codepoint_t unicode);
 449 
 450 /**
 451  * Since: 0.9.2
 452  **/
 453 hb_unicode_general_category_t
 454 hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
 455                              hb_codepoint_t unicode);
 456 
 457 /**
 458  * Since: 0.9.2
 459  **/
 460 hb_codepoint_t
 461 hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
 462                       hb_codepoint_t unicode);
 463 
 464 /**
 465  * Since: 0.9.2
 466  **/
 467 hb_script_t
 468 hb_unicode_script (hb_unicode_funcs_t *ufuncs,
 469                    hb_codepoint_t unicode);
 470 
 471 /**
 472  * Since: 0.9.2
 473  **/
 474 hb_bool_t
 475 hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
 476                     hb_codepoint_t      a,
 477                     hb_codepoint_t      b,
 478                     hb_codepoint_t     *ab);
 479 
 480 /**
 481  * Since: 0.9.2
 482  **/
 483 hb_bool_t
 484 hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
 485                       hb_codepoint_t      ab,
 486                       hb_codepoint_t     *a,
 487                       hb_codepoint_t     *b);
 488 
 489 /**
 490  * Since: 0.9.2
 491  **/
 492 unsigned int
 493 hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
 494                                     hb_codepoint_t      u,
 495                                     hb_codepoint_t     *decomposed);
 496 
 497 HB_END_DECLS
 498 
 499 #endif /* HB_UNICODE_H */