1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 /*
  27  *
  28  * (C) Copyright IBM Corp. 1998-2013 - All Rights Reserved
  29  *
  30  */
  31 
  32 #ifndef __INDICREORDERING_H
  33 #define __INDICREORDERING_H
  34 
  35 /**
  36  * \file
  37  * \internal
  38  */
  39 
  40 #include "LETypes.h"
  41 #include "OpenTypeTables.h"
  42 
  43 U_NAMESPACE_BEGIN
  44 
  45 // Characters that get referred to by name...
  46 #define C_SIGN_ZWNJ           0x200C
  47 #define C_SIGN_ZWJ            0x200D
  48 
  49 // Character class values
  50 #define CC_RESERVED               0U
  51 #define CC_VOWEL_MODIFIER         1U
  52 #define CC_STRESS_MARK            2U
  53 #define CC_INDEPENDENT_VOWEL      3U
  54 #define CC_INDEPENDENT_VOWEL_2    4U
  55 #define CC_INDEPENDENT_VOWEL_3    5U
  56 #define CC_CONSONANT              6U
  57 #define CC_CONSONANT_WITH_NUKTA   7U
  58 #define CC_NUKTA                  8U
  59 #define CC_DEPENDENT_VOWEL        9U
  60 #define CC_SPLIT_VOWEL_PIECE_1   10U
  61 #define CC_SPLIT_VOWEL_PIECE_2   11U
  62 #define CC_SPLIT_VOWEL_PIECE_3   12U
  63 #define CC_VIRAMA                13U
  64 #define CC_ZERO_WIDTH_MARK       14U
  65 #define CC_AL_LAKUNA             15U
  66 #define CC_COUNT                 16U
  67 
  68 // Character class flags
  69 #define CF_CLASS_MASK    0x0000FFFFU
  70 
  71 #define CF_CONSONANT     0x80000000U
  72 
  73 #define CF_REPH          0x40000000U
  74 #define CF_VATTU         0x20000000U
  75 #define CF_BELOW_BASE    0x10000000U
  76 #define CF_POST_BASE     0x08000000U
  77 #define CF_LENGTH_MARK   0x04000000U
  78 #define CF_PRE_BASE      0x02000000U
  79 
  80 #define CF_POS_BEFORE    0x00300000U
  81 #define CF_POS_BELOW     0x00200000U
  82 #define CF_POS_ABOVE     0x00100000U
  83 #define CF_POS_AFTER     0x00000000U
  84 #define CF_POS_MASK      0x00300000U
  85 
  86 #define CF_INDEX_MASK    0x000F0000U
  87 #define CF_INDEX_SHIFT   16
  88 
  89 // Script flag bits
  90 #define SF_MATRAS_AFTER_BASE     0x80000000U
  91 #define SF_REPH_AFTER_BELOW      0x40000000U
  92 #define SF_EYELASH_RA            0x20000000U
  93 #define SF_MPRE_FIXUP            0x10000000U
  94 #define SF_FILTER_ZERO_WIDTH     0x08000000U
  95 
  96 #define SF_POST_BASE_LIMIT_MASK  0x0000FFFFU
  97 #define SF_NO_POST_BASE_LIMIT    0x00007FFFU
  98 
  99 #define SM_MAX_PIECES 3
 100 
 101 typedef LEUnicode SplitMatra[SM_MAX_PIECES];
 102 
 103 class MPreFixups;
 104 class LEGlyphStorage;
 105 
 106 // Dynamic Properties ( v2 fonts only )
 107 typedef le_uint32 DynamicProperties;
 108 
 109 #define DP_REPH               0x80000000U
 110 #define DP_HALF               0x40000000U
 111 #define DP_PREF               0x20000000U
 112 #define DP_BLWF               0x10000000U
 113 #define DP_PSTF               0x08000000U
 114 
 115 struct IndicClassTable
 116 {
 117     typedef le_uint32 CharClass;
 118     typedef le_uint32 ScriptFlags;
 119 
 120     LEUnicode firstChar;
 121     LEUnicode lastChar;
 122     le_int32 worstCaseExpansion;
 123     ScriptFlags scriptFlags;
 124     const CharClass *classTable;
 125     const SplitMatra *splitMatraTable;
 126 
 127     inline le_int32 getWorstCaseExpansion() const;
 128     inline le_bool getFilterZeroWidth() const;
 129 
 130     CharClass getCharClass(LEUnicode ch) const;
 131 
 132     inline const SplitMatra *getSplitMatra(CharClass charClass) const;
 133 
 134     inline le_bool isVowelModifier(LEUnicode ch) const;
 135     inline le_bool isStressMark(LEUnicode ch) const;
 136     inline le_bool isConsonant(LEUnicode ch) const;
 137     inline le_bool isReph(LEUnicode ch) const;
 138     inline le_bool isVirama(LEUnicode ch) const;
 139     inline le_bool isAlLakuna(LEUnicode ch) const;
 140     inline le_bool isNukta(LEUnicode ch) const;
 141     inline le_bool isVattu(LEUnicode ch) const;
 142     inline le_bool isMatra(LEUnicode ch) const;
 143     inline le_bool isSplitMatra(LEUnicode ch) const;
 144     inline le_bool isLengthMark(LEUnicode ch) const;
 145     inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
 146     inline le_bool hasPostBaseForm(LEUnicode ch) const;
 147     inline le_bool hasBelowBaseForm(LEUnicode ch) const;
 148     inline le_bool hasAboveBaseForm(LEUnicode ch) const;
 149     inline le_bool hasPreBaseForm(LEUnicode ch) const;
 150 
 151     inline static le_bool isVowelModifier(CharClass charClass);
 152     inline static le_bool isStressMark(CharClass charClass);
 153     inline static le_bool isConsonant(CharClass charClass);
 154     inline static le_bool isReph(CharClass charClass);
 155     inline static le_bool isVirama(CharClass charClass);
 156     inline static le_bool isAlLakuna(CharClass charClass);
 157     inline static le_bool isNukta(CharClass charClass);
 158     inline static le_bool isVattu(CharClass charClass);
 159     inline static le_bool isMatra(CharClass charClass);
 160     inline static le_bool isSplitMatra(CharClass charClass);
 161     inline static le_bool isLengthMark(CharClass charClass);
 162     inline static le_bool hasPostOrBelowBaseForm(CharClass charClass);
 163     inline static le_bool hasPostBaseForm(CharClass charClass);
 164     inline static le_bool hasBelowBaseForm(CharClass charClass);
 165     inline static le_bool hasAboveBaseForm(CharClass charClass);
 166     inline static le_bool hasPreBaseForm(CharClass charClass);
 167 
 168     static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
 169 };
 170 
 171 class IndicReordering /* not : public UObject because all methods are static */ {
 172 public:
 173     static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
 174 
 175     static le_bool getFilterZeroWidth(le_int32 scriptCode);
 176 
 177     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
 178         LEUnicode *outChars, LEGlyphStorage &glyphStorage,
 179         MPreFixups **outMPreFixups, LEErrorCode& success);
 180 
 181     static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success);
 182 
 183     static le_int32 v2process(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
 184         LEUnicode *outChars, LEGlyphStorage &glyphStorage, LEErrorCode& success);
 185 
 186     static const FeatureMap *getFeatureMap(le_int32 &count);
 187 
 188         static const FeatureMap *getv2FeatureMap(le_int32 &count);
 189 
 190     static void applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count);
 191 
 192     static void finalReordering(LEGlyphStorage &glyphStorage, le_int32 count);
 193 
 194     static void getDynamicProperties(DynamicProperties *dProps, const IndicClassTable *classTable);
 195 
 196 private:
 197     // do not instantiate
 198     IndicReordering();
 199 
 200     static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
 201 
 202 };
 203 
 204 inline le_int32 IndicClassTable::getWorstCaseExpansion() const
 205 {
 206     return worstCaseExpansion;
 207 }
 208 
 209 inline le_bool IndicClassTable::getFilterZeroWidth() const
 210 {
 211     return (scriptFlags & SF_FILTER_ZERO_WIDTH) != 0;
 212 }
 213 
 214 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
 215 {
 216     le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
 217 
 218     return &splitMatraTable[index - 1];
 219 }
 220 
 221 inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
 222 {
 223     return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER;
 224 }
 225 
 226 inline le_bool IndicClassTable::isStressMark(CharClass charClass)
 227 {
 228     return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK;
 229 }
 230 
 231 inline le_bool IndicClassTable::isConsonant(CharClass charClass)
 232 {
 233     return (charClass & CF_CONSONANT) != 0;
 234 }
 235 
 236 inline le_bool IndicClassTable::isReph(CharClass charClass)
 237 {
 238     return (charClass & CF_REPH) != 0;
 239 }
 240 
 241 inline le_bool IndicClassTable::isNukta(CharClass charClass)
 242 {
 243     return (charClass & CF_CLASS_MASK) == CC_NUKTA;
 244 }
 245 
 246 inline le_bool IndicClassTable::isVirama(CharClass charClass)
 247 {
 248     return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
 249 }
 250 
 251 inline le_bool IndicClassTable::isAlLakuna(CharClass charClass)
 252 {
 253     return (charClass & CF_CLASS_MASK) == CC_AL_LAKUNA;
 254 }
 255 
 256 inline le_bool IndicClassTable::isVattu(CharClass charClass)
 257 {
 258     return (charClass & CF_VATTU) != 0;
 259 }
 260 
 261 inline le_bool IndicClassTable::isMatra(CharClass charClass)
 262 {
 263     charClass &= CF_CLASS_MASK;
 264 
 265     return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3;
 266 }
 267 
 268 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
 269 {
 270     return (charClass & CF_INDEX_MASK) != 0;
 271 }
 272 
 273 inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
 274 {
 275     return (charClass & CF_LENGTH_MARK) != 0;
 276 }
 277 
 278 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
 279 {
 280     return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
 281 }
 282 
 283 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
 284 {
 285     return (charClass & CF_POST_BASE) != 0;
 286 }
 287 
 288 inline le_bool IndicClassTable::hasPreBaseForm(CharClass charClass)
 289 {
 290     return (charClass & CF_PRE_BASE) != 0;
 291 }
 292 
 293 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
 294 {
 295     return (charClass & CF_BELOW_BASE) != 0;
 296 }
 297 
 298 inline le_bool IndicClassTable::hasAboveBaseForm(CharClass charClass)
 299 {
 300     return ((charClass & CF_POS_MASK) == CF_POS_ABOVE);
 301 }
 302 
 303 inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
 304 {
 305     return isVowelModifier(getCharClass(ch));
 306 }
 307 
 308 inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
 309 {
 310     return isStressMark(getCharClass(ch));
 311 }
 312 
 313 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
 314 {
 315     return isConsonant(getCharClass(ch));
 316 }
 317 
 318 inline le_bool IndicClassTable::isReph(LEUnicode ch) const
 319 {
 320     return isReph(getCharClass(ch));
 321 }
 322 
 323 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
 324 {
 325     return isVirama(getCharClass(ch));
 326 }
 327 
 328 inline le_bool IndicClassTable::isAlLakuna(LEUnicode ch) const
 329 {
 330     return isAlLakuna(getCharClass(ch));
 331 }
 332 
 333 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
 334 {
 335     return isNukta(getCharClass(ch));
 336 }
 337 
 338 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
 339 {
 340     return isVattu(getCharClass(ch));
 341 }
 342 
 343 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
 344 {
 345     return isMatra(getCharClass(ch));
 346 }
 347 
 348 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
 349 {
 350     return isSplitMatra(getCharClass(ch));
 351 }
 352 
 353 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
 354 {
 355     return isLengthMark(getCharClass(ch));
 356 }
 357 
 358 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
 359 {
 360     return hasPostOrBelowBaseForm(getCharClass(ch));
 361 }
 362 
 363 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
 364 {
 365     return hasPostBaseForm(getCharClass(ch));
 366 }
 367 
 368 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
 369 {
 370     return hasBelowBaseForm(getCharClass(ch));
 371 }
 372 
 373 inline le_bool IndicClassTable::hasPreBaseForm(LEUnicode ch) const
 374 {
 375     return hasPreBaseForm(getCharClass(ch));
 376 }
 377 
 378 inline le_bool IndicClassTable::hasAboveBaseForm(LEUnicode ch) const
 379 {
 380     return hasAboveBaseForm(getCharClass(ch));
 381 }
 382 U_NAMESPACE_END
 383 #endif