1 /*
   2  * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.awt.font;
  27 
  28 import java.io.IOException;
  29 import java.io.ObjectOutputStream;
  30 import java.util.Arrays;
  31 import java.util.Comparator;
  32 import java.util.EnumSet;
  33 import java.util.Set;
  34 import sun.misc.SharedSecrets;
  35 
  36 /**
  37  * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
  38  * digits to other Unicode decimal digits.  Users of this class will
  39  * primarily be people who wish to present data using
  40  * national digit shapes, but find it more convenient to represent the
  41  * data internally using Latin-1 (European) digits.  This does not
  42  * interpret the deprecated numeric shape selector character (U+206E).
  43  * <p>
  44  * Instances of <code>NumericShaper</code> are typically applied
  45  * as attributes to text with the
  46  * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
  47  * of the <code>TextAttribute</code> class.
  48  * For example, this code snippet causes a <code>TextLayout</code> to
  49  * shape European digits to Arabic in an Arabic context:<br>
  50  * <blockquote><pre>
  51  * Map map = new HashMap();
  52  * map.put(TextAttribute.NUMERIC_SHAPING,
  53  *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
  54  * FontRenderContext frc = ...;
  55  * TextLayout layout = new TextLayout(text, map, frc);
  56  * layout.draw(g2d, x, y);
  57  * </pre></blockquote>
  58  * <br>
  59  * It is also possible to perform numeric shaping explicitly using instances
  60  * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
  61  * <blockquote><pre>
  62  * char[] text = ...;
  63  * // shape all EUROPEAN digits (except zero) to ARABIC digits
  64  * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
  65  * shaper.shape(text, start, count);
  66  *
  67  * // shape European digits to ARABIC digits if preceding text is Arabic, or
  68  * // shape European digits to TAMIL digits if preceding text is Tamil, or
  69  * // leave European digits alone if there is no preceding text, or
  70  * // preceding text is neither Arabic nor Tamil
  71  * NumericShaper shaper =
  72  *     NumericShaper.getContextualShaper(NumericShaper.ARABIC |
  73  *                                         NumericShaper.TAMIL,
  74  *                                       NumericShaper.EUROPEAN);
  75  * shaper.shape(text, start, count);
  76  * </pre></blockquote>
  77  *
  78  * <p><b>Bit mask- and enum-based Unicode ranges</b></p>
  79  *
  80  * <p>This class supports two different programming interfaces to
  81  * represent Unicode ranges for script-specific digits: bit
  82  * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
  83  * enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
  84  * Multiple ranges can be specified by ORing bit mask-based constants,
  85  * such as:
  86  * <blockquote><pre>
  87  * NumericShaper.ARABIC | NumericShaper.TAMIL
  88  * </pre></blockquote>
  89  * or creating a {@code Set} with the {@link NumericShaper.Range}
  90  * constants, such as:
  91  * <blockquote><pre>
 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
  93  * </pre></blockquote>
  94  * The enum-based ranges are a super set of the bit mask-based ones.
  95  *
  96  * <p>If the two interfaces are mixed (including serialization),
  97  * Unicode range values are mapped to their counterparts where such
  98  * mapping is possible, such as {@code NumericShaper.Range.ARABIC}
  99  * from/to {@code NumericShaper.ARABIC}.  If any unmappable range
 100  * values are specified, such as {@code NumericShaper.Range.BALINESE},
 101  * those ranges are ignored.
 102  *
 103  * <p><b>Decimal Digits Precedence</b></p>
 104  *
 105  * <p>A Unicode range may have more than one set of decimal digits. If
 106  * multiple decimal digits sets are specified for the same Unicode
 107  * range, one of the sets will take precedence as follows.
 108  *
 109  * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence.">
 110  *    <tr>
 111  *       <th class="TableHeadingColor">Unicode Range</th>
 112  *       <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th>
 113  *       <th class="TableHeadingColor">Precedence</th>
 114  *    </tr>
 115  *    <tr>
 116  *       <td rowspan="2">Arabic</td>
 117  *       <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br>
 118  *           {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
 119  *       <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td>
 120  *    </tr>
 121  *    <tr>
 122  *       <td>{@link NumericShaper.Range#ARABIC}<br>
 123  *           {@link NumericShaper.Range#EASTERN_ARABIC}</td>
 124  *       <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td>
 125  *    </tr>
 126  *    <tr>
 127  *       <td>Tai Tham</td>
 128  *       <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br>
 129  *           {@link NumericShaper.Range#TAI_THAM_THAM}</td>
 130  *       <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td>
 131  *    </tr>
 132  * </table>
 133  *
 134  * @since 1.4
 135  */
 136 
 137 public final class NumericShaper implements java.io.Serializable {
 138 
 139     // For access from java.text.Bidi
 140     static {
 141         if (SharedSecrets.getJavaAWTFontAccess() == null) {
 142             SharedSecrets.setJavaAWTFontAccess(new JavaAWTFontAccessImpl());
 143         }
 144     }
 145 
 146     /**
 147      * A {@code NumericShaper.Range} represents a Unicode range of a
 148      * script having its own decimal digits. For example, the {@link
 149      * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
 150      * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
 151      *
 152      * <p>The <code>Range</code> enum replaces the traditional bit
 153      * mask-based values (e.g., {@link NumericShaper#ARABIC}), and
 154      * supports more Unicode ranges than the bit mask-based ones. For
 155      * example, the following code using the bit mask:
 156      * <blockquote><pre>
 157      * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
 158      *                                     NumericShaper.TAMIL,
 159      *                                   NumericShaper.EUROPEAN);
 160      * </pre></blockquote>
 161      * can be written using this enum as:
 162      * <blockquote><pre>
 163      * NumericShaper.getContextualShaper(EnumSet.of(
 164      *                                     NumericShaper.Range.ARABIC,
 165      *                                     NumericShaper.Range.TAMIL),
 166      *                                   NumericShaper.Range.EUROPEAN);
 167      * </pre></blockquote>
 168      *
 169      * @since 1.7
 170      */
 171     public static enum Range {
 172         // The order of EUROPEAN to MOGOLIAN must be consistent
 173         // with the bitmask-based constants.
 174         /**
 175          * The Latin (European) range with the Latin (ASCII) digits.
 176          */
 177         EUROPEAN        ('\u0030', '\u0000', '\u0300'),
 178         /**
 179          * The Arabic range with the Arabic-Indic digits.
 180          */
 181         ARABIC          ('\u0660', '\u0600', '\u0780'),
 182         /**
 183          * The Arabic range with the Eastern Arabic-Indic digits.
 184          */
 185         EASTERN_ARABIC  ('\u06f0', '\u0600', '\u0780'),
 186         /**
 187          * The Devanagari range with the Devanagari digits.
 188          */
 189         DEVANAGARI      ('\u0966', '\u0900', '\u0980'),
 190         /**
 191          * The Bengali range with the Bengali digits.
 192          */
 193         BENGALI         ('\u09e6', '\u0980', '\u0a00'),
 194         /**
 195          * The Gurmukhi range with the Gurmukhi digits.
 196          */
 197         GURMUKHI        ('\u0a66', '\u0a00', '\u0a80'),
 198         /**
 199          * The Gujarati range with the Gujarati digits.
 200          */
 201         GUJARATI        ('\u0ae6', '\u0b00', '\u0b80'),
 202         /**
 203          * The Oriya range with the Oriya digits.
 204          */
 205         ORIYA           ('\u0b66', '\u0b00', '\u0b80'),
 206         /**
 207          * The Tamil range with the Tamil digits.
 208          */
 209         TAMIL           ('\u0be6', '\u0b80', '\u0c00'),
 210         /**
 211          * The Telugu range with the Telugu digits.
 212          */
 213         TELUGU          ('\u0c66', '\u0c00', '\u0c80'),
 214         /**
 215          * The Kannada range with the Kannada digits.
 216          */
 217         KANNADA         ('\u0ce6', '\u0c80', '\u0d00'),
 218         /**
 219          * The Malayalam range with the Malayalam digits.
 220          */
 221         MALAYALAM       ('\u0d66', '\u0d00', '\u0d80'),
 222         /**
 223          * The Thai range with the Thai digits.
 224          */
 225         THAI            ('\u0e50', '\u0e00', '\u0e80'),
 226         /**
 227          * The Lao range with the Lao digits.
 228          */
 229         LAO             ('\u0ed0', '\u0e80', '\u0f00'),
 230         /**
 231          * The Tibetan range with the Tibetan digits.
 232          */
 233         TIBETAN         ('\u0f20', '\u0f00', '\u1000'),
 234         /**
 235          * The Myanmar range with the Myanmar digits.
 236          */
 237         MYANMAR         ('\u1040', '\u1000', '\u1080'),
 238         /**
 239          * The Ethiopic range with the Ethiopic digits. Ethiopic
 240          * does not have a decimal digit 0 so Latin (European) 0 is
 241          * used.
 242          */
 243         ETHIOPIC        ('\u1369', '\u1200', '\u1380') {
 244             @Override
 245             char getNumericBase() { return 1; }
 246         },
 247         /**
 248          * The Khmer range with the Khmer digits.
 249          */
 250         KHMER           ('\u17e0', '\u1780', '\u1800'),
 251         /**
 252          * The Mongolian range with the Mongolian digits.
 253          */
 254         MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
 255         // The order of EUROPEAN to MOGOLIAN must be consistent
 256         // with the bitmask-based constants.
 257 
 258         /**
 259          * The N'Ko range with the N'Ko digits.
 260          */
 261         NKO             ('\u07c0', '\u07c0', '\u0800'),
 262         /**
 263          * The Myanmar range with the Myanmar Shan digits.
 264          */
 265         MYANMAR_SHAN    ('\u1090', '\u1000', '\u10a0'),
 266         /**
 267          * The Limbu range with the Limbu digits.
 268          */
 269         LIMBU           ('\u1946', '\u1900', '\u1950'),
 270         /**
 271          * The New Tai Lue range with the New Tai Lue digits.
 272          */
 273         NEW_TAI_LUE     ('\u19d0', '\u1980', '\u19e0'),
 274         /**
 275          * The Balinese range with the Balinese digits.
 276          */
 277         BALINESE        ('\u1b50', '\u1b00', '\u1b80'),
 278         /**
 279          * The Sundanese range with the Sundanese digits.
 280          */
 281         SUNDANESE       ('\u1bb0', '\u1b80', '\u1bc0'),
 282         /**
 283          * The Lepcha range with the Lepcha digits.
 284          */
 285         LEPCHA          ('\u1c40', '\u1c00', '\u1c50'),
 286         /**
 287          * The Ol Chiki range with the Ol Chiki digits.
 288          */
 289         OL_CHIKI        ('\u1c50', '\u1c50', '\u1c80'),
 290         /**
 291          * The Vai range with the Vai digits.
 292          */
 293         VAI             ('\ua620', '\ua500', '\ua640'),
 294         /**
 295          * The Saurashtra range with the Saurashtra digits.
 296          */
 297         SAURASHTRA      ('\ua8d0', '\ua880', '\ua8e0'),
 298         /**
 299          * The Kayah Li range with the Kayah Li digits.
 300          */
 301         KAYAH_LI        ('\ua900', '\ua900', '\ua930'),
 302         /**
 303          * The Cham range with the Cham digits.
 304          */
 305         CHAM            ('\uaa50', '\uaa00', '\uaa60'),
 306         /**
 307          * The Tai Tham Hora range with the Tai Tham Hora digits.
 308          */
 309         TAI_THAM_HORA   ('\u1a80', '\u1a20', '\u1ab0'),
 310         /**
 311          * The Tai Tham Tham range with the Tai Tham Tham digits.
 312          */
 313         TAI_THAM_THAM   ('\u1a90', '\u1a20', '\u1ab0'),
 314         /**
 315          * The Javanese range with the Javanese digits.
 316          */
 317         JAVANESE        ('\ua9d0', '\ua980', '\ua9e0'),
 318         /**
 319          * The Meetei Mayek range with the Meetei Mayek digits.
 320          */
 321         MEETEI_MAYEK    ('\uabf0', '\uabc0', '\uac00');
 322 
 323         private static int toRangeIndex(Range script) {
 324             int index = script.ordinal();
 325             return index < NUM_KEYS ? index : -1;
 326         }
 327 
 328         private static Range indexToRange(int index) {
 329             return index < NUM_KEYS ? Range.values()[index] : null;
 330         }
 331 
 332         private static int toRangeMask(Set<Range> ranges) {
 333             int m = 0;
 334             for (Range range : ranges) {
 335                 int index = range.ordinal();
 336                 if (index < NUM_KEYS) {
 337                     m |= 1 << index;
 338                 }
 339             }
 340             return m;
 341         }
 342 
 343         private static Set<Range> maskToRangeSet(int mask) {
 344             Set<Range> set = EnumSet.noneOf(Range.class);
 345             Range[] a = Range.values();
 346             for (int i = 0; i < NUM_KEYS; i++) {
 347                 if ((mask & (1 << i)) != 0) {
 348                     set.add(a[i]);
 349                 }
 350             }
 351             return set;
 352         }
 353 
 354         // base character of range digits
 355         private final int base;
 356         // Unicode range
 357         private final int start, // inclusive
 358                           end;   // exclusive
 359 
 360         private Range(int base, int start, int end) {
 361             this.base = base - ('0' + getNumericBase());
 362             this.start = start;
 363             this.end = end;
 364         }
 365 
 366         private int getDigitBase() {
 367             return base;
 368         }
 369 
 370         char getNumericBase() {
 371             return 0;
 372         }
 373 
 374         private boolean inRange(int c) {
 375             return start <= c && c < end;
 376         }
 377     }
 378 
    /** index of context for contextual shaping - values range from 0 to 18 */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which
     *  digit ranges to shape (bits 0-18)
     */
    private int mask;

    /**
     * The context {@code Range} for contextual shaping or the {@code
     * Range} for non-contextual shaping. {@code null} for the bit
     * mask-based API.
     *
     * @since 1.7
     */
    private Range shapingRange;

    /**
     * {@code Set<Range>} indicating which Unicode ranges to
     * shape. {@code null} for the bit mask-based API.
     */
    private transient Set<Range> rangeSet;

    /**
     * rangeSet.toArray() value. Sorted by Range.base when the number
     * of elements is greater than BSEARCH_THRESHOLD.
     */
    private transient Range[] rangeArray;

    /**
     * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
     */
    private static final int BSEARCH_THRESHOLD = 3;

    // Explicit stream version identifier; this class implements
    // java.io.Serializable.
    private static final long serialVersionUID = -8022764705923730308L;
 414 
    // Bit mask-based range constants. Each value is a single bit whose
    // position equals the corresponding private *_KEY index.

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges (bits 0 to 18). Use
     * {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    public static final int ALL_RANGES = 0x0007ffff;
 482 
    // Index of each bit mask-based range. The public constant of the same
    // name is 1 << key, and the bases and contexts tables list their
    // entries in this same order.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    // Number of bit mask-based ranges (19). Range constants whose ordinal is
    // NUM_KEYS or greater have no bit mask counterpart.
    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High (sign) bit of an int; the mask field's documentation describes
    // the high bit as the contextual-shaping flag.
    private static final int CONTEXTUAL_MASK = 1<<31;
 506 
    // Digit offsets of the bit mask-based ranges, in *_KEY order. Each entry
    // is the range's first digit character minus the European digit with the
    // same value: '0' (U+0030) everywhere except ETHIOPIC, whose first digit
    // is one and is therefore measured against '1' (U+0031).
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };
 528 
    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Character bounds of the bit mask-based ranges, in *_KEY order. Entry
    // 2*key is the first character of the range and entry 2*key + 1 is the
    // first character past it (getContextKey treats the upper bound as
    // exclusive). The trailing U+FFFF entry terminates the table so that
    // contexts[i + 1] can be read for every even index i.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };
 553 
 554     // assume most characters are near each other so probing the cache is infrequent,
 555     // and a linear probe is ok.
 556 
 557     private static int ctCache = 0;
 558     private static int ctCacheLimit = contexts.length - 2;
 559 
 560     // warning, synchronize access to this as it modifies state
 561     private static int getContextKey(char c) {
 562         if (c < contexts[ctCache]) {
 563             while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
 564         } else if (c >= contexts[ctCache + 1]) {
 565             while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
 566         }
 567 
 568         // if we're not in a known range, then return EUROPEAN as the range key
 569         return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
 570     }
 571 
 572     // cache for the NumericShaper.Range version
 573     private transient volatile Range currentRange = Range.EUROPEAN;
 574 
 575     private Range rangeForCodePoint(final int codepoint) {
 576         if (currentRange.inRange(codepoint)) {
 577             return currentRange;
 578         }
 579 
 580         final Range[] ranges = rangeArray;
 581         if (ranges.length > BSEARCH_THRESHOLD) {
 582             int lo = 0;
 583             int hi = ranges.length - 1;
 584             while (lo <= hi) {
 585                 int mid = (lo + hi) / 2;
 586                 Range range = ranges[mid];
 587                 if (codepoint < range.start) {
 588                     hi = mid - 1;
 589                 } else if (codepoint >= range.end) {
 590                     lo = mid + 1;
 591                 } else {
 592                     currentRange = range;
 593                     return range;
 594                 }
 595             }
 596         } else {
 597             for (int i = 0; i < ranges.length; i++) {
 598                 if (ranges[i].inRange(codepoint)) {
 599                     return ranges[i];
 600                 }
 601             }
 602         }
 603         return Range.EUROPEAN;
 604     }
 605 
 606     /*
 607      * A range table of strong directional characters (types L, R, AL).
 608      * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
 609      * characters, odd (right) indexes are starts of ranges of strong directional
 610      * characters.
 611      */
 612     private static int[] strongTable = {
 613         0x0000, 0x0041,
 614         0x005b, 0x0061,
 615         0x007b, 0x00aa,
 616         0x00ab, 0x00b5,
 617         0x00b6, 0x00ba,
 618         0x00bb, 0x00c0,
 619         0x00d7, 0x00d8,
 620         0x00f7, 0x00f8,
 621         0x02b9, 0x02bb,
 622         0x02c2, 0x02d0,
 623         0x02d2, 0x02e0,
 624         0x02e5, 0x02ee,
 625         0x02ef, 0x0370,
 626         0x0374, 0x0376,
 627         0x037e, 0x0386,
 628         0x0387, 0x0388,
 629         0x03f6, 0x03f7,
 630         0x0483, 0x048a,
 631         0x058a, 0x05be,
 632         0x05bf, 0x05c0,
 633         0x05c1, 0x05c3,
 634         0x05c4, 0x05c6,
 635         0x05c7, 0x05d0,
 636         0x0600, 0x0608,
 637         0x0609, 0x060b,
 638         0x060c, 0x060d,
 639         0x060e, 0x061b,
 640         0x064b, 0x066d,
 641         0x0670, 0x0671,
 642         0x06d6, 0x06e5,
 643         0x06e7, 0x06ee,
 644         0x06f0, 0x06fa,
 645         0x0711, 0x0712,
 646         0x0730, 0x074d,
 647         0x07a6, 0x07b1,
 648         0x07eb, 0x07f4,
 649         0x07f6, 0x07fa,
 650         0x0816, 0x081a,
 651         0x081b, 0x0824,
 652         0x0825, 0x0828,
 653         0x0829, 0x0830,
 654         0x0859, 0x085e,
 655         0x08e4, 0x0903,
 656         0x093a, 0x093b,
 657         0x093c, 0x093d,
 658         0x0941, 0x0949,
 659         0x094d, 0x094e,
 660         0x0951, 0x0958,
 661         0x0962, 0x0964,
 662         0x0981, 0x0982,
 663         0x09bc, 0x09bd,
 664         0x09c1, 0x09c7,
 665         0x09cd, 0x09ce,
 666         0x09e2, 0x09e6,
 667         0x09f2, 0x09f4,
 668         0x09fb, 0x0a03,
 669         0x0a3c, 0x0a3e,
 670         0x0a41, 0x0a59,
 671         0x0a70, 0x0a72,
 672         0x0a75, 0x0a83,
 673         0x0abc, 0x0abd,
 674         0x0ac1, 0x0ac9,
 675         0x0acd, 0x0ad0,
 676         0x0ae2, 0x0ae6,
 677         0x0af1, 0x0b02,
 678         0x0b3c, 0x0b3d,
 679         0x0b3f, 0x0b40,
 680         0x0b41, 0x0b47,
 681         0x0b4d, 0x0b57,
 682         0x0b62, 0x0b66,
 683         0x0b82, 0x0b83,
 684         0x0bc0, 0x0bc1,
 685         0x0bcd, 0x0bd0,
 686         0x0bf3, 0x0c01,
 687         0x0c3e, 0x0c41,
 688         0x0c46, 0x0c58,
 689         0x0c62, 0x0c66,
 690         0x0c78, 0x0c7f,
 691         0x0cbc, 0x0cbd,
 692         0x0ccc, 0x0cd5,
 693         0x0ce2, 0x0ce6,
 694         0x0d41, 0x0d46,
 695         0x0d4d, 0x0d4e,
 696         0x0d62, 0x0d66,
 697         0x0dca, 0x0dcf,
 698         0x0dd2, 0x0dd8,
 699         0x0e31, 0x0e32,
 700         0x0e34, 0x0e40,
 701         0x0e47, 0x0e4f,
 702         0x0eb1, 0x0eb2,
 703         0x0eb4, 0x0ebd,
 704         0x0ec8, 0x0ed0,
 705         0x0f18, 0x0f1a,
 706         0x0f35, 0x0f36,
 707         0x0f37, 0x0f38,
 708         0x0f39, 0x0f3e,
 709         0x0f71, 0x0f7f,
 710         0x0f80, 0x0f85,
 711         0x0f86, 0x0f88,
 712         0x0f8d, 0x0fbe,
 713         0x0fc6, 0x0fc7,
 714         0x102d, 0x1031,
 715         0x1032, 0x1038,
 716         0x1039, 0x103b,
 717         0x103d, 0x103f,
 718         0x1058, 0x105a,
 719         0x105e, 0x1061,
 720         0x1071, 0x1075,
 721         0x1082, 0x1083,
 722         0x1085, 0x1087,
 723         0x108d, 0x108e,
 724         0x109d, 0x109e,
 725         0x135d, 0x1360,
 726         0x1390, 0x13a0,
 727         0x1400, 0x1401,
 728         0x1680, 0x1681,
 729         0x169b, 0x16a0,
 730         0x1712, 0x1720,
 731         0x1732, 0x1735,
 732         0x1752, 0x1760,
 733         0x1772, 0x1780,
 734         0x17b4, 0x17b6,
 735         0x17b7, 0x17be,
 736         0x17c6, 0x17c7,
 737         0x17c9, 0x17d4,
 738         0x17db, 0x17dc,
 739         0x17dd, 0x17e0,
 740         0x17f0, 0x1810,
 741         0x18a9, 0x18aa,
 742         0x1920, 0x1923,
 743         0x1927, 0x1929,
 744         0x1932, 0x1933,
 745         0x1939, 0x1946,
 746         0x19de, 0x1a00,
 747         0x1a17, 0x1a19,
 748         0x1a56, 0x1a57,
 749         0x1a58, 0x1a61,
 750         0x1a62, 0x1a63,
 751         0x1a65, 0x1a6d,
 752         0x1a73, 0x1a80,
 753         0x1b00, 0x1b04,
 754         0x1b34, 0x1b35,
 755         0x1b36, 0x1b3b,
 756         0x1b3c, 0x1b3d,
 757         0x1b42, 0x1b43,
 758         0x1b6b, 0x1b74,
 759         0x1b80, 0x1b82,
 760         0x1ba2, 0x1ba6,
 761         0x1ba8, 0x1baa,
 762         0x1bab, 0x1bac,
 763         0x1be6, 0x1be7,
 764         0x1be8, 0x1bea,
 765         0x1bed, 0x1bee,
 766         0x1bef, 0x1bf2,
 767         0x1c2c, 0x1c34,
 768         0x1c36, 0x1c3b,
 769         0x1cd0, 0x1cd3,
 770         0x1cd4, 0x1ce1,
 771         0x1ce2, 0x1ce9,
 772         0x1ced, 0x1cee,
 773         0x1cf4, 0x1cf5,
 774         0x1dc0, 0x1e00,
 775         0x1fbd, 0x1fbe,
 776         0x1fbf, 0x1fc2,
 777         0x1fcd, 0x1fd0,
 778         0x1fdd, 0x1fe0,
 779         0x1fed, 0x1ff2,
 780         0x1ffd, 0x200e,
 781         0x2010, 0x2071,
 782         0x2074, 0x207f,
 783         0x2080, 0x2090,
 784         0x20a0, 0x2102,
 785         0x2103, 0x2107,
 786         0x2108, 0x210a,
 787         0x2114, 0x2115,
 788         0x2116, 0x2119,
 789         0x211e, 0x2124,
 790         0x2125, 0x2126,
 791         0x2127, 0x2128,
 792         0x2129, 0x212a,
 793         0x212e, 0x212f,
 794         0x213a, 0x213c,
 795         0x2140, 0x2145,
 796         0x214a, 0x214e,
 797         0x2150, 0x2160,
 798         0x2189, 0x2336,
 799         0x237b, 0x2395,
 800         0x2396, 0x249c,
 801         0x24ea, 0x26ac,
 802         0x26ad, 0x2800,
 803         0x2900, 0x2c00,
 804         0x2ce5, 0x2ceb,
 805         0x2cef, 0x2cf2,
 806         0x2cf9, 0x2d00,
 807         0x2d7f, 0x2d80,
 808         0x2de0, 0x3005,
 809         0x3008, 0x3021,
 810         0x302a, 0x3031,
 811         0x3036, 0x3038,
 812         0x303d, 0x3041,
 813         0x3099, 0x309d,
 814         0x30a0, 0x30a1,
 815         0x30fb, 0x30fc,
 816         0x31c0, 0x31f0,
 817         0x321d, 0x3220,
 818         0x3250, 0x3260,
 819         0x327c, 0x327f,
 820         0x32b1, 0x32c0,
 821         0x32cc, 0x32d0,
 822         0x3377, 0x337b,
 823         0x33de, 0x33e0,
 824         0x33ff, 0x3400,
 825         0x4dc0, 0x4e00,
 826         0xa490, 0xa4d0,
 827         0xa60d, 0xa610,
 828         0xa66f, 0xa680,
 829         0xa69f, 0xa6a0,
 830         0xa6f0, 0xa6f2,
 831         0xa700, 0xa722,
 832         0xa788, 0xa789,
 833         0xa802, 0xa803,
 834         0xa806, 0xa807,
 835         0xa80b, 0xa80c,
 836         0xa825, 0xa827,
 837         0xa828, 0xa830,
 838         0xa838, 0xa840,
 839         0xa874, 0xa880,
 840         0xa8c4, 0xa8ce,
 841         0xa8e0, 0xa8f2,
 842         0xa926, 0xa92e,
 843         0xa947, 0xa952,
 844         0xa980, 0xa983,
 845         0xa9b3, 0xa9b4,
 846         0xa9b6, 0xa9ba,
 847         0xa9bc, 0xa9bd,
 848         0xaa29, 0xaa2f,
 849         0xaa31, 0xaa33,
 850         0xaa35, 0xaa40,
 851         0xaa43, 0xaa44,
 852         0xaa4c, 0xaa4d,
 853         0xaab0, 0xaab1,
 854         0xaab2, 0xaab5,
 855         0xaab7, 0xaab9,
 856         0xaabe, 0xaac0,
 857         0xaac1, 0xaac2,
 858         0xaaec, 0xaaee,
 859         0xaaf6, 0xab01,
 860         0xabe5, 0xabe6,
 861         0xabe8, 0xabe9,
 862         0xabed, 0xabf0,
 863         0xfb1e, 0xfb1f,
 864         0xfb29, 0xfb2a,
 865         0xfd3e, 0xfd50,
 866         0xfdfd, 0xfe70,
 867         0xfeff, 0xff21,
 868         0xff3b, 0xff41,
 869         0xff5b, 0xff66,
 870         0xffe0, 0x10000,
 871         0x10101, 0x10102,
 872         0x10140, 0x101d0,
 873         0x101fd, 0x10280,
 874         0x1091f, 0x10920,
 875         0x10a01, 0x10a10,
 876         0x10a38, 0x10a40,
 877         0x10b39, 0x10b40,
 878         0x10e60, 0x11000,
 879         0x11001, 0x11002,
 880         0x11038, 0x11047,
 881         0x11052, 0x11066,
 882         0x11080, 0x11082,
 883         0x110b3, 0x110b7,
 884         0x110b9, 0x110bb,
 885         0x11100, 0x11103,
 886         0x11127, 0x1112c,
 887         0x1112d, 0x11136,
 888         0x11180, 0x11182,
 889         0x111b6, 0x111bf,
 890         0x116ab, 0x116ac,
 891         0x116ad, 0x116ae,
 892         0x116b0, 0x116b6,
 893         0x116b7, 0x116c0,
 894         0x16f8f, 0x16f93,
 895         0x1d167, 0x1d16a,
 896         0x1d173, 0x1d183,
 897         0x1d185, 0x1d18c,
 898         0x1d1aa, 0x1d1ae,
 899         0x1d200, 0x1d360,
 900         0x1d6db, 0x1d6dc,
 901         0x1d715, 0x1d716,
 902         0x1d74f, 0x1d750,
 903         0x1d789, 0x1d78a,
 904         0x1d7c3, 0x1d7c4,
 905         0x1d7ce, 0x1ee00,
 906         0x1eef0, 0x1f110,
 907         0x1f16a, 0x1f170,
 908         0x1f300, 0x1f48c,
 909         0x1f48d, 0x1f524,
 910         0x1f525, 0x20000,
 911         0xe0001, 0xf0000,
 912         0x10fffe, 0x10ffff // sentinel
 913     };
 914 
 915 
 916     // use a binary search with a cache
 917 
 918     private transient volatile int stCache = 0;
 919 
 920     private boolean isStrongDirectional(char c) {
 921         int cachedIndex = stCache;
 922         if (c < strongTable[cachedIndex]) {
 923             cachedIndex = search(c, strongTable, 0, cachedIndex);
 924         } else if (c >= strongTable[cachedIndex + 1]) {
 925             cachedIndex = search(c, strongTable, cachedIndex + 1,
 926                                  strongTable.length - cachedIndex - 1);
 927         }
 928         boolean val = (cachedIndex & 0x1) == 1;
 929         stCache = cachedIndex;
 930         return val;
 931     }
 932 
 933     private static int getKeyFromMask(int mask) {
 934         int key = 0;
 935         while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
 936             ++key;
 937         }
 938         if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
 939             throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
 940         }
 941         return key;
 942     }
 943 
 944     /**
 945      * Returns a shaper for the provided unicode range.  All
 946      * Latin-1 (EUROPEAN) digits are converted
 947      * to the corresponding decimal unicode digits.
 948      * @param singleRange the specified Unicode range
 949      * @return a non-contextual numeric shaper
 950      * @throws IllegalArgumentException if the range is not a single range
 951      */
 952     public static NumericShaper getShaper(int singleRange) {
 953         int key = getKeyFromMask(singleRange);
 954         return new NumericShaper(key, singleRange);
 955     }
 956 
 957     /**
 958      * Returns a shaper for the provided Unicode
 959      * range. All Latin-1 (EUROPEAN) digits are converted to the
 960      * corresponding decimal digits of the specified Unicode range.
 961      *
 962      * @param singleRange the Unicode range given by a {@link
 963      *                    NumericShaper.Range} constant.
 964      * @return a non-contextual {@code NumericShaper}.
 965      * @throws NullPointerException if {@code singleRange} is {@code null}
 966      * @since 1.7
 967      */
 968     public static NumericShaper getShaper(Range singleRange) {
 969         return new NumericShaper(singleRange, EnumSet.of(singleRange));
 970     }
 971 
 972     /**
 973      * Returns a contextual shaper for the provided unicode range(s).
 974      * Latin-1 (EUROPEAN) digits are converted to the decimal digits
 975      * corresponding to the range of the preceding text, if the
 976      * range is one of the provided ranges.  Multiple ranges are
 977      * represented by or-ing the values together, such as,
 978      * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
 979      * shaper assumes EUROPEAN as the starting context, that is, if
 980      * EUROPEAN digits are encountered before any strong directional
 981      * text in the string, the context is presumed to be EUROPEAN, and
 982      * so the digits will not shape.
 983      * @param ranges the specified Unicode ranges
 984      * @return a shaper for the specified ranges
 985      */
 986     public static NumericShaper getContextualShaper(int ranges) {
 987         ranges |= CONTEXTUAL_MASK;
 988         return new NumericShaper(EUROPEAN_KEY, ranges);
 989     }
 990 
 991     /**
 992      * Returns a contextual shaper for the provided Unicode
 993      * range(s). The Latin-1 (EUROPEAN) digits are converted to the
 994      * decimal digits corresponding to the range of the preceding
 995      * text, if the range is one of the provided ranges.
 996      *
 997      * <p>The shaper assumes EUROPEAN as the starting context, that
 998      * is, if EUROPEAN digits are encountered before any strong
 999      * directional text in the string, the context is presumed to be
1000      * EUROPEAN, and so the digits will not shape.
1001      *
1002      * @param ranges the specified Unicode ranges
1003      * @return a contextual shaper for the specified ranges
1004      * @throws NullPointerException if {@code ranges} is {@code null}.
1005      * @since 1.7
1006      */
1007     public static NumericShaper getContextualShaper(Set<Range> ranges) {
1008         NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges);
1009         shaper.mask = CONTEXTUAL_MASK;
1010         return shaper;
1011     }
1012 
1013     /**
1014      * Returns a contextual shaper for the provided unicode range(s).
1015      * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
1016      * corresponding to the range of the preceding text, if the
1017      * range is one of the provided ranges.  Multiple ranges are
1018      * represented by or-ing the values together, for example,
1019      * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
1020      * shaper uses defaultContext as the starting context.
1021      * @param ranges the specified Unicode ranges
1022      * @param defaultContext the starting context, such as
1023      * <code>NumericShaper.EUROPEAN</code>
1024      * @return a shaper for the specified Unicode ranges.
1025      * @throws IllegalArgumentException if the specified
1026      * <code>defaultContext</code> is not a single valid range.
1027      */
1028     public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
1029         int key = getKeyFromMask(defaultContext);
1030         ranges |= CONTEXTUAL_MASK;
1031         return new NumericShaper(key, ranges);
1032     }
1033 
1034     /**
1035      * Returns a contextual shaper for the provided Unicode range(s).
1036      * The Latin-1 (EUROPEAN) digits will be converted to the decimal
1037      * digits corresponding to the range of the preceding text, if the
1038      * range is one of the provided ranges. The shaper uses {@code
1039      * defaultContext} as the starting context.
1040      *
1041      * @param ranges the specified Unicode ranges
1042      * @param defaultContext the starting context, such as
1043      *                       {@code NumericShaper.Range.EUROPEAN}
1044      * @return a contextual shaper for the specified Unicode ranges.
1045      * @throws NullPointerException
1046      *         if {@code ranges} or {@code defaultContext} is {@code null}
1047      * @since 1.7
1048      */
1049     public static NumericShaper getContextualShaper(Set<Range> ranges,
1050                                                     Range defaultContext) {
1051         if (defaultContext == null) {
1052             throw new NullPointerException();
1053         }
1054         NumericShaper shaper = new NumericShaper(defaultContext, ranges);
1055         shaper.mask = CONTEXTUAL_MASK;
1056         return shaper;
1057     }
1058 
1059     /**
1060      * Private constructor.
1061      */
1062     private NumericShaper(int key, int mask) {
1063         this.key = key;
1064         this.mask = mask;
1065     }
1066 
1067     private NumericShaper(Range defaultContext, Set<Range> ranges) {
1068         shapingRange = defaultContext;
1069         rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
1070 
1071         // Give precedance to EASTERN_ARABIC if both ARABIC and
1072         // EASTERN_ARABIC are specified.
1073         if (rangeSet.contains(Range.EASTERN_ARABIC)
1074             && rangeSet.contains(Range.ARABIC)) {
1075             rangeSet.remove(Range.ARABIC);
1076         }
1077 
1078         // As well as the above case, give precedance to TAI_THAM_THAM if both
1079         // TAI_THAM_HORA and TAI_THAM_THAM are specified.
1080         if (rangeSet.contains(Range.TAI_THAM_THAM)
1081             && rangeSet.contains(Range.TAI_THAM_HORA)) {
1082             rangeSet.remove(Range.TAI_THAM_HORA);
1083         }
1084 
1085         rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
1086         if (rangeArray.length > BSEARCH_THRESHOLD) {
1087             // sort rangeArray for binary search
1088             Arrays.sort(rangeArray,
1089                         new Comparator<Range>() {
1090                             public int compare(Range s1, Range s2) {
1091                                 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
1092                             }
1093                         });
1094         }
1095     }
1096 
1097     /**
1098      * Converts the digits in the text that occur between start and
1099      * start + count.
1100      * @param text an array of characters to convert
1101      * @param start the index into <code>text</code> to start
1102      *        converting
1103      * @param count the number of characters in <code>text</code>
1104      *        to convert
1105      * @throws IndexOutOfBoundsException if start or start + count is
1106      *        out of bounds
1107      * @throws NullPointerException if text is null
1108      */
1109     public void shape(char[] text, int start, int count) {
1110         checkParams(text, start, count);
1111         if (isContextual()) {
1112             if (rangeSet == null) {
1113                 shapeContextually(text, start, count, key);
1114             } else {
1115                 shapeContextually(text, start, count, shapingRange);
1116             }
1117         } else {
1118             shapeNonContextually(text, start, count);
1119         }
1120     }
1121 
1122     /**
1123      * Converts the digits in the text that occur between start and
1124      * start + count, using the provided context.
1125      * Context is ignored if the shaper is not a contextual shaper.
1126      * @param text an array of characters
1127      * @param start the index into <code>text</code> to start
1128      *        converting
1129      * @param count the number of characters in <code>text</code>
1130      *        to convert
1131      * @param context the context to which to convert the
1132      *        characters, such as <code>NumericShaper.EUROPEAN</code>
1133      * @throws IndexOutOfBoundsException if start or start + count is
1134      *        out of bounds
1135      * @throws NullPointerException if text is null
1136      * @throws IllegalArgumentException if this is a contextual shaper
1137      * and the specified <code>context</code> is not a single valid
1138      * range.
1139      */
1140     public void shape(char[] text, int start, int count, int context) {
1141         checkParams(text, start, count);
1142         if (isContextual()) {
1143             int ctxKey = getKeyFromMask(context);
1144             if (rangeSet == null) {
1145                 shapeContextually(text, start, count, ctxKey);
1146             } else {
1147                 shapeContextually(text, start, count, Range.values()[ctxKey]);
1148             }
1149         } else {
1150             shapeNonContextually(text, start, count);
1151         }
1152     }
1153 
1154     /**
1155      * Converts the digits in the text that occur between {@code
1156      * start} and {@code start + count}, using the provided {@code
1157      * context}. {@code Context} is ignored if the shaper is not a
1158      * contextual shaper.
1159      *
1160      * @param text  a {@code char} array
1161      * @param start the index into {@code text} to start converting
1162      * @param count the number of {@code char}s in {@code text}
1163      *              to convert
1164      * @param context the context to which to convert the characters,
1165      *                such as {@code NumericShaper.Range.EUROPEAN}
1166      * @throws IndexOutOfBoundsException
1167      *         if {@code start} or {@code start + count} is out of bounds
1168      * @throws NullPointerException
1169      *         if {@code text} or {@code context} is null
1170      * @since 1.7
1171      */
1172     public void shape(char[] text, int start, int count, Range context) {
1173         checkParams(text, start, count);
1174         if (context == null) {
1175             throw new NullPointerException("context is null");
1176         }
1177 
1178         if (isContextual()) {
1179             if (rangeSet != null) {
1180                 shapeContextually(text, start, count, context);
1181             } else {
1182                 int key = Range.toRangeIndex(context);
1183                 if (key >= 0) {
1184                     shapeContextually(text, start, count, key);
1185                 } else {
1186                     shapeContextually(text, start, count, shapingRange);
1187                 }
1188             }
1189         } else {
1190             shapeNonContextually(text, start, count);
1191         }
1192     }
1193 
1194     private void checkParams(char[] text, int start, int count) {
1195         if (text == null) {
1196             throw new NullPointerException("text is null");
1197         }
1198         if ((start < 0)
1199             || (start > text.length)
1200             || ((start + count) < 0)
1201             || ((start + count) > text.length)) {
1202             throw new IndexOutOfBoundsException(
1203                 "bad start or count for text of length " + text.length);
1204         }
1205     }
1206 
1207     /**
1208      * Returns a <code>boolean</code> indicating whether or not
1209      * this shaper shapes contextually.
1210      * @return <code>true</code> if this shaper is contextual;
1211      *         <code>false</code> otherwise.
1212      */
1213     public boolean isContextual() {
1214         return (mask & CONTEXTUAL_MASK) != 0;
1215     }
1216 
1217     /**
1218      * Returns an <code>int</code> that ORs together the values for
1219      * all the ranges that will be shaped.
1220      * <p>
1221      * For example, to check if a shaper shapes to Arabic, you would use the
1222      * following:
1223      * <blockquote>
1224      *   {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) &#123; ... }
1225      * </blockquote>
1226      *
1227      * <p>Note that this method supports only the bit mask-based
1228      * ranges. Call {@link #getRangeSet()} for the enum-based ranges.
1229      *
1230      * @return the values for all the ranges to be shaped.
1231      */
1232     public int getRanges() {
1233         return mask & ~CONTEXTUAL_MASK;
1234     }
1235 
1236     /**
1237      * Returns a {@code Set} representing all the Unicode ranges in
1238      * this {@code NumericShaper} that will be shaped.
1239      *
1240      * @return all the Unicode ranges to be shaped.
1241      * @since 1.7
1242      */
1243     public Set<Range> getRangeSet() {
1244         if (rangeSet != null) {
1245             return EnumSet.copyOf(rangeSet);
1246         }
1247         return Range.maskToRangeSet(mask);
1248     }
1249 
1250     /**
1251      * Perform non-contextual shaping.
1252      */
1253     private void shapeNonContextually(char[] text, int start, int count) {
1254         int base;
1255         char minDigit = '0';
1256         if (shapingRange != null) {
1257             base = shapingRange.getDigitBase();
1258             minDigit += shapingRange.getNumericBase();
1259         } else {
1260             base = bases[key];
1261             if (key == ETHIOPIC_KEY) {
1262                 minDigit++; // Ethiopic doesn't use decimal zero
1263             }
1264         }
1265         for (int i = start, e = start + count; i < e; ++i) {
1266             char c = text[i];
1267             if (c >= minDigit && c <= '\u0039') {
1268                 text[i] = (char)(c + base);
1269             }
1270         }
1271     }
1272 
1273     /**
1274      * Perform contextual shaping.
1275      * Synchronized to protect caches used in getContextKey.
1276      */
1277     private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
1278 
1279         // if we don't support this context, then don't shape
1280         if ((mask & (1<<ctxKey)) == 0) {
1281             ctxKey = EUROPEAN_KEY;
1282         }
1283         int lastkey = ctxKey;
1284 
1285         int base = bases[ctxKey];
1286         char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1287 
1288         synchronized (NumericShaper.class) {
1289             for (int i = start, e = start + count; i < e; ++i) {
1290                 char c = text[i];
1291                 if (c >= minDigit && c <= '\u0039') {
1292                     text[i] = (char)(c + base);
1293                 }
1294 
1295                 if (isStrongDirectional(c)) {
1296                     int newkey = getContextKey(c);
1297                     if (newkey != lastkey) {
1298                         lastkey = newkey;
1299 
1300                         ctxKey = newkey;
1301                         if (((mask & EASTERN_ARABIC) != 0) &&
1302                              (ctxKey == ARABIC_KEY ||
1303                               ctxKey == EASTERN_ARABIC_KEY)) {
1304                             ctxKey = EASTERN_ARABIC_KEY;
1305                         } else if (((mask & ARABIC) != 0) &&
1306                              (ctxKey == ARABIC_KEY ||
1307                               ctxKey == EASTERN_ARABIC_KEY)) {
1308                             ctxKey = ARABIC_KEY;
1309                         } else if ((mask & (1<<ctxKey)) == 0) {
1310                             ctxKey = EUROPEAN_KEY;
1311                         }
1312 
1313                         base = bases[ctxKey];
1314 
1315                         minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
1316                     }
1317                 }
1318             }
1319         }
1320     }
1321 
1322     private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
1323         // if we don't support the specified context, then don't shape.
1324         if (ctxKey == null || !rangeSet.contains(ctxKey)) {
1325             ctxKey = Range.EUROPEAN;
1326         }
1327 
1328         Range lastKey = ctxKey;
1329         int base = ctxKey.getDigitBase();
1330         char minDigit = (char)('0' + ctxKey.getNumericBase());
1331         final int end = start + count;
1332         for (int i = start; i < end; ++i) {
1333             char c = text[i];
1334             if (c >= minDigit && c <= '9') {
1335                 text[i] = (char)(c + base);
1336                 continue;
1337             }
1338             if (isStrongDirectional(c)) {
1339                 ctxKey = rangeForCodePoint(c);
1340                 if (ctxKey != lastKey) {
1341                     lastKey = ctxKey;
1342                     base = ctxKey.getDigitBase();
1343                     minDigit = (char)('0' + ctxKey.getNumericBase());
1344                 }
1345             }
1346         }
1347     }
1348 
1349     /**
1350      * Returns a hash code for this shaper.
1351      * @return this shaper's hash code.
1352      * @see java.lang.Object#hashCode
1353      */
1354     public int hashCode() {
1355         int hash = mask;
1356         if (rangeSet != null) {
1357             // Use the CONTEXTUAL_MASK bit only for the enum-based
1358             // NumericShaper. A deserialized NumericShaper might have
1359             // bit masks.
1360             hash &= CONTEXTUAL_MASK;
1361             hash ^= rangeSet.hashCode();
1362         }
1363         return hash;
1364     }
1365 
1366     /**
1367      * Returns {@code true} if the specified object is an instance of
1368      * <code>NumericShaper</code> and shapes identically to this one,
1369      * regardless of the range representations, the bit mask or the
1370      * enum. For example, the following code produces {@code "true"}.
1371      * <blockquote><pre>
1372      * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
1373      * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
1374      * System.out.println(ns1.equals(ns2));
1375      * </pre></blockquote>
1376      *
1377      * @param o the specified object to compare to this
1378      *          <code>NumericShaper</code>
1379      * @return <code>true</code> if <code>o</code> is an instance
1380      *         of <code>NumericShaper</code> and shapes in the same way;
1381      *         <code>false</code> otherwise.
1382      * @see java.lang.Object#equals(java.lang.Object)
1383      */
1384     public boolean equals(Object o) {
1385         if (o != null) {
1386             try {
1387                 NumericShaper rhs = (NumericShaper)o;
1388                 if (rangeSet != null) {
1389                     if (rhs.rangeSet != null) {
1390                         return isContextual() == rhs.isContextual()
1391                             && rangeSet.equals(rhs.rangeSet)
1392                             && shapingRange == rhs.shapingRange;
1393                     }
1394                     return isContextual() == rhs.isContextual()
1395                         && rangeSet.equals(Range.maskToRangeSet(rhs.mask))
1396                         && shapingRange == Range.indexToRange(rhs.key);
1397                 } else if (rhs.rangeSet != null) {
1398                     Set<Range> rset = Range.maskToRangeSet(mask);
1399                     Range srange = Range.indexToRange(key);
1400                     return isContextual() == rhs.isContextual()
1401                         && rset.equals(rhs.rangeSet)
1402                         && srange == rhs.shapingRange;
1403                 }
1404                 return rhs.mask == mask && rhs.key == key;
1405             }
1406             catch (ClassCastException e) {
1407             }
1408         }
1409         return false;
1410     }
1411 
1412     /**
1413      * Returns a <code>String</code> that describes this shaper. This method
1414      * is used for debugging purposes only.
1415      * @return a <code>String</code> describing this shaper.
1416      */
1417     public String toString() {
1418         StringBuilder buf = new StringBuilder(super.toString());
1419 
1420         buf.append("[contextual:").append(isContextual());
1421 
1422         String[] keyNames = null;
1423         if (isContextual()) {
1424             buf.append(", context:");
1425             buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
1426         }
1427 
1428         if (rangeSet == null) {
1429             buf.append(", range(s): ");
1430             boolean first = true;
1431             for (int i = 0; i < NUM_KEYS; ++i) {
1432                 if ((mask & (1 << i)) != 0) {
1433                     if (first) {
1434                         first = false;
1435                     } else {
1436                         buf.append(", ");
1437                     }
1438                     buf.append(Range.values()[i]);
1439                 }
1440             }
1441         } else {
1442             buf.append(", range set: ").append(rangeSet);
1443         }
1444         buf.append(']');
1445 
1446         return buf.toString();
1447     }
1448 
1449     /**
1450      * Returns the index of the high bit in value (assuming le, actually
1451      * power of 2 >= value). value must be positive.
1452      */
1453     private static int getHighBit(int value) {
1454         if (value <= 0) {
1455             return -32;
1456         }
1457 
1458         int bit = 0;
1459 
1460         if (value >= 1 << 16) {
1461             value >>= 16;
1462             bit += 16;
1463         }
1464 
1465         if (value >= 1 << 8) {
1466             value >>= 8;
1467             bit += 8;
1468         }
1469 
1470         if (value >= 1 << 4) {
1471             value >>= 4;
1472             bit += 4;
1473         }
1474 
1475         if (value >= 1 << 2) {
1476             value >>= 2;
1477             bit += 2;
1478         }
1479 
1480         if (value >= 1 << 1) {
1481             bit += 1;
1482         }
1483 
1484         return bit;
1485     }
1486 
1487     /**
1488      * fast binary search over subrange of array.
1489      */
1490     private static int search(int value, int[] array, int start, int length)
1491     {
1492         int power = 1 << getHighBit(length);
1493         int extra = length - power;
1494         int probe = power;
1495         int index = start;
1496 
1497         if (value >= array[index + extra]) {
1498             index += extra;
1499         }
1500 
1501         while (probe > 1) {
1502             probe >>= 1;
1503 
1504             if (value >= array[index + probe]) {
1505                 index += probe;
1506             }
1507         }
1508 
1509         return index;
1510     }
1511 
1512     /**
1513      * Converts the {@code NumericShaper.Range} enum-based parameters,
1514      * if any, to the bit mask-based counterparts and writes this
1515      * object to the {@code stream}. Any enum constants that have no
1516      * bit mask-based counterparts are ignored in the conversion.
1517      *
1518      * @param stream the output stream to write to
1519      * @throws IOException if an I/O error occurs while writing to {@code stream}
1520      * @since 1.7
1521      */
1522     private void writeObject(ObjectOutputStream stream) throws IOException {
1523         if (shapingRange != null) {
1524             int index = Range.toRangeIndex(shapingRange);
1525             if (index >= 0) {
1526                 key = index;
1527             }
1528         }
1529         if (rangeSet != null) {
1530             mask |= Range.toRangeMask(rangeSet);
1531         }
1532         stream.defaultWriteObject();
1533     }
1534 }