1 /* 2 * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.awt.font; 27 28 import java.io.IOException; 29 import java.io.ObjectOutputStream; 30 import java.util.Arrays; 31 import java.util.Comparator; 32 import java.util.EnumSet; 33 import java.util.Set; 34 import sun.misc.SharedSecrets; 35 36 /** 37 * The <code>NumericShaper</code> class is used to convert Latin-1 (European) 38 * digits to other Unicode decimal digits. Users of this class will 39 * primarily be people who wish to present data using 40 * national digit shapes, but find it more convenient to represent the 41 * data internally using Latin-1 (European) digits. This does not 42 * interpret the deprecated numeric shape selector character (U+206E). 
43 * <p> 44 * Instances of <code>NumericShaper</code> are typically applied 45 * as attributes to text with the 46 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute 47 * of the <code>TextAttribute</code> class. 48 * For example, this code snippet causes a <code>TextLayout</code> to 49 * shape European digits to Arabic in an Arabic context:<br> 50 * <blockquote><pre> 51 * Map map = new HashMap(); 52 * map.put(TextAttribute.NUMERIC_SHAPING, 53 * NumericShaper.getContextualShaper(NumericShaper.ARABIC)); 54 * FontRenderContext frc = ...; 55 * TextLayout layout = new TextLayout(text, map, frc); 56 * layout.draw(g2d, x, y); 57 * </pre></blockquote> 58 * <br> 59 * It is also possible to perform numeric shaping explicitly using instances 60 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br> 61 * <blockquote><pre> 62 * char[] text = ...; 63 * // shape all EUROPEAN digits (except zero) to ARABIC digits 64 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC); 65 * shaper.shape(text, start, count); 66 * 67 * // shape European digits to ARABIC digits if preceding text is Arabic, or 68 * // shape European digits to TAMIL digits if preceding text is Tamil, or 69 * // leave European digits alone if there is no preceding text, or 70 * // preceding text is neither Arabic nor Tamil 71 * NumericShaper shaper = 72 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 73 * NumericShaper.TAMIL, 74 * NumericShaper.EUROPEAN); 75 * shaper.shape(text, start, count); 76 * </pre></blockquote> 77 * 78 * <p><b>Bit mask- and enum-based Unicode ranges</b></p> 79 * 80 * <p>This class supports two different programming interfaces to 81 * represent Unicode ranges for script-specific digits: bit 82 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and 83 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}. 
84 * Multiple ranges can be specified by ORing bit mask-based constants, 85 * such as: 86 * <blockquote><pre> 87 * NumericShaper.ARABIC | NumericShaper.TAMIL 88 * </pre></blockquote> 89 * or creating a {@code Set} with the {@link NumericShaper.Range} 90 * constants, such as: 91 * <blockquote><pre> 92 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL) 93 * </pre></blockquote> 94 * The enum-based ranges are a superset of the bit mask-based ones. 95 * 96 * <p>If the two interfaces are mixed (including serialization), 97 * Unicode range values are mapped to their counterparts where such 98 * mapping is possible, such as {@code NumericShaper.Range.ARABIC} 99 * from/to {@code NumericShaper.ARABIC}. If any unmappable range 100 * values are specified, such as {@code NumericShaper.Range.BALINESE}, 101 * those ranges are ignored. 102 * 103 * <p><b>Decimal Digits Precedence</b></p> 104 * 105 * <p>A Unicode range may have more than one set of decimal digits. If 106 * multiple decimal digits sets are specified for the same Unicode 107 * range, one of the sets will take precedence as follows. 
108 * 109 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence."> 110 * <tr> 111 * <th class="TableHeadingColor">Unicode Range</th> 112 * <th class="TableHeadingColor"><code>NumericShaper</code> Constants</th> 113 * <th class="TableHeadingColor">Precedence</th> 114 * </tr> 115 * <tr> 116 * <td rowspan="2">Arabic</td> 117 * <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br> 118 * {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 119 * <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 120 * </tr> 121 * <tr> 122 * <td>{@link NumericShaper.Range#ARABIC}<br> 123 * {@link NumericShaper.Range#EASTERN_ARABIC}</td> 124 * <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td> 125 * </tr> 126 * <tr> 127 * <td>Tai Tham</td> 128 * <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br> 129 * {@link NumericShaper.Range#TAI_THAM_THAM}</td> 130 * <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td> 131 * </tr> 132 * </table> 133 * 134 * @since 1.4 135 */ 136 137 public final class NumericShaper implements java.io.Serializable { 138 139 // For access from java.text.Bidi 140 static { 141 if (SharedSecrets.getJavaAWTFontAccess() == null) { 142 SharedSecrets.setJavaAWTFontAccess(new JavaAWTFontAccessImpl()); 143 } 144 } 145 146 /** 147 * A {@code NumericShaper.Range} represents a Unicode range of a 148 * script having its own decimal digits. For example, the {@link 149 * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT 150 * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59). 151 * 152 * <p>The <code>Range</code> enum replaces the traditional bit 153 * mask-based values (e.g., {@link NumericShaper#ARABIC}), and 154 * supports more Unicode ranges than the bit mask-based ones. 
For 155 * example, the following code using the bit mask: 156 * <blockquote><pre> 157 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 158 * NumericShaper.TAMIL, 159 * NumericShaper.EUROPEAN); 160 * </pre></blockquote> 161 * can be written using this enum as: 162 * <blockquote><pre> 163 * NumericShaper.getContextualShaper(EnumSet.of( 164 * NumericShaper.Range.ARABIC, 165 * NumericShaper.Range.TAMIL), 166 * NumericShaper.Range.EUROPEAN); 167 * </pre></blockquote> 168 * 169 * @since 1.7 170 */ 171 public static enum Range { 172 // The order of EUROPEAN to MOGOLIAN must be consistent 173 // with the bitmask-based constants. 174 /** 175 * The Latin (European) range with the Latin (ASCII) digits. 176 */ 177 EUROPEAN ('\u0030', '\u0000', '\u0300'), 178 /** 179 * The Arabic range with the Arabic-Indic digits. 180 */ 181 ARABIC ('\u0660', '\u0600', '\u0780'), 182 /** 183 * The Arabic range with the Eastern Arabic-Indic digits. 184 */ 185 EASTERN_ARABIC ('\u06f0', '\u0600', '\u0780'), 186 /** 187 * The Devanagari range with the Devanagari digits. 188 */ 189 DEVANAGARI ('\u0966', '\u0900', '\u0980'), 190 /** 191 * The Bengali range with the Bengali digits. 192 */ 193 BENGALI ('\u09e6', '\u0980', '\u0a00'), 194 /** 195 * The Gurmukhi range with the Gurmukhi digits. 196 */ 197 GURMUKHI ('\u0a66', '\u0a00', '\u0a80'), 198 /** 199 * The Gujarati range with the Gujarati digits. 200 */ 201 GUJARATI ('\u0ae6', '\u0b00', '\u0b80'), 202 /** 203 * The Oriya range with the Oriya digits. 204 */ 205 ORIYA ('\u0b66', '\u0b00', '\u0b80'), 206 /** 207 * The Tamil range with the Tamil digits. 208 */ 209 TAMIL ('\u0be6', '\u0b80', '\u0c00'), 210 /** 211 * The Telugu range with the Telugu digits. 212 */ 213 TELUGU ('\u0c66', '\u0c00', '\u0c80'), 214 /** 215 * The Kannada range with the Kannada digits. 216 */ 217 KANNADA ('\u0ce6', '\u0c80', '\u0d00'), 218 /** 219 * The Malayalam range with the Malayalam digits. 
220 */ 221 MALAYALAM ('\u0d66', '\u0d00', '\u0d80'), 222 /** 223 * The Thai range with the Thai digits. 224 */ 225 THAI ('\u0e50', '\u0e00', '\u0e80'), 226 /** 227 * The Lao range with the Lao digits. 228 */ 229 LAO ('\u0ed0', '\u0e80', '\u0f00'), 230 /** 231 * The Tibetan range with the Tibetan digits. 232 */ 233 TIBETAN ('\u0f20', '\u0f00', '\u1000'), 234 /** 235 * The Myanmar range with the Myanmar digits. 236 */ 237 MYANMAR ('\u1040', '\u1000', '\u1080'), 238 /** 239 * The Ethiopic range with the Ethiopic digits. Ethiopic 240 * does not have a decimal digit 0 so Latin (European) 0 is 241 * used. 242 */ 243 ETHIOPIC ('\u1369', '\u1200', '\u1380') { 244 @Override 245 char getNumericBase() { return 1; } 246 }, 247 /** 248 * The Khmer range with the Khmer digits. 249 */ 250 KHMER ('\u17e0', '\u1780', '\u1800'), 251 /** 252 * The Mongolian range with the Mongolian digits. 253 */ 254 MONGOLIAN ('\u1810', '\u1800', '\u1900'), 255 // The order of EUROPEAN to MOGOLIAN must be consistent 256 // with the bitmask-based constants. 257 258 /** 259 * The N'Ko range with the N'Ko digits. 260 */ 261 NKO ('\u07c0', '\u07c0', '\u0800'), 262 /** 263 * The Myanmar range with the Myanmar Shan digits. 264 */ 265 MYANMAR_SHAN ('\u1090', '\u1000', '\u10a0'), 266 /** 267 * The Limbu range with the Limbu digits. 268 */ 269 LIMBU ('\u1946', '\u1900', '\u1950'), 270 /** 271 * The New Tai Lue range with the New Tai Lue digits. 272 */ 273 NEW_TAI_LUE ('\u19d0', '\u1980', '\u19e0'), 274 /** 275 * The Balinese range with the Balinese digits. 276 */ 277 BALINESE ('\u1b50', '\u1b00', '\u1b80'), 278 /** 279 * The Sundanese range with the Sundanese digits. 280 */ 281 SUNDANESE ('\u1bb0', '\u1b80', '\u1bc0'), 282 /** 283 * The Lepcha range with the Lepcha digits. 284 */ 285 LEPCHA ('\u1c40', '\u1c00', '\u1c50'), 286 /** 287 * The Ol Chiki range with the Ol Chiki digits. 288 */ 289 OL_CHIKI ('\u1c50', '\u1c50', '\u1c80'), 290 /** 291 * The Vai range with the Vai digits. 
292 */ 293 VAI ('\ua620', '\ua500', '\ua640'), 294 /** 295 * The Saurashtra range with the Saurashtra digits. 296 */ 297 SAURASHTRA ('\ua8d0', '\ua880', '\ua8e0'), 298 /** 299 * The Kayah Li range with the Kayah Li digits. 300 */ 301 KAYAH_LI ('\ua900', '\ua900', '\ua930'), 302 /** 303 * The Cham range with the Cham digits. 304 */ 305 CHAM ('\uaa50', '\uaa00', '\uaa60'), 306 /** 307 * The Tai Tham Hora range with the Tai Tham Hora digits. 308 */ 309 TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'), 310 /** 311 * The Tai Tham Tham range with the Tai Tham Tham digits. 312 */ 313 TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'), 314 /** 315 * The Javanese range with the Javanese digits. 316 */ 317 JAVANESE ('\ua9d0', '\ua980', '\ua9e0'), 318 /** 319 * The Meetei Mayek range with the Meetei Mayek digits. 320 */ 321 MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00'); 322 323 private static int toRangeIndex(Range script) { 324 int index = script.ordinal(); 325 return index < NUM_KEYS ? index : -1; 326 } 327 328 private static Range indexToRange(int index) { 329 return index < NUM_KEYS ? 
Range.values()[index] : null; 330 } 331 332 private static int toRangeMask(Set<Range> ranges) { 333 int m = 0; 334 for (Range range : ranges) { 335 int index = range.ordinal(); 336 if (index < NUM_KEYS) { 337 m |= 1 << index; 338 } 339 } 340 return m; 341 } 342 343 private static Set<Range> maskToRangeSet(int mask) { 344 Set<Range> set = EnumSet.noneOf(Range.class); 345 Range[] a = Range.values(); 346 for (int i = 0; i < NUM_KEYS; i++) { 347 if ((mask & (1 << i)) != 0) { 348 set.add(a[i]); 349 } 350 } 351 return set; 352 } 353 354 // base character of range digits 355 private final int base; 356 // Unicode range 357 private final int start, // inclusive 358 end; // exclusive 359 360 private Range(int base, int start, int end) { 361 this.base = base - ('0' + getNumericBase()); 362 this.start = start; 363 this.end = end; 364 } 365 366 private int getDigitBase() { 367 return base; 368 } 369 370 char getNumericBase() { 371 return 0; 372 } 373 374 private boolean inRange(int c) { 375 return start <= c && c < end; 376 } 377 } 378 379 /** index of context for contextual shaping - values range from 0 to 18 */ 380 private int key; 381 382 /** flag indicating whether to shape contextually (high bit) and which 383 * digit ranges to shape (bits 0-18) 384 */ 385 private int mask; 386 387 /** 388 * The context {@code Range} for contextual shaping or the {@code 389 * Range} for non-contextual shaping. {@code null} for the bit 390 * mask-based API. 391 * 392 * @since 1.7 393 */ 394 private Range shapingRange; 395 396 /** 397 * {@code Set<Range>} indicating which Unicode ranges to 398 * shape. {@code null} for the bit mask-based API. 399 */ 400 private transient Set<Range> rangeSet; 401 402 /** 403 * rangeSet.toArray() value. Sorted by Range.base when the number 404 * of elements is greater then BSEARCH_THRESHOLD. 405 */ 406 private transient Range[] rangeArray; 407 408 /** 409 * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used. 
*/
    private static final int BSEARCH_THRESHOLD = 3;

    private static final long serialVersionUID = -8022764705923730308L;

    // Bit mask-based range constants. Bit n (1<<n) corresponds to the
    // private *_KEY constant with value n declared further below.

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    public static final int ALL_RANGES = 0x0007ffff;

    // Key (index) of each bit mask-based range: the position of its bit in
    // the mask, and its index into the bases table below. In the contexts
    // table, the range for key k occupies entries 2*k and 2*k + 1.
    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    // Number of bit mask-based ranges; bits 0 to NUM_KEYS - 1 of a mask are range bits.
    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High bit of the mask, set by the getContextualShaper factories.
    private static final int CONTEXTUAL_MASK = 1<<31;

    // Per key: distance from the Latin digit '0' to the first digit of the
    // range (from '1' for ETHIOPIC, which has no zero digit).
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Pairs of (start inclusive, end exclusive), one pair per key in key
    // order, followed by a single trailing 0xffff entry.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.
556 557 private static int ctCache = 0; 558 private static int ctCacheLimit = contexts.length - 2; 559 560 // warning, synchronize access to this as it modifies state 561 private static int getContextKey(char c) { 562 if (c < contexts[ctCache]) { 563 while (ctCache > 0 && c < contexts[ctCache]) --ctCache; 564 } else if (c >= contexts[ctCache + 1]) { 565 while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache; 566 } 567 568 // if we're not in a known range, then return EUROPEAN as the range key 569 return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY; 570 } 571 572 // cache for the NumericShaper.Range version 573 private transient volatile Range currentRange = Range.EUROPEAN; 574 575 private Range rangeForCodePoint(final int codepoint) { 576 if (currentRange.inRange(codepoint)) { 577 return currentRange; 578 } 579 580 final Range[] ranges = rangeArray; 581 if (ranges.length > BSEARCH_THRESHOLD) { 582 int lo = 0; 583 int hi = ranges.length - 1; 584 while (lo <= hi) { 585 int mid = (lo + hi) / 2; 586 Range range = ranges[mid]; 587 if (codepoint < range.start) { 588 hi = mid - 1; 589 } else if (codepoint >= range.end) { 590 lo = mid + 1; 591 } else { 592 currentRange = range; 593 return range; 594 } 595 } 596 } else { 597 for (int i = 0; i < ranges.length; i++) { 598 if (ranges[i].inRange(codepoint)) { 599 return ranges[i]; 600 } 601 } 602 } 603 return Range.EUROPEAN; 604 } 605 606 /* 607 * A range table of strong directional characters (types L, R, AL). 608 * Even (left) indexes are starts of ranges of non-strong-directional (or undefined) 609 * characters, odd (right) indexes are starts of ranges of strong directional 610 * characters. 
611 */ 612 private static int[] strongTable = { 613 0x0000, 0x0041, 614 0x005b, 0x0061, 615 0x007b, 0x00aa, 616 0x00ab, 0x00b5, 617 0x00b6, 0x00ba, 618 0x00bb, 0x00c0, 619 0x00d7, 0x00d8, 620 0x00f7, 0x00f8, 621 0x02b9, 0x02bb, 622 0x02c2, 0x02d0, 623 0x02d2, 0x02e0, 624 0x02e5, 0x02ee, 625 0x02ef, 0x0370, 626 0x0374, 0x0376, 627 0x037e, 0x0386, 628 0x0387, 0x0388, 629 0x03f6, 0x03f7, 630 0x0483, 0x048a, 631 0x058a, 0x05be, 632 0x05bf, 0x05c0, 633 0x05c1, 0x05c3, 634 0x05c4, 0x05c6, 635 0x05c7, 0x05d0, 636 0x0600, 0x0608, 637 0x0609, 0x060b, 638 0x060c, 0x060d, 639 0x060e, 0x061b, 640 0x064b, 0x066d, 641 0x0670, 0x0671, 642 0x06d6, 0x06e5, 643 0x06e7, 0x06ee, 644 0x06f0, 0x06fa, 645 0x0711, 0x0712, 646 0x0730, 0x074d, 647 0x07a6, 0x07b1, 648 0x07eb, 0x07f4, 649 0x07f6, 0x07fa, 650 0x0816, 0x081a, 651 0x081b, 0x0824, 652 0x0825, 0x0828, 653 0x0829, 0x0830, 654 0x0859, 0x085e, 655 0x08e4, 0x0903, 656 0x093a, 0x093b, 657 0x093c, 0x093d, 658 0x0941, 0x0949, 659 0x094d, 0x094e, 660 0x0951, 0x0958, 661 0x0962, 0x0964, 662 0x0981, 0x0982, 663 0x09bc, 0x09bd, 664 0x09c1, 0x09c7, 665 0x09cd, 0x09ce, 666 0x09e2, 0x09e6, 667 0x09f2, 0x09f4, 668 0x09fb, 0x0a03, 669 0x0a3c, 0x0a3e, 670 0x0a41, 0x0a59, 671 0x0a70, 0x0a72, 672 0x0a75, 0x0a83, 673 0x0abc, 0x0abd, 674 0x0ac1, 0x0ac9, 675 0x0acd, 0x0ad0, 676 0x0ae2, 0x0ae6, 677 0x0af1, 0x0b02, 678 0x0b3c, 0x0b3d, 679 0x0b3f, 0x0b40, 680 0x0b41, 0x0b47, 681 0x0b4d, 0x0b57, 682 0x0b62, 0x0b66, 683 0x0b82, 0x0b83, 684 0x0bc0, 0x0bc1, 685 0x0bcd, 0x0bd0, 686 0x0bf3, 0x0c01, 687 0x0c3e, 0x0c41, 688 0x0c46, 0x0c58, 689 0x0c62, 0x0c66, 690 0x0c78, 0x0c7f, 691 0x0cbc, 0x0cbd, 692 0x0ccc, 0x0cd5, 693 0x0ce2, 0x0ce6, 694 0x0d41, 0x0d46, 695 0x0d4d, 0x0d4e, 696 0x0d62, 0x0d66, 697 0x0dca, 0x0dcf, 698 0x0dd2, 0x0dd8, 699 0x0e31, 0x0e32, 700 0x0e34, 0x0e40, 701 0x0e47, 0x0e4f, 702 0x0eb1, 0x0eb2, 703 0x0eb4, 0x0ebd, 704 0x0ec8, 0x0ed0, 705 0x0f18, 0x0f1a, 706 0x0f35, 0x0f36, 707 0x0f37, 0x0f38, 708 0x0f39, 0x0f3e, 709 0x0f71, 0x0f7f, 710 0x0f80, 
0x0f85, 711 0x0f86, 0x0f88, 712 0x0f8d, 0x0fbe, 713 0x0fc6, 0x0fc7, 714 0x102d, 0x1031, 715 0x1032, 0x1038, 716 0x1039, 0x103b, 717 0x103d, 0x103f, 718 0x1058, 0x105a, 719 0x105e, 0x1061, 720 0x1071, 0x1075, 721 0x1082, 0x1083, 722 0x1085, 0x1087, 723 0x108d, 0x108e, 724 0x109d, 0x109e, 725 0x135d, 0x1360, 726 0x1390, 0x13a0, 727 0x1400, 0x1401, 728 0x1680, 0x1681, 729 0x169b, 0x16a0, 730 0x1712, 0x1720, 731 0x1732, 0x1735, 732 0x1752, 0x1760, 733 0x1772, 0x1780, 734 0x17b4, 0x17b6, 735 0x17b7, 0x17be, 736 0x17c6, 0x17c7, 737 0x17c9, 0x17d4, 738 0x17db, 0x17dc, 739 0x17dd, 0x17e0, 740 0x17f0, 0x1810, 741 0x18a9, 0x18aa, 742 0x1920, 0x1923, 743 0x1927, 0x1929, 744 0x1932, 0x1933, 745 0x1939, 0x1946, 746 0x19de, 0x1a00, 747 0x1a17, 0x1a19, 748 0x1a56, 0x1a57, 749 0x1a58, 0x1a61, 750 0x1a62, 0x1a63, 751 0x1a65, 0x1a6d, 752 0x1a73, 0x1a80, 753 0x1b00, 0x1b04, 754 0x1b34, 0x1b35, 755 0x1b36, 0x1b3b, 756 0x1b3c, 0x1b3d, 757 0x1b42, 0x1b43, 758 0x1b6b, 0x1b74, 759 0x1b80, 0x1b82, 760 0x1ba2, 0x1ba6, 761 0x1ba8, 0x1baa, 762 0x1bab, 0x1bac, 763 0x1be6, 0x1be7, 764 0x1be8, 0x1bea, 765 0x1bed, 0x1bee, 766 0x1bef, 0x1bf2, 767 0x1c2c, 0x1c34, 768 0x1c36, 0x1c3b, 769 0x1cd0, 0x1cd3, 770 0x1cd4, 0x1ce1, 771 0x1ce2, 0x1ce9, 772 0x1ced, 0x1cee, 773 0x1cf4, 0x1cf5, 774 0x1dc0, 0x1e00, 775 0x1fbd, 0x1fbe, 776 0x1fbf, 0x1fc2, 777 0x1fcd, 0x1fd0, 778 0x1fdd, 0x1fe0, 779 0x1fed, 0x1ff2, 780 0x1ffd, 0x200e, 781 0x2010, 0x2071, 782 0x2074, 0x207f, 783 0x2080, 0x2090, 784 0x20a0, 0x2102, 785 0x2103, 0x2107, 786 0x2108, 0x210a, 787 0x2114, 0x2115, 788 0x2116, 0x2119, 789 0x211e, 0x2124, 790 0x2125, 0x2126, 791 0x2127, 0x2128, 792 0x2129, 0x212a, 793 0x212e, 0x212f, 794 0x213a, 0x213c, 795 0x2140, 0x2145, 796 0x214a, 0x214e, 797 0x2150, 0x2160, 798 0x2189, 0x2336, 799 0x237b, 0x2395, 800 0x2396, 0x249c, 801 0x24ea, 0x26ac, 802 0x26ad, 0x2800, 803 0x2900, 0x2c00, 804 0x2ce5, 0x2ceb, 805 0x2cef, 0x2cf2, 806 0x2cf9, 0x2d00, 807 0x2d7f, 0x2d80, 808 0x2de0, 0x3005, 809 0x3008, 0x3021, 810 0x302a, 
0x3031, 811 0x3036, 0x3038, 812 0x303d, 0x3041, 813 0x3099, 0x309d, 814 0x30a0, 0x30a1, 815 0x30fb, 0x30fc, 816 0x31c0, 0x31f0, 817 0x321d, 0x3220, 818 0x3250, 0x3260, 819 0x327c, 0x327f, 820 0x32b1, 0x32c0, 821 0x32cc, 0x32d0, 822 0x3377, 0x337b, 823 0x33de, 0x33e0, 824 0x33ff, 0x3400, 825 0x4dc0, 0x4e00, 826 0xa490, 0xa4d0, 827 0xa60d, 0xa610, 828 0xa66f, 0xa680, 829 0xa69f, 0xa6a0, 830 0xa6f0, 0xa6f2, 831 0xa700, 0xa722, 832 0xa788, 0xa789, 833 0xa802, 0xa803, 834 0xa806, 0xa807, 835 0xa80b, 0xa80c, 836 0xa825, 0xa827, 837 0xa828, 0xa830, 838 0xa838, 0xa840, 839 0xa874, 0xa880, 840 0xa8c4, 0xa8ce, 841 0xa8e0, 0xa8f2, 842 0xa926, 0xa92e, 843 0xa947, 0xa952, 844 0xa980, 0xa983, 845 0xa9b3, 0xa9b4, 846 0xa9b6, 0xa9ba, 847 0xa9bc, 0xa9bd, 848 0xaa29, 0xaa2f, 849 0xaa31, 0xaa33, 850 0xaa35, 0xaa40, 851 0xaa43, 0xaa44, 852 0xaa4c, 0xaa4d, 853 0xaab0, 0xaab1, 854 0xaab2, 0xaab5, 855 0xaab7, 0xaab9, 856 0xaabe, 0xaac0, 857 0xaac1, 0xaac2, 858 0xaaec, 0xaaee, 859 0xaaf6, 0xab01, 860 0xabe5, 0xabe6, 861 0xabe8, 0xabe9, 862 0xabed, 0xabf0, 863 0xfb1e, 0xfb1f, 864 0xfb29, 0xfb2a, 865 0xfd3e, 0xfd50, 866 0xfdfd, 0xfe70, 867 0xfeff, 0xff21, 868 0xff3b, 0xff41, 869 0xff5b, 0xff66, 870 0xffe0, 0x10000, 871 0x10101, 0x10102, 872 0x10140, 0x101d0, 873 0x101fd, 0x10280, 874 0x1091f, 0x10920, 875 0x10a01, 0x10a10, 876 0x10a38, 0x10a40, 877 0x10b39, 0x10b40, 878 0x10e60, 0x11000, 879 0x11001, 0x11002, 880 0x11038, 0x11047, 881 0x11052, 0x11066, 882 0x11080, 0x11082, 883 0x110b3, 0x110b7, 884 0x110b9, 0x110bb, 885 0x11100, 0x11103, 886 0x11127, 0x1112c, 887 0x1112d, 0x11136, 888 0x11180, 0x11182, 889 0x111b6, 0x111bf, 890 0x116ab, 0x116ac, 891 0x116ad, 0x116ae, 892 0x116b0, 0x116b6, 893 0x116b7, 0x116c0, 894 0x16f8f, 0x16f93, 895 0x1d167, 0x1d16a, 896 0x1d173, 0x1d183, 897 0x1d185, 0x1d18c, 898 0x1d1aa, 0x1d1ae, 899 0x1d200, 0x1d360, 900 0x1d6db, 0x1d6dc, 901 0x1d715, 0x1d716, 902 0x1d74f, 0x1d750, 903 0x1d789, 0x1d78a, 904 0x1d7c3, 0x1d7c4, 905 0x1d7ce, 0x1ee00, 906 0x1eef0, 
0x1f110, 907 0x1f16a, 0x1f170, 908 0x1f300, 0x1f48c, 909 0x1f48d, 0x1f524, 910 0x1f525, 0x20000, 911 0xe0001, 0xf0000, 912 0x10fffe, 0x10ffff // sentinel 913 }; 914 915 916 // use a binary search with a cache 917 918 private transient volatile int stCache = 0; 919 920 private boolean isStrongDirectional(char c) { 921 int cachedIndex = stCache; 922 if (c < strongTable[cachedIndex]) { 923 cachedIndex = search(c, strongTable, 0, cachedIndex); 924 } else if (c >= strongTable[cachedIndex + 1]) { 925 cachedIndex = search(c, strongTable, cachedIndex + 1, 926 strongTable.length - cachedIndex - 1); 927 } 928 boolean val = (cachedIndex & 0x1) == 1; 929 stCache = cachedIndex; 930 return val; 931 } 932 933 private static int getKeyFromMask(int mask) { 934 int key = 0; 935 while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) { 936 ++key; 937 } 938 if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) { 939 throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask)); 940 } 941 return key; 942 } 943 944 /** 945 * Returns a shaper for the provided unicode range. All 946 * Latin-1 (EUROPEAN) digits are converted 947 * to the corresponding decimal unicode digits. 948 * @param singleRange the specified Unicode range 949 * @return a non-contextual numeric shaper 950 * @throws IllegalArgumentException if the range is not a single range 951 */ 952 public static NumericShaper getShaper(int singleRange) { 953 int key = getKeyFromMask(singleRange); 954 return new NumericShaper(key, singleRange); 955 } 956 957 /** 958 * Returns a shaper for the provided Unicode 959 * range. All Latin-1 (EUROPEAN) digits are converted to the 960 * corresponding decimal digits of the specified Unicode range. 961 * 962 * @param singleRange the Unicode range given by a {@link 963 * NumericShaper.Range} constant. 964 * @return a non-contextual {@code NumericShaper}. 
965 * @throws NullPointerException if {@code singleRange} is {@code null} 966 * @since 1.7 967 */ 968 public static NumericShaper getShaper(Range singleRange) { 969 return new NumericShaper(singleRange, EnumSet.of(singleRange)); 970 } 971 972 /** 973 * Returns a contextual shaper for the provided unicode range(s). 974 * Latin-1 (EUROPEAN) digits are converted to the decimal digits 975 * corresponding to the range of the preceding text, if the 976 * range is one of the provided ranges. Multiple ranges are 977 * represented by or-ing the values together, such as, 978 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The 979 * shaper assumes EUROPEAN as the starting context, that is, if 980 * EUROPEAN digits are encountered before any strong directional 981 * text in the string, the context is presumed to be EUROPEAN, and 982 * so the digits will not shape. 983 * @param ranges the specified Unicode ranges 984 * @return a shaper for the specified ranges 985 */ 986 public static NumericShaper getContextualShaper(int ranges) { 987 ranges |= CONTEXTUAL_MASK; 988 return new NumericShaper(EUROPEAN_KEY, ranges); 989 } 990 991 /** 992 * Returns a contextual shaper for the provided Unicode 993 * range(s). The Latin-1 (EUROPEAN) digits are converted to the 994 * decimal digits corresponding to the range of the preceding 995 * text, if the range is one of the provided ranges. 996 * 997 * <p>The shaper assumes EUROPEAN as the starting context, that 998 * is, if EUROPEAN digits are encountered before any strong 999 * directional text in the string, the context is presumed to be 1000 * EUROPEAN, and so the digits will not shape. 1001 * 1002 * @param ranges the specified Unicode ranges 1003 * @return a contextual shaper for the specified ranges 1004 * @throws NullPointerException if {@code ranges} is {@code null}. 
1005 * @since 1.7 1006 */ 1007 public static NumericShaper getContextualShaper(Set<Range> ranges) { 1008 NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges); 1009 shaper.mask = CONTEXTUAL_MASK; 1010 return shaper; 1011 } 1012 1013 /** 1014 * Returns a contextual shaper for the provided unicode range(s). 1015 * Latin-1 (EUROPEAN) digits will be converted to the decimal digits 1016 * corresponding to the range of the preceding text, if the 1017 * range is one of the provided ranges. Multiple ranges are 1018 * represented by or-ing the values together, for example, 1019 * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>. The 1020 * shaper uses defaultContext as the starting context. 1021 * @param ranges the specified Unicode ranges 1022 * @param defaultContext the starting context, such as 1023 * <code>NumericShaper.EUROPEAN</code> 1024 * @return a shaper for the specified Unicode ranges. 1025 * @throws IllegalArgumentException if the specified 1026 * <code>defaultContext</code> is not a single valid range. 1027 */ 1028 public static NumericShaper getContextualShaper(int ranges, int defaultContext) { 1029 int key = getKeyFromMask(defaultContext); 1030 ranges |= CONTEXTUAL_MASK; 1031 return new NumericShaper(key, ranges); 1032 } 1033 1034 /** 1035 * Returns a contextual shaper for the provided Unicode range(s). 1036 * The Latin-1 (EUROPEAN) digits will be converted to the decimal 1037 * digits corresponding to the range of the preceding text, if the 1038 * range is one of the provided ranges. The shaper uses {@code 1039 * defaultContext} as the starting context. 1040 * 1041 * @param ranges the specified Unicode ranges 1042 * @param defaultContext the starting context, such as 1043 * {@code NumericShaper.Range.EUROPEAN} 1044 * @return a contextual shaper for the specified Unicode ranges. 
1045 * @throws NullPointerException 1046 * if {@code ranges} or {@code defaultContext} is {@code null} 1047 * @since 1.7 1048 */ 1049 public static NumericShaper getContextualShaper(Set<Range> ranges, 1050 Range defaultContext) { 1051 if (defaultContext == null) { 1052 throw new NullPointerException(); 1053 } 1054 NumericShaper shaper = new NumericShaper(defaultContext, ranges); 1055 shaper.mask = CONTEXTUAL_MASK; 1056 return shaper; 1057 } 1058 1059 /** 1060 * Private constructor. 1061 */ 1062 private NumericShaper(int key, int mask) { 1063 this.key = key; 1064 this.mask = mask; 1065 } 1066 1067 private NumericShaper(Range defaultContext, Set<Range> ranges) { 1068 shapingRange = defaultContext; 1069 rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null. 1070 1071 // Give precedance to EASTERN_ARABIC if both ARABIC and 1072 // EASTERN_ARABIC are specified. 1073 if (rangeSet.contains(Range.EASTERN_ARABIC) 1074 && rangeSet.contains(Range.ARABIC)) { 1075 rangeSet.remove(Range.ARABIC); 1076 } 1077 1078 // As well as the above case, give precedance to TAI_THAM_THAM if both 1079 // TAI_THAM_HORA and TAI_THAM_THAM are specified. 1080 if (rangeSet.contains(Range.TAI_THAM_THAM) 1081 && rangeSet.contains(Range.TAI_THAM_HORA)) { 1082 rangeSet.remove(Range.TAI_THAM_HORA); 1083 } 1084 1085 rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); 1086 if (rangeArray.length > BSEARCH_THRESHOLD) { 1087 // sort rangeArray for binary search 1088 Arrays.sort(rangeArray, 1089 new Comparator<Range>() { 1090 public int compare(Range s1, Range s2) { 1091 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1; 1092 } 1093 }); 1094 } 1095 } 1096 1097 /** 1098 * Converts the digits in the text that occur between start and 1099 * start + count. 
1100 * @param text an array of characters to convert 1101 * @param start the index into <code>text</code> to start 1102 * converting 1103 * @param count the number of characters in <code>text</code> 1104 * to convert 1105 * @throws IndexOutOfBoundsException if start or start + count is 1106 * out of bounds 1107 * @throws NullPointerException if text is null 1108 */ 1109 public void shape(char[] text, int start, int count) { 1110 checkParams(text, start, count); 1111 if (isContextual()) { 1112 if (rangeSet == null) { 1113 shapeContextually(text, start, count, key); 1114 } else { 1115 shapeContextually(text, start, count, shapingRange); 1116 } 1117 } else { 1118 shapeNonContextually(text, start, count); 1119 } 1120 } 1121 1122 /** 1123 * Converts the digits in the text that occur between start and 1124 * start + count, using the provided context. 1125 * Context is ignored if the shaper is not a contextual shaper. 1126 * @param text an array of characters 1127 * @param start the index into <code>text</code> to start 1128 * converting 1129 * @param count the number of characters in <code>text</code> 1130 * to convert 1131 * @param context the context to which to convert the 1132 * characters, such as <code>NumericShaper.EUROPEAN</code> 1133 * @throws IndexOutOfBoundsException if start or start + count is 1134 * out of bounds 1135 * @throws NullPointerException if text is null 1136 * @throws IllegalArgumentException if this is a contextual shaper 1137 * and the specified <code>context</code> is not a single valid 1138 * range. 
1139 */ 1140 public void shape(char[] text, int start, int count, int context) { 1141 checkParams(text, start, count); 1142 if (isContextual()) { 1143 int ctxKey = getKeyFromMask(context); 1144 if (rangeSet == null) { 1145 shapeContextually(text, start, count, ctxKey); 1146 } else { 1147 shapeContextually(text, start, count, Range.values()[ctxKey]); 1148 } 1149 } else { 1150 shapeNonContextually(text, start, count); 1151 } 1152 } 1153 1154 /** 1155 * Converts the digits in the text that occur between {@code 1156 * start} and {@code start + count}, using the provided {@code 1157 * context}. {@code Context} is ignored if the shaper is not a 1158 * contextual shaper. 1159 * 1160 * @param text a {@code char} array 1161 * @param start the index into {@code text} to start converting 1162 * @param count the number of {@code char}s in {@code text} 1163 * to convert 1164 * @param context the context to which to convert the characters, 1165 * such as {@code NumericShaper.Range.EUROPEAN} 1166 * @throws IndexOutOfBoundsException 1167 * if {@code start} or {@code start + count} is out of bounds 1168 * @throws NullPointerException 1169 * if {@code text} or {@code context} is null 1170 * @since 1.7 1171 */ 1172 public void shape(char[] text, int start, int count, Range context) { 1173 checkParams(text, start, count); 1174 if (context == null) { 1175 throw new NullPointerException("context is null"); 1176 } 1177 1178 if (isContextual()) { 1179 if (rangeSet != null) { 1180 shapeContextually(text, start, count, context); 1181 } else { 1182 int key = Range.toRangeIndex(context); 1183 if (key >= 0) { 1184 shapeContextually(text, start, count, key); 1185 } else { 1186 shapeContextually(text, start, count, shapingRange); 1187 } 1188 } 1189 } else { 1190 shapeNonContextually(text, start, count); 1191 } 1192 } 1193 1194 private void checkParams(char[] text, int start, int count) { 1195 if (text == null) { 1196 throw new NullPointerException("text is null"); 1197 } 1198 if ((start < 0) 
1199 || (start > text.length) 1200 || ((start + count) < 0) 1201 || ((start + count) > text.length)) { 1202 throw new IndexOutOfBoundsException( 1203 "bad start or count for text of length " + text.length); 1204 } 1205 } 1206 1207 /** 1208 * Returns a <code>boolean</code> indicating whether or not 1209 * this shaper shapes contextually. 1210 * @return <code>true</code> if this shaper is contextual; 1211 * <code>false</code> otherwise. 1212 */ 1213 public boolean isContextual() { 1214 return (mask & CONTEXTUAL_MASK) != 0; 1215 } 1216 1217 /** 1218 * Returns an <code>int</code> that ORs together the values for 1219 * all the ranges that will be shaped. 1220 * <p> 1221 * For example, to check if a shaper shapes to Arabic, you would use the 1222 * following: 1223 * <blockquote> 1224 * {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... } 1225 * </blockquote> 1226 * 1227 * <p>Note that this method supports only the bit mask-based 1228 * ranges. Call {@link #getRangeSet()} for the enum-based ranges. 1229 * 1230 * @return the values for all the ranges to be shaped. 1231 */ 1232 public int getRanges() { 1233 return mask & ~CONTEXTUAL_MASK; 1234 } 1235 1236 /** 1237 * Returns a {@code Set} representing all the Unicode ranges in 1238 * this {@code NumericShaper} that will be shaped. 1239 * 1240 * @return all the Unicode ranges to be shaped. 1241 * @since 1.7 1242 */ 1243 public Set<Range> getRangeSet() { 1244 if (rangeSet != null) { 1245 return EnumSet.copyOf(rangeSet); 1246 } 1247 return Range.maskToRangeSet(mask); 1248 } 1249 1250 /** 1251 * Perform non-contextual shaping. 
1252 */ 1253 private void shapeNonContextually(char[] text, int start, int count) { 1254 int base; 1255 char minDigit = '0'; 1256 if (shapingRange != null) { 1257 base = shapingRange.getDigitBase(); 1258 minDigit += shapingRange.getNumericBase(); 1259 } else { 1260 base = bases[key]; 1261 if (key == ETHIOPIC_KEY) { 1262 minDigit++; // Ethiopic doesn't use decimal zero 1263 } 1264 } 1265 for (int i = start, e = start + count; i < e; ++i) { 1266 char c = text[i]; 1267 if (c >= minDigit && c <= '\u0039') { 1268 text[i] = (char)(c + base); 1269 } 1270 } 1271 } 1272 1273 /** 1274 * Perform contextual shaping. 1275 * Synchronized to protect caches used in getContextKey. 1276 */ 1277 private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) { 1278 1279 // if we don't support this context, then don't shape 1280 if ((mask & (1<<ctxKey)) == 0) { 1281 ctxKey = EUROPEAN_KEY; 1282 } 1283 int lastkey = ctxKey; 1284 1285 int base = bases[ctxKey]; 1286 char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero 1287 1288 synchronized (NumericShaper.class) { 1289 for (int i = start, e = start + count; i < e; ++i) { 1290 char c = text[i]; 1291 if (c >= minDigit && c <= '\u0039') { 1292 text[i] = (char)(c + base); 1293 } 1294 1295 if (isStrongDirectional(c)) { 1296 int newkey = getContextKey(c); 1297 if (newkey != lastkey) { 1298 lastkey = newkey; 1299 1300 ctxKey = newkey; 1301 if (((mask & EASTERN_ARABIC) != 0) && 1302 (ctxKey == ARABIC_KEY || 1303 ctxKey == EASTERN_ARABIC_KEY)) { 1304 ctxKey = EASTERN_ARABIC_KEY; 1305 } else if (((mask & ARABIC) != 0) && 1306 (ctxKey == ARABIC_KEY || 1307 ctxKey == EASTERN_ARABIC_KEY)) { 1308 ctxKey = ARABIC_KEY; 1309 } else if ((mask & (1<<ctxKey)) == 0) { 1310 ctxKey = EUROPEAN_KEY; 1311 } 1312 1313 base = bases[ctxKey]; 1314 1315 minDigit = ctxKey == ETHIOPIC_KEY ? 
'1' : '0'; // Ethiopic doesn't use decimal zero 1316 } 1317 } 1318 } 1319 } 1320 } 1321 1322 private void shapeContextually(char[] text, int start, int count, Range ctxKey) { 1323 // if we don't support the specified context, then don't shape. 1324 if (ctxKey == null || !rangeSet.contains(ctxKey)) { 1325 ctxKey = Range.EUROPEAN; 1326 } 1327 1328 Range lastKey = ctxKey; 1329 int base = ctxKey.getDigitBase(); 1330 char minDigit = (char)('0' + ctxKey.getNumericBase()); 1331 final int end = start + count; 1332 for (int i = start; i < end; ++i) { 1333 char c = text[i]; 1334 if (c >= minDigit && c <= '9') { 1335 text[i] = (char)(c + base); 1336 continue; 1337 } 1338 if (isStrongDirectional(c)) { 1339 ctxKey = rangeForCodePoint(c); 1340 if (ctxKey != lastKey) { 1341 lastKey = ctxKey; 1342 base = ctxKey.getDigitBase(); 1343 minDigit = (char)('0' + ctxKey.getNumericBase()); 1344 } 1345 } 1346 } 1347 } 1348 1349 /** 1350 * Returns a hash code for this shaper. 1351 * @return this shaper's hash code. 1352 * @see java.lang.Object#hashCode 1353 */ 1354 public int hashCode() { 1355 int hash = mask; 1356 if (rangeSet != null) { 1357 // Use the CONTEXTUAL_MASK bit only for the enum-based 1358 // NumericShaper. A deserialized NumericShaper might have 1359 // bit masks. 1360 hash &= CONTEXTUAL_MASK; 1361 hash ^= rangeSet.hashCode(); 1362 } 1363 return hash; 1364 } 1365 1366 /** 1367 * Returns {@code true} if the specified object is an instance of 1368 * <code>NumericShaper</code> and shapes identically to this one, 1369 * regardless of the range representations, the bit mask or the 1370 * enum. For example, the following code produces {@code "true"}. 
1371 * <blockquote><pre> 1372 * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC); 1373 * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC); 1374 * System.out.println(ns1.equals(ns2)); 1375 * </pre></blockquote> 1376 * 1377 * @param o the specified object to compare to this 1378 * <code>NumericShaper</code> 1379 * @return <code>true</code> if <code>o</code> is an instance 1380 * of <code>NumericShaper</code> and shapes in the same way; 1381 * <code>false</code> otherwise. 1382 * @see java.lang.Object#equals(java.lang.Object) 1383 */ 1384 public boolean equals(Object o) { 1385 if (o != null) { 1386 try { 1387 NumericShaper rhs = (NumericShaper)o; 1388 if (rangeSet != null) { 1389 if (rhs.rangeSet != null) { 1390 return isContextual() == rhs.isContextual() 1391 && rangeSet.equals(rhs.rangeSet) 1392 && shapingRange == rhs.shapingRange; 1393 } 1394 return isContextual() == rhs.isContextual() 1395 && rangeSet.equals(Range.maskToRangeSet(rhs.mask)) 1396 && shapingRange == Range.indexToRange(rhs.key); 1397 } else if (rhs.rangeSet != null) { 1398 Set<Range> rset = Range.maskToRangeSet(mask); 1399 Range srange = Range.indexToRange(key); 1400 return isContextual() == rhs.isContextual() 1401 && rset.equals(rhs.rangeSet) 1402 && srange == rhs.shapingRange; 1403 } 1404 return rhs.mask == mask && rhs.key == key; 1405 } 1406 catch (ClassCastException e) { 1407 } 1408 } 1409 return false; 1410 } 1411 1412 /** 1413 * Returns a <code>String</code> that describes this shaper. This method 1414 * is used for debugging purposes only. 1415 * @return a <code>String</code> describing this shaper. 1416 */ 1417 public String toString() { 1418 StringBuilder buf = new StringBuilder(super.toString()); 1419 1420 buf.append("[contextual:").append(isContextual()); 1421 1422 String[] keyNames = null; 1423 if (isContextual()) { 1424 buf.append(", context:"); 1425 buf.append(shapingRange == null ? 
Range.values()[key] : shapingRange); 1426 } 1427 1428 if (rangeSet == null) { 1429 buf.append(", range(s): "); 1430 boolean first = true; 1431 for (int i = 0; i < NUM_KEYS; ++i) { 1432 if ((mask & (1 << i)) != 0) { 1433 if (first) { 1434 first = false; 1435 } else { 1436 buf.append(", "); 1437 } 1438 buf.append(Range.values()[i]); 1439 } 1440 } 1441 } else { 1442 buf.append(", range set: ").append(rangeSet); 1443 } 1444 buf.append(']'); 1445 1446 return buf.toString(); 1447 } 1448 1449 /** 1450 * Returns the index of the high bit in value (assuming le, actually 1451 * power of 2 >= value). value must be positive. 1452 */ 1453 private static int getHighBit(int value) { 1454 if (value <= 0) { 1455 return -32; 1456 } 1457 1458 int bit = 0; 1459 1460 if (value >= 1 << 16) { 1461 value >>= 16; 1462 bit += 16; 1463 } 1464 1465 if (value >= 1 << 8) { 1466 value >>= 8; 1467 bit += 8; 1468 } 1469 1470 if (value >= 1 << 4) { 1471 value >>= 4; 1472 bit += 4; 1473 } 1474 1475 if (value >= 1 << 2) { 1476 value >>= 2; 1477 bit += 2; 1478 } 1479 1480 if (value >= 1 << 1) { 1481 bit += 1; 1482 } 1483 1484 return bit; 1485 } 1486 1487 /** 1488 * fast binary search over subrange of array. 1489 */ 1490 private static int search(int value, int[] array, int start, int length) 1491 { 1492 int power = 1 << getHighBit(length); 1493 int extra = length - power; 1494 int probe = power; 1495 int index = start; 1496 1497 if (value >= array[index + extra]) { 1498 index += extra; 1499 } 1500 1501 while (probe > 1) { 1502 probe >>= 1; 1503 1504 if (value >= array[index + probe]) { 1505 index += probe; 1506 } 1507 } 1508 1509 return index; 1510 } 1511 1512 /** 1513 * Converts the {@code NumericShaper.Range} enum-based parameters, 1514 * if any, to the bit mask-based counterparts and writes this 1515 * object to the {@code stream}. Any enum constants that have no 1516 * bit mask-based counterparts are ignored in the conversion. 
1517 * 1518 * @param stream the output stream to write to 1519 * @throws IOException if an I/O error occurs while writing to {@code stream} 1520 * @since 1.7 1521 */ 1522 private void writeObject(ObjectOutputStream stream) throws IOException { 1523 if (shapingRange != null) { 1524 int index = Range.toRangeIndex(shapingRange); 1525 if (index >= 0) { 1526 key = index; 1527 } 1528 } 1529 if (rangeSet != null) { 1530 mask |= Range.toRangeMask(rangeSet); 1531 } 1532 stream.defaultWriteObject(); 1533 } 1534 }