1 /* 2 * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.awt.font; 27 28 import java.io.IOException; 29 import java.io.ObjectOutputStream; 30 import java.util.Arrays; 31 import java.util.Comparator; 32 import java.util.EnumSet; 33 import java.util.Set; 34 import jdk.internal.misc.SharedSecrets; 35 36 /** 37 * The {@code NumericShaper} class is used to convert Latin-1 (European) 38 * digits to other Unicode decimal digits. Users of this class will 39 * primarily be people who wish to present data using 40 * national digit shapes, but find it more convenient to represent the 41 * data internally using Latin-1 (European) digits. This does not 42 * interpret the deprecated numeric shape selector character (U+206E). 
43 * <p> 44 * Instances of {@code NumericShaper} are typically applied 45 * as attributes to text with the 46 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute 47 * of the {@code TextAttribute} class. 48 * For example, this code snippet causes a {@code TextLayout} to 49 * shape European digits to Arabic in an Arabic context:<br> 50 * <blockquote><pre> 51 * Map map = new HashMap(); 52 * map.put(TextAttribute.NUMERIC_SHAPING, 53 * NumericShaper.getContextualShaper(NumericShaper.ARABIC)); 54 * FontRenderContext frc = ...; 55 * TextLayout layout = new TextLayout(text, map, frc); 56 * layout.draw(g2d, x, y); 57 * </pre></blockquote> 58 * <br> 59 * It is also possible to perform numeric shaping explicitly using instances 60 * of {@code NumericShaper}, as this code snippet demonstrates:<br> 61 * <blockquote><pre> 62 * char[] text = ...; 63 * // shape all EUROPEAN digits (except zero) to ARABIC digits 64 * NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC); 65 * shaper.shape(text, start, count); 66 * 67 * // shape European digits to ARABIC digits if preceding text is Arabic, or 68 * // shape European digits to TAMIL digits if preceding text is Tamil, or 69 * // leave European digits alone if there is no preceding text, or 70 * // preceding text is neither Arabic nor Tamil 71 * NumericShaper shaper = 72 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 73 * NumericShaper.TAMIL, 74 * NumericShaper.EUROPEAN); 75 * shaper.shape(text, start, count); 76 * </pre></blockquote> 77 * 78 * <p><b>Bit mask- and enum-based Unicode ranges</b></p> 79 * 80 * <p>This class supports two different programming interfaces to 81 * represent Unicode ranges for script-specific digits: bit 82 * mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and 83 * enum-based ones, such as {@link NumericShaper.Range#ARABIC}. 
84 * Multiple ranges can be specified by ORing bit mask-based constants, 85 * such as: 86 * <blockquote><pre> 87 * NumericShaper.ARABIC | NumericShaper.TAMIL 88 * </pre></blockquote> 89 * or creating a {@code Set} with the {@link NumericShaper.Range} 90 * constants, such as: 91 * <blockquote><pre> 92 * EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL) 93 * </pre></blockquote> 94 * The enum-based ranges are a super set of the bit mask-based ones. 95 * 96 * <p>If the two interfaces are mixed (including serialization), 97 * Unicode range values are mapped to their counterparts where such 98 * mapping is possible, such as {@code NumericShaper.Range.ARABIC} 99 * from/to {@code NumericShaper.ARABIC}. If any unmappable range 100 * values are specified, such as {@code NumericShaper.Range.BALINESE}, 101 * those ranges are ignored. 102 * 103 * <p><b>Decimal Digits Precedence</b></p> 104 * 105 * <p>A Unicode range may have more than one set of decimal digits. If 106 * multiple decimal digits sets are specified for the same Unicode 107 * range, one of the sets will take precedence as follows. 
108 * 109 * <table border=1 cellspacing=3 cellpadding=0 summary="NumericShaper constants precedence."> 110 * <tr> 111 * <th class="TableHeadingColor">Unicode Range</th> 112 * <th class="TableHeadingColor">{@code NumericShaper} Constants</th> 113 * <th class="TableHeadingColor">Precedence</th> 114 * </tr> 115 * <tr> 116 * <td rowspan="2">Arabic</td> 117 * <td>{@link NumericShaper#ARABIC NumericShaper.ARABIC}<br> 118 * {@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 119 * <td>{@link NumericShaper#EASTERN_ARABIC NumericShaper.EASTERN_ARABIC}</td> 120 * </tr> 121 * <tr> 122 * <td>{@link NumericShaper.Range#ARABIC}<br> 123 * {@link NumericShaper.Range#EASTERN_ARABIC}</td> 124 * <td>{@link NumericShaper.Range#EASTERN_ARABIC}</td> 125 * </tr> 126 * <tr> 127 * <td>Tai Tham</td> 128 * <td>{@link NumericShaper.Range#TAI_THAM_HORA}<br> 129 * {@link NumericShaper.Range#TAI_THAM_THAM}</td> 130 * <td>{@link NumericShaper.Range#TAI_THAM_THAM}</td> 131 * </tr> 132 * </table> 133 * 134 * @since 1.4 135 */ 136 137 public final class NumericShaper implements java.io.Serializable { 138 139 // For access from java.text.Bidi 140 static { 141 if (SharedSecrets.getJavaAWTFontAccess() == null) { 142 SharedSecrets.setJavaAWTFontAccess(new JavaAWTFontAccessImpl()); 143 } 144 } 145 146 /** 147 * A {@code NumericShaper.Range} represents a Unicode range of a 148 * script having its own decimal digits. For example, the {@link 149 * NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT 150 * ZERO (U+0E50) to THAI DIGIT NINE (U+0E59). 151 * 152 * <p>The {@code Range} enum replaces the traditional bit 153 * mask-based values (e.g., {@link NumericShaper#ARABIC}), and 154 * supports more Unicode ranges than the bit mask-based ones. 
For 155 * example, the following code using the bit mask: 156 * <blockquote><pre> 157 * NumericShaper.getContextualShaper(NumericShaper.ARABIC | 158 * NumericShaper.TAMIL, 159 * NumericShaper.EUROPEAN); 160 * </pre></blockquote> 161 * can be written using this enum as: 162 * <blockquote><pre> 163 * NumericShaper.getContextualShaper(EnumSet.of( 164 * NumericShaper.Range.ARABIC, 165 * NumericShaper.Range.TAMIL), 166 * NumericShaper.Range.EUROPEAN); 167 * </pre></blockquote> 168 * 169 * @since 1.7 170 */ 171 public static enum Range { 172 // The order of EUROPEAN to MOGOLIAN must be consistent 173 // with the bitmask-based constants. 174 /** 175 * The Latin (European) range with the Latin (ASCII) digits. 176 */ 177 EUROPEAN ('\u0030', '\u0000', '\u0300'), 178 /** 179 * The Arabic range with the Arabic-Indic digits. 180 */ 181 ARABIC ('\u0660', '\u0600', '\u0780'), 182 /** 183 * The Arabic range with the Eastern Arabic-Indic digits. 184 */ 185 EASTERN_ARABIC ('\u06f0', '\u0600', '\u0780'), 186 /** 187 * The Devanagari range with the Devanagari digits. 188 */ 189 DEVANAGARI ('\u0966', '\u0900', '\u0980'), 190 /** 191 * The Bengali range with the Bengali digits. 192 */ 193 BENGALI ('\u09e6', '\u0980', '\u0a00'), 194 /** 195 * The Gurmukhi range with the Gurmukhi digits. 196 */ 197 GURMUKHI ('\u0a66', '\u0a00', '\u0a80'), 198 /** 199 * The Gujarati range with the Gujarati digits. 200 */ 201 GUJARATI ('\u0ae6', '\u0b00', '\u0b80'), 202 /** 203 * The Oriya range with the Oriya digits. 204 */ 205 ORIYA ('\u0b66', '\u0b00', '\u0b80'), 206 /** 207 * The Tamil range with the Tamil digits. 208 */ 209 TAMIL ('\u0be6', '\u0b80', '\u0c00'), 210 /** 211 * The Telugu range with the Telugu digits. 212 */ 213 TELUGU ('\u0c66', '\u0c00', '\u0c80'), 214 /** 215 * The Kannada range with the Kannada digits. 216 */ 217 KANNADA ('\u0ce6', '\u0c80', '\u0d00'), 218 /** 219 * The Malayalam range with the Malayalam digits. 
220 */ 221 MALAYALAM ('\u0d66', '\u0d00', '\u0d80'), 222 /** 223 * The Thai range with the Thai digits. 224 */ 225 THAI ('\u0e50', '\u0e00', '\u0e80'), 226 /** 227 * The Lao range with the Lao digits. 228 */ 229 LAO ('\u0ed0', '\u0e80', '\u0f00'), 230 /** 231 * The Tibetan range with the Tibetan digits. 232 */ 233 TIBETAN ('\u0f20', '\u0f00', '\u1000'), 234 /** 235 * The Myanmar range with the Myanmar digits. 236 */ 237 MYANMAR ('\u1040', '\u1000', '\u1080'), 238 /** 239 * The Ethiopic range with the Ethiopic digits. Ethiopic 240 * does not have a decimal digit 0 so Latin (European) 0 is 241 * used. 242 */ 243 ETHIOPIC ('\u1369', '\u1200', '\u1380') { 244 @Override 245 char getNumericBase() { return 1; } 246 }, 247 /** 248 * The Khmer range with the Khmer digits. 249 */ 250 KHMER ('\u17e0', '\u1780', '\u1800'), 251 /** 252 * The Mongolian range with the Mongolian digits. 253 */ 254 MONGOLIAN ('\u1810', '\u1800', '\u1900'), 255 // The order of EUROPEAN to MOGOLIAN must be consistent 256 // with the bitmask-based constants. 257 258 /** 259 * The N'Ko range with the N'Ko digits. 260 */ 261 NKO ('\u07c0', '\u07c0', '\u0800'), 262 /** 263 * The Myanmar range with the Myanmar Shan digits. 264 */ 265 MYANMAR_SHAN ('\u1090', '\u1000', '\u10a0'), 266 /** 267 * The Limbu range with the Limbu digits. 268 */ 269 LIMBU ('\u1946', '\u1900', '\u1950'), 270 /** 271 * The New Tai Lue range with the New Tai Lue digits. 272 */ 273 NEW_TAI_LUE ('\u19d0', '\u1980', '\u19e0'), 274 /** 275 * The Balinese range with the Balinese digits. 276 */ 277 BALINESE ('\u1b50', '\u1b00', '\u1b80'), 278 /** 279 * The Sundanese range with the Sundanese digits. 280 */ 281 SUNDANESE ('\u1bb0', '\u1b80', '\u1bc0'), 282 /** 283 * The Lepcha range with the Lepcha digits. 284 */ 285 LEPCHA ('\u1c40', '\u1c00', '\u1c50'), 286 /** 287 * The Ol Chiki range with the Ol Chiki digits. 288 */ 289 OL_CHIKI ('\u1c50', '\u1c50', '\u1c80'), 290 /** 291 * The Vai range with the Vai digits. 
292 */ 293 VAI ('\ua620', '\ua500', '\ua640'), 294 /** 295 * The Saurashtra range with the Saurashtra digits. 296 */ 297 SAURASHTRA ('\ua8d0', '\ua880', '\ua8e0'), 298 /** 299 * The Kayah Li range with the Kayah Li digits. 300 */ 301 KAYAH_LI ('\ua900', '\ua900', '\ua930'), 302 /** 303 * The Cham range with the Cham digits. 304 */ 305 CHAM ('\uaa50', '\uaa00', '\uaa60'), 306 /** 307 * The Tai Tham Hora range with the Tai Tham Hora digits. 308 */ 309 TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'), 310 /** 311 * The Tai Tham Tham range with the Tai Tham Tham digits. 312 */ 313 TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'), 314 /** 315 * The Javanese range with the Javanese digits. 316 */ 317 JAVANESE ('\ua9d0', '\ua980', '\ua9e0'), 318 /** 319 * The Meetei Mayek range with the Meetei Mayek digits. 320 */ 321 MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00'), 322 /** 323 * The Sinhala range with the Sinhala digits. 324 * @since 9 325 */ 326 SINHALA ('\u0de6', '\u0d80', '\u0e00'), 327 /** 328 * The Myanmar Extended-B range with the Myanmar Tai Laing digits. 329 * @since 9 330 */ 331 MYANMAR_TAI_LAING ('\ua9f0', '\ua9e0', '\uaa00'); 332 333 private static int toRangeIndex(Range script) { 334 int index = script.ordinal(); 335 return index < NUM_KEYS ? index : -1; 336 } 337 338 private static Range indexToRange(int index) { 339 return index < NUM_KEYS ? 
Range.values()[index] : null; 340 } 341 342 private static int toRangeMask(Set<Range> ranges) { 343 int m = 0; 344 for (Range range : ranges) { 345 int index = range.ordinal(); 346 if (index < NUM_KEYS) { 347 m |= 1 << index; 348 } 349 } 350 return m; 351 } 352 353 private static Set<Range> maskToRangeSet(int mask) { 354 Set<Range> set = EnumSet.noneOf(Range.class); 355 Range[] a = Range.values(); 356 for (int i = 0; i < NUM_KEYS; i++) { 357 if ((mask & (1 << i)) != 0) { 358 set.add(a[i]); 359 } 360 } 361 return set; 362 } 363 364 // base character of range digits 365 private final int base; 366 // Unicode range 367 private final int start, // inclusive 368 end; // exclusive 369 370 private Range(int base, int start, int end) { 371 this.base = base - ('0' + getNumericBase()); 372 this.start = start; 373 this.end = end; 374 } 375 376 private int getDigitBase() { 377 return base; 378 } 379 380 char getNumericBase() { 381 return 0; 382 } 383 384 private boolean inRange(int c) { 385 return start <= c && c < end; 386 } 387 } 388 389 /** index of context for contextual shaping - values range from 0 to 18 */ 390 private int key; 391 392 /** flag indicating whether to shape contextually (high bit) and which 393 * digit ranges to shape (bits 0-18) 394 */ 395 private int mask; 396 397 /** 398 * The context {@code Range} for contextual shaping or the {@code 399 * Range} for non-contextual shaping. {@code null} for the bit 400 * mask-based API. 401 * 402 * @since 1.7 403 */ 404 private Range shapingRange; 405 406 /** 407 * {@code Set<Range>} indicating which Unicode ranges to 408 * shape. {@code null} for the bit mask-based API. 409 */ 410 private transient Set<Range> rangeSet; 411 412 /** 413 * rangeSet.toArray() value. Sorted by Range.base when the number 414 * of elements is greater than BSEARCH_THRESHOLD. 415 */ 416 private transient Range[] rangeArray; 417 418 /** 419 * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used. 
*/
    private static final int BSEARCH_THRESHOLD = 3;

    private static final long serialVersionUID = -8022764705923730308L;

    /** Identifies the Latin-1 (European) and extended range, and
     *  Latin-1 (European) decimal base.
     */
    public static final int EUROPEAN = 1 << 0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1 << 1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1 << 2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1 << 3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1 << 4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1 << 5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1 << 6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1 << 7;

    /** Identifies the TAMIL range and decimal base. */
    // TAMIL DIGIT ZERO was added in Unicode 4.1
    public static final int TAMIL = 1 << 8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1 << 9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1 << 10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1 << 11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1 << 12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1 << 13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1 << 14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1 << 15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1 << 16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1 << 17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1 << 18;

    /** Identifies all ranges, for full contextual shaping.
     *
     * <p>This constant specifies all of the bit mask-based
     * ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
     * specify all of the enum-based ranges.
     */
    public static final int ALL_RANGES = 0x0007ffff;

    // Keys: the bit position of each bit mask-based constant above, which is
    // also the row of that script in the bases table and the pair index in
    // the contexts table below.
    private static final int EUROPEAN_KEY       = 0;
    private static final int ARABIC_KEY         = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY     = 3;
    private static final int BENGALI_KEY        = 4;
    private static final int GURMUKHI_KEY       = 5;
    private static final int GUJARATI_KEY       = 6;
    private static final int ORIYA_KEY          = 7;
    private static final int TAMIL_KEY          = 8;
    private static final int TELUGU_KEY         = 9;
    private static final int KANNADA_KEY        = 10;
    private static final int MALAYALAM_KEY      = 11;
    private static final int THAI_KEY           = 12;
    private static final int LAO_KEY            = 13;
    private static final int TIBETAN_KEY        = 14;
    private static final int MYANMAR_KEY        = 15;
    private static final int ETHIOPIC_KEY       = 16;
    private static final int KHMER_KEY          = 17;
    private static final int MONGOLIAN_KEY      = 18;

    private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed

    // High bit of the mask: set when shaping is contextual.
    private static final int CONTEXTUAL_MASK = 1 << 31;

    // Per-key offset from an ASCII digit to the script's digit of equal value.
    private static final char[] bases = {
        '\u0030' - '\u0030', // EUROPEAN
        '\u0660' - '\u0030', // ARABIC-INDIC
        '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
        '\u0966' - '\u0030', // DEVANAGARI
        '\u09e6' - '\u0030', // BENGALI
        '\u0a66' - '\u0030', // GURMUKHI
        '\u0ae6' - '\u0030', // GUJARATI
        '\u0b66' - '\u0030', // ORIYA
        '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
        '\u0c66' - '\u0030', // TELUGU
        '\u0ce6' - '\u0030', // KANNADA
        '\u0d66' - '\u0030', // MALAYALAM
        '\u0e50' - '\u0030', // THAI
        '\u0ed0' - '\u0030', // LAO
        '\u0f20' - '\u0030', // TIBETAN
        '\u1040' - '\u0030', // MYANMAR
        '\u1369' - '\u0031', // ETHIOPIC - no zero
        '\u17e0' - '\u0030', // KHMER
        '\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    // Per-key (start inclusive, end exclusive) pairs, followed by one sentinel.
    private static final char[] contexts = {
        '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
        '\u0600', '\u0780', // ARABIC
        '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
        '\u0900', '\u0980', // DEVANAGARI
        '\u0980', '\u0a00', // BENGALI
        '\u0a00', '\u0a80', // GURMUKHI
        '\u0a80', '\u0b00', // GUJARATI
        '\u0b00', '\u0b80', // ORIYA
        '\u0b80', '\u0c00', // TAMIL
        '\u0c00', '\u0c80', // TELUGU
        '\u0c80', '\u0d00', // KANNADA
        '\u0d00', '\u0d80', // MALAYALAM
        '\u0e00', '\u0e80', // THAI
        '\u0e80', '\u0f00', // LAO
        '\u0f00', '\u1000', // TIBETAN
        '\u1000', '\u1080', // MYANMAR
        '\u1200', '\u1380', // ETHIOPIC - note missing zero
        '\u1780', '\u1800', // KHMER
        '\u1800', '\u1900', // MONGOLIAN
        '\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.
566 567 private static int ctCache = 0; 568 private static int ctCacheLimit = contexts.length - 2; 569 570 // warning, synchronize access to this as it modifies state 571 private static int getContextKey(char c) { 572 if (c < contexts[ctCache]) { 573 while (ctCache > 0 && c < contexts[ctCache]) --ctCache; 574 } else if (c >= contexts[ctCache + 1]) { 575 while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache; 576 } 577 578 // if we're not in a known range, then return EUROPEAN as the range key 579 return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY; 580 } 581 582 // cache for the NumericShaper.Range version 583 private transient volatile Range currentRange = Range.EUROPEAN; 584 585 private Range rangeForCodePoint(final int codepoint) { 586 if (currentRange.inRange(codepoint)) { 587 return currentRange; 588 } 589 590 final Range[] ranges = rangeArray; 591 if (ranges.length > BSEARCH_THRESHOLD) { 592 int lo = 0; 593 int hi = ranges.length - 1; 594 while (lo <= hi) { 595 int mid = (lo + hi) / 2; 596 Range range = ranges[mid]; 597 if (codepoint < range.start) { 598 hi = mid - 1; 599 } else if (codepoint >= range.end) { 600 lo = mid + 1; 601 } else { 602 currentRange = range; 603 return range; 604 } 605 } 606 } else { 607 for (int i = 0; i < ranges.length; i++) { 608 if (ranges[i].inRange(codepoint)) { 609 return ranges[i]; 610 } 611 } 612 } 613 return Range.EUROPEAN; 614 } 615 616 /* 617 * A range table of strong directional characters (types L, R, AL). 618 * Even (left) indexes are starts of ranges of non-strong-directional (or undefined) 619 * characters, odd (right) indexes are starts of ranges of strong directional 620 * characters. 
621 */ 622 private static int[] strongTable = { 623 0x0000, 0x0041, 624 0x005b, 0x0061, 625 0x007b, 0x00aa, 626 0x00ab, 0x00b5, 627 0x00b6, 0x00ba, 628 0x00bb, 0x00c0, 629 0x00d7, 0x00d8, 630 0x00f7, 0x00f8, 631 0x02b9, 0x02bb, 632 0x02c2, 0x02d0, 633 0x02d2, 0x02e0, 634 0x02e5, 0x02ee, 635 0x02ef, 0x0370, 636 0x0374, 0x0376, 637 0x0378, 0x037a, 638 0x037e, 0x037f, 639 0x0380, 0x0386, 640 0x0387, 0x0388, 641 0x038b, 0x038c, 642 0x038d, 0x038e, 643 0x03a2, 0x03a3, 644 0x03f6, 0x03f7, 645 0x0483, 0x048a, 646 0x0530, 0x0531, 647 0x0557, 0x0559, 648 0x0560, 0x0561, 649 0x0588, 0x0589, 650 0x058a, 0x0590, 651 0x0591, 0x05be, 652 0x05bf, 0x05c0, 653 0x05c1, 0x05c3, 654 0x05c4, 0x05c6, 655 0x05c7, 0x05c8, 656 0x0600, 0x0608, 657 0x0609, 0x060b, 658 0x060c, 0x060d, 659 0x060e, 0x061b, 660 0x064b, 0x066d, 661 0x0670, 0x0671, 662 0x06d6, 0x06e5, 663 0x06e7, 0x06ee, 664 0x06f0, 0x06fa, 665 0x0711, 0x0712, 666 0x0730, 0x074b, 667 0x07a6, 0x07b1, 668 0x07eb, 0x07f4, 669 0x07f6, 0x07fa, 670 0x0816, 0x081a, 671 0x081b, 0x0824, 672 0x0825, 0x0828, 673 0x0829, 0x082e, 674 0x0859, 0x085c, 675 0x08e3, 0x0903, 676 0x093a, 0x093b, 677 0x093c, 0x093d, 678 0x0941, 0x0949, 679 0x094d, 0x094e, 680 0x0951, 0x0958, 681 0x0962, 0x0964, 682 0x0981, 0x0982, 683 0x0984, 0x0985, 684 0x098d, 0x098f, 685 0x0991, 0x0993, 686 0x09a9, 0x09aa, 687 0x09b1, 0x09b2, 688 0x09b3, 0x09b6, 689 0x09ba, 0x09bd, 690 0x09c1, 0x09c7, 691 0x09c9, 0x09cb, 692 0x09cd, 0x09ce, 693 0x09cf, 0x09d7, 694 0x09d8, 0x09dc, 695 0x09de, 0x09df, 696 0x09e2, 0x09e6, 697 0x09f2, 0x09f4, 698 0x09fb, 0x0a03, 699 0x0a04, 0x0a05, 700 0x0a0b, 0x0a0f, 701 0x0a11, 0x0a13, 702 0x0a29, 0x0a2a, 703 0x0a31, 0x0a32, 704 0x0a34, 0x0a35, 705 0x0a37, 0x0a38, 706 0x0a3a, 0x0a3e, 707 0x0a41, 0x0a59, 708 0x0a5d, 0x0a5e, 709 0x0a5f, 0x0a66, 710 0x0a70, 0x0a72, 711 0x0a75, 0x0a83, 712 0x0a84, 0x0a85, 713 0x0a8e, 0x0a8f, 714 0x0a92, 0x0a93, 715 0x0aa9, 0x0aaa, 716 0x0ab1, 0x0ab2, 717 0x0ab4, 0x0ab5, 718 0x0aba, 0x0abd, 719 0x0ac1, 0x0ac9, 720 0x0aca, 
0x0acb, 721 0x0acd, 0x0ad0, 722 0x0ad1, 0x0ae0, 723 0x0ae2, 0x0ae6, 724 0x0af1, 0x0af9, 725 0x0afa, 0x0b02, 726 0x0b04, 0x0b05, 727 0x0b0d, 0x0b0f, 728 0x0b11, 0x0b13, 729 0x0b29, 0x0b2a, 730 0x0b31, 0x0b32, 731 0x0b34, 0x0b35, 732 0x0b3a, 0x0b3d, 733 0x0b3f, 0x0b40, 734 0x0b41, 0x0b47, 735 0x0b49, 0x0b4b, 736 0x0b4d, 0x0b57, 737 0x0b58, 0x0b5c, 738 0x0b5e, 0x0b5f, 739 0x0b62, 0x0b66, 740 0x0b78, 0x0b83, 741 0x0b84, 0x0b85, 742 0x0b8b, 0x0b8e, 743 0x0b91, 0x0b92, 744 0x0b96, 0x0b99, 745 0x0b9b, 0x0b9c, 746 0x0b9d, 0x0b9e, 747 0x0ba0, 0x0ba3, 748 0x0ba5, 0x0ba8, 749 0x0bab, 0x0bae, 750 0x0bba, 0x0bbe, 751 0x0bc0, 0x0bc1, 752 0x0bc3, 0x0bc6, 753 0x0bc9, 0x0bca, 754 0x0bcd, 0x0bd0, 755 0x0bd1, 0x0bd7, 756 0x0bd8, 0x0be6, 757 0x0bf3, 0x0c01, 758 0x0c04, 0x0c05, 759 0x0c0d, 0x0c0e, 760 0x0c11, 0x0c12, 761 0x0c29, 0x0c2a, 762 0x0c3a, 0x0c3d, 763 0x0c3e, 0x0c41, 764 0x0c45, 0x0c58, 765 0x0c5b, 0x0c60, 766 0x0c62, 0x0c66, 767 0x0c70, 0x0c7f, 768 0x0c80, 0x0c82, 769 0x0c84, 0x0c85, 770 0x0c8d, 0x0c8e, 771 0x0c91, 0x0c92, 772 0x0ca9, 0x0caa, 773 0x0cb4, 0x0cb5, 774 0x0cba, 0x0cbd, 775 0x0cc5, 0x0cc6, 776 0x0cc9, 0x0cca, 777 0x0ccc, 0x0cd5, 778 0x0cd7, 0x0cde, 779 0x0cdf, 0x0ce0, 780 0x0ce2, 0x0ce6, 781 0x0cf0, 0x0cf1, 782 0x0cf3, 0x0d02, 783 0x0d04, 0x0d05, 784 0x0d0d, 0x0d0e, 785 0x0d11, 0x0d12, 786 0x0d3b, 0x0d3d, 787 0x0d41, 0x0d46, 788 0x0d49, 0x0d4a, 789 0x0d4d, 0x0d4e, 790 0x0d4f, 0x0d57, 791 0x0d58, 0x0d5f, 792 0x0d62, 0x0d66, 793 0x0d76, 0x0d79, 794 0x0d80, 0x0d82, 795 0x0d84, 0x0d85, 796 0x0d97, 0x0d9a, 797 0x0db2, 0x0db3, 798 0x0dbc, 0x0dbd, 799 0x0dbe, 0x0dc0, 800 0x0dc7, 0x0dcf, 801 0x0dd2, 0x0dd8, 802 0x0de0, 0x0de6, 803 0x0df0, 0x0df2, 804 0x0df5, 0x0e01, 805 0x0e31, 0x0e32, 806 0x0e34, 0x0e40, 807 0x0e47, 0x0e4f, 808 0x0e5c, 0x0e81, 809 0x0e83, 0x0e84, 810 0x0e85, 0x0e87, 811 0x0e89, 0x0e8a, 812 0x0e8b, 0x0e8d, 813 0x0e8e, 0x0e94, 814 0x0e98, 0x0e99, 815 0x0ea0, 0x0ea1, 816 0x0ea4, 0x0ea5, 817 0x0ea6, 0x0ea7, 818 0x0ea8, 0x0eaa, 819 0x0eac, 0x0ead, 820 0x0eb1, 
0x0eb2, 821 0x0eb4, 0x0ebd, 822 0x0ebe, 0x0ec0, 823 0x0ec5, 0x0ec6, 824 0x0ec7, 0x0ed0, 825 0x0eda, 0x0edc, 826 0x0ee0, 0x0f00, 827 0x0f18, 0x0f1a, 828 0x0f35, 0x0f36, 829 0x0f37, 0x0f38, 830 0x0f39, 0x0f3e, 831 0x0f48, 0x0f49, 832 0x0f6d, 0x0f7f, 833 0x0f80, 0x0f85, 834 0x0f86, 0x0f88, 835 0x0f8d, 0x0fbe, 836 0x0fc6, 0x0fc7, 837 0x0fcd, 0x0fce, 838 0x0fdb, 0x1000, 839 0x102d, 0x1031, 840 0x1032, 0x1038, 841 0x1039, 0x103b, 842 0x103d, 0x103f, 843 0x1058, 0x105a, 844 0x105e, 0x1061, 845 0x1071, 0x1075, 846 0x1082, 0x1083, 847 0x1085, 0x1087, 848 0x108d, 0x108e, 849 0x109d, 0x109e, 850 0x10c6, 0x10c7, 851 0x10c8, 0x10cd, 852 0x10ce, 0x10d0, 853 0x1249, 0x124a, 854 0x124e, 0x1250, 855 0x1257, 0x1258, 856 0x1259, 0x125a, 857 0x125e, 0x1260, 858 0x1289, 0x128a, 859 0x128e, 0x1290, 860 0x12b1, 0x12b2, 861 0x12b6, 0x12b8, 862 0x12bf, 0x12c0, 863 0x12c1, 0x12c2, 864 0x12c6, 0x12c8, 865 0x12d7, 0x12d8, 866 0x1311, 0x1312, 867 0x1316, 0x1318, 868 0x135b, 0x1360, 869 0x137d, 0x1380, 870 0x1390, 0x13a0, 871 0x13f6, 0x13f8, 872 0x13fe, 0x1401, 873 0x1680, 0x1681, 874 0x169b, 0x16a0, 875 0x16f9, 0x1700, 876 0x170d, 0x170e, 877 0x1712, 0x1720, 878 0x1732, 0x1735, 879 0x1737, 0x1740, 880 0x1752, 0x1760, 881 0x176d, 0x176e, 882 0x1771, 0x1780, 883 0x17b4, 0x17b6, 884 0x17b7, 0x17be, 885 0x17c6, 0x17c7, 886 0x17c9, 0x17d4, 887 0x17db, 0x17dc, 888 0x17dd, 0x17e0, 889 0x17ea, 0x1810, 890 0x181a, 0x1820, 891 0x1878, 0x1880, 892 0x18a9, 0x18aa, 893 0x18ab, 0x18b0, 894 0x18f6, 0x1900, 895 0x191f, 0x1923, 896 0x1927, 0x1929, 897 0x192c, 0x1930, 898 0x1932, 0x1933, 899 0x1939, 0x1946, 900 0x196e, 0x1970, 901 0x1975, 0x1980, 902 0x19ac, 0x19b0, 903 0x19ca, 0x19d0, 904 0x19db, 0x1a00, 905 0x1a17, 0x1a19, 906 0x1a1b, 0x1a1e, 907 0x1a56, 0x1a57, 908 0x1a58, 0x1a61, 909 0x1a62, 0x1a63, 910 0x1a65, 0x1a6d, 911 0x1a73, 0x1a80, 912 0x1a8a, 0x1a90, 913 0x1a9a, 0x1aa0, 914 0x1aae, 0x1b04, 915 0x1b34, 0x1b35, 916 0x1b36, 0x1b3b, 917 0x1b3c, 0x1b3d, 918 0x1b42, 0x1b43, 919 0x1b4c, 0x1b50, 920 0x1b6b, 
0x1b74, 921 0x1b7d, 0x1b82, 922 0x1ba2, 0x1ba6, 923 0x1ba8, 0x1baa, 924 0x1bab, 0x1bae, 925 0x1be6, 0x1be7, 926 0x1be8, 0x1bea, 927 0x1bed, 0x1bee, 928 0x1bef, 0x1bf2, 929 0x1bf4, 0x1bfc, 930 0x1c2c, 0x1c34, 931 0x1c36, 0x1c3b, 932 0x1c4a, 0x1c4d, 933 0x1c80, 0x1cc0, 934 0x1cc8, 0x1cd3, 935 0x1cd4, 0x1ce1, 936 0x1ce2, 0x1ce9, 937 0x1ced, 0x1cee, 938 0x1cf4, 0x1cf5, 939 0x1cf7, 0x1d00, 940 0x1dc0, 0x1e00, 941 0x1f16, 0x1f18, 942 0x1f1e, 0x1f20, 943 0x1f46, 0x1f48, 944 0x1f4e, 0x1f50, 945 0x1f58, 0x1f59, 946 0x1f5a, 0x1f5b, 947 0x1f5c, 0x1f5d, 948 0x1f5e, 0x1f5f, 949 0x1f7e, 0x1f80, 950 0x1fb5, 0x1fb6, 951 0x1fbd, 0x1fbe, 952 0x1fbf, 0x1fc2, 953 0x1fc5, 0x1fc6, 954 0x1fcd, 0x1fd0, 955 0x1fd4, 0x1fd6, 956 0x1fdc, 0x1fe0, 957 0x1fed, 0x1ff2, 958 0x1ff5, 0x1ff6, 959 0x1ffd, 0x200e, 960 0x2010, 0x2071, 961 0x2072, 0x207f, 962 0x2080, 0x2090, 963 0x209d, 0x2102, 964 0x2103, 0x2107, 965 0x2108, 0x210a, 966 0x2114, 0x2115, 967 0x2116, 0x2119, 968 0x211e, 0x2124, 969 0x2125, 0x2126, 970 0x2127, 0x2128, 971 0x2129, 0x212a, 972 0x212e, 0x212f, 973 0x213a, 0x213c, 974 0x2140, 0x2145, 975 0x214a, 0x214e, 976 0x2150, 0x2160, 977 0x2189, 0x2336, 978 0x237b, 0x2395, 979 0x2396, 0x249c, 980 0x24ea, 0x26ac, 981 0x26ad, 0x2800, 982 0x2900, 0x2c00, 983 0x2c2f, 0x2c30, 984 0x2c5f, 0x2c60, 985 0x2ce5, 0x2ceb, 986 0x2cef, 0x2cf2, 987 0x2cf4, 0x2d00, 988 0x2d26, 0x2d27, 989 0x2d28, 0x2d2d, 990 0x2d2e, 0x2d30, 991 0x2d68, 0x2d6f, 992 0x2d71, 0x2d80, 993 0x2d97, 0x2da0, 994 0x2da7, 0x2da8, 995 0x2daf, 0x2db0, 996 0x2db7, 0x2db8, 997 0x2dbf, 0x2dc0, 998 0x2dc7, 0x2dc8, 999 0x2dcf, 0x2dd0, 1000 0x2dd7, 0x2dd8, 1001 0x2ddf, 0x3005, 1002 0x3008, 0x3021, 1003 0x302a, 0x302e, 1004 0x3030, 0x3031, 1005 0x3036, 0x3038, 1006 0x303d, 0x3041, 1007 0x3097, 0x309d, 1008 0x30a0, 0x30a1, 1009 0x30fb, 0x30fc, 1010 0x3100, 0x3105, 1011 0x312e, 0x3131, 1012 0x318f, 0x3190, 1013 0x31bb, 0x31f0, 1014 0x321d, 0x3220, 1015 0x3250, 0x3260, 1016 0x327c, 0x327f, 1017 0x32b1, 0x32c0, 1018 0x32cc, 0x32d0, 1019 0x32ff, 
0x3300, 1020 0x3377, 0x337b, 1021 0x33de, 0x33e0, 1022 0x33ff, 0x3400, 1023 0x4db6, 0x4e00, 1024 0x9fd6, 0xa000, 1025 0xa48d, 0xa4d0, 1026 0xa60d, 0xa610, 1027 0xa62c, 0xa640, 1028 0xa66f, 0xa680, 1029 0xa69e, 0xa6a0, 1030 0xa6f0, 0xa6f2, 1031 0xa6f8, 0xa722, 1032 0xa788, 0xa789, 1033 0xa7ae, 0xa7b0, 1034 0xa7b8, 0xa7f7, 1035 0xa802, 0xa803, 1036 0xa806, 0xa807, 1037 0xa80b, 0xa80c, 1038 0xa825, 0xa827, 1039 0xa828, 0xa830, 1040 0xa838, 0xa840, 1041 0xa874, 0xa880, 1042 0xa8c4, 0xa8ce, 1043 0xa8da, 0xa8f2, 1044 0xa8fe, 0xa900, 1045 0xa926, 0xa92e, 1046 0xa947, 0xa952, 1047 0xa954, 0xa95f, 1048 0xa97d, 0xa983, 1049 0xa9b3, 0xa9b4, 1050 0xa9b6, 0xa9ba, 1051 0xa9bc, 0xa9bd, 1052 0xa9ce, 0xa9cf, 1053 0xa9da, 0xa9de, 1054 0xa9e5, 0xa9e6, 1055 0xa9ff, 0xaa00, 1056 0xaa29, 0xaa2f, 1057 0xaa31, 0xaa33, 1058 0xaa35, 0xaa40, 1059 0xaa43, 0xaa44, 1060 0xaa4c, 0xaa4d, 1061 0xaa4e, 0xaa50, 1062 0xaa5a, 0xaa5c, 1063 0xaa7c, 0xaa7d, 1064 0xaab0, 0xaab1, 1065 0xaab2, 0xaab5, 1066 0xaab7, 0xaab9, 1067 0xaabe, 0xaac0, 1068 0xaac1, 0xaac2, 1069 0xaac3, 0xaadb, 1070 0xaaec, 0xaaee, 1071 0xaaf6, 0xab01, 1072 0xab07, 0xab09, 1073 0xab0f, 0xab11, 1074 0xab17, 0xab20, 1075 0xab27, 0xab28, 1076 0xab2f, 0xab30, 1077 0xab66, 0xab70, 1078 0xabe5, 0xabe6, 1079 0xabe8, 0xabe9, 1080 0xabed, 0xabf0, 1081 0xabfa, 0xac00, 1082 0xd7a4, 0xd7b0, 1083 0xd7c7, 0xd7cb, 1084 0xd7fc, 0xe000, 1085 0xfa6e, 0xfa70, 1086 0xfada, 0xfb00, 1087 0xfb07, 0xfb13, 1088 0xfb18, 0xfb1d, 1089 0xfb1e, 0xfb1f, 1090 0xfb29, 0xfb2a, 1091 0xfd3e, 0xfd40, 1092 0xfdd0, 0xfdf0, 1093 0xfdfd, 0xfdfe, 1094 0xfe00, 0xfe70, 1095 0xfeff, 0xff21, 1096 0xff3b, 0xff41, 1097 0xff5b, 0xff66, 1098 0xffbf, 0xffc2, 1099 0xffc8, 0xffca, 1100 0xffd0, 0xffd2, 1101 0xffd8, 0xffda, 1102 0xffdd, 0x10000, 1103 0x1000c, 0x1000d, 1104 0x10027, 0x10028, 1105 0x1003b, 0x1003c, 1106 0x1003e, 0x1003f, 1107 0x1004e, 0x10050, 1108 0x1005e, 0x10080, 1109 0x100fb, 0x10100, 1110 0x10101, 0x10102, 1111 0x10103, 0x10107, 1112 0x10134, 0x10137, 1113 0x10140, 
0x101d0, 1114 0x101fd, 0x10280, 1115 0x1029d, 0x102a0, 1116 0x102d1, 0x10300, 1117 0x10324, 0x10330, 1118 0x1034b, 0x10350, 1119 0x10376, 0x10380, 1120 0x1039e, 0x1039f, 1121 0x103c4, 0x103c8, 1122 0x103d6, 0x10400, 1123 0x1049e, 0x104a0, 1124 0x104aa, 0x10500, 1125 0x10528, 0x10530, 1126 0x10564, 0x1056f, 1127 0x10570, 0x10600, 1128 0x10737, 0x10740, 1129 0x10756, 0x10760, 1130 0x10768, 0x10800, 1131 0x1091f, 0x10920, 1132 0x10a01, 0x10a04, 1133 0x10a05, 0x10a07, 1134 0x10a0c, 0x10a10, 1135 0x10a38, 0x10a3b, 1136 0x10a3f, 0x10a40, 1137 0x10ae5, 0x10ae7, 1138 0x10b39, 0x10b40, 1139 0x10e60, 0x10e7f, 1140 0x11001, 0x11002, 1141 0x11038, 0x11047, 1142 0x1104e, 0x11066, 1143 0x11070, 0x11082, 1144 0x110b3, 0x110b7, 1145 0x110b9, 0x110bb, 1146 0x110c2, 0x110d0, 1147 0x110e9, 0x110f0, 1148 0x110fa, 0x11103, 1149 0x11127, 0x1112c, 1150 0x1112d, 0x11136, 1151 0x11144, 0x11150, 1152 0x11173, 0x11174, 1153 0x11177, 0x11182, 1154 0x111b6, 0x111bf, 1155 0x111ca, 0x111cd, 1156 0x111ce, 0x111d0, 1157 0x111e0, 0x111e1, 1158 0x111f5, 0x11200, 1159 0x11212, 0x11213, 1160 0x1122f, 0x11232, 1161 0x11234, 0x11235, 1162 0x11236, 0x11238, 1163 0x1123e, 0x11280, 1164 0x11287, 0x11288, 1165 0x11289, 0x1128a, 1166 0x1128e, 0x1128f, 1167 0x1129e, 0x1129f, 1168 0x112aa, 0x112b0, 1169 0x112df, 0x112e0, 1170 0x112e3, 0x112f0, 1171 0x112fa, 0x11302, 1172 0x11304, 0x11305, 1173 0x1130d, 0x1130f, 1174 0x11311, 0x11313, 1175 0x11329, 0x1132a, 1176 0x11331, 0x11332, 1177 0x11334, 0x11335, 1178 0x1133a, 0x1133d, 1179 0x11340, 0x11341, 1180 0x11345, 0x11347, 1181 0x11349, 0x1134b, 1182 0x1134e, 0x11350, 1183 0x11351, 0x11357, 1184 0x11358, 0x1135d, 1185 0x11364, 0x11480, 1186 0x114b3, 0x114b9, 1187 0x114ba, 0x114bb, 1188 0x114bf, 0x114c1, 1189 0x114c2, 0x114c4, 1190 0x114c8, 0x114d0, 1191 0x114da, 0x11580, 1192 0x115b2, 0x115b8, 1193 0x115bc, 0x115be, 1194 0x115bf, 0x115c1, 1195 0x115dc, 0x11600, 1196 0x11633, 0x1163b, 1197 0x1163d, 0x1163e, 1198 0x1163f, 0x11641, 1199 0x11645, 0x11650, 1200 
0x1165a, 0x11680, 1201 0x116ab, 0x116ac, 1202 0x116ad, 0x116ae, 1203 0x116b0, 0x116b6, 1204 0x116b7, 0x116c0, 1205 0x116ca, 0x11700, 1206 0x1171a, 0x11720, 1207 0x11722, 0x11726, 1208 0x11727, 0x11730, 1209 0x11740, 0x118a0, 1210 0x118f3, 0x118ff, 1211 0x11900, 0x11ac0, 1212 0x11af9, 0x12000, 1213 0x1239a, 0x12400, 1214 0x1246f, 0x12470, 1215 0x12475, 0x12480, 1216 0x12544, 0x13000, 1217 0x1342f, 0x14400, 1218 0x14647, 0x16800, 1219 0x16a39, 0x16a40, 1220 0x16a5f, 0x16a60, 1221 0x16a6a, 0x16a6e, 1222 0x16a70, 0x16ad0, 1223 0x16aee, 0x16af5, 1224 0x16af6, 0x16b00, 1225 0x16b30, 0x16b37, 1226 0x16b46, 0x16b50, 1227 0x16b5a, 0x16b5b, 1228 0x16b62, 0x16b63, 1229 0x16b78, 0x16b7d, 1230 0x16b90, 0x16f00, 1231 0x16f45, 0x16f50, 1232 0x16f7f, 0x16f93, 1233 0x16fa0, 0x1b000, 1234 0x1b002, 0x1bc00, 1235 0x1bc6b, 0x1bc70, 1236 0x1bc7d, 0x1bc80, 1237 0x1bc89, 0x1bc90, 1238 0x1bc9a, 0x1bc9c, 1239 0x1bc9d, 0x1bc9f, 1240 0x1bca0, 0x1d000, 1241 0x1d0f6, 0x1d100, 1242 0x1d127, 0x1d129, 1243 0x1d167, 0x1d16a, 1244 0x1d173, 0x1d183, 1245 0x1d185, 0x1d18c, 1246 0x1d1aa, 0x1d1ae, 1247 0x1d1e9, 0x1d360, 1248 0x1d372, 0x1d400, 1249 0x1d455, 0x1d456, 1250 0x1d49d, 0x1d49e, 1251 0x1d4a0, 0x1d4a2, 1252 0x1d4a3, 0x1d4a5, 1253 0x1d4a7, 0x1d4a9, 1254 0x1d4ad, 0x1d4ae, 1255 0x1d4ba, 0x1d4bb, 1256 0x1d4bc, 0x1d4bd, 1257 0x1d4c4, 0x1d4c5, 1258 0x1d506, 0x1d507, 1259 0x1d50b, 0x1d50d, 1260 0x1d515, 0x1d516, 1261 0x1d51d, 0x1d51e, 1262 0x1d53a, 0x1d53b, 1263 0x1d53f, 0x1d540, 1264 0x1d545, 0x1d546, 1265 0x1d547, 0x1d54a, 1266 0x1d551, 0x1d552, 1267 0x1d6a6, 0x1d6a8, 1268 0x1d6db, 0x1d6dc, 1269 0x1d715, 0x1d716, 1270 0x1d74f, 0x1d750, 1271 0x1d789, 0x1d78a, 1272 0x1d7c3, 0x1d7c4, 1273 0x1d7cc, 0x1d800, 1274 0x1da00, 0x1da37, 1275 0x1da3b, 0x1da6d, 1276 0x1da75, 0x1da76, 1277 0x1da84, 0x1da85, 1278 0x1da8c, 0x1e800, 1279 0x1e8d0, 0x1e8d7, 1280 0x1eef0, 0x1eef2, 1281 0x1f000, 0x1f110, 1282 0x1f12f, 0x1f130, 1283 0x1f16a, 0x1f170, 1284 0x1f19b, 0x1f1e6, 1285 0x1f203, 0x1f210, 1286 0x1f23b, 0x1f240, 
1287 0x1f249, 0x1f250, 1288 0x1f252, 0x20000, 1289 0x2a6d7, 0x2a700, 1290 0x2b735, 0x2b740, 1291 0x2b81e, 0x2b820, 1292 0x2cea2, 0x2f800, 1293 0x2fa1e, 0xf0000, 1294 0xffffe, 0x100000, 1295 0x10fffe, 0x10ffff // sentinel 1296 }; 1297 1298 1299 // use a binary search with a cache 1300 1301 private transient volatile int stCache = 0; 1302 1303 private boolean isStrongDirectional(char c) { 1304 int cachedIndex = stCache; 1305 if (c < strongTable[cachedIndex]) { 1306 cachedIndex = search(c, strongTable, 0, cachedIndex); 1307 } else if (c >= strongTable[cachedIndex + 1]) { 1308 cachedIndex = search(c, strongTable, cachedIndex + 1, 1309 strongTable.length - cachedIndex - 1); 1310 } 1311 boolean val = (cachedIndex & 0x1) == 1; 1312 stCache = cachedIndex; 1313 return val; 1314 } 1315 1316 private static int getKeyFromMask(int mask) { 1317 int key = 0; 1318 while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) { 1319 ++key; 1320 } 1321 if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) { 1322 throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask)); 1323 } 1324 return key; 1325 } 1326 1327 /** 1328 * Returns a shaper for the provided unicode range. All 1329 * Latin-1 (EUROPEAN) digits are converted 1330 * to the corresponding decimal unicode digits. 1331 * @param singleRange the specified Unicode range 1332 * @return a non-contextual numeric shaper 1333 * @throws IllegalArgumentException if the range is not a single range 1334 */ 1335 public static NumericShaper getShaper(int singleRange) { 1336 int key = getKeyFromMask(singleRange); 1337 return new NumericShaper(key, singleRange); 1338 } 1339 1340 /** 1341 * Returns a shaper for the provided Unicode 1342 * range. All Latin-1 (EUROPEAN) digits are converted to the 1343 * corresponding decimal digits of the specified Unicode range. 1344 * 1345 * @param singleRange the Unicode range given by a {@link 1346 * NumericShaper.Range} constant. 1347 * @return a non-contextual {@code NumericShaper}. 
1348 * @throws NullPointerException if {@code singleRange} is {@code null} 1349 * @since 1.7 1350 */ 1351 public static NumericShaper getShaper(Range singleRange) { 1352 return new NumericShaper(singleRange, EnumSet.of(singleRange)); 1353 } 1354 1355 /** 1356 * Returns a contextual shaper for the provided unicode range(s). 1357 * Latin-1 (EUROPEAN) digits are converted to the decimal digits 1358 * corresponding to the range of the preceding text, if the 1359 * range is one of the provided ranges. Multiple ranges are 1360 * represented by or-ing the values together, such as, 1361 * {@code NumericShaper.ARABIC | NumericShaper.THAI}. The 1362 * shaper assumes EUROPEAN as the starting context, that is, if 1363 * EUROPEAN digits are encountered before any strong directional 1364 * text in the string, the context is presumed to be EUROPEAN, and 1365 * so the digits will not shape. 1366 * @param ranges the specified Unicode ranges 1367 * @return a shaper for the specified ranges 1368 */ 1369 public static NumericShaper getContextualShaper(int ranges) { 1370 ranges |= CONTEXTUAL_MASK; 1371 return new NumericShaper(EUROPEAN_KEY, ranges); 1372 } 1373 1374 /** 1375 * Returns a contextual shaper for the provided Unicode 1376 * range(s). The Latin-1 (EUROPEAN) digits are converted to the 1377 * decimal digits corresponding to the range of the preceding 1378 * text, if the range is one of the provided ranges. 1379 * 1380 * <p>The shaper assumes EUROPEAN as the starting context, that 1381 * is, if EUROPEAN digits are encountered before any strong 1382 * directional text in the string, the context is presumed to be 1383 * EUROPEAN, and so the digits will not shape. 1384 * 1385 * @param ranges the specified Unicode ranges 1386 * @return a contextual shaper for the specified ranges 1387 * @throws NullPointerException if {@code ranges} is {@code null}. 
1388 * @since 1.7 1389 */ 1390 public static NumericShaper getContextualShaper(Set<Range> ranges) { 1391 NumericShaper shaper = new NumericShaper(Range.EUROPEAN, ranges); 1392 shaper.mask = CONTEXTUAL_MASK; 1393 return shaper; 1394 } 1395 1396 /** 1397 * Returns a contextual shaper for the provided unicode range(s). 1398 * Latin-1 (EUROPEAN) digits will be converted to the decimal digits 1399 * corresponding to the range of the preceding text, if the 1400 * range is one of the provided ranges. Multiple ranges are 1401 * represented by or-ing the values together, for example, 1402 * {@code NumericShaper.ARABIC | NumericShaper.THAI}. The 1403 * shaper uses defaultContext as the starting context. 1404 * @param ranges the specified Unicode ranges 1405 * @param defaultContext the starting context, such as 1406 * {@code NumericShaper.EUROPEAN} 1407 * @return a shaper for the specified Unicode ranges. 1408 * @throws IllegalArgumentException if the specified 1409 * {@code defaultContext} is not a single valid range. 1410 */ 1411 public static NumericShaper getContextualShaper(int ranges, int defaultContext) { 1412 int key = getKeyFromMask(defaultContext); 1413 ranges |= CONTEXTUAL_MASK; 1414 return new NumericShaper(key, ranges); 1415 } 1416 1417 /** 1418 * Returns a contextual shaper for the provided Unicode range(s). 1419 * The Latin-1 (EUROPEAN) digits will be converted to the decimal 1420 * digits corresponding to the range of the preceding text, if the 1421 * range is one of the provided ranges. The shaper uses {@code 1422 * defaultContext} as the starting context. 1423 * 1424 * @param ranges the specified Unicode ranges 1425 * @param defaultContext the starting context, such as 1426 * {@code NumericShaper.Range.EUROPEAN} 1427 * @return a contextual shaper for the specified Unicode ranges. 
1428 * @throws NullPointerException 1429 * if {@code ranges} or {@code defaultContext} is {@code null} 1430 * @since 1.7 1431 */ 1432 public static NumericShaper getContextualShaper(Set<Range> ranges, 1433 Range defaultContext) { 1434 if (defaultContext == null) { 1435 throw new NullPointerException(); 1436 } 1437 NumericShaper shaper = new NumericShaper(defaultContext, ranges); 1438 shaper.mask = CONTEXTUAL_MASK; 1439 return shaper; 1440 } 1441 1442 /** 1443 * Private constructor. 1444 */ 1445 private NumericShaper(int key, int mask) { 1446 this.key = key; 1447 this.mask = mask; 1448 } 1449 1450 private NumericShaper(Range defaultContext, Set<Range> ranges) { 1451 shapingRange = defaultContext; 1452 rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null. 1453 1454 // Give precedence to EASTERN_ARABIC if both ARABIC and 1455 // EASTERN_ARABIC are specified. 1456 if (rangeSet.contains(Range.EASTERN_ARABIC) 1457 && rangeSet.contains(Range.ARABIC)) { 1458 rangeSet.remove(Range.ARABIC); 1459 } 1460 1461 // As well as the above case, give precedence to TAI_THAM_THAM if both 1462 // TAI_THAM_HORA and TAI_THAM_THAM are specified. 1463 if (rangeSet.contains(Range.TAI_THAM_THAM) 1464 && rangeSet.contains(Range.TAI_THAM_HORA)) { 1465 rangeSet.remove(Range.TAI_THAM_HORA); 1466 } 1467 1468 rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); 1469 if (rangeArray.length > BSEARCH_THRESHOLD) { 1470 // sort rangeArray for binary search 1471 Arrays.sort(rangeArray, 1472 new Comparator<Range>() { 1473 public int compare(Range s1, Range s2) { 1474 return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1; 1475 } 1476 }); 1477 } 1478 } 1479 1480 /** 1481 * Converts the digits in the text that occur between start and 1482 * start + count. 
1483 * @param text an array of characters to convert 1484 * @param start the index into {@code text} to start 1485 * converting 1486 * @param count the number of characters in {@code text} 1487 * to convert 1488 * @throws IndexOutOfBoundsException if start or start + count is 1489 * out of bounds 1490 * @throws NullPointerException if text is null 1491 */ 1492 public void shape(char[] text, int start, int count) { 1493 checkParams(text, start, count); 1494 if (isContextual()) { 1495 if (rangeSet == null) { 1496 shapeContextually(text, start, count, key); 1497 } else { 1498 shapeContextually(text, start, count, shapingRange); 1499 } 1500 } else { 1501 shapeNonContextually(text, start, count); 1502 } 1503 } 1504 1505 /** 1506 * Converts the digits in the text that occur between start and 1507 * start + count, using the provided context. 1508 * Context is ignored if the shaper is not a contextual shaper. 1509 * @param text an array of characters 1510 * @param start the index into {@code text} to start 1511 * converting 1512 * @param count the number of characters in {@code text} 1513 * to convert 1514 * @param context the context to which to convert the 1515 * characters, such as {@code NumericShaper.EUROPEAN} 1516 * @throws IndexOutOfBoundsException if start or start + count is 1517 * out of bounds 1518 * @throws NullPointerException if text is null 1519 * @throws IllegalArgumentException if this is a contextual shaper 1520 * and the specified {@code context} is not a single valid 1521 * range. 
1522 */ 1523 public void shape(char[] text, int start, int count, int context) { 1524 checkParams(text, start, count); 1525 if (isContextual()) { 1526 int ctxKey = getKeyFromMask(context); 1527 if (rangeSet == null) { 1528 shapeContextually(text, start, count, ctxKey); 1529 } else { 1530 shapeContextually(text, start, count, Range.values()[ctxKey]); 1531 } 1532 } else { 1533 shapeNonContextually(text, start, count); 1534 } 1535 } 1536 1537 /** 1538 * Converts the digits in the text that occur between {@code 1539 * start} and {@code start + count}, using the provided {@code 1540 * context}. {@code Context} is ignored if the shaper is not a 1541 * contextual shaper. 1542 * 1543 * @param text a {@code char} array 1544 * @param start the index into {@code text} to start converting 1545 * @param count the number of {@code char}s in {@code text} 1546 * to convert 1547 * @param context the context to which to convert the characters, 1548 * such as {@code NumericShaper.Range.EUROPEAN} 1549 * @throws IndexOutOfBoundsException 1550 * if {@code start} or {@code start + count} is out of bounds 1551 * @throws NullPointerException 1552 * if {@code text} or {@code context} is null 1553 * @since 1.7 1554 */ 1555 public void shape(char[] text, int start, int count, Range context) { 1556 checkParams(text, start, count); 1557 if (context == null) { 1558 throw new NullPointerException("context is null"); 1559 } 1560 1561 if (isContextual()) { 1562 if (rangeSet != null) { 1563 shapeContextually(text, start, count, context); 1564 } else { 1565 int key = Range.toRangeIndex(context); 1566 if (key >= 0) { 1567 shapeContextually(text, start, count, key); 1568 } else { 1569 shapeContextually(text, start, count, shapingRange); 1570 } 1571 } 1572 } else { 1573 shapeNonContextually(text, start, count); 1574 } 1575 } 1576 1577 private void checkParams(char[] text, int start, int count) { 1578 if (text == null) { 1579 throw new NullPointerException("text is null"); 1580 } 1581 if ((start < 0) 
1582 || (start > text.length) 1583 || ((start + count) < 0) 1584 || ((start + count) > text.length)) { 1585 throw new IndexOutOfBoundsException( 1586 "bad start or count for text of length " + text.length); 1587 } 1588 } 1589 1590 /** 1591 * Returns a {@code boolean} indicating whether or not 1592 * this shaper shapes contextually. 1593 * @return {@code true} if this shaper is contextual; 1594 * {@code false} otherwise. 1595 */ 1596 public boolean isContextual() { 1597 return (mask & CONTEXTUAL_MASK) != 0; 1598 } 1599 1600 /** 1601 * Returns an {@code int} that ORs together the values for 1602 * all the ranges that will be shaped. 1603 * <p> 1604 * For example, to check if a shaper shapes to Arabic, you would use the 1605 * following: 1606 * <blockquote> 1607 * {@code if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... } 1608 * </blockquote> 1609 * 1610 * <p>Note that this method supports only the bit mask-based 1611 * ranges. Call {@link #getRangeSet()} for the enum-based ranges. 1612 * 1613 * @return the values for all the ranges to be shaped. 1614 */ 1615 public int getRanges() { 1616 return mask & ~CONTEXTUAL_MASK; 1617 } 1618 1619 /** 1620 * Returns a {@code Set} representing all the Unicode ranges in 1621 * this {@code NumericShaper} that will be shaped. 1622 * 1623 * @return all the Unicode ranges to be shaped. 1624 * @since 1.7 1625 */ 1626 public Set<Range> getRangeSet() { 1627 if (rangeSet != null) { 1628 return EnumSet.copyOf(rangeSet); 1629 } 1630 return Range.maskToRangeSet(mask); 1631 } 1632 1633 /** 1634 * Perform non-contextual shaping. 
1635 */ 1636 private void shapeNonContextually(char[] text, int start, int count) { 1637 int base; 1638 char minDigit = '0'; 1639 if (shapingRange != null) { 1640 base = shapingRange.getDigitBase(); 1641 minDigit += shapingRange.getNumericBase(); 1642 } else { 1643 base = bases[key]; 1644 if (key == ETHIOPIC_KEY) { 1645 minDigit++; // Ethiopic doesn't use decimal zero 1646 } 1647 } 1648 for (int i = start, e = start + count; i < e; ++i) { 1649 char c = text[i]; 1650 if (c >= minDigit && c <= '\u0039') { 1651 text[i] = (char)(c + base); 1652 } 1653 } 1654 } 1655 1656 /** 1657 * Perform contextual shaping. 1658 * Synchronized to protect caches used in getContextKey. 1659 */ 1660 private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) { 1661 1662 // if we don't support this context, then don't shape 1663 if ((mask & (1<<ctxKey)) == 0) { 1664 ctxKey = EUROPEAN_KEY; 1665 } 1666 int lastkey = ctxKey; 1667 1668 int base = bases[ctxKey]; 1669 char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero 1670 1671 synchronized (NumericShaper.class) { 1672 for (int i = start, e = start + count; i < e; ++i) { 1673 char c = text[i]; 1674 if (c >= minDigit && c <= '\u0039') { 1675 text[i] = (char)(c + base); 1676 } 1677 1678 if (isStrongDirectional(c)) { 1679 int newkey = getContextKey(c); 1680 if (newkey != lastkey) { 1681 lastkey = newkey; 1682 1683 ctxKey = newkey; 1684 if (((mask & EASTERN_ARABIC) != 0) && 1685 (ctxKey == ARABIC_KEY || 1686 ctxKey == EASTERN_ARABIC_KEY)) { 1687 ctxKey = EASTERN_ARABIC_KEY; 1688 } else if (((mask & ARABIC) != 0) && 1689 (ctxKey == ARABIC_KEY || 1690 ctxKey == EASTERN_ARABIC_KEY)) { 1691 ctxKey = ARABIC_KEY; 1692 } else if ((mask & (1<<ctxKey)) == 0) { 1693 ctxKey = EUROPEAN_KEY; 1694 } 1695 1696 base = bases[ctxKey]; 1697 1698 minDigit = ctxKey == ETHIOPIC_KEY ? 
'1' : '0'; // Ethiopic doesn't use decimal zero 1699 } 1700 } 1701 } 1702 } 1703 } 1704 1705 private void shapeContextually(char[] text, int start, int count, Range ctxKey) { 1706 // if we don't support the specified context, then don't shape. 1707 if (ctxKey == null || !rangeSet.contains(ctxKey)) { 1708 ctxKey = Range.EUROPEAN; 1709 } 1710 1711 Range lastKey = ctxKey; 1712 int base = ctxKey.getDigitBase(); 1713 char minDigit = (char)('0' + ctxKey.getNumericBase()); 1714 final int end = start + count; 1715 for (int i = start; i < end; ++i) { 1716 char c = text[i]; 1717 if (c >= minDigit && c <= '9') { 1718 text[i] = (char)(c + base); 1719 continue; 1720 } 1721 if (isStrongDirectional(c)) { 1722 ctxKey = rangeForCodePoint(c); 1723 if (ctxKey != lastKey) { 1724 lastKey = ctxKey; 1725 base = ctxKey.getDigitBase(); 1726 minDigit = (char)('0' + ctxKey.getNumericBase()); 1727 } 1728 } 1729 } 1730 } 1731 1732 /** 1733 * Returns a hash code for this shaper. 1734 * @return this shaper's hash code. 1735 * @see java.lang.Object#hashCode 1736 */ 1737 public int hashCode() { 1738 int hash = mask; 1739 if (rangeSet != null) { 1740 // Use the CONTEXTUAL_MASK bit only for the enum-based 1741 // NumericShaper. A deserialized NumericShaper might have 1742 // bit masks. 1743 hash &= CONTEXTUAL_MASK; 1744 hash ^= rangeSet.hashCode(); 1745 } 1746 return hash; 1747 } 1748 1749 /** 1750 * Returns {@code true} if the specified object is an instance of 1751 * {@code NumericShaper} and shapes identically to this one, 1752 * regardless of the range representations, the bit mask or the 1753 * enum. For example, the following code produces {@code "true"}. 
1754 * <blockquote><pre> 1755 * NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC); 1756 * NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC); 1757 * System.out.println(ns1.equals(ns2)); 1758 * </pre></blockquote> 1759 * 1760 * @param o the specified object to compare to this 1761 * {@code NumericShaper} 1762 * @return {@code true} if {@code o} is an instance 1763 * of {@code NumericShaper} and shapes in the same way; 1764 * {@code false} otherwise. 1765 * @see java.lang.Object#equals(java.lang.Object) 1766 */ 1767 public boolean equals(Object o) { 1768 if (o != null) { 1769 try { 1770 NumericShaper rhs = (NumericShaper)o; 1771 if (rangeSet != null) { 1772 if (rhs.rangeSet != null) { 1773 return isContextual() == rhs.isContextual() 1774 && rangeSet.equals(rhs.rangeSet) 1775 && shapingRange == rhs.shapingRange; 1776 } 1777 return isContextual() == rhs.isContextual() 1778 && rangeSet.equals(Range.maskToRangeSet(rhs.mask)) 1779 && shapingRange == Range.indexToRange(rhs.key); 1780 } else if (rhs.rangeSet != null) { 1781 Set<Range> rset = Range.maskToRangeSet(mask); 1782 Range srange = Range.indexToRange(key); 1783 return isContextual() == rhs.isContextual() 1784 && rset.equals(rhs.rangeSet) 1785 && srange == rhs.shapingRange; 1786 } 1787 return rhs.mask == mask && rhs.key == key; 1788 } 1789 catch (ClassCastException e) { 1790 } 1791 } 1792 return false; 1793 } 1794 1795 /** 1796 * Returns a {@code String} that describes this shaper. This method 1797 * is used for debugging purposes only. 1798 * @return a {@code String} describing this shaper. 1799 */ 1800 public String toString() { 1801 StringBuilder buf = new StringBuilder(super.toString()); 1802 1803 buf.append("[contextual:").append(isContextual()); 1804 1805 String[] keyNames = null; 1806 if (isContextual()) { 1807 buf.append(", context:"); 1808 buf.append(shapingRange == null ? 
Range.values()[key] : shapingRange); 1809 } 1810 1811 if (rangeSet == null) { 1812 buf.append(", range(s): "); 1813 boolean first = true; 1814 for (int i = 0; i < NUM_KEYS; ++i) { 1815 if ((mask & (1 << i)) != 0) { 1816 if (first) { 1817 first = false; 1818 } else { 1819 buf.append(", "); 1820 } 1821 buf.append(Range.values()[i]); 1822 } 1823 } 1824 } else { 1825 buf.append(", range set: ").append(rangeSet); 1826 } 1827 buf.append(']'); 1828 1829 return buf.toString(); 1830 } 1831 1832 /** 1833 * Returns the index of the high bit in value (assuming le, actually 1834 * power of 2 >= value). value must be positive. 1835 */ 1836 private static int getHighBit(int value) { 1837 if (value <= 0) { 1838 return -32; 1839 } 1840 1841 int bit = 0; 1842 1843 if (value >= 1 << 16) { 1844 value >>= 16; 1845 bit += 16; 1846 } 1847 1848 if (value >= 1 << 8) { 1849 value >>= 8; 1850 bit += 8; 1851 } 1852 1853 if (value >= 1 << 4) { 1854 value >>= 4; 1855 bit += 4; 1856 } 1857 1858 if (value >= 1 << 2) { 1859 value >>= 2; 1860 bit += 2; 1861 } 1862 1863 if (value >= 1 << 1) { 1864 bit += 1; 1865 } 1866 1867 return bit; 1868 } 1869 1870 /** 1871 * fast binary search over subrange of array. 1872 */ 1873 private static int search(int value, int[] array, int start, int length) 1874 { 1875 int power = 1 << getHighBit(length); 1876 int extra = length - power; 1877 int probe = power; 1878 int index = start; 1879 1880 if (value >= array[index + extra]) { 1881 index += extra; 1882 } 1883 1884 while (probe > 1) { 1885 probe >>= 1; 1886 1887 if (value >= array[index + probe]) { 1888 index += probe; 1889 } 1890 } 1891 1892 return index; 1893 } 1894 1895 /** 1896 * Converts the {@code NumericShaper.Range} enum-based parameters, 1897 * if any, to the bit mask-based counterparts and writes this 1898 * object to the {@code stream}. Any enum constants that have no 1899 * bit mask-based counterparts are ignored in the conversion. 
1900 * 1901 * @param stream the output stream to write to 1902 * @throws IOException if an I/O error occurs while writing to {@code stream} 1903 * @since 1.7 1904 */ 1905 private void writeObject(ObjectOutputStream stream) throws IOException { 1906 if (shapingRange != null) { 1907 int index = Range.toRangeIndex(shapingRange); 1908 if (index >= 0) { 1909 key = index; 1910 } 1911 } 1912 if (rangeSet != null) { 1913 mask |= Range.toRangeMask(rangeSet); 1914 } 1915 stream.defaultWriteObject(); 1916 } 1917 }