1 /* 2 * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 /* 26 ******************************************************************************* 27 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * 28 * * 29 * The original version of this source code and documentation is copyrighted * 30 * and owned by IBM, These materials are provided under terms of a License * 31 * Agreement between IBM and Sun. This technology is protected by multiple * 32 * US and International patents. This notice and attribution to IBM may not * 33 * to removed. * 34 ******************************************************************************* 35 */ 36 37 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 38 * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre 39 * concept of RUNS_ONLY which is a double operation. 40 * It could be advantageous to divide this into 3 concepts: 41 * a) Operation: direct / inverse / RUNS_ONLY 42 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L 43 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 44 * This would allow combinations not possible today like RUNS_ONLY with 45 * NUMBERS_SPECIAL. 46 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 47 * REMOVE_CONTROLS for the inverse step. 48 * Not all combinations would be supported, and probably not all do make sense. 49 * This would need to document which ones are supported and what are the 50 * fallbacks for unsupported combinations. 51 */ 52 53 package sun.text.bidi; 54 55 import java.io.IOException; 56 import java.lang.reflect.Array; 57 import java.text.AttributedCharacterIterator; 58 import java.text.Bidi; 59 import java.util.Arrays; 60 import java.util.MissingResourceException; 61 import sun.misc.JavaAWTFontAccess; 62 import sun.misc.SharedSecrets; 63 import sun.text.normalizer.UBiDiProps; 64 import sun.text.normalizer.UCharacter; 65 import sun.text.normalizer.UTF16; 66 67 /** 68 * 69 * <h2>Bidi algorithm for ICU</h2> 70 * 71 * This is an implementation of the Unicode Bidirectional algorithm. The 72 * algorithm is defined in the <a 73 * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 74 * version 13, also described in The Unicode Standard, Version 4.0 . 75 * <p> 76 * 77 * Note: Libraries that perform a bidirectional algorithm and reorder strings 78 * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and 79 * shaping (ArabicShaping) classes can be used at the core of such "Storage 80 * Layout Engines". 81 * 82 * <h3>General remarks about the API:</h3> 83 * 84 * The "limit" of a sequence of characters is the position just after 85 * their last character, i.e., one more than that position. 86 * <p> 87 * 88 * Some of the API methods provide access to "runs". Such a 89 * "run" is defined as a sequence of characters that are at the same 90 * embedding level after performing the Bidi algorithm. 91 * <p> 92 * 93 * <h3>Basic concept: paragraph</h3> 94 * A piece of text can be divided into several paragraphs by characters 95 * with the Bidi class <code>Block Separator</code>. For handling of 96 * paragraphs, see: 97 * <ul> 98 * <li>{@link #countParagraphs} 99 * <li>{@link #getParaLevel} 100 * <li>{@link #getParagraph} 101 * <li>{@link #getParagraphByIndex} 102 * </ul> 103 * 104 * <h3>Basic concept: text direction</h3> 105 * The direction of a piece of text may be: 106 * <ul> 107 * <li>{@link #LTR} 108 * <li>{@link #RTL} 109 * <li>{@link #MIXED} 110 * </ul> 111 * 112 * <h3>Basic concept: levels</h3> 113 * 114 * Levels in this API represent embedding levels according to the Unicode 115 * Bidirectional Algorithm. 116 * Their low-order bit (even/odd value) indicates the visual direction.<p> 117 * 118 * Levels can be abstract values when used for the 119 * <code>paraLevel</code> and <code>embeddingLevels</code> 120 * arguments of <code>setPara()</code>; there: 121 * <ul> 122 * <li>the high-order bit of an <code>embeddingLevels[]</code> 123 * value indicates whether the using application is 124 * specifying the level of a character to <i>override</i> whatever the 125 * Bidi implementation would resolve it to.</li> 126 * <li><code>paraLevel</code> can be set to the 127 * pseudo-level values <code>LEVEL_DEFAULT_LTR</code> 128 * and <code>LEVEL_DEFAULT_RTL</code>.</li> 129 * </ul> 130 * 131 * <p>The related constants are not real, valid level values. 132 * <code>DEFAULT_XXX</code> can be used to specify 133 * a default for the paragraph level for 134 * when the <code>setPara()</code> method 135 * shall determine it but there is no 136 * strongly typed character in the input.<p> 137 * 138 * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even 139 * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd, 140 * just like with normal LTR and RTL level values - 141 * these special values are designed that way. Also, the implementation 142 * assumes that MAX_EXPLICIT_LEVEL is odd. 143 * 144 * <ul><b>See Also:</b> 145 * <li>{@link #LEVEL_DEFAULT_LTR} 146 * <li>{@link #LEVEL_DEFAULT_RTL} 147 * <li>{@link #LEVEL_OVERRIDE} 148 * <li>{@link #MAX_EXPLICIT_LEVEL} 149 * <li>{@link #setPara} 150 * </ul> 151 * 152 * <h3>Basic concept: Reordering Mode</h3> 153 * Reordering mode values indicate which variant of the Bidi algorithm to 154 * use. 155 * 156 * <ul><b>See Also:</b> 157 * <li>{@link #setReorderingMode} 158 * <li>{@link #REORDER_DEFAULT} 159 * <li>{@link #REORDER_NUMBERS_SPECIAL} 160 * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R} 161 * <li>{@link #REORDER_RUNS_ONLY} 162 * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L} 163 * <li>{@link #REORDER_INVERSE_LIKE_DIRECT} 164 * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL} 165 * </ul> 166 * 167 * <h3>Basic concept: Reordering Options</h3> 168 * Reordering options can be applied during Bidi text transformations. 169 * <ul><b>See Also:</b> 170 * <li>{@link #setReorderingOptions} 171 * <li>{@link #OPTION_DEFAULT} 172 * <li>{@link #OPTION_INSERT_MARKS} 173 * <li>{@link #OPTION_REMOVE_CONTROLS} 174 * <li>{@link #OPTION_STREAMING} 175 * </ul> 176 * 177 * 178 * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer) 179 * @stable ICU 3.8 180 * 181 * 182 * <h4> Sample code for the ICU Bidi API </h4> 183 * 184 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 185 * 186 * This is (hypothetical) sample code that illustrates how the ICU Bidi API 187 * could be used to render a paragraph of text. Rendering code depends highly on 188 * the graphics system, therefore this sample code must make a lot of 189 * assumptions, which may or may not match any existing graphics system's 190 * properties. 191 * 192 * <p> 193 * The basic assumptions are: 194 * </p> 195 * <ul> 196 * <li>Rendering is done from left to right on a horizontal line.</li> 197 * <li>A run of single-style, unidirectional text can be rendered at once. 198 * </li> 199 * <li>Such a run of text is passed to the graphics system with characters 200 * (code units) in logical order.</li> 201 * <li>The line-breaking algorithm is very complicated and Locale-dependent - 202 * and therefore its implementation omitted from this sample code.</li> 203 * </ul> 204 * 205 * <pre> 206 * 207 * package com.ibm.icu.dev.test.bidi; 208 * 209 * import com.ibm.icu.text.Bidi; 210 * import com.ibm.icu.text.BidiRun; 211 * 212 * public class Sample { 213 * 214 * static final int styleNormal = 0; 215 * static final int styleSelected = 1; 216 * static final int styleBold = 2; 217 * static final int styleItalics = 4; 218 * static final int styleSuper=8; 219 * static final int styleSub = 16; 220 * 221 * static class StyleRun { 222 * int limit; 223 * int style; 224 * 225 * public StyleRun(int limit, int style) { 226 * this.limit = limit; 227 * this.style = style; 228 * } 229 * } 230 * 231 * static class Bounds { 232 * int start; 233 * int limit; 234 * 235 * public Bounds(int start, int limit) { 236 * this.start = start; 237 * this.limit = limit; 238 * } 239 * } 240 * 241 * static int getTextWidth(String text, int start, int limit, 242 * StyleRun[] styleRuns, int styleRunCount) { 243 * // simplistic way to compute the width 244 * return limit - start; 245 * } 246 * 247 * // set limit and StyleRun limit for a line 248 * // from text[start] and from styleRuns[styleRunStart] 249 * // using Bidi.getLogicalRun(...) 250 * // returns line width 251 * static int getLineBreak(String text, Bounds line, Bidi para, 252 * StyleRun styleRuns[], Bounds styleRun) { 253 * // dummy return 254 * return 0; 255 * } 256 * 257 * // render runs on a line sequentially, always from left to right 258 * 259 * // prepare rendering a new line 260 * static void startLine(byte textDirection, int lineWidth) { 261 * System.out.println(); 262 * } 263 * 264 * // render a run of text and advance to the right by the run width 265 * // the text[start..limit-1] is always in logical order 266 * static void renderRun(String text, int start, int limit, 267 * byte textDirection, int style) { 268 * } 269 * 270 * // We could compute a cross-product 271 * // from the style runs with the directional runs 272 * // and then reorder it. 273 * // Instead, here we iterate over each run type 274 * // and render the intersections - 275 * // with shortcuts in simple (and common) cases. 276 * // renderParagraph() is the main function. 277 * 278 * // render a directional run with 279 * // (possibly) multiple style runs intersecting with it 280 * static void renderDirectionalRun(String text, int start, int limit, 281 * byte direction, StyleRun styleRuns[], 282 * int styleRunCount) { 283 * int i; 284 * 285 * // iterate over style runs 286 * if (direction == Bidi.LTR) { 287 * int styleLimit; 288 * for (i = 0; i < styleRunCount; ++i) { 289 * styleLimit = styleRuns[i].limit; 290 * if (start < styleLimit) { 291 * if (styleLimit > limit) { 292 * styleLimit = limit; 293 * } 294 * renderRun(text, start, styleLimit, 295 * direction, styleRuns[i].style); 296 * if (styleLimit == limit) { 297 * break; 298 * } 299 * start = styleLimit; 300 * } 301 * } 302 * } else { 303 * int styleStart; 304 * 305 * for (i = styleRunCount-1; i >= 0; --i) { 306 * if (i > 0) { 307 * styleStart = styleRuns[i-1].limit; 308 * } else { 309 * styleStart = 0; 310 * } 311 * if (limit >= styleStart) { 312 * if (styleStart < start) { 313 * styleStart = start; 314 * } 315 * renderRun(text, styleStart, limit, direction, 316 * styleRuns[i].style); 317 * if (styleStart == start) { 318 * break; 319 * } 320 * limit = styleStart; 321 * } 322 * } 323 * } 324 * } 325 * 326 * // the line object represents text[start..limit-1] 327 * static void renderLine(Bidi line, String text, int start, int limit, 328 * StyleRun styleRuns[], int styleRunCount) { 329 * byte direction = line.getDirection(); 330 * if (direction != Bidi.MIXED) { 331 * // unidirectional 332 * if (styleRunCount <= 1) { 333 * renderRun(text, start, limit, direction, styleRuns[0].style); 334 * } else { 335 * renderDirectionalRun(text, start, limit, direction, 336 * styleRuns, styleRunCount); 337 * } 338 * } else { 339 * // mixed-directional 340 * int count, i; 341 * BidiRun run; 342 * 343 * try { 344 * count = line.countRuns(); 345 * } catch (IllegalStateException e) { 346 * e.printStackTrace(); 347 * return; 348 * } 349 * if (styleRunCount <= 1) { 350 * int style = styleRuns[0].style; 351 * 352 * // iterate over directional runs 353 * for (i = 0; i < count; ++i) { 354 * run = line.getVisualRun(i); 355 * renderRun(text, run.getStart(), run.getLimit(), 356 * run.getDirection(), style); 357 * } 358 * } else { 359 * // iterate over both directional and style runs 360 * for (i = 0; i < count; ++i) { 361 * run = line.getVisualRun(i); 362 * renderDirectionalRun(text, run.getStart(), 363 * run.getLimit(), run.getDirection(), 364 * styleRuns, styleRunCount); 365 * } 366 * } 367 * } 368 * } 369 * 370 * static void renderParagraph(String text, byte textDirection, 371 * StyleRun styleRuns[], int styleRunCount, 372 * int lineWidth) { 373 * int length = text.length(); 374 * Bidi para = new Bidi(); 375 * try { 376 * para.setPara(text, 377 * textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL 378 * : Bidi.LEVEL_DEFAULT_LTR, 379 * null); 380 * } catch (Exception e) { 381 * e.printStackTrace(); 382 * return; 383 * } 384 * byte paraLevel = (byte)(1 & para.getParaLevel()); 385 * StyleRun styleRun = new StyleRun(length, styleNormal); 386 * 387 * if (styleRuns == null || styleRunCount <= 0) { 388 * styleRuns = new StyleRun[1]; 389 * styleRunCount = 1; 390 * styleRuns[0] = styleRun; 391 * } 392 * // assume styleRuns[styleRunCount-1].limit>=length 393 * 394 * int width = getTextWidth(text, 0, length, styleRuns, styleRunCount); 395 * if (width <= lineWidth) { 396 * // everything fits onto one line 397 * 398 * // prepare rendering a new line from either left or right 399 * startLine(paraLevel, width); 400 * 401 * renderLine(para, text, 0, length, styleRuns, styleRunCount); 402 * } else { 403 * // we need to render several lines 404 * Bidi line = new Bidi(length, 0); 405 * int start = 0, limit; 406 * int styleRunStart = 0, styleRunLimit; 407 * 408 * for (;;) { 409 * limit = length; 410 * styleRunLimit = styleRunCount; 411 * width = getLineBreak(text, new Bounds(start, limit), 412 * para, styleRuns, 413 * new Bounds(styleRunStart, styleRunLimit)); 414 * try { 415 * line = para.setLine(start, limit); 416 * } catch (Exception e) { 417 * e.printStackTrace(); 418 * return; 419 * } 420 * // prepare rendering a new line 421 * // from either left or right 422 * startLine(paraLevel, width); 423 * 424 * if (styleRunStart > 0) { 425 * int newRunCount = styleRuns.length - styleRunStart; 426 * StyleRun[] newRuns = new StyleRun[newRunCount]; 427 * System.arraycopy(styleRuns, styleRunStart, newRuns, 0, 428 * newRunCount); 429 * renderLine(line, text, start, limit, newRuns, 430 * styleRunLimit - styleRunStart); 431 * } else { 432 * renderLine(line, text, start, limit, styleRuns, 433 * styleRunLimit - styleRunStart); 434 * } 435 * if (limit == length) { 436 * break; 437 * } 438 * start = limit; 439 * styleRunStart = styleRunLimit - 1; 440 * if (start >= styleRuns[styleRunStart].limit) { 441 * ++styleRunStart; 442 * } 443 * } 444 * } 445 * } 446 * 447 * public static void main(String[] args) 448 * { 449 * renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80); 450 * renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60); 451 * } 452 * } 453 * 454 * </pre> 455 */ 456 457 public class BidiBase { 458 459 class Point { 460 int pos; /* position in text */ 461 int flag; /* flag for LRM/RLM, before/after */ 462 } 463 464 class InsertPoints { 465 int size; 466 int confirmed; 467 Point[] points = new Point[0]; 468 } 469 470 /** Paragraph level setting<p> 471 * 472 * Constant indicating that the base direction depends on the first strong 473 * directional character in the text according to the Unicode Bidirectional 474 * Algorithm. If no strong directional character is present, 475 * then set the paragraph level to 0 (left-to-right).<p> 476 * 477 * If this value is used in conjunction with reordering modes 478 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 479 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 480 * is assumed to be visual LTR, and the text after reordering is required 481 * to be the corresponding logical string with appropriate contextual 482 * direction. The direction of the result string will be RTL if either 483 * the righmost or leftmost strong character of the source text is RTL 484 * or Arabic Letter, the direction will be LTR otherwise.<p> 485 * 486 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 487 * be added at the beginning of the result string to ensure round trip 488 * (that the result string, when reordered back to visual, will produce 489 * the original source text). 490 * @see #REORDER_INVERSE_LIKE_DIRECT 491 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 492 * @stable ICU 3.8 493 */ 494 public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e; 495 496 /** Paragraph level setting<p> 497 * 498 * Constant indicating that the base direction depends on the first strong 499 * directional character in the text according to the Unicode Bidirectional 500 * Algorithm. If no strong directional character is present, 501 * then set the paragraph level to 1 (right-to-left).<p> 502 * 503 * If this value is used in conjunction with reordering modes 504 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 505 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 506 * is assumed to be visual LTR, and the text after reordering is required 507 * to be the corresponding logical string with appropriate contextual 508 * direction. The direction of the result string will be RTL if either 509 * the righmost or leftmost strong character of the source text is RTL 510 * or Arabic Letter, or if the text contains no strong character; 511 * the direction will be LTR otherwise.<p> 512 * 513 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 514 * be added at the beginning of the result string to ensure round trip 515 * (that the result string, when reordered back to visual, will produce 516 * the original source text). 517 * @see #REORDER_INVERSE_LIKE_DIRECT 518 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 519 * @stable ICU 3.8 520 */ 521 public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f; 522 523 /** 524 * Maximum explicit embedding level. 525 * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>). 526 * @stable ICU 3.8 527 */ 528 public static final byte MAX_EXPLICIT_LEVEL = 61; 529 530 /** 531 * Bit flag for level input. 532 * Overrides directional properties. 533 * @stable ICU 3.8 534 */ 535 public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80; 536 537 /** 538 * Special value which can be returned by the mapping methods when a 539 * logical index has no corresponding visual index or vice-versa. This may 540 * happen for the logical-to-visual mapping of a Bidi control when option 541 * <code>OPTION_REMOVE_CONTROLS</code> is 542 * specified. This can also happen for the visual-to-logical mapping of a 543 * Bidi mark (LRM or RLM) inserted by option 544 * <code>OPTION_INSERT_MARKS</code>. 545 * @see #getVisualIndex 546 * @see #getVisualMap 547 * @see #getLogicalIndex 548 * @see #getLogicalMap 549 * @see #OPTION_INSERT_MARKS 550 * @see #OPTION_REMOVE_CONTROLS 551 * @stable ICU 3.8 552 */ 553 public static final int MAP_NOWHERE = -1; 554 555 /** 556 * Mixed-directional text. 557 * @stable ICU 3.8 558 */ 559 public static final byte MIXED = 2; 560 561 /** 562 * option bit for writeReordered(): 563 * replace characters with the "mirrored" property in RTL runs 564 * by their mirror-image mappings 565 * 566 * @see #writeReordered 567 * @stable ICU 3.8 568 */ 569 public static final short DO_MIRRORING = 2; 570 571 /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode. 572 * @see #setReorderingMode 573 * @stable ICU 3.8 574 */ 575 private static final short REORDER_DEFAULT = 0; 576 577 /** Reordering mode: Logical to Visual algorithm which handles numbers in 578 * a way which mimicks the behavior of Windows XP. 579 * @see #setReorderingMode 580 * @stable ICU 3.8 581 */ 582 private static final short REORDER_NUMBERS_SPECIAL = 1; 583 584 /** Reordering mode: Logical to Visual algorithm grouping numbers with 585 * adjacent R characters (reversible algorithm). 586 * @see #setReorderingMode 587 * @stable ICU 3.8 588 */ 589 private static final short REORDER_GROUP_NUMBERS_WITH_R = 2; 590 591 /** Reordering mode: Reorder runs only to transform a Logical LTR string 592 * to the logical RTL string with the same display, or vice-versa.<br> 593 * If this mode is set together with option 594 * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source 595 * text may be removed and other controls may be added to produce the 596 * minimum combination which has the required display. 597 * @see #OPTION_INSERT_MARKS 598 * @see #setReorderingMode 599 * @stable ICU 3.8 600 */ 601 private static final short REORDER_RUNS_ONLY = 3; 602 603 /** Reordering mode: Visual to Logical algorithm which handles numbers 604 * like L (same algorithm as selected by <code>setInverse(true)</code>. 605 * @see #setInverse 606 * @see #setReorderingMode 607 * @stable ICU 3.8 608 */ 609 private static final short REORDER_INVERSE_NUMBERS_AS_L = 4; 610 611 /** Reordering mode: Visual to Logical algorithm equivalent to the regular 612 * Logical to Visual algorithm. 613 * @see #setReorderingMode 614 * @stable ICU 3.8 615 */ 616 private static final short REORDER_INVERSE_LIKE_DIRECT = 5; 617 618 /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the 619 * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. 620 * @see #setReorderingMode 621 * @stable ICU 3.8 622 */ 623 private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6; 624 625 /* Reordering mode values must be ordered so that all the regular logical to 626 * visual modes come first, and all inverse Bidi modes come last. 627 */ 628 private static final short REORDER_LAST_LOGICAL_TO_VISUAL = 629 REORDER_NUMBERS_SPECIAL; 630 631 /** 632 * Option bit for <code>setReorderingOptions</code>: 633 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 634 * a reordering to a Logical order 635 * 636 * <p>This option must be set or reset before calling 637 * <code>setPara</code>.</p> 638 * 639 * <p>This option is significant only with reordering modes which generate 640 * a result with Logical order, specifically.</p> 641 * <ul> 642 * <li><code>REORDER_RUNS_ONLY</code></li> 643 * <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li> 644 * <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li> 645 * <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 646 * </ul> 647 * 648 * <p>If this option is set in conjunction with reordering mode 649 * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 650 * <code>setInverse(true)</code>, it implies option 651 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 652 * <code>writeReordered()</code>.</p> 653 * 654 * <p>For other reordering modes, a minimum number of LRM or RLM characters 655 * will be added to the source text after reordering it so as to ensure 656 * round trip, i.e. when applying the inverse reordering mode on the 657 * resulting logical text with removal of Bidi marks 658 * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling 659 * <code>setPara()</code> or option 660 * <code>REMOVE_BIDI_CONTROLS</code> in 661 * <code>writeReordered</code>), the result will be identical to the 662 * source text in the first transformation. 663 * 664 * <p>This option will be ignored if specified together with option 665 * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option 666 * <code>REMOVE_BIDI_CONTROLS</code> in calls to method 667 * <code>writeReordered()</code> and it implies option 668 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 669 * <code>writeReordered()</code> if the reordering mode is 670 * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 671 * 672 * @see #setReorderingMode 673 * @see #setReorderingOptions 674 * @see #INSERT_LRM_FOR_NUMERIC 675 * @see #REMOVE_BIDI_CONTROLS 676 * @see #OPTION_REMOVE_CONTROLS 677 * @see #REORDER_RUNS_ONLY 678 * @see #REORDER_INVERSE_NUMBERS_AS_L 679 * @see #REORDER_INVERSE_LIKE_DIRECT 680 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 681 * @stable ICU 3.8 682 */ 683 private static final int OPTION_INSERT_MARKS = 1; 684 685 /** 686 * Option bit for <code>setReorderingOptions</code>: 687 * remove Bidi control characters 688 * 689 * <p>This option must be set or reset before calling 690 * <code>setPara</code>.</p> 691 * 692 * <p>This option nullifies option 693 * <code>OPTION_INSERT_MARKS</code>. It inhibits option 694 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 695 * <code>writeReordered()</code> and it implies option 696 * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p> 697 * 698 * @see #setReorderingMode 699 * @see #setReorderingOptions 700 * @see #OPTION_INSERT_MARKS 701 * @see #INSERT_LRM_FOR_NUMERIC 702 * @see #REMOVE_BIDI_CONTROLS 703 * @stable ICU 3.8 704 */ 705 private static final int OPTION_REMOVE_CONTROLS = 2; 706 707 /** 708 * Option bit for <code>setReorderingOptions</code>: 709 * process the output as part of a stream to be continued 710 * 711 * <p>This option must be set or reset before calling 712 * <code>setPara</code>.</p> 713 * 714 * <p>This option specifies that the caller is interested in processing 715 * large text object in parts. The results of the successive calls are 716 * expected to be concatenated by the caller. Only the call for the last 717 * part will have this option bit off.</p> 718 * 719 * <p>When this option bit is on, <code>setPara()</code> may process 720 * less than the full source text in order to truncate the text at a 721 * meaningful boundary. The caller should call 722 * <code>getProcessedLength()</code> immediately after calling 723 * <code>setPara()</code> in order to determine how much of the source 724 * text has been processed. Source text beyond that length should be 725 * resubmitted in following calls to <code>setPara</code>. The 726 * processed length may be less than the length of the source text if a 727 * character preceding the last character of the source text constitutes a 728 * reasonable boundary (like a block separator) for text to be continued.<br> 729 * If the last character of the source text constitutes a reasonable 730 * boundary, the whole text will be processed at once.<br> 731 * If nowhere in the source text there exists 732 * such a reasonable boundary, the processed length will be zero.<br> 733 * The caller should check for such an occurrence and do one of the following: 734 * <ul><li>submit a larger amount of text with a better chance to include 735 * a reasonable boundary.</li> 736 * <li>resubmit the same text after turning off option 737 * <code>OPTION_STREAMING</code>.</li></ul> 738 * In all cases, this option should be turned off before processing the last 739 * part of the text.</p> 740 * 741 * <p>When the <code>OPTION_STREAMING</code> option is used, it is 742 * recommended to call <code>orderParagraphsLTR()</code> with argument 743 * <code>orderParagraphsLTR</code> set to <code>true</code> before calling 744 * <code>setPara()</code> so that later paragraphs may be concatenated to 745 * previous paragraphs on the right. 746 * </p> 747 * 748 * @see #setReorderingMode 749 * @see #setReorderingOptions 750 * @see #getProcessedLength 751 * @see #orderParagraphsLTR 752 * @stable ICU 3.8 753 */ 754 private static final int OPTION_STREAMING = 4; 755 756 /* 757 * Comparing the description of the Bidi algorithm with this implementation 758 * is easier with the same names for the Bidi types in the code as there. 759 * See UCharacterDirection 760 */ 761 private static final byte L = 0; 762 private static final byte R = 1; 763 private static final byte EN = 2; 764 private static final byte ES = 3; 765 private static final byte ET = 4; 766 private static final byte AN = 5; 767 private static final byte CS = 6; 768 static final byte B = 7; 769 private static final byte S = 8; 770 private static final byte WS = 9; 771 private static final byte ON = 10; 772 private static final byte LRE = 11; 773 private static final byte LRO = 12; 774 private static final byte AL = 13; 775 private static final byte RLE = 14; 776 private static final byte RLO = 15; 777 private static final byte PDF = 16; 778 private static final byte NSM = 17; 779 private static final byte BN = 18; 780 781 private static final int MASK_R_AL = (1 << R | 1 << AL); 782 783 private static final char CR = '\r'; 784 private static final char LF = '\n'; 785 786 static final int LRM_BEFORE = 1; 787 static final int LRM_AFTER = 2; 788 static final int RLM_BEFORE = 4; 789 static final int RLM_AFTER = 8; 790 791 /* 792 * reference to parent paragraph object (reference to self if this object is 793 * a paragraph object); set to null in a newly opened object; set to a 794 * real value after a successful execution of setPara or setLine 795 */ 796 BidiBase paraBidi; 797 798 final UBiDiProps bdp; 799 800 /* character array representing the current text */ 801 char[] text; 802 803 /* length of the current text */ 804 int originalLength; 805 806 /* if the option OPTION_STREAMING is set, this is the length of 807 * text actually processed by <code>setPara</code>, which may be shorter 808 * than the original length. Otherwise, it is identical to the original 809 * length. 810 */ 811 public int length; 812 813 /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi 814 * marks are allowed to be inserted in one of the reordering modes, the 815 * length of the result string may be different from the processed length. 816 */ 817 int resultLength; 818 819 /* indicators for whether memory may be allocated after construction */ 820 boolean mayAllocateText; 821 boolean mayAllocateRuns; 822 823 /* arrays with one value per text-character */ 824 byte[] dirPropsMemory = new byte[1]; 825 byte[] levelsMemory = new byte[1]; 826 byte[] dirProps; 827 byte[] levels; 828 829 /* must block separators receive level 0? */ 830 boolean orderParagraphsLTR; 831 832 /* the paragraph level */ 833 byte paraLevel; 834 835 /* original paraLevel when contextual */ 836 /* must be one of DEFAULT_xxx or 0 if not contextual */ 837 byte defaultParaLevel; 838 839 /* the following is set in setPara, used in processPropertySeq */ 840 841 ImpTabPair impTabPair; /* reference to levels state table pair */ 842 843 /* the overall paragraph or line directionality*/ 844 byte direction; 845 846 /* flags is a bit set for which directional properties are in the text */ 847 int flags; 848 849 /* lastArabicPos is index to the last AL in the text, -1 if none */ 850 int lastArabicPos; 851 852 /* characters after trailingWSStart are WS and are */ 853 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ 854 int trailingWSStart; 855 856 /* fields for paragraph handling */ 857 int paraCount; /* set in getDirProps() */ 858 int[] parasMemory = new int[1]; 859 int[] paras; /* limits of paragraphs, filled in 860 ResolveExplicitLevels() or CheckExplicitLevels() */ 861 862 /* for single paragraph text, we only need a tiny array of paras (no allocation) */ 863 int[] simpleParas = {0}; 864 865 /* fields for line reordering */ 866 int runCount; /* ==-1: runs not set up yet */ 867 BidiRun[] runsMemory = new BidiRun[0]; 868 BidiRun[] runs; 869 870 /* for non-mixed text, we only need a tiny array of runs (no allocation) */ 871 BidiRun[] simpleRuns = {new BidiRun()}; 872 873 /* mapping of runs in logical order to visual order */ 874 int[] logicalToVisualRunsMap; 875 876 /* flag to indicate that the map has been updated */ 877 boolean isGoodLogicalToVisualRunsMap; 878 879 /* for inverse Bidi with insertion of directional marks */ 880 InsertPoints insertPoints = new InsertPoints(); 881 882 /* for option OPTION_REMOVE_CONTROLS */ 883 int controlCount; 884 885 /* 886 * Sometimes, bit values are more appropriate 887 * to deal with directionality properties. 888 * Abbreviations in these method names refer to names 889 * used in the Bidi algorithm. 890 */ 891 static int DirPropFlag(byte dir) { 892 return (1 << dir); 893 } 894 895 /* 896 * The following bit is ORed to the property of characters in paragraphs 897 * with contextual RTL direction when paraLevel is contextual. 898 */ 899 static final byte CONTEXT_RTL_SHIFT = 6; 900 static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40 901 static byte NoContextRTL(byte dir) 902 { 903 return (byte)(dir & ~CONTEXT_RTL); 904 } 905 906 /* 907 * The following is a variant of DirProp.DirPropFlag() which ignores the 908 * CONTEXT_RTL bit. 909 */ 910 static int DirPropFlagNC(byte dir) { 911 return (1<<(dir & ~CONTEXT_RTL)); 912 } 913 914 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); 915 916 /* to avoid some conditional statements, use tiny constant arrays */ 917 static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) }; 918 static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) }; 919 static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) }; 920 921 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } 922 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; } 923 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; } 924 925 /* 926 * are there any characters that are LTR? 927 */ 928 static final int MASK_LTR = 929 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO); 930 931 /* 932 * are there any characters that are RTL? 933 */ 934 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO); 935 936 /* explicit embedding codes */ 937 private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO); 938 private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO); 939 private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF); 940 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; 941 942 /* paragraph and segment separators */ 943 private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S); 944 945 /* all types that are counted as White Space or Neutral in some steps */ 946 static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT; 947 private static final int MASK_N = DirPropFlag(ON)|MASK_WS; 948 949 /* types that are neutrals or could becomes neutrals in (Wn) */ 950 private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N; 951 952 /* 953 * These types may be changed to "e", 954 * the embedding type (L or R) of the run, 955 * in the Bidi algorithm (N2) 956 */ 957 static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N; 958 959 /* 960 * the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java 961 */ 962 private static byte GetLRFromLevel(byte level) 963 { 964 return (byte)(level & 1); 965 } 966 967 private static boolean IsDefaultLevel(byte level) 968 { 969 return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR); 970 } 971 972 byte GetParaLevelAt(int index) 973 { 974 return (defaultParaLevel != 0) ? 975 (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel; 976 } 977 978 static boolean IsBidiControlChar(int c) 979 { 980 /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or 981 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 982 return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))); 983 } 984 985 public void verifyValidPara() 986 { 987 if (this != this.paraBidi) { 988 throw new IllegalStateException(""); 989 } 990 } 991 992 public void verifyValidParaOrLine() 993 { 994 BidiBase para = this.paraBidi; 995 /* verify Para */ 996 if (this == para) { 997 return; 998 } 999 /* verify Line */ 1000 if ((para == null) || (para != para.paraBidi)) { 1001 throw new IllegalStateException(); 1002 } 1003 } 1004 1005 public void verifyRange(int index, int start, int limit) 1006 { 1007 if (index < start || index >= limit) { 1008 throw new IllegalArgumentException("Value " + index + 1009 " is out of range " + start + " to " + limit); 1010 } 1011 } 1012 1013 public void verifyIndex(int index, int start, int limit) 1014 { 1015 if (index < start || index >= limit) { 1016 throw new ArrayIndexOutOfBoundsException("Index " + index + 1017 " is out of range " + start + " to " + limit); 1018 } 1019 } 1020 1021 /** 1022 * Allocate a <code>Bidi</code> object with preallocated memory 1023 * for internal structures. 1024 * This method provides a <code>Bidi</code> object like the default constructor 1025 * but it also preallocates memory for internal structures 1026 * according to the sizings supplied by the caller.<p> 1027 * The preallocation can be limited to some of the internal memory 1028 * by setting some values to 0 here. That means that if, e.g., 1029 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 1030 * be set to <code>maxLength</code> (the only failproof value) to avoid 1031 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 1032 * and the internal structures that are associated with it will be allocated 1033 * on demand, just like with the default constructor. 1034 * 1035 * @param maxLength is the maximum text or line length that internal memory 1036 * will be preallocated for. An attempt to associate this object with a 1037 * longer text will fail, unless this value is 0, which leaves the allocation 1038 * up to the implementation. 1039 * 1040 * @param maxRunCount is the maximum anticipated number of same-level runs 1041 * that internal memory will be preallocated for. An attempt to access 1042 * visual runs on an object that was not preallocated for as many runs 1043 * as the text was actually resolved to will fail, 1044 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 1045 * The number of runs depends on the actual text and maybe anywhere between 1046 * 1 and <code>maxLength</code>. It is typically small. 1047 * 1048 * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0 1049 * @stable ICU 3.8 1050 */ 1051 public BidiBase(int maxLength, int maxRunCount) 1052 { 1053 /* check the argument values */ 1054 if (maxLength < 0 || maxRunCount < 0) { 1055 throw new IllegalArgumentException(); 1056 } 1057 1058 /* reset the object, all reference variables null, all flags false, 1059 all sizes 0. 1060 In fact, we don't need to do anything, since class members are 1061 initialized as zero when an instance is created. 1062 */ 1063 /* 1064 mayAllocateText = false; 1065 mayAllocateRuns = false; 1066 orderParagraphsLTR = false; 1067 paraCount = 0; 1068 runCount = 0; 1069 trailingWSStart = 0; 1070 flags = 0; 1071 paraLevel = 0; 1072 defaultParaLevel = 0; 1073 direction = 0; 1074 */ 1075 /* get Bidi properties */ 1076 try { 1077 bdp = UBiDiProps.getSingleton(); 1078 } 1079 catch (IOException e) { 1080 throw new MissingResourceException(e.getMessage(), "(BidiProps)", ""); 1081 } 1082 1083 /* allocate memory for arrays as requested */ 1084 if (maxLength > 0) { 1085 getInitialDirPropsMemory(maxLength); 1086 getInitialLevelsMemory(maxLength); 1087 } else { 1088 mayAllocateText = true; 1089 } 1090 1091 if (maxRunCount > 0) { 1092 // if maxRunCount == 1, use simpleRuns[] 1093 if (maxRunCount > 1) { 1094 getInitialRunsMemory(maxRunCount); 1095 } 1096 } else { 1097 mayAllocateRuns = true; 1098 } 1099 } 1100 1101 /* 1102 * We are allowed to allocate memory if object==null or 1103 * mayAllocate==true for each array that we need. 1104 * 1105 * Assume sizeNeeded>0. 1106 * If object != null, then assume size > 0. 1107 */ 1108 private Object getMemory(String label, Object array, Class<?> arrayClass, 1109 boolean mayAllocate, int sizeNeeded) 1110 { 1111 int len = Array.getLength(array); 1112 1113 /* we have at least enough memory and must not allocate */ 1114 if (sizeNeeded == len) { 1115 return array; 1116 } 1117 if (!mayAllocate) { 1118 /* we must not allocate */ 1119 if (sizeNeeded <= len) { 1120 return array; 1121 } 1122 throw new OutOfMemoryError("Failed to allocate memory for " 1123 + label); 1124 } 1125 /* we may try to grow or shrink */ 1126 /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid 1127 the allocation altogether and rely on this.length */ 1128 try { 1129 return Array.newInstance(arrayClass, sizeNeeded); 1130 } catch (Exception e) { 1131 throw new OutOfMemoryError("Failed to allocate memory for " 1132 + label); 1133 } 1134 } 1135 1136 /* helper methods for each allocated array */ 1137 private void getDirPropsMemory(boolean mayAllocate, int len) 1138 { 1139 Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len); 1140 dirPropsMemory = (byte[]) array; 1141 } 1142 1143 void getDirPropsMemory(int len) 1144 { 1145 getDirPropsMemory(mayAllocateText, len); 1146 } 1147 1148 private void getLevelsMemory(boolean mayAllocate, int len) 1149 { 1150 Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len); 1151 levelsMemory = (byte[]) array; 1152 } 1153 1154 void getLevelsMemory(int len) 1155 { 1156 getLevelsMemory(mayAllocateText, len); 1157 } 1158 1159 private void getRunsMemory(boolean mayAllocate, int len) 1160 { 1161 Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len); 1162 runsMemory = (BidiRun[]) array; 1163 } 1164 1165 void getRunsMemory(int len) 1166 { 1167 getRunsMemory(mayAllocateRuns, len); 1168 } 1169 1170 /* additional methods used by constructor - always allow allocation */ 1171 private void getInitialDirPropsMemory(int len) 1172 { 1173 getDirPropsMemory(true, len); 1174 } 1175 1176 private void getInitialLevelsMemory(int len) 1177 { 1178 getLevelsMemory(true, len); 1179 } 1180 1181 private void getInitialParasMemory(int len) 1182 { 1183 Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len); 1184 parasMemory = (int[]) array; 1185 } 1186 1187 private void getInitialRunsMemory(int len) 1188 { 1189 getRunsMemory(true, len); 1190 } 1191 1192 /* perform (P2)..(P3) ------------------------------------------------------- */ 1193 1194 private void getDirProps() 1195 { 1196 int i = 0, i0, i1; 1197 flags = 0; /* collect all directionalities in the text */ 1198 int uchar; 1199 byte dirProp; 1200 byte paraDirDefault = 0; /* initialize to avoid compiler warnings */ 1201 boolean isDefaultLevel = IsDefaultLevel(paraLevel); 1202 /* for inverse Bidi, the default para level is set to RTL if there is a 1203 strong R or AL character at either end of the text */ 1204 lastArabicPos = -1; 1205 controlCount = 0; 1206 1207 final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */ 1208 final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */ 1209 final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */ 1210 1211 int state; 1212 int paraStart = 0; /* index of first char in paragraph */ 1213 byte paraDir; /* == CONTEXT_RTL within paragraphs 1214 starting with strong R char */ 1215 byte lastStrongDir=0; /* for default level & inverse Bidi */ 1216 int lastStrongLTR=0; /* for STREAMING option */ 1217 1218 if (isDefaultLevel) { 1219 paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0; 1220 paraDir = paraDirDefault; 1221 lastStrongDir = paraDirDefault; 1222 state = LOOKING_FOR_STRONG; 1223 } else { 1224 state = NOT_CONTEXTUAL; 1225 paraDir = 0; 1226 } 1227 /* count paragraphs and determine the paragraph level (P2..P3) */ 1228 /* 1229 * see comment on constant fields: 1230 * the LEVEL_DEFAULT_XXX values are designed so that 1231 * their low-order bit alone yields the intended default 1232 */ 1233 1234 for (i = 0; i < originalLength; /* i is incremented in the loop */) { 1235 i0 = i; /* index of first code unit */ 1236 uchar = UTF16.charAt(text, 0, originalLength, i); 1237 i += Character.charCount(uchar); 1238 i1 = i - 1; /* index of last code unit, gets the directional property */ 1239 1240 dirProp = (byte)bdp.getClass(uchar); 1241 1242 flags |= DirPropFlag(dirProp); 1243 dirProps[i1] = (byte)(dirProp | paraDir); 1244 if (i1 > i0) { /* set previous code units' properties to BN */ 1245 flags |= DirPropFlag(BN); 1246 do { 1247 dirProps[--i1] = (byte)(BN | paraDir); 1248 } while (i1 > i0); 1249 } 1250 if (state == LOOKING_FOR_STRONG) { 1251 if (dirProp == L) { 1252 state = FOUND_STRONG_CHAR; 1253 if (paraDir != 0) { 1254 paraDir = 0; 1255 for (i1 = paraStart; i1 < i; i1++) { 1256 dirProps[i1] &= ~CONTEXT_RTL; 1257 } 1258 } 1259 continue; 1260 } 1261 if (dirProp == R || dirProp == AL) { 1262 state = FOUND_STRONG_CHAR; 1263 if (paraDir == 0) { 1264 paraDir = CONTEXT_RTL; 1265 for (i1 = paraStart; i1 < i; i1++) { 1266 dirProps[i1] |= CONTEXT_RTL; 1267 } 1268 } 1269 continue; 1270 } 1271 } 1272 if (dirProp == L) { 1273 lastStrongDir = 0; 1274 lastStrongLTR = i; /* i is index to next character */ 1275 } 1276 else if (dirProp == R) { 1277 lastStrongDir = CONTEXT_RTL; 1278 } 1279 else if (dirProp == AL) { 1280 lastStrongDir = CONTEXT_RTL; 1281 lastArabicPos = i-1; 1282 } 1283 else if (dirProp == B) { 1284 if (i < originalLength) { /* B not last char in text */ 1285 if (!((uchar == (int)CR) && (text[i] == (int)LF))) { 1286 paraCount++; 1287 } 1288 if (isDefaultLevel) { 1289 state=LOOKING_FOR_STRONG; 1290 paraStart = i; /* i is index to next character */ 1291 paraDir = paraDirDefault; 1292 lastStrongDir = paraDirDefault; 1293 } 1294 } 1295 } 1296 } 1297 if (isDefaultLevel) { 1298 paraLevel = GetParaLevelAt(0); 1299 } 1300 1301 /* The following line does nothing new for contextual paraLevel, but is 1302 needed for absolute paraLevel. */ 1303 flags |= DirPropFlagLR(paraLevel); 1304 1305 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1306 flags |= DirPropFlag(L); 1307 } 1308 } 1309 1310 /* perform (X1)..(X9) ------------------------------------------------------- */ 1311 1312 /* determine if the text is mixed-directional or single-directional */ 1313 private byte directionFromFlags() { 1314 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 1315 if (!((flags & MASK_RTL) != 0 || 1316 ((flags & DirPropFlag(AN)) != 0 && 1317 (flags & MASK_POSSIBLE_N) != 0))) { 1318 return Bidi.DIRECTION_LEFT_TO_RIGHT; 1319 } else if ((flags & MASK_LTR) == 0) { 1320 return Bidi.DIRECTION_RIGHT_TO_LEFT; 1321 } else { 1322 return MIXED; 1323 } 1324 } 1325 1326 /* 1327 * Resolve the explicit levels as specified by explicit embedding codes. 1328 * Recalculate the flags to have them reflect the real properties 1329 * after taking the explicit embeddings into account. 1330 * 1331 * The Bidi algorithm is designed to result in the same behavior whether embedding 1332 * levels are externally specified (from "styled text", supposedly the preferred 1333 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 1334 * That is why (X9) instructs to remove all explicit codes (and BN). 1335 * However, in a real implementation, this removal of these codes and their index 1336 * positions in the plain text is undesirable since it would result in 1337 * reallocated, reindexed text. 1338 * Instead, this implementation leaves the codes in there and just ignores them 1339 * in the subsequent processing. 1340 * In order to get the same reordering behavior, positions with a BN or an 1341 * explicit embedding code just get the same level assigned as the last "real" 1342 * character. 1343 * 1344 * Some implementations, not this one, then overwrite some of these 1345 * directionality properties at "real" same-level-run boundaries by 1346 * L or R codes so that the resolution of weak types can be performed on the 1347 * entire paragraph at once instead of having to parse it once more and 1348 * perform that resolution on same-level-runs. 1349 * This limits the scope of the implicit rules in effectively 1350 * the same way as the run limits. 1351 * 1352 * Instead, this implementation does not modify these codes. 1353 * On one hand, the paragraph has to be scanned for same-level-runs, but 1354 * on the other hand, this saves another loop to reset these codes, 1355 * or saves making and modifying a copy of dirProps[]. 1356 * 1357 * 1358 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm. 1359 * 1360 * 1361 * Handling the stack of explicit levels (Xn): 1362 * 1363 * With the Bidi stack of explicit levels, 1364 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, 1365 * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61. 1366 * 1367 * In order to have a correct push-pop semantics even in the case of overflows, 1368 * there are two overflow counters: 1369 * - countOver60 is incremented with each LRx at level 60 1370 * - from level 60, one RLx increases the level to 61 1371 * - countOver61 is incremented with each LRx and RLx at level 61 1372 * 1373 * Popping levels with PDF must work in the opposite order so that level 61 1374 * is correct at the correct point. Underflows (too many PDFs) must be checked. 1375 * 1376 * This implementation assumes that MAX_EXPLICIT_LEVEL is odd. 1377 */ 1378 private byte resolveExplicitLevels() { 1379 int i = 0; 1380 byte dirProp; 1381 byte level = GetParaLevelAt(0); 1382 1383 byte dirct; 1384 int paraIndex = 0; 1385 1386 /* determine if the text is mixed-directional or single-directional */ 1387 dirct = directionFromFlags(); 1388 1389 /* we may not need to resolve any explicit levels, but for multiple 1390 paragraphs we want to loop on all chars to set the para boundaries */ 1391 if ((dirct != MIXED) && (paraCount == 1)) { 1392 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 1393 } else if ((paraCount == 1) && 1394 ((flags & MASK_EXPLICIT) == 0)) { 1395 /* mixed, but all characters are at the same embedding level */ 1396 /* or we are in "inverse Bidi" */ 1397 /* and we don't have contextual multiple paragraphs with some B char */ 1398 /* set all levels to the paragraph level */ 1399 for (i = 0; i < length; ++i) { 1400 levels[i] = level; 1401 } 1402 } else { 1403 /* continue to perform (Xn) */ 1404 1405 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 1406 /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */ 1407 byte embeddingLevel = level; 1408 byte newLevel; 1409 byte stackTop = 0; 1410 1411 byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */ 1412 int countOver60 = 0; 1413 int countOver61 = 0; /* count overflows of explicit levels */ 1414 1415 /* recalculate the flags */ 1416 flags = 0; 1417 1418 for (i = 0; i < length; ++i) { 1419 dirProp = NoContextRTL(dirProps[i]); 1420 switch(dirProp) { 1421 case LRE: 1422 case LRO: 1423 /* (X3, X5) */ 1424 newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */ 1425 if (newLevel <= MAX_EXPLICIT_LEVEL) { 1426 stack[stackTop] = embeddingLevel; 1427 ++stackTop; 1428 embeddingLevel = newLevel; 1429 if (dirProp == LRO) { 1430 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1431 } 1432 /* we don't need to set LEVEL_OVERRIDE off for LRE 1433 since this has already been done for newLevel which is 1434 the source for embeddingLevel. 1435 */ 1436 } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) { 1437 ++countOver61; 1438 } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ { 1439 ++countOver60; 1440 } 1441 flags |= DirPropFlag(BN); 1442 break; 1443 case RLE: 1444 case RLO: 1445 /* (X2, X4) */ 1446 newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */ 1447 if (newLevel<=MAX_EXPLICIT_LEVEL) { 1448 stack[stackTop] = embeddingLevel; 1449 ++stackTop; 1450 embeddingLevel = newLevel; 1451 if (dirProp == RLO) { 1452 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1453 } 1454 /* we don't need to set LEVEL_OVERRIDE off for RLE 1455 since this has already been done for newLevel which is 1456 the source for embeddingLevel. 1457 */ 1458 } else { 1459 ++countOver61; 1460 } 1461 flags |= DirPropFlag(BN); 1462 break; 1463 case PDF: 1464 /* (X7) */ 1465 /* handle all the overflow cases first */ 1466 if (countOver61 > 0) { 1467 --countOver61; 1468 } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) { 1469 /* handle LRx overflows from level 60 */ 1470 --countOver60; 1471 } else if (stackTop > 0) { 1472 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 1473 --stackTop; 1474 embeddingLevel = stack[stackTop]; 1475 /* } else { (underflow) */ 1476 } 1477 flags |= DirPropFlag(BN); 1478 break; 1479 case B: 1480 stackTop = 0; 1481 countOver60 = 0; 1482 countOver61 = 0; 1483 level = GetParaLevelAt(i); 1484 if ((i + 1) < length) { 1485 embeddingLevel = GetParaLevelAt(i+1); 1486 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1487 paras[paraIndex++] = i+1; 1488 } 1489 } 1490 flags |= DirPropFlag(B); 1491 break; 1492 case BN: 1493 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 1494 /* they will get their levels set correctly in adjustWSLevels() */ 1495 flags |= DirPropFlag(BN); 1496 break; 1497 default: 1498 /* all other types get the "real" level */ 1499 if (level != embeddingLevel) { 1500 level = embeddingLevel; 1501 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1502 flags |= DirPropFlagO(level) | DirPropFlagMultiRuns; 1503 } else { 1504 flags |= DirPropFlagE(level) | DirPropFlagMultiRuns; 1505 } 1506 } 1507 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 1508 flags |= DirPropFlag(dirProp); 1509 } 1510 break; 1511 } 1512 1513 /* 1514 * We need to set reasonable levels even on BN codes and 1515 * explicit codes because we will later look at same-level runs (X10). 1516 */ 1517 levels[i] = level; 1518 } 1519 if ((flags & MASK_EMBEDDING) != 0) { 1520 flags |= DirPropFlagLR(paraLevel); 1521 } 1522 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1523 flags |= DirPropFlag(L); 1524 } 1525 1526 /* subsequently, ignore the explicit codes and BN (X9) */ 1527 1528 /* again, determine if the text is mixed-directional or single-directional */ 1529 dirct = directionFromFlags(); 1530 } 1531 1532 return dirct; 1533 } 1534 1535 /* 1536 * Use a pre-specified embedding levels array: 1537 * 1538 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 1539 * ignore all explicit codes (X9), 1540 * and check all the preset levels. 1541 * 1542 * Recalculate the flags to have them reflect the real properties 1543 * after taking the explicit embeddings into account. 1544 */ 1545 private byte checkExplicitLevels() { 1546 byte dirProp; 1547 int i; 1548 this.flags = 0; /* collect all directionalities in the text */ 1549 byte level; 1550 int paraIndex = 0; 1551 1552 for (i = 0; i < length; ++i) { 1553 if (levels[i] == 0) { 1554 levels[i] = paraLevel; 1555 } 1556 if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) { 1557 if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) { 1558 levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE); 1559 } else { 1560 levels[i] = paraLevel; 1561 } 1562 } 1563 level = levels[i]; 1564 dirProp = NoContextRTL(dirProps[i]); 1565 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1566 /* keep the override flag in levels[i] but adjust the flags */ 1567 level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */ 1568 flags |= DirPropFlagO(level); 1569 } else { 1570 /* set the flags */ 1571 flags |= DirPropFlagE(level) | DirPropFlag(dirProp); 1572 } 1573 1574 if ((level < GetParaLevelAt(i) && 1575 !((0 == level) && (dirProp == B))) || 1576 (MAX_EXPLICIT_LEVEL <level)) { 1577 /* level out of bounds */ 1578 throw new IllegalArgumentException("level " + level + 1579 " out of bounds at index " + i); 1580 } 1581 if ((dirProp == B) && ((i + 1) < length)) { 1582 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1583 paras[paraIndex++] = i + 1; 1584 } 1585 } 1586 } 1587 if ((flags&MASK_EMBEDDING) != 0) { 1588 flags |= DirPropFlagLR(paraLevel); 1589 } 1590 1591 /* determine if the text is mixed-directional or single-directional */ 1592 return directionFromFlags(); 1593 } 1594 1595 /*********************************************************************/ 1596 /* The Properties state machine table */ 1597 /*********************************************************************/ 1598 /* */ 1599 /* All table cells are 8 bits: */ 1600 /* bits 0..4: next state */ 1601 /* bits 5..7: action to perform (if > 0) */ 1602 /* */ 1603 /* Cells may be of format "n" where n represents the next state */ 1604 /* (except for the rightmost column). */ 1605 /* Cells may also be of format "_(x,y)" where x represents an action */ 1606 /* to perform and y represents the next state. */ 1607 /* */ 1608 /*********************************************************************/ 1609 /* Definitions and type for properties state tables */ 1610 /*********************************************************************/ 1611 private static final int IMPTABPROPS_COLUMNS = 14; 1612 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; 1613 private static short GetStateProps(short cell) { 1614 return (short)(cell & 0x1f); 1615 } 1616 private static short GetActionProps(short cell) { 1617 return (short)(cell >> 5); 1618 } 1619 1620 private static final short groupProp[] = /* dirProp regrouped */ 1621 { 1622 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 1623 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 1624 }; 1625 private static final short _L = 0; 1626 private static final short _R = 1; 1627 private static final short _EN = 2; 1628 private static final short _AN = 3; 1629 private static final short _ON = 4; 1630 private static final short _S = 5; 1631 private static final short _B = 6; /* reduced dirProp */ 1632 1633 /*********************************************************************/ 1634 /* */ 1635 /* PROPERTIES STATE TABLE */ 1636 /* */ 1637 /* In table impTabProps, */ 1638 /* - the ON column regroups ON and WS */ 1639 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */ 1640 /* - the Res column is the reduced property assigned to a run */ 1641 /* */ 1642 /* Action 1: process current run1, init new run1 */ 1643 /* 2: init new run2 */ 1644 /* 3: process run1, process run2, init new run1 */ 1645 /* 4: process run1, set run1=run2, init new run2 */ 1646 /* */ 1647 /* Notes: */ 1648 /* 1) This table is used in resolveImplicitLevels(). */ 1649 /* 2) This table triggers actions when there is a change in the Bidi*/ 1650 /* property of incoming characters (action 1). */ 1651 /* 3) Most such property sequences are processed immediately (in */ 1652 /* fact, passed to processPropertySeq(). */ 1653 /* 4) However, numbers are assembled as one sequence. This means */ 1654 /* that undefined situations (like CS following digits, until */ 1655 /* it is known if the next char will be a digit) are held until */ 1656 /* following chars define them. */ 1657 /* Example: digits followed by CS, then comes another CS or ON; */ 1658 /* the digits will be processed, then the CS assigned */ 1659 /* as the start of an ON sequence (action 3). */ 1660 /* 5) There are cases where more than one sequence must be */ 1661 /* processed, for instance digits followed by CS followed by L: */ 1662 /* the digits must be processed as one sequence, and the CS */ 1663 /* must be processed as an ON sequence, all this before starting */ 1664 /* assembling chars for the opening L sequence. */ 1665 /* */ 1666 /* */ 1667 private static final short impTabProps[][] = 1668 { 1669 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */ 1670 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON }, 1671 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L }, 1672 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R }, 1673 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R }, 1674 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN }, 1675 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN }, 1676 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN }, 1677 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON }, 1678 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON }, 1679 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON }, 1680 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN }, 1681 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN }, 1682 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, _AN }, 1683 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN }, 1684 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON }, 1685 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S }, 1686 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S }, 1687 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B } 1688 }; 1689 1690 /*********************************************************************/ 1691 /* The levels state machine tables */ 1692 /*********************************************************************/ 1693 /* */ 1694 /* All table cells are 8 bits: */ 1695 /* bits 0..3: next state */ 1696 /* bits 4..7: action to perform (if > 0) */ 1697 /* */ 1698 /* Cells may be of format "n" where n represents the next state */ 1699 /* (except for the rightmost column). */ 1700 /* Cells may also be of format "_(x,y)" where x represents an action */ 1701 /* to perform and y represents the next state. */ 1702 /* */ 1703 /* This format limits each table to 16 states each and to 15 actions.*/ 1704 /* */ 1705 /*********************************************************************/ 1706 /* Definitions and type for levels state tables */ 1707 /*********************************************************************/ 1708 private static final int IMPTABLEVELS_COLUMNS = _B + 2; 1709 private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1; 1710 private static short GetState(byte cell) { return (short)(cell & 0x0f); } 1711 private static short GetAction(byte cell) { return (short)(cell >> 4); } 1712 1713 private static class ImpTabPair { 1714 byte[][][] imptab; 1715 short[][] impact; 1716 1717 ImpTabPair(byte[][] table1, byte[][] table2, 1718 short[] act1, short[] act2) { 1719 imptab = new byte[][][] {table1, table2}; 1720 impact = new short[][] {act1, act2}; 1721 } 1722 } 1723 1724 /*********************************************************************/ 1725 /* */ 1726 /* LEVELS STATE TABLES */ 1727 /* */ 1728 /* In all levels state tables, */ 1729 /* - state 0 is the initial state */ 1730 /* - the Res column is the increment to add to the text level */ 1731 /* for this property sequence. */ 1732 /* */ 1733 /* The impact arrays for each table of a pair map the local action */ 1734 /* numbers of the table to the total list of actions. For instance, */ 1735 /* action 2 in a given table corresponds to the action number which */ 1736 /* appears in entry [2] of the impact array for that table. */ 1737 /* The first entry of all impact arrays must be 0. */ 1738 /* */ 1739 /* Action 1: init conditional sequence */ 1740 /* 2: prepend conditional sequence to current sequence */ 1741 /* 3: set ON sequence to new level - 1 */ 1742 /* 4: init EN/AN/ON sequence */ 1743 /* 5: fix EN/AN/ON sequence followed by R */ 1744 /* 6: set previous level sequence to level 2 */ 1745 /* */ 1746 /* Notes: */ 1747 /* 1) These tables are used in processPropertySeq(). The input */ 1748 /* is property sequences as determined by resolveImplicitLevels. */ 1749 /* 2) Most such property sequences are processed immediately */ 1750 /* (levels are assigned). */ 1751 /* 3) However, some sequences cannot be assigned a final level till */ 1752 /* one or more following sequences are received. For instance, */ 1753 /* ON following an R sequence within an even-level paragraph. */ 1754 /* If the following sequence is R, the ON sequence will be */ 1755 /* assigned basic run level+1, and so will the R sequence. */ 1756 /* 4) S is generally handled like ON, since its level will be fixed */ 1757 /* to paragraph level in adjustWSLevels(). */ 1758 /* */ 1759 1760 private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */ 1761 /* In this table, conditional sequences receive the higher possible level 1762 until proven otherwise. 1763 */ 1764 { 1765 /* L, R, EN, AN, ON, S, B, Res */ 1766 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 }, 1767 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 }, 1768 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 }, 1769 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 }, 1770 /* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 }, 1771 /* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 } 1772 }; 1773 1774 private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */ 1775 /* In this table, conditional sequences receive the lower possible level 1776 until proven otherwise. 1777 */ 1778 { 1779 /* L, R, EN, AN, ON, S, B, Res */ 1780 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1781 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 }, 1782 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1783 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 }, 1784 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 }, 1785 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 } 1786 }; 1787 1788 private static final short[] impAct0 = {0,1,2,3,4,5,6}; 1789 1790 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair( 1791 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0); 1792 1793 private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */ 1794 /* In this table, conditional sequences receive the higher possible 1795 level until proven otherwise. 1796 */ 1797 /* L, R, EN, AN, ON, S, B, Res */ 1798 /* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 }, 1799 /* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 }, 1800 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 }, 1801 /* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 }, 1802 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 } 1803 }; 1804 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair( 1805 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0); 1806 1807 private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = { 1808 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1809 until proven that there is L or sor/eor on both sides. AN is handled like EN. 1810 */ 1811 /* L, R, EN, AN, ON, S, B, Res */ 1812 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 }, 1813 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 }, 1814 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 }, 1815 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 }, 1816 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 }, 1817 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 } 1818 }; 1819 private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = { 1820 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1821 until proven that there is L on both sides. AN is handled like EN. 1822 */ 1823 /* L, R, EN, AN, ON, S, B, Res */ 1824 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 }, 1825 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 }, 1826 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 }, 1827 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 }, 1828 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 } 1829 }; 1830 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new 1831 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R, 1832 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0); 1833 1834 private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = { 1835 /* This table is identical to the Default LTR table except that EN and AN 1836 are handled like L. 1837 */ 1838 /* L, R, EN, AN, ON, S, B, Res */ 1839 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 }, 1840 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 }, 1841 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 }, 1842 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 }, 1843 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 }, 1844 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 } 1845 }; 1846 private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = { 1847 /* This table is identical to the Default RTL table except that EN and AN 1848 are handled like L. 1849 */ 1850 /* L, R, EN, AN, ON, S, B, Res */ 1851 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 }, 1852 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 }, 1853 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 }, 1854 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 }, 1855 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 }, 1856 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 } 1857 }; 1858 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair 1859 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L, 1860 impAct0, impAct0); 1861 1862 private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */ 1863 /* In this table, conditional sequences receive the lower possible level 1864 until proven otherwise. 1865 */ 1866 /* L, R, EN, AN, ON, S, B, Res */ 1867 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1868 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 }, 1869 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1870 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 }, 1871 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 }, 1872 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 }, 1873 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 } 1874 }; 1875 private static final short[] impAct1 = {0,1,11,12}; 1876 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair( 1877 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1878 1879 private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1880 /* The case handled in this table is (visually): R EN L 1881 */ 1882 /* L, R, EN, AN, ON, S, B, Res */ 1883 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 }, 1884 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 }, 1885 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 }, 1886 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 }, 1887 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 }, 1888 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 }, 1889 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 } 1890 }; 1891 private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1892 /* The cases handled in this table are (visually): R EN L 1893 R L AN L 1894 */ 1895 /* L, R, EN, AN, ON, S, B, Res */ 1896 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 }, 1897 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 }, 1898 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 }, 1899 /* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 }, 1900 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 }, 1901 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 }, 1902 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 } 1903 }; 1904 private static final short impAct2[] = {0,1,7,8,9,10}; 1905 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = 1906 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1907 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1908 1909 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair( 1910 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1911 1912 private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = { 1913 /* The case handled in this table is (visually): R EN L 1914 */ 1915 /* L, R, EN, AN, ON, S, B, Res */ 1916 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 }, 1917 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 }, 1918 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 }, 1919 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 }, 1920 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 } 1921 }; 1922 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new 1923 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1924 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1925 1926 private class LevState { 1927 byte[][] impTab; /* level table pointer */ 1928 short[] impAct; /* action map array */ 1929 int startON; /* start of ON sequence */ 1930 int startL2EN; /* start of level 2 sequence */ 1931 int lastStrongRTL; /* index of last found R or AL */ 1932 short state; /* current state */ 1933 byte runLevel; /* run level before implicit solving */ 1934 } 1935 1936 /*------------------------------------------------------------------------*/ 1937 1938 static final int FIRSTALLOC = 10; 1939 /* 1940 * param pos: position where to insert 1941 * param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1942 */ 1943 private void addPoint(int pos, int flag) 1944 { 1945 Point point = new Point(); 1946 1947 int len = insertPoints.points.length; 1948 if (len == 0) { 1949 insertPoints.points = new Point[FIRSTALLOC]; 1950 len = FIRSTALLOC; 1951 } 1952 if (insertPoints.size >= len) { /* no room for new point */ 1953 Point[] savePoints = insertPoints.points; 1954 insertPoints.points = new Point[len * 2]; 1955 System.arraycopy(savePoints, 0, insertPoints.points, 0, len); 1956 } 1957 point.pos = pos; 1958 point.flag = flag; 1959 insertPoints.points[insertPoints.size] = point; 1960 insertPoints.size++; 1961 } 1962 1963 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1964 1965 /* 1966 * This implementation of the (Wn) rules applies all rules in one pass. 1967 * In order to do so, it needs a look-ahead of typically 1 character 1968 * (except for W5: sequences of ET) and keeps track of changes 1969 * in a rule Wp that affect a later Wq (p<q). 1970 * 1971 * The (Nn) and (In) rules are also performed in that same single loop, 1972 * but effectively one iteration behind for white space. 1973 * 1974 * Since all implicit rules are performed in one step, it is not necessary 1975 * to actually store the intermediate directional properties in dirProps[]. 1976 */ 1977 1978 private void processPropertySeq(LevState levState, short _prop, 1979 int start, int limit) { 1980 byte cell; 1981 byte[][] impTab = levState.impTab; 1982 short[] impAct = levState.impAct; 1983 short oldStateSeq,actionSeq; 1984 byte level, addLevel; 1985 int start0, k; 1986 1987 start0 = start; /* save original start position */ 1988 oldStateSeq = levState.state; 1989 cell = impTab[oldStateSeq][_prop]; 1990 levState.state = GetState(cell); /* isolate the new state */ 1991 actionSeq = impAct[GetAction(cell)]; /* isolate the action */ 1992 addLevel = impTab[levState.state][IMPTABLEVELS_RES]; 1993 1994 if (actionSeq != 0) { 1995 switch (actionSeq) { 1996 case 1: /* init ON seq */ 1997 levState.startON = start0; 1998 break; 1999 2000 case 2: /* prepend ON seq to current seq */ 2001 start = levState.startON; 2002 break; 2003 2004 case 3: /* L or S after possible relevant EN/AN */ 2005 /* check if we had EN after R/AL */ 2006 if (levState.startL2EN >= 0) { 2007 addPoint(levState.startL2EN, LRM_BEFORE); 2008 } 2009 levState.startL2EN = -1; /* not within previous if since could also be -2 */ 2010 /* check if we had any relevant EN/AN after R/AL */ 2011 if ((insertPoints.points.length == 0) || 2012 (insertPoints.size <= insertPoints.confirmed)) { 2013 /* nothing, just clean up */ 2014 levState.lastStrongRTL = -1; 2015 /* check if we have a pending conditional segment */ 2016 level = impTab[oldStateSeq][IMPTABLEVELS_RES]; 2017 if ((level & 1) != 0 && levState.startON > 0) { /* after ON */ 2018 start = levState.startON; /* reset to basic run level */ 2019 } 2020 if (_prop == _S) { /* add LRM before S */ 2021 addPoint(start0, LRM_BEFORE); 2022 insertPoints.confirmed = insertPoints.size; 2023 } 2024 break; 2025 } 2026 /* reset previous RTL cont to level for LTR text */ 2027 for (k = levState.lastStrongRTL + 1; k < start0; k++) { 2028 /* reset odd level, leave runLevel+2 as is */ 2029 levels[k] = (byte)((levels[k] - 2) & ~1); 2030 } 2031 /* mark insert points as confirmed */ 2032 insertPoints.confirmed = insertPoints.size; 2033 levState.lastStrongRTL = -1; 2034 if (_prop == _S) { /* add LRM before S */ 2035 addPoint(start0, LRM_BEFORE); 2036 insertPoints.confirmed = insertPoints.size; 2037 } 2038 break; 2039 2040 case 4: /* R/AL after possible relevant EN/AN */ 2041 /* just clean up */ 2042 if (insertPoints.points.length > 0) 2043 /* remove all non confirmed insert points */ 2044 insertPoints.size = insertPoints.confirmed; 2045 levState.startON = -1; 2046 levState.startL2EN = -1; 2047 levState.lastStrongRTL = limit - 1; 2048 break; 2049 2050 case 5: /* EN/AN after R/AL + possible cont */ 2051 /* check for real AN */ 2052 if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) { 2053 /* real AN */ 2054 if (levState.startL2EN == -1) { /* if no relevant EN already found */ 2055 /* just note the righmost digit as a strong RTL */ 2056 levState.lastStrongRTL = limit - 1; 2057 break; 2058 } 2059 if (levState.startL2EN >= 0) { /* after EN, no AN */ 2060 addPoint(levState.startL2EN, LRM_BEFORE); 2061 levState.startL2EN = -2; 2062 } 2063 /* note AN */ 2064 addPoint(start0, LRM_BEFORE); 2065 break; 2066 } 2067 /* if first EN/AN after R/AL */ 2068 if (levState.startL2EN == -1) { 2069 levState.startL2EN = start0; 2070 } 2071 break; 2072 2073 case 6: /* note location of latest R/AL */ 2074 levState.lastStrongRTL = limit - 1; 2075 levState.startON = -1; 2076 break; 2077 2078 case 7: /* L after R+ON/EN/AN */ 2079 /* include possible adjacent number on the left */ 2080 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) { 2081 } 2082 if (k >= 0) { 2083 addPoint(k, RLM_BEFORE); /* add RLM before */ 2084 insertPoints.confirmed = insertPoints.size; /* confirm it */ 2085 } 2086 levState.startON = start0; 2087 break; 2088 2089 case 8: /* AN after L */ 2090 /* AN numbers between L text on both sides may be trouble. */ 2091 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 2092 addPoint(start0, LRM_BEFORE); /* add LRM before */ 2093 addPoint(start0, LRM_AFTER); /* add LRM after */ 2094 break; 2095 2096 case 9: /* R after L+ON/EN/AN */ 2097 /* false alert, infirm LRMs around previous AN */ 2098 insertPoints.size=insertPoints.confirmed; 2099 if (_prop == _S) { /* add RLM before S */ 2100 addPoint(start0, RLM_BEFORE); 2101 insertPoints.confirmed = insertPoints.size; 2102 } 2103 break; 2104 2105 case 10: /* L after L+ON/AN */ 2106 level = (byte)(levState.runLevel + addLevel); 2107 for (k=levState.startON; k < start0; k++) { 2108 if (levels[k] < level) { 2109 levels[k] = level; 2110 } 2111 } 2112 insertPoints.confirmed = insertPoints.size; /* confirm inserts */ 2113 levState.startON = start0; 2114 break; 2115 2116 case 11: /* L after L+ON+EN/AN/ON */ 2117 level = levState.runLevel; 2118 for (k = start0-1; k >= levState.startON; k--) { 2119 if (levels[k] == level+3) { 2120 while (levels[k] == level+3) { 2121 levels[k--] -= 2; 2122 } 2123 while (levels[k] == level) { 2124 k--; 2125 } 2126 } 2127 if (levels[k] == level+2) { 2128 levels[k] = level; 2129 continue; 2130 } 2131 levels[k] = (byte)(level+1); 2132 } 2133 break; 2134 2135 case 12: /* R after L+ON+EN/AN/ON */ 2136 level = (byte)(levState.runLevel+1); 2137 for (k = start0-1; k >= levState.startON; k--) { 2138 if (levels[k] > level) { 2139 levels[k] -= 2; 2140 } 2141 } 2142 break; 2143 2144 default: /* we should never get here */ 2145 throw new IllegalStateException("Internal ICU error in processPropertySeq"); 2146 } 2147 } 2148 if ((addLevel) != 0 || (start < start0)) { 2149 level = (byte)(levState.runLevel + addLevel); 2150 for (k = start; k < limit; k++) { 2151 levels[k] = level; 2152 } 2153 } 2154 } 2155 2156 private void resolveImplicitLevels(int start, int limit, short sor, short eor) 2157 { 2158 LevState levState = new LevState(); 2159 int i, start1, start2; 2160 short oldStateImp, stateImp, actionImp; 2161 short gprop, resProp, cell; 2162 short nextStrongProp = R; 2163 int nextStrongPos = -1; 2164 2165 2166 /* check for RTL inverse Bidi mode */ 2167 /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to 2168 * loop on the text characters from end to start. 2169 * This would need a different properties state table (at least different 2170 * actions) and different levels state tables (maybe very similar to the 2171 * LTR corresponding ones. 2172 */ 2173 /* initialize for levels state table */ 2174 levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2175 levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2176 levState.state = 0; 2177 levState.runLevel = levels[start]; 2178 levState.impTab = impTabPair.imptab[levState.runLevel & 1]; 2179 levState.impAct = impTabPair.impact[levState.runLevel & 1]; 2180 processPropertySeq(levState, sor, start, start); 2181 /* initialize for property state table */ 2182 if (dirProps[start] == NSM) { 2183 stateImp = (short)(1 + sor); 2184 } else { 2185 stateImp = 0; 2186 } 2187 start1 = start; 2188 start2 = 0; 2189 2190 for (i = start; i <= limit; i++) { 2191 if (i >= limit) { 2192 gprop = eor; 2193 } else { 2194 short prop, prop1; 2195 prop = NoContextRTL(dirProps[i]); 2196 gprop = groupProp[prop]; 2197 } 2198 oldStateImp = stateImp; 2199 cell = impTabProps[oldStateImp][gprop]; 2200 stateImp = GetStateProps(cell); /* isolate the new state */ 2201 actionImp = GetActionProps(cell); /* isolate the action */ 2202 if ((i == limit) && (actionImp == 0)) { 2203 /* there is an unprocessed sequence if its property == eor */ 2204 actionImp = 1; /* process the last sequence */ 2205 } 2206 if (actionImp != 0) { 2207 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES]; 2208 switch (actionImp) { 2209 case 1: /* process current seq1, init new seq1 */ 2210 processPropertySeq(levState, resProp, start1, i); 2211 start1 = i; 2212 break; 2213 case 2: /* init new seq2 */ 2214 start2 = i; 2215 break; 2216 case 3: /* process seq1, process seq2, init new seq1 */ 2217 processPropertySeq(levState, resProp, start1, start2); 2218 processPropertySeq(levState, _ON, start2, i); 2219 start1 = i; 2220 break; 2221 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 2222 processPropertySeq(levState, resProp, start1, start2); 2223 start1 = start2; 2224 start2 = i; 2225 break; 2226 default: /* we should never get here */ 2227 throw new IllegalStateException("Internal ICU error in resolveImplicitLevels"); 2228 } 2229 } 2230 } 2231 /* flush possible pending sequence, e.g. ON */ 2232 processPropertySeq(levState, eor, limit, limit); 2233 } 2234 2235 /* perform (L1) and (X9) ---------------------------------------------------- */ 2236 2237 /* 2238 * Reset the embedding levels for some non-graphic characters (L1). 2239 * This method also sets appropriate levels for BN, and 2240 * explicit embedding types that are supposed to have been removed 2241 * from the paragraph in (X9). 2242 */ 2243 private void adjustWSLevels() { 2244 int i; 2245 2246 if ((flags & MASK_WS) != 0) { 2247 int flag; 2248 i = trailingWSStart; 2249 while (i > 0) { 2250 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 2251 while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) { 2252 if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2253 levels[i] = 0; 2254 } else { 2255 levels[i] = GetParaLevelAt(i); 2256 } 2257 } 2258 2259 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 2260 /* here, i+1 is guaranteed to be <length */ 2261 while (i > 0) { 2262 flag = DirPropFlagNC(dirProps[--i]); 2263 if ((flag & MASK_BN_EXPLICIT) != 0) { 2264 levels[i] = levels[i + 1]; 2265 } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2266 levels[i] = 0; 2267 break; 2268 } else if ((flag & MASK_B_S) != 0){ 2269 levels[i] = GetParaLevelAt(i); 2270 break; 2271 } 2272 } 2273 } 2274 } 2275 } 2276 2277 private int Bidi_Min(int x, int y) { 2278 return x < y ? x : y; 2279 } 2280 2281 private int Bidi_Abs(int x) { 2282 return x >= 0 ? x : -x; 2283 } 2284 2285 /** 2286 * Perform the Unicode Bidi algorithm. It is defined in the 2287 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2288 * version 13, 2289 * also described in The Unicode Standard, Version 4.0 .<p> 2290 * 2291 * This method takes a piece of plain text containing one or more paragraphs, 2292 * with or without externally specified embedding levels from <i>styled</i> 2293 * text and computes the left-right-directionality of each character.<p> 2294 * 2295 * If the entire text is all of the same directionality, then 2296 * the method may not perform all the steps described by the algorithm, 2297 * i.e., some levels may not be the same as if all steps were performed. 2298 * This is not relevant for unidirectional text.<br> 2299 * For example, in pure LTR text with numbers the numbers would get 2300 * a resolved level of 2 higher than the surrounding text according to 2301 * the algorithm. This implementation may set all resolved levels to 2302 * the same value in such a case.<p> 2303 * 2304 * The text can be composed of multiple paragraphs. Occurrence of a block 2305 * separator in the text terminates a paragraph, and whatever comes next starts 2306 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2307 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2308 * in that case, the pair of characters is considered as terminating the 2309 * preceding paragraph, and a new paragraph will be started by a character 2310 * coming after the LF. 2311 * 2312 * Although the text is passed here as a <code>String</code>, it is 2313 * stored internally as an array of characters. Therefore the 2314 * documentation will refer to indexes of the characters in the text. 2315 * 2316 * @param text contains the text that the Bidi algorithm will be performed 2317 * on. This text can be retrieved with <code>getText()</code> or 2318 * <code>getTextAsString</code>.<br> 2319 * 2320 * @param paraLevel specifies the default level for the text; 2321 * it is typically 0 (LTR) or 1 (RTL). 2322 * If the method shall determine the paragraph level from the text, 2323 * then <code>paraLevel</code> can be set to 2324 * either <code>LEVEL_DEFAULT_LTR</code> 2325 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2326 * paragraphs, the paragraph level shall be determined separately for 2327 * each paragraph; if a paragraph does not include any strongly typed 2328 * character, then the desired default is used (0 for LTR or 1 for RTL). 2329 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2330 * is also valid, with odd levels indicating RTL. 2331 * 2332 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 2333 * ignoring characters like LRE and PDF in the text. 2334 * A level overrides the directional property of its corresponding 2335 * (same index) character if the level has the 2336 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2337 * Except for that bit, it must be 2338 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 2339 * with one exception: a level of zero may be specified for a 2340 * paragraph separator even if <code>paraLevel>0</code> when multiple 2341 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2342 * <strong>Caution: </strong>A reference to this array, not a copy 2343 * of the levels, will be stored in the <code>Bidi</code> object; 2344 * the <code>embeddingLevels</code> 2345 * should not be modified to avoid unexpected results on subsequent 2346 * Bidi operations. However, the <code>setPara()</code> and 2347 * <code>setLine()</code> methods may modify some or all of the 2348 * levels.<br><br> 2349 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2350 * have one entry for each character in <code>text</code>. 2351 * 2352 * @throws IllegalArgumentException if the values in embeddingLevels are 2353 * not within the allowed range 2354 * 2355 * @see #LEVEL_DEFAULT_LTR 2356 * @see #LEVEL_DEFAULT_RTL 2357 * @see #LEVEL_OVERRIDE 2358 * @see #MAX_EXPLICIT_LEVEL 2359 * @stable ICU 3.8 2360 */ 2361 void setPara(String text, byte paraLevel, byte[] embeddingLevels) 2362 { 2363 if (text == null) { 2364 setPara(new char[0], paraLevel, embeddingLevels); 2365 } else { 2366 setPara(text.toCharArray(), paraLevel, embeddingLevels); 2367 } 2368 } 2369 2370 /** 2371 * Perform the Unicode Bidi algorithm. It is defined in the 2372 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2373 * version 13, 2374 * also described in The Unicode Standard, Version 4.0 .<p> 2375 * 2376 * This method takes a piece of plain text containing one or more paragraphs, 2377 * with or without externally specified embedding levels from <i>styled</i> 2378 * text and computes the left-right-directionality of each character.<p> 2379 * 2380 * If the entire text is all of the same directionality, then 2381 * the method may not perform all the steps described by the algorithm, 2382 * i.e., some levels may not be the same as if all steps were performed. 2383 * This is not relevant for unidirectional text.<br> 2384 * For example, in pure LTR text with numbers the numbers would get 2385 * a resolved level of 2 higher than the surrounding text according to 2386 * the algorithm. This implementation may set all resolved levels to 2387 * the same value in such a case.<p> 2388 * 2389 * The text can be composed of multiple paragraphs. Occurrence of a block 2390 * separator in the text terminates a paragraph, and whatever comes next starts 2391 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2392 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2393 * in that case, the pair of characters is considered as terminating the 2394 * preceding paragraph, and a new paragraph will be started by a character 2395 * coming after the LF. 2396 * 2397 * The text is stored internally as an array of characters. Therefore the 2398 * documentation will refer to indexes of the characters in the text. 2399 * 2400 * @param chars contains the text that the Bidi algorithm will be performed 2401 * on. This text can be retrieved with <code>getText()</code> or 2402 * <code>getTextAsString</code>.<br> 2403 * 2404 * @param paraLevel specifies the default level for the text; 2405 * it is typically 0 (LTR) or 1 (RTL). 2406 * If the method shall determine the paragraph level from the text, 2407 * then <code>paraLevel</code> can be set to 2408 * either <code>LEVEL_DEFAULT_LTR</code> 2409 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2410 * paragraphs, the paragraph level shall be determined separately for 2411 * each paragraph; if a paragraph does not include any strongly typed 2412 * character, then the desired default is used (0 for LTR or 1 for RTL). 2413 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2414 * is also valid, with odd levels indicating RTL. 2415 * 2416 * @param embeddingLevels (in) may be used to preset the embedding and 2417 * override levels, ignoring characters like LRE and PDF in the text. 2418 * A level overrides the directional property of its corresponding 2419 * (same index) character if the level has the 2420 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2421 * Except for that bit, it must be 2422 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 2423 * with one exception: a level of zero may be specified for a 2424 * paragraph separator even if <code>paraLevel>0</code> when multiple 2425 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2426 * <strong>Caution: </strong>A reference to this array, not a copy 2427 * of the levels, will be stored in the <code>Bidi</code> object; 2428 * the <code>embeddingLevels</code> 2429 * should not be modified to avoid unexpected results on subsequent 2430 * Bidi operations. However, the <code>setPara()</code> and 2431 * <code>setLine()</code> methods may modify some or all of the 2432 * levels.<br><br> 2433 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2434 * have one entry for each character in <code>text</code>. 2435 * 2436 * @throws IllegalArgumentException if the values in embeddingLevels are 2437 * not within the allowed range 2438 * 2439 * @see #LEVEL_DEFAULT_LTR 2440 * @see #LEVEL_DEFAULT_RTL 2441 * @see #LEVEL_OVERRIDE 2442 * @see #MAX_EXPLICIT_LEVEL 2443 * @stable ICU 3.8 2444 */ 2445 public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels) 2446 { 2447 /* check the argument values */ 2448 if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) { 2449 verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1); 2450 } 2451 if (chars == null) { 2452 chars = new char[0]; 2453 } 2454 2455 /* initialize the Bidi object */ 2456 this.paraBidi = null; /* mark unfinished setPara */ 2457 this.text = chars; 2458 this.length = this.originalLength = this.resultLength = text.length; 2459 this.paraLevel = paraLevel; 2460 this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2461 this.paraCount = 1; 2462 2463 /* Allocate zero-length arrays instead of setting to null here; then 2464 * checks for null in various places can be eliminated. 2465 */ 2466 dirProps = new byte[0]; 2467 levels = new byte[0]; 2468 runs = new BidiRun[0]; 2469 isGoodLogicalToVisualRunsMap = false; 2470 insertPoints.size = 0; /* clean up from last call */ 2471 insertPoints.confirmed = 0; /* clean up from last call */ 2472 2473 /* 2474 * Save the original paraLevel if contextual; otherwise, set to 0. 2475 */ 2476 if (IsDefaultLevel(paraLevel)) { 2477 defaultParaLevel = paraLevel; 2478 } else { 2479 defaultParaLevel = 0; 2480 } 2481 2482 if (length == 0) { 2483 /* 2484 * For an empty paragraph, create a Bidi object with the paraLevel and 2485 * the flags and the direction set but without allocating zero-length arrays. 2486 * There is nothing more to do. 2487 */ 2488 if (IsDefaultLevel(paraLevel)) { 2489 this.paraLevel &= 1; 2490 defaultParaLevel = 0; 2491 } 2492 if ((this.paraLevel & 1) != 0) { 2493 flags = DirPropFlag(R); 2494 direction = Bidi.DIRECTION_RIGHT_TO_LEFT; 2495 } else { 2496 flags = DirPropFlag(L); 2497 direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2498 } 2499 2500 runCount = 0; 2501 paraCount = 0; 2502 paraBidi = this; /* mark successful setPara */ 2503 return; 2504 } 2505 2506 runCount = -1; 2507 2508 /* 2509 * Get the directional properties, 2510 * the flags bit-set, and 2511 * determine the paragraph level if necessary. 2512 */ 2513 getDirPropsMemory(length); 2514 dirProps = dirPropsMemory; 2515 getDirProps(); 2516 2517 /* the processed length may have changed if OPTION_STREAMING is set */ 2518 trailingWSStart = length; /* the levels[] will reflect the WS run */ 2519 2520 /* allocate paras memory */ 2521 if (paraCount > 1) { 2522 getInitialParasMemory(paraCount); 2523 paras = parasMemory; 2524 paras[paraCount - 1] = length; 2525 } else { 2526 /* initialize paras for single paragraph */ 2527 paras = simpleParas; 2528 simpleParas[0] = length; 2529 } 2530 2531 /* are explicit levels specified? */ 2532 if (embeddingLevels == null) { 2533 /* no: determine explicit levels according to the (Xn) rules */ 2534 getLevelsMemory(length); 2535 levels = levelsMemory; 2536 direction = resolveExplicitLevels(); 2537 } else { 2538 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */ 2539 levels = embeddingLevels; 2540 direction = checkExplicitLevels(); 2541 } 2542 2543 /* 2544 * The steps after (X9) in the Bidi algorithm are performed only if 2545 * the paragraph text has mixed directionality! 2546 */ 2547 switch (direction) { 2548 case Bidi.DIRECTION_LEFT_TO_RIGHT: 2549 /* make sure paraLevel is even */ 2550 paraLevel = (byte)((paraLevel + 1) & ~1); 2551 2552 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2553 trailingWSStart = 0; 2554 break; 2555 case Bidi.DIRECTION_RIGHT_TO_LEFT: 2556 /* make sure paraLevel is odd */ 2557 paraLevel |= 1; 2558 2559 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2560 trailingWSStart = 0; 2561 break; 2562 default: 2563 this.impTabPair = impTab_DEFAULT; 2564 2565 /* 2566 * If there are no external levels specified and there 2567 * are no significant explicit level codes in the text, 2568 * then we can treat the entire paragraph as one run. 2569 * Otherwise, we need to perform the following rules on runs of 2570 * the text with the same embedding levels. (X10) 2571 * "Significant" explicit level codes are ones that actually 2572 * affect non-BN characters. 2573 * Examples for "insignificant" ones are empty embeddings 2574 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 2575 */ 2576 if (embeddingLevels == null && paraCount <= 1 && 2577 (flags & DirPropFlagMultiRuns) == 0) { 2578 resolveImplicitLevels(0, length, 2579 GetLRFromLevel(GetParaLevelAt(0)), 2580 GetLRFromLevel(GetParaLevelAt(length - 1))); 2581 } else { 2582 /* sor, eor: start and end types of same-level-run */ 2583 int start, limit = 0; 2584 byte level, nextLevel; 2585 short sor, eor; 2586 2587 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 2588 level = GetParaLevelAt(0); 2589 nextLevel = levels[0]; 2590 if (level < nextLevel) { 2591 eor = GetLRFromLevel(nextLevel); 2592 } else { 2593 eor = GetLRFromLevel(level); 2594 } 2595 2596 do { 2597 /* determine start and limit of the run (end points just behind the run) */ 2598 2599 /* the values for this run's start are the same as for the previous run's end */ 2600 start = limit; 2601 level = nextLevel; 2602 if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) { 2603 /* except if this is a new paragraph, then set sor = para level */ 2604 sor = GetLRFromLevel(GetParaLevelAt(start)); 2605 } else { 2606 sor = eor; 2607 } 2608 2609 /* search for the limit of this run */ 2610 while (++limit < length && levels[limit] == level) {} 2611 2612 /* get the correct level of the next run */ 2613 if (limit < length) { 2614 nextLevel = levels[limit]; 2615 } else { 2616 nextLevel = GetParaLevelAt(length - 1); 2617 } 2618 2619 /* determine eor from max(level, nextLevel); sor is last run's eor */ 2620 if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) { 2621 eor = GetLRFromLevel(nextLevel); 2622 } else { 2623 eor = GetLRFromLevel(level); 2624 } 2625 2626 /* if the run consists of overridden directional types, then there 2627 are no implicit types to be resolved */ 2628 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 2629 resolveImplicitLevels(start, limit, sor, eor); 2630 } else { 2631 /* remove the LEVEL_OVERRIDE flags */ 2632 do { 2633 levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE; 2634 } while (start < limit); 2635 } 2636 } while (limit < length); 2637 } 2638 2639 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2640 adjustWSLevels(); 2641 2642 break; 2643 } 2644 2645 resultLength += insertPoints.size; 2646 paraBidi = this; /* mark successful setPara */ 2647 } 2648 2649 /** 2650 * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the 2651 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2652 * version 13, 2653 * also described in The Unicode Standard, Version 4.0 .<p> 2654 * 2655 * This method takes a paragraph of text and computes the 2656 * left-right-directionality of each character. The text should not 2657 * contain any Unicode block separators.<p> 2658 * 2659 * The RUN_DIRECTION attribute in the text, if present, determines the base 2660 * direction (left-to-right or right-to-left). If not present, the base 2661 * direction is computed using the Unicode Bidirectional Algorithm, 2662 * defaulting to left-to-right if there are no strong directional characters 2663 * in the text. This attribute, if present, must be applied to all the text 2664 * in the paragraph.<p> 2665 * 2666 * The BIDI_EMBEDDING attribute in the text, if present, represents 2667 * embedding level information. Negative values from -1 to -62 indicate 2668 * overrides at the absolute value of the level. Positive values from 1 to 2669 * 62 indicate embeddings. Where values are zero or not defined, the base 2670 * embedding level as determined by the base direction is assumed.<p> 2671 * 2672 * The NUMERIC_SHAPING attribute in the text, if present, converts European 2673 * digits to other decimal digits before running the bidi algorithm. This 2674 * attribute, if present, must be applied to all the text in the paragraph. 2675 * 2676 * If the entire text is all of the same directionality, then 2677 * the method may not perform all the steps described by the algorithm, 2678 * i.e., some levels may not be the same as if all steps were performed. 2679 * This is not relevant for unidirectional text.<br> 2680 * For example, in pure LTR text with numbers the numbers would get 2681 * a resolved level of 2 higher than the surrounding text according to 2682 * the algorithm. This implementation may set all resolved levels to 2683 * the same value in such a case.<p> 2684 * 2685 * @param paragraph a paragraph of text with optional character and 2686 * paragraph attribute information 2687 * @stable ICU 3.8 2688 */ 2689 public void setPara(AttributedCharacterIterator paragraph) 2690 { 2691 byte paraLvl; 2692 char ch = paragraph.first(); 2693 Boolean runDirection = 2694 (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION); 2695 Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING); 2696 if (runDirection == null) { 2697 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 2698 } else { 2699 paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ? 2700 (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT; 2701 } 2702 2703 byte[] lvls = null; 2704 int len = paragraph.getEndIndex() - paragraph.getBeginIndex(); 2705 byte[] embeddingLevels = new byte[len]; 2706 char[] txt = new char[len]; 2707 int i = 0; 2708 while (ch != AttributedCharacterIterator.DONE) { 2709 txt[i] = ch; 2710 Integer embedding = 2711 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING); 2712 if (embedding != null) { 2713 byte level = embedding.byteValue(); 2714 if (level == 0) { 2715 /* no-op */ 2716 } else if (level < 0) { 2717 lvls = embeddingLevels; 2718 embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE); 2719 } else { 2720 lvls = embeddingLevels; 2721 embeddingLevels[i] = level; 2722 } 2723 } 2724 ch = paragraph.next(); 2725 ++i; 2726 } 2727 2728 if (shaper != null) { 2729 NumericShapings.shape(shaper, txt, 0, len); 2730 } 2731 setPara(txt, paraLvl, lvls); 2732 } 2733 2734 /** 2735 * Specify whether block separators must be allocated level zero, 2736 * so that successive paragraphs will progress from left to right. 2737 * This method must be called before <code>setPara()</code>. 2738 * Paragraph separators (B) may appear in the text. Setting them to level zero 2739 * means that all paragraph separators (including one possibly appearing 2740 * in the last text position) are kept in the reordered text after the text 2741 * that they follow in the source text. 2742 * When this feature is not enabled, a paragraph separator at the last 2743 * position of the text before reordering will go to the first position 2744 * of the reordered text when the paragraph level is odd. 2745 * 2746 * @param ordarParaLTR specifies whether paragraph separators (B) must 2747 * receive level 0, so that successive paragraphs progress from left to right. 2748 * 2749 * @see #setPara 2750 * @stable ICU 3.8 2751 */ 2752 private void orderParagraphsLTR(boolean ordarParaLTR) { 2753 orderParagraphsLTR = ordarParaLTR; 2754 } 2755 2756 /** 2757 * Get the directionality of the text. 2758 * 2759 * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code> 2760 * that indicates if the entire text 2761 * represented by this object is unidirectional, 2762 * and which direction, or if it is mixed-directional. 2763 * 2764 * @throws IllegalStateException if this call is not preceded by a successful 2765 * call to <code>setPara</code> or <code>setLine</code> 2766 * 2767 * @see #LTR 2768 * @see #RTL 2769 * @see #MIXED 2770 * @stable ICU 3.8 2771 */ 2772 private byte getDirection() 2773 { 2774 verifyValidParaOrLine(); 2775 return direction; 2776 } 2777 2778 /** 2779 * Get the length of the text. 2780 * 2781 * @return The length of the text that the <code>Bidi</code> object was 2782 * created for. 2783 * 2784 * @throws IllegalStateException if this call is not preceded by a successful 2785 * call to <code>setPara</code> or <code>setLine</code> 2786 * @stable ICU 3.8 2787 */ 2788 public int getLength() 2789 { 2790 verifyValidParaOrLine(); 2791 return originalLength; 2792 } 2793 2794 /* paragraphs API methods ------------------------------------------------- */ 2795 2796 /** 2797 * Get the paragraph level of the text. 2798 * 2799 * @return The paragraph level. If there are multiple paragraphs, their 2800 * level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or 2801 * LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph 2802 * is returned. 2803 * 2804 * @throws IllegalStateException if this call is not preceded by a successful 2805 * call to <code>setPara</code> or <code>setLine</code> 2806 * 2807 * @see #LEVEL_DEFAULT_LTR 2808 * @see #LEVEL_DEFAULT_RTL 2809 * @see #getParagraph 2810 * @see #getParagraphByIndex 2811 * @stable ICU 3.8 2812 */ 2813 public byte getParaLevel() 2814 { 2815 verifyValidParaOrLine(); 2816 return paraLevel; 2817 } 2818 2819 /** 2820 * Get the index of a paragraph, given a position within the text.<p> 2821 * 2822 * @param charIndex is the index of a character within the text, in the 2823 * range <code>[0..getProcessedLength()-1]</code>. 2824 * 2825 * @return The index of the paragraph containing the specified position, 2826 * starting from 0. 2827 * 2828 * @throws IllegalStateException if this call is not preceded by a successful 2829 * call to <code>setPara</code> or <code>setLine</code> 2830 * @throws IllegalArgumentException if charIndex is not within the legal range 2831 * 2832 * @see com.ibm.icu.text.BidiRun 2833 * @see #getProcessedLength 2834 * @stable ICU 3.8 2835 */ 2836 public int getParagraphIndex(int charIndex) 2837 { 2838 verifyValidParaOrLine(); 2839 BidiBase bidi = paraBidi; /* get Para object if Line object */ 2840 verifyRange(charIndex, 0, bidi.length); 2841 int paraIndex; 2842 for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) { 2843 } 2844 return paraIndex; 2845 } 2846 2847 /** 2848 * <code>setLine()</code> returns a <code>Bidi</code> object to 2849 * contain the reordering information, especially the resolved levels, 2850 * for all the characters in a line of text. This line of text is 2851 * specified by referring to a <code>Bidi</code> object representing 2852 * this information for a piece of text containing one or more paragraphs, 2853 * and by specifying a range of indexes in this text.<p> 2854 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 2855 * 2856 * This is used after calling <code>setPara()</code> 2857 * for a piece of text, and after line-breaking on that text. 2858 * It is not necessary if each paragraph is treated as a single line.<p> 2859 * 2860 * After line-breaking, rules (L1) and (L2) for the treatment of 2861 * trailing WS and for reordering are performed on 2862 * a <code>Bidi</code> object that represents a line.<p> 2863 * 2864 * <strong>Important: </strong>the line <code>Bidi</code> object may 2865 * reference data within the global text <code>Bidi</code> object. 2866 * You should not alter the content of the global text object until 2867 * you are finished using the line object. 2868 * 2869 * @param start is the line's first index into the text. 2870 * 2871 * @param limit is just behind the line's last index into the text 2872 * (its last index +1). 2873 * 2874 * @return a <code>Bidi</code> object that will now represent a line of the text. 2875 * 2876 * @throws IllegalStateException if this call is not preceded by a successful 2877 * call to <code>setPara</code> 2878 * @throws IllegalArgumentException if start and limit are not in the range 2879 * <code>0<=start<limit<=getProcessedLength()</code>, 2880 * or if the specified line crosses a paragraph boundary 2881 * 2882 * @see #setPara 2883 * @see #getProcessedLength 2884 * @stable ICU 3.8 2885 */ 2886 public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit) 2887 { 2888 verifyValidPara(); 2889 verifyRange(start, 0, limit); 2890 verifyRange(limit, 0, length+1); 2891 2892 return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit); 2893 } 2894 2895 /** 2896 * Get the level for one character. 2897 * 2898 * @param charIndex the index of a character. 2899 * 2900 * @return The level for the character at <code>charIndex</code>. 2901 * 2902 * @throws IllegalStateException if this call is not preceded by a successful 2903 * call to <code>setPara</code> or <code>setLine</code> 2904 * @throws IllegalArgumentException if charIndex is not in the range 2905 * <code>0<=charIndex<getProcessedLength()</code> 2906 * 2907 * @see #getProcessedLength 2908 * @stable ICU 3.8 2909 */ 2910 public byte getLevelAt(int charIndex) 2911 { 2912 if (charIndex < 0 || charIndex >= length) { 2913 return (byte)getBaseLevel(); 2914 } 2915 verifyValidParaOrLine(); 2916 verifyRange(charIndex, 0, length); 2917 return BidiLine.getLevelAt(this, charIndex); 2918 } 2919 2920 /** 2921 * Get an array of levels for each character.<p> 2922 * 2923 * Note that this method may allocate memory under some 2924 * circumstances, unlike <code>getLevelAt()</code>. 2925 * 2926 * @return The levels array for the text, 2927 * or <code>null</code> if an error occurs. 2928 * 2929 * @throws IllegalStateException if this call is not preceded by a successful 2930 * call to <code>setPara</code> or <code>setLine</code> 2931 * @stable ICU 3.8 2932 */ 2933 private byte[] getLevels() 2934 { 2935 verifyValidParaOrLine(); 2936 if (length <= 0) { 2937 return new byte[0]; 2938 } 2939 return BidiLine.getLevels(this); 2940 } 2941 2942 /** 2943 * Get the number of runs. 2944 * This method may invoke the actual reordering on the 2945 * <code>Bidi</code> object, after <code>setPara()</code> 2946 * may have resolved only the levels of the text. Therefore, 2947 * <code>countRuns()</code> may have to allocate memory, 2948 * and may throw an exception if it fails to do so. 2949 * 2950 * @return The number of runs. 2951 * 2952 * @throws IllegalStateException if this call is not preceded by a successful 2953 * call to <code>setPara</code> or <code>setLine</code> 2954 * @stable ICU 3.8 2955 */ 2956 public int countRuns() 2957 { 2958 verifyValidParaOrLine(); 2959 BidiLine.getRuns(this); 2960 return runCount; 2961 } 2962 2963 /** 2964 * Get a visual-to-logical index map (array) for the characters in the 2965 * <code>Bidi</code> (paragraph or line) object. 2966 * <p> 2967 * Some values in the map may be <code>MAP_NOWHERE</code> if the 2968 * corresponding text characters are Bidi marks inserted in the visual 2969 * output by the option <code>OPTION_INSERT_MARKS</code>. 2970 * <p> 2971 * When the visual output is altered by using options of 2972 * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>, 2973 * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>, 2974 * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 2975 * be correct. It is advised to use, when possible, reordering options 2976 * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}. 2977 * 2978 * @return an array of <code>getResultLength()</code> 2979 * indexes which will reflect the reordering of the characters.<br><br> 2980 * The index map will result in 2981 * <code>indexMap[visualIndex]==logicalIndex</code>, where 2982 * <code>indexMap</code> represents the returned array. 2983 * 2984 * @throws IllegalStateException if this call is not preceded by a successful 2985 * call to <code>setPara</code> or <code>setLine</code> 2986 * 2987 * @see #getLogicalMap 2988 * @see #getLogicalIndex 2989 * @see #getResultLength 2990 * @see #MAP_NOWHERE 2991 * @see #OPTION_INSERT_MARKS 2992 * @see #writeReordered 2993 * @stable ICU 3.8 2994 */ 2995 private int[] getVisualMap() 2996 { 2997 /* countRuns() checks successful call to setPara/setLine */ 2998 countRuns(); 2999 if (resultLength <= 0) { 3000 return new int[0]; 3001 } 3002 return BidiLine.getVisualMap(this); 3003 } 3004 3005 /** 3006 * This is a convenience method that does not use a <code>Bidi</code> object. 3007 * It is intended to be used for when an application has determined the levels 3008 * of objects (character sequences) and just needs to have them reordered (L2). 3009 * This is equivalent to using <code>getVisualMap()</code> on a 3010 * <code>Bidi</code> object. 3011 * 3012 * @param levels is an array of levels that have been determined by 3013 * the application. 3014 * 3015 * @return an array of <code>levels.length</code> 3016 * indexes which will reflect the reordering of the characters.<p> 3017 * The index map will result in 3018 * <code>indexMap[visualIndex]==logicalIndex</code>, where 3019 * <code>indexMap</code> represents the returned array. 3020 * 3021 * @stable ICU 3.8 3022 */ 3023 private static int[] reorderVisual(byte[] levels) 3024 { 3025 return BidiLine.reorderVisual(levels); 3026 } 3027 3028 /** 3029 * Constant indicating that the base direction depends on the first strong 3030 * directional character in the text according to the Unicode Bidirectional 3031 * Algorithm. If no strong directional character is present, the base 3032 * direction is left-to-right. 3033 * @stable ICU 3.8 3034 */ 3035 private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e; 3036 3037 /** 3038 * Constant indicating that the base direction depends on the first strong 3039 * directional character in the text according to the Unicode Bidirectional 3040 * Algorithm. If no strong directional character is present, the base 3041 * direction is right-to-left. 3042 * @stable ICU 3.8 3043 */ 3044 private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f; 3045 3046 /** 3047 * Create Bidi from the given text, embedding, and direction information. 3048 * The embeddings array may be null. If present, the values represent 3049 * embedding level information. Negative values from -1 to -61 indicate 3050 * overrides at the absolute value of the level. Positive values from 1 to 3051 * 61 indicate embeddings. Where values are zero, the base embedding level 3052 * as determined by the base direction is assumed.<p> 3053 * 3054 * Note: this constructor calls setPara() internally. 3055 * 3056 * @param text an array containing the paragraph of text to process. 3057 * @param textStart the index into the text array of the start of the 3058 * paragraph. 3059 * @param embeddings an array containing embedding values for each character 3060 * in the paragraph. This can be null, in which case it is assumed 3061 * that there is no external embedding information. 3062 * @param embStart the index into the embedding array of the start of the 3063 * paragraph. 3064 * @param paragraphLength the length of the paragraph in the text and 3065 * embeddings arrays. 3066 * @param flags a collection of flags that control the algorithm. The 3067 * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, 3068 * DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and 3069 * DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved. 3070 * 3071 * @throws IllegalArgumentException if the values in embeddings are 3072 * not within the allowed range 3073 * 3074 * @see #DIRECTION_LEFT_TO_RIGHT 3075 * @see #DIRECTION_RIGHT_TO_LEFT 3076 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 3077 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 3078 * @stable ICU 3.8 3079 */ 3080 public BidiBase(char[] text, 3081 int textStart, 3082 byte[] embeddings, 3083 int embStart, 3084 int paragraphLength, 3085 int flags) 3086 { 3087 this(0, 0); 3088 byte paraLvl; 3089 switch (flags) { 3090 case Bidi.DIRECTION_LEFT_TO_RIGHT: 3091 default: 3092 paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT; 3093 break; 3094 case Bidi.DIRECTION_RIGHT_TO_LEFT: 3095 paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT; 3096 break; 3097 case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT: 3098 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 3099 break; 3100 case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT: 3101 paraLvl = INTERNAL_LEVEL_DEFAULT_RTL; 3102 break; 3103 } 3104 byte[] paraEmbeddings; 3105 if (embeddings == null) { 3106 paraEmbeddings = null; 3107 } else { 3108 paraEmbeddings = new byte[paragraphLength]; 3109 byte lev; 3110 for (int i = 0; i < paragraphLength; i++) { 3111 lev = embeddings[i + embStart]; 3112 if (lev < 0) { 3113 lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE); 3114 } else if (lev == 0) { 3115 lev = paraLvl; 3116 if (paraLvl > MAX_EXPLICIT_LEVEL) { 3117 lev &= 1; 3118 } 3119 } 3120 paraEmbeddings[i] = lev; 3121 } 3122 } 3123 if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { 3124 setPara(text, paraLvl, paraEmbeddings); 3125 } else { 3126 char[] paraText = new char[paragraphLength]; 3127 System.arraycopy(text, textStart, paraText, 0, paragraphLength); 3128 setPara(paraText, paraLvl, paraEmbeddings); 3129 } 3130 } 3131 3132 /** 3133 * Return true if the line is not left-to-right or right-to-left. This means 3134 * it either has mixed runs of left-to-right and right-to-left text, or the 3135 * base direction differs from the direction of the only run of text. 3136 * 3137 * @return true if the line is not left-to-right or right-to-left. 3138 * 3139 * @throws IllegalStateException if this call is not preceded by a successful 3140 * call to <code>setPara</code> 3141 * @stable ICU 3.8 3142 */ 3143 public boolean isMixed() 3144 { 3145 return (!isLeftToRight() && !isRightToLeft()); 3146 } 3147 3148 /** 3149 * Return true if the line is all left-to-right text and the base direction 3150 * is left-to-right. 3151 * 3152 * @return true if the line is all left-to-right text and the base direction 3153 * is left-to-right. 3154 * 3155 * @throws IllegalStateException if this call is not preceded by a successful 3156 * call to <code>setPara</code> 3157 * @stable ICU 3.8 3158 */ 3159 public boolean isLeftToRight() 3160 { 3161 return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0); 3162 } 3163 3164 /** 3165 * Return true if the line is all right-to-left text, and the base direction 3166 * is right-to-left 3167 * 3168 * @return true if the line is all right-to-left text, and the base 3169 * direction is right-to-left 3170 * 3171 * @throws IllegalStateException if this call is not preceded by a successful 3172 * call to <code>setPara</code> 3173 * @stable ICU 3.8 3174 */ 3175 public boolean isRightToLeft() 3176 { 3177 return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1); 3178 } 3179 3180 /** 3181 * Return true if the base direction is left-to-right 3182 * 3183 * @return true if the base direction is left-to-right 3184 * 3185 * @throws IllegalStateException if this call is not preceded by a successful 3186 * call to <code>setPara</code> or <code>setLine</code> 3187 * 3188 * @stable ICU 3.8 3189 */ 3190 public boolean baseIsLeftToRight() 3191 { 3192 return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT); 3193 } 3194 3195 /** 3196 * Return the base level (0 if left-to-right, 1 if right-to-left). 3197 * 3198 * @return the base level 3199 * 3200 * @throws IllegalStateException if this call is not preceded by a successful 3201 * call to <code>setPara</code> or <code>setLine</code> 3202 * 3203 * @stable ICU 3.8 3204 */ 3205 public int getBaseLevel() 3206 { 3207 return getParaLevel(); 3208 } 3209 3210 /** 3211 * Compute the logical to visual run mapping 3212 */ 3213 private void getLogicalToVisualRunsMap() 3214 { 3215 if (isGoodLogicalToVisualRunsMap) { 3216 return; 3217 } 3218 int count = countRuns(); 3219 if ((logicalToVisualRunsMap == null) || 3220 (logicalToVisualRunsMap.length < count)) { 3221 logicalToVisualRunsMap = new int[count]; 3222 } 3223 int i; 3224 long[] keys = new long[count]; 3225 for (i = 0; i < count; i++) { 3226 keys[i] = ((long)(runs[i].start)<<32) + i; 3227 } 3228 Arrays.sort(keys); 3229 for (i = 0; i < count; i++) { 3230 logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF); 3231 } 3232 keys = null; 3233 isGoodLogicalToVisualRunsMap = true; 3234 } 3235 3236 /** 3237 * Return the level of the nth logical run in this line. 3238 * 3239 * @param run the index of the run, between 0 and <code>countRuns()-1</code> 3240 * 3241 * @return the level of the run 3242 * 3243 * @throws IllegalStateException if this call is not preceded by a successful 3244 * call to <code>setPara</code> or <code>setLine</code> 3245 * @throws IllegalArgumentException if <code>run</code> is not in 3246 * the range <code>0<=run<countRuns()</code> 3247 * @stable ICU 3.8 3248 */ 3249 public int getRunLevel(int run) 3250 { 3251 verifyValidParaOrLine(); 3252 BidiLine.getRuns(this); 3253 if (run < 0 || run >= runCount) { 3254 return getParaLevel(); 3255 } 3256 getLogicalToVisualRunsMap(); 3257 return runs[logicalToVisualRunsMap[run]].level; 3258 } 3259 3260 /** 3261 * Return the index of the character at the start of the nth logical run in 3262 * this line, as an offset from the start of the line. 3263 * 3264 * @param run the index of the run, between 0 and <code>countRuns()</code> 3265 * 3266 * @return the start of the run 3267 * 3268 * @throws IllegalStateException if this call is not preceded by a successful 3269 * call to <code>setPara</code> or <code>setLine</code> 3270 * @throws IllegalArgumentException if <code>run</code> is not in 3271 * the range <code>0<=run<countRuns()</code> 3272 * @stable ICU 3.8 3273 */ 3274 public int getRunStart(int run) 3275 { 3276 verifyValidParaOrLine(); 3277 BidiLine.getRuns(this); 3278 if (runCount == 1) { 3279 return 0; 3280 } else if (run == runCount) { 3281 return length; 3282 } 3283 verifyIndex(run, 0, runCount); 3284 getLogicalToVisualRunsMap(); 3285 return runs[logicalToVisualRunsMap[run]].start; 3286 } 3287 3288 /** 3289 * Return the index of the character past the end of the nth logical run in 3290 * this line, as an offset from the start of the line. For example, this 3291 * will return the length of the line for the last run on the line. 3292 * 3293 * @param run the index of the run, between 0 and <code>countRuns()</code> 3294 * 3295 * @return the limit of the run 3296 * 3297 * @throws IllegalStateException if this call is not preceded by a successful 3298 * call to <code>setPara</code> or <code>setLine</code> 3299 * @throws IllegalArgumentException if <code>run</code> is not in 3300 * the range <code>0<=run<countRuns()</code> 3301 * @stable ICU 3.8 3302 */ 3303 public int getRunLimit(int run) 3304 { 3305 verifyValidParaOrLine(); 3306 BidiLine.getRuns(this); 3307 if (runCount == 1) { 3308 return length; 3309 } 3310 verifyIndex(run, 0, runCount); 3311 getLogicalToVisualRunsMap(); 3312 int idx = logicalToVisualRunsMap[run]; 3313 int len = idx == 0 ? runs[idx].limit : 3314 runs[idx].limit - runs[idx-1].limit; 3315 return runs[idx].start + len; 3316 } 3317 3318 /** 3319 * Return true if the specified text requires bidi analysis. If this returns 3320 * false, the text will display left-to-right. Clients can then avoid 3321 * constructing a Bidi object. Text in the Arabic Presentation Forms area of 3322 * Unicode is presumed to already be shaped and ordered for display, and so 3323 * will not cause this method to return true. 3324 * 3325 * @param text the text containing the characters to test 3326 * @param start the start of the range of characters to test 3327 * @param limit the limit of the range of characters to test 3328 * 3329 * @return true if the range of characters requires bidi analysis 3330 * 3331 * @stable ICU 3.8 3332 */ 3333 public static boolean requiresBidi(char[] text, 3334 int start, 3335 int limit) 3336 { 3337 final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT | 3338 1 << AL | 3339 1 << RLE | 3340 1 << RLO | 3341 1 << AN); 3342 3343 if (0 > start || start > limit || limit > text.length) { 3344 throw new IllegalArgumentException("Value start " + start + 3345 " is out of range 0 to " + limit); 3346 } 3347 for (int i = start; i < limit; ++i) { 3348 if (Character.isHighSurrogate(text[i]) && i < (limit-1) && 3349 Character.isLowSurrogate(text[i+1])) { 3350 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) { 3351 return true; 3352 } 3353 } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) { 3354 return true; 3355 } 3356 } 3357 return false; 3358 } 3359 3360 /** 3361 * Reorder the objects in the array into visual order based on their levels. 3362 * This is a utility method to use when you have a collection of objects 3363 * representing runs of text in logical order, each run containing text at a 3364 * single level. The elements at <code>index</code> from 3365 * <code>objectStart</code> up to <code>objectStart + count</code> in the 3366 * objects array will be reordered into visual order assuming 3367 * each run of text has the level indicated by the corresponding element in 3368 * the levels array (at <code>index - objectStart + levelStart</code>). 3369 * 3370 * @param levels an array representing the bidi level of each object 3371 * @param levelStart the start position in the levels array 3372 * @param objects the array of objects to be reordered into visual order 3373 * @param objectStart the start position in the objects array 3374 * @param count the number of objects to reorder 3375 * @stable ICU 3.8 3376 */ 3377 public static void reorderVisually(byte[] levels, 3378 int levelStart, 3379 Object[] objects, 3380 int objectStart, 3381 int count) 3382 { 3383 if (0 > levelStart || levels.length <= levelStart) { 3384 throw new IllegalArgumentException("Value levelStart " + 3385 levelStart + " is out of range 0 to " + 3386 (levels.length-1)); 3387 } 3388 if (0 > objectStart || objects.length <= objectStart) { 3389 throw new IllegalArgumentException("Value objectStart " + 3390 levelStart + " is out of range 0 to " + 3391 (objects.length-1)); 3392 } 3393 if (0 > count || objects.length < (objectStart+count)) { 3394 throw new IllegalArgumentException("Value count " + 3395 levelStart + " is out of range 0 to " + 3396 (objects.length - objectStart)); 3397 } 3398 byte[] reorderLevels = new byte[count]; 3399 System.arraycopy(levels, levelStart, reorderLevels, 0, count); 3400 int[] indexMap = reorderVisual(reorderLevels); 3401 Object[] temp = new Object[count]; 3402 System.arraycopy(objects, objectStart, temp, 0, count); 3403 for (int i = 0; i < count; ++i) { 3404 objects[objectStart + i] = temp[indexMap[i]]; 3405 } 3406 } 3407 3408 /** 3409 * Display the bidi internal state, used in debugging. 3410 */ 3411 public String toString() { 3412 StringBuilder buf = new StringBuilder(getClass().getName()); 3413 3414 buf.append("[dir: "); 3415 buf.append(direction); 3416 buf.append(" baselevel: "); 3417 buf.append(paraLevel); 3418 buf.append(" length: "); 3419 buf.append(length); 3420 buf.append(" runs: "); 3421 if (levels == null) { 3422 buf.append("none"); 3423 } else { 3424 buf.append('['); 3425 buf.append(levels[0]); 3426 for (int i = 1; i < levels.length; i++) { 3427 buf.append(' '); 3428 buf.append(levels[i]); 3429 } 3430 buf.append(']'); 3431 } 3432 buf.append(" text: [0x"); 3433 buf.append(Integer.toHexString(text[0])); 3434 for (int i = 1; i < text.length; i++) { 3435 buf.append(" 0x"); 3436 buf.append(Integer.toHexString(text[i])); 3437 } 3438 buf.append("]]"); 3439 3440 return buf.toString(); 3441 } 3442 3443 /** 3444 * A class that provides access to constants defined by 3445 * java.awt.font.TextAttribute without creating a static dependency. 3446 */ 3447 private static class TextAttributeConstants { 3448 // Make sure to load the AWT's TextAttribute class before using the constants, if any. 3449 static { 3450 try { 3451 Class.forName("java.awt.font.TextAttribute", true, null); 3452 } catch (ClassNotFoundException e) {} 3453 } 3454 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 3455 3456 /** 3457 * TextAttribute instances (or a fake Attribute type if 3458 * java.awt.font.TextAttribute is not present) 3459 */ 3460 static final AttributedCharacterIterator.Attribute RUN_DIRECTION = 3461 getTextAttribute("RUN_DIRECTION"); 3462 static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING = 3463 getTextAttribute("NUMERIC_SHAPING"); 3464 static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING = 3465 getTextAttribute("BIDI_EMBEDDING"); 3466 3467 /** 3468 * TextAttribute.RUN_DIRECTION_LTR 3469 */ 3470 static final Boolean RUN_DIRECTION_LTR = (jafa == null) ? 3471 Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR"); 3472 3473 @SuppressWarnings("serial") 3474 private static AttributedCharacterIterator.Attribute 3475 getTextAttribute(String name) 3476 { 3477 if (jafa == null) { 3478 // fake attribute 3479 return new AttributedCharacterIterator.Attribute(name) { }; 3480 } else { 3481 return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name); 3482 } 3483 } 3484 } 3485 3486 /** 3487 * A class that provides access to java.awt.font.NumericShaper without 3488 * creating a static dependency. 3489 */ 3490 private static class NumericShapings { 3491 // Make sure to load the AWT's NumericShaper class before calling shape, if any. 3492 static { 3493 try { 3494 Class.forName("java.awt.font.NumericShaper", true, null); 3495 } catch (ClassNotFoundException e) {} 3496 } 3497 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 3498 3499 /** 3500 * Invokes NumericShaping shape(text,start,count) method. 3501 */ 3502 static void shape(Object shaper, char[] text, int start, int count) { 3503 if (jafa != null) { 3504 jafa.shape(shaper, text, start, count); 3505 } 3506 } 3507 } 3508 }