1 /* 2 * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 /* 26 ******************************************************************************* 27 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * 28 * * 29 * The original version of this source code and documentation is copyrighted * 30 * and owned by IBM, These materials are provided under terms of a License * 31 * Agreement between IBM and Sun. This technology is protected by multiple * 32 * US and International patents. This notice and attribution to IBM may not * 33 * to removed. * 34 ******************************************************************************* 35 */ 36 37 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 38 * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre 39 * concept of RUNS_ONLY which is a double operation. 40 * It could be advantageous to divide this into 3 concepts: 41 * a) Operation: direct / inverse / RUNS_ONLY 42 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L 43 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 44 * This would allow combinations not possible today like RUNS_ONLY with 45 * NUMBERS_SPECIAL. 46 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 47 * REMOVE_CONTROLS for the inverse step. 48 * Not all combinations would be supported, and probably not all do make sense. 49 * This would need to document which ones are supported and what are the 50 * fallbacks for unsupported combinations. 51 */ 52 53 package sun.text.bidi; 54 55 import java.io.IOException; 56 import java.lang.reflect.Array; 57 import java.text.AttributedCharacterIterator; 58 import java.text.Bidi; 59 import java.util.Arrays; 60 import java.util.MissingResourceException; 61 import sun.misc.JavaAWTFontAccess; 62 import sun.misc.SharedSecrets; 63 import sun.text.normalizer.UBiDiProps; 64 import sun.text.normalizer.UCharacter; 65 import sun.text.normalizer.UTF16; 66 67 /** 68 * 69 * <h2>Bidi algorithm for ICU</h2> 70 * 71 * This is an implementation of the Unicode Bidirectional algorithm. The 72 * algorithm is defined in the <a 73 * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 74 * version 13, also described in The Unicode Standard, Version 4.0 . 75 * <p> 76 * 77 * Note: Libraries that perform a bidirectional algorithm and reorder strings 78 * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and 79 * shaping (ArabicShaping) classes can be used at the core of such "Storage 80 * Layout Engines". 81 * 82 * <h3>General remarks about the API:</h3> 83 * 84 * The "limit" of a sequence of characters is the position just after 85 * their last character, i.e., one more than that position. 86 * <p> 87 * 88 * Some of the API methods provide access to "runs". Such a 89 * "run" is defined as a sequence of characters that are at the same 90 * embedding level after performing the Bidi algorithm. 91 * 92 * <h3>Basic concept: paragraph</h3> 93 * A piece of text can be divided into several paragraphs by characters 94 * with the Bidi class <code>Block Separator</code>. For handling of 95 * paragraphs, see: 96 * <ul> 97 * <li>{@link #countParagraphs} 98 * <li>{@link #getParaLevel} 99 * <li>{@link #getParagraph} 100 * <li>{@link #getParagraphByIndex} 101 * </ul> 102 * 103 * <h3>Basic concept: text direction</h3> 104 * The direction of a piece of text may be: 105 * <ul> 106 * <li>{@link #LTR} 107 * <li>{@link #RTL} 108 * <li>{@link #MIXED} 109 * </ul> 110 * 111 * <h3>Basic concept: levels</h3> 112 * 113 * Levels in this API represent embedding levels according to the Unicode 114 * Bidirectional Algorithm. 115 * Their low-order bit (even/odd value) indicates the visual direction.<p> 116 * 117 * Levels can be abstract values when used for the 118 * <code>paraLevel</code> and <code>embeddingLevels</code> 119 * arguments of <code>setPara()</code>; there: 120 * <ul> 121 * <li>the high-order bit of an <code>embeddingLevels[]</code> 122 * value indicates whether the using application is 123 * specifying the level of a character to <i>override</i> whatever the 124 * Bidi implementation would resolve it to.</li> 125 * <li><code>paraLevel</code> can be set to the 126 * pseudo-level values <code>LEVEL_DEFAULT_LTR</code> 127 * and <code>LEVEL_DEFAULT_RTL</code>.</li> 128 * </ul> 129 * 130 * <p>The related constants are not real, valid level values. 131 * <code>DEFAULT_XXX</code> can be used to specify 132 * a default for the paragraph level for 133 * when the <code>setPara()</code> method 134 * shall determine it but there is no 135 * strongly typed character in the input.<p> 136 * 137 * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even 138 * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd, 139 * just like with normal LTR and RTL level values - 140 * these special values are designed that way. Also, the implementation 141 * assumes that MAX_EXPLICIT_LEVEL is odd. 142 * 143 * <p><b>See Also:</b> 144 * <ul> 145 * <li>{@link #LEVEL_DEFAULT_LTR} 146 * <li>{@link #LEVEL_DEFAULT_RTL} 147 * <li>{@link #LEVEL_OVERRIDE} 148 * <li>{@link #MAX_EXPLICIT_LEVEL} 149 * <li>{@link #setPara} 150 * </ul> 151 * 152 * <h3>Basic concept: Reordering Mode</h3> 153 * Reordering mode values indicate which variant of the Bidi algorithm to 154 * use. 155 * 156 * <p><b>See Also:</b> 157 * <ul> 158 * <li>{@link #setReorderingMode} 159 * <li>{@link #REORDER_DEFAULT} 160 * <li>{@link #REORDER_NUMBERS_SPECIAL} 161 * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R} 162 * <li>{@link #REORDER_RUNS_ONLY} 163 * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L} 164 * <li>{@link #REORDER_INVERSE_LIKE_DIRECT} 165 * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL} 166 * </ul> 167 * 168 * <h3>Basic concept: Reordering Options</h3> 169 * Reordering options can be applied during Bidi text transformations. 170 * <p><b>See Also:</b> 171 * <ul> 172 * <li>{@link #setReorderingOptions} 173 * <li>{@link #OPTION_DEFAULT} 174 * <li>{@link #OPTION_INSERT_MARKS} 175 * <li>{@link #OPTION_REMOVE_CONTROLS} 176 * <li>{@link #OPTION_STREAMING} 177 * </ul> 178 * 179 * 180 * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer) 181 * @stable ICU 3.8 182 * 183 * 184 * <h4> Sample code for the ICU Bidi API </h4> 185 * 186 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 187 * 188 * This is (hypothetical) sample code that illustrates how the ICU Bidi API 189 * could be used to render a paragraph of text. Rendering code depends highly on 190 * the graphics system, therefore this sample code must make a lot of 191 * assumptions, which may or may not match any existing graphics system's 192 * properties. 193 * 194 * <p> 195 * The basic assumptions are: 196 * </p> 197 * <ul> 198 * <li>Rendering is done from left to right on a horizontal line.</li> 199 * <li>A run of single-style, unidirectional text can be rendered at once. 200 * </li> 201 * <li>Such a run of text is passed to the graphics system with characters 202 * (code units) in logical order.</li> 203 * <li>The line-breaking algorithm is very complicated and Locale-dependent - 204 * and therefore its implementation omitted from this sample code.</li> 205 * </ul> 206 * 207 * <pre>{@code 208 * 209 * package com.ibm.icu.dev.test.bidi; 210 * 211 * import com.ibm.icu.text.Bidi; 212 * import com.ibm.icu.text.BidiRun; 213 * 214 * public class Sample { 215 * 216 * static final int styleNormal = 0; 217 * static final int styleSelected = 1; 218 * static final int styleBold = 2; 219 * static final int styleItalics = 4; 220 * static final int styleSuper=8; 221 * static final int styleSub = 16; 222 * 223 * static class StyleRun { 224 * int limit; 225 * int style; 226 * 227 * public StyleRun(int limit, int style) { 228 * this.limit = limit; 229 * this.style = style; 230 * } 231 * } 232 * 233 * static class Bounds { 234 * int start; 235 * int limit; 236 * 237 * public Bounds(int start, int limit) { 238 * this.start = start; 239 * this.limit = limit; 240 * } 241 * } 242 * 243 * static int getTextWidth(String text, int start, int limit, 244 * StyleRun[] styleRuns, int styleRunCount) { 245 * // simplistic way to compute the width 246 * return limit - start; 247 * } 248 * 249 * // set limit and StyleRun limit for a line 250 * // from text[start] and from styleRuns[styleRunStart] 251 * // using Bidi.getLogicalRun(...) 252 * // returns line width 253 * static int getLineBreak(String text, Bounds line, Bidi para, 254 * StyleRun styleRuns[], Bounds styleRun) { 255 * // dummy return 256 * return 0; 257 * } 258 * 259 * // render runs on a line sequentially, always from left to right 260 * 261 * // prepare rendering a new line 262 * static void startLine(byte textDirection, int lineWidth) { 263 * System.out.println(); 264 * } 265 * 266 * // render a run of text and advance to the right by the run width 267 * // the text[start..limit-1] is always in logical order 268 * static void renderRun(String text, int start, int limit, 269 * byte textDirection, int style) { 270 * } 271 * 272 * // We could compute a cross-product 273 * // from the style runs with the directional runs 274 * // and then reorder it. 275 * // Instead, here we iterate over each run type 276 * // and render the intersections - 277 * // with shortcuts in simple (and common) cases. 278 * // renderParagraph() is the main function. 279 * 280 * // render a directional run with 281 * // (possibly) multiple style runs intersecting with it 282 * static void renderDirectionalRun(String text, int start, int limit, 283 * byte direction, StyleRun styleRuns[], 284 * int styleRunCount) { 285 * int i; 286 * 287 * // iterate over style runs 288 * if (direction == Bidi.LTR) { 289 * int styleLimit; 290 * for (i = 0; i < styleRunCount; ++i) { 291 * styleLimit = styleRuns[i].limit; 292 * if (start < styleLimit) { 293 * if (styleLimit > limit) { 294 * styleLimit = limit; 295 * } 296 * renderRun(text, start, styleLimit, 297 * direction, styleRuns[i].style); 298 * if (styleLimit == limit) { 299 * break; 300 * } 301 * start = styleLimit; 302 * } 303 * } 304 * } else { 305 * int styleStart; 306 * 307 * for (i = styleRunCount-1; i >= 0; --i) { 308 * if (i > 0) { 309 * styleStart = styleRuns[i-1].limit; 310 * } else { 311 * styleStart = 0; 312 * } 313 * if (limit >= styleStart) { 314 * if (styleStart < start) { 315 * styleStart = start; 316 * } 317 * renderRun(text, styleStart, limit, direction, 318 * styleRuns[i].style); 319 * if (styleStart == start) { 320 * break; 321 * } 322 * limit = styleStart; 323 * } 324 * } 325 * } 326 * } 327 * 328 * // the line object represents text[start..limit-1] 329 * static void renderLine(Bidi line, String text, int start, int limit, 330 * StyleRun styleRuns[], int styleRunCount) { 331 * byte direction = line.getDirection(); 332 * if (direction != Bidi.MIXED) { 333 * // unidirectional 334 * if (styleRunCount <= 1) { 335 * renderRun(text, start, limit, direction, styleRuns[0].style); 336 * } else { 337 * renderDirectionalRun(text, start, limit, direction, 338 * styleRuns, styleRunCount); 339 * } 340 * } else { 341 * // mixed-directional 342 * int count, i; 343 * BidiRun run; 344 * 345 * try { 346 * count = line.countRuns(); 347 * } catch (IllegalStateException e) { 348 * e.printStackTrace(); 349 * return; 350 * } 351 * if (styleRunCount <= 1) { 352 * int style = styleRuns[0].style; 353 * 354 * // iterate over directional runs 355 * for (i = 0; i < count; ++i) { 356 * run = line.getVisualRun(i); 357 * renderRun(text, run.getStart(), run.getLimit(), 358 * run.getDirection(), style); 359 * } 360 * } else { 361 * // iterate over both directional and style runs 362 * for (i = 0; i < count; ++i) { 363 * run = line.getVisualRun(i); 364 * renderDirectionalRun(text, run.getStart(), 365 * run.getLimit(), run.getDirection(), 366 * styleRuns, styleRunCount); 367 * } 368 * } 369 * } 370 * } 371 * 372 * static void renderParagraph(String text, byte textDirection, 373 * StyleRun styleRuns[], int styleRunCount, 374 * int lineWidth) { 375 * int length = text.length(); 376 * Bidi para = new Bidi(); 377 * try { 378 * para.setPara(text, 379 * textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL 380 * : Bidi.LEVEL_DEFAULT_LTR, 381 * null); 382 * } catch (Exception e) { 383 * e.printStackTrace(); 384 * return; 385 * } 386 * byte paraLevel = (byte)(1 & para.getParaLevel()); 387 * StyleRun styleRun = new StyleRun(length, styleNormal); 388 * 389 * if (styleRuns == null || styleRunCount <= 0) { 390 * styleRuns = new StyleRun[1]; 391 * styleRunCount = 1; 392 * styleRuns[0] = styleRun; 393 * } 394 * // assume styleRuns[styleRunCount-1].limit>=length 395 * 396 * int width = getTextWidth(text, 0, length, styleRuns, styleRunCount); 397 * if (width <= lineWidth) { 398 * // everything fits onto one line 399 * 400 * // prepare rendering a new line from either left or right 401 * startLine(paraLevel, width); 402 * 403 * renderLine(para, text, 0, length, styleRuns, styleRunCount); 404 * } else { 405 * // we need to render several lines 406 * Bidi line = new Bidi(length, 0); 407 * int start = 0, limit; 408 * int styleRunStart = 0, styleRunLimit; 409 * 410 * for (;;) { 411 * limit = length; 412 * styleRunLimit = styleRunCount; 413 * width = getLineBreak(text, new Bounds(start, limit), 414 * para, styleRuns, 415 * new Bounds(styleRunStart, styleRunLimit)); 416 * try { 417 * line = para.setLine(start, limit); 418 * } catch (Exception e) { 419 * e.printStackTrace(); 420 * return; 421 * } 422 * // prepare rendering a new line 423 * // from either left or right 424 * startLine(paraLevel, width); 425 * 426 * if (styleRunStart > 0) { 427 * int newRunCount = styleRuns.length - styleRunStart; 428 * StyleRun[] newRuns = new StyleRun[newRunCount]; 429 * System.arraycopy(styleRuns, styleRunStart, newRuns, 0, 430 * newRunCount); 431 * renderLine(line, text, start, limit, newRuns, 432 * styleRunLimit - styleRunStart); 433 * } else { 434 * renderLine(line, text, start, limit, styleRuns, 435 * styleRunLimit - styleRunStart); 436 * } 437 * if (limit == length) { 438 * break; 439 * } 440 * start = limit; 441 * styleRunStart = styleRunLimit - 1; 442 * if (start >= styleRuns[styleRunStart].limit) { 443 * ++styleRunStart; 444 * } 445 * } 446 * } 447 * } 448 * 449 * public static void main(String[] args) 450 * { 451 * renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80); 452 * renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60); 453 * } 454 * } 455 * 456 * }</pre> 457 */ 458 459 public class BidiBase { 460 461 class Point { 462 int pos; /* position in text */ 463 int flag; /* flag for LRM/RLM, before/after */ 464 } 465 466 class InsertPoints { 467 int size; 468 int confirmed; 469 Point[] points = new Point[0]; 470 } 471 472 /** Paragraph level setting<p> 473 * 474 * Constant indicating that the base direction depends on the first strong 475 * directional character in the text according to the Unicode Bidirectional 476 * Algorithm. If no strong directional character is present, 477 * then set the paragraph level to 0 (left-to-right).<p> 478 * 479 * If this value is used in conjunction with reordering modes 480 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 481 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 482 * is assumed to be visual LTR, and the text after reordering is required 483 * to be the corresponding logical string with appropriate contextual 484 * direction. The direction of the result string will be RTL if either 485 * the righmost or leftmost strong character of the source text is RTL 486 * or Arabic Letter, the direction will be LTR otherwise.<p> 487 * 488 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 489 * be added at the beginning of the result string to ensure round trip 490 * (that the result string, when reordered back to visual, will produce 491 * the original source text). 492 * @see #REORDER_INVERSE_LIKE_DIRECT 493 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 494 * @stable ICU 3.8 495 */ 496 public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e; 497 498 /** Paragraph level setting<p> 499 * 500 * Constant indicating that the base direction depends on the first strong 501 * directional character in the text according to the Unicode Bidirectional 502 * Algorithm. If no strong directional character is present, 503 * then set the paragraph level to 1 (right-to-left).<p> 504 * 505 * If this value is used in conjunction with reordering modes 506 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 507 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 508 * is assumed to be visual LTR, and the text after reordering is required 509 * to be the corresponding logical string with appropriate contextual 510 * direction. The direction of the result string will be RTL if either 511 * the righmost or leftmost strong character of the source text is RTL 512 * or Arabic Letter, or if the text contains no strong character; 513 * the direction will be LTR otherwise.<p> 514 * 515 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 516 * be added at the beginning of the result string to ensure round trip 517 * (that the result string, when reordered back to visual, will produce 518 * the original source text). 519 * @see #REORDER_INVERSE_LIKE_DIRECT 520 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 521 * @stable ICU 3.8 522 */ 523 public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f; 524 525 /** 526 * Maximum explicit embedding level. 527 * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>). 528 * @stable ICU 3.8 529 */ 530 public static final byte MAX_EXPLICIT_LEVEL = 61; 531 532 /** 533 * Bit flag for level input. 534 * Overrides directional properties. 535 * @stable ICU 3.8 536 */ 537 public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80; 538 539 /** 540 * Special value which can be returned by the mapping methods when a 541 * logical index has no corresponding visual index or vice-versa. This may 542 * happen for the logical-to-visual mapping of a Bidi control when option 543 * <code>OPTION_REMOVE_CONTROLS</code> is 544 * specified. This can also happen for the visual-to-logical mapping of a 545 * Bidi mark (LRM or RLM) inserted by option 546 * <code>OPTION_INSERT_MARKS</code>. 547 * @see #getVisualIndex 548 * @see #getVisualMap 549 * @see #getLogicalIndex 550 * @see #getLogicalMap 551 * @see #OPTION_INSERT_MARKS 552 * @see #OPTION_REMOVE_CONTROLS 553 * @stable ICU 3.8 554 */ 555 public static final int MAP_NOWHERE = -1; 556 557 /** 558 * Mixed-directional text. 559 * @stable ICU 3.8 560 */ 561 public static final byte MIXED = 2; 562 563 /** 564 * option bit for writeReordered(): 565 * replace characters with the "mirrored" property in RTL runs 566 * by their mirror-image mappings 567 * 568 * @see #writeReordered 569 * @stable ICU 3.8 570 */ 571 public static final short DO_MIRRORING = 2; 572 573 /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode. 574 * @see #setReorderingMode 575 * @stable ICU 3.8 576 */ 577 private static final short REORDER_DEFAULT = 0; 578 579 /** Reordering mode: Logical to Visual algorithm which handles numbers in 580 * a way which mimicks the behavior of Windows XP. 581 * @see #setReorderingMode 582 * @stable ICU 3.8 583 */ 584 private static final short REORDER_NUMBERS_SPECIAL = 1; 585 586 /** Reordering mode: Logical to Visual algorithm grouping numbers with 587 * adjacent R characters (reversible algorithm). 588 * @see #setReorderingMode 589 * @stable ICU 3.8 590 */ 591 private static final short REORDER_GROUP_NUMBERS_WITH_R = 2; 592 593 /** Reordering mode: Reorder runs only to transform a Logical LTR string 594 * to the logical RTL string with the same display, or vice-versa.<br> 595 * If this mode is set together with option 596 * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source 597 * text may be removed and other controls may be added to produce the 598 * minimum combination which has the required display. 599 * @see #OPTION_INSERT_MARKS 600 * @see #setReorderingMode 601 * @stable ICU 3.8 602 */ 603 private static final short REORDER_RUNS_ONLY = 3; 604 605 /** Reordering mode: Visual to Logical algorithm which handles numbers 606 * like L (same algorithm as selected by <code>setInverse(true)</code>. 607 * @see #setInverse 608 * @see #setReorderingMode 609 * @stable ICU 3.8 610 */ 611 private static final short REORDER_INVERSE_NUMBERS_AS_L = 4; 612 613 /** Reordering mode: Visual to Logical algorithm equivalent to the regular 614 * Logical to Visual algorithm. 615 * @see #setReorderingMode 616 * @stable ICU 3.8 617 */ 618 private static final short REORDER_INVERSE_LIKE_DIRECT = 5; 619 620 /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the 621 * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. 622 * @see #setReorderingMode 623 * @stable ICU 3.8 624 */ 625 private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6; 626 627 /* Reordering mode values must be ordered so that all the regular logical to 628 * visual modes come first, and all inverse Bidi modes come last. 629 */ 630 private static final short REORDER_LAST_LOGICAL_TO_VISUAL = 631 REORDER_NUMBERS_SPECIAL; 632 633 /** 634 * Option bit for <code>setReorderingOptions</code>: 635 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 636 * a reordering to a Logical order 637 * 638 * <p>This option must be set or reset before calling 639 * <code>setPara</code>.</p> 640 * 641 * <p>This option is significant only with reordering modes which generate 642 * a result with Logical order, specifically.</p> 643 * <ul> 644 * <li><code>REORDER_RUNS_ONLY</code></li> 645 * <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li> 646 * <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li> 647 * <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 648 * </ul> 649 * 650 * <p>If this option is set in conjunction with reordering mode 651 * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 652 * <code>setInverse(true)</code>, it implies option 653 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 654 * <code>writeReordered()</code>.</p> 655 * 656 * <p>For other reordering modes, a minimum number of LRM or RLM characters 657 * will be added to the source text after reordering it so as to ensure 658 * round trip, i.e. when applying the inverse reordering mode on the 659 * resulting logical text with removal of Bidi marks 660 * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling 661 * <code>setPara()</code> or option 662 * <code>REMOVE_BIDI_CONTROLS</code> in 663 * <code>writeReordered</code>), the result will be identical to the 664 * source text in the first transformation. 665 * 666 * <p>This option will be ignored if specified together with option 667 * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option 668 * <code>REMOVE_BIDI_CONTROLS</code> in calls to method 669 * <code>writeReordered()</code> and it implies option 670 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 671 * <code>writeReordered()</code> if the reordering mode is 672 * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 673 * 674 * @see #setReorderingMode 675 * @see #setReorderingOptions 676 * @see #INSERT_LRM_FOR_NUMERIC 677 * @see #REMOVE_BIDI_CONTROLS 678 * @see #OPTION_REMOVE_CONTROLS 679 * @see #REORDER_RUNS_ONLY 680 * @see #REORDER_INVERSE_NUMBERS_AS_L 681 * @see #REORDER_INVERSE_LIKE_DIRECT 682 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 683 * @stable ICU 3.8 684 */ 685 private static final int OPTION_INSERT_MARKS = 1; 686 687 /** 688 * Option bit for <code>setReorderingOptions</code>: 689 * remove Bidi control characters 690 * 691 * <p>This option must be set or reset before calling 692 * <code>setPara</code>.</p> 693 * 694 * <p>This option nullifies option 695 * <code>OPTION_INSERT_MARKS</code>. It inhibits option 696 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 697 * <code>writeReordered()</code> and it implies option 698 * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p> 699 * 700 * @see #setReorderingMode 701 * @see #setReorderingOptions 702 * @see #OPTION_INSERT_MARKS 703 * @see #INSERT_LRM_FOR_NUMERIC 704 * @see #REMOVE_BIDI_CONTROLS 705 * @stable ICU 3.8 706 */ 707 private static final int OPTION_REMOVE_CONTROLS = 2; 708 709 /** 710 * Option bit for <code>setReorderingOptions</code>: 711 * process the output as part of a stream to be continued 712 * 713 * <p>This option must be set or reset before calling 714 * <code>setPara</code>.</p> 715 * 716 * <p>This option specifies that the caller is interested in processing 717 * large text object in parts. The results of the successive calls are 718 * expected to be concatenated by the caller. Only the call for the last 719 * part will have this option bit off.</p> 720 * 721 * <p>When this option bit is on, <code>setPara()</code> may process 722 * less than the full source text in order to truncate the text at a 723 * meaningful boundary. The caller should call 724 * <code>getProcessedLength()</code> immediately after calling 725 * <code>setPara()</code> in order to determine how much of the source 726 * text has been processed. Source text beyond that length should be 727 * resubmitted in following calls to <code>setPara</code>. The 728 * processed length may be less than the length of the source text if a 729 * character preceding the last character of the source text constitutes a 730 * reasonable boundary (like a block separator) for text to be continued.<br> 731 * If the last character of the source text constitutes a reasonable 732 * boundary, the whole text will be processed at once.<br> 733 * If nowhere in the source text there exists 734 * such a reasonable boundary, the processed length will be zero.<br> 735 * The caller should check for such an occurrence and do one of the following: 736 * <ul><li>submit a larger amount of text with a better chance to include 737 * a reasonable boundary.</li> 738 * <li>resubmit the same text after turning off option 739 * <code>OPTION_STREAMING</code>.</li></ul> 740 * In all cases, this option should be turned off before processing the last 741 * part of the text.</p> 742 * 743 * <p>When the <code>OPTION_STREAMING</code> option is used, it is 744 * recommended to call <code>orderParagraphsLTR()</code> with argument 745 * <code>orderParagraphsLTR</code> set to <code>true</code> before calling 746 * <code>setPara()</code> so that later paragraphs may be concatenated to 747 * previous paragraphs on the right. 748 * </p> 749 * 750 * @see #setReorderingMode 751 * @see #setReorderingOptions 752 * @see #getProcessedLength 753 * @see #orderParagraphsLTR 754 * @stable ICU 3.8 755 */ 756 private static final int OPTION_STREAMING = 4; 757 758 /* 759 * Comparing the description of the Bidi algorithm with this implementation 760 * is easier with the same names for the Bidi types in the code as there. 761 * See UCharacterDirection 762 */ 763 private static final byte L = 0; 764 private static final byte R = 1; 765 private static final byte EN = 2; 766 private static final byte ES = 3; 767 private static final byte ET = 4; 768 private static final byte AN = 5; 769 private static final byte CS = 6; 770 static final byte B = 7; 771 private static final byte S = 8; 772 private static final byte WS = 9; 773 private static final byte ON = 10; 774 private static final byte LRE = 11; 775 private static final byte LRO = 12; 776 private static final byte AL = 13; 777 private static final byte RLE = 14; 778 private static final byte RLO = 15; 779 private static final byte PDF = 16; 780 private static final byte NSM = 17; 781 private static final byte BN = 18; 782 783 private static final int MASK_R_AL = (1 << R | 1 << AL); 784 785 private static final char CR = '\r'; 786 private static final char LF = '\n'; 787 788 static final int LRM_BEFORE = 1; 789 static final int LRM_AFTER = 2; 790 static final int RLM_BEFORE = 4; 791 static final int RLM_AFTER = 8; 792 793 /* 794 * reference to parent paragraph object (reference to self if this object is 795 * a paragraph object); set to null in a newly opened object; set to a 796 * real value after a successful execution of setPara or setLine 797 */ 798 BidiBase paraBidi; 799 800 final UBiDiProps bdp; 801 802 /* character array representing the current text */ 803 char[] text; 804 805 /* length of the current text */ 806 int originalLength; 807 808 /* if the option OPTION_STREAMING is set, this is the length of 809 * text actually processed by <code>setPara</code>, which may be shorter 810 * than the original length. Otherwise, it is identical to the original 811 * length. 812 */ 813 public int length; 814 815 /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi 816 * marks are allowed to be inserted in one of the reordering modes, the 817 * length of the result string may be different from the processed length. 818 */ 819 int resultLength; 820 821 /* indicators for whether memory may be allocated after construction */ 822 boolean mayAllocateText; 823 boolean mayAllocateRuns; 824 825 /* arrays with one value per text-character */ 826 byte[] dirPropsMemory = new byte[1]; 827 byte[] levelsMemory = new byte[1]; 828 byte[] dirProps; 829 byte[] levels; 830 831 /* must block separators receive level 0? */ 832 boolean orderParagraphsLTR; 833 834 /* the paragraph level */ 835 byte paraLevel; 836 837 /* original paraLevel when contextual */ 838 /* must be one of DEFAULT_xxx or 0 if not contextual */ 839 byte defaultParaLevel; 840 841 /* the following is set in setPara, used in processPropertySeq */ 842 843 ImpTabPair impTabPair; /* reference to levels state table pair */ 844 845 /* the overall paragraph or line directionality*/ 846 byte direction; 847 848 /* flags is a bit set for which directional properties are in the text */ 849 int flags; 850 851 /* lastArabicPos is index to the last AL in the text, -1 if none */ 852 int lastArabicPos; 853 854 /* characters after trailingWSStart are WS and are */ 855 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ 856 int trailingWSStart; 857 858 /* fields for paragraph handling */ 859 int paraCount; /* set in getDirProps() */ 860 int[] parasMemory = new int[1]; 861 int[] paras; /* limits of paragraphs, filled in 862 ResolveExplicitLevels() or CheckExplicitLevels() */ 863 864 /* for single paragraph text, we only need a tiny array of paras (no allocation) */ 865 int[] simpleParas = {0}; 866 867 /* fields for line reordering */ 868 int runCount; /* ==-1: runs not set up yet */ 869 BidiRun[] runsMemory = new BidiRun[0]; 870 BidiRun[] runs; 871 872 /* for non-mixed text, we only need a tiny array of runs (no allocation) */ 873 BidiRun[] simpleRuns = {new BidiRun()}; 874 875 /* mapping of runs in logical order to visual order */ 876 int[] logicalToVisualRunsMap; 877 878 /* flag to indicate that the map has been updated */ 879 boolean isGoodLogicalToVisualRunsMap; 880 881 /* for inverse Bidi with insertion of directional marks */ 882 InsertPoints insertPoints = new InsertPoints(); 883 884 /* for option OPTION_REMOVE_CONTROLS */ 885 int controlCount; 886 887 /* 888 * Sometimes, bit values are more appropriate 889 * to deal with directionality properties. 890 * Abbreviations in these method names refer to names 891 * used in the Bidi algorithm. 892 */ 893 static int DirPropFlag(byte dir) { 894 return (1 << dir); 895 } 896 897 /* 898 * The following bit is ORed to the property of characters in paragraphs 899 * with contextual RTL direction when paraLevel is contextual. 900 */ 901 static final byte CONTEXT_RTL_SHIFT = 6; 902 static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40 903 static byte NoContextRTL(byte dir) 904 { 905 return (byte)(dir & ~CONTEXT_RTL); 906 } 907 908 /* 909 * The following is a variant of DirProp.DirPropFlag() which ignores the 910 * CONTEXT_RTL bit. 911 */ 912 static int DirPropFlagNC(byte dir) { 913 return (1<<(dir & ~CONTEXT_RTL)); 914 } 915 916 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); 917 918 /* to avoid some conditional statements, use tiny constant arrays */ 919 static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) }; 920 static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) }; 921 static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) }; 922 923 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } 924 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; } 925 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; } 926 927 /* 928 * are there any characters that are LTR? 929 */ 930 static final int MASK_LTR = 931 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO); 932 933 /* 934 * are there any characters that are RTL? 935 */ 936 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO); 937 938 /* explicit embedding codes */ 939 private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO); 940 private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO); 941 private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF); 942 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; 943 944 /* paragraph and segment separators */ 945 private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S); 946 947 /* all types that are counted as White Space or Neutral in some steps */ 948 static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT; 949 private static final int MASK_N = DirPropFlag(ON)|MASK_WS; 950 951 /* types that are neutrals or could becomes neutrals in (Wn) */ 952 private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N; 953 954 /* 955 * These types may be changed to "e", 956 * the embedding type (L or R) of the run, 957 * in the Bidi algorithm (N2) 958 */ 959 static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N; 960 961 /* 962 * the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java 963 */ 964 private static byte GetLRFromLevel(byte level) 965 { 966 return (byte)(level & 1); 967 } 968 969 private static boolean IsDefaultLevel(byte level) 970 { 971 return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR); 972 } 973 974 byte GetParaLevelAt(int index) 975 { 976 return (defaultParaLevel != 0) ? 977 (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel; 978 } 979 980 static boolean IsBidiControlChar(int c) 981 { 982 /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or 983 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 984 return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))); 985 } 986 987 public void verifyValidPara() 988 { 989 if (this != this.paraBidi) { 990 throw new IllegalStateException(""); 991 } 992 } 993 994 public void verifyValidParaOrLine() 995 { 996 BidiBase para = this.paraBidi; 997 /* verify Para */ 998 if (this == para) { 999 return; 1000 } 1001 /* verify Line */ 1002 if ((para == null) || (para != para.paraBidi)) { 1003 throw new IllegalStateException(); 1004 } 1005 } 1006 1007 public void verifyRange(int index, int start, int limit) 1008 { 1009 if (index < start || index >= limit) { 1010 throw new IllegalArgumentException("Value " + index + 1011 " is out of range " + start + " to " + limit); 1012 } 1013 } 1014 1015 public void verifyIndex(int index, int start, int limit) 1016 { 1017 if (index < start || index >= limit) { 1018 throw new ArrayIndexOutOfBoundsException("Index " + index + 1019 " is out of range " + start + " to " + limit); 1020 } 1021 } 1022 1023 /** 1024 * Allocate a <code>Bidi</code> object with preallocated memory 1025 * for internal structures. 1026 * This method provides a <code>Bidi</code> object like the default constructor 1027 * but it also preallocates memory for internal structures 1028 * according to the sizings supplied by the caller.<p> 1029 * The preallocation can be limited to some of the internal memory 1030 * by setting some values to 0 here. That means that if, e.g., 1031 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 1032 * be set to <code>maxLength</code> (the only failproof value) to avoid 1033 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 1034 * and the internal structures that are associated with it will be allocated 1035 * on demand, just like with the default constructor. 1036 * 1037 * @param maxLength is the maximum text or line length that internal memory 1038 * will be preallocated for. An attempt to associate this object with a 1039 * longer text will fail, unless this value is 0, which leaves the allocation 1040 * up to the implementation. 1041 * 1042 * @param maxRunCount is the maximum anticipated number of same-level runs 1043 * that internal memory will be preallocated for. An attempt to access 1044 * visual runs on an object that was not preallocated for as many runs 1045 * as the text was actually resolved to will fail, 1046 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 1047 * The number of runs depends on the actual text and maybe anywhere between 1048 * 1 and <code>maxLength</code>. It is typically small. 1049 * 1050 * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0 1051 * @stable ICU 3.8 1052 */ 1053 public BidiBase(int maxLength, int maxRunCount) 1054 { 1055 /* check the argument values */ 1056 if (maxLength < 0 || maxRunCount < 0) { 1057 throw new IllegalArgumentException(); 1058 } 1059 1060 /* reset the object, all reference variables null, all flags false, 1061 all sizes 0. 1062 In fact, we don't need to do anything, since class members are 1063 initialized as zero when an instance is created. 1064 */ 1065 /* 1066 mayAllocateText = false; 1067 mayAllocateRuns = false; 1068 orderParagraphsLTR = false; 1069 paraCount = 0; 1070 runCount = 0; 1071 trailingWSStart = 0; 1072 flags = 0; 1073 paraLevel = 0; 1074 defaultParaLevel = 0; 1075 direction = 0; 1076 */ 1077 /* get Bidi properties */ 1078 try { 1079 bdp = UBiDiProps.getSingleton(); 1080 } 1081 catch (IOException e) { 1082 throw new MissingResourceException(e.getMessage(), "(BidiProps)", ""); 1083 } 1084 1085 /* allocate memory for arrays as requested */ 1086 if (maxLength > 0) { 1087 getInitialDirPropsMemory(maxLength); 1088 getInitialLevelsMemory(maxLength); 1089 } else { 1090 mayAllocateText = true; 1091 } 1092 1093 if (maxRunCount > 0) { 1094 // if maxRunCount == 1, use simpleRuns[] 1095 if (maxRunCount > 1) { 1096 getInitialRunsMemory(maxRunCount); 1097 } 1098 } else { 1099 mayAllocateRuns = true; 1100 } 1101 } 1102 1103 /* 1104 * We are allowed to allocate memory if object==null or 1105 * mayAllocate==true for each array that we need. 1106 * 1107 * Assume sizeNeeded>0. 1108 * If object != null, then assume size > 0. 1109 */ 1110 private Object getMemory(String label, Object array, Class<?> arrayClass, 1111 boolean mayAllocate, int sizeNeeded) 1112 { 1113 int len = Array.getLength(array); 1114 1115 /* we have at least enough memory and must not allocate */ 1116 if (sizeNeeded == len) { 1117 return array; 1118 } 1119 if (!mayAllocate) { 1120 /* we must not allocate */ 1121 if (sizeNeeded <= len) { 1122 return array; 1123 } 1124 throw new OutOfMemoryError("Failed to allocate memory for " 1125 + label); 1126 } 1127 /* we may try to grow or shrink */ 1128 /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid 1129 the allocation altogether and rely on this.length */ 1130 try { 1131 return Array.newInstance(arrayClass, sizeNeeded); 1132 } catch (Exception e) { 1133 throw new OutOfMemoryError("Failed to allocate memory for " 1134 + label); 1135 } 1136 } 1137 1138 /* helper methods for each allocated array */ 1139 private void getDirPropsMemory(boolean mayAllocate, int len) 1140 { 1141 Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len); 1142 dirPropsMemory = (byte[]) array; 1143 } 1144 1145 void getDirPropsMemory(int len) 1146 { 1147 getDirPropsMemory(mayAllocateText, len); 1148 } 1149 1150 private void getLevelsMemory(boolean mayAllocate, int len) 1151 { 1152 Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len); 1153 levelsMemory = (byte[]) array; 1154 } 1155 1156 void getLevelsMemory(int len) 1157 { 1158 getLevelsMemory(mayAllocateText, len); 1159 } 1160 1161 private void getRunsMemory(boolean mayAllocate, int len) 1162 { 1163 Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len); 1164 runsMemory = (BidiRun[]) array; 1165 } 1166 1167 void getRunsMemory(int len) 1168 { 1169 getRunsMemory(mayAllocateRuns, len); 1170 } 1171 1172 /* additional methods used by constructor - always allow allocation */ 1173 private void getInitialDirPropsMemory(int len) 1174 { 1175 getDirPropsMemory(true, len); 1176 } 1177 1178 private void getInitialLevelsMemory(int len) 1179 { 1180 getLevelsMemory(true, len); 1181 } 1182 1183 private void getInitialParasMemory(int len) 1184 { 1185 Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len); 1186 parasMemory = (int[]) array; 1187 } 1188 1189 private void getInitialRunsMemory(int len) 1190 { 1191 getRunsMemory(true, len); 1192 } 1193 1194 /* perform (P2)..(P3) ------------------------------------------------------- */ 1195 1196 private void getDirProps() 1197 { 1198 int i = 0, i0, i1; 1199 flags = 0; /* collect all directionalities in the text */ 1200 int uchar; 1201 byte dirProp; 1202 byte paraDirDefault = 0; /* initialize to avoid compiler warnings */ 1203 boolean isDefaultLevel = IsDefaultLevel(paraLevel); 1204 /* for inverse Bidi, the default para level is set to RTL if there is a 1205 strong R or AL character at either end of the text */ 1206 lastArabicPos = -1; 1207 controlCount = 0; 1208 1209 final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */ 1210 final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */ 1211 final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */ 1212 1213 int state; 1214 int paraStart = 0; /* index of first char in paragraph */ 1215 byte paraDir; /* == CONTEXT_RTL within paragraphs 1216 starting with strong R char */ 1217 byte lastStrongDir=0; /* for default level & inverse Bidi */ 1218 int lastStrongLTR=0; /* for STREAMING option */ 1219 1220 if (isDefaultLevel) { 1221 paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0; 1222 paraDir = paraDirDefault; 1223 lastStrongDir = paraDirDefault; 1224 state = LOOKING_FOR_STRONG; 1225 } else { 1226 state = NOT_CONTEXTUAL; 1227 paraDir = 0; 1228 } 1229 /* count paragraphs and determine the paragraph level (P2..P3) */ 1230 /* 1231 * see comment on constant fields: 1232 * the LEVEL_DEFAULT_XXX values are designed so that 1233 * their low-order bit alone yields the intended default 1234 */ 1235 1236 for (i = 0; i < originalLength; /* i is incremented in the loop */) { 1237 i0 = i; /* index of first code unit */ 1238 uchar = UTF16.charAt(text, 0, originalLength, i); 1239 i += Character.charCount(uchar); 1240 i1 = i - 1; /* index of last code unit, gets the directional property */ 1241 1242 dirProp = (byte)bdp.getClass(uchar); 1243 1244 flags |= DirPropFlag(dirProp); 1245 dirProps[i1] = (byte)(dirProp | paraDir); 1246 if (i1 > i0) { /* set previous code units' properties to BN */ 1247 flags |= DirPropFlag(BN); 1248 do { 1249 dirProps[--i1] = (byte)(BN | paraDir); 1250 } while (i1 > i0); 1251 } 1252 if (state == LOOKING_FOR_STRONG) { 1253 if (dirProp == L) { 1254 state = FOUND_STRONG_CHAR; 1255 if (paraDir != 0) { 1256 paraDir = 0; 1257 for (i1 = paraStart; i1 < i; i1++) { 1258 dirProps[i1] &= ~CONTEXT_RTL; 1259 } 1260 } 1261 continue; 1262 } 1263 if (dirProp == R || dirProp == AL) { 1264 state = FOUND_STRONG_CHAR; 1265 if (paraDir == 0) { 1266 paraDir = CONTEXT_RTL; 1267 for (i1 = paraStart; i1 < i; i1++) { 1268 dirProps[i1] |= CONTEXT_RTL; 1269 } 1270 } 1271 continue; 1272 } 1273 } 1274 if (dirProp == L) { 1275 lastStrongDir = 0; 1276 lastStrongLTR = i; /* i is index to next character */ 1277 } 1278 else if (dirProp == R) { 1279 lastStrongDir = CONTEXT_RTL; 1280 } 1281 else if (dirProp == AL) { 1282 lastStrongDir = CONTEXT_RTL; 1283 lastArabicPos = i-1; 1284 } 1285 else if (dirProp == B) { 1286 if (i < originalLength) { /* B not last char in text */ 1287 if (!((uchar == (int)CR) && (text[i] == (int)LF))) { 1288 paraCount++; 1289 } 1290 if (isDefaultLevel) { 1291 state=LOOKING_FOR_STRONG; 1292 paraStart = i; /* i is index to next character */ 1293 paraDir = paraDirDefault; 1294 lastStrongDir = paraDirDefault; 1295 } 1296 } 1297 } 1298 } 1299 if (isDefaultLevel) { 1300 paraLevel = GetParaLevelAt(0); 1301 } 1302 1303 /* The following line does nothing new for contextual paraLevel, but is 1304 needed for absolute paraLevel. */ 1305 flags |= DirPropFlagLR(paraLevel); 1306 1307 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1308 flags |= DirPropFlag(L); 1309 } 1310 } 1311 1312 /* perform (X1)..(X9) ------------------------------------------------------- */ 1313 1314 /* determine if the text is mixed-directional or single-directional */ 1315 private byte directionFromFlags() { 1316 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 1317 if (!((flags & MASK_RTL) != 0 || 1318 ((flags & DirPropFlag(AN)) != 0 && 1319 (flags & MASK_POSSIBLE_N) != 0))) { 1320 return Bidi.DIRECTION_LEFT_TO_RIGHT; 1321 } else if ((flags & MASK_LTR) == 0) { 1322 return Bidi.DIRECTION_RIGHT_TO_LEFT; 1323 } else { 1324 return MIXED; 1325 } 1326 } 1327 1328 /* 1329 * Resolve the explicit levels as specified by explicit embedding codes. 1330 * Recalculate the flags to have them reflect the real properties 1331 * after taking the explicit embeddings into account. 1332 * 1333 * The Bidi algorithm is designed to result in the same behavior whether embedding 1334 * levels are externally specified (from "styled text", supposedly the preferred 1335 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. 1336 * That is why (X9) instructs to remove all explicit codes (and BN). 1337 * However, in a real implementation, this removal of these codes and their index 1338 * positions in the plain text is undesirable since it would result in 1339 * reallocated, reindexed text. 1340 * Instead, this implementation leaves the codes in there and just ignores them 1341 * in the subsequent processing. 1342 * In order to get the same reordering behavior, positions with a BN or an 1343 * explicit embedding code just get the same level assigned as the last "real" 1344 * character. 1345 * 1346 * Some implementations, not this one, then overwrite some of these 1347 * directionality properties at "real" same-level-run boundaries by 1348 * L or R codes so that the resolution of weak types can be performed on the 1349 * entire paragraph at once instead of having to parse it once more and 1350 * perform that resolution on same-level-runs. 1351 * This limits the scope of the implicit rules in effectively 1352 * the same way as the run limits. 1353 * 1354 * Instead, this implementation does not modify these codes. 1355 * On one hand, the paragraph has to be scanned for same-level-runs, but 1356 * on the other hand, this saves another loop to reset these codes, 1357 * or saves making and modifying a copy of dirProps[]. 1358 * 1359 * 1360 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm. 1361 * 1362 * 1363 * Handling the stack of explicit levels (Xn): 1364 * 1365 * With the Bidi stack of explicit levels, 1366 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, 1367 * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61. 1368 * 1369 * In order to have a correct push-pop semantics even in the case of overflows, 1370 * there are two overflow counters: 1371 * - countOver60 is incremented with each LRx at level 60 1372 * - from level 60, one RLx increases the level to 61 1373 * - countOver61 is incremented with each LRx and RLx at level 61 1374 * 1375 * Popping levels with PDF must work in the opposite order so that level 61 1376 * is correct at the correct point. Underflows (too many PDFs) must be checked. 1377 * 1378 * This implementation assumes that MAX_EXPLICIT_LEVEL is odd. 1379 */ 1380 private byte resolveExplicitLevels() { 1381 int i = 0; 1382 byte dirProp; 1383 byte level = GetParaLevelAt(0); 1384 1385 byte dirct; 1386 int paraIndex = 0; 1387 1388 /* determine if the text is mixed-directional or single-directional */ 1389 dirct = directionFromFlags(); 1390 1391 /* we may not need to resolve any explicit levels, but for multiple 1392 paragraphs we want to loop on all chars to set the para boundaries */ 1393 if ((dirct != MIXED) && (paraCount == 1)) { 1394 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 1395 } else if ((paraCount == 1) && 1396 ((flags & MASK_EXPLICIT) == 0)) { 1397 /* mixed, but all characters are at the same embedding level */ 1398 /* or we are in "inverse Bidi" */ 1399 /* and we don't have contextual multiple paragraphs with some B char */ 1400 /* set all levels to the paragraph level */ 1401 for (i = 0; i < length; ++i) { 1402 levels[i] = level; 1403 } 1404 } else { 1405 /* continue to perform (Xn) */ 1406 1407 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 1408 /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */ 1409 byte embeddingLevel = level; 1410 byte newLevel; 1411 byte stackTop = 0; 1412 1413 byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */ 1414 int countOver60 = 0; 1415 int countOver61 = 0; /* count overflows of explicit levels */ 1416 1417 /* recalculate the flags */ 1418 flags = 0; 1419 1420 for (i = 0; i < length; ++i) { 1421 dirProp = NoContextRTL(dirProps[i]); 1422 switch(dirProp) { 1423 case LRE: 1424 case LRO: 1425 /* (X3, X5) */ 1426 newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */ 1427 if (newLevel <= MAX_EXPLICIT_LEVEL) { 1428 stack[stackTop] = embeddingLevel; 1429 ++stackTop; 1430 embeddingLevel = newLevel; 1431 if (dirProp == LRO) { 1432 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1433 } 1434 /* we don't need to set LEVEL_OVERRIDE off for LRE 1435 since this has already been done for newLevel which is 1436 the source for embeddingLevel. 1437 */ 1438 } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) { 1439 ++countOver61; 1440 } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ { 1441 ++countOver60; 1442 } 1443 flags |= DirPropFlag(BN); 1444 break; 1445 case RLE: 1446 case RLO: 1447 /* (X2, X4) */ 1448 newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */ 1449 if (newLevel<=MAX_EXPLICIT_LEVEL) { 1450 stack[stackTop] = embeddingLevel; 1451 ++stackTop; 1452 embeddingLevel = newLevel; 1453 if (dirProp == RLO) { 1454 embeddingLevel |= INTERNAL_LEVEL_OVERRIDE; 1455 } 1456 /* we don't need to set LEVEL_OVERRIDE off for RLE 1457 since this has already been done for newLevel which is 1458 the source for embeddingLevel. 1459 */ 1460 } else { 1461 ++countOver61; 1462 } 1463 flags |= DirPropFlag(BN); 1464 break; 1465 case PDF: 1466 /* (X7) */ 1467 /* handle all the overflow cases first */ 1468 if (countOver61 > 0) { 1469 --countOver61; 1470 } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) { 1471 /* handle LRx overflows from level 60 */ 1472 --countOver60; 1473 } else if (stackTop > 0) { 1474 /* this is the pop operation; it also pops level 61 while countOver60>0 */ 1475 --stackTop; 1476 embeddingLevel = stack[stackTop]; 1477 /* } else { (underflow) */ 1478 } 1479 flags |= DirPropFlag(BN); 1480 break; 1481 case B: 1482 stackTop = 0; 1483 countOver60 = 0; 1484 countOver61 = 0; 1485 level = GetParaLevelAt(i); 1486 if ((i + 1) < length) { 1487 embeddingLevel = GetParaLevelAt(i+1); 1488 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1489 paras[paraIndex++] = i+1; 1490 } 1491 } 1492 flags |= DirPropFlag(B); 1493 break; 1494 case BN: 1495 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 1496 /* they will get their levels set correctly in adjustWSLevels() */ 1497 flags |= DirPropFlag(BN); 1498 break; 1499 default: 1500 /* all other types get the "real" level */ 1501 if (level != embeddingLevel) { 1502 level = embeddingLevel; 1503 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1504 flags |= DirPropFlagO(level) | DirPropFlagMultiRuns; 1505 } else { 1506 flags |= DirPropFlagE(level) | DirPropFlagMultiRuns; 1507 } 1508 } 1509 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 1510 flags |= DirPropFlag(dirProp); 1511 } 1512 break; 1513 } 1514 1515 /* 1516 * We need to set reasonable levels even on BN codes and 1517 * explicit codes because we will later look at same-level runs (X10). 1518 */ 1519 levels[i] = level; 1520 } 1521 if ((flags & MASK_EMBEDDING) != 0) { 1522 flags |= DirPropFlagLR(paraLevel); 1523 } 1524 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1525 flags |= DirPropFlag(L); 1526 } 1527 1528 /* subsequently, ignore the explicit codes and BN (X9) */ 1529 1530 /* again, determine if the text is mixed-directional or single-directional */ 1531 dirct = directionFromFlags(); 1532 } 1533 1534 return dirct; 1535 } 1536 1537 /* 1538 * Use a pre-specified embedding levels array: 1539 * 1540 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 1541 * ignore all explicit codes (X9), 1542 * and check all the preset levels. 1543 * 1544 * Recalculate the flags to have them reflect the real properties 1545 * after taking the explicit embeddings into account. 1546 */ 1547 private byte checkExplicitLevels() { 1548 byte dirProp; 1549 int i; 1550 this.flags = 0; /* collect all directionalities in the text */ 1551 byte level; 1552 int paraIndex = 0; 1553 1554 for (i = 0; i < length; ++i) { 1555 if (levels[i] == 0) { 1556 levels[i] = paraLevel; 1557 } 1558 if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) { 1559 if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) { 1560 levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE); 1561 } else { 1562 levels[i] = paraLevel; 1563 } 1564 } 1565 level = levels[i]; 1566 dirProp = NoContextRTL(dirProps[i]); 1567 if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { 1568 /* keep the override flag in levels[i] but adjust the flags */ 1569 level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */ 1570 flags |= DirPropFlagO(level); 1571 } else { 1572 /* set the flags */ 1573 flags |= DirPropFlagE(level) | DirPropFlag(dirProp); 1574 } 1575 1576 if ((level < GetParaLevelAt(i) && 1577 !((0 == level) && (dirProp == B))) || 1578 (MAX_EXPLICIT_LEVEL <level)) { 1579 /* level out of bounds */ 1580 throw new IllegalArgumentException("level " + level + 1581 " out of bounds at index " + i); 1582 } 1583 if ((dirProp == B) && ((i + 1) < length)) { 1584 if (!((text[i] == CR) && (text[i + 1] == LF))) { 1585 paras[paraIndex++] = i + 1; 1586 } 1587 } 1588 } 1589 if ((flags&MASK_EMBEDDING) != 0) { 1590 flags |= DirPropFlagLR(paraLevel); 1591 } 1592 1593 /* determine if the text is mixed-directional or single-directional */ 1594 return directionFromFlags(); 1595 } 1596 1597 /*********************************************************************/ 1598 /* The Properties state machine table */ 1599 /*********************************************************************/ 1600 /* */ 1601 /* All table cells are 8 bits: */ 1602 /* bits 0..4: next state */ 1603 /* bits 5..7: action to perform (if > 0) */ 1604 /* */ 1605 /* Cells may be of format "n" where n represents the next state */ 1606 /* (except for the rightmost column). */ 1607 /* Cells may also be of format "_(x,y)" where x represents an action */ 1608 /* to perform and y represents the next state. */ 1609 /* */ 1610 /*********************************************************************/ 1611 /* Definitions and type for properties state tables */ 1612 /*********************************************************************/ 1613 private static final int IMPTABPROPS_COLUMNS = 14; 1614 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; 1615 private static short GetStateProps(short cell) { 1616 return (short)(cell & 0x1f); 1617 } 1618 private static short GetActionProps(short cell) { 1619 return (short)(cell >> 5); 1620 } 1621 1622 private static final short groupProp[] = /* dirProp regrouped */ 1623 { 1624 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ 1625 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 1626 }; 1627 private static final short _L = 0; 1628 private static final short _R = 1; 1629 private static final short _EN = 2; 1630 private static final short _AN = 3; 1631 private static final short _ON = 4; 1632 private static final short _S = 5; 1633 private static final short _B = 6; /* reduced dirProp */ 1634 1635 /*********************************************************************/ 1636 /* */ 1637 /* PROPERTIES STATE TABLE */ 1638 /* */ 1639 /* In table impTabProps, */ 1640 /* - the ON column regroups ON and WS */ 1641 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */ 1642 /* - the Res column is the reduced property assigned to a run */ 1643 /* */ 1644 /* Action 1: process current run1, init new run1 */ 1645 /* 2: init new run2 */ 1646 /* 3: process run1, process run2, init new run1 */ 1647 /* 4: process run1, set run1=run2, init new run2 */ 1648 /* */ 1649 /* Notes: */ 1650 /* 1) This table is used in resolveImplicitLevels(). */ 1651 /* 2) This table triggers actions when there is a change in the Bidi*/ 1652 /* property of incoming characters (action 1). */ 1653 /* 3) Most such property sequences are processed immediately (in */ 1654 /* fact, passed to processPropertySeq(). */ 1655 /* 4) However, numbers are assembled as one sequence. This means */ 1656 /* that undefined situations (like CS following digits, until */ 1657 /* it is known if the next char will be a digit) are held until */ 1658 /* following chars define them. */ 1659 /* Example: digits followed by CS, then comes another CS or ON; */ 1660 /* the digits will be processed, then the CS assigned */ 1661 /* as the start of an ON sequence (action 3). */ 1662 /* 5) There are cases where more than one sequence must be */ 1663 /* processed, for instance digits followed by CS followed by L: */ 1664 /* the digits must be processed as one sequence, and the CS */ 1665 /* must be processed as an ON sequence, all this before starting */ 1666 /* assembling chars for the opening L sequence. */ 1667 /* */ 1668 /* */ 1669 private static final short impTabProps[][] = 1670 { 1671 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */ 1672 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON }, 1673 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L }, 1674 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R }, 1675 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R }, 1676 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN }, 1677 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN }, 1678 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN }, 1679 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON }, 1680 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON }, 1681 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON }, 1682 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN }, 1683 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN }, 1684 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, _AN }, 1685 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN }, 1686 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON }, 1687 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S }, 1688 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S }, 1689 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B } 1690 }; 1691 1692 /*********************************************************************/ 1693 /* The levels state machine tables */ 1694 /*********************************************************************/ 1695 /* */ 1696 /* All table cells are 8 bits: */ 1697 /* bits 0..3: next state */ 1698 /* bits 4..7: action to perform (if > 0) */ 1699 /* */ 1700 /* Cells may be of format "n" where n represents the next state */ 1701 /* (except for the rightmost column). */ 1702 /* Cells may also be of format "_(x,y)" where x represents an action */ 1703 /* to perform and y represents the next state. */ 1704 /* */ 1705 /* This format limits each table to 16 states each and to 15 actions.*/ 1706 /* */ 1707 /*********************************************************************/ 1708 /* Definitions and type for levels state tables */ 1709 /*********************************************************************/ 1710 private static final int IMPTABLEVELS_COLUMNS = _B + 2; 1711 private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1; 1712 private static short GetState(byte cell) { return (short)(cell & 0x0f); } 1713 private static short GetAction(byte cell) { return (short)(cell >> 4); } 1714 1715 private static class ImpTabPair { 1716 byte[][][] imptab; 1717 short[][] impact; 1718 1719 ImpTabPair(byte[][] table1, byte[][] table2, 1720 short[] act1, short[] act2) { 1721 imptab = new byte[][][] {table1, table2}; 1722 impact = new short[][] {act1, act2}; 1723 } 1724 } 1725 1726 /*********************************************************************/ 1727 /* */ 1728 /* LEVELS STATE TABLES */ 1729 /* */ 1730 /* In all levels state tables, */ 1731 /* - state 0 is the initial state */ 1732 /* - the Res column is the increment to add to the text level */ 1733 /* for this property sequence. */ 1734 /* */ 1735 /* The impact arrays for each table of a pair map the local action */ 1736 /* numbers of the table to the total list of actions. For instance, */ 1737 /* action 2 in a given table corresponds to the action number which */ 1738 /* appears in entry [2] of the impact array for that table. */ 1739 /* The first entry of all impact arrays must be 0. */ 1740 /* */ 1741 /* Action 1: init conditional sequence */ 1742 /* 2: prepend conditional sequence to current sequence */ 1743 /* 3: set ON sequence to new level - 1 */ 1744 /* 4: init EN/AN/ON sequence */ 1745 /* 5: fix EN/AN/ON sequence followed by R */ 1746 /* 6: set previous level sequence to level 2 */ 1747 /* */ 1748 /* Notes: */ 1749 /* 1) These tables are used in processPropertySeq(). The input */ 1750 /* is property sequences as determined by resolveImplicitLevels. */ 1751 /* 2) Most such property sequences are processed immediately */ 1752 /* (levels are assigned). */ 1753 /* 3) However, some sequences cannot be assigned a final level till */ 1754 /* one or more following sequences are received. For instance, */ 1755 /* ON following an R sequence within an even-level paragraph. */ 1756 /* If the following sequence is R, the ON sequence will be */ 1757 /* assigned basic run level+1, and so will the R sequence. */ 1758 /* 4) S is generally handled like ON, since its level will be fixed */ 1759 /* to paragraph level in adjustWSLevels(). */ 1760 /* */ 1761 1762 private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */ 1763 /* In this table, conditional sequences receive the higher possible level 1764 until proven otherwise. 1765 */ 1766 { 1767 /* L, R, EN, AN, ON, S, B, Res */ 1768 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 }, 1769 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 }, 1770 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 }, 1771 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 }, 1772 /* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 }, 1773 /* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 } 1774 }; 1775 1776 private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */ 1777 /* In this table, conditional sequences receive the lower possible level 1778 until proven otherwise. 1779 */ 1780 { 1781 /* L, R, EN, AN, ON, S, B, Res */ 1782 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1783 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 }, 1784 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1785 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 }, 1786 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 }, 1787 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 } 1788 }; 1789 1790 private static final short[] impAct0 = {0,1,2,3,4,5,6}; 1791 1792 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair( 1793 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0); 1794 1795 private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */ 1796 /* In this table, conditional sequences receive the higher possible 1797 level until proven otherwise. 1798 */ 1799 /* L, R, EN, AN, ON, S, B, Res */ 1800 /* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 }, 1801 /* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 }, 1802 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 }, 1803 /* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 }, 1804 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 } 1805 }; 1806 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair( 1807 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0); 1808 1809 private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = { 1810 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1811 until proven that there is L or sor/eor on both sides. AN is handled like EN. 1812 */ 1813 /* L, R, EN, AN, ON, S, B, Res */ 1814 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 }, 1815 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 }, 1816 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 }, 1817 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 }, 1818 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 }, 1819 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 } 1820 }; 1821 private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = { 1822 /* In this table, EN/AN+ON sequences receive levels as if associated with R 1823 until proven that there is L on both sides. AN is handled like EN. 1824 */ 1825 /* L, R, EN, AN, ON, S, B, Res */ 1826 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 }, 1827 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 }, 1828 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 }, 1829 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 }, 1830 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 } 1831 }; 1832 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new 1833 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R, 1834 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0); 1835 1836 private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = { 1837 /* This table is identical to the Default LTR table except that EN and AN 1838 are handled like L. 1839 */ 1840 /* L, R, EN, AN, ON, S, B, Res */ 1841 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 }, 1842 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 }, 1843 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 }, 1844 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 }, 1845 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 }, 1846 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 } 1847 }; 1848 private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = { 1849 /* This table is identical to the Default RTL table except that EN and AN 1850 are handled like L. 1851 */ 1852 /* L, R, EN, AN, ON, S, B, Res */ 1853 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 }, 1854 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 }, 1855 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 }, 1856 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 }, 1857 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 }, 1858 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 } 1859 }; 1860 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair 1861 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L, 1862 impAct0, impAct0); 1863 1864 private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */ 1865 /* In this table, conditional sequences receive the lower possible level 1866 until proven otherwise. 1867 */ 1868 /* L, R, EN, AN, ON, S, B, Res */ 1869 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 1870 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 }, 1871 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 1872 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 }, 1873 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 }, 1874 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 }, 1875 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 } 1876 }; 1877 private static final short[] impAct1 = {0,1,11,12}; 1878 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair( 1879 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1880 1881 private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1882 /* The case handled in this table is (visually): R EN L 1883 */ 1884 /* L, R, EN, AN, ON, S, B, Res */ 1885 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 }, 1886 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 }, 1887 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 }, 1888 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 }, 1889 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 }, 1890 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 }, 1891 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 } 1892 }; 1893 private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { 1894 /* The cases handled in this table are (visually): R EN L 1895 R L AN L 1896 */ 1897 /* L, R, EN, AN, ON, S, B, Res */ 1898 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 }, 1899 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 }, 1900 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 }, 1901 /* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 }, 1902 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 }, 1903 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 }, 1904 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 } 1905 }; 1906 private static final short impAct2[] = {0,1,7,8,9,10}; 1907 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = 1908 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1909 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1910 1911 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair( 1912 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 1913 1914 private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = { 1915 /* The case handled in this table is (visually): R EN L 1916 */ 1917 /* L, R, EN, AN, ON, S, B, Res */ 1918 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 }, 1919 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 }, 1920 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 }, 1921 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 }, 1922 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 } 1923 }; 1924 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new 1925 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1926 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); 1927 1928 private class LevState { 1929 byte[][] impTab; /* level table pointer */ 1930 short[] impAct; /* action map array */ 1931 int startON; /* start of ON sequence */ 1932 int startL2EN; /* start of level 2 sequence */ 1933 int lastStrongRTL; /* index of last found R or AL */ 1934 short state; /* current state */ 1935 byte runLevel; /* run level before implicit solving */ 1936 } 1937 1938 /*------------------------------------------------------------------------*/ 1939 1940 static final int FIRSTALLOC = 10; 1941 /* 1942 * param pos: position where to insert 1943 * param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1944 */ 1945 private void addPoint(int pos, int flag) 1946 { 1947 Point point = new Point(); 1948 1949 int len = insertPoints.points.length; 1950 if (len == 0) { 1951 insertPoints.points = new Point[FIRSTALLOC]; 1952 len = FIRSTALLOC; 1953 } 1954 if (insertPoints.size >= len) { /* no room for new point */ 1955 Point[] savePoints = insertPoints.points; 1956 insertPoints.points = new Point[len * 2]; 1957 System.arraycopy(savePoints, 0, insertPoints.points, 0, len); 1958 } 1959 point.pos = pos; 1960 point.flag = flag; 1961 insertPoints.points[insertPoints.size] = point; 1962 insertPoints.size++; 1963 } 1964 1965 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1966 1967 /* 1968 * This implementation of the (Wn) rules applies all rules in one pass. 1969 * In order to do so, it needs a look-ahead of typically 1 character 1970 * (except for W5: sequences of ET) and keeps track of changes 1971 * in a rule Wp that affect a later Wq (p<q). 1972 * 1973 * The (Nn) and (In) rules are also performed in that same single loop, 1974 * but effectively one iteration behind for white space. 1975 * 1976 * Since all implicit rules are performed in one step, it is not necessary 1977 * to actually store the intermediate directional properties in dirProps[]. 1978 */ 1979 1980 private void processPropertySeq(LevState levState, short _prop, 1981 int start, int limit) { 1982 byte cell; 1983 byte[][] impTab = levState.impTab; 1984 short[] impAct = levState.impAct; 1985 short oldStateSeq,actionSeq; 1986 byte level, addLevel; 1987 int start0, k; 1988 1989 start0 = start; /* save original start position */ 1990 oldStateSeq = levState.state; 1991 cell = impTab[oldStateSeq][_prop]; 1992 levState.state = GetState(cell); /* isolate the new state */ 1993 actionSeq = impAct[GetAction(cell)]; /* isolate the action */ 1994 addLevel = impTab[levState.state][IMPTABLEVELS_RES]; 1995 1996 if (actionSeq != 0) { 1997 switch (actionSeq) { 1998 case 1: /* init ON seq */ 1999 levState.startON = start0; 2000 break; 2001 2002 case 2: /* prepend ON seq to current seq */ 2003 start = levState.startON; 2004 break; 2005 2006 case 3: /* L or S after possible relevant EN/AN */ 2007 /* check if we had EN after R/AL */ 2008 if (levState.startL2EN >= 0) { 2009 addPoint(levState.startL2EN, LRM_BEFORE); 2010 } 2011 levState.startL2EN = -1; /* not within previous if since could also be -2 */ 2012 /* check if we had any relevant EN/AN after R/AL */ 2013 if ((insertPoints.points.length == 0) || 2014 (insertPoints.size <= insertPoints.confirmed)) { 2015 /* nothing, just clean up */ 2016 levState.lastStrongRTL = -1; 2017 /* check if we have a pending conditional segment */ 2018 level = impTab[oldStateSeq][IMPTABLEVELS_RES]; 2019 if ((level & 1) != 0 && levState.startON > 0) { /* after ON */ 2020 start = levState.startON; /* reset to basic run level */ 2021 } 2022 if (_prop == _S) { /* add LRM before S */ 2023 addPoint(start0, LRM_BEFORE); 2024 insertPoints.confirmed = insertPoints.size; 2025 } 2026 break; 2027 } 2028 /* reset previous RTL cont to level for LTR text */ 2029 for (k = levState.lastStrongRTL + 1; k < start0; k++) { 2030 /* reset odd level, leave runLevel+2 as is */ 2031 levels[k] = (byte)((levels[k] - 2) & ~1); 2032 } 2033 /* mark insert points as confirmed */ 2034 insertPoints.confirmed = insertPoints.size; 2035 levState.lastStrongRTL = -1; 2036 if (_prop == _S) { /* add LRM before S */ 2037 addPoint(start0, LRM_BEFORE); 2038 insertPoints.confirmed = insertPoints.size; 2039 } 2040 break; 2041 2042 case 4: /* R/AL after possible relevant EN/AN */ 2043 /* just clean up */ 2044 if (insertPoints.points.length > 0) 2045 /* remove all non confirmed insert points */ 2046 insertPoints.size = insertPoints.confirmed; 2047 levState.startON = -1; 2048 levState.startL2EN = -1; 2049 levState.lastStrongRTL = limit - 1; 2050 break; 2051 2052 case 5: /* EN/AN after R/AL + possible cont */ 2053 /* check for real AN */ 2054 if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) { 2055 /* real AN */ 2056 if (levState.startL2EN == -1) { /* if no relevant EN already found */ 2057 /* just note the righmost digit as a strong RTL */ 2058 levState.lastStrongRTL = limit - 1; 2059 break; 2060 } 2061 if (levState.startL2EN >= 0) { /* after EN, no AN */ 2062 addPoint(levState.startL2EN, LRM_BEFORE); 2063 levState.startL2EN = -2; 2064 } 2065 /* note AN */ 2066 addPoint(start0, LRM_BEFORE); 2067 break; 2068 } 2069 /* if first EN/AN after R/AL */ 2070 if (levState.startL2EN == -1) { 2071 levState.startL2EN = start0; 2072 } 2073 break; 2074 2075 case 6: /* note location of latest R/AL */ 2076 levState.lastStrongRTL = limit - 1; 2077 levState.startON = -1; 2078 break; 2079 2080 case 7: /* L after R+ON/EN/AN */ 2081 /* include possible adjacent number on the left */ 2082 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) { 2083 } 2084 if (k >= 0) { 2085 addPoint(k, RLM_BEFORE); /* add RLM before */ 2086 insertPoints.confirmed = insertPoints.size; /* confirm it */ 2087 } 2088 levState.startON = start0; 2089 break; 2090 2091 case 8: /* AN after L */ 2092 /* AN numbers between L text on both sides may be trouble. */ 2093 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 2094 addPoint(start0, LRM_BEFORE); /* add LRM before */ 2095 addPoint(start0, LRM_AFTER); /* add LRM after */ 2096 break; 2097 2098 case 9: /* R after L+ON/EN/AN */ 2099 /* false alert, infirm LRMs around previous AN */ 2100 insertPoints.size=insertPoints.confirmed; 2101 if (_prop == _S) { /* add RLM before S */ 2102 addPoint(start0, RLM_BEFORE); 2103 insertPoints.confirmed = insertPoints.size; 2104 } 2105 break; 2106 2107 case 10: /* L after L+ON/AN */ 2108 level = (byte)(levState.runLevel + addLevel); 2109 for (k=levState.startON; k < start0; k++) { 2110 if (levels[k] < level) { 2111 levels[k] = level; 2112 } 2113 } 2114 insertPoints.confirmed = insertPoints.size; /* confirm inserts */ 2115 levState.startON = start0; 2116 break; 2117 2118 case 11: /* L after L+ON+EN/AN/ON */ 2119 level = levState.runLevel; 2120 for (k = start0-1; k >= levState.startON; k--) { 2121 if (levels[k] == level+3) { 2122 while (levels[k] == level+3) { 2123 levels[k--] -= 2; 2124 } 2125 while (levels[k] == level) { 2126 k--; 2127 } 2128 } 2129 if (levels[k] == level+2) { 2130 levels[k] = level; 2131 continue; 2132 } 2133 levels[k] = (byte)(level+1); 2134 } 2135 break; 2136 2137 case 12: /* R after L+ON+EN/AN/ON */ 2138 level = (byte)(levState.runLevel+1); 2139 for (k = start0-1; k >= levState.startON; k--) { 2140 if (levels[k] > level) { 2141 levels[k] -= 2; 2142 } 2143 } 2144 break; 2145 2146 default: /* we should never get here */ 2147 throw new IllegalStateException("Internal ICU error in processPropertySeq"); 2148 } 2149 } 2150 if ((addLevel) != 0 || (start < start0)) { 2151 level = (byte)(levState.runLevel + addLevel); 2152 for (k = start; k < limit; k++) { 2153 levels[k] = level; 2154 } 2155 } 2156 } 2157 2158 private void resolveImplicitLevels(int start, int limit, short sor, short eor) 2159 { 2160 LevState levState = new LevState(); 2161 int i, start1, start2; 2162 short oldStateImp, stateImp, actionImp; 2163 short gprop, resProp, cell; 2164 short nextStrongProp = R; 2165 int nextStrongPos = -1; 2166 2167 2168 /* check for RTL inverse Bidi mode */ 2169 /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to 2170 * loop on the text characters from end to start. 2171 * This would need a different properties state table (at least different 2172 * actions) and different levels state tables (maybe very similar to the 2173 * LTR corresponding ones. 2174 */ 2175 /* initialize for levels state table */ 2176 levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2177 levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2178 levState.state = 0; 2179 levState.runLevel = levels[start]; 2180 levState.impTab = impTabPair.imptab[levState.runLevel & 1]; 2181 levState.impAct = impTabPair.impact[levState.runLevel & 1]; 2182 processPropertySeq(levState, sor, start, start); 2183 /* initialize for property state table */ 2184 if (dirProps[start] == NSM) { 2185 stateImp = (short)(1 + sor); 2186 } else { 2187 stateImp = 0; 2188 } 2189 start1 = start; 2190 start2 = 0; 2191 2192 for (i = start; i <= limit; i++) { 2193 if (i >= limit) { 2194 gprop = eor; 2195 } else { 2196 short prop, prop1; 2197 prop = NoContextRTL(dirProps[i]); 2198 gprop = groupProp[prop]; 2199 } 2200 oldStateImp = stateImp; 2201 cell = impTabProps[oldStateImp][gprop]; 2202 stateImp = GetStateProps(cell); /* isolate the new state */ 2203 actionImp = GetActionProps(cell); /* isolate the action */ 2204 if ((i == limit) && (actionImp == 0)) { 2205 /* there is an unprocessed sequence if its property == eor */ 2206 actionImp = 1; /* process the last sequence */ 2207 } 2208 if (actionImp != 0) { 2209 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES]; 2210 switch (actionImp) { 2211 case 1: /* process current seq1, init new seq1 */ 2212 processPropertySeq(levState, resProp, start1, i); 2213 start1 = i; 2214 break; 2215 case 2: /* init new seq2 */ 2216 start2 = i; 2217 break; 2218 case 3: /* process seq1, process seq2, init new seq1 */ 2219 processPropertySeq(levState, resProp, start1, start2); 2220 processPropertySeq(levState, _ON, start2, i); 2221 start1 = i; 2222 break; 2223 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 2224 processPropertySeq(levState, resProp, start1, start2); 2225 start1 = start2; 2226 start2 = i; 2227 break; 2228 default: /* we should never get here */ 2229 throw new IllegalStateException("Internal ICU error in resolveImplicitLevels"); 2230 } 2231 } 2232 } 2233 /* flush possible pending sequence, e.g. ON */ 2234 processPropertySeq(levState, eor, limit, limit); 2235 } 2236 2237 /* perform (L1) and (X9) ---------------------------------------------------- */ 2238 2239 /* 2240 * Reset the embedding levels for some non-graphic characters (L1). 2241 * This method also sets appropriate levels for BN, and 2242 * explicit embedding types that are supposed to have been removed 2243 * from the paragraph in (X9). 2244 */ 2245 private void adjustWSLevels() { 2246 int i; 2247 2248 if ((flags & MASK_WS) != 0) { 2249 int flag; 2250 i = trailingWSStart; 2251 while (i > 0) { 2252 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 2253 while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) { 2254 if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2255 levels[i] = 0; 2256 } else { 2257 levels[i] = GetParaLevelAt(i); 2258 } 2259 } 2260 2261 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 2262 /* here, i+1 is guaranteed to be <length */ 2263 while (i > 0) { 2264 flag = DirPropFlagNC(dirProps[--i]); 2265 if ((flag & MASK_BN_EXPLICIT) != 0) { 2266 levels[i] = levels[i + 1]; 2267 } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 2268 levels[i] = 0; 2269 break; 2270 } else if ((flag & MASK_B_S) != 0){ 2271 levels[i] = GetParaLevelAt(i); 2272 break; 2273 } 2274 } 2275 } 2276 } 2277 } 2278 2279 private int Bidi_Min(int x, int y) { 2280 return x < y ? x : y; 2281 } 2282 2283 private int Bidi_Abs(int x) { 2284 return x >= 0 ? x : -x; 2285 } 2286 2287 /** 2288 * Perform the Unicode Bidi algorithm. It is defined in the 2289 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2290 * version 13, 2291 * also described in The Unicode Standard, Version 4.0 .<p> 2292 * 2293 * This method takes a piece of plain text containing one or more paragraphs, 2294 * with or without externally specified embedding levels from <i>styled</i> 2295 * text and computes the left-right-directionality of each character.<p> 2296 * 2297 * If the entire text is all of the same directionality, then 2298 * the method may not perform all the steps described by the algorithm, 2299 * i.e., some levels may not be the same as if all steps were performed. 2300 * This is not relevant for unidirectional text.<br> 2301 * For example, in pure LTR text with numbers the numbers would get 2302 * a resolved level of 2 higher than the surrounding text according to 2303 * the algorithm. This implementation may set all resolved levels to 2304 * the same value in such a case.<p> 2305 * 2306 * The text can be composed of multiple paragraphs. Occurrence of a block 2307 * separator in the text terminates a paragraph, and whatever comes next starts 2308 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2309 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2310 * in that case, the pair of characters is considered as terminating the 2311 * preceding paragraph, and a new paragraph will be started by a character 2312 * coming after the LF. 2313 * 2314 * Although the text is passed here as a <code>String</code>, it is 2315 * stored internally as an array of characters. Therefore the 2316 * documentation will refer to indexes of the characters in the text. 2317 * 2318 * @param text contains the text that the Bidi algorithm will be performed 2319 * on. This text can be retrieved with <code>getText()</code> or 2320 * <code>getTextAsString</code>.<br> 2321 * 2322 * @param paraLevel specifies the default level for the text; 2323 * it is typically 0 (LTR) or 1 (RTL). 2324 * If the method shall determine the paragraph level from the text, 2325 * then <code>paraLevel</code> can be set to 2326 * either <code>LEVEL_DEFAULT_LTR</code> 2327 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2328 * paragraphs, the paragraph level shall be determined separately for 2329 * each paragraph; if a paragraph does not include any strongly typed 2330 * character, then the desired default is used (0 for LTR or 1 for RTL). 2331 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2332 * is also valid, with odd levels indicating RTL. 2333 * 2334 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 2335 * ignoring characters like LRE and PDF in the text. 2336 * A level overrides the directional property of its corresponding 2337 * (same index) character if the level has the 2338 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2339 * Except for that bit, it must be 2340 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 2341 * with one exception: a level of zero may be specified for a 2342 * paragraph separator even if <code>paraLevel>0</code> when multiple 2343 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2344 * <strong>Caution: </strong>A reference to this array, not a copy 2345 * of the levels, will be stored in the <code>Bidi</code> object; 2346 * the <code>embeddingLevels</code> 2347 * should not be modified to avoid unexpected results on subsequent 2348 * Bidi operations. However, the <code>setPara()</code> and 2349 * <code>setLine()</code> methods may modify some or all of the 2350 * levels.<br><br> 2351 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2352 * have one entry for each character in <code>text</code>. 2353 * 2354 * @throws IllegalArgumentException if the values in embeddingLevels are 2355 * not within the allowed range 2356 * 2357 * @see #LEVEL_DEFAULT_LTR 2358 * @see #LEVEL_DEFAULT_RTL 2359 * @see #LEVEL_OVERRIDE 2360 * @see #MAX_EXPLICIT_LEVEL 2361 * @stable ICU 3.8 2362 */ 2363 void setPara(String text, byte paraLevel, byte[] embeddingLevels) 2364 { 2365 if (text == null) { 2366 setPara(new char[0], paraLevel, embeddingLevels); 2367 } else { 2368 setPara(text.toCharArray(), paraLevel, embeddingLevels); 2369 } 2370 } 2371 2372 /** 2373 * Perform the Unicode Bidi algorithm. It is defined in the 2374 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2375 * version 13, 2376 * also described in The Unicode Standard, Version 4.0 .<p> 2377 * 2378 * This method takes a piece of plain text containing one or more paragraphs, 2379 * with or without externally specified embedding levels from <i>styled</i> 2380 * text and computes the left-right-directionality of each character.<p> 2381 * 2382 * If the entire text is all of the same directionality, then 2383 * the method may not perform all the steps described by the algorithm, 2384 * i.e., some levels may not be the same as if all steps were performed. 2385 * This is not relevant for unidirectional text.<br> 2386 * For example, in pure LTR text with numbers the numbers would get 2387 * a resolved level of 2 higher than the surrounding text according to 2388 * the algorithm. This implementation may set all resolved levels to 2389 * the same value in such a case.<p> 2390 * 2391 * The text can be composed of multiple paragraphs. Occurrence of a block 2392 * separator in the text terminates a paragraph, and whatever comes next starts 2393 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 2394 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 2395 * in that case, the pair of characters is considered as terminating the 2396 * preceding paragraph, and a new paragraph will be started by a character 2397 * coming after the LF. 2398 * 2399 * The text is stored internally as an array of characters. Therefore the 2400 * documentation will refer to indexes of the characters in the text. 2401 * 2402 * @param chars contains the text that the Bidi algorithm will be performed 2403 * on. This text can be retrieved with <code>getText()</code> or 2404 * <code>getTextAsString</code>.<br> 2405 * 2406 * @param paraLevel specifies the default level for the text; 2407 * it is typically 0 (LTR) or 1 (RTL). 2408 * If the method shall determine the paragraph level from the text, 2409 * then <code>paraLevel</code> can be set to 2410 * either <code>LEVEL_DEFAULT_LTR</code> 2411 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 2412 * paragraphs, the paragraph level shall be determined separately for 2413 * each paragraph; if a paragraph does not include any strongly typed 2414 * character, then the desired default is used (0 for LTR or 1 for RTL). 2415 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 2416 * is also valid, with odd levels indicating RTL. 2417 * 2418 * @param embeddingLevels (in) may be used to preset the embedding and 2419 * override levels, ignoring characters like LRE and PDF in the text. 2420 * A level overrides the directional property of its corresponding 2421 * (same index) character if the level has the 2422 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 2423 * Except for that bit, it must be 2424 * {@code paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL}, 2425 * with one exception: a level of zero may be specified for a 2426 * paragraph separator even if {@code paraLevel > 0} when multiple 2427 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 2428 * <strong>Caution: </strong>A reference to this array, not a copy 2429 * of the levels, will be stored in the <code>Bidi</code> object; 2430 * the <code>embeddingLevels</code> 2431 * should not be modified to avoid unexpected results on subsequent 2432 * Bidi operations. However, the <code>setPara()</code> and 2433 * <code>setLine()</code> methods may modify some or all of the 2434 * levels.<br><br> 2435 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 2436 * have one entry for each character in <code>text</code>. 2437 * 2438 * @throws IllegalArgumentException if the values in embeddingLevels are 2439 * not within the allowed range 2440 * 2441 * @see #LEVEL_DEFAULT_LTR 2442 * @see #LEVEL_DEFAULT_RTL 2443 * @see #LEVEL_OVERRIDE 2444 * @see #MAX_EXPLICIT_LEVEL 2445 * @stable ICU 3.8 2446 */ 2447 public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels) 2448 { 2449 /* check the argument values */ 2450 if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) { 2451 verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1); 2452 } 2453 if (chars == null) { 2454 chars = new char[0]; 2455 } 2456 2457 /* initialize the Bidi object */ 2458 this.paraBidi = null; /* mark unfinished setPara */ 2459 this.text = chars; 2460 this.length = this.originalLength = this.resultLength = text.length; 2461 this.paraLevel = paraLevel; 2462 this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2463 this.paraCount = 1; 2464 2465 /* Allocate zero-length arrays instead of setting to null here; then 2466 * checks for null in various places can be eliminated. 2467 */ 2468 dirProps = new byte[0]; 2469 levels = new byte[0]; 2470 runs = new BidiRun[0]; 2471 isGoodLogicalToVisualRunsMap = false; 2472 insertPoints.size = 0; /* clean up from last call */ 2473 insertPoints.confirmed = 0; /* clean up from last call */ 2474 2475 /* 2476 * Save the original paraLevel if contextual; otherwise, set to 0. 2477 */ 2478 if (IsDefaultLevel(paraLevel)) { 2479 defaultParaLevel = paraLevel; 2480 } else { 2481 defaultParaLevel = 0; 2482 } 2483 2484 if (length == 0) { 2485 /* 2486 * For an empty paragraph, create a Bidi object with the paraLevel and 2487 * the flags and the direction set but without allocating zero-length arrays. 2488 * There is nothing more to do. 2489 */ 2490 if (IsDefaultLevel(paraLevel)) { 2491 this.paraLevel &= 1; 2492 defaultParaLevel = 0; 2493 } 2494 if ((this.paraLevel & 1) != 0) { 2495 flags = DirPropFlag(R); 2496 direction = Bidi.DIRECTION_RIGHT_TO_LEFT; 2497 } else { 2498 flags = DirPropFlag(L); 2499 direction = Bidi.DIRECTION_LEFT_TO_RIGHT; 2500 } 2501 2502 runCount = 0; 2503 paraCount = 0; 2504 paraBidi = this; /* mark successful setPara */ 2505 return; 2506 } 2507 2508 runCount = -1; 2509 2510 /* 2511 * Get the directional properties, 2512 * the flags bit-set, and 2513 * determine the paragraph level if necessary. 2514 */ 2515 getDirPropsMemory(length); 2516 dirProps = dirPropsMemory; 2517 getDirProps(); 2518 2519 /* the processed length may have changed if OPTION_STREAMING is set */ 2520 trailingWSStart = length; /* the levels[] will reflect the WS run */ 2521 2522 /* allocate paras memory */ 2523 if (paraCount > 1) { 2524 getInitialParasMemory(paraCount); 2525 paras = parasMemory; 2526 paras[paraCount - 1] = length; 2527 } else { 2528 /* initialize paras for single paragraph */ 2529 paras = simpleParas; 2530 simpleParas[0] = length; 2531 } 2532 2533 /* are explicit levels specified? */ 2534 if (embeddingLevels == null) { 2535 /* no: determine explicit levels according to the (Xn) rules */ 2536 getLevelsMemory(length); 2537 levels = levelsMemory; 2538 direction = resolveExplicitLevels(); 2539 } else { 2540 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */ 2541 levels = embeddingLevels; 2542 direction = checkExplicitLevels(); 2543 } 2544 2545 /* 2546 * The steps after (X9) in the Bidi algorithm are performed only if 2547 * the paragraph text has mixed directionality! 2548 */ 2549 switch (direction) { 2550 case Bidi.DIRECTION_LEFT_TO_RIGHT: 2551 /* make sure paraLevel is even */ 2552 paraLevel = (byte)((paraLevel + 1) & ~1); 2553 2554 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2555 trailingWSStart = 0; 2556 break; 2557 case Bidi.DIRECTION_RIGHT_TO_LEFT: 2558 /* make sure paraLevel is odd */ 2559 paraLevel |= 1; 2560 2561 /* all levels are implicitly at paraLevel (important for getLevels()) */ 2562 trailingWSStart = 0; 2563 break; 2564 default: 2565 this.impTabPair = impTab_DEFAULT; 2566 2567 /* 2568 * If there are no external levels specified and there 2569 * are no significant explicit level codes in the text, 2570 * then we can treat the entire paragraph as one run. 2571 * Otherwise, we need to perform the following rules on runs of 2572 * the text with the same embedding levels. (X10) 2573 * "Significant" explicit level codes are ones that actually 2574 * affect non-BN characters. 2575 * Examples for "insignificant" ones are empty embeddings 2576 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 2577 */ 2578 if (embeddingLevels == null && paraCount <= 1 && 2579 (flags & DirPropFlagMultiRuns) == 0) { 2580 resolveImplicitLevels(0, length, 2581 GetLRFromLevel(GetParaLevelAt(0)), 2582 GetLRFromLevel(GetParaLevelAt(length - 1))); 2583 } else { 2584 /* sor, eor: start and end types of same-level-run */ 2585 int start, limit = 0; 2586 byte level, nextLevel; 2587 short sor, eor; 2588 2589 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 2590 level = GetParaLevelAt(0); 2591 nextLevel = levels[0]; 2592 if (level < nextLevel) { 2593 eor = GetLRFromLevel(nextLevel); 2594 } else { 2595 eor = GetLRFromLevel(level); 2596 } 2597 2598 do { 2599 /* determine start and limit of the run (end points just behind the run) */ 2600 2601 /* the values for this run's start are the same as for the previous run's end */ 2602 start = limit; 2603 level = nextLevel; 2604 if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) { 2605 /* except if this is a new paragraph, then set sor = para level */ 2606 sor = GetLRFromLevel(GetParaLevelAt(start)); 2607 } else { 2608 sor = eor; 2609 } 2610 2611 /* search for the limit of this run */ 2612 while (++limit < length && levels[limit] == level) {} 2613 2614 /* get the correct level of the next run */ 2615 if (limit < length) { 2616 nextLevel = levels[limit]; 2617 } else { 2618 nextLevel = GetParaLevelAt(length - 1); 2619 } 2620 2621 /* determine eor from max(level, nextLevel); sor is last run's eor */ 2622 if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) { 2623 eor = GetLRFromLevel(nextLevel); 2624 } else { 2625 eor = GetLRFromLevel(level); 2626 } 2627 2628 /* if the run consists of overridden directional types, then there 2629 are no implicit types to be resolved */ 2630 if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { 2631 resolveImplicitLevels(start, limit, sor, eor); 2632 } else { 2633 /* remove the LEVEL_OVERRIDE flags */ 2634 do { 2635 levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE; 2636 } while (start < limit); 2637 } 2638 } while (limit < length); 2639 } 2640 2641 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2642 adjustWSLevels(); 2643 2644 break; 2645 } 2646 2647 resultLength += insertPoints.size; 2648 paraBidi = this; /* mark successful setPara */ 2649 } 2650 2651 /** 2652 * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the 2653 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 2654 * version 13, 2655 * also described in The Unicode Standard, Version 4.0 .<p> 2656 * 2657 * This method takes a paragraph of text and computes the 2658 * left-right-directionality of each character. The text should not 2659 * contain any Unicode block separators.<p> 2660 * 2661 * The RUN_DIRECTION attribute in the text, if present, determines the base 2662 * direction (left-to-right or right-to-left). If not present, the base 2663 * direction is computed using the Unicode Bidirectional Algorithm, 2664 * defaulting to left-to-right if there are no strong directional characters 2665 * in the text. This attribute, if present, must be applied to all the text 2666 * in the paragraph.<p> 2667 * 2668 * The BIDI_EMBEDDING attribute in the text, if present, represents 2669 * embedding level information. Negative values from -1 to -62 indicate 2670 * overrides at the absolute value of the level. Positive values from 1 to 2671 * 62 indicate embeddings. Where values are zero or not defined, the base 2672 * embedding level as determined by the base direction is assumed.<p> 2673 * 2674 * The NUMERIC_SHAPING attribute in the text, if present, converts European 2675 * digits to other decimal digits before running the bidi algorithm. This 2676 * attribute, if present, must be applied to all the text in the paragraph. 2677 * 2678 * If the entire text is all of the same directionality, then 2679 * the method may not perform all the steps described by the algorithm, 2680 * i.e., some levels may not be the same as if all steps were performed. 2681 * This is not relevant for unidirectional text.<br> 2682 * For example, in pure LTR text with numbers the numbers would get 2683 * a resolved level of 2 higher than the surrounding text according to 2684 * the algorithm. This implementation may set all resolved levels to 2685 * the same value in such a case. 2686 * 2687 * @param paragraph a paragraph of text with optional character and 2688 * paragraph attribute information 2689 * @stable ICU 3.8 2690 */ 2691 public void setPara(AttributedCharacterIterator paragraph) 2692 { 2693 byte paraLvl; 2694 char ch = paragraph.first(); 2695 Boolean runDirection = 2696 (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION); 2697 Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING); 2698 if (runDirection == null) { 2699 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 2700 } else { 2701 paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ? 2702 (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT; 2703 } 2704 2705 byte[] lvls = null; 2706 int len = paragraph.getEndIndex() - paragraph.getBeginIndex(); 2707 byte[] embeddingLevels = new byte[len]; 2708 char[] txt = new char[len]; 2709 int i = 0; 2710 while (ch != AttributedCharacterIterator.DONE) { 2711 txt[i] = ch; 2712 Integer embedding = 2713 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING); 2714 if (embedding != null) { 2715 byte level = embedding.byteValue(); 2716 if (level == 0) { 2717 /* no-op */ 2718 } else if (level < 0) { 2719 lvls = embeddingLevels; 2720 embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE); 2721 } else { 2722 lvls = embeddingLevels; 2723 embeddingLevels[i] = level; 2724 } 2725 } 2726 ch = paragraph.next(); 2727 ++i; 2728 } 2729 2730 if (shaper != null) { 2731 NumericShapings.shape(shaper, txt, 0, len); 2732 } 2733 setPara(txt, paraLvl, lvls); 2734 } 2735 2736 /** 2737 * Specify whether block separators must be allocated level zero, 2738 * so that successive paragraphs will progress from left to right. 2739 * This method must be called before <code>setPara()</code>. 2740 * Paragraph separators (B) may appear in the text. Setting them to level zero 2741 * means that all paragraph separators (including one possibly appearing 2742 * in the last text position) are kept in the reordered text after the text 2743 * that they follow in the source text. 2744 * When this feature is not enabled, a paragraph separator at the last 2745 * position of the text before reordering will go to the first position 2746 * of the reordered text when the paragraph level is odd. 2747 * 2748 * @param ordarParaLTR specifies whether paragraph separators (B) must 2749 * receive level 0, so that successive paragraphs progress from left to right. 2750 * 2751 * @see #setPara 2752 * @stable ICU 3.8 2753 */ 2754 private void orderParagraphsLTR(boolean ordarParaLTR) { 2755 orderParagraphsLTR = ordarParaLTR; 2756 } 2757 2758 /** 2759 * Get the directionality of the text. 2760 * 2761 * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code> 2762 * that indicates if the entire text 2763 * represented by this object is unidirectional, 2764 * and which direction, or if it is mixed-directional. 2765 * 2766 * @throws IllegalStateException if this call is not preceded by a successful 2767 * call to <code>setPara</code> or <code>setLine</code> 2768 * 2769 * @see #LTR 2770 * @see #RTL 2771 * @see #MIXED 2772 * @stable ICU 3.8 2773 */ 2774 private byte getDirection() 2775 { 2776 verifyValidParaOrLine(); 2777 return direction; 2778 } 2779 2780 /** 2781 * Get the length of the text. 2782 * 2783 * @return The length of the text that the <code>Bidi</code> object was 2784 * created for. 2785 * 2786 * @throws IllegalStateException if this call is not preceded by a successful 2787 * call to <code>setPara</code> or <code>setLine</code> 2788 * @stable ICU 3.8 2789 */ 2790 public int getLength() 2791 { 2792 verifyValidParaOrLine(); 2793 return originalLength; 2794 } 2795 2796 /* paragraphs API methods ------------------------------------------------- */ 2797 2798 /** 2799 * Get the paragraph level of the text. 2800 * 2801 * @return The paragraph level. If there are multiple paragraphs, their 2802 * level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or 2803 * LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph 2804 * is returned. 2805 * 2806 * @throws IllegalStateException if this call is not preceded by a successful 2807 * call to <code>setPara</code> or <code>setLine</code> 2808 * 2809 * @see #LEVEL_DEFAULT_LTR 2810 * @see #LEVEL_DEFAULT_RTL 2811 * @see #getParagraph 2812 * @see #getParagraphByIndex 2813 * @stable ICU 3.8 2814 */ 2815 public byte getParaLevel() 2816 { 2817 verifyValidParaOrLine(); 2818 return paraLevel; 2819 } 2820 2821 /** 2822 * Get the index of a paragraph, given a position within the text. 2823 * 2824 * @param charIndex is the index of a character within the text, in the 2825 * range <code>[0..getProcessedLength()-1]</code>. 2826 * 2827 * @return The index of the paragraph containing the specified position, 2828 * starting from 0. 2829 * 2830 * @throws IllegalStateException if this call is not preceded by a successful 2831 * call to <code>setPara</code> or <code>setLine</code> 2832 * @throws IllegalArgumentException if charIndex is not within the legal range 2833 * 2834 * @see com.ibm.icu.text.BidiRun 2835 * @see #getProcessedLength 2836 * @stable ICU 3.8 2837 */ 2838 public int getParagraphIndex(int charIndex) 2839 { 2840 verifyValidParaOrLine(); 2841 BidiBase bidi = paraBidi; /* get Para object if Line object */ 2842 verifyRange(charIndex, 0, bidi.length); 2843 int paraIndex; 2844 for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) { 2845 } 2846 return paraIndex; 2847 } 2848 2849 /** 2850 * <code>setLine()</code> returns a <code>Bidi</code> object to 2851 * contain the reordering information, especially the resolved levels, 2852 * for all the characters in a line of text. This line of text is 2853 * specified by referring to a <code>Bidi</code> object representing 2854 * this information for a piece of text containing one or more paragraphs, 2855 * and by specifying a range of indexes in this text.<p> 2856 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 2857 * 2858 * This is used after calling <code>setPara()</code> 2859 * for a piece of text, and after line-breaking on that text. 2860 * It is not necessary if each paragraph is treated as a single line.<p> 2861 * 2862 * After line-breaking, rules (L1) and (L2) for the treatment of 2863 * trailing WS and for reordering are performed on 2864 * a <code>Bidi</code> object that represents a line.<p> 2865 * 2866 * <strong>Important: </strong>the line <code>Bidi</code> object may 2867 * reference data within the global text <code>Bidi</code> object. 2868 * You should not alter the content of the global text object until 2869 * you are finished using the line object. 2870 * 2871 * @param start is the line's first index into the text. 2872 * 2873 * @param limit is just behind the line's last index into the text 2874 * (its last index +1). 2875 * 2876 * @return a <code>Bidi</code> object that will now represent a line of the text. 2877 * 2878 * @throws IllegalStateException if this call is not preceded by a successful 2879 * call to <code>setPara</code> 2880 * @throws IllegalArgumentException if start and limit are not in the range 2881 * <code>0<=start<limit<=getProcessedLength()</code>, 2882 * or if the specified line crosses a paragraph boundary 2883 * 2884 * @see #setPara 2885 * @see #getProcessedLength 2886 * @stable ICU 3.8 2887 */ 2888 public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit) 2889 { 2890 verifyValidPara(); 2891 verifyRange(start, 0, limit); 2892 verifyRange(limit, 0, length+1); 2893 2894 return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit); 2895 } 2896 2897 /** 2898 * Get the level for one character. 2899 * 2900 * @param charIndex the index of a character. 2901 * 2902 * @return The level for the character at <code>charIndex</code>. 2903 * 2904 * @throws IllegalStateException if this call is not preceded by a successful 2905 * call to <code>setPara</code> or <code>setLine</code> 2906 * @throws IllegalArgumentException if charIndex is not in the range 2907 * <code>0<=charIndex<getProcessedLength()</code> 2908 * 2909 * @see #getProcessedLength 2910 * @stable ICU 3.8 2911 */ 2912 public byte getLevelAt(int charIndex) 2913 { 2914 if (charIndex < 0 || charIndex >= length) { 2915 return (byte)getBaseLevel(); 2916 } 2917 verifyValidParaOrLine(); 2918 verifyRange(charIndex, 0, length); 2919 return BidiLine.getLevelAt(this, charIndex); 2920 } 2921 2922 /** 2923 * Get an array of levels for each character.<p> 2924 * 2925 * Note that this method may allocate memory under some 2926 * circumstances, unlike <code>getLevelAt()</code>. 2927 * 2928 * @return The levels array for the text, 2929 * or <code>null</code> if an error occurs. 2930 * 2931 * @throws IllegalStateException if this call is not preceded by a successful 2932 * call to <code>setPara</code> or <code>setLine</code> 2933 * @stable ICU 3.8 2934 */ 2935 private byte[] getLevels() 2936 { 2937 verifyValidParaOrLine(); 2938 if (length <= 0) { 2939 return new byte[0]; 2940 } 2941 return BidiLine.getLevels(this); 2942 } 2943 2944 /** 2945 * Get the number of runs. 2946 * This method may invoke the actual reordering on the 2947 * <code>Bidi</code> object, after <code>setPara()</code> 2948 * may have resolved only the levels of the text. Therefore, 2949 * <code>countRuns()</code> may have to allocate memory, 2950 * and may throw an exception if it fails to do so. 2951 * 2952 * @return The number of runs. 2953 * 2954 * @throws IllegalStateException if this call is not preceded by a successful 2955 * call to <code>setPara</code> or <code>setLine</code> 2956 * @stable ICU 3.8 2957 */ 2958 public int countRuns() 2959 { 2960 verifyValidParaOrLine(); 2961 BidiLine.getRuns(this); 2962 return runCount; 2963 } 2964 2965 /** 2966 * Get a visual-to-logical index map (array) for the characters in the 2967 * <code>Bidi</code> (paragraph or line) object. 2968 * <p> 2969 * Some values in the map may be <code>MAP_NOWHERE</code> if the 2970 * corresponding text characters are Bidi marks inserted in the visual 2971 * output by the option <code>OPTION_INSERT_MARKS</code>. 2972 * <p> 2973 * When the visual output is altered by using options of 2974 * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>, 2975 * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>, 2976 * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 2977 * be correct. It is advised to use, when possible, reordering options 2978 * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}. 2979 * 2980 * @return an array of <code>getResultLength()</code> 2981 * indexes which will reflect the reordering of the characters.<br><br> 2982 * The index map will result in 2983 * <code>indexMap[visualIndex]==logicalIndex</code>, where 2984 * <code>indexMap</code> represents the returned array. 2985 * 2986 * @throws IllegalStateException if this call is not preceded by a successful 2987 * call to <code>setPara</code> or <code>setLine</code> 2988 * 2989 * @see #getLogicalMap 2990 * @see #getLogicalIndex 2991 * @see #getResultLength 2992 * @see #MAP_NOWHERE 2993 * @see #OPTION_INSERT_MARKS 2994 * @see #writeReordered 2995 * @stable ICU 3.8 2996 */ 2997 private int[] getVisualMap() 2998 { 2999 /* countRuns() checks successful call to setPara/setLine */ 3000 countRuns(); 3001 if (resultLength <= 0) { 3002 return new int[0]; 3003 } 3004 return BidiLine.getVisualMap(this); 3005 } 3006 3007 /** 3008 * This is a convenience method that does not use a <code>Bidi</code> object. 3009 * It is intended to be used for when an application has determined the levels 3010 * of objects (character sequences) and just needs to have them reordered (L2). 3011 * This is equivalent to using <code>getVisualMap()</code> on a 3012 * <code>Bidi</code> object. 3013 * 3014 * @param levels is an array of levels that have been determined by 3015 * the application. 3016 * 3017 * @return an array of <code>levels.length</code> 3018 * indexes which will reflect the reordering of the characters.<p> 3019 * The index map will result in 3020 * <code>indexMap[visualIndex]==logicalIndex</code>, where 3021 * <code>indexMap</code> represents the returned array. 3022 * 3023 * @stable ICU 3.8 3024 */ 3025 private static int[] reorderVisual(byte[] levels) 3026 { 3027 return BidiLine.reorderVisual(levels); 3028 } 3029 3030 /** 3031 * Constant indicating that the base direction depends on the first strong 3032 * directional character in the text according to the Unicode Bidirectional 3033 * Algorithm. If no strong directional character is present, the base 3034 * direction is left-to-right. 3035 * @stable ICU 3.8 3036 */ 3037 private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e; 3038 3039 /** 3040 * Constant indicating that the base direction depends on the first strong 3041 * directional character in the text according to the Unicode Bidirectional 3042 * Algorithm. If no strong directional character is present, the base 3043 * direction is right-to-left. 3044 * @stable ICU 3.8 3045 */ 3046 private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f; 3047 3048 /** 3049 * Create Bidi from the given text, embedding, and direction information. 3050 * The embeddings array may be null. If present, the values represent 3051 * embedding level information. Negative values from -1 to -61 indicate 3052 * overrides at the absolute value of the level. Positive values from 1 to 3053 * 61 indicate embeddings. Where values are zero, the base embedding level 3054 * as determined by the base direction is assumed.<p> 3055 * 3056 * Note: this constructor calls setPara() internally. 3057 * 3058 * @param text an array containing the paragraph of text to process. 3059 * @param textStart the index into the text array of the start of the 3060 * paragraph. 3061 * @param embeddings an array containing embedding values for each character 3062 * in the paragraph. This can be null, in which case it is assumed 3063 * that there is no external embedding information. 3064 * @param embStart the index into the embedding array of the start of the 3065 * paragraph. 3066 * @param paragraphLength the length of the paragraph in the text and 3067 * embeddings arrays. 3068 * @param flags a collection of flags that control the algorithm. The 3069 * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, 3070 * DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and 3071 * DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved. 3072 * 3073 * @throws IllegalArgumentException if the values in embeddings are 3074 * not within the allowed range 3075 * 3076 * @see #DIRECTION_LEFT_TO_RIGHT 3077 * @see #DIRECTION_RIGHT_TO_LEFT 3078 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 3079 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 3080 * @stable ICU 3.8 3081 */ 3082 public BidiBase(char[] text, 3083 int textStart, 3084 byte[] embeddings, 3085 int embStart, 3086 int paragraphLength, 3087 int flags) 3088 { 3089 this(0, 0); 3090 byte paraLvl; 3091 switch (flags) { 3092 case Bidi.DIRECTION_LEFT_TO_RIGHT: 3093 default: 3094 paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT; 3095 break; 3096 case Bidi.DIRECTION_RIGHT_TO_LEFT: 3097 paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT; 3098 break; 3099 case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT: 3100 paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; 3101 break; 3102 case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT: 3103 paraLvl = INTERNAL_LEVEL_DEFAULT_RTL; 3104 break; 3105 } 3106 byte[] paraEmbeddings; 3107 if (embeddings == null) { 3108 paraEmbeddings = null; 3109 } else { 3110 paraEmbeddings = new byte[paragraphLength]; 3111 byte lev; 3112 for (int i = 0; i < paragraphLength; i++) { 3113 lev = embeddings[i + embStart]; 3114 if (lev < 0) { 3115 lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE); 3116 } else if (lev == 0) { 3117 lev = paraLvl; 3118 if (paraLvl > MAX_EXPLICIT_LEVEL) { 3119 lev &= 1; 3120 } 3121 } 3122 paraEmbeddings[i] = lev; 3123 } 3124 } 3125 if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { 3126 setPara(text, paraLvl, paraEmbeddings); 3127 } else { 3128 char[] paraText = new char[paragraphLength]; 3129 System.arraycopy(text, textStart, paraText, 0, paragraphLength); 3130 setPara(paraText, paraLvl, paraEmbeddings); 3131 } 3132 } 3133 3134 /** 3135 * Return true if the line is not left-to-right or right-to-left. This means 3136 * it either has mixed runs of left-to-right and right-to-left text, or the 3137 * base direction differs from the direction of the only run of text. 3138 * 3139 * @return true if the line is not left-to-right or right-to-left. 3140 * 3141 * @throws IllegalStateException if this call is not preceded by a successful 3142 * call to <code>setPara</code> 3143 * @stable ICU 3.8 3144 */ 3145 public boolean isMixed() 3146 { 3147 return (!isLeftToRight() && !isRightToLeft()); 3148 } 3149 3150 /** 3151 * Return true if the line is all left-to-right text and the base direction 3152 * is left-to-right. 3153 * 3154 * @return true if the line is all left-to-right text and the base direction 3155 * is left-to-right. 3156 * 3157 * @throws IllegalStateException if this call is not preceded by a successful 3158 * call to <code>setPara</code> 3159 * @stable ICU 3.8 3160 */ 3161 public boolean isLeftToRight() 3162 { 3163 return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0); 3164 } 3165 3166 /** 3167 * Return true if the line is all right-to-left text, and the base direction 3168 * is right-to-left 3169 * 3170 * @return true if the line is all right-to-left text, and the base 3171 * direction is right-to-left 3172 * 3173 * @throws IllegalStateException if this call is not preceded by a successful 3174 * call to <code>setPara</code> 3175 * @stable ICU 3.8 3176 */ 3177 public boolean isRightToLeft() 3178 { 3179 return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1); 3180 } 3181 3182 /** 3183 * Return true if the base direction is left-to-right 3184 * 3185 * @return true if the base direction is left-to-right 3186 * 3187 * @throws IllegalStateException if this call is not preceded by a successful 3188 * call to <code>setPara</code> or <code>setLine</code> 3189 * 3190 * @stable ICU 3.8 3191 */ 3192 public boolean baseIsLeftToRight() 3193 { 3194 return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT); 3195 } 3196 3197 /** 3198 * Return the base level (0 if left-to-right, 1 if right-to-left). 3199 * 3200 * @return the base level 3201 * 3202 * @throws IllegalStateException if this call is not preceded by a successful 3203 * call to <code>setPara</code> or <code>setLine</code> 3204 * 3205 * @stable ICU 3.8 3206 */ 3207 public int getBaseLevel() 3208 { 3209 return getParaLevel(); 3210 } 3211 3212 /** 3213 * Compute the logical to visual run mapping 3214 */ 3215 private void getLogicalToVisualRunsMap() 3216 { 3217 if (isGoodLogicalToVisualRunsMap) { 3218 return; 3219 } 3220 int count = countRuns(); 3221 if ((logicalToVisualRunsMap == null) || 3222 (logicalToVisualRunsMap.length < count)) { 3223 logicalToVisualRunsMap = new int[count]; 3224 } 3225 int i; 3226 long[] keys = new long[count]; 3227 for (i = 0; i < count; i++) { 3228 keys[i] = ((long)(runs[i].start)<<32) + i; 3229 } 3230 Arrays.sort(keys); 3231 for (i = 0; i < count; i++) { 3232 logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF); 3233 } 3234 keys = null; 3235 isGoodLogicalToVisualRunsMap = true; 3236 } 3237 3238 /** 3239 * Return the level of the nth logical run in this line. 3240 * 3241 * @param run the index of the run, between 0 and <code>countRuns()-1</code> 3242 * 3243 * @return the level of the run 3244 * 3245 * @throws IllegalStateException if this call is not preceded by a successful 3246 * call to <code>setPara</code> or <code>setLine</code> 3247 * @throws IllegalArgumentException if <code>run</code> is not in 3248 * the range <code>0<=run<countRuns()</code> 3249 * @stable ICU 3.8 3250 */ 3251 public int getRunLevel(int run) 3252 { 3253 verifyValidParaOrLine(); 3254 BidiLine.getRuns(this); 3255 if (run < 0 || run >= runCount) { 3256 return getParaLevel(); 3257 } 3258 getLogicalToVisualRunsMap(); 3259 return runs[logicalToVisualRunsMap[run]].level; 3260 } 3261 3262 /** 3263 * Return the index of the character at the start of the nth logical run in 3264 * this line, as an offset from the start of the line. 3265 * 3266 * @param run the index of the run, between 0 and <code>countRuns()</code> 3267 * 3268 * @return the start of the run 3269 * 3270 * @throws IllegalStateException if this call is not preceded by a successful 3271 * call to <code>setPara</code> or <code>setLine</code> 3272 * @throws IllegalArgumentException if <code>run</code> is not in 3273 * the range <code>0<=run<countRuns()</code> 3274 * @stable ICU 3.8 3275 */ 3276 public int getRunStart(int run) 3277 { 3278 verifyValidParaOrLine(); 3279 BidiLine.getRuns(this); 3280 if (runCount == 1) { 3281 return 0; 3282 } else if (run == runCount) { 3283 return length; 3284 } 3285 verifyIndex(run, 0, runCount); 3286 getLogicalToVisualRunsMap(); 3287 return runs[logicalToVisualRunsMap[run]].start; 3288 } 3289 3290 /** 3291 * Return the index of the character past the end of the nth logical run in 3292 * this line, as an offset from the start of the line. For example, this 3293 * will return the length of the line for the last run on the line. 3294 * 3295 * @param run the index of the run, between 0 and <code>countRuns()</code> 3296 * 3297 * @return the limit of the run 3298 * 3299 * @throws IllegalStateException if this call is not preceded by a successful 3300 * call to <code>setPara</code> or <code>setLine</code> 3301 * @throws IllegalArgumentException if <code>run</code> is not in 3302 * the range <code>0<=run<countRuns()</code> 3303 * @stable ICU 3.8 3304 */ 3305 public int getRunLimit(int run) 3306 { 3307 verifyValidParaOrLine(); 3308 BidiLine.getRuns(this); 3309 if (runCount == 1) { 3310 return length; 3311 } 3312 verifyIndex(run, 0, runCount); 3313 getLogicalToVisualRunsMap(); 3314 int idx = logicalToVisualRunsMap[run]; 3315 int len = idx == 0 ? runs[idx].limit : 3316 runs[idx].limit - runs[idx-1].limit; 3317 return runs[idx].start + len; 3318 } 3319 3320 /** 3321 * Return true if the specified text requires bidi analysis. If this returns 3322 * false, the text will display left-to-right. Clients can then avoid 3323 * constructing a Bidi object. Text in the Arabic Presentation Forms area of 3324 * Unicode is presumed to already be shaped and ordered for display, and so 3325 * will not cause this method to return true. 3326 * 3327 * @param text the text containing the characters to test 3328 * @param start the start of the range of characters to test 3329 * @param limit the limit of the range of characters to test 3330 * 3331 * @return true if the range of characters requires bidi analysis 3332 * 3333 * @stable ICU 3.8 3334 */ 3335 public static boolean requiresBidi(char[] text, 3336 int start, 3337 int limit) 3338 { 3339 final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT | 3340 1 << AL | 3341 1 << RLE | 3342 1 << RLO | 3343 1 << AN); 3344 3345 if (0 > start || start > limit || limit > text.length) { 3346 throw new IllegalArgumentException("Value start " + start + 3347 " is out of range 0 to " + limit); 3348 } 3349 for (int i = start; i < limit; ++i) { 3350 if (Character.isHighSurrogate(text[i]) && i < (limit-1) && 3351 Character.isLowSurrogate(text[i+1])) { 3352 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) { 3353 return true; 3354 } 3355 } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) { 3356 return true; 3357 } 3358 } 3359 return false; 3360 } 3361 3362 /** 3363 * Reorder the objects in the array into visual order based on their levels. 3364 * This is a utility method to use when you have a collection of objects 3365 * representing runs of text in logical order, each run containing text at a 3366 * single level. The elements at <code>index</code> from 3367 * <code>objectStart</code> up to <code>objectStart + count</code> in the 3368 * objects array will be reordered into visual order assuming 3369 * each run of text has the level indicated by the corresponding element in 3370 * the levels array (at <code>index - objectStart + levelStart</code>). 3371 * 3372 * @param levels an array representing the bidi level of each object 3373 * @param levelStart the start position in the levels array 3374 * @param objects the array of objects to be reordered into visual order 3375 * @param objectStart the start position in the objects array 3376 * @param count the number of objects to reorder 3377 * @stable ICU 3.8 3378 */ 3379 public static void reorderVisually(byte[] levels, 3380 int levelStart, 3381 Object[] objects, 3382 int objectStart, 3383 int count) 3384 { 3385 if (0 > levelStart || levels.length <= levelStart) { 3386 throw new IllegalArgumentException("Value levelStart " + 3387 levelStart + " is out of range 0 to " + 3388 (levels.length-1)); 3389 } 3390 if (0 > objectStart || objects.length <= objectStart) { 3391 throw new IllegalArgumentException("Value objectStart " + 3392 levelStart + " is out of range 0 to " + 3393 (objects.length-1)); 3394 } 3395 if (0 > count || objects.length < (objectStart+count)) { 3396 throw new IllegalArgumentException("Value count " + 3397 levelStart + " is out of range 0 to " + 3398 (objects.length - objectStart)); 3399 } 3400 byte[] reorderLevels = new byte[count]; 3401 System.arraycopy(levels, levelStart, reorderLevels, 0, count); 3402 int[] indexMap = reorderVisual(reorderLevels); 3403 Object[] temp = new Object[count]; 3404 System.arraycopy(objects, objectStart, temp, 0, count); 3405 for (int i = 0; i < count; ++i) { 3406 objects[objectStart + i] = temp[indexMap[i]]; 3407 } 3408 } 3409 3410 /** 3411 * Display the bidi internal state, used in debugging. 3412 */ 3413 public String toString() { 3414 StringBuilder buf = new StringBuilder(getClass().getName()); 3415 3416 buf.append("[dir: "); 3417 buf.append(direction); 3418 buf.append(" baselevel: "); 3419 buf.append(paraLevel); 3420 buf.append(" length: "); 3421 buf.append(length); 3422 buf.append(" runs: "); 3423 if (levels == null) { 3424 buf.append("none"); 3425 } else { 3426 buf.append('['); 3427 buf.append(levels[0]); 3428 for (int i = 1; i < levels.length; i++) { 3429 buf.append(' '); 3430 buf.append(levels[i]); 3431 } 3432 buf.append(']'); 3433 } 3434 buf.append(" text: [0x"); 3435 buf.append(Integer.toHexString(text[0])); 3436 for (int i = 1; i < text.length; i++) { 3437 buf.append(" 0x"); 3438 buf.append(Integer.toHexString(text[i])); 3439 } 3440 buf.append("]]"); 3441 3442 return buf.toString(); 3443 } 3444 3445 /** 3446 * A class that provides access to constants defined by 3447 * java.awt.font.TextAttribute without creating a static dependency. 3448 */ 3449 private static class TextAttributeConstants { 3450 // Make sure to load the AWT's TextAttribute class before using the constants, if any. 3451 static { 3452 try { 3453 Class.forName("java.awt.font.TextAttribute", true, null); 3454 } catch (ClassNotFoundException e) {} 3455 } 3456 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 3457 3458 /** 3459 * TextAttribute instances (or a fake Attribute type if 3460 * java.awt.font.TextAttribute is not present) 3461 */ 3462 static final AttributedCharacterIterator.Attribute RUN_DIRECTION = 3463 getTextAttribute("RUN_DIRECTION"); 3464 static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING = 3465 getTextAttribute("NUMERIC_SHAPING"); 3466 static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING = 3467 getTextAttribute("BIDI_EMBEDDING"); 3468 3469 /** 3470 * TextAttribute.RUN_DIRECTION_LTR 3471 */ 3472 static final Boolean RUN_DIRECTION_LTR = (jafa == null) ? 3473 Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR"); 3474 3475 @SuppressWarnings("serial") 3476 private static AttributedCharacterIterator.Attribute 3477 getTextAttribute(String name) 3478 { 3479 if (jafa == null) { 3480 // fake attribute 3481 return new AttributedCharacterIterator.Attribute(name) { }; 3482 } else { 3483 return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name); 3484 } 3485 } 3486 } 3487 3488 /** 3489 * A class that provides access to java.awt.font.NumericShaper without 3490 * creating a static dependency. 3491 */ 3492 private static class NumericShapings { 3493 // Make sure to load the AWT's NumericShaper class before calling shape, if any. 3494 static { 3495 try { 3496 Class.forName("java.awt.font.NumericShaper", true, null); 3497 } catch (ClassNotFoundException e) {} 3498 } 3499 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 3500 3501 /** 3502 * Invokes NumericShaping shape(text,start,count) method. 3503 */ 3504 static void shape(Object shaper, char[] text, int start, int count) { 3505 if (jafa != null) { 3506 jafa.shape(shaper, text, start, count); 3507 } 3508 } 3509 } 3510 }