1 /* 2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 2001-2014, International Business Machines 29 * Corporation and others. All Rights Reserved. 30 ******************************************************************************* 31 */ 32 33 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 34 * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre 35 * concept of RUNS_ONLY which is a double operation. 36 * It could be advantageous to divide this into 3 concepts: 37 * a) Operation: direct / inverse / RUNS_ONLY 38 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L 39 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 40 * This would allow combinations not possible today like RUNS_ONLY with 41 * NUMBERS_SPECIAL. 42 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 43 * REMOVE_CONTROLS for the inverse step. 44 * Not all combinations would be supported, and probably not all do make sense. 45 * This would need to document which ones are supported and what are the 46 * fallbacks for unsupported combinations. 47 */ 48 49 package sun.text.bidi; 50 51 import java.lang.reflect.Array; 52 import java.text.AttributedCharacterIterator; 53 import java.text.Bidi; 54 import java.util.Arrays; 55 import jdk.internal.misc.JavaAWTFontAccess; 56 import jdk.internal.misc.SharedSecrets; 57 import sun.text.normalizer.UBiDiProps; 58 import sun.text.normalizer.UCharacter; 59 import sun.text.normalizer.UTF16; 60 61 /** 62 * 63 * <h2>Bidi algorithm for ICU</h2> 64 * 65 * This is an implementation of the Unicode Bidirectional Algorithm. The 66 * algorithm is defined in the <a 67 * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>. 68 * <p> 69 * 70 * Note: Libraries that perform a bidirectional algorithm and reorder strings 71 * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and 72 * shaping (ArabicShaping) classes can be used at the core of such "Storage 73 * Layout Engines". 74 * 75 * <h3>General remarks about the API:</h3> 76 * 77 * The "limit" of a sequence of characters is the position just after 78 * their last character, i.e., one more than that position. 79 * <p> 80 * 81 * Some of the API methods provide access to "runs". Such a 82 * "run" is defined as a sequence of characters that are at the same 83 * embedding level after performing the Bidi algorithm. 84 * 85 * <h3>Basic concept: paragraph</h3> 86 * A piece of text can be divided into several paragraphs by characters 87 * with the Bidi class <code>Block Separator</code>. For handling of 88 * paragraphs, see: 89 * <ul> 90 * <li>{@link #countParagraphs} 91 * <li>{@link #getParaLevel} 92 * <li>{@link #getParagraph} 93 * <li>{@link #getParagraphByIndex} 94 * </ul> 95 * 96 * <h3>Basic concept: text direction</h3> 97 * The direction of a piece of text may be: 98 * <ul> 99 * <li>{@link #LTR} 100 * <li>{@link #RTL} 101 * <li>{@link #MIXED} 102 * <li>{@link #NEUTRAL} 103 * </ul> 104 * 105 * <h3>Basic concept: levels</h3> 106 * 107 * Levels in this API represent embedding levels according to the Unicode 108 * Bidirectional Algorithm. 109 * Their low-order bit (even/odd value) indicates the visual direction.<p> 110 * 111 * Levels can be abstract values when used for the 112 * <code>paraLevel</code> and <code>embeddingLevels</code> 113 * arguments of <code>setPara()</code>; there: 114 * <ul> 115 * <li>the high-order bit of an <code>embeddingLevels[]</code> 116 * value indicates whether the using application is 117 * specifying the level of a character to <i>override</i> whatever the 118 * Bidi implementation would resolve it to.</li> 119 * <li><code>paraLevel</code> can be set to the 120 * pseudo-level values <code>LEVEL_DEFAULT_LTR</code> 121 * and <code>LEVEL_DEFAULT_RTL</code>.</li> 122 * </ul> 123 * 124 * <p>The related constants are not real, valid level values. 125 * <code>DEFAULT_XXX</code> can be used to specify 126 * a default for the paragraph level for 127 * when the <code>setPara()</code> method 128 * shall determine it but there is no 129 * strongly typed character in the input.<p> 130 * 131 * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even 132 * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd, 133 * just like with normal LTR and RTL level values - 134 * these special values are designed that way. Also, the implementation 135 * assumes that MAX_EXPLICIT_LEVEL is odd. 136 * 137 * <p><b>See Also:</b> 138 * <ul> 139 * <li>{@link #LEVEL_DEFAULT_LTR} 140 * <li>{@link #LEVEL_DEFAULT_RTL} 141 * <li>{@link #LEVEL_OVERRIDE} 142 * <li>{@link #MAX_EXPLICIT_LEVEL} 143 * <li>{@link #setPara} 144 * </ul> 145 * 146 * <h3>Basic concept: Reordering Mode</h3> 147 * Reordering mode values indicate which variant of the Bidi algorithm to 148 * use. 149 * 150 * <p><b>See Also:</b> 151 * <ul> 152 * <li>{@link #setReorderingMode} 153 * <li>{@link #REORDER_DEFAULT} 154 * <li>{@link #REORDER_NUMBERS_SPECIAL} 155 * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R} 156 * <li>{@link #REORDER_RUNS_ONLY} 157 * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L} 158 * <li>{@link #REORDER_INVERSE_LIKE_DIRECT} 159 * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL} 160 * </ul> 161 * 162 * <h3>Basic concept: Reordering Options</h3> 163 * Reordering options can be applied during Bidi text transformations. 164 * 165 * <p><b>See Also:</b> 166 * <ul> 167 * <li>{@link #setReorderingOptions} 168 * <li>{@link #OPTION_DEFAULT} 169 * <li>{@link #OPTION_INSERT_MARKS} 170 * <li>{@link #OPTION_REMOVE_CONTROLS} 171 * <li>{@link #OPTION_STREAMING} 172 * </ul> 173 * 174 * 175 * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer) 176 * @stable ICU 3.8 177 * 178 * 179 * <h4> Sample code for the ICU Bidi API </h4> 180 * 181 * <h5>Rendering a paragraph with the ICU Bidi API</h5> 182 * 183 * This is (hypothetical) sample code that illustrates how the ICU Bidi API 184 * could be used to render a paragraph of text. Rendering code depends highly on 185 * the graphics system, therefore this sample code must make a lot of 186 * assumptions, which may or may not match any existing graphics system's 187 * properties. 188 * 189 * <p> 190 * The basic assumptions are: 191 * </p> 192 * <ul> 193 * <li>Rendering is done from left to right on a horizontal line.</li> 194 * <li>A run of single-style, unidirectional text can be rendered at once. 195 * </li> 196 * <li>Such a run of text is passed to the graphics system with characters 197 * (code units) in logical order.</li> 198 * <li>The line-breaking algorithm is very complicated and Locale-dependent - 199 * and therefore its implementation omitted from this sample code.</li> 200 * </ul> 201 * 202 * <pre>{@code 203 * 204 * package com.ibm.icu.dev.test.bidi; 205 * 206 * import com.ibm.icu.text.Bidi; 207 * import com.ibm.icu.text.BidiRun; 208 * 209 * public class Sample { 210 * 211 * static final int styleNormal = 0; 212 * static final int styleSelected = 1; 213 * static final int styleBold = 2; 214 * static final int styleItalics = 4; 215 * static final int styleSuper=8; 216 * static final int styleSub = 16; 217 * 218 * static class StyleRun { 219 * int limit; 220 * int style; 221 * 222 * public StyleRun(int limit, int style) { 223 * this.limit = limit; 224 * this.style = style; 225 * } 226 * } 227 * 228 * static class Bounds { 229 * int start; 230 * int limit; 231 * 232 * public Bounds(int start, int limit) { 233 * this.start = start; 234 * this.limit = limit; 235 * } 236 * } 237 * 238 * static int getTextWidth(String text, int start, int limit, 239 * StyleRun[] styleRuns, int styleRunCount) { 240 * // simplistic way to compute the width 241 * return limit - start; 242 * } 243 * 244 * // set limit and StyleRun limit for a line 245 * // from text[start] and from styleRuns[styleRunStart] 246 * // using Bidi.getLogicalRun(...) 247 * // returns line width 248 * static int getLineBreak(String text, Bounds line, Bidi para, 249 * StyleRun styleRuns[], Bounds styleRun) { 250 * // dummy return 251 * return 0; 252 * } 253 * 254 * // render runs on a line sequentially, always from left to right 255 * 256 * // prepare rendering a new line 257 * static void startLine(byte textDirection, int lineWidth) { 258 * System.out.println(); 259 * } 260 * 261 * // render a run of text and advance to the right by the run width 262 * // the text[start..limit-1] is always in logical order 263 * static void renderRun(String text, int start, int limit, 264 * byte textDirection, int style) { 265 * } 266 * 267 * // We could compute a cross-product 268 * // from the style runs with the directional runs 269 * // and then reorder it. 270 * // Instead, here we iterate over each run type 271 * // and render the intersections - 272 * // with shortcuts in simple (and common) cases. 273 * // renderParagraph() is the main function. 274 * 275 * // render a directional run with 276 * // (possibly) multiple style runs intersecting with it 277 * static void renderDirectionalRun(String text, int start, int limit, 278 * byte direction, StyleRun styleRuns[], 279 * int styleRunCount) { 280 * int i; 281 * 282 * // iterate over style runs 283 * if (direction == Bidi.LTR) { 284 * int styleLimit; 285 * for (i = 0; i < styleRunCount; ++i) { 286 * styleLimit = styleRuns[i].limit; 287 * if (start < styleLimit) { 288 * if (styleLimit > limit) { 289 * styleLimit = limit; 290 * } 291 * renderRun(text, start, styleLimit, 292 * direction, styleRuns[i].style); 293 * if (styleLimit == limit) { 294 * break; 295 * } 296 * start = styleLimit; 297 * } 298 * } 299 * } else { 300 * int styleStart; 301 * 302 * for (i = styleRunCount-1; i >= 0; --i) { 303 * if (i > 0) { 304 * styleStart = styleRuns[i-1].limit; 305 * } else { 306 * styleStart = 0; 307 * } 308 * if (limit >= styleStart) { 309 * if (styleStart < start) { 310 * styleStart = start; 311 * } 312 * renderRun(text, styleStart, limit, direction, 313 * styleRuns[i].style); 314 * if (styleStart == start) { 315 * break; 316 * } 317 * limit = styleStart; 318 * } 319 * } 320 * } 321 * } 322 * 323 * // the line object represents text[start..limit-1] 324 * static void renderLine(Bidi line, String text, int start, int limit, 325 * StyleRun styleRuns[], int styleRunCount) { 326 * byte direction = line.getDirection(); 327 * if (direction != Bidi.MIXED) { 328 * // unidirectional 329 * if (styleRunCount <= 1) { 330 * renderRun(text, start, limit, direction, styleRuns[0].style); 331 * } else { 332 * renderDirectionalRun(text, start, limit, direction, 333 * styleRuns, styleRunCount); 334 * } 335 * } else { 336 * // mixed-directional 337 * int count, i; 338 * BidiRun run; 339 * 340 * try { 341 * count = line.countRuns(); 342 * } catch (IllegalStateException e) { 343 * e.printStackTrace(); 344 * return; 345 * } 346 * if (styleRunCount <= 1) { 347 * int style = styleRuns[0].style; 348 * 349 * // iterate over directional runs 350 * for (i = 0; i < count; ++i) { 351 * run = line.getVisualRun(i); 352 * renderRun(text, run.getStart(), run.getLimit(), 353 * run.getDirection(), style); 354 * } 355 * } else { 356 * // iterate over both directional and style runs 357 * for (i = 0; i < count; ++i) { 358 * run = line.getVisualRun(i); 359 * renderDirectionalRun(text, run.getStart(), 360 * run.getLimit(), run.getDirection(), 361 * styleRuns, styleRunCount); 362 * } 363 * } 364 * } 365 * } 366 * 367 * static void renderParagraph(String text, byte textDirection, 368 * StyleRun styleRuns[], int styleRunCount, 369 * int lineWidth) { 370 * int length = text.length(); 371 * Bidi para = new Bidi(); 372 * try { 373 * para.setPara(text, 374 * textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL 375 * : Bidi.LEVEL_DEFAULT_LTR, 376 * null); 377 * } catch (Exception e) { 378 * e.printStackTrace(); 379 * return; 380 * } 381 * byte paraLevel = (byte)(1 & para.getParaLevel()); 382 * StyleRun styleRun = new StyleRun(length, styleNormal); 383 * 384 * if (styleRuns == null || styleRunCount <= 0) { 385 * styleRuns = new StyleRun[1]; 386 * styleRunCount = 1; 387 * styleRuns[0] = styleRun; 388 * } 389 * // assume styleRuns[styleRunCount-1].limit>=length 390 * 391 * int width = getTextWidth(text, 0, length, styleRuns, styleRunCount); 392 * if (width <= lineWidth) { 393 * // everything fits onto one line 394 * 395 * // prepare rendering a new line from either left or right 396 * startLine(paraLevel, width); 397 * 398 * renderLine(para, text, 0, length, styleRuns, styleRunCount); 399 * } else { 400 * // we need to render several lines 401 * Bidi line = new Bidi(length, 0); 402 * int start = 0, limit; 403 * int styleRunStart = 0, styleRunLimit; 404 * 405 * for (;;) { 406 * limit = length; 407 * styleRunLimit = styleRunCount; 408 * width = getLineBreak(text, new Bounds(start, limit), 409 * para, styleRuns, 410 * new Bounds(styleRunStart, styleRunLimit)); 411 * try { 412 * line = para.setLine(start, limit); 413 * } catch (Exception e) { 414 * e.printStackTrace(); 415 * return; 416 * } 417 * // prepare rendering a new line 418 * // from either left or right 419 * startLine(paraLevel, width); 420 * 421 * if (styleRunStart > 0) { 422 * int newRunCount = styleRuns.length - styleRunStart; 423 * StyleRun[] newRuns = new StyleRun[newRunCount]; 424 * System.arraycopy(styleRuns, styleRunStart, newRuns, 0, 425 * newRunCount); 426 * renderLine(line, text, start, limit, newRuns, 427 * styleRunLimit - styleRunStart); 428 * } else { 429 * renderLine(line, text, start, limit, styleRuns, 430 * styleRunLimit - styleRunStart); 431 * } 432 * if (limit == length) { 433 * break; 434 * } 435 * start = limit; 436 * styleRunStart = styleRunLimit - 1; 437 * if (start >= styleRuns[styleRunStart].limit) { 438 * ++styleRunStart; 439 * } 440 * } 441 * } 442 * } 443 * 444 * public static void main(String[] args) 445 * { 446 * renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80); 447 * renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60); 448 * } 449 * } 450 * 451 * }</pre> 452 */ 453 454 /* 455 * General implementation notes: 456 * 457 * Throughout the implementation, there are comments like (W2) that refer to 458 * rules of the BiDi algorithm, in this example to the second rule of the 459 * resolution of weak types. 460 * 461 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 462 * character according to UTF-16, the second UChar gets the directional property of 463 * the entire character assigned, while the first one gets a BN, a boundary 464 * neutral, type, which is ignored by most of the algorithm according to 465 * rule (X9) and the implementation suggestions of the BiDi algorithm. 466 * 467 * Later, adjustWSLevels() will set the level for each BN to that of the 468 * following character (UChar), which results in surrogate pairs getting the 469 * same level on each of their surrogates. 470 * 471 * In a UTF-8 implementation, the same thing could be done: the last byte of 472 * a multi-byte sequence would get the "real" property, while all previous 473 * bytes of that sequence would get BN. 474 * 475 * It is not possible to assign all those parts of a character the same real 476 * property because this would fail in the resolution of weak types with rules 477 * that look at immediately surrounding types. 478 * 479 * As a related topic, this implementation does not remove Boundary Neutral 480 * types from the input, but ignores them wherever this is relevant. 481 * For example, the loop for the resolution of the weak types reads 482 * types until it finds a non-BN. 483 * Also, explicit embedding codes are neither changed into BN nor removed. 484 * They are only treated the same way real BNs are. 485 * As stated before, adjustWSLevels() takes care of them at the end. 486 * For the purpose of conformance, the levels of all these codes 487 * do not matter. 488 * 489 * Note that this implementation modifies the dirProps 490 * after the initial setup, when applying X5c (replace FSI by LRI or RLI), 491 * X6, N0 (replace paired brackets by L or R). 492 * 493 * In this implementation, the resolution of weak types (W1 to W6), 494 * neutrals (N1 and N2), and the assignment of the resolved level (In) 495 * are all done in one single loop, in resolveImplicitLevels(). 496 * Changes of dirProp values are done on the fly, without writing 497 * them back to the dirProps array. 498 * 499 * 500 * This implementation contains code that allows to bypass steps of the 501 * algorithm that are not needed on the specific paragraph 502 * in order to speed up the most common cases considerably, 503 * like text that is entirely LTR, or RTL text without numbers. 504 * 505 * Most of this is done by setting a bit for each directional property 506 * in a flags variable and later checking for whether there are 507 * any LTR characters or any RTL characters, or both, whether 508 * there are any explicit embedding codes, etc. 509 * 510 * If the (Xn) steps are performed, then the flags are re-evaluated, 511 * because they will then not contain the embedding codes any more 512 * and will be adjusted for override codes, so that subsequently 513 * more bypassing may be possible than what the initial flags suggested. 514 * 515 * If the text is not mixed-directional, then the 516 * algorithm steps for the weak type resolution are not performed, 517 * and all levels are set to the paragraph level. 518 * 519 * If there are no explicit embedding codes, then the (Xn) steps 520 * are not performed. 521 * 522 * If embedding levels are supplied as a parameter, then all 523 * explicit embedding codes are ignored, and the (Xn) steps 524 * are not performed. 525 * 526 * White Space types could get the level of the run they belong to, 527 * and are checked with a test of (flags&MASK_EMBEDDING) to 528 * consider if the paragraph direction should be considered in 529 * the flags variable. 530 * 531 * If there are no White Space types in the paragraph, then 532 * (L1) is not necessary in adjustWSLevels(). 533 */ 534 535 public class BidiBase { 536 537 static class Point { 538 int pos; /* position in text */ 539 int flag; /* flag for LRM/RLM, before/after */ 540 } 541 542 static class InsertPoints { 543 int size; 544 int confirmed; 545 Point[] points = new Point[0]; 546 } 547 548 static class Opening { 549 int position; /* position of opening bracket */ 550 int match; /* matching char or -position of closing bracket */ 551 int contextPos; /* position of last strong char found before opening */ 552 short flags; /* bits for L or R/AL found within the pair */ 553 byte contextDir; /* L or R according to last strong char before opening */ 554 } 555 556 static class IsoRun { 557 int contextPos; /* position of char determining context */ 558 short start; /* index of first opening entry for this run */ 559 short limit; /* index after last opening entry for this run */ 560 byte level; /* level of this run */ 561 byte lastStrong; /* bidi class of last strong char found in this run */ 562 byte lastBase; /* bidi class of last base char found in this run */ 563 byte contextDir; /* L or R to use as context for following openings */ 564 } 565 566 static class BracketData { 567 Opening[] openings = new Opening[SIMPLE_PARAS_COUNT]; 568 int isoRunLast; /* index of last used entry */ 569 /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL 570 + 1 for index 0, + 1 for before the first isolated sequence */ 571 IsoRun[] isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2]; 572 boolean isNumbersSpecial; /*reordering mode for NUMBERS_SPECIAL */ 573 } 574 575 static class Isolate { 576 int startON; 577 int start1; 578 short stateImp; 579 short state; 580 } 581 582 /** Paragraph level setting<p> 583 * 584 * Constant indicating that the base direction depends on the first strong 585 * directional character in the text according to the Unicode Bidirectional 586 * Algorithm. If no strong directional character is present, 587 * then set the paragraph level to 0 (left-to-right).<p> 588 * 589 * If this value is used in conjunction with reordering modes 590 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 591 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 592 * is assumed to be visual LTR, and the text after reordering is required 593 * to be the corresponding logical string with appropriate contextual 594 * direction. The direction of the result string will be RTL if either 595 * the rightmost or leftmost strong character of the source text is RTL 596 * or Arabic Letter, the direction will be LTR otherwise.<p> 597 * 598 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 599 * be added at the beginning of the result string to ensure round trip 600 * (that the result string, when reordered back to visual, will produce 601 * the original source text). 602 * @see #REORDER_INVERSE_LIKE_DIRECT 603 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 604 * @stable ICU 3.8 605 */ 606 public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e; 607 608 /** Paragraph level setting<p> 609 * 610 * Constant indicating that the base direction depends on the first strong 611 * directional character in the text according to the Unicode Bidirectional 612 * Algorithm. If no strong directional character is present, 613 * then set the paragraph level to 1 (right-to-left).<p> 614 * 615 * If this value is used in conjunction with reordering modes 616 * <code>REORDER_INVERSE_LIKE_DIRECT</code> or 617 * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder 618 * is assumed to be visual LTR, and the text after reordering is required 619 * to be the corresponding logical string with appropriate contextual 620 * direction. The direction of the result string will be RTL if either 621 * the rightmost or leftmost strong character of the source text is RTL 622 * or Arabic Letter, or if the text contains no strong character; 623 * the direction will be LTR otherwise.<p> 624 * 625 * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may 626 * be added at the beginning of the result string to ensure round trip 627 * (that the result string, when reordered back to visual, will produce 628 * the original source text). 629 * @see #REORDER_INVERSE_LIKE_DIRECT 630 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 631 * @stable ICU 3.8 632 */ 633 public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f; 634 635 /** 636 * Maximum explicit embedding level. 637 * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>). 638 * @stable ICU 3.8 639 */ 640 public static final byte MAX_EXPLICIT_LEVEL = 125; 641 642 /** 643 * Bit flag for level input. 644 * Overrides directional properties. 645 * @stable ICU 3.8 646 */ 647 public static final byte LEVEL_OVERRIDE = (byte)0x80; 648 649 /** 650 * Special value which can be returned by the mapping methods when a 651 * logical index has no corresponding visual index or vice-versa. This may 652 * happen for the logical-to-visual mapping of a Bidi control when option 653 * <code>OPTION_REMOVE_CONTROLS</code> is 654 * specified. This can also happen for the visual-to-logical mapping of a 655 * Bidi mark (LRM or RLM) inserted by option 656 * <code>OPTION_INSERT_MARKS</code>. 657 * @see #getVisualIndex 658 * @see #getVisualMap 659 * @see #getLogicalIndex 660 * @see #getLogicalMap 661 * @see #OPTION_INSERT_MARKS 662 * @see #OPTION_REMOVE_CONTROLS 663 * @stable ICU 3.8 664 */ 665 public static final int MAP_NOWHERE = -1; 666 667 /** 668 * Left-to-right text. 669 * <ul> 670 * <li>As return value for <code>getDirection()</code>, it means 671 * that the source string contains no right-to-left characters, or 672 * that the source string is empty and the paragraph level is even. 673 * <li>As return value for <code>getBaseDirection()</code>, it 674 * means that the first strong character of the source string has 675 * a left-to-right direction. 676 * </ul> 677 * @stable ICU 3.8 678 */ 679 public static final byte LTR = 0; 680 681 /** 682 * Right-to-left text. 683 * <ul> 684 * <li>As return value for <code>getDirection()</code>, it means 685 * that the source string contains no left-to-right characters, or 686 * that the source string is empty and the paragraph level is odd. 687 * <li>As return value for <code>getBaseDirection()</code>, it 688 * means that the first strong character of the source string has 689 * a right-to-left direction. 690 * </ul> 691 * @stable ICU 3.8 692 */ 693 public static final byte RTL = 1; 694 695 /** 696 * Mixed-directional text. 697 * <p>As return value for <code>getDirection()</code>, it means 698 * that the source string contains both left-to-right and 699 * right-to-left characters. 700 * @stable ICU 3.8 701 */ 702 public static final byte MIXED = 2; 703 704 /** 705 * option bit for writeReordered(): 706 * keep combining characters after their base characters in RTL runs 707 * 708 * @see #writeReordered 709 * @stable ICU 3.8 710 */ 711 public static final short KEEP_BASE_COMBINING = 1; 712 713 /** 714 * option bit for writeReordered(): 715 * replace characters with the "mirrored" property in RTL runs 716 * by their mirror-image mappings 717 * 718 * @see #writeReordered 719 * @stable ICU 3.8 720 */ 721 public static final short DO_MIRRORING = 2; 722 723 /** 724 * option bit for writeReordered(): 725 * surround the run with LRMs if necessary; 726 * this is part of the approximate "inverse Bidi" algorithm 727 * 728 * <p>This option does not imply corresponding adjustment of the index 729 * mappings.</p> 730 * 731 * @see #setInverse 732 * @see #writeReordered 733 * @stable ICU 3.8 734 */ 735 public static final short INSERT_LRM_FOR_NUMERIC = 4; 736 737 /** 738 * option bit for writeReordered(): 739 * remove Bidi control characters 740 * (this does not affect INSERT_LRM_FOR_NUMERIC) 741 * 742 * <p>This option does not imply corresponding adjustment of the index 743 * mappings.</p> 744 * 745 * @see #writeReordered 746 * @see #INSERT_LRM_FOR_NUMERIC 747 * @stable ICU 3.8 748 */ 749 public static final short REMOVE_BIDI_CONTROLS = 8; 750 751 /** 752 * option bit for writeReordered(): 753 * write the output in reverse order 754 * 755 * <p>This has the same effect as calling <code>writeReordered()</code> 756 * first without this option, and then calling 757 * <code>writeReverse()</code> without mirroring. 758 * Doing this in the same step is faster and avoids a temporary buffer. 759 * An example for using this option is output to a character terminal that 760 * is designed for RTL scripts and stores text in reverse order.</p> 761 * 762 * @see #writeReordered 763 * @stable ICU 3.8 764 */ 765 public static final short OUTPUT_REVERSE = 16; 766 767 /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode. 768 * @see #setReorderingMode 769 * @stable ICU 3.8 770 */ 771 private static final short REORDER_DEFAULT = 0; 772 773 /** Reordering mode: Logical to Visual algorithm which handles numbers in 774 * a way which mimicks the behavior of Windows XP. 775 * @see #setReorderingMode 776 * @stable ICU 3.8 777 */ 778 private static final short REORDER_NUMBERS_SPECIAL = 1; 779 780 /** Reordering mode: Logical to Visual algorithm grouping numbers with 781 * adjacent R characters (reversible algorithm). 782 * @see #setReorderingMode 783 * @stable ICU 3.8 784 */ 785 private static final short REORDER_GROUP_NUMBERS_WITH_R = 2; 786 787 /** Reordering mode: Reorder runs only to transform a Logical LTR string 788 * to the logical RTL string with the same display, or vice-versa.<br> 789 * If this mode is set together with option 790 * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source 791 * text may be removed and other controls may be added to produce the 792 * minimum combination which has the required display. 793 * @see #OPTION_INSERT_MARKS 794 * @see #setReorderingMode 795 * @stable ICU 3.8 796 */ 797 static final short REORDER_RUNS_ONLY = 3; 798 799 /** Reordering mode: Visual to Logical algorithm which handles numbers 800 * like L (same algorithm as selected by <code>setInverse(true)</code>. 801 * @see #setInverse 802 * @see #setReorderingMode 803 * @stable ICU 3.8 804 */ 805 static final short REORDER_INVERSE_NUMBERS_AS_L = 4; 806 807 /** Reordering mode: Visual to Logical algorithm equivalent to the regular 808 * Logical to Visual algorithm. 809 * @see #setReorderingMode 810 * @stable ICU 3.8 811 */ 812 static final short REORDER_INVERSE_LIKE_DIRECT = 5; 813 814 /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the 815 * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. 816 * @see #setReorderingMode 817 * @stable ICU 3.8 818 */ 819 static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6; 820 821 /* Reordering mode values must be ordered so that all the regular logical to 822 * visual modes come first, and all inverse Bidi modes come last. 823 */ 824 private static final short REORDER_LAST_LOGICAL_TO_VISUAL = 825 REORDER_NUMBERS_SPECIAL; 826 827 /** 828 * Option bit for <code>setReorderingOptions</code>: 829 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of 830 * a reordering to a Logical order 831 * 832 * <p>This option must be set or reset before calling 833 * <code>setPara</code>.</p> 834 * 835 * <p>This option is significant only with reordering modes which generate 836 * a result with Logical order, specifically.</p> 837 * <ul> 838 * <li><code>REORDER_RUNS_ONLY</code></li> 839 * <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li> 840 * <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li> 841 * <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> 842 * </ul> 843 * 844 * <p>If this option is set in conjunction with reordering mode 845 * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling 846 * <code>setInverse(true)</code>, it implies option 847 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 848 * <code>writeReordered()</code>.</p> 849 * 850 * <p>For other reordering modes, a minimum number of LRM or RLM characters 851 * will be added to the source text after reordering it so as to ensure 852 * round trip, i.e. when applying the inverse reordering mode on the 853 * resulting logical text with removal of Bidi marks 854 * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling 855 * <code>setPara()</code> or option 856 * <code>REMOVE_BIDI_CONTROLS</code> in 857 * <code>writeReordered</code>), the result will be identical to the 858 * source text in the first transformation. 859 * 860 * <p>This option will be ignored if specified together with option 861 * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option 862 * <code>REMOVE_BIDI_CONTROLS</code> in calls to method 863 * <code>writeReordered()</code> and it implies option 864 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 865 * <code>writeReordered()</code> if the reordering mode is 866 * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p> 867 * 868 * @see #setReorderingMode 869 * @see #setReorderingOptions 870 * @see #INSERT_LRM_FOR_NUMERIC 871 * @see #REMOVE_BIDI_CONTROLS 872 * @see #OPTION_REMOVE_CONTROLS 873 * @see #REORDER_RUNS_ONLY 874 * @see #REORDER_INVERSE_NUMBERS_AS_L 875 * @see #REORDER_INVERSE_LIKE_DIRECT 876 * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL 877 * @stable ICU 3.8 878 */ 879 static final int OPTION_INSERT_MARKS = 1; 880 881 /** 882 * Option bit for <code>setReorderingOptions</code>: 883 * remove Bidi control characters 884 * 885 * <p>This option must be set or reset before calling 886 * <code>setPara</code>.</p> 887 * 888 * <p>This option nullifies option 889 * <code>OPTION_INSERT_MARKS</code>. It inhibits option 890 * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method 891 * <code>writeReordered()</code> and it implies option 892 * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p> 893 * 894 * @see #setReorderingMode 895 * @see #setReorderingOptions 896 * @see #OPTION_INSERT_MARKS 897 * @see #INSERT_LRM_FOR_NUMERIC 898 * @see #REMOVE_BIDI_CONTROLS 899 * @stable ICU 3.8 900 */ 901 static final int OPTION_REMOVE_CONTROLS = 2; 902 903 /** 904 * Option bit for <code>setReorderingOptions</code>: 905 * process the output as part of a stream to be continued 906 * 907 * <p>This option must be set or reset before calling 908 * <code>setPara</code>.</p> 909 * 910 * <p>This option specifies that the caller is interested in processing 911 * large text object in parts. The results of the successive calls are 912 * expected to be concatenated by the caller. Only the call for the last 913 * part will have this option bit off.</p> 914 * 915 * <p>When this option bit is on, <code>setPara()</code> may process 916 * less than the full source text in order to truncate the text at a 917 * meaningful boundary. The caller should call 918 * <code>getProcessedLength()</code> immediately after calling 919 * <code>setPara()</code> in order to determine how much of the source 920 * text has been processed. Source text beyond that length should be 921 * resubmitted in following calls to <code>setPara</code>. The 922 * processed length may be less than the length of the source text if a 923 * character preceding the last character of the source text constitutes a 924 * reasonable boundary (like a block separator) for text to be continued.<br> 925 * If the last character of the source text constitutes a reasonable 926 * boundary, the whole text will be processed at once.<br> 927 * If nowhere in the source text there exists 928 * such a reasonable boundary, the processed length will be zero.<br> 929 * The caller should check for such an occurrence and do one of the following: 930 * <ul><li>submit a larger amount of text with a better chance to include 931 * a reasonable boundary.</li> 932 * <li>resubmit the same text after turning off option 933 * <code>OPTION_STREAMING</code>.</li></ul> 934 * In all cases, this option should be turned off before processing the last 935 * part of the text.</p> 936 * 937 * <p>When the <code>OPTION_STREAMING</code> option is used, it is 938 * recommended to call <code>orderParagraphsLTR(true)</code> before calling 939 * <code>setPara()</code> so that later paragraphs may be concatenated to 940 * previous paragraphs on the right. 941 * </p> 942 * 943 * @see #setReorderingMode 944 * @see #setReorderingOptions 945 * @see #getProcessedLength 946 * @stable ICU 3.8 947 */ 948 private static final int OPTION_STREAMING = 4; 949 950 /* 951 * Comparing the description of the Bidi algorithm with this implementation 952 * is easier with the same names for the Bidi types in the code as there. 953 * See UCharacterDirection 954 */ 955 /* private */ static final byte L = 0; 956 private static final byte R = 1; 957 private static final byte EN = 2; 958 private static final byte ES = 3; 959 private static final byte ET = 4; 960 private static final byte AN = 5; 961 private static final byte CS = 6; 962 static final byte B = 7; 963 private static final byte S = 8; 964 private static final byte WS = 9; 965 private static final byte ON = 10; 966 private static final byte LRE = 11; 967 private static final byte LRO = 12; 968 private static final byte AL = 13; 969 private static final byte RLE = 14; 970 private static final byte RLO = 15; 971 private static final byte PDF = 16; 972 private static final byte NSM = 17; 973 private static final byte BN = 18; 974 private static final byte FSI = 19; 975 private static final byte LRI = 20; 976 private static final byte RLI = 21; 977 private static final byte PDI = 22; 978 private static final byte ENL = PDI + 1; /* EN after W7 */ 979 private static final byte ENR = ENL + 1; /* EN not subject to W7 */ 980 981 // Number of directional types 982 private static final int CHAR_DIRECTION_COUNT = 23; 983 984 /** 985 * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). 986 * Used in UAX #9: Unicode Bidirectional Algorithm 987 * (http://www.unicode.org/reports/tr9/) 988 * Returns UCharacter.BidiPairedBracketType values. 989 * @stable ICU 52 990 */ 991 public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015; 992 993 /** 994 * Bidi Paired Bracket Type constants. 995 * 996 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 997 * @stable ICU 52 998 */ 999 public static interface BidiPairedBracketType { 1000 /** 1001 * Not a paired bracket. 1002 * @stable ICU 52 1003 */ 1004 public static final int NONE = 0; 1005 /** 1006 * Open paired bracket. 1007 * @stable ICU 52 1008 */ 1009 public static final int OPEN = 1; 1010 /** 1011 * Close paired bracket. 1012 * @stable ICU 52 1013 */ 1014 public static final int CLOSE = 2; 1015 /** 1016 * @stable ICU 52 1017 */ 1018 public static final int COUNT = 3; 1019 } 1020 1021 /* number of paras entries allocated initially */ 1022 static final int SIMPLE_PARAS_COUNT = 10; 1023 1024 private static final char CR = '\r'; 1025 private static final char LF = '\n'; 1026 1027 static final int LRM_BEFORE = 1; 1028 static final int LRM_AFTER = 2; 1029 static final int RLM_BEFORE = 4; 1030 static final int RLM_AFTER = 8; 1031 1032 /* flags for Opening.flags */ 1033 static final byte FOUND_L = (byte)DirPropFlag(L); 1034 static final byte FOUND_R = (byte)DirPropFlag(R); 1035 1036 /* 1037 * The following bit is used for the directional isolate status. 1038 * Stack entries corresponding to isolate sequences are greater than ISOLATE. 1039 */ 1040 static final int ISOLATE = 0x0100; 1041 1042 /* 1043 * reference to parent paragraph object (reference to self if this object is 1044 * a paragraph object); set to null in a newly opened object; set to a 1045 * real value after a successful execution of setPara or setLine 1046 */ 1047 BidiBase paraBidi; 1048 1049 final UBiDiProps bdp; 1050 1051 /* character array representing the current text */ 1052 char[] text; 1053 1054 /* length of the current text */ 1055 int originalLength; 1056 1057 /* if the option OPTION_STREAMING is set, this is the length of 1058 * text actually processed by <code>setPara</code>, which may be shorter 1059 * than the original length. Otherwise, it is identical to the original 1060 * length. 1061 */ 1062 public int length; 1063 1064 /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi 1065 * marks are allowed to be inserted in one of the reordering modes, the 1066 * length of the result string may be different from the processed length. 1067 */ 1068 int resultLength; 1069 1070 /* indicators for whether memory may be allocated after construction */ 1071 boolean mayAllocateText; 1072 boolean mayAllocateRuns; 1073 1074 /* arrays with one value per text-character */ 1075 byte[] dirPropsMemory = new byte[1]; 1076 byte[] levelsMemory = new byte[1]; 1077 byte[] dirProps; 1078 byte[] levels; 1079 1080 /* are we performing an approximation of the "inverse Bidi" algorithm? */ 1081 boolean isInverse; 1082 1083 /* are we using the basic algorithm or its variation? */ 1084 int reorderingMode; 1085 1086 /* bitmask for reordering options */ 1087 int reorderingOptions; 1088 1089 /* must block separators receive level 0? */ 1090 boolean orderParagraphsLTR; 1091 1092 /* the paragraph level */ 1093 byte paraLevel; 1094 1095 /* original paraLevel when contextual */ 1096 /* must be one of DEFAULT_xxx or 0 if not contextual */ 1097 byte defaultParaLevel; 1098 1099 /* the following is set in setPara, used in processPropertySeq */ 1100 1101 ImpTabPair impTabPair; /* reference to levels state table pair */ 1102 1103 /* the overall paragraph or line directionality*/ 1104 byte direction; 1105 1106 /* flags is a bit set for which directional properties are in the text */ 1107 int flags; 1108 1109 /* lastArabicPos is index to the last AL in the text, -1 if none */ 1110 int lastArabicPos; 1111 1112 /* characters after trailingWSStart are WS and are */ 1113 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ 1114 int trailingWSStart; 1115 1116 /* fields for paragraph handling, set in getDirProps() */ 1117 int paraCount; 1118 int[] paras_limit = new int[SIMPLE_PARAS_COUNT]; 1119 byte[] paras_level = new byte[SIMPLE_PARAS_COUNT]; 1120 1121 /* fields for line reordering */ 1122 int runCount; /* ==-1: runs not set up yet */ 1123 BidiRun[] runsMemory = new BidiRun[0]; 1124 BidiRun[] runs; 1125 1126 /* for non-mixed text, we only need a tiny array of runs (no allocation) */ 1127 BidiRun[] simpleRuns = {new BidiRun()}; 1128 1129 /* fields for managing isolate sequences */ 1130 Isolate[] isolates; 1131 1132 /* maximum or current nesting depth of isolate sequences */ 1133 /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal 1134 nesting encountered. 1135 Within resolveImplicitLevels(), this is the index of the current isolates 1136 stack entry. */ 1137 int isolateCount; 1138 1139 /* mapping of runs in logical order to visual order */ 1140 int[] logicalToVisualRunsMap; 1141 /* flag to indicate that the map has been updated */ 1142 boolean isGoodLogicalToVisualRunsMap; 1143 1144 /* for inverse Bidi with insertion of directional marks */ 1145 InsertPoints insertPoints = new InsertPoints(); 1146 1147 /* for option OPTION_REMOVE_CONTROLS */ 1148 int controlCount; 1149 1150 /* 1151 * Sometimes, bit values are more appropriate 1152 * to deal with directionality properties. 1153 * Abbreviations in these method names refer to names 1154 * used in the Bidi algorithm. 1155 */ 1156 static int DirPropFlag(byte dir) { 1157 return (1 << dir); 1158 } 1159 1160 boolean testDirPropFlagAt(int flag, int index) { 1161 return ((DirPropFlag(dirProps[index]) & flag) != 0); 1162 } 1163 1164 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); 1165 1166 /* to avoid some conditional statements, use tiny constant arrays */ 1167 static final int[] DirPropFlagLR = { DirPropFlag(L), DirPropFlag(R) }; 1168 static final int[] DirPropFlagE = { DirPropFlag(LRE), DirPropFlag(RLE) }; 1169 static final int[] DirPropFlagO = { DirPropFlag(LRO), DirPropFlag(RLO) }; 1170 1171 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } 1172 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; } 1173 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; } 1174 static final byte DirFromStrong(byte strong) { return strong == L ? L : R; } 1175 static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); } 1176 1177 /* are there any characters that are LTR or RTL? */ 1178 static final int MASK_LTR = 1179 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI); 1180 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI); 1181 1182 static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL); 1183 1184 /* explicit embedding codes */ 1185 private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF); 1186 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; 1187 1188 /* explicit isolate codes */ 1189 private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI); 1190 1191 /* paragraph and segment separators */ 1192 private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S); 1193 1194 /* all types that are counted as White Space or Neutral in some steps */ 1195 static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO; 1196 1197 /* types that are neutrals or could becomes neutrals in (Wn) */ 1198 private static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS; 1199 1200 /* 1201 * These types may be changed to "e", 1202 * the embedding type (L or R) of the run, 1203 * in the Bidi algorithm (N2) 1204 */ 1205 private static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N; 1206 1207 /* 1208 * the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java 1209 */ 1210 private static byte GetLRFromLevel(byte level) 1211 { 1212 return (byte)(level & 1); 1213 } 1214 1215 private static boolean IsDefaultLevel(byte level) 1216 { 1217 return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR); 1218 } 1219 1220 static boolean IsBidiControlChar(int c) 1221 { 1222 /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or 1223 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 1224 return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)) 1225 || ((c >= 0x2066) && (c <= 0x2069))); 1226 } 1227 1228 void verifyValidPara() 1229 { 1230 if (!(this == this.paraBidi)) { 1231 throw new IllegalStateException(); 1232 } 1233 } 1234 1235 void verifyValidParaOrLine() 1236 { 1237 BidiBase para = this.paraBidi; 1238 /* verify Para */ 1239 if (this == para) { 1240 return; 1241 } 1242 /* verify Line */ 1243 if ((para == null) || (para != para.paraBidi)) { 1244 throw new IllegalStateException(); 1245 } 1246 } 1247 1248 void verifyRange(int index, int start, int limit) 1249 { 1250 if (index < start || index >= limit) { 1251 throw new IllegalArgumentException("Value " + index + 1252 " is out of range " + start + " to " + limit); 1253 } 1254 } 1255 1256 /** 1257 * Allocate a <code>Bidi</code> object with preallocated memory 1258 * for internal structures. 1259 * This method provides a <code>Bidi</code> object like the default constructor 1260 * but it also preallocates memory for internal structures 1261 * according to the sizings supplied by the caller.<p> 1262 * The preallocation can be limited to some of the internal memory 1263 * by setting some values to 0 here. That means that if, e.g., 1264 * <code>maxRunCount</code> cannot be reasonably predetermined and should not 1265 * be set to <code>maxLength</code> (the only failproof value) to avoid 1266 * wasting memory, then <code>maxRunCount</code> could be set to 0 here 1267 * and the internal structures that are associated with it will be allocated 1268 * on demand, just like with the default constructor. 1269 * 1270 * @param maxLength is the maximum text or line length that internal memory 1271 * will be preallocated for. An attempt to associate this object with a 1272 * longer text will fail, unless this value is 0, which leaves the allocation 1273 * up to the implementation. 1274 * 1275 * @param maxRunCount is the maximum anticipated number of same-level runs 1276 * that internal memory will be preallocated for. An attempt to access 1277 * visual runs on an object that was not preallocated for as many runs 1278 * as the text was actually resolved to will fail, 1279 * unless this value is 0, which leaves the allocation up to the implementation.<br><br> 1280 * The number of runs depends on the actual text and maybe anywhere between 1281 * 1 and <code>maxLength</code>. It is typically small. 1282 * 1283 * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0 1284 * @stable ICU 3.8 1285 */ 1286 public BidiBase(int maxLength, int maxRunCount) 1287 { 1288 /* check the argument values */ 1289 if (maxLength < 0 || maxRunCount < 0) { 1290 throw new IllegalArgumentException(); 1291 } 1292 1293 /* reset the object, all reference variables null, all flags false, 1294 all sizes 0. 1295 In fact, we don't need to do anything, since class members are 1296 initialized as zero when an instance is created. 1297 */ 1298 /* 1299 mayAllocateText = false; 1300 mayAllocateRuns = false; 1301 orderParagraphsLTR = false; 1302 paraCount = 0; 1303 runCount = 0; 1304 trailingWSStart = 0; 1305 flags = 0; 1306 paraLevel = 0; 1307 defaultParaLevel = 0; 1308 direction = 0; 1309 */ 1310 /* get Bidi properties */ 1311 bdp = UBiDiProps.INSTANCE; 1312 1313 /* allocate memory for arrays as requested */ 1314 if (maxLength > 0) { 1315 getInitialDirPropsMemory(maxLength); 1316 getInitialLevelsMemory(maxLength); 1317 } else { 1318 mayAllocateText = true; 1319 } 1320 1321 if (maxRunCount > 0) { 1322 // if maxRunCount == 1, use simpleRuns[] 1323 if (maxRunCount > 1) { 1324 getInitialRunsMemory(maxRunCount); 1325 } 1326 } else { 1327 mayAllocateRuns = true; 1328 } 1329 } 1330 1331 /* 1332 * We are allowed to allocate memory if object==null or 1333 * mayAllocate==true for each array that we need. 1334 * 1335 * Assume sizeNeeded>0. 1336 * If object != null, then assume size > 0. 1337 */ 1338 private Object getMemory(String label, Object array, Class<?> arrayClass, 1339 boolean mayAllocate, int sizeNeeded) 1340 { 1341 int len = Array.getLength(array); 1342 1343 /* we have at least enough memory and must not allocate */ 1344 if (sizeNeeded == len) { 1345 return array; 1346 } 1347 if (!mayAllocate) { 1348 /* we must not allocate */ 1349 if (sizeNeeded <= len) { 1350 return array; 1351 } 1352 throw new OutOfMemoryError("Failed to allocate memory for " 1353 + label); 1354 } 1355 /* we may try to grow or shrink */ 1356 /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid 1357 the allocation altogether and rely on this.length */ 1358 try { 1359 return Array.newInstance(arrayClass, sizeNeeded); 1360 } catch (Exception e) { 1361 throw new OutOfMemoryError("Failed to allocate memory for " 1362 + label); 1363 } 1364 } 1365 1366 /* helper methods for each allocated array */ 1367 private void getDirPropsMemory(boolean mayAllocate, int len) 1368 { 1369 Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len); 1370 dirPropsMemory = (byte[]) array; 1371 } 1372 1373 void getDirPropsMemory(int len) 1374 { 1375 getDirPropsMemory(mayAllocateText, len); 1376 } 1377 1378 private void getLevelsMemory(boolean mayAllocate, int len) 1379 { 1380 Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len); 1381 levelsMemory = (byte[]) array; 1382 } 1383 1384 void getLevelsMemory(int len) 1385 { 1386 getLevelsMemory(mayAllocateText, len); 1387 } 1388 1389 private void getRunsMemory(boolean mayAllocate, int len) 1390 { 1391 Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len); 1392 runsMemory = (BidiRun[]) array; 1393 } 1394 1395 void getRunsMemory(int len) 1396 { 1397 getRunsMemory(mayAllocateRuns, len); 1398 } 1399 1400 /* additional methods used by constructor - always allow allocation */ 1401 private void getInitialDirPropsMemory(int len) 1402 { 1403 getDirPropsMemory(true, len); 1404 } 1405 1406 private void getInitialLevelsMemory(int len) 1407 { 1408 getLevelsMemory(true, len); 1409 } 1410 1411 private void getInitialRunsMemory(int len) 1412 { 1413 getRunsMemory(true, len); 1414 } 1415 1416 /** 1417 * Is this <code>Bidi</code> object set to perform the inverse Bidi 1418 * algorithm? 1419 * <p>Note: calling this method after setting the reordering mode with 1420 * <code>setReorderingMode</code> will return <code>true</code> if the 1421 * reordering mode was set to 1422 * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code> 1423 * for all other values.</p> 1424 * 1425 * @return <code>true</code> if the <code>Bidi</code> object is set to 1426 * perform the inverse Bidi algorithm by handling numbers as L. 1427 * 1428 * @see #setInverse 1429 * @see #setReorderingMode 1430 * @see #REORDER_INVERSE_NUMBERS_AS_L 1431 * @stable ICU 3.8 1432 */ 1433 public boolean isInverse() { 1434 return isInverse; 1435 } 1436 1437 /* perform (P2)..(P3) ------------------------------------------------------- */ 1438 1439 /* 1440 * Check that there are enough entries in the arrays paras_limit and paras_level 1441 */ 1442 private void checkParaCount() { 1443 int[] saveLimits; 1444 byte[] saveLevels; 1445 int count = paraCount; 1446 if (count <= paras_level.length) 1447 return; 1448 int oldLength = paras_level.length; 1449 saveLimits = paras_limit; 1450 saveLevels = paras_level; 1451 try { 1452 paras_limit = new int[count * 2]; 1453 paras_level = new byte[count * 2]; 1454 } catch (Exception e) { 1455 throw new OutOfMemoryError("Failed to allocate memory for paras"); 1456 } 1457 System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength); 1458 System.arraycopy(saveLevels, 0, paras_level, 0, oldLength); 1459 } 1460 1461 /* 1462 * Get the directional properties for the text, calculate the flags bit-set, and 1463 * determine the paragraph level if necessary (in paras_level[i]). 1464 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. 1465 * When encountering an FSI, it is initially replaced with an LRI, which is the 1466 * default. Only if a strong R or AL is found within its scope will the LRI be 1467 * replaced by an RLI. 1468 */ 1469 static final int NOT_SEEKING_STRONG = 0; /* 0: not contextual paraLevel, not after FSI */ 1470 static final int SEEKING_STRONG_FOR_PARA = 1; /* 1: looking for first strong char in para */ 1471 static final int SEEKING_STRONG_FOR_FSI = 2; /* 2: looking for first strong after FSI */ 1472 static final int LOOKING_FOR_PDI = 3; /* 3: found strong after FSI, looking for PDI */ 1473 1474 private void getDirProps() 1475 { 1476 int i = 0, i0, i1; 1477 flags = 0; /* collect all directionalities in the text */ 1478 int uchar; 1479 byte dirProp; 1480 byte defaultParaLevel = 0; /* initialize to avoid compiler warnings */ 1481 boolean isDefaultLevel = IsDefaultLevel(paraLevel); 1482 /* for inverse Bidi, the default para level is set to RTL if there is a 1483 strong R or AL character at either end of the text */ 1484 boolean isDefaultLevelInverse=isDefaultLevel && 1485 (reorderingMode == REORDER_INVERSE_LIKE_DIRECT || 1486 reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 1487 lastArabicPos = -1; 1488 int controlCount = 0; 1489 boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0; 1490 1491 byte state; 1492 byte lastStrong = ON; /* for default level & inverse Bidi */ 1493 /* The following stacks are used to manage isolate sequences. Those 1494 sequences may be nested, but obviously never more deeply than the 1495 maximum explicit embedding level. 1496 lastStack is the index of the last used entry in the stack. A value of -1 1497 means that there is no open isolate sequence. 1498 lastStack is reset to -1 on paragraph boundaries. */ 1499 /* The following stack contains the position of the initiator of 1500 each open isolate sequence */ 1501 int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1]; 1502 /* The following stack contains the last known state before 1503 encountering the initiator of an isolate sequence */ 1504 byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1]; 1505 int stackLast=-1; 1506 1507 if ((reorderingOptions & OPTION_STREAMING) != 0) 1508 length = 0; 1509 defaultParaLevel = (byte)(paraLevel & 1); 1510 1511 if (isDefaultLevel) { 1512 paras_level[0] = defaultParaLevel; 1513 lastStrong = defaultParaLevel; 1514 state = SEEKING_STRONG_FOR_PARA; 1515 } else { 1516 paras_level[0] = paraLevel; 1517 state = NOT_SEEKING_STRONG; 1518 } 1519 /* count paragraphs and determine the paragraph level (P2..P3) */ 1520 /* 1521 * see comment on constant fields: 1522 * the LEVEL_DEFAULT_XXX values are designed so that 1523 * their low-order bit alone yields the intended default 1524 */ 1525 1526 for (i = 0; i < originalLength; /* i is incremented in the loop */) { 1527 i0 = i; /* index of first code unit */ 1528 uchar = UTF16.charAt(text, 0, originalLength, i); 1529 i += UTF16.getCharCount(uchar); 1530 i1 = i - 1; /* index of last code unit, gets the directional property */ 1531 1532 dirProp = (byte)getCustomizedClass(uchar); 1533 flags |= DirPropFlag(dirProp); 1534 dirProps[i1] = dirProp; 1535 if (i1 > i0) { /* set previous code units' properties to BN */ 1536 flags |= DirPropFlag(BN); 1537 do { 1538 dirProps[--i1] = BN; 1539 } while (i1 > i0); 1540 } 1541 if (removeBidiControls && IsBidiControlChar(uchar)) { 1542 controlCount++; 1543 } 1544 if (dirProp == L) { 1545 if (state == SEEKING_STRONG_FOR_PARA) { 1546 paras_level[paraCount - 1] = 0; 1547 state = NOT_SEEKING_STRONG; 1548 } 1549 else if (state == SEEKING_STRONG_FOR_FSI) { 1550 if (stackLast <= MAX_EXPLICIT_LEVEL) { 1551 /* no need for next statement, already set by default */ 1552 /* dirProps[isolateStartStack[stackLast]] = LRI; */ 1553 flags |= DirPropFlag(LRI); 1554 } 1555 state = LOOKING_FOR_PDI; 1556 } 1557 lastStrong = L; 1558 continue; 1559 } 1560 if (dirProp == R || dirProp == AL) { 1561 if (state == SEEKING_STRONG_FOR_PARA) { 1562 paras_level[paraCount - 1] = 1; 1563 state = NOT_SEEKING_STRONG; 1564 } 1565 else if (state == SEEKING_STRONG_FOR_FSI) { 1566 if (stackLast <= MAX_EXPLICIT_LEVEL) { 1567 dirProps[isolateStartStack[stackLast]] = RLI; 1568 flags |= DirPropFlag(RLI); 1569 } 1570 state = LOOKING_FOR_PDI; 1571 } 1572 lastStrong = R; 1573 if (dirProp == AL) 1574 lastArabicPos = i - 1; 1575 continue; 1576 } 1577 if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */ 1578 stackLast++; 1579 if (stackLast <= MAX_EXPLICIT_LEVEL) { 1580 isolateStartStack[stackLast] = i - 1; 1581 previousStateStack[stackLast] = state; 1582 } 1583 if (dirProp == FSI) { 1584 dirProps[i-1] = LRI; /* default if no strong char */ 1585 state = SEEKING_STRONG_FOR_FSI; 1586 } 1587 else 1588 state = LOOKING_FOR_PDI; 1589 continue; 1590 } 1591 if (dirProp == PDI) { 1592 if (state == SEEKING_STRONG_FOR_FSI) { 1593 if (stackLast <= MAX_EXPLICIT_LEVEL) { 1594 /* no need for next statement, already set by default */ 1595 /* dirProps[isolateStartStack[stackLast]] = LRI; */ 1596 flags |= DirPropFlag(LRI); 1597 } 1598 } 1599 if (stackLast >= 0) { 1600 if (stackLast <= MAX_EXPLICIT_LEVEL) 1601 state = previousStateStack[stackLast]; 1602 stackLast--; 1603 } 1604 continue; 1605 } 1606 if (dirProp == B) { 1607 if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */ 1608 continue; 1609 paras_limit[paraCount - 1] = i; 1610 if (isDefaultLevelInverse && lastStrong == R) 1611 paras_level[paraCount - 1] = 1; 1612 if ((reorderingOptions & OPTION_STREAMING) != 0) { 1613 /* When streaming, we only process whole paragraphs 1614 thus some updates are only done on paragraph boundaries */ 1615 length = i; /* i is index to next character */ 1616 this.controlCount = controlCount; 1617 } 1618 if (i < originalLength) { /* B not last char in text */ 1619 paraCount++; 1620 checkParaCount(); /* check that there is enough memory for a new para entry */ 1621 if (isDefaultLevel) { 1622 paras_level[paraCount - 1] = defaultParaLevel; 1623 state = SEEKING_STRONG_FOR_PARA; 1624 lastStrong = defaultParaLevel; 1625 } else { 1626 paras_level[paraCount - 1] = paraLevel; 1627 state = NOT_SEEKING_STRONG; 1628 } 1629 stackLast = -1; 1630 } 1631 continue; 1632 } 1633 } 1634 /* +Ignore still open isolate sequences with overflow */ 1635 if (stackLast > MAX_EXPLICIT_LEVEL) { 1636 stackLast = MAX_EXPLICIT_LEVEL; 1637 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */ 1638 } 1639 /* Resolve direction of still unresolved open FSI sequences */ 1640 while (stackLast >= 0) { 1641 if (state == SEEKING_STRONG_FOR_FSI) { 1642 /* no need for next statement, already set by default */ 1643 /* dirProps[isolateStartStack[stackLast]] = LRI; */ 1644 flags |= DirPropFlag(LRI); 1645 break; 1646 } 1647 state = previousStateStack[stackLast]; 1648 stackLast--; 1649 } 1650 /* When streaming, ignore text after the last paragraph separator */ 1651 if ((reorderingOptions & OPTION_STREAMING) != 0) { 1652 if (length < originalLength) 1653 paraCount--; 1654 } else { 1655 paras_limit[paraCount - 1] = originalLength; 1656 this.controlCount = controlCount; 1657 } 1658 /* For inverse bidi, default para direction is RTL if there is 1659 a strong R or AL at either end of the paragraph */ 1660 if (isDefaultLevelInverse && lastStrong == R) { 1661 paras_level[paraCount - 1] = 1; 1662 } 1663 if (isDefaultLevel) { 1664 paraLevel = paras_level[0]; 1665 } 1666 /* The following is needed to resolve the text direction for default level 1667 paragraphs containing no strong character */ 1668 for (i = 0; i < paraCount; i++) 1669 flags |= DirPropFlagLR(paras_level[i]); 1670 1671 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 1672 flags |= DirPropFlag(L); 1673 } 1674 } 1675 1676 /* determine the paragraph level at position index */ 1677 byte GetParaLevelAt(int pindex) 1678 { 1679 if (defaultParaLevel == 0 || pindex < paras_limit[0]) 1680 return paraLevel; 1681 int i; 1682 for (i = 1; i < paraCount; i++) 1683 if (pindex < paras_limit[i]) 1684 break; 1685 if (i >= paraCount) 1686 i = paraCount - 1; 1687 return paras_level[i]; 1688 } 1689 1690 /* Functions for handling paired brackets ----------------------------------- */ 1691 1692 /* In the isoRuns array, the first entry is used for text outside of any 1693 isolate sequence. Higher entries are used for each more deeply nested 1694 isolate sequence. isoRunLast is the index of the last used entry. The 1695 openings array is used to note the data of opening brackets not yet 1696 matched by a closing bracket, or matched but still susceptible to change 1697 level. 1698 Each isoRun entry contains the index of the first and 1699 one-after-last openings entries for pending opening brackets it 1700 contains. The next openings entry to use is the one-after-last of the 1701 most deeply nested isoRun entry. 1702 isoRun entries also contain their current embedding level and the last 1703 encountered strong character, since these will be needed to resolve 1704 the level of paired brackets. */ 1705 1706 private void bracketInit(BracketData bd) { 1707 bd.isoRunLast = 0; 1708 bd.isoRuns[0] = new IsoRun(); 1709 bd.isoRuns[0].start = 0; 1710 bd.isoRuns[0].limit = 0; 1711 bd.isoRuns[0].level = GetParaLevelAt(0); 1712 bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1); 1713 bd.isoRuns[0].contextPos = 0; 1714 bd.openings = new Opening[SIMPLE_PARAS_COUNT]; 1715 bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL || 1716 reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL; 1717 } 1718 1719 /* paragraph boundary */ 1720 private void bracketProcessB(BracketData bd, byte level) { 1721 bd.isoRunLast = 0; 1722 bd.isoRuns[0].limit = 0; 1723 bd.isoRuns[0].level = level; 1724 bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1); 1725 bd.isoRuns[0].contextPos = 0; 1726 } 1727 1728 /* LRE, LRO, RLE, RLO, PDF */ 1729 private void bracketProcessBoundary(BracketData bd, int lastCcPos, 1730 byte contextLevel, byte embeddingLevel) { 1731 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1732 if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */ 1733 return; 1734 if (NoOverride(embeddingLevel) > NoOverride(contextLevel)) /* not a PDF */ 1735 contextLevel = embeddingLevel; 1736 pLastIsoRun.limit = pLastIsoRun.start; 1737 pLastIsoRun.level = embeddingLevel; 1738 pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1); 1739 pLastIsoRun.contextPos = lastCcPos; 1740 } 1741 1742 /* LRI or RLI */ 1743 private void bracketProcessLRI_RLI(BracketData bd, byte level) { 1744 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1745 short lastLimit; 1746 pLastIsoRun.lastBase = ON; 1747 lastLimit = pLastIsoRun.limit; 1748 bd.isoRunLast++; 1749 pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1750 if (pLastIsoRun == null) 1751 pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun(); 1752 pLastIsoRun.start = pLastIsoRun.limit = lastLimit; 1753 pLastIsoRun.level = level; 1754 pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1); 1755 pLastIsoRun.contextPos = 0; 1756 } 1757 1758 /* PDI */ 1759 private void bracketProcessPDI(BracketData bd) { 1760 IsoRun pLastIsoRun; 1761 bd.isoRunLast--; 1762 pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1763 pLastIsoRun.lastBase = ON; 1764 } 1765 1766 /* newly found opening bracket: create an openings entry */ 1767 private void bracketAddOpening(BracketData bd, char match, int position) { 1768 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1769 Opening pOpening; 1770 if (pLastIsoRun.limit >= bd.openings.length) { /* no available new entry */ 1771 Opening[] saveOpenings = bd.openings; 1772 int count; 1773 try { 1774 count = bd.openings.length; 1775 bd.openings = new Opening[count * 2]; 1776 } catch (Exception e) { 1777 throw new OutOfMemoryError("Failed to allocate memory for openings"); 1778 } 1779 System.arraycopy(saveOpenings, 0, bd.openings, 0, count); 1780 } 1781 pOpening = bd.openings[pLastIsoRun.limit]; 1782 if (pOpening == null) 1783 pOpening = bd.openings[pLastIsoRun.limit]= new Opening(); 1784 pOpening.position = position; 1785 pOpening.match = match; 1786 pOpening.contextDir = pLastIsoRun.contextDir; 1787 pOpening.contextPos = pLastIsoRun.contextPos; 1788 pOpening.flags = 0; 1789 pLastIsoRun.limit++; 1790 } 1791 1792 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ 1793 private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) { 1794 /* This function calls itself recursively */ 1795 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1796 Opening qOpening; 1797 int k, openingPosition, closingPosition; 1798 for (k = openingIndex+1; k < pLastIsoRun.limit; k++) { 1799 qOpening = bd.openings[k]; 1800 if (qOpening.match >= 0) /* not an N0c match */ 1801 continue; 1802 if (newPropPosition < qOpening.contextPos) 1803 break; 1804 if (newPropPosition >= qOpening.position) 1805 continue; 1806 if (newProp == qOpening.contextDir) 1807 break; 1808 openingPosition = qOpening.position; 1809 dirProps[openingPosition] = newProp; 1810 closingPosition = -(qOpening.match); 1811 dirProps[closingPosition] = newProp; 1812 qOpening.match = 0; /* prevent further changes */ 1813 fixN0c(bd, k, openingPosition, newProp); 1814 fixN0c(bd, k, closingPosition, newProp); 1815 } 1816 } 1817 1818 /* process closing bracket; return L or R if N0b or N0c, ON if N0d */ 1819 private byte bracketProcessClosing(BracketData bd, int openIdx, int position) { 1820 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1821 Opening pOpening, qOpening; 1822 byte direction; 1823 boolean stable; 1824 byte newProp; 1825 pOpening = bd.openings[openIdx]; 1826 direction = (byte)(pLastIsoRun.level & 1); 1827 stable = true; /* assume stable until proved otherwise */ 1828 1829 /* The stable flag is set when brackets are paired and their 1830 level is resolved and cannot be changed by what will be 1831 found later in the source string. 1832 An unstable match can occur only when applying N0c, where 1833 the resolved level depends on the preceding context, and 1834 this context may be affected by text occurring later. 1835 Example: RTL paragraph containing: abc[(latin) HEBREW] 1836 When the closing parenthesis is encountered, it appears 1837 that N0c1 must be applied since 'abc' sets an opposite 1838 direction context and both parentheses receive level 2. 1839 However, when the closing square bracket is processed, 1840 N0b applies because of 'HEBREW' being included within the 1841 brackets, thus the square brackets are treated like R and 1842 receive level 1. However, this changes the preceding 1843 context of the opening parenthesis, and it now appears 1844 that N0c2 must be applied to the parentheses rather than 1845 N0c1. */ 1846 1847 if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) || 1848 (direction == 1 && (pOpening.flags & FOUND_R) > 0)) { /* N0b */ 1849 newProp = direction; 1850 } 1851 else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) { /* N0c */ 1852 /* it is stable if there is no preceding text or in 1853 conditions too complicated and not worth checking */ 1854 stable = (openIdx == pLastIsoRun.start); 1855 if (direction != pOpening.contextDir) 1856 newProp = pOpening.contextDir; /* N0c1 */ 1857 else 1858 newProp = direction; /* N0c2 */ 1859 } else { 1860 /* forget this and any brackets nested within this pair */ 1861 pLastIsoRun.limit = (short)openIdx; 1862 return ON; /* N0d */ 1863 } 1864 dirProps[pOpening.position] = newProp; 1865 dirProps[position] = newProp; 1866 /* Update nested N0c pairs that may be affected */ 1867 fixN0c(bd, openIdx, pOpening.position, newProp); 1868 if (stable) { 1869 pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */ 1870 /* remove lower located synonyms if any */ 1871 while (pLastIsoRun.limit > pLastIsoRun.start && 1872 bd.openings[pLastIsoRun.limit - 1].position == pOpening.position) 1873 pLastIsoRun.limit--; 1874 } else { 1875 int k; 1876 pOpening.match = -position; 1877 /* neutralize lower located synonyms if any */ 1878 k = openIdx - 1; 1879 while (k >= pLastIsoRun.start && 1880 bd.openings[k].position == pOpening.position) 1881 bd.openings[k--].match = 0; 1882 /* neutralize any unmatched opening between the current pair; 1883 this will also neutralize higher located synonyms if any */ 1884 for (k = openIdx + 1; k < pLastIsoRun.limit; k++) { 1885 qOpening =bd.openings[k]; 1886 if (qOpening.position >= position) 1887 break; 1888 if (qOpening.match > 0) 1889 qOpening.match = 0; 1890 } 1891 } 1892 return newProp; 1893 } 1894 1895 /* handle strong characters, digits and candidates for closing brackets */ 1896 private void bracketProcessChar(BracketData bd, int position) { 1897 IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast]; 1898 byte dirProp, newProp; 1899 byte level; 1900 dirProp = dirProps[position]; 1901 if (dirProp == ON) { 1902 char c, match; 1903 int idx; 1904 /* First see if it is a matching closing bracket. Hopefully, this is 1905 more efficient than checking if it is a closing bracket at all */ 1906 c = text[position]; 1907 for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) { 1908 if (bd.openings[idx].match != c) 1909 continue; 1910 /* We have a match */ 1911 newProp = bracketProcessClosing(bd, idx, position); 1912 if(newProp == ON) { /* N0d */ 1913 c = 0; /* prevent handling as an opening */ 1914 break; 1915 } 1916 pLastIsoRun.lastBase = ON; 1917 pLastIsoRun.contextDir = newProp; 1918 pLastIsoRun.contextPos = position; 1919 level = levels[position]; 1920 if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */ 1921 short flag; 1922 int i; 1923 newProp = (byte)(level & 1); 1924 pLastIsoRun.lastStrong = newProp; 1925 flag = (short)DirPropFlag(newProp); 1926 for (i = pLastIsoRun.start; i < idx; i++) 1927 bd.openings[i].flags |= flag; 1928 /* matching brackets are not overridden by LRO/RLO */ 1929 levels[position] &= ~LEVEL_OVERRIDE; 1930 } 1931 /* matching brackets are not overridden by LRO/RLO */ 1932 levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE; 1933 return; 1934 } 1935 /* We get here only if the ON character is not a matching closing 1936 bracket or it is a case of N0d */ 1937 /* Now see if it is an opening bracket */ 1938 if (c != 0) { 1939 match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */ 1940 } else { 1941 match = 0; 1942 } 1943 if (match != c && /* has a matching char */ 1944 UCharacter.getIntPropertyValue(c, BIDI_PAIRED_BRACKET_TYPE) == 1945 /* opening bracket */ BidiPairedBracketType.OPEN) { 1946 /* special case: process synonyms 1947 create an opening entry for each synonym */ 1948 if (match == 0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ 1949 bracketAddOpening(bd, (char)0x3009, position); 1950 } 1951 else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */ 1952 bracketAddOpening(bd, (char)0x232A, position); 1953 } 1954 bracketAddOpening(bd, match, position); 1955 } 1956 } 1957 level = levels[position]; 1958 if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */ 1959 newProp = (byte)(level & 1); 1960 if (dirProp != S && dirProp != WS && dirProp != ON) 1961 dirProps[position] = newProp; 1962 pLastIsoRun.lastBase = newProp; 1963 pLastIsoRun.lastStrong = newProp; 1964 pLastIsoRun.contextDir = newProp; 1965 pLastIsoRun.contextPos = position; 1966 } 1967 else if (dirProp <= R || dirProp == AL) { 1968 newProp = DirFromStrong(dirProp); 1969 pLastIsoRun.lastBase = dirProp; 1970 pLastIsoRun.lastStrong = dirProp; 1971 pLastIsoRun.contextDir = newProp; 1972 pLastIsoRun.contextPos = position; 1973 } 1974 else if(dirProp == EN) { 1975 pLastIsoRun.lastBase = EN; 1976 if (pLastIsoRun.lastStrong == L) { 1977 newProp = L; /* W7 */ 1978 if (!bd.isNumbersSpecial) 1979 dirProps[position] = ENL; 1980 pLastIsoRun.contextDir = L; 1981 pLastIsoRun.contextPos = position; 1982 } 1983 else { 1984 newProp = R; /* N0 */ 1985 if (pLastIsoRun.lastStrong == AL) 1986 dirProps[position] = AN; /* W2 */ 1987 else 1988 dirProps[position] = ENR; 1989 pLastIsoRun.contextDir = R; 1990 pLastIsoRun.contextPos = position; 1991 } 1992 } 1993 else if (dirProp == AN) { 1994 newProp = R; /* N0 */ 1995 pLastIsoRun.lastBase = AN; 1996 pLastIsoRun.contextDir = R; 1997 pLastIsoRun.contextPos = position; 1998 } 1999 else if (dirProp == NSM) { 2000 /* if the last real char was ON, change NSM to ON so that it 2001 will stay ON even if the last real char is a bracket which 2002 may be changed to L or R */ 2003 newProp = pLastIsoRun.lastBase; 2004 if (newProp == ON) 2005 dirProps[position] = newProp; 2006 } 2007 else { 2008 newProp = dirProp; 2009 pLastIsoRun.lastBase = dirProp; 2010 } 2011 if (newProp <= R || newProp == AL) { 2012 int i; 2013 short flag = (short)DirPropFlag(DirFromStrong(newProp)); 2014 for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++) 2015 if (position > bd.openings[i].position) 2016 bd.openings[i].flags |= flag; 2017 } 2018 } 2019 2020 /* perform (X1)..(X9) ------------------------------------------------------- */ 2021 2022 /* determine if the text is mixed-directional or single-directional */ 2023 private byte directionFromFlags() { 2024 2025 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 2026 if (!((flags & MASK_RTL) != 0 || 2027 ((flags & DirPropFlag(AN)) != 0 && 2028 (flags & MASK_POSSIBLE_N) != 0))) { 2029 return LTR; 2030 } else if ((flags & MASK_LTR) == 0) { 2031 return RTL; 2032 } else { 2033 return MIXED; 2034 } 2035 } 2036 2037 /* 2038 * Resolve the explicit levels as specified by explicit embedding codes. 2039 * Recalculate the flags to have them reflect the real properties 2040 * after taking the explicit embeddings into account. 2041 * 2042 * The BiDi algorithm is designed to result in the same behavior whether embedding 2043 * levels are externally specified (from "styled text", supposedly the preferred 2044 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. 2045 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). 2046 * However, in a real implementation, the removal of these codes and their index 2047 * positions in the plain text is undesirable since it would result in 2048 * reallocated, reindexed text. 2049 * Instead, this implementation leaves the codes in there and just ignores them 2050 * in the subsequent processing. 2051 * In order to get the same reordering behavior, positions with a BN or a not-isolate 2052 * explicit embedding code just get the same level assigned as the last "real" 2053 * character. 2054 * 2055 * Some implementations, not this one, then overwrite some of these 2056 * directionality properties at "real" same-level-run boundaries by 2057 * L or R codes so that the resolution of weak types can be performed on the 2058 * entire paragraph at once instead of having to parse it once more and 2059 * perform that resolution on same-level-runs. 2060 * This limits the scope of the implicit rules in effectively 2061 * the same way as the run limits. 2062 * 2063 * Instead, this implementation does not modify these codes, except for 2064 * paired brackets whose properties (ON) may be replaced by L or R. 2065 * On one hand, the paragraph has to be scanned for same-level-runs, but 2066 * on the other hand, this saves another loop to reset these codes, 2067 * or saves making and modifying a copy of dirProps[]. 2068 * 2069 * 2070 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 2071 * 2072 * 2073 * Handling the stack of explicit levels (Xn): 2074 * 2075 * With the BiDi stack of explicit levels, as pushed with each 2076 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, 2077 * the explicit level must never exceed MAX_EXPLICIT_LEVEL. 2078 * 2079 * In order to have a correct push-pop semantics even in the case of overflows, 2080 * overflow counters and a valid isolate counter are used as described in UAX#9 2081 * section 3.3.2 "Explicit Levels and Directions". 2082 * 2083 * This implementation assumes that MAX_EXPLICIT_LEVEL is odd. 2084 * 2085 * Returns the direction 2086 * 2087 */ 2088 private byte resolveExplicitLevels() { 2089 int i = 0; 2090 byte dirProp; 2091 byte level = GetParaLevelAt(0); 2092 byte dirct; 2093 isolateCount = 0; 2094 2095 /* determine if the text is mixed-directional or single-directional */ 2096 dirct = directionFromFlags(); 2097 2098 /* we may not need to resolve any explicit levels */ 2099 if (dirct != MIXED) { 2100 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 2101 return dirct; 2102 } 2103 2104 if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) { 2105 /* inverse BiDi: mixed, but all characters are at the same embedding level */ 2106 /* set all levels to the paragraph level */ 2107 int paraIndex, start, limit; 2108 for (paraIndex = 0; paraIndex < paraCount; paraIndex++) { 2109 if (paraIndex == 0) 2110 start = 0; 2111 else 2112 start = paras_limit[paraIndex - 1]; 2113 limit = paras_limit[paraIndex]; 2114 level = paras_level[paraIndex]; 2115 for (i = start; i < limit; i++) 2116 levels[i] =level; 2117 } 2118 return dirct; /* no bracket matching for inverse BiDi */ 2119 } 2120 if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) { 2121 /* no embeddings, set all levels to the paragraph level */ 2122 /* we still have to perform bracket matching */ 2123 int paraIndex, start, limit; 2124 BracketData bracketData = new BracketData(); 2125 bracketInit(bracketData); 2126 for (paraIndex = 0; paraIndex < paraCount; paraIndex++) { 2127 if (paraIndex == 0) 2128 start = 0; 2129 else 2130 start = paras_limit[paraIndex-1]; 2131 limit = paras_limit[paraIndex]; 2132 level = paras_level[paraIndex]; 2133 for (i = start; i < limit; i++) { 2134 levels[i] = level; 2135 dirProp = dirProps[i]; 2136 if (dirProp == BN) 2137 continue; 2138 if (dirProp == B) { 2139 if ((i + 1) < length) { 2140 if (text[i] == CR && text[i + 1] == LF) 2141 continue; /* skip CR when followed by LF */ 2142 bracketProcessB(bracketData, level); 2143 } 2144 continue; 2145 } 2146 bracketProcessChar(bracketData, i); 2147 } 2148 } 2149 return dirct; 2150 } 2151 /* continue to perform (Xn) */ 2152 2153 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 2154 /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */ 2155 byte embeddingLevel = level, newLevel; 2156 byte previousLevel = level; /* previous level for regular (not CC) characters */ 2157 int lastCcPos = 0; /* index of last effective LRx,RLx, PDx */ 2158 2159 /* The following stack remembers the embedding level and the ISOLATE flag of level runs. 2160 stackLast points to its current entry. */ 2161 short[] stack = new short[MAX_EXPLICIT_LEVEL + 2]; /* we never push anything >= MAX_EXPLICIT_LEVEL 2162 but we need one more entry as base */ 2163 int stackLast = 0; 2164 int overflowIsolateCount = 0; 2165 int overflowEmbeddingCount = 0; 2166 int validIsolateCount = 0; 2167 BracketData bracketData = new BracketData(); 2168 bracketInit(bracketData); 2169 stack[0] = level; /* initialize base entry to para level, no override, no isolate */ 2170 2171 /* recalculate the flags */ 2172 flags = 0; 2173 2174 for (i = 0; i < length; i++) { 2175 dirProp = dirProps[i]; 2176 switch (dirProp) { 2177 case LRE: 2178 case RLE: 2179 case LRO: 2180 case RLO: 2181 /* (X2, X3, X4, X5) */ 2182 flags |= DirPropFlag(BN); 2183 levels[i] = previousLevel; 2184 if (dirProp == LRE || dirProp == LRO) { 2185 /* least greater even level */ 2186 newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1)); 2187 } else { 2188 /* least greater odd level */ 2189 newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1); 2190 } 2191 if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 && 2192 overflowEmbeddingCount == 0) { 2193 lastCcPos = i; 2194 embeddingLevel = newLevel; 2195 if (dirProp == LRO || dirProp == RLO) 2196 embeddingLevel |= LEVEL_OVERRIDE; 2197 stackLast++; 2198 stack[stackLast] = embeddingLevel; 2199 /* we don't need to set LEVEL_OVERRIDE off for LRE and RLE 2200 since this has already been done for newLevel which is 2201 the source for embeddingLevel. 2202 */ 2203 } else { 2204 if (overflowIsolateCount == 0) 2205 overflowEmbeddingCount++; 2206 } 2207 break; 2208 case PDF: 2209 /* (X7) */ 2210 flags |= DirPropFlag(BN); 2211 levels[i] = previousLevel; 2212 /* handle all the overflow cases first */ 2213 if (overflowIsolateCount > 0) { 2214 break; 2215 } 2216 if (overflowEmbeddingCount > 0) { 2217 overflowEmbeddingCount--; 2218 break; 2219 } 2220 if (stackLast > 0 && stack[stackLast] < ISOLATE) { /* not an isolate entry */ 2221 lastCcPos = i; 2222 stackLast--; 2223 embeddingLevel = (byte)stack[stackLast]; 2224 } 2225 break; 2226 case LRI: 2227 case RLI: 2228 flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel); 2229 levels[i] = NoOverride(embeddingLevel); 2230 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) { 2231 bracketProcessBoundary(bracketData, lastCcPos, 2232 previousLevel, embeddingLevel); 2233 flags |= DirPropFlagMultiRuns; 2234 } 2235 previousLevel = embeddingLevel; 2236 /* (X5a, X5b) */ 2237 if (dirProp == LRI) 2238 /* least greater even level */ 2239 newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1)); 2240 else 2241 /* least greater odd level */ 2242 newLevel=(byte)((NoOverride(embeddingLevel)+1)|1); 2243 if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 2244 && overflowEmbeddingCount == 0) { 2245 flags |= DirPropFlag(dirProp); 2246 lastCcPos = i; 2247 validIsolateCount++; 2248 if (validIsolateCount > isolateCount) 2249 isolateCount = validIsolateCount; 2250 embeddingLevel = newLevel; 2251 /* we can increment stackLast without checking because newLevel 2252 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ 2253 stackLast++; 2254 stack[stackLast] = (short)(embeddingLevel + ISOLATE); 2255 bracketProcessLRI_RLI(bracketData, embeddingLevel); 2256 } else { 2257 /* make it WS so that it is handled by adjustWSLevels() */ 2258 dirProps[i] = WS; 2259 overflowIsolateCount++; 2260 } 2261 break; 2262 case PDI: 2263 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) { 2264 bracketProcessBoundary(bracketData, lastCcPos, 2265 previousLevel, embeddingLevel); 2266 flags |= DirPropFlagMultiRuns; 2267 } 2268 /* (X6a) */ 2269 if (overflowIsolateCount > 0) { 2270 overflowIsolateCount--; 2271 /* make it WS so that it is handled by adjustWSLevels() */ 2272 dirProps[i] = WS; 2273 } 2274 else if (validIsolateCount > 0) { 2275 flags |= DirPropFlag(PDI); 2276 lastCcPos = i; 2277 overflowEmbeddingCount = 0; 2278 while (stack[stackLast] < ISOLATE) /* pop embedding entries */ 2279 stackLast--; /* until the last isolate entry */ 2280 stackLast--; /* pop also the last isolate entry */ 2281 validIsolateCount--; 2282 bracketProcessPDI(bracketData); 2283 } else 2284 /* make it WS so that it is handled by adjustWSLevels() */ 2285 dirProps[i] = WS; 2286 embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE); 2287 flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel); 2288 previousLevel = embeddingLevel; 2289 levels[i] = NoOverride(embeddingLevel); 2290 break; 2291 case B: 2292 flags |= DirPropFlag(B); 2293 levels[i] = GetParaLevelAt(i); 2294 if ((i + 1) < length) { 2295 if (text[i] == CR && text[i + 1] == LF) 2296 break; /* skip CR when followed by LF */ 2297 overflowEmbeddingCount = overflowIsolateCount = 0; 2298 validIsolateCount = 0; 2299 stackLast = 0; 2300 previousLevel = embeddingLevel = GetParaLevelAt(i + 1); 2301 stack[0] = embeddingLevel; /* initialize base entry to para level, no override, no isolate */ 2302 bracketProcessB(bracketData, embeddingLevel); 2303 } 2304 break; 2305 case BN: 2306 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 2307 /* they will get their levels set correctly in adjustWSLevels() */ 2308 levels[i] = previousLevel; 2309 flags |= DirPropFlag(BN); 2310 break; 2311 default: 2312 /* all other types are normal characters and get the "real" level */ 2313 if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) { 2314 bracketProcessBoundary(bracketData, lastCcPos, 2315 previousLevel, embeddingLevel); 2316 flags |= DirPropFlagMultiRuns; 2317 if ((embeddingLevel & LEVEL_OVERRIDE) != 0) 2318 flags |= DirPropFlagO(embeddingLevel); 2319 else 2320 flags |= DirPropFlagE(embeddingLevel); 2321 } 2322 previousLevel = embeddingLevel; 2323 levels[i] = embeddingLevel; 2324 bracketProcessChar(bracketData, i); 2325 /* the dirProp may have been changed in bracketProcessChar() */ 2326 flags |= DirPropFlag(dirProps[i]); 2327 break; 2328 } 2329 } 2330 if ((flags & MASK_EMBEDDING) != 0) { 2331 flags |= DirPropFlagLR(paraLevel); 2332 } 2333 if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { 2334 flags |= DirPropFlag(L); 2335 } 2336 /* again, determine if the text is mixed-directional or single-directional */ 2337 dirct = directionFromFlags(); 2338 2339 return dirct; 2340 } 2341 2342 /* 2343 * Use a pre-specified embedding levels array: 2344 * 2345 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 2346 * ignore all explicit codes (X9), 2347 * and check all the preset levels. 2348 * 2349 * Recalculate the flags to have them reflect the real properties 2350 * after taking the explicit embeddings into account. 2351 */ 2352 private byte checkExplicitLevels() { 2353 byte dirProp; 2354 int i; 2355 int isolateCount = 0; 2356 2357 this.flags = 0; /* collect all directionalities in the text */ 2358 byte level; 2359 this.isolateCount = 0; 2360 2361 for (i = 0; i < length; ++i) { 2362 if (levels[i] == 0) { 2363 levels[i] = paraLevel; 2364 } 2365 2366 // for backward compatibility 2367 if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) { 2368 if ((levels[i] & LEVEL_OVERRIDE) != 0) { 2369 levels[i] = (byte)(paraLevel|LEVEL_OVERRIDE); 2370 } else { 2371 levels[i] = paraLevel; 2372 } 2373 } 2374 2375 level = levels[i]; 2376 dirProp = dirProps[i]; 2377 if (dirProp == LRI || dirProp == RLI) { 2378 isolateCount++; 2379 if (isolateCount > this.isolateCount) 2380 this.isolateCount = isolateCount; 2381 } 2382 else if (dirProp == PDI) { 2383 isolateCount--; 2384 } else if (dirProp == B) { 2385 isolateCount = 0; 2386 } 2387 if ((level & LEVEL_OVERRIDE) != 0) { 2388 /* keep the override flag in levels[i] but adjust the flags */ 2389 level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */ 2390 flags |= DirPropFlagO(level); 2391 } else { 2392 /* set the flags */ 2393 flags |= DirPropFlagE(level) | DirPropFlag(dirProp); 2394 } 2395 if ((level < GetParaLevelAt(i) && 2396 !((0 == level) && (dirProp == B))) || 2397 (MAX_EXPLICIT_LEVEL < level)) { 2398 /* level out of bounds */ 2399 throw new IllegalArgumentException("level " + level + 2400 " out of bounds at " + i); 2401 } 2402 } 2403 if ((flags & MASK_EMBEDDING) != 0) { 2404 flags |= DirPropFlagLR(paraLevel); 2405 } 2406 /* determine if the text is mixed-directional or single-directional */ 2407 return directionFromFlags(); 2408 } 2409 2410 /*********************************************************************/ 2411 /* The Properties state machine table */ 2412 /*********************************************************************/ 2413 /* */ 2414 /* All table cells are 8 bits: */ 2415 /* bits 0..4: next state */ 2416 /* bits 5..7: action to perform (if > 0) */ 2417 /* */ 2418 /* Cells may be of format "n" where n represents the next state */ 2419 /* (except for the rightmost column). */ 2420 /* Cells may also be of format "_(x,y)" where x represents an action */ 2421 /* to perform and y represents the next state. */ 2422 /* */ 2423 /*********************************************************************/ 2424 /* Definitions and type for properties state tables */ 2425 /*********************************************************************/ 2426 private static final int IMPTABPROPS_COLUMNS = 16; 2427 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; 2428 private static short GetStateProps(short cell) { 2429 return (short)(cell & 0x1f); 2430 } 2431 private static short GetActionProps(short cell) { 2432 return (short)(cell >> 5); 2433 } 2434 2435 private static final short[] groupProp = /* dirProp regrouped */ 2436 { 2437 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ 2438 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 2439 }; 2440 private static final short _L = 0; 2441 private static final short _R = 1; 2442 private static final short _EN = 2; 2443 private static final short _AN = 3; 2444 private static final short _ON = 4; 2445 private static final short _S = 5; 2446 private static final short _B = 6; /* reduced dirProp */ 2447 2448 /*********************************************************************/ 2449 /* */ 2450 /* PROPERTIES STATE TABLE */ 2451 /* */ 2452 /* In table impTabProps, */ 2453 /* - the ON column regroups ON and WS, FSI, RLI, LRI and PDI */ 2454 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */ 2455 /* - the Res column is the reduced property assigned to a run */ 2456 /* */ 2457 /* Action 1: process current run1, init new run1 */ 2458 /* 2: init new run2 */ 2459 /* 3: process run1, process run2, init new run1 */ 2460 /* 4: process run1, set run1=run2, init new run2 */ 2461 /* */ 2462 /* Notes: */ 2463 /* 1) This table is used in resolveImplicitLevels(). */ 2464 /* 2) This table triggers actions when there is a change in the Bidi*/ 2465 /* property of incoming characters (action 1). */ 2466 /* 3) Most such property sequences are processed immediately (in */ 2467 /* fact, passed to processPropertySeq(). */ 2468 /* 4) However, numbers are assembled as one sequence. This means */ 2469 /* that undefined situations (like CS following digits, until */ 2470 /* it is known if the next char will be a digit) are held until */ 2471 /* following chars define them. */ 2472 /* Example: digits followed by CS, then comes another CS or ON; */ 2473 /* the digits will be processed, then the CS assigned */ 2474 /* as the start of an ON sequence (action 3). */ 2475 /* 5) There are cases where more than one sequence must be */ 2476 /* processed, for instance digits followed by CS followed by L: */ 2477 /* the digits must be processed as one sequence, and the CS */ 2478 /* must be processed as an ON sequence, all this before starting */ 2479 /* assembling chars for the opening L sequence. */ 2480 /* */ 2481 /* */ 2482 private static final short[][] impTabProps = 2483 { 2484 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR, Res */ 2485 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, 18, 21, _ON }, 2486 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, 32+18, 32+21, _L }, 2487 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, 32+18, 32+21, _R }, 2488 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, 32+18, 32+21, _R }, 2489 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, 18, 21, _EN }, 2490 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, 32+18, 32+21, _AN }, 2491 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, 18, 21, _AN }, 2492 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, 32+18, 32+21, _ON }, 2493 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, 32+18, 32+21, _ON }, 2494 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, 18, 21, _ON }, 2495 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, 18, 21, _EN }, 2496 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, 18, 21, _EN }, 2497 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, 96+18, 96+21, _AN }, 2498 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, 18, 21, _AN }, 2499 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3,128+18,128+21, _ON }, 2500 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, 32+18, 32+21, _S }, 2501 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, 32+18, 32+21, _S }, 2502 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, 32+18, 32+21, _B }, 2503 /*18 ENL */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 64+19, 20, 64+19, 18, 18, 32+3, 18, 21, _L }, 2504 /*19 ENL+ES/CS */ { 96+1, 96+2, 18, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 19, 128+7, 96+3, 18, 21, _L }, 2505 /*20 ENL+ET */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 32+7, 20, 32+7, 20, 20, 32+3, 18, 21, _L }, 2506 /*21 ENR */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 64+22, 23, 64+22, 21, 21, 32+3, 18, 21, _AN }, 2507 /*22 ENR+ES/CS */ { 96+1, 96+2, 21, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 22, 128+7, 96+3, 18, 21, _AN }, 2508 /*23 ENR+ET */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 32+7, 23, 32+7, 23, 23, 32+3, 18, 21, _AN } 2509 }; 2510 2511 /*********************************************************************/ 2512 /* The levels state machine tables */ 2513 /*********************************************************************/ 2514 /* */ 2515 /* All table cells are 8 bits: */ 2516 /* bits 0..3: next state */ 2517 /* bits 4..7: action to perform (if > 0) */ 2518 /* */ 2519 /* Cells may be of format "n" where n represents the next state */ 2520 /* (except for the rightmost column). */ 2521 /* Cells may also be of format "_(x,y)" where x represents an action */ 2522 /* to perform and y represents the next state. */ 2523 /* */ 2524 /* This format limits each table to 16 states each and to 15 actions.*/ 2525 /* */ 2526 /*********************************************************************/ 2527 /* Definitions and type for levels state tables */ 2528 /*********************************************************************/ 2529 private static final int IMPTABLEVELS_COLUMNS = _B + 2; 2530 private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1; 2531 private static short GetState(byte cell) { return (short)(cell & 0x0f); } 2532 private static short GetAction(byte cell) { return (short)(cell >> 4); } 2533 2534 private static class ImpTabPair { 2535 byte[][][] imptab; 2536 short[][] impact; 2537 2538 ImpTabPair(byte[][] table1, byte[][] table2, 2539 short[] act1, short[] act2) { 2540 imptab = new byte[][][] {table1, table2}; 2541 impact = new short[][] {act1, act2}; 2542 } 2543 } 2544 2545 /*********************************************************************/ 2546 /* */ 2547 /* LEVELS STATE TABLES */ 2548 /* */ 2549 /* In all levels state tables, */ 2550 /* - state 0 is the initial state */ 2551 /* - the Res column is the increment to add to the text level */ 2552 /* for this property sequence. */ 2553 /* */ 2554 /* The impact arrays for each table of a pair map the local action */ 2555 /* numbers of the table to the total list of actions. For instance, */ 2556 /* action 2 in a given table corresponds to the action number which */ 2557 /* appears in entry [2] of the impact array for that table. */ 2558 /* The first entry of all impact arrays must be 0. */ 2559 /* */ 2560 /* Action 1: init conditional sequence */ 2561 /* 2: prepend conditional sequence to current sequence */ 2562 /* 3: set ON sequence to new level - 1 */ 2563 /* 4: init EN/AN/ON sequence */ 2564 /* 5: fix EN/AN/ON sequence followed by R */ 2565 /* 6: set previous level sequence to level 2 */ 2566 /* */ 2567 /* Notes: */ 2568 /* 1) These tables are used in processPropertySeq(). The input */ 2569 /* is property sequences as determined by resolveImplicitLevels. */ 2570 /* 2) Most such property sequences are processed immediately */ 2571 /* (levels are assigned). */ 2572 /* 3) However, some sequences cannot be assigned a final level till */ 2573 /* one or more following sequences are received. For instance, */ 2574 /* ON following an R sequence within an even-level paragraph. */ 2575 /* If the following sequence is R, the ON sequence will be */ 2576 /* assigned basic run level+1, and so will the R sequence. */ 2577 /* 4) S is generally handled like ON, since its level will be fixed */ 2578 /* to paragraph level in adjustWSLevels(). */ 2579 /* */ 2580 2581 private static final byte[][] impTabL_DEFAULT = /* Even paragraph level */ 2582 /* In this table, conditional sequences receive the lower possible level 2583 until proven otherwise. 2584 */ 2585 { 2586 /* L, R, EN, AN, ON, S, B, Res */ 2587 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 }, 2588 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 }, 2589 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 }, 2590 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 }, 2591 /* 4 : R+ON */ { 0, 0x21, 0x33, 0x33, 4, 4, 0, 0 }, 2592 /* 5 : AN+ON */ { 0, 0x21, 0, 0x32, 5, 5, 0, 0 } 2593 }; 2594 2595 private static final byte[][] impTabR_DEFAULT = /* Odd paragraph level */ 2596 /* In this table, conditional sequences receive the lower possible level 2597 until proven otherwise. 2598 */ 2599 { 2600 /* L, R, EN, AN, ON, S, B, Res */ 2601 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 2602 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 }, 2603 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 2604 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 }, 2605 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 }, 2606 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 } 2607 }; 2608 2609 private static final short[] impAct0 = {0,1,2,3,4}; 2610 2611 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair( 2612 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0); 2613 2614 private static final byte[][] impTabL_NUMBERS_SPECIAL = { /* Even paragraph level */ 2615 /* In this table, conditional sequences receive the lower possible 2616 level until proven otherwise. 2617 */ 2618 /* L, R, EN, AN, ON, S, B, Res */ 2619 /* 0 : init */ { 0, 2, 0x11, 0x11, 0, 0, 0, 0 }, 2620 /* 1 : L+EN/AN */ { 0, 0x42, 1, 1, 0, 0, 0, 0 }, 2621 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0x13, 0, 1 }, 2622 /* 3 : R+ON */ { 0, 0x22, 0x34, 0x34, 3, 3, 0, 0 }, 2623 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 } 2624 }; 2625 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair( 2626 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0); 2627 2628 private static final byte[][] impTabL_GROUP_NUMBERS_WITH_R = { 2629 /* In this table, EN/AN+ON sequences receive levels as if associated with R 2630 until proven that there is L or sor/eor on both sides. AN is handled like EN. 2631 */ 2632 /* L, R, EN, AN, ON, S, B, Res */ 2633 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 }, 2634 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 }, 2635 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 }, 2636 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 }, 2637 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 }, 2638 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 } 2639 }; 2640 private static final byte[][] impTabR_GROUP_NUMBERS_WITH_R = { 2641 /* In this table, EN/AN+ON sequences receive levels as if associated with R 2642 until proven that there is L on both sides. AN is handled like EN. 2643 */ 2644 /* L, R, EN, AN, ON, S, B, Res */ 2645 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 }, 2646 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 }, 2647 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 }, 2648 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 }, 2649 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 } 2650 }; 2651 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new 2652 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R, 2653 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0); 2654 2655 private static final byte[][] impTabL_INVERSE_NUMBERS_AS_L = { 2656 /* This table is identical to the Default LTR table except that EN and AN 2657 are handled like L. 2658 */ 2659 /* L, R, EN, AN, ON, S, B, Res */ 2660 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 }, 2661 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 }, 2662 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 }, 2663 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 }, 2664 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 }, 2665 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 } 2666 }; 2667 private static final byte[][] impTabR_INVERSE_NUMBERS_AS_L = { 2668 /* This table is identical to the Default RTL table except that EN and AN 2669 are handled like L. 2670 */ 2671 /* L, R, EN, AN, ON, S, B, Res */ 2672 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 }, 2673 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 }, 2674 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 }, 2675 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 }, 2676 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 }, 2677 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 } 2678 }; 2679 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair 2680 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L, 2681 impAct0, impAct0); 2682 2683 private static final byte[][] impTabR_INVERSE_LIKE_DIRECT = { /* Odd paragraph level */ 2684 /* In this table, conditional sequences receive the lower possible level 2685 until proven otherwise. 2686 */ 2687 /* L, R, EN, AN, ON, S, B, Res */ 2688 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, 2689 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 }, 2690 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, 2691 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 }, 2692 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 }, 2693 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 }, 2694 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 } 2695 }; 2696 private static final short[] impAct1 = {0,1,13,14}; 2697 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair( 2698 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 2699 2700 private static final byte[][] impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = { 2701 /* The case handled in this table is (visually): R EN L 2702 */ 2703 /* L, R, EN, AN, ON, S, B, Res */ 2704 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 }, 2705 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 }, 2706 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 }, 2707 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 }, 2708 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 }, 2709 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 }, 2710 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 } 2711 }; 2712 private static final byte[][] impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = { 2713 /* The cases handled in this table are (visually): R EN L 2714 R L AN L 2715 */ 2716 /* L, R, EN, AN, ON, S, B, Res */ 2717 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 }, 2718 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 }, 2719 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 }, 2720 /* 3 : L */ { 3, 0, 3, 0x36, 0x14, 0x40, 0, 1 }, 2721 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 }, 2722 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 }, 2723 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 } 2724 }; 2725 private static final short[] impAct2 = {0,1,2,5,6,7,8}; 2726 private static final short[] impAct3 = {0,1,9,10,11,12}; 2727 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = 2728 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 2729 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3); 2730 2731 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair( 2732 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); 2733 2734 private static final byte[][] impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 2735 /* The case handled in this table is (visually): R EN L 2736 */ 2737 /* L, R, EN, AN, ON, S, B, Res */ 2738 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 }, 2739 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 }, 2740 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 }, 2741 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 }, 2742 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 } 2743 }; 2744 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new 2745 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 2746 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3); 2747 2748 private static class LevState { 2749 byte[][] impTab; /* level table pointer */ 2750 short[] impAct; /* action map array */ 2751 int startON; /* start of ON sequence */ 2752 int startL2EN; /* start of level 2 sequence */ 2753 int lastStrongRTL; /* index of last found R or AL */ 2754 int runStart; /* start position of the run */ 2755 short state; /* current state */ 2756 byte runLevel; /* run level before implicit solving */ 2757 } 2758 2759 /*------------------------------------------------------------------------*/ 2760 2761 static final int FIRSTALLOC = 10; 2762 /* 2763 * param pos: position where to insert 2764 * param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 2765 */ 2766 private void addPoint(int pos, int flag) 2767 { 2768 Point point = new Point(); 2769 2770 int len = insertPoints.points.length; 2771 if (len == 0) { 2772 insertPoints.points = new Point[FIRSTALLOC]; 2773 len = FIRSTALLOC; 2774 } 2775 if (insertPoints.size >= len) { /* no room for new point */ 2776 Point[] savePoints = insertPoints.points; 2777 insertPoints.points = new Point[len * 2]; 2778 System.arraycopy(savePoints, 0, insertPoints.points, 0, len); 2779 } 2780 point.pos = pos; 2781 point.flag = flag; 2782 insertPoints.points[insertPoints.size] = point; 2783 insertPoints.size++; 2784 } 2785 2786 private void setLevelsOutsideIsolates(int start, int limit, byte level) 2787 { 2788 byte dirProp; 2789 int isolateCount = 0, k; 2790 for (k = start; k < limit; k++) { 2791 dirProp = dirProps[k]; 2792 if (dirProp == PDI) 2793 isolateCount--; 2794 if (isolateCount == 0) { 2795 levels[k] = level; 2796 } 2797 if (dirProp == LRI || dirProp == RLI) 2798 isolateCount++; 2799 } 2800 } 2801 2802 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 2803 2804 /* 2805 * This implementation of the (Wn) rules applies all rules in one pass. 2806 * In order to do so, it needs a look-ahead of typically 1 character 2807 * (except for W5: sequences of ET) and keeps track of changes 2808 * in a rule Wp that affect a later Wq (p<q). 2809 * 2810 * The (Nn) and (In) rules are also performed in that same single loop, 2811 * but effectively one iteration behind for white space. 2812 * 2813 * Since all implicit rules are performed in one step, it is not necessary 2814 * to actually store the intermediate directional properties in dirProps[]. 2815 */ 2816 2817 private void processPropertySeq(LevState levState, short _prop, 2818 int start, int limit) { 2819 byte cell; 2820 byte[][] impTab = levState.impTab; 2821 short[] impAct = levState.impAct; 2822 short oldStateSeq,actionSeq; 2823 byte level, addLevel; 2824 int start0, k; 2825 2826 start0 = start; /* save original start position */ 2827 oldStateSeq = levState.state; 2828 cell = impTab[oldStateSeq][_prop]; 2829 levState.state = GetState(cell); /* isolate the new state */ 2830 actionSeq = impAct[GetAction(cell)]; /* isolate the action */ 2831 addLevel = impTab[levState.state][IMPTABLEVELS_RES]; 2832 2833 if (actionSeq != 0) { 2834 switch (actionSeq) { 2835 case 1: /* init ON seq */ 2836 levState.startON = start0; 2837 break; 2838 2839 case 2: /* prepend ON seq to current seq */ 2840 start = levState.startON; 2841 break; 2842 2843 case 3: /* EN/AN after R+ON */ 2844 level = (byte)(levState.runLevel + 1); 2845 setLevelsOutsideIsolates(levState.startON, start0, level); 2846 break; 2847 2848 case 4: /* EN/AN before R for NUMBERS_SPECIAL */ 2849 level = (byte)(levState.runLevel + 2); 2850 setLevelsOutsideIsolates(levState.startON, start0, level); 2851 break; 2852 2853 case 5: /* L or S after possible relevant EN/AN */ 2854 /* check if we had EN after R/AL */ 2855 if (levState.startL2EN >= 0) { 2856 addPoint(levState.startL2EN, LRM_BEFORE); 2857 } 2858 levState.startL2EN = -1; /* not within previous if since could also be -2 */ 2859 /* check if we had any relevant EN/AN after R/AL */ 2860 if ((insertPoints.points.length == 0) || 2861 (insertPoints.size <= insertPoints.confirmed)) { 2862 /* nothing, just clean up */ 2863 levState.lastStrongRTL = -1; 2864 /* check if we have a pending conditional segment */ 2865 level = impTab[oldStateSeq][IMPTABLEVELS_RES]; 2866 if ((level & 1) != 0 && levState.startON > 0) { /* after ON */ 2867 start = levState.startON; /* reset to basic run level */ 2868 } 2869 if (_prop == _S) { /* add LRM before S */ 2870 addPoint(start0, LRM_BEFORE); 2871 insertPoints.confirmed = insertPoints.size; 2872 } 2873 break; 2874 } 2875 /* reset previous RTL cont to level for LTR text */ 2876 for (k = levState.lastStrongRTL + 1; k < start0; k++) { 2877 /* reset odd level, leave runLevel+2 as is */ 2878 levels[k] = (byte)((levels[k] - 2) & ~1); 2879 } 2880 /* mark insert points as confirmed */ 2881 insertPoints.confirmed = insertPoints.size; 2882 levState.lastStrongRTL = -1; 2883 if (_prop == _S) { /* add LRM before S */ 2884 addPoint(start0, LRM_BEFORE); 2885 insertPoints.confirmed = insertPoints.size; 2886 } 2887 break; 2888 2889 case 6: /* R/AL after possible relevant EN/AN */ 2890 /* just clean up */ 2891 if (insertPoints.points.length > 0) 2892 /* remove all non confirmed insert points */ 2893 insertPoints.size = insertPoints.confirmed; 2894 levState.startON = -1; 2895 levState.startL2EN = -1; 2896 levState.lastStrongRTL = limit - 1; 2897 break; 2898 2899 case 7: /* EN/AN after R/AL + possible cont */ 2900 /* check for real AN */ 2901 2902 if ((_prop == _AN) && (dirProps[start0] == AN) && 2903 (reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) 2904 { 2905 /* real AN */ 2906 if (levState.startL2EN == -1) { /* if no relevant EN already found */ 2907 /* just note the rightmost digit as a strong RTL */ 2908 levState.lastStrongRTL = limit - 1; 2909 break; 2910 } 2911 if (levState.startL2EN >= 0) { /* after EN, no AN */ 2912 addPoint(levState.startL2EN, LRM_BEFORE); 2913 levState.startL2EN = -2; 2914 } 2915 /* note AN */ 2916 addPoint(start0, LRM_BEFORE); 2917 break; 2918 } 2919 /* if first EN/AN after R/AL */ 2920 if (levState.startL2EN == -1) { 2921 levState.startL2EN = start0; 2922 } 2923 break; 2924 2925 case 8: /* note location of latest R/AL */ 2926 levState.lastStrongRTL = limit - 1; 2927 levState.startON = -1; 2928 break; 2929 2930 case 9: /* L after R+ON/EN/AN */ 2931 /* include possible adjacent number on the left */ 2932 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) { 2933 } 2934 if (k >= 0) { 2935 addPoint(k, RLM_BEFORE); /* add RLM before */ 2936 insertPoints.confirmed = insertPoints.size; /* confirm it */ 2937 } 2938 levState.startON = start0; 2939 break; 2940 2941 case 10: /* AN after L */ 2942 /* AN numbers between L text on both sides may be trouble. */ 2943 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 2944 addPoint(start0, LRM_BEFORE); /* add LRM before */ 2945 addPoint(start0, LRM_AFTER); /* add LRM after */ 2946 break; 2947 2948 case 11: /* R after L+ON/EN/AN */ 2949 /* false alert, infirm LRMs around previous AN */ 2950 insertPoints.size=insertPoints.confirmed; 2951 if (_prop == _S) { /* add RLM before S */ 2952 addPoint(start0, RLM_BEFORE); 2953 insertPoints.confirmed = insertPoints.size; 2954 } 2955 break; 2956 2957 case 12: /* L after L+ON/AN */ 2958 level = (byte)(levState.runLevel + addLevel); 2959 for (k=levState.startON; k < start0; k++) { 2960 if (levels[k] < level) { 2961 levels[k] = level; 2962 } 2963 } 2964 insertPoints.confirmed = insertPoints.size; /* confirm inserts */ 2965 levState.startON = start0; 2966 break; 2967 2968 case 13: /* L after L+ON+EN/AN/ON */ 2969 level = levState.runLevel; 2970 for (k = start0-1; k >= levState.startON; k--) { 2971 if (levels[k] == level+3) { 2972 while (levels[k] == level+3) { 2973 levels[k--] -= 2; 2974 } 2975 while (levels[k] == level) { 2976 k--; 2977 } 2978 } 2979 if (levels[k] == level+2) { 2980 levels[k] = level; 2981 continue; 2982 } 2983 levels[k] = (byte)(level+1); 2984 } 2985 break; 2986 2987 case 14: /* R after L+ON+EN/AN/ON */ 2988 level = (byte)(levState.runLevel+1); 2989 for (k = start0-1; k >= levState.startON; k--) { 2990 if (levels[k] > level) { 2991 levels[k] -= 2; 2992 } 2993 } 2994 break; 2995 2996 default: /* we should never get here */ 2997 throw new IllegalStateException("Internal ICU error in processPropertySeq"); 2998 } 2999 } 3000 if ((addLevel) != 0 || (start < start0)) { 3001 level = (byte)(levState.runLevel + addLevel); 3002 if (start >= levState.runStart) { 3003 for (k = start; k < limit; k++) { 3004 levels[k] = level; 3005 } 3006 } else { 3007 setLevelsOutsideIsolates(start, limit, level); 3008 } 3009 } 3010 } 3011 3012 private void resolveImplicitLevels(int start, int limit, short sor, short eor) 3013 { 3014 byte dirProp; 3015 LevState levState = new LevState(); 3016 int i, start1, start2; 3017 short oldStateImp, stateImp, actionImp; 3018 short gprop, resProp, cell; 3019 boolean inverseRTL; 3020 short nextStrongProp = R; 3021 int nextStrongPos = -1; 3022 3023 /* check for RTL inverse Bidi mode */ 3024 /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to 3025 * loop on the text characters from end to start. 3026 * This would need a different properties state table (at least different 3027 * actions) and different levels state tables (maybe very similar to the 3028 * LTR corresponding ones. 3029 */ 3030 inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) && 3031 (reorderingMode == REORDER_INVERSE_LIKE_DIRECT || 3032 reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); 3033 /* initialize for property and levels state table */ 3034 levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 3035 levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 3036 levState.runStart = start; 3037 levState.runLevel = levels[start]; 3038 levState.impTab = impTabPair.imptab[levState.runLevel & 1]; 3039 levState.impAct = impTabPair.impact[levState.runLevel & 1]; 3040 3041 /* The isolates[] entries contain enough information to 3042 resume the bidi algorithm in the same state as it was 3043 when it was interrupted by an isolate sequence. */ 3044 if (dirProps[start] == PDI) { 3045 levState.startON = isolates[isolateCount].startON; 3046 start1 = isolates[isolateCount].start1; 3047 stateImp = isolates[isolateCount].stateImp; 3048 levState.state = isolates[isolateCount].state; 3049 isolateCount--; 3050 } else { 3051 levState.startON = -1; 3052 start1 = start; 3053 if (dirProps[start] == NSM) 3054 stateImp = (short)(1 + sor); 3055 else 3056 stateImp = 0; 3057 levState.state = 0; 3058 processPropertySeq(levState, sor, start, start); 3059 } 3060 start2 = start; /* to make the Java compiler happy */ 3061 3062 for (i = start; i <= limit; i++) { 3063 if (i >= limit) { 3064 int k; 3065 for (k = limit - 1; 3066 k > start && 3067 (DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0; 3068 k--); 3069 dirProp = dirProps[k]; 3070 if (dirProp == LRI || dirProp == RLI) 3071 break; /* no forced closing for sequence ending with LRI/RLI */ 3072 gprop = eor; 3073 } else { 3074 byte prop, prop1; 3075 prop = dirProps[i]; 3076 if (prop == B) 3077 isolateCount = -1; /* current isolates stack entry == none */ 3078 if (inverseRTL) { 3079 if (prop == AL) { 3080 /* AL before EN does not make it AN */ 3081 prop = R; 3082 } else if (prop == EN) { 3083 if (nextStrongPos <= i) { 3084 /* look for next strong char (L/R/AL) */ 3085 int j; 3086 nextStrongProp = R; /* set default */ 3087 nextStrongPos = limit; 3088 for (j = i+1; j < limit; j++) { 3089 prop1 = dirProps[j]; 3090 if (prop1 == L || prop1 == R || prop1 == AL) { 3091 nextStrongProp = prop1; 3092 nextStrongPos = j; 3093 break; 3094 } 3095 } 3096 } 3097 if (nextStrongProp == AL) { 3098 prop = AN; 3099 } 3100 } 3101 } 3102 gprop = groupProp[prop]; 3103 } 3104 oldStateImp = stateImp; 3105 cell = impTabProps[oldStateImp][gprop]; 3106 stateImp = GetStateProps(cell); /* isolate the new state */ 3107 actionImp = GetActionProps(cell); /* isolate the action */ 3108 if ((i == limit) && (actionImp == 0)) { 3109 /* there is an unprocessed sequence if its property == eor */ 3110 actionImp = 1; /* process the last sequence */ 3111 } 3112 if (actionImp != 0) { 3113 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES]; 3114 switch (actionImp) { 3115 case 1: /* process current seq1, init new seq1 */ 3116 processPropertySeq(levState, resProp, start1, i); 3117 start1 = i; 3118 break; 3119 case 2: /* init new seq2 */ 3120 start2 = i; 3121 break; 3122 case 3: /* process seq1, process seq2, init new seq1 */ 3123 processPropertySeq(levState, resProp, start1, start2); 3124 processPropertySeq(levState, _ON, start2, i); 3125 start1 = i; 3126 break; 3127 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 3128 processPropertySeq(levState, resProp, start1, start2); 3129 start1 = start2; 3130 start2 = i; 3131 break; 3132 default: /* we should never get here */ 3133 throw new IllegalStateException("Internal ICU error in resolveImplicitLevels"); 3134 } 3135 } 3136 } 3137 3138 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ 3139 for (i = limit - 1; 3140 i > start && 3141 (DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0; 3142 i--); 3143 dirProp = dirProps[i]; 3144 if ((dirProp == LRI || dirProp == RLI) && limit < length) { 3145 isolateCount++; 3146 if (isolates[isolateCount] == null) 3147 isolates[isolateCount] = new Isolate(); 3148 isolates[isolateCount].stateImp = stateImp; 3149 isolates[isolateCount].state = levState.state; 3150 isolates[isolateCount].start1 = start1; 3151 isolates[isolateCount].startON = levState.startON; 3152 } 3153 else 3154 processPropertySeq(levState, eor, limit, limit); 3155 } 3156 3157 /* perform (L1) and (X9) ---------------------------------------------------- */ 3158 3159 /* 3160 * Reset the embedding levels for some non-graphic characters (L1). 3161 * This method also sets appropriate levels for BN, and 3162 * explicit embedding types that are supposed to have been removed 3163 * from the paragraph in (X9). 3164 */ 3165 private void adjustWSLevels() { 3166 int i; 3167 3168 if ((flags & MASK_WS) != 0) { 3169 int flag; 3170 i = trailingWSStart; 3171 while (i > 0) { 3172 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 3173 while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) { 3174 if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 3175 levels[i] = 0; 3176 } else { 3177 levels[i] = GetParaLevelAt(i); 3178 } 3179 } 3180 3181 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 3182 /* here, i+1 is guaranteed to be <length */ 3183 while (i > 0) { 3184 flag = DirPropFlag(dirProps[--i]); 3185 if ((flag & MASK_BN_EXPLICIT) != 0) { 3186 levels[i] = levels[i + 1]; 3187 } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { 3188 levels[i] = 0; 3189 break; 3190 } else if ((flag & MASK_B_S) != 0){ 3191 levels[i] = GetParaLevelAt(i); 3192 break; 3193 } 3194 } 3195 } 3196 } 3197 } 3198 3199 private void setParaSuccess() { 3200 paraBidi = this; /* mark successful setPara */ 3201 } 3202 3203 private int Bidi_Min(int x, int y) { 3204 return x < y ? x : y; 3205 } 3206 3207 private int Bidi_Abs(int x) { 3208 return x >= 0 ? x : -x; 3209 } 3210 3211 void setParaRunsOnly(char[] parmText, byte parmParaLevel) { 3212 int[] visualMap; 3213 String visualText; 3214 int saveLength, saveTrailingWSStart; 3215 byte[] saveLevels; 3216 byte saveDirection; 3217 int i, j, visualStart, logicalStart, 3218 oldRunCount, runLength, addedRuns, insertRemove, 3219 start, limit, step, indexOddBit, logicalPos, 3220 index, index1; 3221 int saveOptions; 3222 3223 reorderingMode = REORDER_DEFAULT; 3224 int parmLength = parmText.length; 3225 if (parmLength == 0) { 3226 setPara(parmText, parmParaLevel, null); 3227 reorderingMode = REORDER_RUNS_ONLY; 3228 return; 3229 } 3230 /* obtain memory for mapping table and visual text */ 3231 saveOptions = reorderingOptions; 3232 if ((saveOptions & OPTION_INSERT_MARKS) > 0) { 3233 reorderingOptions &= ~OPTION_INSERT_MARKS; 3234 reorderingOptions |= OPTION_REMOVE_CONTROLS; 3235 } 3236 parmParaLevel &= 1; /* accept only 0 or 1 */ 3237 setPara(parmText, parmParaLevel, null); 3238 /* we cannot access directly levels since it is not yet set if 3239 * direction is not MIXED 3240 */ 3241 saveLevels = new byte[this.length]; 3242 System.arraycopy(getLevels(), 0, saveLevels, 0, this.length); 3243 saveTrailingWSStart = trailingWSStart; 3244 3245 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use 3246 * the visual map and the dirProps array to drive the second call 3247 * to setPara (but must make provision for possible removal of 3248 * Bidi controls. Alternatively, only use the dirProps array via 3249 * customized classifier callback. 3250 */ 3251 visualText = writeReordered(DO_MIRRORING); 3252 visualMap = getVisualMap(); 3253 this.reorderingOptions = saveOptions; 3254 saveLength = this.length; 3255 saveDirection=this.direction; 3256 3257 this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT; 3258 parmParaLevel ^= 1; 3259 setPara(visualText, parmParaLevel, null); 3260 BidiLine.getRuns(this); 3261 /* check if some runs must be split, count how many splits */ 3262 addedRuns = 0; 3263 oldRunCount = this.runCount; 3264 visualStart = 0; 3265 for (i = 0; i < oldRunCount; i++, visualStart += runLength) { 3266 runLength = runs[i].limit - visualStart; 3267 if (runLength < 2) { 3268 continue; 3269 } 3270 logicalStart = runs[i].start; 3271 for (j = logicalStart+1; j < logicalStart+runLength; j++) { 3272 index = visualMap[j]; 3273 index1 = visualMap[j-1]; 3274 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) { 3275 addedRuns++; 3276 } 3277 } 3278 } 3279 if (addedRuns > 0) { 3280 getRunsMemory(oldRunCount + addedRuns); 3281 if (runCount == 1) { 3282 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 3283 runsMemory[0] = runs[0]; 3284 } else { 3285 System.arraycopy(runs, 0, runsMemory, 0, runCount); 3286 } 3287 runs = runsMemory; 3288 runCount += addedRuns; 3289 for (i = oldRunCount; i < runCount; i++) { 3290 if (runs[i] == null) { 3291 runs[i] = new BidiRun(0, 0, (byte)0); 3292 } 3293 } 3294 } 3295 /* split runs which are not consecutive in source text */ 3296 int newI; 3297 for (i = oldRunCount-1; i >= 0; i--) { 3298 newI = i + addedRuns; 3299 runLength = i==0 ? runs[0].limit : 3300 runs[i].limit - runs[i-1].limit; 3301 logicalStart = runs[i].start; 3302 indexOddBit = runs[i].level & 1; 3303 if (runLength < 2) { 3304 if (addedRuns > 0) { 3305 runs[newI].copyFrom(runs[i]); 3306 } 3307 logicalPos = visualMap[logicalStart]; 3308 runs[newI].start = logicalPos; 3309 runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit); 3310 continue; 3311 } 3312 if (indexOddBit > 0) { 3313 start = logicalStart; 3314 limit = logicalStart + runLength - 1; 3315 step = 1; 3316 } else { 3317 start = logicalStart + runLength - 1; 3318 limit = logicalStart; 3319 step = -1; 3320 } 3321 for (j = start; j != limit; j += step) { 3322 index = visualMap[j]; 3323 index1 = visualMap[j+step]; 3324 if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) { 3325 logicalPos = Bidi_Min(visualMap[start], index); 3326 runs[newI].start = logicalPos; 3327 runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit); 3328 runs[newI].limit = runs[i].limit; 3329 runs[i].limit -= Bidi_Abs(j - start) + 1; 3330 insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER); 3331 runs[newI].insertRemove = insertRemove; 3332 runs[i].insertRemove &= ~insertRemove; 3333 start = j + step; 3334 addedRuns--; 3335 newI--; 3336 } 3337 } 3338 if (addedRuns > 0) { 3339 runs[newI].copyFrom(runs[i]); 3340 } 3341 logicalPos = Bidi_Min(visualMap[start], visualMap[limit]); 3342 runs[newI].start = logicalPos; 3343 runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit); 3344 } 3345 3346 cleanup1: 3347 /* restore initial paraLevel */ 3348 this.paraLevel ^= 1; 3349 cleanup2: 3350 /* restore real text */ 3351 this.text = parmText; 3352 this.length = saveLength; 3353 this.originalLength = parmLength; 3354 this.direction=saveDirection; 3355 this.levels = saveLevels; 3356 this.trailingWSStart = saveTrailingWSStart; 3357 if (runCount > 1) { 3358 this.direction = MIXED; 3359 } 3360 cleanup3: 3361 this.reorderingMode = REORDER_RUNS_ONLY; 3362 } 3363 3364 /** 3365 * Perform the Unicode Bidi algorithm. It is defined in the 3366 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 3367 * version 13, 3368 * also described in The Unicode Standard, Version 4.0 .<p> 3369 * 3370 * This method takes a piece of plain text containing one or more paragraphs, 3371 * with or without externally specified embedding levels from <i>styled</i> 3372 * text and computes the left-right-directionality of each character.<p> 3373 * 3374 * If the entire text is all of the same directionality, then 3375 * the method may not perform all the steps described by the algorithm, 3376 * i.e., some levels may not be the same as if all steps were performed. 3377 * This is not relevant for unidirectional text.<br> 3378 * For example, in pure LTR text with numbers the numbers would get 3379 * a resolved level of 2 higher than the surrounding text according to 3380 * the algorithm. This implementation may set all resolved levels to 3381 * the same value in such a case.<p> 3382 * 3383 * The text can be composed of multiple paragraphs. Occurrence of a block 3384 * separator in the text terminates a paragraph, and whatever comes next starts 3385 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 3386 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 3387 * in that case, the pair of characters is considered as terminating the 3388 * preceding paragraph, and a new paragraph will be started by a character 3389 * coming after the LF. 3390 * 3391 * Although the text is passed here as a <code>String</code>, it is 3392 * stored internally as an array of characters. Therefore the 3393 * documentation will refer to indexes of the characters in the text. 3394 * 3395 * @param text contains the text that the Bidi algorithm will be performed 3396 * on. This text can be retrieved with <code>getText()</code> or 3397 * <code>getTextAsString</code>.<br> 3398 * 3399 * @param paraLevel specifies the default level for the text; 3400 * it is typically 0 (LTR) or 1 (RTL). 3401 * If the method shall determine the paragraph level from the text, 3402 * then <code>paraLevel</code> can be set to 3403 * either <code>LEVEL_DEFAULT_LTR</code> 3404 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 3405 * paragraphs, the paragraph level shall be determined separately for 3406 * each paragraph; if a paragraph does not include any strongly typed 3407 * character, then the desired default is used (0 for LTR or 1 for RTL). 3408 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 3409 * is also valid, with odd levels indicating RTL. 3410 * 3411 * @param embeddingLevels (in) may be used to preset the embedding and override levels, 3412 * ignoring characters like LRE and PDF in the text. 3413 * A level overrides the directional property of its corresponding 3414 * (same index) character if the level has the 3415 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 3416 * Except for that bit, it must be 3417 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 3418 * with one exception: a level of zero may be specified for a 3419 * paragraph separator even if <code>paraLevel>0</code> when multiple 3420 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 3421 * <strong>Caution: </strong>A reference to this array, not a copy 3422 * of the levels, will be stored in the <code>Bidi</code> object; 3423 * the <code>embeddingLevels</code> 3424 * should not be modified to avoid unexpected results on subsequent 3425 * Bidi operations. However, the <code>setPara()</code> and 3426 * <code>setLine()</code> methods may modify some or all of the 3427 * levels.<br><br> 3428 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 3429 * have one entry for each character in <code>text</code>. 3430 * 3431 * @throws IllegalArgumentException if the values in embeddingLevels are 3432 * not within the allowed range 3433 * 3434 * @see #LEVEL_DEFAULT_LTR 3435 * @see #LEVEL_DEFAULT_RTL 3436 * @see #LEVEL_OVERRIDE 3437 * @see #MAX_EXPLICIT_LEVEL 3438 * @stable ICU 3.8 3439 */ 3440 void setPara(String text, byte paraLevel, byte[] embeddingLevels) 3441 { 3442 if (text == null) { 3443 setPara(new char[0], paraLevel, embeddingLevels); 3444 } else { 3445 setPara(text.toCharArray(), paraLevel, embeddingLevels); 3446 } 3447 } 3448 3449 /** 3450 * Perform the Unicode Bidi algorithm. It is defined in the 3451 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 3452 * version 13, 3453 * also described in The Unicode Standard, Version 4.0 .<p> 3454 * 3455 * This method takes a piece of plain text containing one or more paragraphs, 3456 * with or without externally specified embedding levels from <i>styled</i> 3457 * text and computes the left-right-directionality of each character.<p> 3458 * 3459 * If the entire text is all of the same directionality, then 3460 * the method may not perform all the steps described by the algorithm, 3461 * i.e., some levels may not be the same as if all steps were performed. 3462 * This is not relevant for unidirectional text.<br> 3463 * For example, in pure LTR text with numbers the numbers would get 3464 * a resolved level of 2 higher than the surrounding text according to 3465 * the algorithm. This implementation may set all resolved levels to 3466 * the same value in such a case. 3467 * 3468 * The text can be composed of multiple paragraphs. Occurrence of a block 3469 * separator in the text terminates a paragraph, and whatever comes next starts 3470 * a new paragraph. The exception to this rule is when a Carriage Return (CR) 3471 * is followed by a Line Feed (LF). Both CR and LF are block separators, but 3472 * in that case, the pair of characters is considered as terminating the 3473 * preceding paragraph, and a new paragraph will be started by a character 3474 * coming after the LF. 3475 * 3476 * The text is stored internally as an array of characters. Therefore the 3477 * documentation will refer to indexes of the characters in the text. 3478 * 3479 * @param chars contains the text that the Bidi algorithm will be performed 3480 * on. This text can be retrieved with <code>getText()</code> or 3481 * <code>getTextAsString</code>.<br> 3482 * 3483 * @param paraLevel specifies the default level for the text; 3484 * it is typically 0 (LTR) or 1 (RTL). 3485 * If the method shall determine the paragraph level from the text, 3486 * then <code>paraLevel</code> can be set to 3487 * either <code>LEVEL_DEFAULT_LTR</code> 3488 * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple 3489 * paragraphs, the paragraph level shall be determined separately for 3490 * each paragraph; if a paragraph does not include any strongly typed 3491 * character, then the desired default is used (0 for LTR or 1 for RTL). 3492 * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code> 3493 * is also valid, with odd levels indicating RTL. 3494 * 3495 * @param embeddingLevels (in) may be used to preset the embedding and 3496 * override levels, ignoring characters like LRE and PDF in the text. 3497 * A level overrides the directional property of its corresponding 3498 * (same index) character if the level has the 3499 * <code>LEVEL_OVERRIDE</code> bit set.<br><br> 3500 * Except for that bit, it must be 3501 * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, 3502 * with one exception: a level of zero may be specified for a 3503 * paragraph separator even if <code>paraLevel>0</code> when multiple 3504 * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> 3505 * <strong>Caution: </strong>A reference to this array, not a copy 3506 * of the levels, will be stored in the <code>Bidi</code> object; 3507 * the <code>embeddingLevels</code> 3508 * should not be modified to avoid unexpected results on subsequent 3509 * Bidi operations. However, the <code>setPara()</code> and 3510 * <code>setLine()</code> methods may modify some or all of the 3511 * levels.<br><br> 3512 * <strong>Note:</strong> the <code>embeddingLevels</code> array must 3513 * have one entry for each character in <code>text</code>. 3514 * 3515 * @throws IllegalArgumentException if the values in embeddingLevels are 3516 * not within the allowed range 3517 * 3518 * @see #LEVEL_DEFAULT_LTR 3519 * @see #LEVEL_DEFAULT_RTL 3520 * @see #LEVEL_OVERRIDE 3521 * @see #MAX_EXPLICIT_LEVEL 3522 * @stable ICU 3.8 3523 */ 3524 void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels) 3525 { 3526 /* check the argument values */ 3527 if (paraLevel < LEVEL_DEFAULT_LTR) { 3528 verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1); 3529 } 3530 if (chars == null) { 3531 chars = new char[0]; 3532 } 3533 3534 /* special treatment for RUNS_ONLY mode */ 3535 if (reorderingMode == REORDER_RUNS_ONLY) { 3536 setParaRunsOnly(chars, paraLevel); 3537 return; 3538 } 3539 3540 /* initialize the Bidi object */ 3541 this.paraBidi = null; /* mark unfinished setPara */ 3542 this.text = chars; 3543 this.length = this.originalLength = this.resultLength = text.length; 3544 this.paraLevel = paraLevel; 3545 this.direction = (byte)(paraLevel & 1); 3546 this.paraCount = 1; 3547 3548 /* Allocate zero-length arrays instead of setting to null here; then 3549 * checks for null in various places can be eliminated. 3550 */ 3551 dirProps = new byte[0]; 3552 levels = new byte[0]; 3553 runs = new BidiRun[0]; 3554 isGoodLogicalToVisualRunsMap = false; 3555 insertPoints.size = 0; /* clean up from last call */ 3556 insertPoints.confirmed = 0; /* clean up from last call */ 3557 3558 /* 3559 * Save the original paraLevel if contextual; otherwise, set to 0. 3560 */ 3561 defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0; 3562 3563 if (length == 0) { 3564 /* 3565 * For an empty paragraph, create a Bidi object with the paraLevel and 3566 * the flags and the direction set but without allocating zero-length arrays. 3567 * There is nothing more to do. 3568 */ 3569 if (IsDefaultLevel(paraLevel)) { 3570 this.paraLevel &= 1; 3571 defaultParaLevel = 0; 3572 } 3573 flags = DirPropFlagLR(paraLevel); 3574 runCount = 0; 3575 paraCount = 0; 3576 setParaSuccess(); 3577 return; 3578 } 3579 3580 runCount = -1; 3581 3582 /* 3583 * Get the directional properties, 3584 * the flags bit-set, and 3585 * determine the paragraph level if necessary. 3586 */ 3587 getDirPropsMemory(length); 3588 dirProps = dirPropsMemory; 3589 getDirProps(); 3590 /* the processed length may have changed if OPTION_STREAMING is set */ 3591 trailingWSStart = length; /* the levels[] will reflect the WS run */ 3592 3593 /* are explicit levels specified? */ 3594 if (embeddingLevels == null) { 3595 /* no: determine explicit levels according to the (Xn) rules */ 3596 getLevelsMemory(length); 3597 levels = levelsMemory; 3598 direction = resolveExplicitLevels(); 3599 } else { 3600 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */ 3601 levels = embeddingLevels; 3602 direction = checkExplicitLevels(); 3603 } 3604 3605 /* allocate isolate memory */ 3606 if (isolateCount > 0) { 3607 if (isolates == null || isolates.length < isolateCount) 3608 isolates = new Isolate[isolateCount + 3]; /* keep some reserve */ 3609 } 3610 isolateCount = -1; /* current isolates stack entry == none */ 3611 3612 /* 3613 * The steps after (X9) in the Bidi algorithm are performed only if 3614 * the paragraph text has mixed directionality! 3615 */ 3616 switch (direction) { 3617 case LTR: 3618 /* all levels are implicitly at paraLevel (important for getLevels()) */ 3619 trailingWSStart = 0; 3620 break; 3621 case RTL: 3622 /* all levels are implicitly at paraLevel (important for getLevels()) */ 3623 trailingWSStart = 0; 3624 break; 3625 default: 3626 /* 3627 * Choose the right implicit state table 3628 */ 3629 switch(reorderingMode) { 3630 case REORDER_DEFAULT: 3631 this.impTabPair = impTab_DEFAULT; 3632 break; 3633 case REORDER_NUMBERS_SPECIAL: 3634 this.impTabPair = impTab_NUMBERS_SPECIAL; 3635 break; 3636 case REORDER_GROUP_NUMBERS_WITH_R: 3637 this.impTabPair = impTab_GROUP_NUMBERS_WITH_R; 3638 break; 3639 case REORDER_RUNS_ONLY: 3640 /* we should never get here */ 3641 throw new InternalError("Internal ICU error in setPara"); 3642 /* break; */ 3643 case REORDER_INVERSE_NUMBERS_AS_L: 3644 this.impTabPair = impTab_INVERSE_NUMBERS_AS_L; 3645 break; 3646 case REORDER_INVERSE_LIKE_DIRECT: 3647 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) { 3648 this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 3649 } else { 3650 this.impTabPair = impTab_INVERSE_LIKE_DIRECT; 3651 } 3652 break; 3653 case REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 3654 if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) { 3655 this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 3656 } else { 3657 this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL; 3658 } 3659 break; 3660 } 3661 /* 3662 * If there are no external levels specified and there 3663 * are no significant explicit level codes in the text, 3664 * then we can treat the entire paragraph as one run. 3665 * Otherwise, we need to perform the following rules on runs of 3666 * the text with the same embedding levels. (X10) 3667 * "Significant" explicit level codes are ones that actually 3668 * affect non-BN characters. 3669 * Examples for "insignificant" ones are empty embeddings 3670 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 3671 */ 3672 if (embeddingLevels == null && paraCount <= 1 && 3673 (flags & DirPropFlagMultiRuns) == 0) { 3674 resolveImplicitLevels(0, length, 3675 GetLRFromLevel(GetParaLevelAt(0)), 3676 GetLRFromLevel(GetParaLevelAt(length - 1))); 3677 } else { 3678 /* sor, eor: start and end types of same-level-run */ 3679 int start, limit = 0; 3680 byte level, nextLevel; 3681 short sor, eor; 3682 3683 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 3684 level = GetParaLevelAt(0); 3685 nextLevel = levels[0]; 3686 if (level < nextLevel) { 3687 eor = GetLRFromLevel(nextLevel); 3688 } else { 3689 eor = GetLRFromLevel(level); 3690 } 3691 3692 do { 3693 /* determine start and limit of the run (end points just behind the run) */ 3694 3695 /* the values for this run's start are the same as for the previous run's end */ 3696 start = limit; 3697 level = nextLevel; 3698 if ((start > 0) && (dirProps[start - 1] == B)) { 3699 /* except if this is a new paragraph, then set sor = para level */ 3700 sor = GetLRFromLevel(GetParaLevelAt(start)); 3701 } else { 3702 sor = eor; 3703 } 3704 3705 /* search for the limit of this run */ 3706 while ((++limit < length) && 3707 ((levels[limit] == level) || 3708 ((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {} 3709 3710 /* get the correct level of the next run */ 3711 if (limit < length) { 3712 nextLevel = levels[limit]; 3713 } else { 3714 nextLevel = GetParaLevelAt(length - 1); 3715 } 3716 3717 /* determine eor from max(level, nextLevel); sor is last run's eor */ 3718 if (NoOverride(level) < NoOverride(nextLevel)) { 3719 eor = GetLRFromLevel(nextLevel); 3720 } else { 3721 eor = GetLRFromLevel(level); 3722 } 3723 3724 /* if the run consists of overridden directional types, then there 3725 are no implicit types to be resolved */ 3726 if ((level & LEVEL_OVERRIDE) == 0) { 3727 resolveImplicitLevels(start, limit, sor, eor); 3728 } else { 3729 /* remove the LEVEL_OVERRIDE flags */ 3730 do { 3731 levels[start++] &= ~LEVEL_OVERRIDE; 3732 } while (start < limit); 3733 } 3734 } while (limit < length); 3735 } 3736 3737 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 3738 adjustWSLevels(); 3739 3740 break; 3741 } 3742 3743 /* add RLM for inverse Bidi with contextual orientation resolving 3744 * to RTL which would not round-trip otherwise 3745 */ 3746 if ((defaultParaLevel > 0) && 3747 ((reorderingOptions & OPTION_INSERT_MARKS) != 0) && 3748 ((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) || 3749 (reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 3750 int start, last; 3751 byte level; 3752 byte dirProp; 3753 for (int i = 0; i < paraCount; i++) { 3754 last = paras_limit[i] - 1; 3755 level = paras_level[i]; 3756 if (level == 0) 3757 continue; /* LTR paragraph */ 3758 start = i == 0 ? 0 : paras_limit[i - 1]; 3759 for (int j = last; j >= start; j--) { 3760 dirProp = dirProps[j]; 3761 if (dirProp == L) { 3762 if (j < last) { 3763 while (dirProps[last] == B) { 3764 last--; 3765 } 3766 } 3767 addPoint(last, RLM_BEFORE); 3768 break; 3769 } 3770 if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) { 3771 break; 3772 } 3773 } 3774 } 3775 } 3776 3777 if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) { 3778 resultLength -= controlCount; 3779 } else { 3780 resultLength += insertPoints.size; 3781 } 3782 setParaSuccess(); 3783 } 3784 3785 /** 3786 * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the 3787 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, 3788 * version 13, 3789 * also described in The Unicode Standard, Version 4.0 .<p> 3790 * 3791 * This method takes a paragraph of text and computes the 3792 * left-right-directionality of each character. The text should not 3793 * contain any Unicode block separators.<p> 3794 * 3795 * The RUN_DIRECTION attribute in the text, if present, determines the base 3796 * direction (left-to-right or right-to-left). If not present, the base 3797 * direction is computed using the Unicode Bidirectional Algorithm, 3798 * defaulting to left-to-right if there are no strong directional characters 3799 * in the text. This attribute, if present, must be applied to all the text 3800 * in the paragraph.<p> 3801 * 3802 * The BIDI_EMBEDDING attribute in the text, if present, represents 3803 * embedding level information. Negative values from -1 to -62 indicate 3804 * overrides at the absolute value of the level. Positive values from 1 to 3805 * 62 indicate embeddings. Where values are zero or not defined, the base 3806 * embedding level as determined by the base direction is assumed.<p> 3807 * 3808 * The NUMERIC_SHAPING attribute in the text, if present, converts European 3809 * digits to other decimal digits before running the bidi algorithm. This 3810 * attribute, if present, must be applied to all the text in the paragraph. 3811 * 3812 * If the entire text is all of the same directionality, then 3813 * the method may not perform all the steps described by the algorithm, 3814 * i.e., some levels may not be the same as if all steps were performed. 3815 * This is not relevant for unidirectional text.<br> 3816 * For example, in pure LTR text with numbers the numbers would get 3817 * a resolved level of 2 higher than the surrounding text according to 3818 * the algorithm. This implementation may set all resolved levels to 3819 * the same value in such a case.<p> 3820 * 3821 * @param paragraph a paragraph of text with optional character and 3822 * paragraph attribute information 3823 * @stable ICU 3.8 3824 */ 3825 public void setPara(AttributedCharacterIterator paragraph) 3826 { 3827 byte paraLvl; 3828 char ch = paragraph.first(); 3829 Boolean runDirection = 3830 (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION); 3831 Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING); 3832 3833 if (runDirection == null) { 3834 paraLvl = LEVEL_DEFAULT_LTR; 3835 } else { 3836 paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ? 3837 LTR : RTL; 3838 } 3839 3840 byte[] lvls = null; 3841 int len = paragraph.getEndIndex() - paragraph.getBeginIndex(); 3842 byte[] embeddingLevels = new byte[len]; 3843 char[] txt = new char[len]; 3844 int i = 0; 3845 while (ch != AttributedCharacterIterator.DONE) { 3846 txt[i] = ch; 3847 Integer embedding = 3848 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING); 3849 if (embedding != null) { 3850 byte level = embedding.byteValue(); 3851 if (level == 0) { 3852 /* no-op */ 3853 } else if (level < 0) { 3854 lvls = embeddingLevels; 3855 embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE); 3856 } else { 3857 lvls = embeddingLevels; 3858 embeddingLevels[i] = level; 3859 } 3860 } 3861 ch = paragraph.next(); 3862 ++i; 3863 } 3864 3865 if (shaper != null) { 3866 NumericShapings.shape(shaper, txt, 0, len); 3867 } 3868 setPara(txt, paraLvl, lvls); 3869 } 3870 3871 /** 3872 * Specify whether block separators must be allocated level zero, 3873 * so that successive paragraphs will progress from left to right. 3874 * This method must be called before <code>setPara()</code>. 3875 * Paragraph separators (B) may appear in the text. Setting them to level zero 3876 * means that all paragraph separators (including one possibly appearing 3877 * in the last text position) are kept in the reordered text after the text 3878 * that they follow in the source text. 3879 * When this feature is not enabled, a paragraph separator at the last 3880 * position of the text before reordering will go to the first position 3881 * of the reordered text when the paragraph level is odd. 3882 * 3883 * @param ordarParaLTR specifies whether paragraph separators (B) must 3884 * receive level 0, so that successive paragraphs progress from left to right. 3885 * 3886 * @see #setPara 3887 * @stable ICU 3.8 3888 */ 3889 public void orderParagraphsLTR(boolean ordarParaLTR) { 3890 orderParagraphsLTR = ordarParaLTR; 3891 } 3892 3893 /** 3894 * Get the directionality of the text. 3895 * 3896 * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code> 3897 * that indicates if the entire text 3898 * represented by this object is unidirectional, 3899 * and which direction, or if it is mixed-directional. 3900 * 3901 * @throws IllegalStateException if this call is not preceded by a successful 3902 * call to <code>setPara</code> or <code>setLine</code> 3903 * 3904 * @see #LTR 3905 * @see #RTL 3906 * @see #MIXED 3907 * @stable ICU 3.8 3908 */ 3909 public byte getDirection() 3910 { 3911 verifyValidParaOrLine(); 3912 return direction; 3913 } 3914 3915 /** 3916 * Get the length of the text. 3917 * 3918 * @return The length of the text that the <code>Bidi</code> object was 3919 * created for. 3920 * 3921 * @throws IllegalStateException if this call is not preceded by a successful 3922 * call to <code>setPara</code> or <code>setLine</code> 3923 * @stable ICU 3.8 3924 */ 3925 public int getLength() 3926 { 3927 verifyValidParaOrLine(); 3928 return originalLength; 3929 } 3930 3931 /* paragraphs API methods ------------------------------------------------- */ 3932 3933 /** 3934 * Get the paragraph level of the text. 3935 * 3936 * @return The paragraph level. If there are multiple paragraphs, their 3937 * level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or 3938 * LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph 3939 * is returned. 3940 * 3941 * @throws IllegalStateException if this call is not preceded by a successful 3942 * call to <code>setPara</code> or <code>setLine</code> 3943 * 3944 * @see #LEVEL_DEFAULT_LTR 3945 * @see #LEVEL_DEFAULT_RTL 3946 * @see #getParagraph 3947 * @see #getParagraphByIndex 3948 * @stable ICU 3.8 3949 */ 3950 public byte getParaLevel() 3951 { 3952 verifyValidParaOrLine(); 3953 return paraLevel; 3954 } 3955 3956 /** 3957 * Retrieves the Bidi class for a given code point. 3958 * <p>If a <code>BidiClassifier</code> is defined and returns a value 3959 * other than <code>CLASS_DEFAULT</code>, that value is used; otherwise 3960 * the default class determination mechanism is invoked.</p> 3961 * 3962 * @param c The code point to get a Bidi class for. 3963 * 3964 * @return The Bidi class for the character <code>c</code> that is in effect 3965 * for this <code>Bidi</code> instance. 3966 * 3967 * @stable ICU 3.8 3968 */ 3969 public int getCustomizedClass(int c) { 3970 int dir; 3971 3972 dir = bdp.getClass(c); 3973 if (dir >= CHAR_DIRECTION_COUNT) 3974 dir = ON; 3975 return dir; 3976 } 3977 3978 /** 3979 * <code>setLine()</code> returns a <code>Bidi</code> object to 3980 * contain the reordering information, especially the resolved levels, 3981 * for all the characters in a line of text. This line of text is 3982 * specified by referring to a <code>Bidi</code> object representing 3983 * this information for a piece of text containing one or more paragraphs, 3984 * and by specifying a range of indexes in this text.<p> 3985 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> 3986 * 3987 * This is used after calling <code>setPara()</code> 3988 * for a piece of text, and after line-breaking on that text. 3989 * It is not necessary if each paragraph is treated as a single line.<p> 3990 * 3991 * After line-breaking, rules (L1) and (L2) for the treatment of 3992 * trailing WS and for reordering are performed on 3993 * a <code>Bidi</code> object that represents a line.<p> 3994 * 3995 * <strong>Important: </strong>the line <code>Bidi</code> object may 3996 * reference data within the global text <code>Bidi</code> object. 3997 * You should not alter the content of the global text object until 3998 * you are finished using the line object. 3999 * 4000 * @param start is the line's first index into the text. 4001 * 4002 * @param limit is just behind the line's last index into the text 4003 * (its last index +1). 4004 * 4005 * @return a <code>Bidi</code> object that will now represent a line of the text. 4006 * 4007 * @throws IllegalStateException if this call is not preceded by a successful 4008 * call to <code>setPara</code> 4009 * @throws IllegalArgumentException if start and limit are not in the range 4010 * <code>0<=start<limit<=getProcessedLength()</code>, 4011 * or if the specified line crosses a paragraph boundary 4012 * 4013 * @see #setPara 4014 * @see #getProcessedLength 4015 * @stable ICU 3.8 4016 */ 4017 public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit) 4018 { 4019 verifyValidPara(); 4020 verifyRange(start, 0, limit); 4021 verifyRange(limit, 0, length+1); 4022 4023 return BidiLine.setLine(this, newBidi, newBidiBase, start, limit); 4024 } 4025 4026 /** 4027 * Get the level for one character. 4028 * 4029 * @param charIndex the index of a character. 4030 * 4031 * @return The level for the character at <code>charIndex</code>. 4032 * 4033 * @throws IllegalStateException if this call is not preceded by a successful 4034 * call to <code>setPara</code> or <code>setLine</code> 4035 * @throws IllegalArgumentException if charIndex is not in the range 4036 * <code>0<=charIndex<getProcessedLength()</code> 4037 * 4038 * @see #getProcessedLength 4039 * @stable ICU 3.8 4040 */ 4041 public byte getLevelAt(int charIndex) 4042 { 4043 // for backward compatibility 4044 if (charIndex < 0 || charIndex >= length) { 4045 return (byte)getBaseLevel(); 4046 } 4047 4048 verifyValidParaOrLine(); 4049 verifyRange(charIndex, 0, length); 4050 return BidiLine.getLevelAt(this, charIndex); 4051 } 4052 4053 /** 4054 * Get an array of levels for each character.<p> 4055 * 4056 * Note that this method may allocate memory under some 4057 * circumstances, unlike <code>getLevelAt()</code>. 4058 * 4059 * @return The levels array for the text, 4060 * or <code>null</code> if an error occurs. 4061 * 4062 * @throws IllegalStateException if this call is not preceded by a successful 4063 * call to <code>setPara</code> or <code>setLine</code> 4064 * @stable ICU 3.8 4065 */ 4066 byte[] getLevels() 4067 { 4068 verifyValidParaOrLine(); 4069 if (length <= 0) { 4070 return new byte[0]; 4071 } 4072 return BidiLine.getLevels(this); 4073 } 4074 4075 /** 4076 * Get the number of runs. 4077 * This method may invoke the actual reordering on the 4078 * <code>Bidi</code> object, after <code>setPara()</code> 4079 * may have resolved only the levels of the text. Therefore, 4080 * <code>countRuns()</code> may have to allocate memory, 4081 * and may throw an exception if it fails to do so. 4082 * 4083 * @return The number of runs. 4084 * 4085 * @throws IllegalStateException if this call is not preceded by a successful 4086 * call to <code>setPara</code> or <code>setLine</code> 4087 * @stable ICU 3.8 4088 */ 4089 public int countRuns() 4090 { 4091 verifyValidParaOrLine(); 4092 BidiLine.getRuns(this); 4093 return runCount; 4094 } 4095 4096 /** 4097 * 4098 * Get a <code>BidiRun</code> object according to its index. BidiRun methods 4099 * may be used to retrieve the run's logical start, length and level, 4100 * which can be even for an LTR run or odd for an RTL run. 4101 * In an RTL run, the character at the logical start is 4102 * visually on the right of the displayed run. 4103 * The length is the number of characters in the run.<p> 4104 * <code>countRuns()</code> is normally called 4105 * before the runs are retrieved. 4106 * 4107 * <p> 4108 * Example: 4109 * <pre> 4110 * Bidi bidi = new Bidi(); 4111 * String text = "abc 123 DEFG xyz"; 4112 * bidi.setPara(text, Bidi.RTL, null); 4113 * int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length; 4114 * BidiRun run; 4115 * for (i = 0; i < count; ++i) { 4116 * run = bidi.getVisualRun(i); 4117 * logicalStart = run.getStart(); 4118 * length = run.getLength(); 4119 * if (Bidi.LTR == run.getEmbeddingLevel()) { 4120 * do { // LTR 4121 * show_char(text.charAt(logicalStart++), visualIndex++); 4122 * } while (--length > 0); 4123 * } else { 4124 * logicalStart += length; // logicalLimit 4125 * do { // RTL 4126 * show_char(text.charAt(--logicalStart), visualIndex++); 4127 * } while (--length > 0); 4128 * } 4129 * } 4130 * </pre> 4131 * <p> 4132 * Note that in right-to-left runs, code like this places 4133 * second surrogates before first ones (which is generally a bad idea) 4134 * and combining characters before base characters. 4135 * <p> 4136 * Use of <code>{@link #writeReordered}</code>, optionally with the 4137 * <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in 4138 * order to avoid these issues. 4139 * 4140 * @param runIndex is the number of the run in visual order, in the 4141 * range <code>[0..countRuns()-1]</code>. 4142 * 4143 * @return a BidiRun object containing the details of the run. The 4144 * directionality of the run is 4145 * <code>LTR==0</code> or <code>RTL==1</code>, 4146 * never <code>MIXED</code>. 4147 * 4148 * @throws IllegalStateException if this call is not preceded by a successful 4149 * call to <code>setPara</code> or <code>setLine</code> 4150 * @throws IllegalArgumentException if <code>runIndex</code> is not in 4151 * the range <code>0<=runIndex<countRuns()</code> 4152 * 4153 * @see #countRuns() 4154 * @see com.ibm.icu.text.BidiRun 4155 * @see com.ibm.icu.text.BidiRun#getStart() 4156 * @see com.ibm.icu.text.BidiRun#getLength() 4157 * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel() 4158 * @stable ICU 3.8 4159 */ 4160 BidiRun getVisualRun(int runIndex) 4161 { 4162 verifyValidParaOrLine(); 4163 BidiLine.getRuns(this); 4164 verifyRange(runIndex, 0, runCount); 4165 return BidiLine.getVisualRun(this, runIndex); 4166 } 4167 4168 /** 4169 * Get a visual-to-logical index map (array) for the characters in the 4170 * <code>Bidi</code> (paragraph or line) object. 4171 * <p> 4172 * Some values in the map may be <code>MAP_NOWHERE</code> if the 4173 * corresponding text characters are Bidi marks inserted in the visual 4174 * output by the option <code>OPTION_INSERT_MARKS</code>. 4175 * <p> 4176 * When the visual output is altered by using options of 4177 * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>, 4178 * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>, 4179 * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not 4180 * be correct. It is advised to use, when possible, reordering options 4181 * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}. 4182 * 4183 * @return an array of <code>getResultLength()</code> 4184 * indexes which will reflect the reordering of the characters.<br><br> 4185 * The index map will result in 4186 * <code>indexMap[visualIndex]==logicalIndex</code>, where 4187 * <code>indexMap</code> represents the returned array. 4188 * 4189 * @throws IllegalStateException if this call is not preceded by a successful 4190 * call to <code>setPara</code> or <code>setLine</code> 4191 * 4192 * @see #getLogicalMap 4193 * @see #getLogicalIndex 4194 * @see #getResultLength 4195 * @see #MAP_NOWHERE 4196 * @see #OPTION_INSERT_MARKS 4197 * @see #writeReordered 4198 * @stable ICU 3.8 4199 */ 4200 private int[] getVisualMap() 4201 { 4202 /* countRuns() checks successful call to setPara/setLine */ 4203 countRuns(); 4204 if (resultLength <= 0) { 4205 return new int[0]; 4206 } 4207 return BidiLine.getVisualMap(this); 4208 } 4209 4210 /** 4211 * This is a convenience method that does not use a <code>Bidi</code> object. 4212 * It is intended to be used for when an application has determined the levels 4213 * of objects (character sequences) and just needs to have them reordered (L2). 4214 * This is equivalent to using <code>getVisualMap()</code> on a 4215 * <code>Bidi</code> object. 4216 * 4217 * @param levels is an array of levels that have been determined by 4218 * the application. 4219 * 4220 * @return an array of <code>levels.length</code> 4221 * indexes which will reflect the reordering of the characters.<p> 4222 * The index map will result in 4223 * <code>indexMap[visualIndex]==logicalIndex</code>, where 4224 * <code>indexMap</code> represents the returned array. 4225 * 4226 * @stable ICU 3.8 4227 */ 4228 private static int[] reorderVisual(byte[] levels) 4229 { 4230 return BidiLine.reorderVisual(levels); 4231 } 4232 4233 /** 4234 * Constant indicating that the base direction depends on the first strong 4235 * directional character in the text according to the Unicode Bidirectional 4236 * Algorithm. If no strong directional character is present, the base 4237 * direction is right-to-left. 4238 * @stable ICU 3.8 4239 */ 4240 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL; 4241 4242 /** 4243 * Create Bidi from the given text, embedding, and direction information. 4244 * The embeddings array may be null. If present, the values represent 4245 * embedding level information. Negative values from -1 to -61 indicate 4246 * overrides at the absolute value of the level. Positive values from 1 to 4247 * 61 indicate embeddings. Where values are zero, the base embedding level 4248 * as determined by the base direction is assumed.<p> 4249 * 4250 * Note: this constructor calls setPara() internally. 4251 * 4252 * @param text an array containing the paragraph of text to process. 4253 * @param textStart the index into the text array of the start of the 4254 * paragraph. 4255 * @param embeddings an array containing embedding values for each character 4256 * in the paragraph. This can be null, in which case it is assumed 4257 * that there is no external embedding information. 4258 * @param embStart the index into the embedding array of the start of the 4259 * paragraph. 4260 * @param paragraphLength the length of the paragraph in the text and 4261 * embeddings arrays. 4262 * @param flags a collection of flags that control the algorithm. The 4263 * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, 4264 * DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and 4265 * DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved. 4266 * 4267 * @throws IllegalArgumentException if the values in embeddings are 4268 * not within the allowed range 4269 * 4270 * @see #DIRECTION_LEFT_TO_RIGHT 4271 * @see #DIRECTION_RIGHT_TO_LEFT 4272 * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT 4273 * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT 4274 * @stable ICU 3.8 4275 */ 4276 public BidiBase(char[] text, 4277 int textStart, 4278 byte[] embeddings, 4279 int embStart, 4280 int paragraphLength, 4281 int flags) 4282 { 4283 this(0, 0); 4284 byte paraLvl; 4285 switch (flags) { 4286 case Bidi.DIRECTION_LEFT_TO_RIGHT: 4287 default: 4288 paraLvl = LTR; 4289 break; 4290 case Bidi.DIRECTION_RIGHT_TO_LEFT: 4291 paraLvl = RTL; 4292 break; 4293 case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT: 4294 paraLvl = LEVEL_DEFAULT_LTR; 4295 break; 4296 case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT: 4297 paraLvl = LEVEL_DEFAULT_RTL; 4298 break; 4299 } 4300 byte[] paraEmbeddings; 4301 if (embeddings == null) { 4302 paraEmbeddings = null; 4303 } else { 4304 paraEmbeddings = new byte[paragraphLength]; 4305 byte lev; 4306 for (int i = 0; i < paragraphLength; i++) { 4307 lev = embeddings[i + embStart]; 4308 if (lev < 0) { 4309 lev = (byte)((- lev) | LEVEL_OVERRIDE); 4310 } else if (lev == 0) { 4311 lev = paraLvl; 4312 if (paraLvl > MAX_EXPLICIT_LEVEL) { 4313 lev &= 1; 4314 } 4315 } 4316 paraEmbeddings[i] = lev; 4317 } 4318 } 4319 4320 char[] paraText = new char[paragraphLength]; 4321 System.arraycopy(text, textStart, paraText, 0, paragraphLength); 4322 setPara(paraText, paraLvl, paraEmbeddings); 4323 } 4324 4325 /** 4326 * Return true if the line is not left-to-right or right-to-left. This means 4327 * it either has mixed runs of left-to-right and right-to-left text, or the 4328 * base direction differs from the direction of the only run of text. 4329 * 4330 * @return true if the line is not left-to-right or right-to-left. 4331 * 4332 * @throws IllegalStateException if this call is not preceded by a successful 4333 * call to <code>setPara</code> 4334 * @stable ICU 3.8 4335 */ 4336 public boolean isMixed() 4337 { 4338 return (!isLeftToRight() && !isRightToLeft()); 4339 } 4340 4341 /** 4342 * Return true if the line is all left-to-right text and the base direction 4343 * is left-to-right. 4344 * 4345 * @return true if the line is all left-to-right text and the base direction 4346 * is left-to-right. 4347 * 4348 * @throws IllegalStateException if this call is not preceded by a successful 4349 * call to <code>setPara</code> 4350 * @stable ICU 3.8 4351 */ 4352 public boolean isLeftToRight() 4353 { 4354 return (getDirection() == LTR && (paraLevel & 1) == 0); 4355 } 4356 4357 /** 4358 * Return true if the line is all right-to-left text, and the base direction 4359 * is right-to-left 4360 * 4361 * @return true if the line is all right-to-left text, and the base 4362 * direction is right-to-left 4363 * 4364 * @throws IllegalStateException if this call is not preceded by a successful 4365 * call to <code>setPara</code> 4366 * @stable ICU 3.8 4367 */ 4368 public boolean isRightToLeft() 4369 { 4370 return (getDirection() == RTL && (paraLevel & 1) == 1); 4371 } 4372 4373 /** 4374 * Return true if the base direction is left-to-right 4375 * 4376 * @return true if the base direction is left-to-right 4377 * 4378 * @throws IllegalStateException if this call is not preceded by a successful 4379 * call to <code>setPara</code> or <code>setLine</code> 4380 * 4381 * @stable ICU 3.8 4382 */ 4383 public boolean baseIsLeftToRight() 4384 { 4385 return (getParaLevel() == LTR); 4386 } 4387 4388 /** 4389 * Return the base level (0 if left-to-right, 1 if right-to-left). 4390 * 4391 * @return the base level 4392 * 4393 * @throws IllegalStateException if this call is not preceded by a successful 4394 * call to <code>setPara</code> or <code>setLine</code> 4395 * 4396 * @stable ICU 3.8 4397 */ 4398 public int getBaseLevel() 4399 { 4400 return getParaLevel(); 4401 } 4402 4403 /** 4404 * Compute the logical to visual run mapping 4405 */ 4406 void getLogicalToVisualRunsMap() 4407 { 4408 if (isGoodLogicalToVisualRunsMap) { 4409 return; 4410 } 4411 int count = countRuns(); 4412 if ((logicalToVisualRunsMap == null) || 4413 (logicalToVisualRunsMap.length < count)) { 4414 logicalToVisualRunsMap = new int[count]; 4415 } 4416 int i; 4417 long[] keys = new long[count]; 4418 for (i = 0; i < count; i++) { 4419 keys[i] = ((long)(runs[i].start)<<32) + i; 4420 } 4421 Arrays.sort(keys); 4422 for (i = 0; i < count; i++) { 4423 logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF); 4424 } 4425 isGoodLogicalToVisualRunsMap = true; 4426 } 4427 4428 /** 4429 * Return the level of the nth logical run in this line. 4430 * 4431 * @param run the index of the run, between 0 and <code>countRuns()-1</code> 4432 * 4433 * @return the level of the run 4434 * 4435 * @throws IllegalStateException if this call is not preceded by a successful 4436 * call to <code>setPara</code> or <code>setLine</code> 4437 * @throws IllegalArgumentException if <code>run</code> is not in 4438 * the range <code>0<=run<countRuns()</code> 4439 * @stable ICU 3.8 4440 */ 4441 public int getRunLevel(int run) 4442 { 4443 verifyValidParaOrLine(); 4444 BidiLine.getRuns(this); 4445 4446 // for backward compatibility 4447 if (run < 0 || run >= runCount) { 4448 return getParaLevel(); 4449 } 4450 4451 getLogicalToVisualRunsMap(); 4452 return runs[logicalToVisualRunsMap[run]].level; 4453 } 4454 4455 /** 4456 * Return the index of the character at the start of the nth logical run in 4457 * this line, as an offset from the start of the line. 4458 * 4459 * @param run the index of the run, between 0 and <code>countRuns()</code> 4460 * 4461 * @return the start of the run 4462 * 4463 * @throws IllegalStateException if this call is not preceded by a successful 4464 * call to <code>setPara</code> or <code>setLine</code> 4465 * @throws IllegalArgumentException if <code>run</code> is not in 4466 * the range <code>0<=run<countRuns()</code> 4467 * @stable ICU 3.8 4468 */ 4469 public int getRunStart(int run) 4470 { 4471 verifyValidParaOrLine(); 4472 BidiLine.getRuns(this); 4473 4474 // for backward compatibility 4475 if (runCount == 1) { 4476 return 0; 4477 } else if (run == runCount) { 4478 return length; 4479 } 4480 4481 getLogicalToVisualRunsMap(); 4482 return runs[logicalToVisualRunsMap[run]].start; 4483 } 4484 4485 /** 4486 * Return the index of the character past the end of the nth logical run in 4487 * this line, as an offset from the start of the line. For example, this 4488 * will return the length of the line for the last run on the line. 4489 * 4490 * @param run the index of the run, between 0 and <code>countRuns()</code> 4491 * 4492 * @return the limit of the run 4493 * 4494 * @throws IllegalStateException if this call is not preceded by a successful 4495 * call to <code>setPara</code> or <code>setLine</code> 4496 * @throws IllegalArgumentException if <code>run</code> is not in 4497 * the range <code>0<=run<countRuns()</code> 4498 * @stable ICU 3.8 4499 */ 4500 public int getRunLimit(int run) 4501 { 4502 verifyValidParaOrLine(); 4503 BidiLine.getRuns(this); 4504 4505 // for backward compatibility 4506 if (runCount == 1) { 4507 return length; 4508 } 4509 4510 getLogicalToVisualRunsMap(); 4511 int idx = logicalToVisualRunsMap[run]; 4512 int len = idx == 0 ? runs[idx].limit : 4513 runs[idx].limit - runs[idx-1].limit; 4514 return runs[idx].start + len; 4515 } 4516 4517 /** 4518 * Return true if the specified text requires bidi analysis. If this returns 4519 * false, the text will display left-to-right. Clients can then avoid 4520 * constructing a Bidi object. Text in the Arabic Presentation Forms area of 4521 * Unicode is presumed to already be shaped and ordered for display, and so 4522 * will not cause this method to return true. 4523 * 4524 * @param text the text containing the characters to test 4525 * @param start the start of the range of characters to test 4526 * @param limit the limit of the range of characters to test 4527 * 4528 * @return true if the range of characters requires bidi analysis 4529 * 4530 * @stable ICU 3.8 4531 */ 4532 public static boolean requiresBidi(char[] text, 4533 int start, 4534 int limit) 4535 { 4536 final int RTLMask = (1 << R | 4537 1 << AL | 4538 1 << RLE | 4539 1 << RLO | 4540 1 << AN); 4541 4542 if (0 > start || start > limit || limit > text.length) { 4543 throw new IllegalArgumentException("Value start " + start + 4544 " is out of range 0 to " + limit); 4545 } 4546 4547 for (int i = start; i < limit; ++i) { 4548 if (Character.isHighSurrogate(text[i]) && i < (limit-1) && 4549 Character.isLowSurrogate(text[i+1])) { 4550 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) { 4551 return true; 4552 } 4553 } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) { 4554 return true; 4555 } 4556 } 4557 4558 return false; 4559 } 4560 4561 /** 4562 * Reorder the objects in the array into visual order based on their levels. 4563 * This is a utility method to use when you have a collection of objects 4564 * representing runs of text in logical order, each run containing text at a 4565 * single level. The elements at <code>index</code> from 4566 * <code>objectStart</code> up to <code>objectStart + count</code> in the 4567 * objects array will be reordered into visual order assuming 4568 * each run of text has the level indicated by the corresponding element in 4569 * the levels array (at <code>index - objectStart + levelStart</code>). 4570 * 4571 * @param levels an array representing the bidi level of each object 4572 * @param levelStart the start position in the levels array 4573 * @param objects the array of objects to be reordered into visual order 4574 * @param objectStart the start position in the objects array 4575 * @param count the number of objects to reorder 4576 * @stable ICU 3.8 4577 */ 4578 public static void reorderVisually(byte[] levels, 4579 int levelStart, 4580 Object[] objects, 4581 int objectStart, 4582 int count) 4583 { 4584 // for backward compatibility 4585 if (0 > levelStart || levels.length <= levelStart) { 4586 throw new IllegalArgumentException("Value levelStart " + 4587 levelStart + " is out of range 0 to " + 4588 (levels.length-1)); 4589 } 4590 if (0 > objectStart || objects.length <= objectStart) { 4591 throw new IllegalArgumentException("Value objectStart " + 4592 levelStart + " is out of range 0 to " + 4593 (objects.length-1)); 4594 } 4595 if (0 > count || objects.length < (objectStart+count)) { 4596 throw new IllegalArgumentException("Value count " + 4597 levelStart + " is out of range 0 to " + 4598 (objects.length - objectStart)); 4599 } 4600 4601 byte[] reorderLevels = new byte[count]; 4602 System.arraycopy(levels, levelStart, reorderLevels, 0, count); 4603 int[] indexMap = reorderVisual(reorderLevels); 4604 Object[] temp = new Object[count]; 4605 System.arraycopy(objects, objectStart, temp, 0, count); 4606 for (int i = 0; i < count; ++i) { 4607 objects[objectStart + i] = temp[indexMap[i]]; 4608 } 4609 } 4610 4611 /** 4612 * Take a <code>Bidi</code> object containing the reordering 4613 * information for a piece of text (one or more paragraphs) set by 4614 * <code>setPara()</code> or for a line of text set by <code>setLine()</code> 4615 * and return a string containing the reordered text. 4616 * 4617 * <p>The text may have been aliased (only a reference was stored 4618 * without copying the contents), thus it must not have been modified 4619 * since the <code>setPara()</code> call.</p> 4620 * 4621 * This method preserves the integrity of characters with multiple 4622 * code units and (optionally) combining characters. 4623 * Characters in RTL runs can be replaced by mirror-image characters 4624 * in the returned string. Note that "real" mirroring has to be done in a 4625 * rendering engine by glyph selection and that for many "mirrored" 4626 * characters there are no Unicode characters as mirror-image equivalents. 4627 * There are also options to insert or remove Bidi control 4628 * characters; see the descriptions of the return value and the 4629 * <code>options</code> parameter, and of the option bit flags. 4630 * 4631 * @param options A bit set of options for the reordering that control 4632 * how the reordered text is written. 4633 * The options include mirroring the characters on a code 4634 * point basis and inserting LRM characters, which is used 4635 * especially for transforming visually stored text 4636 * to logically stored text (although this is still an 4637 * imperfect implementation of an "inverse Bidi" algorithm 4638 * because it uses the "forward Bidi" algorithm at its core). 4639 * The available options are: 4640 * <code>DO_MIRRORING</code>, 4641 * <code>INSERT_LRM_FOR_NUMERIC</code>, 4642 * <code>KEEP_BASE_COMBINING</code>, 4643 * <code>OUTPUT_REVERSE</code>, 4644 * <code>REMOVE_BIDI_CONTROLS</code>, 4645 * <code>STREAMING</code> 4646 * 4647 * @return The reordered text. 4648 * If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then 4649 * the length of the returned string could be as large as 4650 * <code>getLength()+2*countRuns()</code>.<br> 4651 * If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the 4652 * length of the returned string may be less than 4653 * <code>getLength()</code>.<br> 4654 * If none of these options is set, then the length of the returned 4655 * string will be exactly <code>getProcessedLength()</code>. 4656 * 4657 * @throws IllegalStateException if this call is not preceded by a successful 4658 * call to <code>setPara</code> or <code>setLine</code> 4659 * 4660 * @see #DO_MIRRORING 4661 * @see #INSERT_LRM_FOR_NUMERIC 4662 * @see #KEEP_BASE_COMBINING 4663 * @see #OUTPUT_REVERSE 4664 * @see #REMOVE_BIDI_CONTROLS 4665 * @see #OPTION_STREAMING 4666 * @see #getProcessedLength 4667 * @stable ICU 3.8 4668 */ 4669 public String writeReordered(int options) 4670 { 4671 verifyValidParaOrLine(); 4672 if (length == 0) { 4673 /* nothing to do */ 4674 return ""; 4675 } 4676 return BidiWriter.writeReordered(this, options); 4677 } 4678 4679 /** 4680 * Display the bidi internal state, used in debugging. 4681 */ 4682 public String toString() { 4683 StringBuilder buf = new StringBuilder(getClass().getName()); 4684 4685 buf.append("[dir: "); 4686 buf.append(direction); 4687 buf.append(" baselevel: "); 4688 buf.append(paraLevel); 4689 buf.append(" length: "); 4690 buf.append(length); 4691 buf.append(" runs: "); 4692 if (levels == null) { 4693 buf.append("none"); 4694 } else { 4695 buf.append('['); 4696 buf.append(levels[0]); 4697 for (int i = 1; i < levels.length; i++) { 4698 buf.append(' '); 4699 buf.append(levels[i]); 4700 } 4701 buf.append(']'); 4702 } 4703 buf.append(" text: [0x"); 4704 buf.append(Integer.toHexString(text[0])); 4705 for (int i = 1; i < text.length; i++) { 4706 buf.append(" 0x"); 4707 buf.append(Integer.toHexString(text[i])); 4708 } 4709 buf.append("]]"); 4710 4711 return buf.toString(); 4712 } 4713 4714 /** 4715 * A class that provides access to constants defined by 4716 * java.awt.font.TextAttribute without creating a static dependency. 4717 */ 4718 private static class TextAttributeConstants { 4719 // Make sure to load the AWT's TextAttribute class before using the constants, if any. 4720 static { 4721 try { 4722 Class.forName("java.awt.font.TextAttribute", true, null); 4723 } catch (ClassNotFoundException e) {} 4724 } 4725 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 4726 4727 /** 4728 * TextAttribute instances (or a fake Attribute type if 4729 * java.awt.font.TextAttribute is not present) 4730 */ 4731 static final AttributedCharacterIterator.Attribute RUN_DIRECTION = 4732 getTextAttribute("RUN_DIRECTION"); 4733 static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING = 4734 getTextAttribute("NUMERIC_SHAPING"); 4735 static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING = 4736 getTextAttribute("BIDI_EMBEDDING"); 4737 4738 /** 4739 * TextAttribute.RUN_DIRECTION_LTR 4740 */ 4741 static final Boolean RUN_DIRECTION_LTR = (jafa == null) ? 4742 Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR"); 4743 4744 @SuppressWarnings("serial") 4745 private static AttributedCharacterIterator.Attribute 4746 getTextAttribute(String name) 4747 { 4748 if (jafa == null) { 4749 // fake attribute 4750 return new AttributedCharacterIterator.Attribute(name) { }; 4751 } else { 4752 return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name); 4753 } 4754 } 4755 } 4756 4757 /** 4758 * A class that provides access to java.awt.font.NumericShaper without 4759 * creating a static dependency. 4760 */ 4761 private static class NumericShapings { 4762 // Make sure to load the AWT's NumericShaper class before calling shape, if any. 4763 static { 4764 try { 4765 Class.forName("java.awt.font.NumericShaper", true, null); 4766 } catch (ClassNotFoundException e) {} 4767 } 4768 static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess(); 4769 4770 /** 4771 * Invokes NumericShaping shape(text,start,count) method. 4772 */ 4773 static void shape(Object shaper, char[] text, int start, int count) { 4774 if (jafa != null) { 4775 jafa.shape(shaper, text, start, count); 4776 } 4777 } 4778 } 4779 4780 }