1 /*
   2  * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  *******************************************************************************
  27  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  28  *                                                                             *
  29  * The original version of this source code and documentation is copyrighted   *
  30  * and owned by IBM, These materials are provided under terms of a License     *
  31  * Agreement between IBM and Sun. This technology is protected by multiple     *
  32  * US and International patents. This notice and attribution to IBM may not    *
  33  * to removed.                                                                 *
  34  *******************************************************************************
  35  */
  36 
  37 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
  38  * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
  39  * concept of RUNS_ONLY which is a double operation.
  40  * It could be advantageous to divide this into 3 concepts:
  41  * a) Operation: direct / inverse / RUNS_ONLY
  42  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L
  43  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
  44  * This would allow combinations not possible today like RUNS_ONLY with
  45  * NUMBERS_SPECIAL.
  46  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
  47  * REMOVE_CONTROLS for the inverse step.
  48  * Not all combinations would be supported, and probably not all do make sense.
  49  * This would need to document which ones are supported and what are the
  50  * fallbacks for unsupported combinations.
  51  */
  52 
  53 package sun.text.bidi;
  54 
  55 import java.io.IOException;
  56 import java.lang.reflect.Array;
  57 import java.text.AttributedCharacterIterator;
  58 import java.text.Bidi;
  59 import java.util.Arrays;
  60 import java.util.MissingResourceException;
  61 import sun.misc.JavaAWTFontAccess;
  62 import sun.misc.SharedSecrets;
  63 import sun.text.normalizer.UBiDiProps;
  64 import sun.text.normalizer.UCharacter;
  65 import sun.text.normalizer.UTF16;
  66 
  67 /**
  68  *
  69  * <h2>Bidi algorithm for ICU</h2>
  70  *
  71  * This is an implementation of the Unicode Bidirectional algorithm. The
  72  * algorithm is defined in the <a
  73  * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
  74  * version 13, also described in The Unicode Standard, Version 4.0 .
  75  * <p>
  76  *
  77  * Note: Libraries that perform a bidirectional algorithm and reorder strings
  78  * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
  79  * shaping (ArabicShaping) classes can be used at the core of such "Storage
  80  * Layout Engines".
  81  *
  82  * <h3>General remarks about the API:</h3>
  83  *
  84  * The "limit" of a sequence of characters is the position just after
  85  * their last character, i.e., one more than that position.
  86  * <p>
  87  *
  88  * Some of the API methods provide access to "runs". Such a
  89  * "run" is defined as a sequence of characters that are at the same
  90  * embedding level after performing the Bidi algorithm.
  91  *
  92  * <h3>Basic concept: paragraph</h3>
  93  * A piece of text can be divided into several paragraphs by characters
  94  * with the Bidi class <code>Block Separator</code>. For handling of
  95  * paragraphs, see:
  96  * <ul>
  97  * <li>{@link #countParagraphs}
  98  * <li>{@link #getParaLevel}
  99  * <li>{@link #getParagraph}
 100  * <li>{@link #getParagraphByIndex}
 101  * </ul>
 102  *
 103  * <h3>Basic concept: text direction</h3>
 104  * The direction of a piece of text may be:
 105  * <ul>
 106  * <li>{@link #LTR}
 107  * <li>{@link #RTL}
 108  * <li>{@link #MIXED}
 109  * </ul>
 110  *
 111  * <h3>Basic concept: levels</h3>
 112  *
 113  * Levels in this API represent embedding levels according to the Unicode
 114  * Bidirectional Algorithm.
 115  * Their low-order bit (even/odd value) indicates the visual direction.<p>
 116  *
 117  * Levels can be abstract values when used for the
 118  * <code>paraLevel</code> and <code>embeddingLevels</code>
 119  * arguments of <code>setPara()</code>; there:
 120  * <ul>
 121  * <li>the high-order bit of an <code>embeddingLevels[]</code>
 122  * value indicates whether the using application is
 123  * specifying the level of a character to <i>override</i> whatever the
 124  * Bidi implementation would resolve it to.</li>
 125  * <li><code>paraLevel</code> can be set to the
 126  * pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
 127  * and <code>LEVEL_DEFAULT_RTL</code>.</li>
 128  * </ul>
 129  *
 130  * <p>The related constants are not real, valid level values.
 131  * <code>DEFAULT_XXX</code> can be used to specify
 132  * a default for the paragraph level for
 133  * when the <code>setPara()</code> method
 134  * shall determine it but there is no
 135  * strongly typed character in the input.<p>
 136  *
 137  * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
 138  * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
 139  * just like with normal LTR and RTL level values -
 140  * these special values are designed that way. Also, the implementation
 141  * assumes that MAX_EXPLICIT_LEVEL is odd.
 142  *
 143  * <p><b>See Also:</b>
 144  * <ul>
 145  * <li>{@link #LEVEL_DEFAULT_LTR}
 146  * <li>{@link #LEVEL_DEFAULT_RTL}
 147  * <li>{@link #LEVEL_OVERRIDE}
 148  * <li>{@link #MAX_EXPLICIT_LEVEL}
 149  * <li>{@link #setPara}
 150  * </ul>
 151  *
 152  * <h3>Basic concept: Reordering Mode</h3>
 153  * Reordering mode values indicate which variant of the Bidi algorithm to
 154  * use.
 155  *
 156  * <p><b>See Also:</b>
 157  * <ul>
 158  * <li>{@link #setReorderingMode}
 159  * <li>{@link #REORDER_DEFAULT}
 160  * <li>{@link #REORDER_NUMBERS_SPECIAL}
 161  * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
 162  * <li>{@link #REORDER_RUNS_ONLY}
 163  * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
 164  * <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
 165  * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
 166  * </ul>
 167  *
 168  * <h3>Basic concept: Reordering Options</h3>
 169  * Reordering options can be applied during Bidi text transformations.
 170  * <p><b>See Also:</b>
 171  * <ul>
 172  * <li>{@link #setReorderingOptions}
 173  * <li>{@link #OPTION_DEFAULT}
 174  * <li>{@link #OPTION_INSERT_MARKS}
 175  * <li>{@link #OPTION_REMOVE_CONTROLS}
 176  * <li>{@link #OPTION_STREAMING}
 177  * </ul>
 178  *
 179  *
 180  * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
 181  * @stable ICU 3.8
 182  *
 183  *
 184  * <h4> Sample code for the ICU Bidi API </h4>
 185  *
 186  * <h5>Rendering a paragraph with the ICU Bidi API</h5>
 187  *
 188  * This is (hypothetical) sample code that illustrates how the ICU Bidi API
 189  * could be used to render a paragraph of text. Rendering code depends highly on
 190  * the graphics system, therefore this sample code must make a lot of
 191  * assumptions, which may or may not match any existing graphics system's
 192  * properties.
 193  *
 194  * <p>
 195  * The basic assumptions are:
 196  * </p>
 197  * <ul>
 198  * <li>Rendering is done from left to right on a horizontal line.</li>
 199  * <li>A run of single-style, unidirectional text can be rendered at once.
 200  * </li>
 201  * <li>Such a run of text is passed to the graphics system with characters
 202  * (code units) in logical order.</li>
 203  * <li>The line-breaking algorithm is very complicated and Locale-dependent -
 204  * and therefore its implementation omitted from this sample code.</li>
 205  * </ul>
 206  *
 207  * <pre>{@code
 208  *
 209  *  package com.ibm.icu.dev.test.bidi;
 210  *
 211  *  import com.ibm.icu.text.Bidi;
 212  *  import com.ibm.icu.text.BidiRun;
 213  *
 214  *  public class Sample {
 215  *
 216  *      static final int styleNormal = 0;
 217  *      static final int styleSelected = 1;
 218  *      static final int styleBold = 2;
 219  *      static final int styleItalics = 4;
 220  *      static final int styleSuper=8;
 221  *      static final int styleSub = 16;
 222  *
 223  *      static class StyleRun {
 224  *          int limit;
 225  *          int style;
 226  *
 227  *          public StyleRun(int limit, int style) {
 228  *              this.limit = limit;
 229  *              this.style = style;
 230  *          }
 231  *      }
 232  *
 233  *      static class Bounds {
 234  *          int start;
 235  *          int limit;
 236  *
 237  *          public Bounds(int start, int limit) {
 238  *              this.start = start;
 239  *              this.limit = limit;
 240  *          }
 241  *      }
 242  *
 243  *      static int getTextWidth(String text, int start, int limit,
 244  *                              StyleRun[] styleRuns, int styleRunCount) {
 245  *          // simplistic way to compute the width
 246  *          return limit - start;
 247  *      }
 248  *
 249  *      // set limit and StyleRun limit for a line
 250  *      // from text[start] and from styleRuns[styleRunStart]
 251  *      // using Bidi.getLogicalRun(...)
 252  *      // returns line width
 253  *      static int getLineBreak(String text, Bounds line, Bidi para,
 254  *                              StyleRun styleRuns[], Bounds styleRun) {
 255  *          // dummy return
 256  *          return 0;
 257  *      }
 258  *
 259  *      // render runs on a line sequentially, always from left to right
 260  *
 261  *      // prepare rendering a new line
 262  *      static void startLine(byte textDirection, int lineWidth) {
 263  *          System.out.println();
 264  *      }
 265  *
 266  *      // render a run of text and advance to the right by the run width
 267  *      // the text[start..limit-1] is always in logical order
 268  *      static void renderRun(String text, int start, int limit,
 269  *                            byte textDirection, int style) {
 270  *      }
 271  *
 272  *      // We could compute a cross-product
 273  *      // from the style runs with the directional runs
 274  *      // and then reorder it.
 275  *      // Instead, here we iterate over each run type
 276  *      // and render the intersections -
 277  *      // with shortcuts in simple (and common) cases.
 278  *      // renderParagraph() is the main function.
 279  *
 280  *      // render a directional run with
 281  *      // (possibly) multiple style runs intersecting with it
 282  *      static void renderDirectionalRun(String text, int start, int limit,
 283  *                                       byte direction, StyleRun styleRuns[],
 284  *                                       int styleRunCount) {
 285  *          int i;
 286  *
 287  *          // iterate over style runs
 288  *          if (direction == Bidi.LTR) {
 289  *              int styleLimit;
 290  *              for (i = 0; i < styleRunCount; ++i) {
 291  *                  styleLimit = styleRuns[i].limit;
 292  *                  if (start < styleLimit) {
 293  *                      if (styleLimit > limit) {
 294  *                          styleLimit = limit;
 295  *                      }
 296  *                      renderRun(text, start, styleLimit,
 297  *                                direction, styleRuns[i].style);
 298  *                      if (styleLimit == limit) {
 299  *                          break;
 300  *                      }
 301  *                      start = styleLimit;
 302  *                  }
 303  *              }
 304  *          } else {
 305  *              int styleStart;
 306  *
 307  *              for (i = styleRunCount-1; i >= 0; --i) {
 308  *                  if (i > 0) {
 309  *                      styleStart = styleRuns[i-1].limit;
 310  *                  } else {
 311  *                      styleStart = 0;
 312  *                  }
 313  *                  if (limit >= styleStart) {
 314  *                      if (styleStart < start) {
 315  *                          styleStart = start;
 316  *                      }
 317  *                      renderRun(text, styleStart, limit, direction,
 318  *                                styleRuns[i].style);
 319  *                      if (styleStart == start) {
 320  *                          break;
 321  *                      }
 322  *                      limit = styleStart;
 323  *                  }
 324  *              }
 325  *          }
 326  *      }
 327  *
 328  *      // the line object represents text[start..limit-1]
 329  *      static void renderLine(Bidi line, String text, int start, int limit,
 330  *                             StyleRun styleRuns[], int styleRunCount) {
 331  *          byte direction = line.getDirection();
 332  *          if (direction != Bidi.MIXED) {
 333  *              // unidirectional
 334  *              if (styleRunCount <= 1) {
 335  *                  renderRun(text, start, limit, direction, styleRuns[0].style);
 336  *              } else {
 337  *                  renderDirectionalRun(text, start, limit, direction,
 338  *                                       styleRuns, styleRunCount);
 339  *              }
 340  *          } else {
 341  *              // mixed-directional
 342  *              int count, i;
 343  *              BidiRun run;
 344  *
 345  *              try {
 346  *                  count = line.countRuns();
 347  *              } catch (IllegalStateException e) {
 348  *                  e.printStackTrace();
 349  *                  return;
 350  *              }
 351  *              if (styleRunCount <= 1) {
 352  *                  int style = styleRuns[0].style;
 353  *
 354  *                  // iterate over directional runs
 355  *                  for (i = 0; i < count; ++i) {
 356  *                      run = line.getVisualRun(i);
 357  *                      renderRun(text, run.getStart(), run.getLimit(),
 358  *                                run.getDirection(), style);
 359  *                  }
 360  *              } else {
 361  *                  // iterate over both directional and style runs
 362  *                  for (i = 0; i < count; ++i) {
 363  *                      run = line.getVisualRun(i);
 364  *                      renderDirectionalRun(text, run.getStart(),
 365  *                                           run.getLimit(), run.getDirection(),
 366  *                                           styleRuns, styleRunCount);
 367  *                  }
 368  *              }
 369  *          }
 370  *      }
 371  *
 372  *      static void renderParagraph(String text, byte textDirection,
 373  *                                  StyleRun styleRuns[], int styleRunCount,
 374  *                                  int lineWidth) {
 375  *          int length = text.length();
 376  *          Bidi para = new Bidi();
 377  *          try {
 378  *              para.setPara(text,
 379  *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
 380  *                                              : Bidi.LEVEL_DEFAULT_LTR,
 381  *                           null);
 382  *          } catch (Exception e) {
 383  *              e.printStackTrace();
 384  *              return;
 385  *          }
 386  *          byte paraLevel = (byte)(1 & para.getParaLevel());
 387  *          StyleRun styleRun = new StyleRun(length, styleNormal);
 388  *
 389  *          if (styleRuns == null || styleRunCount <= 0) {
 390  *              styleRuns = new StyleRun[1];
 391  *              styleRunCount = 1;
 392  *              styleRuns[0] = styleRun;
 393  *          }
 394  *          // assume styleRuns[styleRunCount-1].limit>=length
 395  *
 396  *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
 397  *          if (width <= lineWidth) {
 398  *              // everything fits onto one line
 399  *
 400  *              // prepare rendering a new line from either left or right
 401  *              startLine(paraLevel, width);
 402  *
 403  *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
 404  *          } else {
 405  *              // we need to render several lines
 406  *              Bidi line = new Bidi(length, 0);
 407  *              int start = 0, limit;
 408  *              int styleRunStart = 0, styleRunLimit;
 409  *
 410  *              for (;;) {
 411  *                  limit = length;
 412  *                  styleRunLimit = styleRunCount;
 413  *                  width = getLineBreak(text, new Bounds(start, limit),
 414  *                                       para, styleRuns,
 415  *                                       new Bounds(styleRunStart, styleRunLimit));
 416  *                  try {
 417  *                      line = para.setLine(start, limit);
 418  *                  } catch (Exception e) {
 419  *                      e.printStackTrace();
 420  *                      return;
 421  *                  }
 422  *                  // prepare rendering a new line
 423  *                  // from either left or right
 424  *                  startLine(paraLevel, width);
 425  *
 426  *                  if (styleRunStart > 0) {
 427  *                      int newRunCount = styleRuns.length - styleRunStart;
 428  *                      StyleRun[] newRuns = new StyleRun[newRunCount];
 429  *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
 430  *                                       newRunCount);
 431  *                      renderLine(line, text, start, limit, newRuns,
 432  *                                 styleRunLimit - styleRunStart);
 433  *                  } else {
 434  *                      renderLine(line, text, start, limit, styleRuns,
 435  *                                 styleRunLimit - styleRunStart);
 436  *                  }
 437  *                  if (limit == length) {
 438  *                      break;
 439  *                  }
 440  *                  start = limit;
 441  *                  styleRunStart = styleRunLimit - 1;
 442  *                  if (start >= styleRuns[styleRunStart].limit) {
 443  *                      ++styleRunStart;
 444  *                  }
 445  *              }
 446  *          }
 447  *      }
 448  *
 449  *      public static void main(String[] args)
 450  *      {
 451  *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
 452  *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
 453  *      }
 454  *  }
 455  *
 456  * }</pre>
 457  */
 458 
 459 public class BidiBase {
 460 
 461     class Point {
 462         int pos;    /* position in text */
 463         int flag;   /* flag for LRM/RLM, before/after */
 464     }
 465 
 466     class InsertPoints {
 467         int size;
 468         int confirmed;
 469         Point[] points = new Point[0];
 470     }
 471 
 472     /** Paragraph level setting<p>
 473      *
 474      * Constant indicating that the base direction depends on the first strong
 475      * directional character in the text according to the Unicode Bidirectional
 476      * Algorithm. If no strong directional character is present,
 477      * then set the paragraph level to 0 (left-to-right).<p>
 478      *
 479      * If this value is used in conjunction with reordering modes
 480      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
 481      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 482      * is assumed to be visual LTR, and the text after reordering is required
 483      * to be the corresponding logical string with appropriate contextual
 484      * direction. The direction of the result string will be RTL if either
 485      * the righmost or leftmost strong character of the source text is RTL
 486      * or Arabic Letter, the direction will be LTR otherwise.<p>
 487      *
 488      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
 489      * be added at the beginning of the result string to ensure round trip
 490      * (that the result string, when reordered back to visual, will produce
 491      * the original source text).
 492      * @see #REORDER_INVERSE_LIKE_DIRECT
 493      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 494      * @stable ICU 3.8
 495      */
 496     public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e;
 497 
 498     /** Paragraph level setting<p>
 499      *
 500      * Constant indicating that the base direction depends on the first strong
 501      * directional character in the text according to the Unicode Bidirectional
 502      * Algorithm. If no strong directional character is present,
 503      * then set the paragraph level to 1 (right-to-left).<p>
 504      *
 505      * If this value is used in conjunction with reordering modes
 506      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
 507      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 508      * is assumed to be visual LTR, and the text after reordering is required
 509      * to be the corresponding logical string with appropriate contextual
 510      * direction. The direction of the result string will be RTL if either
 511      * the righmost or leftmost strong character of the source text is RTL
 512      * or Arabic Letter, or if the text contains no strong character;
 513      * the direction will be LTR otherwise.<p>
 514      *
 515      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
 516      * be added at the beginning of the result string to ensure round trip
 517      * (that the result string, when reordered back to visual, will produce
 518      * the original source text).
 519      * @see #REORDER_INVERSE_LIKE_DIRECT
 520      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 521      * @stable ICU 3.8
 522      */
 523     public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f;
 524 
 525     /**
 526      * Maximum explicit embedding level.
 527      * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
 528      * @stable ICU 3.8
 529      */
 530     public static final byte MAX_EXPLICIT_LEVEL = 61;
 531 
 532     /**
 533      * Bit flag for level input.
 534      * Overrides directional properties.
 535      * @stable ICU 3.8
 536      */
 537     public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80;
 538 
 539     /**
 540      * Special value which can be returned by the mapping methods when a
 541      * logical index has no corresponding visual index or vice-versa. This may
 542      * happen for the logical-to-visual mapping of a Bidi control when option
 543      * <code>OPTION_REMOVE_CONTROLS</code> is
 544      * specified. This can also happen for the visual-to-logical mapping of a
 545      * Bidi mark (LRM or RLM) inserted by option
 546      * <code>OPTION_INSERT_MARKS</code>.
 547      * @see #getVisualIndex
 548      * @see #getVisualMap
 549      * @see #getLogicalIndex
 550      * @see #getLogicalMap
 551      * @see #OPTION_INSERT_MARKS
 552      * @see #OPTION_REMOVE_CONTROLS
 553      * @stable ICU 3.8
 554      */
 555     public static final int MAP_NOWHERE = -1;
 556 
 557     /**
 558      * Mixed-directional text.
 559      * @stable ICU 3.8
 560      */
 561     public static final byte MIXED = 2;
 562 
 563     /**
 564      * option bit for writeReordered():
 565      * replace characters with the "mirrored" property in RTL runs
 566      * by their mirror-image mappings
 567      *
 568      * @see #writeReordered
 569      * @stable ICU 3.8
 570      */
 571     public static final short DO_MIRRORING = 2;
 572 
 573     /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
 574      * @see #setReorderingMode
 575      * @stable ICU 3.8
 576      */
 577     private static final short REORDER_DEFAULT = 0;
 578 
 579     /** Reordering mode: Logical to Visual algorithm which handles numbers in
 580      * a way which mimicks the behavior of Windows XP.
 581      * @see #setReorderingMode
 582      * @stable ICU 3.8
 583      */
 584     private static final short REORDER_NUMBERS_SPECIAL = 1;
 585 
 586     /** Reordering mode: Logical to Visual algorithm grouping numbers with
 587      * adjacent R characters (reversible algorithm).
 588      * @see #setReorderingMode
 589      * @stable ICU 3.8
 590      */
 591     private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
 592 
 593     /** Reordering mode: Reorder runs only to transform a Logical LTR string
 594      * to the logical RTL string with the same display, or vice-versa.<br>
 595      * If this mode is set together with option
 596      * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
 597      * text may be removed and other controls may be added to produce the
 598      * minimum combination which has the required display.
 599      * @see #OPTION_INSERT_MARKS
 600      * @see #setReorderingMode
 601      * @stable ICU 3.8
 602      */
 603     private static final short REORDER_RUNS_ONLY = 3;
 604 
 605     /** Reordering mode: Visual to Logical algorithm which handles numbers
 606      * like L (same algorithm as selected by <code>setInverse(true)</code>.
 607      * @see #setInverse
 608      * @see #setReorderingMode
 609      * @stable ICU 3.8
 610      */
 611     private static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
 612 
 613     /** Reordering mode: Visual to Logical algorithm equivalent to the regular
 614      * Logical to Visual algorithm.
 615      * @see #setReorderingMode
 616      * @stable ICU 3.8
 617      */
 618     private static final short REORDER_INVERSE_LIKE_DIRECT = 5;
 619 
 620     /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
 621      * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
 622      * @see #setReorderingMode
 623      * @stable ICU 3.8
 624      */
 625     private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
 626 
 627     /* Reordering mode values must be ordered so that all the regular logical to
 628      * visual modes come first, and all inverse Bidi modes come last.
 629      */
 630     private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
 631             REORDER_NUMBERS_SPECIAL;
 632 
 633     /**
 634      * Option bit for <code>setReorderingOptions</code>:
 635      * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
 636      * a reordering to a Logical order
 637      *
 638      * <p>This option must be set or reset before calling
 639      * <code>setPara</code>.</p>
 640      *
 641      * <p>This option is significant only with reordering modes which generate
 642      * a result with Logical order, specifically.</p>
 643      * <ul>
 644      *   <li><code>REORDER_RUNS_ONLY</code></li>
 645      *   <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
 646      *   <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
 647      *   <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
 648      * </ul>
 649      *
 650      * <p>If this option is set in conjunction with reordering mode
 651      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
 652      * <code>setInverse(true)</code>, it implies option
 653      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 654      * <code>writeReordered()</code>.</p>
 655      *
 656      * <p>For other reordering modes, a minimum number of LRM or RLM characters
 657      * will be added to the source text after reordering it so as to ensure
 658      * round trip, i.e. when applying the inverse reordering mode on the
 659      * resulting logical text with removal of Bidi marks
 660      * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
 661      * <code>setPara()</code> or option
 662      * <code>REMOVE_BIDI_CONTROLS</code> in
 663      * <code>writeReordered</code>), the result will be identical to the
 664      * source text in the first transformation.
 665      *
 666      * <p>This option will be ignored if specified together with option
 667      * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
 668      * <code>REMOVE_BIDI_CONTROLS</code> in calls to method
 669      * <code>writeReordered()</code> and it implies option
 670      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 671      * <code>writeReordered()</code> if the reordering mode is
 672      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
 673      *
 674      * @see #setReorderingMode
 675      * @see #setReorderingOptions
 676      * @see #INSERT_LRM_FOR_NUMERIC
 677      * @see #REMOVE_BIDI_CONTROLS
 678      * @see #OPTION_REMOVE_CONTROLS
 679      * @see #REORDER_RUNS_ONLY
 680      * @see #REORDER_INVERSE_NUMBERS_AS_L
 681      * @see #REORDER_INVERSE_LIKE_DIRECT
 682      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 683      * @stable ICU 3.8
 684      */
 685     private static final int OPTION_INSERT_MARKS = 1;
 686 
 687     /**
 688      * Option bit for <code>setReorderingOptions</code>:
 689      * remove Bidi control characters
 690      *
 691      * <p>This option must be set or reset before calling
 692      * <code>setPara</code>.</p>
 693      *
 694      * <p>This option nullifies option
 695      * <code>OPTION_INSERT_MARKS</code>. It inhibits option
 696      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 697      * <code>writeReordered()</code> and it implies option
 698      * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>
 699      *
 700      * @see #setReorderingMode
 701      * @see #setReorderingOptions
 702      * @see #OPTION_INSERT_MARKS
 703      * @see #INSERT_LRM_FOR_NUMERIC
 704      * @see #REMOVE_BIDI_CONTROLS
 705      * @stable ICU 3.8
 706      */
 707     private static final int OPTION_REMOVE_CONTROLS = 2;
 708 
 709     /**
 710      * Option bit for <code>setReorderingOptions</code>:
 711      * process the output as part of a stream to be continued
 712      *
 713      * <p>This option must be set or reset before calling
 714      * <code>setPara</code>.</p>
 715      *
 716      * <p>This option specifies that the caller is interested in processing
 717      * large text object in parts. The results of the successive calls are
 718      * expected to be concatenated by the caller. Only the call for the last
 719      * part will have this option bit off.</p>
 720      *
 721      * <p>When this option bit is on, <code>setPara()</code> may process
 722      * less than the full source text in order to truncate the text at a
 723      * meaningful boundary. The caller should call
 724      * <code>getProcessedLength()</code> immediately after calling
 725      * <code>setPara()</code> in order to determine how much of the source
 726      * text has been processed. Source text beyond that length should be
 727      * resubmitted in following calls to <code>setPara</code>. The
 728      * processed length may be less than the length of the source text if a
 729      * character preceding the last character of the source text constitutes a
 730      * reasonable boundary (like a block separator) for text to be continued.<br>
 731      * If the last character of the source text constitutes a reasonable
 732      * boundary, the whole text will be processed at once.<br>
 733      * If nowhere in the source text there exists
 734      * such a reasonable boundary, the processed length will be zero.<br>
 735      * The caller should check for such an occurrence and do one of the following:
 736      * <ul><li>submit a larger amount of text with a better chance to include
 737      *         a reasonable boundary.</li>
 738      *     <li>resubmit the same text after turning off option
 739      *         <code>OPTION_STREAMING</code>.</li></ul>
 740      * In all cases, this option should be turned off before processing the last
 741      * part of the text.</p>
 742      *
 743      * <p>When the <code>OPTION_STREAMING</code> option is used, it is
 744      * recommended to call <code>orderParagraphsLTR()</code> with argument
 745      * <code>orderParagraphsLTR</code> set to <code>true</code> before calling
 746      * <code>setPara()</code> so that later paragraphs may be concatenated to
 747      * previous paragraphs on the right.
 748      * </p>
 749      *
 750      * @see #setReorderingMode
 751      * @see #setReorderingOptions
 752      * @see #getProcessedLength
 753      * @see #orderParagraphsLTR
 754      * @stable ICU 3.8
 755      */
 756     private static final int OPTION_STREAMING = 4;
 757 
 758     /*
 759      *   Comparing the description of the Bidi algorithm with this implementation
 760      *   is easier with the same names for the Bidi types in the code as there.
 761      *   See UCharacterDirection
 762      */
 763     private static final byte L   = 0;
 764     private static final byte R   = 1;
 765     private static final byte EN  = 2;
 766     private static final byte ES  = 3;
 767     private static final byte ET  = 4;
 768     private static final byte AN  = 5;
 769     private static final byte CS  = 6;
 770     static final byte B   = 7;
 771     private static final byte S   = 8;
 772     private static final byte WS  = 9;
 773     private static final byte ON  = 10;
 774     private static final byte LRE = 11;
 775     private static final byte LRO = 12;
 776     private static final byte AL  = 13;
 777     private static final byte RLE = 14;
 778     private static final byte RLO = 15;
 779     private static final byte PDF = 16;
 780     private static final byte NSM = 17;
 781     private static final byte BN  = 18;
 782 
 783     private static final int MASK_R_AL = (1 << R | 1 << AL);
 784 
 785     private static final char CR = '\r';
 786     private static final char LF = '\n';
 787 
 788     static final int LRM_BEFORE = 1;
 789     static final int LRM_AFTER = 2;
 790     static final int RLM_BEFORE = 4;
 791     static final int RLM_AFTER = 8;
 792 
 793     /*
 794      * reference to parent paragraph object (reference to self if this object is
 795      * a paragraph object); set to null in a newly opened object; set to a
 796      * real value after a successful execution of setPara or setLine
 797      */
 798     BidiBase                paraBidi;
 799 
 800     final UBiDiProps    bdp;
 801 
 802     /* character array representing the current text */
 803     char[]              text;
 804 
 805     /* length of the current text */
 806     int                 originalLength;
 807 
 808     /* if the option OPTION_STREAMING is set, this is the length of
 809      * text actually processed by <code>setPara</code>, which may be shorter
 810      * than the original length. Otherwise, it is identical to the original
 811      * length.
 812      */
 813     public int                 length;
 814 
 815     /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
 816      * marks are allowed to be inserted in one of the reordering modes, the
 817      * length of the result string may be different from the processed length.
 818      */
 819     int                 resultLength;
 820 
 821     /* indicators for whether memory may be allocated after construction */
 822     boolean             mayAllocateText;
 823     boolean             mayAllocateRuns;
 824 
 825     /* arrays with one value per text-character */
 826     byte[]              dirPropsMemory = new byte[1];
 827     byte[]              levelsMemory = new byte[1];
 828     byte[]              dirProps;
 829     byte[]              levels;
 830 
 831     /* must block separators receive level 0? */
 832     boolean             orderParagraphsLTR;
 833 
 834     /* the paragraph level */
 835     byte                paraLevel;
 836 
 837     /* original paraLevel when contextual */
 838     /* must be one of DEFAULT_xxx or 0 if not contextual */
 839     byte                defaultParaLevel;
 840 
 841     /* the following is set in setPara, used in processPropertySeq */
 842 
 843     ImpTabPair          impTabPair;  /* reference to levels state table pair */
 844 
 845     /* the overall paragraph or line directionality*/
 846     byte                direction;
 847 
 848     /* flags is a bit set for which directional properties are in the text */
 849     int                 flags;
 850 
 851     /* lastArabicPos is index to the last AL in the text, -1 if none */
 852     int                 lastArabicPos;
 853 
 854     /* characters after trailingWSStart are WS and are */
 855     /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
 856     int                 trailingWSStart;
 857 
 858     /* fields for paragraph handling */
 859     int                 paraCount;       /* set in getDirProps() */
 860     int[]               parasMemory = new int[1];
 861     int[]               paras;           /* limits of paragraphs, filled in
 862                                           ResolveExplicitLevels() or CheckExplicitLevels() */
 863 
 864     /* for single paragraph text, we only need a tiny array of paras (no allocation) */
 865     int[]               simpleParas = {0};
 866 
 867     /* fields for line reordering */
 868     int                 runCount;     /* ==-1: runs not set up yet */
 869     BidiRun[]           runsMemory = new BidiRun[0];
 870     BidiRun[]           runs;
 871 
 872     /* for non-mixed text, we only need a tiny array of runs (no allocation) */
 873     BidiRun[]           simpleRuns = {new BidiRun()};
 874 
 875     /* mapping of runs in logical order to visual order */
 876     int[]               logicalToVisualRunsMap;
 877 
 878     /* flag to indicate that the map has been updated */
 879     boolean             isGoodLogicalToVisualRunsMap;
 880 
 881     /* for inverse Bidi with insertion of directional marks */
 882     InsertPoints        insertPoints = new InsertPoints();
 883 
 884     /* for option OPTION_REMOVE_CONTROLS */
 885     int                 controlCount;
 886 
 887     /*
 888      * Sometimes, bit values are more appropriate
 889      * to deal with directionality properties.
 890      * Abbreviations in these method names refer to names
 891      * used in the Bidi algorithm.
 892      */
 893     static int DirPropFlag(byte dir) {
 894         return (1 << dir);
 895     }
 896 
 897     /*
 898      * The following bit is ORed to the property of characters in paragraphs
 899      * with contextual RTL direction when paraLevel is contextual.
 900      */
 901     static final byte CONTEXT_RTL_SHIFT = 6;
 902     static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT);   // 0x40
 903     static byte NoContextRTL(byte dir)
 904     {
 905         return (byte)(dir & ~CONTEXT_RTL);
 906     }
 907 
 908     /*
 909      * The following is a variant of DirProp.DirPropFlag() which ignores the
 910      * CONTEXT_RTL bit.
 911      */
 912     static int DirPropFlagNC(byte dir) {
 913         return (1<<(dir & ~CONTEXT_RTL));
 914     }
 915 
 916     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
 917 
 918     /* to avoid some conditional statements, use tiny constant arrays */
 919     static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
 920     static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
 921     static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
 922 
 923     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
 924     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; }
 925     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; }
 926 
 927     /*
 928      *  are there any characters that are LTR?
 929      */
 930     static final int MASK_LTR =
 931         DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO);
 932 
 933     /*
 934      *  are there any characters that are RTL?
 935      */
 936     static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO);
 937 
 938     /* explicit embedding codes */
 939     private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO);
 940     private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO);
 941     private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF);
 942     private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
 943 
 944     /* paragraph and segment separators */
 945     private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
 946 
 947     /* all types that are counted as White Space or Neutral in some steps */
 948     static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT;
 949     private static final int MASK_N = DirPropFlag(ON)|MASK_WS;
 950 
 951     /* types that are neutrals or could becomes neutrals in (Wn) */
 952     private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N;
 953 
 954     /*
 955      * These types may be changed to "e",
 956      * the embedding type (L or R) of the run,
 957      * in the Bidi algorithm (N2)
 958      */
 959     static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
 960 
 961     /*
 962      *  the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
 963      */
 964     private static byte GetLRFromLevel(byte level)
 965     {
 966         return (byte)(level & 1);
 967     }
 968 
 969     private static boolean IsDefaultLevel(byte level)
 970     {
 971         return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR);
 972     }
 973 
 974     byte GetParaLevelAt(int index)
 975     {
 976         return (defaultParaLevel != 0) ?
 977                 (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel;
 978     }
 979 
 980     static boolean IsBidiControlChar(int c)
 981     {
 982         /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
 983                            0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
 984         return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
 985     }
 986 
 987     public void verifyValidPara()
 988     {
 989         if (this != this.paraBidi) {
 990             throw new IllegalStateException("");
 991         }
 992     }
 993 
 994     public void verifyValidParaOrLine()
 995     {
 996         BidiBase para = this.paraBidi;
 997         /* verify Para */
 998         if (this == para) {
 999             return;
1000         }
1001         /* verify Line */
1002         if ((para == null) || (para != para.paraBidi)) {
1003             throw new IllegalStateException();
1004         }
1005     }
1006 
1007     public void verifyRange(int index, int start, int limit)
1008     {
1009         if (index < start || index >= limit) {
1010             throw new IllegalArgumentException("Value " + index +
1011                       " is out of range " + start + " to " + limit);
1012         }
1013     }
1014 
1015     public void verifyIndex(int index, int start, int limit)
1016     {
1017         if (index < start || index >= limit) {
1018             throw new ArrayIndexOutOfBoundsException("Index " + index +
1019                       " is out of range " + start + " to " + limit);
1020         }
1021     }
1022 
1023     /**
1024      * Allocate a <code>Bidi</code> object with preallocated memory
1025      * for internal structures.
1026      * This method provides a <code>Bidi</code> object like the default constructor
1027      * but it also preallocates memory for internal structures
1028      * according to the sizings supplied by the caller.<p>
1029      * The preallocation can be limited to some of the internal memory
1030      * by setting some values to 0 here. That means that if, e.g.,
1031      * <code>maxRunCount</code> cannot be reasonably predetermined and should not
1032      * be set to <code>maxLength</code> (the only failproof value) to avoid
1033      * wasting  memory, then <code>maxRunCount</code> could be set to 0 here
1034      * and the internal structures that are associated with it will be allocated
1035      * on demand, just like with the default constructor.
1036      *
1037      * @param maxLength is the maximum text or line length that internal memory
1038      *        will be preallocated for. An attempt to associate this object with a
1039      *        longer text will fail, unless this value is 0, which leaves the allocation
1040      *        up to the implementation.
1041      *
1042      * @param maxRunCount is the maximum anticipated number of same-level runs
1043      *        that internal memory will be preallocated for. An attempt to access
1044      *        visual runs on an object that was not preallocated for as many runs
1045      *        as the text was actually resolved to will fail,
1046      *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
1047      *        The number of runs depends on the actual text and maybe anywhere between
1048      *        1 and <code>maxLength</code>. It is typically small.
1049      *
1050      * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
1051      * @stable ICU 3.8
1052      */
1053     public BidiBase(int maxLength, int maxRunCount)
1054      {
1055         /* check the argument values */
1056         if (maxLength < 0 || maxRunCount < 0) {
1057             throw new IllegalArgumentException();
1058         }
1059 
1060         /* reset the object, all reference variables null, all flags false,
1061            all sizes 0.
1062            In fact, we don't need to do anything, since class members are
1063            initialized as zero when an instance is created.
1064          */
1065         /*
1066         mayAllocateText = false;
1067         mayAllocateRuns = false;
1068         orderParagraphsLTR = false;
1069         paraCount = 0;
1070         runCount = 0;
1071         trailingWSStart = 0;
1072         flags = 0;
1073         paraLevel = 0;
1074         defaultParaLevel = 0;
1075         direction = 0;
1076         */
1077         /* get Bidi properties */
1078         try {
1079             bdp = UBiDiProps.getSingleton();
1080         }
1081         catch (IOException e) {
1082             throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
1083         }
1084 
1085         /* allocate memory for arrays as requested */
1086         if (maxLength > 0) {
1087             getInitialDirPropsMemory(maxLength);
1088             getInitialLevelsMemory(maxLength);
1089         } else {
1090             mayAllocateText = true;
1091         }
1092 
1093         if (maxRunCount > 0) {
1094             // if maxRunCount == 1, use simpleRuns[]
1095             if (maxRunCount > 1) {
1096                 getInitialRunsMemory(maxRunCount);
1097             }
1098         } else {
1099             mayAllocateRuns = true;
1100         }
1101     }
1102 
1103     /*
1104      * We are allowed to allocate memory if object==null or
1105      * mayAllocate==true for each array that we need.
1106      *
1107      * Assume sizeNeeded>0.
1108      * If object != null, then assume size > 0.
1109      */
1110     private Object getMemory(String label, Object array, Class<?> arrayClass,
1111             boolean mayAllocate, int sizeNeeded)
1112     {
1113         int len = Array.getLength(array);
1114 
1115         /* we have at least enough memory and must not allocate */
1116         if (sizeNeeded == len) {
1117             return array;
1118         }
1119         if (!mayAllocate) {
1120             /* we must not allocate */
1121             if (sizeNeeded <= len) {
1122                 return array;
1123             }
1124             throw new OutOfMemoryError("Failed to allocate memory for "
1125                                        + label);
1126         }
1127         /* we may try to grow or shrink */
1128         /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1129            the allocation altogether and rely on this.length */
1130         try {
1131             return Array.newInstance(arrayClass, sizeNeeded);
1132         } catch (Exception e) {
1133             throw new OutOfMemoryError("Failed to allocate memory for "
1134                                        + label);
1135         }
1136     }
1137 
1138     /* helper methods for each allocated array */
1139     private void getDirPropsMemory(boolean mayAllocate, int len)
1140     {
1141         Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1142         dirPropsMemory = (byte[]) array;
1143     }
1144 
1145     void getDirPropsMemory(int len)
1146     {
1147         getDirPropsMemory(mayAllocateText, len);
1148     }
1149 
1150     private void getLevelsMemory(boolean mayAllocate, int len)
1151     {
1152         Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1153         levelsMemory = (byte[]) array;
1154     }
1155 
1156     void getLevelsMemory(int len)
1157     {
1158         getLevelsMemory(mayAllocateText, len);
1159     }
1160 
1161     private void getRunsMemory(boolean mayAllocate, int len)
1162     {
1163         Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1164         runsMemory = (BidiRun[]) array;
1165     }
1166 
1167     void getRunsMemory(int len)
1168     {
1169         getRunsMemory(mayAllocateRuns, len);
1170     }
1171 
1172     /* additional methods used by constructor - always allow allocation */
1173     private void getInitialDirPropsMemory(int len)
1174     {
1175         getDirPropsMemory(true, len);
1176     }
1177 
1178     private void getInitialLevelsMemory(int len)
1179     {
1180         getLevelsMemory(true, len);
1181     }
1182 
1183     private void getInitialParasMemory(int len)
1184     {
1185         Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len);
1186         parasMemory = (int[]) array;
1187     }
1188 
1189     private void getInitialRunsMemory(int len)
1190     {
1191         getRunsMemory(true, len);
1192     }
1193 
1194 /* perform (P2)..(P3) ------------------------------------------------------- */
1195 
1196     private void getDirProps()
1197     {
1198         int i = 0, i0, i1;
1199         flags = 0;          /* collect all directionalities in the text */
1200         int uchar;
1201         byte dirProp;
1202         byte paraDirDefault = 0;   /* initialize to avoid compiler warnings */
1203         boolean isDefaultLevel = IsDefaultLevel(paraLevel);
1204         /* for inverse Bidi, the default para level is set to RTL if there is a
1205            strong R or AL character at either end of the text                */
1206         lastArabicPos = -1;
1207         controlCount = 0;
1208 
1209         final int NOT_CONTEXTUAL = 0;         /* 0: not contextual paraLevel */
1210         final int LOOKING_FOR_STRONG = 1;     /* 1: looking for first strong char */
1211         final int FOUND_STRONG_CHAR = 2;      /* 2: found first strong char       */
1212 
1213         int state;
1214         int paraStart = 0;                    /* index of first char in paragraph */
1215         byte paraDir;                         /* == CONTEXT_RTL within paragraphs
1216                                                  starting with strong R char      */
1217         byte lastStrongDir=0;                 /* for default level & inverse Bidi */
1218         int lastStrongLTR=0;                  /* for STREAMING option             */
1219 
1220         if (isDefaultLevel) {
1221             paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0;
1222             paraDir = paraDirDefault;
1223             lastStrongDir = paraDirDefault;
1224             state = LOOKING_FOR_STRONG;
1225         } else {
1226             state = NOT_CONTEXTUAL;
1227             paraDir = 0;
1228         }
1229         /* count paragraphs and determine the paragraph level (P2..P3) */
1230         /*
1231          * see comment on constant fields:
1232          * the LEVEL_DEFAULT_XXX values are designed so that
1233          * their low-order bit alone yields the intended default
1234          */
1235 
1236         for (i = 0; i < originalLength; /* i is incremented in the loop */) {
1237             i0 = i;                     /* index of first code unit */
1238             uchar = UTF16.charAt(text, 0, originalLength, i);
1239             i += Character.charCount(uchar);
1240             i1 = i - 1; /* index of last code unit, gets the directional property */
1241 
1242             dirProp = (byte)bdp.getClass(uchar);
1243 
1244             flags |= DirPropFlag(dirProp);
1245             dirProps[i1] = (byte)(dirProp | paraDir);
1246             if (i1 > i0) {     /* set previous code units' properties to BN */
1247                 flags |= DirPropFlag(BN);
1248                 do {
1249                     dirProps[--i1] = (byte)(BN | paraDir);
1250                 } while (i1 > i0);
1251             }
1252             if (state == LOOKING_FOR_STRONG) {
1253                 if (dirProp == L) {
1254                     state = FOUND_STRONG_CHAR;
1255                     if (paraDir != 0) {
1256                         paraDir = 0;
1257                         for (i1 = paraStart; i1 < i; i1++) {
1258                             dirProps[i1] &= ~CONTEXT_RTL;
1259                         }
1260                     }
1261                     continue;
1262                 }
1263                 if (dirProp == R || dirProp == AL) {
1264                     state = FOUND_STRONG_CHAR;
1265                     if (paraDir == 0) {
1266                         paraDir = CONTEXT_RTL;
1267                         for (i1 = paraStart; i1 < i; i1++) {
1268                             dirProps[i1] |= CONTEXT_RTL;
1269                         }
1270                     }
1271                     continue;
1272                 }
1273             }
1274             if (dirProp == L) {
1275                 lastStrongDir = 0;
1276                 lastStrongLTR = i;      /* i is index to next character */
1277             }
1278             else if (dirProp == R) {
1279                 lastStrongDir = CONTEXT_RTL;
1280             }
1281             else if (dirProp == AL) {
1282                 lastStrongDir = CONTEXT_RTL;
1283                 lastArabicPos = i-1;
1284             }
1285             else if (dirProp == B) {
1286                 if (i < originalLength) {   /* B not last char in text */
1287                     if (!((uchar == (int)CR) && (text[i] == (int)LF))) {
1288                         paraCount++;
1289                     }
1290                     if (isDefaultLevel) {
1291                         state=LOOKING_FOR_STRONG;
1292                         paraStart = i;        /* i is index to next character */
1293                         paraDir = paraDirDefault;
1294                         lastStrongDir = paraDirDefault;
1295                     }
1296                 }
1297             }
1298         }
1299         if (isDefaultLevel) {
1300             paraLevel = GetParaLevelAt(0);
1301         }
1302 
1303         /* The following line does nothing new for contextual paraLevel, but is
1304            needed for absolute paraLevel.                               */
1305         flags |= DirPropFlagLR(paraLevel);
1306 
1307         if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1308             flags |= DirPropFlag(L);
1309         }
1310     }
1311 
1312     /* perform (X1)..(X9) ------------------------------------------------------- */
1313 
1314     /* determine if the text is mixed-directional or single-directional */
1315     private byte directionFromFlags() {
1316         /* if the text contains AN and neutrals, then some neutrals may become RTL */
1317         if (!((flags & MASK_RTL) != 0 ||
1318               ((flags & DirPropFlag(AN)) != 0 &&
1319                (flags & MASK_POSSIBLE_N) != 0))) {
1320             return Bidi.DIRECTION_LEFT_TO_RIGHT;
1321         } else if ((flags & MASK_LTR) == 0) {
1322             return Bidi.DIRECTION_RIGHT_TO_LEFT;
1323         } else {
1324             return MIXED;
1325         }
1326     }
1327 
1328     /*
1329      * Resolve the explicit levels as specified by explicit embedding codes.
1330      * Recalculate the flags to have them reflect the real properties
1331      * after taking the explicit embeddings into account.
1332      *
1333      * The Bidi algorithm is designed to result in the same behavior whether embedding
1334      * levels are externally specified (from "styled text", supposedly the preferred
1335      * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
1336      * That is why (X9) instructs to remove all explicit codes (and BN).
1337      * However, in a real implementation, this removal of these codes and their index
1338      * positions in the plain text is undesirable since it would result in
1339      * reallocated, reindexed text.
1340      * Instead, this implementation leaves the codes in there and just ignores them
1341      * in the subsequent processing.
1342      * In order to get the same reordering behavior, positions with a BN or an
1343      * explicit embedding code just get the same level assigned as the last "real"
1344      * character.
1345      *
1346      * Some implementations, not this one, then overwrite some of these
1347      * directionality properties at "real" same-level-run boundaries by
1348      * L or R codes so that the resolution of weak types can be performed on the
1349      * entire paragraph at once instead of having to parse it once more and
1350      * perform that resolution on same-level-runs.
1351      * This limits the scope of the implicit rules in effectively
1352      * the same way as the run limits.
1353      *
1354      * Instead, this implementation does not modify these codes.
1355      * On one hand, the paragraph has to be scanned for same-level-runs, but
1356      * on the other hand, this saves another loop to reset these codes,
1357      * or saves making and modifying a copy of dirProps[].
1358      *
1359      *
1360      * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
1361      *
1362      *
1363      * Handling the stack of explicit levels (Xn):
1364      *
1365      * With the Bidi stack of explicit levels,
1366      * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
1367      * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
1368      *
1369      * In order to have a correct push-pop semantics even in the case of overflows,
1370      * there are two overflow counters:
1371      * - countOver60 is incremented with each LRx at level 60
1372      * - from level 60, one RLx increases the level to 61
1373      * - countOver61 is incremented with each LRx and RLx at level 61
1374      *
1375      * Popping levels with PDF must work in the opposite order so that level 61
1376      * is correct at the correct point. Underflows (too many PDFs) must be checked.
1377      *
1378      * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
1379      */
1380     private byte resolveExplicitLevels() {
1381         int i = 0;
1382         byte dirProp;
1383         byte level = GetParaLevelAt(0);
1384 
1385         byte dirct;
1386         int paraIndex = 0;
1387 
1388         /* determine if the text is mixed-directional or single-directional */
1389         dirct = directionFromFlags();
1390 
1391         /* we may not need to resolve any explicit levels, but for multiple
1392            paragraphs we want to loop on all chars to set the para boundaries */
1393         if ((dirct != MIXED) && (paraCount == 1)) {
1394             /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1395         } else if ((paraCount == 1) &&
1396                    ((flags & MASK_EXPLICIT) == 0)) {
1397             /* mixed, but all characters are at the same embedding level */
1398             /* or we are in "inverse Bidi" */
1399             /* and we don't have contextual multiple paragraphs with some B char */
1400             /* set all levels to the paragraph level */
1401             for (i = 0; i < length; ++i) {
1402                 levels[i] = level;
1403             }
1404         } else {
1405             /* continue to perform (Xn) */
1406 
1407             /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1408             /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
1409             byte embeddingLevel = level;
1410             byte newLevel;
1411             byte stackTop = 0;
1412 
1413             byte[] stack = new byte[MAX_EXPLICIT_LEVEL];    /* we never push anything >=MAX_EXPLICIT_LEVEL */
1414             int countOver60 = 0;
1415             int countOver61 = 0;  /* count overflows of explicit levels */
1416 
1417             /* recalculate the flags */
1418             flags = 0;
1419 
1420             for (i = 0; i < length; ++i) {
1421                 dirProp = NoContextRTL(dirProps[i]);
1422                 switch(dirProp) {
1423                 case LRE:
1424                 case LRO:
1425                     /* (X3, X5) */
1426                     newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */
1427                     if (newLevel <= MAX_EXPLICIT_LEVEL) {
1428                         stack[stackTop] = embeddingLevel;
1429                         ++stackTop;
1430                         embeddingLevel = newLevel;
1431                         if (dirProp == LRO) {
1432                             embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1433                         }
1434                         /* we don't need to set LEVEL_OVERRIDE off for LRE
1435                            since this has already been done for newLevel which is
1436                            the source for embeddingLevel.
1437                          */
1438                     } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {
1439                         ++countOver61;
1440                     } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {
1441                         ++countOver60;
1442                     }
1443                     flags |= DirPropFlag(BN);
1444                     break;
1445                 case RLE:
1446                 case RLO:
1447                     /* (X2, X4) */
1448                     newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */
1449                     if (newLevel<=MAX_EXPLICIT_LEVEL) {
1450                         stack[stackTop] = embeddingLevel;
1451                         ++stackTop;
1452                         embeddingLevel = newLevel;
1453                         if (dirProp == RLO) {
1454                             embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1455                         }
1456                         /* we don't need to set LEVEL_OVERRIDE off for RLE
1457                            since this has already been done for newLevel which is
1458                            the source for embeddingLevel.
1459                          */
1460                     } else {
1461                         ++countOver61;
1462                     }
1463                     flags |= DirPropFlag(BN);
1464                     break;
1465                 case PDF:
1466                     /* (X7) */
1467                     /* handle all the overflow cases first */
1468                     if (countOver61 > 0) {
1469                         --countOver61;
1470                     } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {
1471                         /* handle LRx overflows from level 60 */
1472                         --countOver60;
1473                     } else if (stackTop > 0) {
1474                         /* this is the pop operation; it also pops level 61 while countOver60>0 */
1475                         --stackTop;
1476                         embeddingLevel = stack[stackTop];
1477                     /* } else { (underflow) */
1478                     }
1479                     flags |= DirPropFlag(BN);
1480                     break;
1481                 case B:
1482                     stackTop = 0;
1483                     countOver60 = 0;
1484                     countOver61 = 0;
1485                     level = GetParaLevelAt(i);
1486                     if ((i + 1) < length) {
1487                         embeddingLevel = GetParaLevelAt(i+1);
1488                         if (!((text[i] == CR) && (text[i + 1] == LF))) {
1489                             paras[paraIndex++] = i+1;
1490                         }
1491                     }
1492                     flags |= DirPropFlag(B);
1493                     break;
1494                 case BN:
1495                     /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1496                     /* they will get their levels set correctly in adjustWSLevels() */
1497                     flags |= DirPropFlag(BN);
1498                     break;
1499                 default:
1500                     /* all other types get the "real" level */
1501                     if (level != embeddingLevel) {
1502                         level = embeddingLevel;
1503                         if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1504                             flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;
1505                         } else {
1506                             flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;
1507                         }
1508                     }
1509                     if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
1510                         flags |= DirPropFlag(dirProp);
1511                     }
1512                     break;
1513                 }
1514 
1515                 /*
1516                  * We need to set reasonable levels even on BN codes and
1517                  * explicit codes because we will later look at same-level runs (X10).
1518                  */
1519                 levels[i] = level;
1520             }
1521             if ((flags & MASK_EMBEDDING) != 0) {
1522                 flags |= DirPropFlagLR(paraLevel);
1523             }
1524             if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1525                 flags |= DirPropFlag(L);
1526             }
1527 
1528             /* subsequently, ignore the explicit codes and BN (X9) */
1529 
1530             /* again, determine if the text is mixed-directional or single-directional */
1531             dirct = directionFromFlags();
1532         }
1533 
1534         return dirct;
1535     }
1536 
1537     /*
1538      * Use a pre-specified embedding levels array:
1539      *
1540      * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1541      * ignore all explicit codes (X9),
1542      * and check all the preset levels.
1543      *
1544      * Recalculate the flags to have them reflect the real properties
1545      * after taking the explicit embeddings into account.
1546      */
1547     private byte checkExplicitLevels() {
1548         byte dirProp;
1549         int i;
1550         this.flags = 0;     /* collect all directionalities in the text */
1551         byte level;
1552         int paraIndex = 0;
1553 
1554         for (i = 0; i < length; ++i) {
1555             if (levels[i] == 0) {
1556                 levels[i] = paraLevel;
1557             }
1558             if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
1559                 if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) {
1560                     levels[i] =  (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE);
1561                 } else {
1562                     levels[i] = paraLevel;
1563                 }
1564             }
1565             level = levels[i];
1566             dirProp = NoContextRTL(dirProps[i]);
1567             if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1568                 /* keep the override flag in levels[i] but adjust the flags */
1569                 level &= ~INTERNAL_LEVEL_OVERRIDE;     /* make the range check below simpler */
1570                 flags |= DirPropFlagO(level);
1571             } else {
1572                 /* set the flags */
1573                 flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
1574             }
1575 
1576             if ((level < GetParaLevelAt(i) &&
1577                     !((0 == level) && (dirProp == B))) ||
1578                     (MAX_EXPLICIT_LEVEL <level)) {
1579                 /* level out of bounds */
1580                 throw new IllegalArgumentException("level " + level +
1581                                                    " out of bounds at index " + i);
1582             }
1583             if ((dirProp == B) && ((i + 1) < length)) {
1584                 if (!((text[i] == CR) && (text[i + 1] == LF))) {
1585                     paras[paraIndex++] = i + 1;
1586                 }
1587             }
1588         }
1589         if ((flags&MASK_EMBEDDING) != 0) {
1590             flags |= DirPropFlagLR(paraLevel);
1591         }
1592 
1593         /* determine if the text is mixed-directional or single-directional */
1594         return directionFromFlags();
1595     }
1596 
1597     /*********************************************************************/
1598     /* The Properties state machine table                                */
1599     /*********************************************************************/
1600     /*                                                                   */
1601     /* All table cells are 8 bits:                                       */
1602     /*      bits 0..4:  next state                                       */
1603     /*      bits 5..7:  action to perform (if > 0)                       */
1604     /*                                                                   */
1605     /* Cells may be of format "n" where n represents the next state      */
1606     /* (except for the rightmost column).                                */
1607     /* Cells may also be of format "_(x,y)" where x represents an action */
1608     /* to perform and y represents the next state.                       */
1609     /*                                                                   */
1610     /*********************************************************************/
1611     /* Definitions and type for properties state tables                  */
1612     /*********************************************************************/
1613     private static final int IMPTABPROPS_COLUMNS = 14;
1614     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
1615     private static short GetStateProps(short cell) {
1616         return (short)(cell & 0x1f);
1617     }
1618     private static short GetActionProps(short cell) {
1619         return (short)(cell >> 5);
1620     }
1621 
1622     private static final short groupProp[] =          /* dirProp regrouped */
1623     {
1624         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
1625         0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
1626     };
1627     private static final short _L  = 0;
1628     private static final short _R  = 1;
1629     private static final short _EN = 2;
1630     private static final short _AN = 3;
1631     private static final short _ON = 4;
1632     private static final short _S  = 5;
1633     private static final short _B  = 6; /* reduced dirProp */
1634 
1635     /*********************************************************************/
1636     /*                                                                   */
1637     /*      PROPERTIES  STATE  TABLE                                     */
1638     /*                                                                   */
1639     /* In table impTabProps,                                             */
1640     /*      - the ON column regroups ON and WS                           */
1641     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
1642     /*      - the Res column is the reduced property assigned to a run   */
1643     /*                                                                   */
1644     /* Action 1: process current run1, init new run1                     */
1645     /*        2: init new run2                                           */
1646     /*        3: process run1, process run2, init new run1               */
1647     /*        4: process run1, set run1=run2, init new run2              */
1648     /*                                                                   */
1649     /* Notes:                                                            */
1650     /*  1) This table is used in resolveImplicitLevels().                */
1651     /*  2) This table triggers actions when there is a change in the Bidi*/
1652     /*     property of incoming characters (action 1).                   */
1653     /*  3) Most such property sequences are processed immediately (in    */
1654     /*     fact, passed to processPropertySeq().                         */
1655     /*  4) However, numbers are assembled as one sequence. This means    */
1656     /*     that undefined situations (like CS following digits, until    */
1657     /*     it is known if the next char will be a digit) are held until  */
1658     /*     following chars define them.                                  */
1659     /*     Example: digits followed by CS, then comes another CS or ON;  */
1660     /*              the digits will be processed, then the CS assigned   */
1661     /*              as the start of an ON sequence (action 3).           */
1662     /*  5) There are cases where more than one sequence must be          */
1663     /*     processed, for instance digits followed by CS followed by L:  */
1664     /*     the digits must be processed as one sequence, and the CS      */
1665     /*     must be processed as an ON sequence, all this before starting */
1666     /*     assembling chars for the opening L sequence.                  */
1667     /*                                                                   */
1668     /*                                                                   */
1669     private static final short impTabProps[][] =
1670     {
1671 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,  Res */
1672 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,  _ON },
1673 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3,   _L },
1674 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3,   _R },
1675 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3,   _R },
1676 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,  _EN },
1677 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3,  _AN },
1678 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,  _AN },
1679 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3,  _ON },
1680 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3,  _ON },
1681 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,  _ON },
1682 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,  _EN },
1683 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,  _EN },
1684 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3,  _AN },
1685 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,  _AN },
1686 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,  _ON },
1687 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3,   _S },
1688 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3,   _S },
1689 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3,   _B }
1690     };
1691 
1692     /*********************************************************************/
1693     /* The levels state machine tables                                   */
1694     /*********************************************************************/
1695     /*                                                                   */
1696     /* All table cells are 8 bits:                                       */
1697     /*      bits 0..3:  next state                                       */
1698     /*      bits 4..7:  action to perform (if > 0)                       */
1699     /*                                                                   */
1700     /* Cells may be of format "n" where n represents the next state      */
1701     /* (except for the rightmost column).                                */
1702     /* Cells may also be of format "_(x,y)" where x represents an action */
1703     /* to perform and y represents the next state.                       */
1704     /*                                                                   */
1705     /* This format limits each table to 16 states each and to 15 actions.*/
1706     /*                                                                   */
1707     /*********************************************************************/
1708     /* Definitions and type for levels state tables                      */
1709     /*********************************************************************/
1710     private static final int IMPTABLEVELS_COLUMNS = _B + 2;
1711     private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
1712     private static short GetState(byte cell) { return (short)(cell & 0x0f); }
1713     private static short GetAction(byte cell) { return (short)(cell >> 4); }
1714 
1715     private static class ImpTabPair {
1716         byte[][][] imptab;
1717         short[][] impact;
1718 
1719         ImpTabPair(byte[][] table1, byte[][] table2,
1720                    short[] act1, short[] act2) {
1721             imptab = new byte[][][] {table1, table2};
1722             impact = new short[][] {act1, act2};
1723         }
1724     }
1725 
1726     /*********************************************************************/
1727     /*                                                                   */
1728     /*      LEVELS  STATE  TABLES                                        */
1729     /*                                                                   */
1730     /* In all levels state tables,                                       */
1731     /*      - state 0 is the initial state                               */
1732     /*      - the Res column is the increment to add to the text level   */
1733     /*        for this property sequence.                                */
1734     /*                                                                   */
1735     /* The impact arrays for each table of a pair map the local action   */
1736     /* numbers of the table to the total list of actions. For instance,  */
1737     /* action 2 in a given table corresponds to the action number which  */
1738     /* appears in entry [2] of the impact array for that table.          */
1739     /* The first entry of all impact arrays must be 0.                   */
1740     /*                                                                   */
1741     /* Action 1: init conditional sequence                               */
1742     /*        2: prepend conditional sequence to current sequence        */
1743     /*        3: set ON sequence to new level - 1                        */
1744     /*        4: init EN/AN/ON sequence                                  */
1745     /*        5: fix EN/AN/ON sequence followed by R                     */
1746     /*        6: set previous level sequence to level 2                  */
1747     /*                                                                   */
1748     /* Notes:                                                            */
1749     /*  1) These tables are used in processPropertySeq(). The input      */
1750     /*     is property sequences as determined by resolveImplicitLevels. */
1751     /*  2) Most such property sequences are processed immediately        */
1752     /*     (levels are assigned).                                        */
1753     /*  3) However, some sequences cannot be assigned a final level till */
1754     /*     one or more following sequences are received. For instance,   */
1755     /*     ON following an R sequence within an even-level paragraph.    */
1756     /*     If the following sequence is R, the ON sequence will be       */
1757     /*     assigned basic run level+1, and so will the R sequence.       */
1758     /*  4) S is generally handled like ON, since its level will be fixed */
1759     /*     to paragraph level in adjustWSLevels().                       */
1760     /*                                                                   */
1761 
1762     private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
1763         /*  In this table, conditional sequences receive the higher possible level
1764             until proven otherwise.
1765         */
1766     {
1767         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1768         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
1769         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
1770         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
1771         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
1772         /* 4 : R+ON       */ {  0x20,     1,     3,     3,     4,     4,  0x20,  1 },
1773         /* 5 : AN+ON      */ {  0x20,     1,  0x20,     2,     5,     5,  0x20,  1 }
1774     };
1775 
1776     private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
1777         /*  In this table, conditional sequences receive the lower possible level
1778             until proven otherwise.
1779         */
1780     {
1781         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1782         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
1783         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
1784         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
1785         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
1786         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
1787         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
1788     };
1789 
1790     private static final short[] impAct0 = {0,1,2,3,4,5,6};
1791 
1792     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
1793             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
1794 
1795     private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
1796         /* In this table, conditional sequences receive the higher possible
1797            level until proven otherwise.
1798         */
1799         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1800         /* 0 : init       */ {     0,     2,     1,     1,     0,     0,     0,  0 },
1801         /* 1 : L+EN/AN    */ {     0,     2,     1,     1,     0,     0,     0,  2 },
1802         /* 2 : R          */ {     0,     2,     4,     4,  0x13,     0,     0,  1 },
1803         /* 3 : R+ON       */ {  0x20,     2,     4,     4,     3,     3,  0x20,  1 },
1804         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
1805     };
1806     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
1807             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
1808 
1809     private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
1810         /* In this table, EN/AN+ON sequences receive levels as if associated with R
1811            until proven that there is L or sor/eor on both sides. AN is handled like EN.
1812         */
1813         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1814         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
1815         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
1816         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
1817         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
1818         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
1819         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
1820     };
1821     private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
1822         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1823             until proven that there is L on both sides. AN is handled like EN.
1824         */
1825         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1826         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
1827         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
1828         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
1829         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
1830         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
1831     };
1832     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
1833             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
1834                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
1835 
1836     private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
1837         /* This table is identical to the Default LTR table except that EN and AN
1838            are handled like L.
1839         */
1840         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1841         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
1842         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
1843         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
1844         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
1845         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
1846         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
1847     };
1848     private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
1849         /* This table is identical to the Default RTL table except that EN and AN
1850            are handled like L.
1851         */
1852         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1853         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
1854         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
1855         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
1856         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
1857         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
1858         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
1859     };
1860     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
1861             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
1862              impAct0, impAct0);
1863 
1864     private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
1865         /*  In this table, conditional sequences receive the lower possible level
1866             until proven otherwise.
1867         */
1868         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1869         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
1870         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
1871         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
1872         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
1873         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
1874         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
1875         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
1876     };
1877     private static final short[] impAct1 = {0,1,11,12};
1878     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
1879             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1880 
1881     private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1882         /* The case handled in this table is (visually):  R EN L
1883          */
1884         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1885         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
1886         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
1887         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
1888         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
1889         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
1890         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
1891         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
1892     };
1893     private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1894         /* The cases handled in this table are (visually):  R EN L
1895                                                             R L AN L
1896         */
1897         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1898         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
1899         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
1900         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
1901         /* 3 : L          */ {    3 ,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
1902         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
1903         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
1904         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
1905     };
1906     private static final short impAct2[] = {0,1,7,8,9,10};
1907     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
1908             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1909                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1910 
1911     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
1912             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1913 
1914     private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
1915         /*  The case handled in this table is (visually):  R EN L
1916         */
1917         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1918         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
1919         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
1920         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
1921         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
1922         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
1923     };
1924     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
1925             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1926                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1927 
1928     private class LevState {
1929         byte[][] impTab;                /* level table pointer          */
1930         short[] impAct;                 /* action map array             */
1931         int startON;                    /* start of ON sequence         */
1932         int startL2EN;                  /* start of level 2 sequence    */
1933         int lastStrongRTL;              /* index of last found R or AL  */
1934         short state;                    /* current state                */
1935         byte runLevel;                  /* run level before implicit solving */
1936     }
1937 
1938     /*------------------------------------------------------------------------*/
1939 
1940     static final int FIRSTALLOC = 10;
1941     /*
1942      *  param pos:     position where to insert
1943      *  param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1944      */
1945     private void addPoint(int pos, int flag)
1946     {
1947         Point point = new Point();
1948 
1949         int len = insertPoints.points.length;
1950         if (len == 0) {
1951             insertPoints.points = new Point[FIRSTALLOC];
1952             len = FIRSTALLOC;
1953         }
1954         if (insertPoints.size >= len) { /* no room for new point */
1955             Point[] savePoints = insertPoints.points;
1956             insertPoints.points = new Point[len * 2];
1957             System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
1958         }
1959         point.pos = pos;
1960         point.flag = flag;
1961         insertPoints.points[insertPoints.size] = point;
1962         insertPoints.size++;
1963     }
1964 
1965     /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1966 
1967     /*
1968      * This implementation of the (Wn) rules applies all rules in one pass.
1969      * In order to do so, it needs a look-ahead of typically 1 character
1970      * (except for W5: sequences of ET) and keeps track of changes
1971      * in a rule Wp that affect a later Wq (p<q).
1972      *
1973      * The (Nn) and (In) rules are also performed in that same single loop,
1974      * but effectively one iteration behind for white space.
1975      *
1976      * Since all implicit rules are performed in one step, it is not necessary
1977      * to actually store the intermediate directional properties in dirProps[].
1978      */
1979 
1980     private void processPropertySeq(LevState levState, short _prop,
1981             int start, int limit) {
1982         byte cell;
1983         byte[][] impTab = levState.impTab;
1984         short[] impAct = levState.impAct;
1985         short oldStateSeq,actionSeq;
1986         byte level, addLevel;
1987         int start0, k;
1988 
1989         start0 = start;                 /* save original start position */
1990         oldStateSeq = levState.state;
1991         cell = impTab[oldStateSeq][_prop];
1992         levState.state = GetState(cell);        /* isolate the new state */
1993         actionSeq = impAct[GetAction(cell)];    /* isolate the action */
1994         addLevel = impTab[levState.state][IMPTABLEVELS_RES];
1995 
1996         if (actionSeq != 0) {
1997             switch (actionSeq) {
1998             case 1:                     /* init ON seq */
1999                 levState.startON = start0;
2000                 break;
2001 
2002             case 2:                     /* prepend ON seq to current seq */
2003                 start = levState.startON;
2004                 break;
2005 
2006             case 3:                     /* L or S after possible relevant EN/AN */
2007                 /* check if we had EN after R/AL */
2008                 if (levState.startL2EN >= 0) {
2009                     addPoint(levState.startL2EN, LRM_BEFORE);
2010                 }
2011                 levState.startL2EN = -1;  /* not within previous if since could also be -2 */
2012                 /* check if we had any relevant EN/AN after R/AL */
2013                 if ((insertPoints.points.length == 0) ||
2014                         (insertPoints.size <= insertPoints.confirmed)) {
2015                     /* nothing, just clean up */
2016                     levState.lastStrongRTL = -1;
2017                     /* check if we have a pending conditional segment */
2018                     level = impTab[oldStateSeq][IMPTABLEVELS_RES];
2019                     if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
2020                         start = levState.startON;   /* reset to basic run level */
2021                     }
2022                     if (_prop == _S) {              /* add LRM before S */
2023                         addPoint(start0, LRM_BEFORE);
2024                         insertPoints.confirmed = insertPoints.size;
2025                     }
2026                     break;
2027                 }
2028                 /* reset previous RTL cont to level for LTR text */
2029                 for (k = levState.lastStrongRTL + 1; k < start0; k++) {
2030                     /* reset odd level, leave runLevel+2 as is */
2031                     levels[k] = (byte)((levels[k] - 2) & ~1);
2032                 }
2033                 /* mark insert points as confirmed */
2034                 insertPoints.confirmed = insertPoints.size;
2035                 levState.lastStrongRTL = -1;
2036                 if (_prop == _S) {           /* add LRM before S */
2037                     addPoint(start0, LRM_BEFORE);
2038                     insertPoints.confirmed = insertPoints.size;
2039                 }
2040                 break;
2041 
2042             case 4:                     /* R/AL after possible relevant EN/AN */
2043                 /* just clean up */
2044                 if (insertPoints.points.length > 0)
2045                     /* remove all non confirmed insert points */
2046                     insertPoints.size = insertPoints.confirmed;
2047                 levState.startON = -1;
2048                 levState.startL2EN = -1;
2049                 levState.lastStrongRTL = limit - 1;
2050                 break;
2051 
2052             case 5:                     /* EN/AN after R/AL + possible cont */
2053                 /* check for real AN */
2054                 if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) {
2055                     /* real AN */
2056                     if (levState.startL2EN == -1) { /* if no relevant EN already found */
2057                         /* just note the righmost digit as a strong RTL */
2058                         levState.lastStrongRTL = limit - 1;
2059                         break;
2060                     }
2061                     if (levState.startL2EN >= 0)  { /* after EN, no AN */
2062                         addPoint(levState.startL2EN, LRM_BEFORE);
2063                         levState.startL2EN = -2;
2064                     }
2065                     /* note AN */
2066                     addPoint(start0, LRM_BEFORE);
2067                     break;
2068                 }
2069                 /* if first EN/AN after R/AL */
2070                 if (levState.startL2EN == -1) {
2071                     levState.startL2EN = start0;
2072                 }
2073                 break;
2074 
2075             case 6:                     /* note location of latest R/AL */
2076                 levState.lastStrongRTL = limit - 1;
2077                 levState.startON = -1;
2078                 break;
2079 
2080             case 7:                     /* L after R+ON/EN/AN */
2081                 /* include possible adjacent number on the left */
2082                 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
2083                 }
2084                 if (k >= 0) {
2085                     addPoint(k, RLM_BEFORE);    /* add RLM before */
2086                     insertPoints.confirmed = insertPoints.size; /* confirm it */
2087                 }
2088                 levState.startON = start0;
2089                 break;
2090 
2091             case 8:                     /* AN after L */
2092                 /* AN numbers between L text on both sides may be trouble. */
2093                 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2094                 addPoint(start0, LRM_BEFORE);   /* add LRM before */
2095                 addPoint(start0, LRM_AFTER);    /* add LRM after  */
2096                 break;
2097 
2098             case 9:                     /* R after L+ON/EN/AN */
2099                 /* false alert, infirm LRMs around previous AN */
2100                 insertPoints.size=insertPoints.confirmed;
2101                 if (_prop == _S) {          /* add RLM before S */
2102                     addPoint(start0, RLM_BEFORE);
2103                     insertPoints.confirmed = insertPoints.size;
2104                 }
2105                 break;
2106 
2107             case 10:                    /* L after L+ON/AN */
2108                 level = (byte)(levState.runLevel + addLevel);
2109                 for (k=levState.startON; k < start0; k++) {
2110                     if (levels[k] < level) {
2111                         levels[k] = level;
2112                     }
2113                 }
2114                 insertPoints.confirmed = insertPoints.size;   /* confirm inserts */
2115                 levState.startON = start0;
2116                 break;
2117 
2118             case 11:                    /* L after L+ON+EN/AN/ON */
2119                 level = levState.runLevel;
2120                 for (k = start0-1; k >= levState.startON; k--) {
2121                     if (levels[k] == level+3) {
2122                         while (levels[k] == level+3) {
2123                             levels[k--] -= 2;
2124                         }
2125                         while (levels[k] == level) {
2126                             k--;
2127                         }
2128                     }
2129                     if (levels[k] == level+2) {
2130                         levels[k] = level;
2131                         continue;
2132                     }
2133                     levels[k] = (byte)(level+1);
2134                 }
2135                 break;
2136 
2137             case 12:                    /* R after L+ON+EN/AN/ON */
2138                 level = (byte)(levState.runLevel+1);
2139                 for (k = start0-1; k >= levState.startON; k--) {
2140                     if (levels[k] > level) {
2141                         levels[k] -= 2;
2142                     }
2143                 }
2144                 break;
2145 
2146             default:                        /* we should never get here */
2147                 throw new IllegalStateException("Internal ICU error in processPropertySeq");
2148             }
2149         }
2150         if ((addLevel) != 0 || (start < start0)) {
2151             level = (byte)(levState.runLevel + addLevel);
2152             for (k = start; k < limit; k++) {
2153                 levels[k] = level;
2154             }
2155         }
2156     }
2157 
2158     private void resolveImplicitLevels(int start, int limit, short sor, short eor)
2159     {
2160         LevState levState = new LevState();
2161         int i, start1, start2;
2162         short oldStateImp, stateImp, actionImp;
2163         short gprop, resProp, cell;
2164         short nextStrongProp = R;
2165         int nextStrongPos = -1;
2166 
2167 
2168         /* check for RTL inverse Bidi mode */
2169         /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
2170          * loop on the text characters from end to start.
2171          * This would need a different properties state table (at least different
2172          * actions) and different levels state tables (maybe very similar to the
2173          * LTR corresponding ones.
2174          */
2175         /* initialize for levels state table */
2176         levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2177         levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2178         levState.state = 0;
2179         levState.runLevel = levels[start];
2180         levState.impTab = impTabPair.imptab[levState.runLevel & 1];
2181         levState.impAct = impTabPair.impact[levState.runLevel & 1];
2182         processPropertySeq(levState, sor, start, start);
2183         /* initialize for property state table */
2184         if (dirProps[start] == NSM) {
2185             stateImp = (short)(1 + sor);
2186         } else {
2187             stateImp = 0;
2188         }
2189         start1 = start;
2190         start2 = 0;
2191 
2192         for (i = start; i <= limit; i++) {
2193             if (i >= limit) {
2194                 gprop = eor;
2195             } else {
2196                 short prop, prop1;
2197                 prop = NoContextRTL(dirProps[i]);
2198                 gprop = groupProp[prop];
2199             }
2200             oldStateImp = stateImp;
2201             cell = impTabProps[oldStateImp][gprop];
2202             stateImp = GetStateProps(cell);     /* isolate the new state */
2203             actionImp = GetActionProps(cell);   /* isolate the action */
2204             if ((i == limit) && (actionImp == 0)) {
2205                 /* there is an unprocessed sequence if its property == eor   */
2206                 actionImp = 1;                  /* process the last sequence */
2207             }
2208             if (actionImp != 0) {
2209                 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
2210                 switch (actionImp) {
2211                 case 1:             /* process current seq1, init new seq1 */
2212                     processPropertySeq(levState, resProp, start1, i);
2213                     start1 = i;
2214                     break;
2215                 case 2:             /* init new seq2 */
2216                     start2 = i;
2217                     break;
2218                 case 3:             /* process seq1, process seq2, init new seq1 */
2219                     processPropertySeq(levState, resProp, start1, start2);
2220                     processPropertySeq(levState, _ON, start2, i);
2221                     start1 = i;
2222                     break;
2223                 case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2224                     processPropertySeq(levState, resProp, start1, start2);
2225                     start1 = start2;
2226                     start2 = i;
2227                     break;
2228                 default:            /* we should never get here */
2229                     throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
2230                 }
2231             }
2232         }
2233         /* flush possible pending sequence, e.g. ON */
2234         processPropertySeq(levState, eor, limit, limit);
2235     }
2236 
2237     /* perform (L1) and (X9) ---------------------------------------------------- */
2238 
2239     /*
2240      * Reset the embedding levels for some non-graphic characters (L1).
2241      * This method also sets appropriate levels for BN, and
2242      * explicit embedding types that are supposed to have been removed
2243      * from the paragraph in (X9).
2244      */
2245     private void adjustWSLevels() {
2246         int i;
2247 
2248         if ((flags & MASK_WS) != 0) {
2249             int flag;
2250             i = trailingWSStart;
2251             while (i > 0) {
2252                 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2253                 while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) {
2254                     if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2255                         levels[i] = 0;
2256                     } else {
2257                         levels[i] = GetParaLevelAt(i);
2258                     }
2259                 }
2260 
2261                 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2262                 /* here, i+1 is guaranteed to be <length */
2263                 while (i > 0) {
2264                     flag = DirPropFlagNC(dirProps[--i]);
2265                     if ((flag & MASK_BN_EXPLICIT) != 0) {
2266                         levels[i] = levels[i + 1];
2267                     } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2268                         levels[i] = 0;
2269                         break;
2270                     } else if ((flag & MASK_B_S) != 0){
2271                         levels[i] = GetParaLevelAt(i);
2272                         break;
2273                     }
2274                 }
2275             }
2276         }
2277     }
2278 
2279     private int Bidi_Min(int x, int y) {
2280         return x < y ? x : y;
2281     }
2282 
2283     private int Bidi_Abs(int x) {
2284         return x >= 0 ? x : -x;
2285     }
2286 
2287     /**
2288      * Perform the Unicode Bidi algorithm. It is defined in the
2289      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2290      * version 13,
2291      * also described in The Unicode Standard, Version 4.0 .<p>
2292      *
2293      * This method takes a piece of plain text containing one or more paragraphs,
2294      * with or without externally specified embedding levels from <i>styled</i>
2295      * text and computes the left-right-directionality of each character.<p>
2296      *
2297      * If the entire text is all of the same directionality, then
2298      * the method may not perform all the steps described by the algorithm,
2299      * i.e., some levels may not be the same as if all steps were performed.
2300      * This is not relevant for unidirectional text.<br>
2301      * For example, in pure LTR text with numbers the numbers would get
2302      * a resolved level of 2 higher than the surrounding text according to
2303      * the algorithm. This implementation may set all resolved levels to
2304      * the same value in such a case.<p>
2305      *
2306      * The text can be composed of multiple paragraphs. Occurrence of a block
2307      * separator in the text terminates a paragraph, and whatever comes next starts
2308      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
2309      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
2310      * in that case, the pair of characters is considered as terminating the
2311      * preceding paragraph, and a new paragraph will be started by a character
2312      * coming after the LF.
2313      *
2314      * Although the text is passed here as a <code>String</code>, it is
2315      * stored internally as an array of characters. Therefore the
2316      * documentation will refer to indexes of the characters in the text.
2317      *
2318      * @param text contains the text that the Bidi algorithm will be performed
2319      *        on. This text can be retrieved with <code>getText()</code> or
2320      *        <code>getTextAsString</code>.<br>
2321      *
2322      * @param paraLevel specifies the default level for the text;
2323      *        it is typically 0 (LTR) or 1 (RTL).
2324      *        If the method shall determine the paragraph level from the text,
2325      *        then <code>paraLevel</code> can be set to
2326      *        either <code>LEVEL_DEFAULT_LTR</code>
2327      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2328      *        paragraphs, the paragraph level shall be determined separately for
2329      *        each paragraph; if a paragraph does not include any strongly typed
2330      *        character, then the desired default is used (0 for LTR or 1 for RTL).
2331      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2332      *        is also valid, with odd levels indicating RTL.
2333      *
2334      * @param embeddingLevels (in) may be used to preset the embedding and override levels,
2335      *        ignoring characters like LRE and PDF in the text.
2336      *        A level overrides the directional property of its corresponding
2337      *        (same index) character if the level has the
2338      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2339      *        Except for that bit, it must be
2340      *        <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
2341      *        with one exception: a level of zero may be specified for a
2342      *        paragraph separator even if <code>paraLevel&gt;0</code> when multiple
2343      *        paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2344      *        <strong>Caution: </strong>A reference to this array, not a copy
2345      *        of the levels, will be stored in the <code>Bidi</code> object;
2346      *        the <code>embeddingLevels</code>
2347      *        should not be modified to avoid unexpected results on subsequent
2348      *        Bidi operations. However, the <code>setPara()</code> and
2349      *        <code>setLine()</code> methods may modify some or all of the
2350      *        levels.<br><br>
2351      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
2352      *        have one entry for each character in <code>text</code>.
2353      *
2354      * @throws IllegalArgumentException if the values in embeddingLevels are
2355      *         not within the allowed range
2356      *
2357      * @see #LEVEL_DEFAULT_LTR
2358      * @see #LEVEL_DEFAULT_RTL
2359      * @see #LEVEL_OVERRIDE
2360      * @see #MAX_EXPLICIT_LEVEL
2361      * @stable ICU 3.8
2362      */
2363     void setPara(String text, byte paraLevel, byte[] embeddingLevels)
2364     {
2365         if (text == null) {
2366             setPara(new char[0], paraLevel, embeddingLevels);
2367         } else {
2368             setPara(text.toCharArray(), paraLevel, embeddingLevels);
2369         }
2370     }
2371 
2372     /**
2373      * Perform the Unicode Bidi algorithm. It is defined in the
2374      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2375      * version 13,
2376      * also described in The Unicode Standard, Version 4.0 .<p>
2377      *
2378      * This method takes a piece of plain text containing one or more paragraphs,
2379      * with or without externally specified embedding levels from <i>styled</i>
2380      * text and computes the left-right-directionality of each character.<p>
2381      *
2382      * If the entire text is all of the same directionality, then
2383      * the method may not perform all the steps described by the algorithm,
2384      * i.e., some levels may not be the same as if all steps were performed.
2385      * This is not relevant for unidirectional text.<br>
2386      * For example, in pure LTR text with numbers the numbers would get
2387      * a resolved level of 2 higher than the surrounding text according to
2388      * the algorithm. This implementation may set all resolved levels to
2389      * the same value in such a case.<p>
2390      *
2391      * The text can be composed of multiple paragraphs. Occurrence of a block
2392      * separator in the text terminates a paragraph, and whatever comes next starts
2393      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
2394      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
2395      * in that case, the pair of characters is considered as terminating the
2396      * preceding paragraph, and a new paragraph will be started by a character
2397      * coming after the LF.
2398      *
2399      * The text is stored internally as an array of characters. Therefore the
2400      * documentation will refer to indexes of the characters in the text.
2401      *
2402      * @param chars contains the text that the Bidi algorithm will be performed
2403      *        on. This text can be retrieved with <code>getText()</code> or
2404      *        <code>getTextAsString</code>.<br>
2405      *
2406      * @param paraLevel specifies the default level for the text;
2407      *        it is typically 0 (LTR) or 1 (RTL).
2408      *        If the method shall determine the paragraph level from the text,
2409      *        then <code>paraLevel</code> can be set to
2410      *        either <code>LEVEL_DEFAULT_LTR</code>
2411      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2412      *        paragraphs, the paragraph level shall be determined separately for
2413      *        each paragraph; if a paragraph does not include any strongly typed
2414      *        character, then the desired default is used (0 for LTR or 1 for RTL).
2415      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2416      *        is also valid, with odd levels indicating RTL.
2417      *
2418      * @param embeddingLevels (in) may be used to preset the embedding and
2419      *        override levels, ignoring characters like LRE and PDF in the text.
2420      *        A level overrides the directional property of its corresponding
2421      *        (same index) character if the level has the
2422      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2423      *        Except for that bit, it must be
2424      *        {@code paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL},
2425      *        with one exception: a level of zero may be specified for a
2426      *        paragraph separator even if {@code paraLevel > 0} when multiple
2427      *        paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2428      *        <strong>Caution: </strong>A reference to this array, not a copy
2429      *        of the levels, will be stored in the <code>Bidi</code> object;
2430      *        the <code>embeddingLevels</code>
2431      *        should not be modified to avoid unexpected results on subsequent
2432      *        Bidi operations. However, the <code>setPara()</code> and
2433      *        <code>setLine()</code> methods may modify some or all of the
2434      *        levels.<br><br>
2435      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
2436      *        have one entry for each character in <code>text</code>.
2437      *
2438      * @throws IllegalArgumentException if the values in embeddingLevels are
2439      *         not within the allowed range
2440      *
2441      * @see #LEVEL_DEFAULT_LTR
2442      * @see #LEVEL_DEFAULT_RTL
2443      * @see #LEVEL_OVERRIDE
2444      * @see #MAX_EXPLICIT_LEVEL
2445      * @stable ICU 3.8
2446      */
2447     public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
2448     {
2449         /* check the argument values */
2450         if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {
2451             verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
2452         }
2453         if (chars == null) {
2454             chars = new char[0];
2455         }
2456 
2457         /* initialize the Bidi object */
2458         this.paraBidi = null;          /* mark unfinished setPara */
2459         this.text = chars;
2460         this.length = this.originalLength = this.resultLength = text.length;
2461         this.paraLevel = paraLevel;
2462         this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2463         this.paraCount = 1;
2464 
2465         /* Allocate zero-length arrays instead of setting to null here; then
2466          * checks for null in various places can be eliminated.
2467          */
2468         dirProps = new byte[0];
2469         levels = new byte[0];
2470         runs = new BidiRun[0];
2471         isGoodLogicalToVisualRunsMap = false;
2472         insertPoints.size = 0;          /* clean up from last call */
2473         insertPoints.confirmed = 0;     /* clean up from last call */
2474 
2475         /*
2476          * Save the original paraLevel if contextual; otherwise, set to 0.
2477          */
2478         if (IsDefaultLevel(paraLevel)) {
2479             defaultParaLevel = paraLevel;
2480         } else {
2481             defaultParaLevel = 0;
2482         }
2483 
2484         if (length == 0) {
2485             /*
2486              * For an empty paragraph, create a Bidi object with the paraLevel and
2487              * the flags and the direction set but without allocating zero-length arrays.
2488              * There is nothing more to do.
2489              */
2490             if (IsDefaultLevel(paraLevel)) {
2491                 this.paraLevel &= 1;
2492                 defaultParaLevel = 0;
2493             }
2494             if ((this.paraLevel & 1) != 0) {
2495                 flags = DirPropFlag(R);
2496                 direction = Bidi.DIRECTION_RIGHT_TO_LEFT;
2497             } else {
2498                 flags = DirPropFlag(L);
2499                 direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2500             }
2501 
2502             runCount = 0;
2503             paraCount = 0;
2504             paraBidi = this;         /* mark successful setPara */
2505             return;
2506         }
2507 
2508         runCount = -1;
2509 
2510         /*
2511          * Get the directional properties,
2512          * the flags bit-set, and
2513          * determine the paragraph level if necessary.
2514          */
2515         getDirPropsMemory(length);
2516         dirProps = dirPropsMemory;
2517         getDirProps();
2518 
2519         /* the processed length may have changed if OPTION_STREAMING is set */
2520         trailingWSStart = length;  /* the levels[] will reflect the WS run */
2521 
2522         /* allocate paras memory */
2523         if (paraCount > 1) {
2524             getInitialParasMemory(paraCount);
2525             paras = parasMemory;
2526             paras[paraCount - 1] = length;
2527         } else {
2528             /* initialize paras for single paragraph */
2529             paras = simpleParas;
2530             simpleParas[0] = length;
2531         }
2532 
2533         /* are explicit levels specified? */
2534         if (embeddingLevels == null) {
2535             /* no: determine explicit levels according to the (Xn) rules */
2536             getLevelsMemory(length);
2537             levels = levelsMemory;
2538             direction = resolveExplicitLevels();
2539         } else {
2540             /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
2541             levels = embeddingLevels;
2542             direction = checkExplicitLevels();
2543         }
2544 
2545         /*
2546          * The steps after (X9) in the Bidi algorithm are performed only if
2547          * the paragraph text has mixed directionality!
2548          */
2549         switch (direction) {
2550         case Bidi.DIRECTION_LEFT_TO_RIGHT:
2551             /* make sure paraLevel is even */
2552             paraLevel = (byte)((paraLevel + 1) & ~1);
2553 
2554             /* all levels are implicitly at paraLevel (important for getLevels()) */
2555             trailingWSStart = 0;
2556             break;
2557         case Bidi.DIRECTION_RIGHT_TO_LEFT:
2558             /* make sure paraLevel is odd */
2559             paraLevel |= 1;
2560 
2561             /* all levels are implicitly at paraLevel (important for getLevels()) */
2562             trailingWSStart = 0;
2563             break;
2564         default:
2565             this.impTabPair = impTab_DEFAULT;
2566 
2567             /*
2568              * If there are no external levels specified and there
2569              * are no significant explicit level codes in the text,
2570              * then we can treat the entire paragraph as one run.
2571              * Otherwise, we need to perform the following rules on runs of
2572              * the text with the same embedding levels. (X10)
2573              * "Significant" explicit level codes are ones that actually
2574              * affect non-BN characters.
2575              * Examples for "insignificant" ones are empty embeddings
2576              * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2577              */
2578             if (embeddingLevels == null && paraCount <= 1 &&
2579                 (flags & DirPropFlagMultiRuns) == 0) {
2580                 resolveImplicitLevels(0, length,
2581                         GetLRFromLevel(GetParaLevelAt(0)),
2582                         GetLRFromLevel(GetParaLevelAt(length - 1)));
2583             } else {
2584                 /* sor, eor: start and end types of same-level-run */
2585                 int start, limit = 0;
2586                 byte level, nextLevel;
2587                 short sor, eor;
2588 
2589                 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2590                 level = GetParaLevelAt(0);
2591                 nextLevel = levels[0];
2592                 if (level < nextLevel) {
2593                     eor = GetLRFromLevel(nextLevel);
2594                 } else {
2595                     eor = GetLRFromLevel(level);
2596                 }
2597 
2598                 do {
2599                     /* determine start and limit of the run (end points just behind the run) */
2600 
2601                     /* the values for this run's start are the same as for the previous run's end */
2602                     start = limit;
2603                     level = nextLevel;
2604                     if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {
2605                         /* except if this is a new paragraph, then set sor = para level */
2606                         sor = GetLRFromLevel(GetParaLevelAt(start));
2607                     } else {
2608                         sor = eor;
2609                     }
2610 
2611                     /* search for the limit of this run */
2612                     while (++limit < length && levels[limit] == level) {}
2613 
2614                     /* get the correct level of the next run */
2615                     if (limit < length) {
2616                         nextLevel = levels[limit];
2617                     } else {
2618                         nextLevel = GetParaLevelAt(length - 1);
2619                     }
2620 
2621                     /* determine eor from max(level, nextLevel); sor is last run's eor */
2622                     if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {
2623                         eor = GetLRFromLevel(nextLevel);
2624                     } else {
2625                         eor = GetLRFromLevel(level);
2626                     }
2627 
2628                     /* if the run consists of overridden directional types, then there
2629                        are no implicit types to be resolved */
2630                     if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
2631                         resolveImplicitLevels(start, limit, sor, eor);
2632                     } else {
2633                         /* remove the LEVEL_OVERRIDE flags */
2634                         do {
2635                             levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;
2636                         } while (start < limit);
2637                     }
2638                 } while (limit  < length);
2639             }
2640 
2641             /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2642             adjustWSLevels();
2643 
2644             break;
2645         }
2646 
2647         resultLength += insertPoints.size;
2648         paraBidi = this;             /* mark successful setPara */
2649     }
2650 
2651     /**
2652      * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
2653      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2654      * version 13,
2655      * also described in The Unicode Standard, Version 4.0 .<p>
2656      *
2657      * This method takes a paragraph of text and computes the
2658      * left-right-directionality of each character. The text should not
2659      * contain any Unicode block separators.<p>
2660      *
2661      * The RUN_DIRECTION attribute in the text, if present, determines the base
2662      * direction (left-to-right or right-to-left). If not present, the base
2663      * direction is computed using the Unicode Bidirectional Algorithm,
2664      * defaulting to left-to-right if there are no strong directional characters
2665      * in the text. This attribute, if present, must be applied to all the text
2666      * in the paragraph.<p>
2667      *
2668      * The BIDI_EMBEDDING attribute in the text, if present, represents
2669      * embedding level information. Negative values from -1 to -62 indicate
2670      * overrides at the absolute value of the level. Positive values from 1 to
2671      * 62 indicate embeddings. Where values are zero or not defined, the base
2672      * embedding level as determined by the base direction is assumed.<p>
2673      *
2674      * The NUMERIC_SHAPING attribute in the text, if present, converts European
2675      * digits to other decimal digits before running the bidi algorithm. This
2676      * attribute, if present, must be applied to all the text in the paragraph.
2677      *
2678      * If the entire text is all of the same directionality, then
2679      * the method may not perform all the steps described by the algorithm,
2680      * i.e., some levels may not be the same as if all steps were performed.
2681      * This is not relevant for unidirectional text.<br>
2682      * For example, in pure LTR text with numbers the numbers would get
2683      * a resolved level of 2 higher than the surrounding text according to
2684      * the algorithm. This implementation may set all resolved levels to
2685      * the same value in such a case.
2686      *
2687      * @param paragraph a paragraph of text with optional character and
2688      *        paragraph attribute information
2689      * @stable ICU 3.8
2690      */
2691     public void setPara(AttributedCharacterIterator paragraph)
2692     {
2693         byte paraLvl;
2694         char ch = paragraph.first();
2695         Boolean runDirection =
2696             (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
2697         Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
2698         if (runDirection == null) {
2699             paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
2700         } else {
2701             paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
2702                         (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT;
2703         }
2704 
2705         byte[] lvls = null;
2706         int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
2707         byte[] embeddingLevels = new byte[len];
2708         char[] txt = new char[len];
2709         int i = 0;
2710         while (ch != AttributedCharacterIterator.DONE) {
2711             txt[i] = ch;
2712             Integer embedding =
2713                 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING);
2714             if (embedding != null) {
2715                 byte level = embedding.byteValue();
2716                 if (level == 0) {
2717                     /* no-op */
2718                 } else if (level < 0) {
2719                     lvls = embeddingLevels;
2720                     embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE);
2721                 } else {
2722                     lvls = embeddingLevels;
2723                     embeddingLevels[i] = level;
2724                 }
2725             }
2726             ch = paragraph.next();
2727             ++i;
2728         }
2729 
2730         if (shaper != null) {
2731             NumericShapings.shape(shaper, txt, 0, len);
2732         }
2733         setPara(txt, paraLvl, lvls);
2734     }
2735 
2736     /**
2737      * Specify whether block separators must be allocated level zero,
2738      * so that successive paragraphs will progress from left to right.
2739      * This method must be called before <code>setPara()</code>.
2740      * Paragraph separators (B) may appear in the text.  Setting them to level zero
2741      * means that all paragraph separators (including one possibly appearing
2742      * in the last text position) are kept in the reordered text after the text
2743      * that they follow in the source text.
2744      * When this feature is not enabled, a paragraph separator at the last
2745      * position of the text before reordering will go to the first position
2746      * of the reordered text when the paragraph level is odd.
2747      *
2748      * @param ordarParaLTR specifies whether paragraph separators (B) must
2749      * receive level 0, so that successive paragraphs progress from left to right.
2750      *
2751      * @see #setPara
2752      * @stable ICU 3.8
2753      */
2754     private void orderParagraphsLTR(boolean ordarParaLTR) {
2755         orderParagraphsLTR = ordarParaLTR;
2756     }
2757 
2758     /**
2759      * Get the directionality of the text.
2760      *
2761      * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
2762      *         that indicates if the entire text
2763      *         represented by this object is unidirectional,
2764      *         and which direction, or if it is mixed-directional.
2765      *
2766      * @throws IllegalStateException if this call is not preceded by a successful
2767      *         call to <code>setPara</code> or <code>setLine</code>
2768      *
2769      * @see #LTR
2770      * @see #RTL
2771      * @see #MIXED
2772      * @stable ICU 3.8
2773      */
2774     private byte getDirection()
2775     {
2776         verifyValidParaOrLine();
2777         return direction;
2778     }
2779 
2780     /**
2781      * Get the length of the text.
2782      *
2783      * @return The length of the text that the <code>Bidi</code> object was
2784      *         created for.
2785      *
2786      * @throws IllegalStateException if this call is not preceded by a successful
2787      *         call to <code>setPara</code> or <code>setLine</code>
2788      * @stable ICU 3.8
2789      */
2790     public int getLength()
2791     {
2792         verifyValidParaOrLine();
2793         return originalLength;
2794     }
2795 
2796     /* paragraphs API methods ------------------------------------------------- */
2797 
2798     /**
2799      * Get the paragraph level of the text.
2800      *
2801      * @return The paragraph level. If there are multiple paragraphs, their
2802      *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
2803      *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
2804      *         is returned.
2805      *
2806      * @throws IllegalStateException if this call is not preceded by a successful
2807      *         call to <code>setPara</code> or <code>setLine</code>
2808      *
2809      * @see #LEVEL_DEFAULT_LTR
2810      * @see #LEVEL_DEFAULT_RTL
2811      * @see #getParagraph
2812      * @see #getParagraphByIndex
2813      * @stable ICU 3.8
2814      */
2815     public byte getParaLevel()
2816     {
2817         verifyValidParaOrLine();
2818         return paraLevel;
2819     }
2820 
2821     /**
2822      * Get the index of a paragraph, given a position within the text.
2823      *
2824      * @param charIndex is the index of a character within the text, in the
2825      *        range <code>[0..getProcessedLength()-1]</code>.
2826      *
2827      * @return The index of the paragraph containing the specified position,
2828      *         starting from 0.
2829      *
2830      * @throws IllegalStateException if this call is not preceded by a successful
2831      *         call to <code>setPara</code> or <code>setLine</code>
2832      * @throws IllegalArgumentException if charIndex is not within the legal range
2833      *
2834      * @see com.ibm.icu.text.BidiRun
2835      * @see #getProcessedLength
2836      * @stable ICU 3.8
2837      */
2838     public int getParagraphIndex(int charIndex)
2839     {
2840         verifyValidParaOrLine();
2841         BidiBase bidi = paraBidi;             /* get Para object if Line object */
2842         verifyRange(charIndex, 0, bidi.length);
2843         int paraIndex;
2844         for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) {
2845         }
2846         return paraIndex;
2847     }
2848 
2849     /**
2850      * <code>setLine()</code> returns a <code>Bidi</code> object to
2851      * contain the reordering information, especially the resolved levels,
2852      * for all the characters in a line of text. This line of text is
2853      * specified by referring to a <code>Bidi</code> object representing
2854      * this information for a piece of text containing one or more paragraphs,
2855      * and by specifying a range of indexes in this text.<p>
2856      * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
2857      *
2858      * This is used after calling <code>setPara()</code>
2859      * for a piece of text, and after line-breaking on that text.
2860      * It is not necessary if each paragraph is treated as a single line.<p>
2861      *
2862      * After line-breaking, rules (L1) and (L2) for the treatment of
2863      * trailing WS and for reordering are performed on
2864      * a <code>Bidi</code> object that represents a line.<p>
2865      *
2866      * <strong>Important: </strong>the line <code>Bidi</code> object may
2867      * reference data within the global text <code>Bidi</code> object.
2868      * You should not alter the content of the global text object until
2869      * you are finished using the line object.
2870      *
2871      * @param start is the line's first index into the text.
2872      *
2873      * @param limit is just behind the line's last index into the text
2874      *        (its last index +1).
2875      *
2876      * @return a <code>Bidi</code> object that will now represent a line of the text.
2877      *
2878      * @throws IllegalStateException if this call is not preceded by a successful
2879      *         call to <code>setPara</code>
2880      * @throws IllegalArgumentException if start and limit are not in the range
2881      *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
2882      *         or if the specified line crosses a paragraph boundary
2883      *
2884      * @see #setPara
2885      * @see #getProcessedLength
2886      * @stable ICU 3.8
2887      */
2888     public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
2889     {
2890         verifyValidPara();
2891         verifyRange(start, 0, limit);
2892         verifyRange(limit, 0, length+1);
2893 
2894         return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
2895     }
2896 
2897     /**
2898      * Get the level for one character.
2899      *
2900      * @param charIndex the index of a character.
2901      *
2902      * @return The level for the character at <code>charIndex</code>.
2903      *
2904      * @throws IllegalStateException if this call is not preceded by a successful
2905      *         call to <code>setPara</code> or <code>setLine</code>
2906      * @throws IllegalArgumentException if charIndex is not in the range
2907      *         <code>0&lt;=charIndex&lt;getProcessedLength()</code>
2908      *
2909      * @see #getProcessedLength
2910      * @stable ICU 3.8
2911      */
2912     public byte getLevelAt(int charIndex)
2913     {
2914         if (charIndex < 0 || charIndex >= length) {
2915             return (byte)getBaseLevel();
2916         }
2917         verifyValidParaOrLine();
2918         verifyRange(charIndex, 0, length);
2919         return BidiLine.getLevelAt(this, charIndex);
2920     }
2921 
2922     /**
2923      * Get an array of levels for each character.<p>
2924      *
2925      * Note that this method may allocate memory under some
2926      * circumstances, unlike <code>getLevelAt()</code>.
2927      *
2928      * @return The levels array for the text,
2929      *         or <code>null</code> if an error occurs.
2930      *
2931      * @throws IllegalStateException if this call is not preceded by a successful
2932      *         call to <code>setPara</code> or <code>setLine</code>
2933      * @stable ICU 3.8
2934      */
2935     private byte[] getLevels()
2936     {
2937         verifyValidParaOrLine();
2938         if (length <= 0) {
2939             return new byte[0];
2940         }
2941         return BidiLine.getLevels(this);
2942     }
2943 
2944     /**
2945      * Get the number of runs.
2946      * This method may invoke the actual reordering on the
2947      * <code>Bidi</code> object, after <code>setPara()</code>
2948      * may have resolved only the levels of the text. Therefore,
2949      * <code>countRuns()</code> may have to allocate memory,
2950      * and may throw an exception if it fails to do so.
2951      *
2952      * @return The number of runs.
2953      *
2954      * @throws IllegalStateException if this call is not preceded by a successful
2955      *         call to <code>setPara</code> or <code>setLine</code>
2956      * @stable ICU 3.8
2957      */
2958     public int countRuns()
2959     {
2960         verifyValidParaOrLine();
2961         BidiLine.getRuns(this);
2962         return runCount;
2963     }
2964 
2965     /**
2966      * Get a visual-to-logical index map (array) for the characters in the
2967      * <code>Bidi</code> (paragraph or line) object.
2968      * <p>
2969      * Some values in the map may be <code>MAP_NOWHERE</code> if the
2970      * corresponding text characters are Bidi marks inserted in the visual
2971      * output by the option <code>OPTION_INSERT_MARKS</code>.
2972      * <p>
2973      * When the visual output is altered by using options of
2974      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
2975      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
2976      * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
2977      * be correct. It is advised to use, when possible, reordering options
2978      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
2979      *
2980      * @return an array of <code>getResultLength()</code>
2981      *        indexes which will reflect the reordering of the characters.<br><br>
2982      *        The index map will result in
2983      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
2984      *        <code>indexMap</code> represents the returned array.
2985      *
2986      * @throws IllegalStateException if this call is not preceded by a successful
2987      *         call to <code>setPara</code> or <code>setLine</code>
2988      *
2989      * @see #getLogicalMap
2990      * @see #getLogicalIndex
2991      * @see #getResultLength
2992      * @see #MAP_NOWHERE
2993      * @see #OPTION_INSERT_MARKS
2994      * @see #writeReordered
2995      * @stable ICU 3.8
2996      */
2997     private int[] getVisualMap()
2998     {
2999         /* countRuns() checks successful call to setPara/setLine */
3000         countRuns();
3001         if (resultLength <= 0) {
3002             return new int[0];
3003         }
3004         return BidiLine.getVisualMap(this);
3005     }
3006 
3007     /**
3008      * This is a convenience method that does not use a <code>Bidi</code> object.
3009      * It is intended to be used for when an application has determined the levels
3010      * of objects (character sequences) and just needs to have them reordered (L2).
3011      * This is equivalent to using <code>getVisualMap()</code> on a
3012      * <code>Bidi</code> object.
3013      *
3014      * @param levels is an array of levels that have been determined by
3015      *        the application.
3016      *
3017      * @return an array of <code>levels.length</code>
3018      *        indexes which will reflect the reordering of the characters.<p>
3019      *        The index map will result in
3020      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
3021      *        <code>indexMap</code> represents the returned array.
3022      *
3023      * @stable ICU 3.8
3024      */
3025     private static int[] reorderVisual(byte[] levels)
3026     {
3027         return BidiLine.reorderVisual(levels);
3028     }
3029 
3030     /**
3031      * Constant indicating that the base direction depends on the first strong
3032      * directional character in the text according to the Unicode Bidirectional
3033      * Algorithm. If no strong directional character is present, the base
3034      * direction is left-to-right.
3035      * @stable ICU 3.8
3036      */
3037     private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;
3038 
3039     /**
3040      * Constant indicating that the base direction depends on the first strong
3041      * directional character in the text according to the Unicode Bidirectional
3042      * Algorithm. If no strong directional character is present, the base
3043      * direction is right-to-left.
3044      * @stable ICU 3.8
3045      */
3046     private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;
3047 
3048     /**
3049      * Create Bidi from the given text, embedding, and direction information.
3050      * The embeddings array may be null. If present, the values represent
3051      * embedding level information. Negative values from -1 to -61 indicate
3052      * overrides at the absolute value of the level. Positive values from 1 to
3053      * 61 indicate embeddings. Where values are zero, the base embedding level
3054      * as determined by the base direction is assumed.<p>
3055      *
3056      * Note: this constructor calls setPara() internally.
3057      *
3058      * @param text an array containing the paragraph of text to process.
3059      * @param textStart the index into the text array of the start of the
3060      *        paragraph.
3061      * @param embeddings an array containing embedding values for each character
3062      *        in the paragraph. This can be null, in which case it is assumed
3063      *        that there is no external embedding information.
3064      * @param embStart the index into the embedding array of the start of the
3065      *        paragraph.
3066      * @param paragraphLength the length of the paragraph in the text and
3067      *        embeddings arrays.
3068      * @param flags a collection of flags that control the algorithm. The
3069      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
3070      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
3071      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
3072      *
3073      * @throws IllegalArgumentException if the values in embeddings are
3074      *         not within the allowed range
3075      *
3076      * @see #DIRECTION_LEFT_TO_RIGHT
3077      * @see #DIRECTION_RIGHT_TO_LEFT
3078      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
3079      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
3080      * @stable ICU 3.8
3081      */
3082     public BidiBase(char[] text,
3083              int textStart,
3084              byte[] embeddings,
3085              int embStart,
3086              int paragraphLength,
3087              int flags)
3088      {
3089         this(0, 0);
3090         byte paraLvl;
3091         switch (flags) {
3092         case Bidi.DIRECTION_LEFT_TO_RIGHT:
3093         default:
3094             paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;
3095             break;
3096         case Bidi.DIRECTION_RIGHT_TO_LEFT:
3097             paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;
3098             break;
3099         case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
3100             paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
3101             break;
3102         case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
3103             paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;
3104             break;
3105         }
3106         byte[] paraEmbeddings;
3107         if (embeddings == null) {
3108             paraEmbeddings = null;
3109         } else {
3110             paraEmbeddings = new byte[paragraphLength];
3111             byte lev;
3112             for (int i = 0; i < paragraphLength; i++) {
3113                 lev = embeddings[i + embStart];
3114                 if (lev < 0) {
3115                     lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE);
3116                 } else if (lev == 0) {
3117                     lev = paraLvl;
3118                     if (paraLvl > MAX_EXPLICIT_LEVEL) {
3119                         lev &= 1;
3120                     }
3121                 }
3122                 paraEmbeddings[i] = lev;
3123             }
3124         }
3125         if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
3126             setPara(text, paraLvl, paraEmbeddings);
3127         } else {
3128             char[] paraText = new char[paragraphLength];
3129             System.arraycopy(text, textStart, paraText, 0, paragraphLength);
3130             setPara(paraText, paraLvl, paraEmbeddings);
3131         }
3132     }
3133 
3134     /**
3135      * Return true if the line is not left-to-right or right-to-left. This means
3136      * it either has mixed runs of left-to-right and right-to-left text, or the
3137      * base direction differs from the direction of the only run of text.
3138      *
3139      * @return true if the line is not left-to-right or right-to-left.
3140      *
3141      * @throws IllegalStateException if this call is not preceded by a successful
3142      *         call to <code>setPara</code>
3143      * @stable ICU 3.8
3144      */
3145     public boolean isMixed()
3146     {
3147         return (!isLeftToRight() && !isRightToLeft());
3148     }
3149 
3150     /**
3151     * Return true if the line is all left-to-right text and the base direction
3152      * is left-to-right.
3153      *
3154      * @return true if the line is all left-to-right text and the base direction
3155      *         is left-to-right.
3156      *
3157      * @throws IllegalStateException if this call is not preceded by a successful
3158      *         call to <code>setPara</code>
3159      * @stable ICU 3.8
3160      */
3161     public boolean isLeftToRight()
3162     {
3163         return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0);
3164     }
3165 
3166     /**
3167      * Return true if the line is all right-to-left text, and the base direction
3168      * is right-to-left
3169      *
3170      * @return true if the line is all right-to-left text, and the base
3171      *         direction is right-to-left
3172      *
3173      * @throws IllegalStateException if this call is not preceded by a successful
3174      *         call to <code>setPara</code>
3175      * @stable ICU 3.8
3176      */
3177     public boolean isRightToLeft()
3178     {
3179         return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1);
3180     }
3181 
3182     /**
3183      * Return true if the base direction is left-to-right
3184      *
3185      * @return true if the base direction is left-to-right
3186      *
3187      * @throws IllegalStateException if this call is not preceded by a successful
3188      *         call to <code>setPara</code> or <code>setLine</code>
3189      *
3190      * @stable ICU 3.8
3191      */
3192     public boolean baseIsLeftToRight()
3193     {
3194         return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT);
3195     }
3196 
3197     /**
3198      * Return the base level (0 if left-to-right, 1 if right-to-left).
3199      *
3200      * @return the base level
3201      *
3202      * @throws IllegalStateException if this call is not preceded by a successful
3203      *         call to <code>setPara</code> or <code>setLine</code>
3204      *
3205      * @stable ICU 3.8
3206      */
3207     public int getBaseLevel()
3208     {
3209         return getParaLevel();
3210     }
3211 
3212     /**
3213      * Compute the logical to visual run mapping
3214      */
3215     private void getLogicalToVisualRunsMap()
3216     {
3217         if (isGoodLogicalToVisualRunsMap) {
3218             return;
3219         }
3220         int count = countRuns();
3221         if ((logicalToVisualRunsMap == null) ||
3222             (logicalToVisualRunsMap.length < count)) {
3223             logicalToVisualRunsMap = new int[count];
3224         }
3225         int i;
3226         long[] keys = new long[count];
3227         for (i = 0; i < count; i++) {
3228             keys[i] = ((long)(runs[i].start)<<32) + i;
3229         }
3230         Arrays.sort(keys);
3231         for (i = 0; i < count; i++) {
3232             logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
3233         }
3234         keys = null;
3235         isGoodLogicalToVisualRunsMap = true;
3236     }
3237 
3238     /**
3239      * Return the level of the nth logical run in this line.
3240      *
3241      * @param run the index of the run, between 0 and <code>countRuns()-1</code>
3242      *
3243      * @return the level of the run
3244      *
3245      * @throws IllegalStateException if this call is not preceded by a successful
3246      *         call to <code>setPara</code> or <code>setLine</code>
3247      * @throws IllegalArgumentException if <code>run</code> is not in
3248      *         the range <code>0&lt;=run&lt;countRuns()</code>
3249      * @stable ICU 3.8
3250      */
3251     public int getRunLevel(int run)
3252     {
3253         verifyValidParaOrLine();
3254         BidiLine.getRuns(this);
3255         if (run < 0 || run >= runCount) {
3256             return getParaLevel();
3257         }
3258         getLogicalToVisualRunsMap();
3259         return runs[logicalToVisualRunsMap[run]].level;
3260     }
3261 
3262     /**
3263      * Return the index of the character at the start of the nth logical run in
3264      * this line, as an offset from the start of the line.
3265      *
3266      * @param run the index of the run, between 0 and <code>countRuns()</code>
3267      *
3268      * @return the start of the run
3269      *
3270      * @throws IllegalStateException if this call is not preceded by a successful
3271      *         call to <code>setPara</code> or <code>setLine</code>
3272      * @throws IllegalArgumentException if <code>run</code> is not in
3273      *         the range <code>0&lt;=run&lt;countRuns()</code>
3274      * @stable ICU 3.8
3275      */
3276     public int getRunStart(int run)
3277     {
3278         verifyValidParaOrLine();
3279         BidiLine.getRuns(this);
3280         if (runCount == 1) {
3281             return 0;
3282         } else if (run == runCount) {
3283             return length;
3284         }
3285         verifyIndex(run, 0, runCount);
3286         getLogicalToVisualRunsMap();
3287         return runs[logicalToVisualRunsMap[run]].start;
3288     }
3289 
3290     /**
3291      * Return the index of the character past the end of the nth logical run in
3292      * this line, as an offset from the start of the line. For example, this
3293      * will return the length of the line for the last run on the line.
3294      *
3295      * @param run the index of the run, between 0 and <code>countRuns()</code>
3296      *
3297      * @return the limit of the run
3298      *
3299      * @throws IllegalStateException if this call is not preceded by a successful
3300      *         call to <code>setPara</code> or <code>setLine</code>
3301      * @throws IllegalArgumentException if <code>run</code> is not in
3302      *         the range <code>0&lt;=run&lt;countRuns()</code>
3303      * @stable ICU 3.8
3304      */
3305     public int getRunLimit(int run)
3306     {
3307         verifyValidParaOrLine();
3308         BidiLine.getRuns(this);
3309         if (runCount == 1) {
3310             return length;
3311         }
3312         verifyIndex(run, 0, runCount);
3313         getLogicalToVisualRunsMap();
3314         int idx = logicalToVisualRunsMap[run];
3315         int len = idx == 0 ? runs[idx].limit :
3316                                 runs[idx].limit - runs[idx-1].limit;
3317         return runs[idx].start + len;
3318     }
3319 
3320     /**
3321      * Return true if the specified text requires bidi analysis. If this returns
3322      * false, the text will display left-to-right. Clients can then avoid
3323      * constructing a Bidi object. Text in the Arabic Presentation Forms area of
3324      * Unicode is presumed to already be shaped and ordered for display, and so
3325      * will not cause this method to return true.
3326      *
3327      * @param text the text containing the characters to test
3328      * @param start the start of the range of characters to test
3329      * @param limit the limit of the range of characters to test
3330      *
3331      * @return true if the range of characters requires bidi analysis
3332      *
3333      * @stable ICU 3.8
3334      */
3335     public static boolean requiresBidi(char[] text,
3336             int start,
3337             int limit)
3338     {
3339         final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT |
3340                 1 << AL |
3341                 1 << RLE |
3342                 1 << RLO |
3343                 1 << AN);
3344 
3345         if (0 > start || start > limit || limit > text.length) {
3346             throw new IllegalArgumentException("Value start " + start +
3347                       " is out of range 0 to " + limit);
3348         }
3349         for (int i = start; i < limit; ++i) {
3350             if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
3351                 Character.isLowSurrogate(text[i+1])) {
3352                 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
3353                     return true;
3354                 }
3355             } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
3356                 return true;
3357             }
3358         }
3359         return false;
3360     }
3361 
3362     /**
3363      * Reorder the objects in the array into visual order based on their levels.
3364      * This is a utility method to use when you have a collection of objects
3365      * representing runs of text in logical order, each run containing text at a
3366      * single level. The elements at <code>index</code> from
3367      * <code>objectStart</code> up to <code>objectStart + count</code> in the
3368      * objects array will be reordered into visual order assuming
3369      * each run of text has the level indicated by the corresponding element in
3370      * the levels array (at <code>index - objectStart + levelStart</code>).
3371      *
3372      * @param levels an array representing the bidi level of each object
3373      * @param levelStart the start position in the levels array
3374      * @param objects the array of objects to be reordered into visual order
3375      * @param objectStart the start position in the objects array
3376      * @param count the number of objects to reorder
3377      * @stable ICU 3.8
3378      */
3379     public static void reorderVisually(byte[] levels,
3380             int levelStart,
3381             Object[] objects,
3382             int objectStart,
3383             int count)
3384     {
3385         if (0 > levelStart || levels.length <= levelStart) {
3386             throw new IllegalArgumentException("Value levelStart " +
3387                       levelStart + " is out of range 0 to " +
3388                       (levels.length-1));
3389         }
3390         if (0 > objectStart || objects.length <= objectStart) {
3391             throw new IllegalArgumentException("Value objectStart " +
3392                       levelStart + " is out of range 0 to " +
3393                       (objects.length-1));
3394         }
3395         if (0 > count || objects.length < (objectStart+count)) {
3396             throw new IllegalArgumentException("Value count " +
3397                       levelStart + " is out of range 0 to " +
3398                       (objects.length - objectStart));
3399         }
3400         byte[] reorderLevels = new byte[count];
3401         System.arraycopy(levels, levelStart, reorderLevels, 0, count);
3402         int[] indexMap = reorderVisual(reorderLevels);
3403         Object[] temp = new Object[count];
3404         System.arraycopy(objects, objectStart, temp, 0, count);
3405         for (int i = 0; i < count; ++i) {
3406             objects[objectStart + i] = temp[indexMap[i]];
3407         }
3408     }
3409 
3410     /**
3411      * Display the bidi internal state, used in debugging.
3412      */
3413     public String toString() {
3414         StringBuilder buf = new StringBuilder(getClass().getName());
3415 
3416         buf.append("[dir: ");
3417         buf.append(direction);
3418         buf.append(" baselevel: ");
3419         buf.append(paraLevel);
3420         buf.append(" length: ");
3421         buf.append(length);
3422         buf.append(" runs: ");
3423         if (levels == null) {
3424             buf.append("none");
3425         } else {
3426             buf.append('[');
3427             buf.append(levels[0]);
3428             for (int i = 1; i < levels.length; i++) {
3429                 buf.append(' ');
3430                 buf.append(levels[i]);
3431             }
3432             buf.append(']');
3433         }
3434         buf.append(" text: [0x");
3435         buf.append(Integer.toHexString(text[0]));
3436         for (int i = 1; i < text.length; i++) {
3437             buf.append(" 0x");
3438             buf.append(Integer.toHexString(text[i]));
3439         }
3440         buf.append("]]");
3441 
3442         return buf.toString();
3443     }
3444 
3445     /**
3446      * A class that provides access to constants defined by
3447      * java.awt.font.TextAttribute without creating a static dependency.
3448      */
3449     private static class TextAttributeConstants {
3450         // Make sure to load the AWT's TextAttribute class before using the constants, if any.
3451         static {
3452             try {
3453                 Class.forName("java.awt.font.TextAttribute", true, null);
3454             } catch (ClassNotFoundException e) {}
3455         }
3456         static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
3457 
3458         /**
3459          * TextAttribute instances (or a fake Attribute type if
3460          * java.awt.font.TextAttribute is not present)
3461          */
3462         static final AttributedCharacterIterator.Attribute RUN_DIRECTION =
3463             getTextAttribute("RUN_DIRECTION");
3464         static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING =
3465             getTextAttribute("NUMERIC_SHAPING");
3466         static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING =
3467             getTextAttribute("BIDI_EMBEDDING");
3468 
3469         /**
3470          * TextAttribute.RUN_DIRECTION_LTR
3471          */
3472         static final Boolean RUN_DIRECTION_LTR = (jafa == null) ?
3473             Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR");
3474 
3475         @SuppressWarnings("serial")
3476         private static AttributedCharacterIterator.Attribute
3477             getTextAttribute(String name)
3478         {
3479             if (jafa == null) {
3480                 // fake attribute
3481                 return new AttributedCharacterIterator.Attribute(name) { };
3482             } else {
3483                 return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name);
3484             }
3485         }
3486     }
3487 
3488     /**
3489      * A class that provides access to java.awt.font.NumericShaper without
3490      * creating a static dependency.
3491      */
3492     private static class NumericShapings {
3493         // Make sure to load the AWT's NumericShaper class before calling shape, if any.
3494         static {
3495             try {
3496                 Class.forName("java.awt.font.NumericShaper", true, null);
3497             } catch (ClassNotFoundException e) {}
3498         }
3499         static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
3500 
3501         /**
3502          * Invokes NumericShaping shape(text,start,count) method.
3503          */
3504         static void shape(Object shaper, char[] text, int start, int count) {
3505             if (jafa != null) {
3506                 jafa.shape(shaper, text, start, count);
3507             }
3508         }
3509     }
3510 }