1 /*
   2  * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  *******************************************************************************
  27  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  28  *                                                                             *
  29  * The original version of this source code and documentation is copyrighted   *
  30  * and owned by IBM, These materials are provided under terms of a License     *
  31  * Agreement between IBM and Sun. This technology is protected by multiple     *
  32  * US and International patents. This notice and attribution to IBM may not    *
  33  * to removed.                                                                 *
  34  *******************************************************************************
  35  */
  36 
  37 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
  38  * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
  39  * concept of RUNS_ONLY which is a double operation.
  40  * It could be advantageous to divide this into 3 concepts:
  41  * a) Operation: direct / inverse / RUNS_ONLY
  42  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L
  43  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
  44  * This would allow combinations not possible today like RUNS_ONLY with
  45  * NUMBERS_SPECIAL.
  46  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
  47  * REMOVE_CONTROLS for the inverse step.
  48  * Not all combinations would be supported, and probably not all do make sense.
  49  * This would need to document which ones are supported and what are the
  50  * fallbacks for unsupported combinations.
  51  */
  52 
  53 package sun.text.bidi;
  54 
  55 import java.io.IOException;
  56 import java.lang.reflect.Array;
  57 import java.text.AttributedCharacterIterator;
  58 import java.text.Bidi;
  59 import java.util.Arrays;
  60 import java.util.MissingResourceException;
  61 import sun.misc.JavaAWTFontAccess;
  62 import sun.misc.SharedSecrets;
  63 import sun.text.normalizer.UBiDiProps;
  64 import sun.text.normalizer.UCharacter;
  65 import sun.text.normalizer.UTF16;
  66 
  67 /**
  68  *
  69  * <h2>Bidi algorithm for ICU</h2>
  70  *
  71  * This is an implementation of the Unicode Bidirectional algorithm. The
  72  * algorithm is defined in the <a
  73  * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
  74  * version 13, also described in The Unicode Standard, Version 4.0 .
  75  * <p>
  76  *
  77  * Note: Libraries that perform a bidirectional algorithm and reorder strings
  78  * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
  79  * shaping (ArabicShaping) classes can be used at the core of such "Storage
  80  * Layout Engines".
  81  *
  82  * <h3>General remarks about the API:</h3>
  83  *
  84  * The &quot;limit&quot; of a sequence of characters is the position just after
  85  * their last character, i.e., one more than that position.
  86  * <p>
  87  *
  88  * Some of the API methods provide access to &quot;runs&quot;. Such a
  89  * &quot;run&quot; is defined as a sequence of characters that are at the same
  90  * embedding level after performing the Bidi algorithm.
  91  * <p>
  92  *
  93  * <h3>Basic concept: paragraph</h3>
  94  * A piece of text can be divided into several paragraphs by characters
  95  * with the Bidi class <code>Block Separator</code>. For handling of
  96  * paragraphs, see:
  97  * <ul>
  98  * <li>{@link #countParagraphs}
  99  * <li>{@link #getParaLevel}
 100  * <li>{@link #getParagraph}
 101  * <li>{@link #getParagraphByIndex}
 102  * </ul>
 103  *
 104  * <h3>Basic concept: text direction</h3>
 105  * The direction of a piece of text may be:
 106  * <ul>
 107  * <li>{@link #LTR}
 108  * <li>{@link #RTL}
 109  * <li>{@link #MIXED}
 110  * </ul>
 111  *
 112  * <h3>Basic concept: levels</h3>
 113  *
 114  * Levels in this API represent embedding levels according to the Unicode
 115  * Bidirectional Algorithm.
 116  * Their low-order bit (even/odd value) indicates the visual direction.<p>
 117  *
 118  * Levels can be abstract values when used for the
 119  * <code>paraLevel</code> and <code>embeddingLevels</code>
 120  * arguments of <code>setPara()</code>; there:
 121  * <ul>
 122  * <li>the high-order bit of an <code>embeddingLevels[]</code>
 123  * value indicates whether the using application is
 124  * specifying the level of a character to <i>override</i> whatever the
 125  * Bidi implementation would resolve it to.</li>
 126  * <li><code>paraLevel</code> can be set to the
 127  * pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
 128  * and <code>LEVEL_DEFAULT_RTL</code>.</li>
 129  * </ul>
 130  *
 131  * <p>The related constants are not real, valid level values.
 132  * <code>DEFAULT_XXX</code> can be used to specify
 133  * a default for the paragraph level for
 134  * when the <code>setPara()</code> method
 135  * shall determine it but there is no
 136  * strongly typed character in the input.<p>
 137  *
 138  * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
 139  * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
 140  * just like with normal LTR and RTL level values -
 141  * these special values are designed that way. Also, the implementation
 142  * assumes that MAX_EXPLICIT_LEVEL is odd.
 143  *
 144  * <ul><b>See Also:</b>
 145  * <li>{@link #LEVEL_DEFAULT_LTR}
 146  * <li>{@link #LEVEL_DEFAULT_RTL}
 147  * <li>{@link #LEVEL_OVERRIDE}
 148  * <li>{@link #MAX_EXPLICIT_LEVEL}
 149  * <li>{@link #setPara}
 150  * </ul>
 151  *
 152  * <h3>Basic concept: Reordering Mode</h3>
 153  * Reordering mode values indicate which variant of the Bidi algorithm to
 154  * use.
 155  *
 156  * <ul><b>See Also:</b>
 157  * <li>{@link #setReorderingMode}
 158  * <li>{@link #REORDER_DEFAULT}
 159  * <li>{@link #REORDER_NUMBERS_SPECIAL}
 160  * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
 161  * <li>{@link #REORDER_RUNS_ONLY}
 162  * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
 163  * <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
 164  * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
 165  * </ul>
 166  *
 167  * <h3>Basic concept: Reordering Options</h3>
 168  * Reordering options can be applied during Bidi text transformations.
 169  * <ul><b>See Also:</b>
 170  * <li>{@link #setReorderingOptions}
 171  * <li>{@link #OPTION_DEFAULT}
 172  * <li>{@link #OPTION_INSERT_MARKS}
 173  * <li>{@link #OPTION_REMOVE_CONTROLS}
 174  * <li>{@link #OPTION_STREAMING}
 175  * </ul>
 176  *
 177  *
 178  * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
 179  * @stable ICU 3.8
 180  *
 181  *
 182  * <h4> Sample code for the ICU Bidi API </h4>
 183  *
 184  * <h5>Rendering a paragraph with the ICU Bidi API</h5>
 185  *
 186  * This is (hypothetical) sample code that illustrates how the ICU Bidi API
 187  * could be used to render a paragraph of text. Rendering code depends highly on
 188  * the graphics system, therefore this sample code must make a lot of
 189  * assumptions, which may or may not match any existing graphics system's
 190  * properties.
 191  *
 192  * <p>
 193  * The basic assumptions are:
 194  * </p>
 195  * <ul>
 196  * <li>Rendering is done from left to right on a horizontal line.</li>
 197  * <li>A run of single-style, unidirectional text can be rendered at once.
 198  * </li>
 199  * <li>Such a run of text is passed to the graphics system with characters
 200  * (code units) in logical order.</li>
 201  * <li>The line-breaking algorithm is very complicated and Locale-dependent -
 202  * and therefore its implementation omitted from this sample code.</li>
 203  * </ul>
 204  *
 205  * <pre>
 206  *
 207  *  package com.ibm.icu.dev.test.bidi;
 208  *
 209  *  import com.ibm.icu.text.Bidi;
 210  *  import com.ibm.icu.text.BidiRun;
 211  *
 212  *  public class Sample {
 213  *
 214  *      static final int styleNormal = 0;
 215  *      static final int styleSelected = 1;
 216  *      static final int styleBold = 2;
 217  *      static final int styleItalics = 4;
 218  *      static final int styleSuper=8;
 219  *      static final int styleSub = 16;
 220  *
 221  *      static class StyleRun {
 222  *          int limit;
 223  *          int style;
 224  *
 225  *          public StyleRun(int limit, int style) {
 226  *              this.limit = limit;
 227  *              this.style = style;
 228  *          }
 229  *      }
 230  *
 231  *      static class Bounds {
 232  *          int start;
 233  *          int limit;
 234  *
 235  *          public Bounds(int start, int limit) {
 236  *              this.start = start;
 237  *              this.limit = limit;
 238  *          }
 239  *      }
 240  *
 241  *      static int getTextWidth(String text, int start, int limit,
 242  *                              StyleRun[] styleRuns, int styleRunCount) {
 243  *          // simplistic way to compute the width
 244  *          return limit - start;
 245  *      }
 246  *
 247  *      // set limit and StyleRun limit for a line
 248  *      // from text[start] and from styleRuns[styleRunStart]
 249  *      // using Bidi.getLogicalRun(...)
 250  *      // returns line width
 251  *      static int getLineBreak(String text, Bounds line, Bidi para,
 252  *                              StyleRun styleRuns[], Bounds styleRun) {
 253  *          // dummy return
 254  *          return 0;
 255  *      }
 256  *
 257  *      // render runs on a line sequentially, always from left to right
 258  *
 259  *      // prepare rendering a new line
 260  *      static void startLine(byte textDirection, int lineWidth) {
 261  *          System.out.println();
 262  *      }
 263  *
 264  *      // render a run of text and advance to the right by the run width
 265  *      // the text[start..limit-1] is always in logical order
 266  *      static void renderRun(String text, int start, int limit,
 267  *                            byte textDirection, int style) {
 268  *      }
 269  *
 270  *      // We could compute a cross-product
 271  *      // from the style runs with the directional runs
 272  *      // and then reorder it.
 273  *      // Instead, here we iterate over each run type
 274  *      // and render the intersections -
 275  *      // with shortcuts in simple (and common) cases.
 276  *      // renderParagraph() is the main function.
 277  *
 278  *      // render a directional run with
 279  *      // (possibly) multiple style runs intersecting with it
 280  *      static void renderDirectionalRun(String text, int start, int limit,
 281  *                                       byte direction, StyleRun styleRuns[],
 282  *                                       int styleRunCount) {
 283  *          int i;
 284  *
 285  *          // iterate over style runs
 286  *          if (direction == Bidi.LTR) {
 287  *              int styleLimit;
 288  *              for (i = 0; i < styleRunCount; ++i) {
 289  *                  styleLimit = styleRuns[i].limit;
 290  *                  if (start < styleLimit) {
 291  *                      if (styleLimit > limit) {
 292  *                          styleLimit = limit;
 293  *                      }
 294  *                      renderRun(text, start, styleLimit,
 295  *                                direction, styleRuns[i].style);
 296  *                      if (styleLimit == limit) {
 297  *                          break;
 298  *                      }
 299  *                      start = styleLimit;
 300  *                  }
 301  *              }
 302  *          } else {
 303  *              int styleStart;
 304  *
 305  *              for (i = styleRunCount-1; i >= 0; --i) {
 306  *                  if (i > 0) {
 307  *                      styleStart = styleRuns[i-1].limit;
 308  *                  } else {
 309  *                      styleStart = 0;
 310  *                  }
 311  *                  if (limit >= styleStart) {
 312  *                      if (styleStart < start) {
 313  *                          styleStart = start;
 314  *                      }
 315  *                      renderRun(text, styleStart, limit, direction,
 316  *                                styleRuns[i].style);
 317  *                      if (styleStart == start) {
 318  *                          break;
 319  *                      }
 320  *                      limit = styleStart;
 321  *                  }
 322  *              }
 323  *          }
 324  *      }
 325  *
 326  *      // the line object represents text[start..limit-1]
 327  *      static void renderLine(Bidi line, String text, int start, int limit,
 328  *                             StyleRun styleRuns[], int styleRunCount) {
 329  *          byte direction = line.getDirection();
 330  *          if (direction != Bidi.MIXED) {
 331  *              // unidirectional
 332  *              if (styleRunCount <= 1) {
 333  *                  renderRun(text, start, limit, direction, styleRuns[0].style);
 334  *              } else {
 335  *                  renderDirectionalRun(text, start, limit, direction,
 336  *                                       styleRuns, styleRunCount);
 337  *              }
 338  *          } else {
 339  *              // mixed-directional
 340  *              int count, i;
 341  *              BidiRun run;
 342  *
 343  *              try {
 344  *                  count = line.countRuns();
 345  *              } catch (IllegalStateException e) {
 346  *                  e.printStackTrace();
 347  *                  return;
 348  *              }
 349  *              if (styleRunCount <= 1) {
 350  *                  int style = styleRuns[0].style;
 351  *
 352  *                  // iterate over directional runs
 353  *                  for (i = 0; i < count; ++i) {
 354  *                      run = line.getVisualRun(i);
 355  *                      renderRun(text, run.getStart(), run.getLimit(),
 356  *                                run.getDirection(), style);
 357  *                  }
 358  *              } else {
 359  *                  // iterate over both directional and style runs
 360  *                  for (i = 0; i < count; ++i) {
 361  *                      run = line.getVisualRun(i);
 362  *                      renderDirectionalRun(text, run.getStart(),
 363  *                                           run.getLimit(), run.getDirection(),
 364  *                                           styleRuns, styleRunCount);
 365  *                  }
 366  *              }
 367  *          }
 368  *      }
 369  *
 370  *      static void renderParagraph(String text, byte textDirection,
 371  *                                  StyleRun styleRuns[], int styleRunCount,
 372  *                                  int lineWidth) {
 373  *          int length = text.length();
 374  *          Bidi para = new Bidi();
 375  *          try {
 376  *              para.setPara(text,
 377  *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
 378  *                                              : Bidi.LEVEL_DEFAULT_LTR,
 379  *                           null);
 380  *          } catch (Exception e) {
 381  *              e.printStackTrace();
 382  *              return;
 383  *          }
 384  *          byte paraLevel = (byte)(1 & para.getParaLevel());
 385  *          StyleRun styleRun = new StyleRun(length, styleNormal);
 386  *
 387  *          if (styleRuns == null || styleRunCount <= 0) {
 388  *              styleRuns = new StyleRun[1];
 389  *              styleRunCount = 1;
 390  *              styleRuns[0] = styleRun;
 391  *          }
 392  *          // assume styleRuns[styleRunCount-1].limit>=length
 393  *
 394  *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
 395  *          if (width <= lineWidth) {
 396  *              // everything fits onto one line
 397  *
 398  *              // prepare rendering a new line from either left or right
 399  *              startLine(paraLevel, width);
 400  *
 401  *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
 402  *          } else {
 403  *              // we need to render several lines
 404  *              Bidi line = new Bidi(length, 0);
 405  *              int start = 0, limit;
 406  *              int styleRunStart = 0, styleRunLimit;
 407  *
 408  *              for (;;) {
 409  *                  limit = length;
 410  *                  styleRunLimit = styleRunCount;
 411  *                  width = getLineBreak(text, new Bounds(start, limit),
 412  *                                       para, styleRuns,
 413  *                                       new Bounds(styleRunStart, styleRunLimit));
 414  *                  try {
 415  *                      line = para.setLine(start, limit);
 416  *                  } catch (Exception e) {
 417  *                      e.printStackTrace();
 418  *                      return;
 419  *                  }
 420  *                  // prepare rendering a new line
 421  *                  // from either left or right
 422  *                  startLine(paraLevel, width);
 423  *
 424  *                  if (styleRunStart > 0) {
 425  *                      int newRunCount = styleRuns.length - styleRunStart;
 426  *                      StyleRun[] newRuns = new StyleRun[newRunCount];
 427  *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
 428  *                                       newRunCount);
 429  *                      renderLine(line, text, start, limit, newRuns,
 430  *                                 styleRunLimit - styleRunStart);
 431  *                  } else {
 432  *                      renderLine(line, text, start, limit, styleRuns,
 433  *                                 styleRunLimit - styleRunStart);
 434  *                  }
 435  *                  if (limit == length) {
 436  *                      break;
 437  *                  }
 438  *                  start = limit;
 439  *                  styleRunStart = styleRunLimit - 1;
 440  *                  if (start >= styleRuns[styleRunStart].limit) {
 441  *                      ++styleRunStart;
 442  *                  }
 443  *              }
 444  *          }
 445  *      }
 446  *
 447  *      public static void main(String[] args)
 448  *      {
 449  *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
 450  *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
 451  *      }
 452  *  }
 453  *
 454  * </pre>
 455  */
 456 
 457 public class BidiBase {
 458 
 459     class Point {
 460         int pos;    /* position in text */
 461         int flag;   /* flag for LRM/RLM, before/after */
 462     }
 463 
 464     class InsertPoints {
 465         int size;
 466         int confirmed;
 467         Point[] points = new Point[0];
 468     }
 469 
 470     /** Paragraph level setting<p>
 471      *
 472      * Constant indicating that the base direction depends on the first strong
 473      * directional character in the text according to the Unicode Bidirectional
 474      * Algorithm. If no strong directional character is present,
 475      * then set the paragraph level to 0 (left-to-right).<p>
 476      *
 477      * If this value is used in conjunction with reordering modes
 478      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
 479      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 480      * is assumed to be visual LTR, and the text after reordering is required
 481      * to be the corresponding logical string with appropriate contextual
 482      * direction. The direction of the result string will be RTL if either
 483      * the righmost or leftmost strong character of the source text is RTL
 484      * or Arabic Letter, the direction will be LTR otherwise.<p>
 485      *
 486      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
 487      * be added at the beginning of the result string to ensure round trip
 488      * (that the result string, when reordered back to visual, will produce
 489      * the original source text).
 490      * @see #REORDER_INVERSE_LIKE_DIRECT
 491      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 492      * @stable ICU 3.8
 493      */
 494     public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e;
 495 
 496     /** Paragraph level setting<p>
 497      *
 498      * Constant indicating that the base direction depends on the first strong
 499      * directional character in the text according to the Unicode Bidirectional
 500      * Algorithm. If no strong directional character is present,
 501      * then set the paragraph level to 1 (right-to-left).<p>
 502      *
 503      * If this value is used in conjunction with reordering modes
 504      * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
 505      * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
 506      * is assumed to be visual LTR, and the text after reordering is required
 507      * to be the corresponding logical string with appropriate contextual
 508      * direction. The direction of the result string will be RTL if either
 509      * the righmost or leftmost strong character of the source text is RTL
 510      * or Arabic Letter, or if the text contains no strong character;
 511      * the direction will be LTR otherwise.<p>
 512      *
 513      * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
 514      * be added at the beginning of the result string to ensure round trip
 515      * (that the result string, when reordered back to visual, will produce
 516      * the original source text).
 517      * @see #REORDER_INVERSE_LIKE_DIRECT
 518      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 519      * @stable ICU 3.8
 520      */
 521     public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f;
 522 
 523     /**
 524      * Maximum explicit embedding level.
 525      * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
 526      * @stable ICU 3.8
 527      */
 528     public static final byte MAX_EXPLICIT_LEVEL = 61;
 529 
 530     /**
 531      * Bit flag for level input.
 532      * Overrides directional properties.
 533      * @stable ICU 3.8
 534      */
 535     public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80;
 536 
 537     /**
 538      * Special value which can be returned by the mapping methods when a
 539      * logical index has no corresponding visual index or vice-versa. This may
 540      * happen for the logical-to-visual mapping of a Bidi control when option
 541      * <code>OPTION_REMOVE_CONTROLS</code> is
 542      * specified. This can also happen for the visual-to-logical mapping of a
 543      * Bidi mark (LRM or RLM) inserted by option
 544      * <code>OPTION_INSERT_MARKS</code>.
 545      * @see #getVisualIndex
 546      * @see #getVisualMap
 547      * @see #getLogicalIndex
 548      * @see #getLogicalMap
 549      * @see #OPTION_INSERT_MARKS
 550      * @see #OPTION_REMOVE_CONTROLS
 551      * @stable ICU 3.8
 552      */
 553     public static final int MAP_NOWHERE = -1;
 554 
 555     /**
 556      * Mixed-directional text.
 557      * @stable ICU 3.8
 558      */
 559     public static final byte MIXED = 2;
 560 
 561     /**
 562      * option bit for writeReordered():
 563      * replace characters with the "mirrored" property in RTL runs
 564      * by their mirror-image mappings
 565      *
 566      * @see #writeReordered
 567      * @stable ICU 3.8
 568      */
 569     public static final short DO_MIRRORING = 2;
 570 
 571     /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
 572      * @see #setReorderingMode
 573      * @stable ICU 3.8
 574      */
 575     private static final short REORDER_DEFAULT = 0;
 576 
 577     /** Reordering mode: Logical to Visual algorithm which handles numbers in
 578      * a way which mimicks the behavior of Windows XP.
 579      * @see #setReorderingMode
 580      * @stable ICU 3.8
 581      */
 582     private static final short REORDER_NUMBERS_SPECIAL = 1;
 583 
 584     /** Reordering mode: Logical to Visual algorithm grouping numbers with
 585      * adjacent R characters (reversible algorithm).
 586      * @see #setReorderingMode
 587      * @stable ICU 3.8
 588      */
 589     private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
 590 
 591     /** Reordering mode: Reorder runs only to transform a Logical LTR string
 592      * to the logical RTL string with the same display, or vice-versa.<br>
 593      * If this mode is set together with option
 594      * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
 595      * text may be removed and other controls may be added to produce the
 596      * minimum combination which has the required display.
 597      * @see #OPTION_INSERT_MARKS
 598      * @see #setReorderingMode
 599      * @stable ICU 3.8
 600      */
 601     private static final short REORDER_RUNS_ONLY = 3;
 602 
 603     /** Reordering mode: Visual to Logical algorithm which handles numbers
 604      * like L (same algorithm as selected by <code>setInverse(true)</code>.
 605      * @see #setInverse
 606      * @see #setReorderingMode
 607      * @stable ICU 3.8
 608      */
 609     private static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
 610 
 611     /** Reordering mode: Visual to Logical algorithm equivalent to the regular
 612      * Logical to Visual algorithm.
 613      * @see #setReorderingMode
 614      * @stable ICU 3.8
 615      */
 616     private static final short REORDER_INVERSE_LIKE_DIRECT = 5;
 617 
 618     /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
 619      * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
 620      * @see #setReorderingMode
 621      * @stable ICU 3.8
 622      */
 623     private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
 624 
 625     /* Reordering mode values must be ordered so that all the regular logical to
 626      * visual modes come first, and all inverse Bidi modes come last.
 627      */
 628     private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
 629             REORDER_NUMBERS_SPECIAL;
 630 
 631     /**
 632      * Option bit for <code>setReorderingOptions</code>:
 633      * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
 634      * a reordering to a Logical order
 635      *
 636      * <p>This option must be set or reset before calling
 637      * <code>setPara</code>.</p>
 638      *
 639      * <p>This option is significant only with reordering modes which generate
 640      * a result with Logical order, specifically.</p>
 641      * <ul>
 642      *   <li><code>REORDER_RUNS_ONLY</code></li>
 643      *   <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
 644      *   <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
 645      *   <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
 646      * </ul>
 647      *
 648      * <p>If this option is set in conjunction with reordering mode
 649      * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
 650      * <code>setInverse(true)</code>, it implies option
 651      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 652      * <code>writeReordered()</code>.</p>
 653      *
 654      * <p>For other reordering modes, a minimum number of LRM or RLM characters
 655      * will be added to the source text after reordering it so as to ensure
 656      * round trip, i.e. when applying the inverse reordering mode on the
 657      * resulting logical text with removal of Bidi marks
 658      * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
 659      * <code>setPara()</code> or option
 660      * <code>REMOVE_BIDI_CONTROLS</code> in
 661      * <code>writeReordered</code>), the result will be identical to the
 662      * source text in the first transformation.
 663      *
 664      * <p>This option will be ignored if specified together with option
 665      * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
 666      * <code>REMOVE_BIDI_CONTROLS</code> in calls to method
 667      * <code>writeReordered()</code> and it implies option
 668      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 669      * <code>writeReordered()</code> if the reordering mode is
 670      * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
 671      *
 672      * @see #setReorderingMode
 673      * @see #setReorderingOptions
 674      * @see #INSERT_LRM_FOR_NUMERIC
 675      * @see #REMOVE_BIDI_CONTROLS
 676      * @see #OPTION_REMOVE_CONTROLS
 677      * @see #REORDER_RUNS_ONLY
 678      * @see #REORDER_INVERSE_NUMBERS_AS_L
 679      * @see #REORDER_INVERSE_LIKE_DIRECT
 680      * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
 681      * @stable ICU 3.8
 682      */
 683     private static final int OPTION_INSERT_MARKS = 1;
 684 
 685     /**
 686      * Option bit for <code>setReorderingOptions</code>:
 687      * remove Bidi control characters
 688      *
 689      * <p>This option must be set or reset before calling
 690      * <code>setPara</code>.</p>
 691      *
 692      * <p>This option nullifies option
 693      * <code>OPTION_INSERT_MARKS</code>. It inhibits option
 694      * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
 695      * <code>writeReordered()</code> and it implies option
 696      * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>
 697      *
 698      * @see #setReorderingMode
 699      * @see #setReorderingOptions
 700      * @see #OPTION_INSERT_MARKS
 701      * @see #INSERT_LRM_FOR_NUMERIC
 702      * @see #REMOVE_BIDI_CONTROLS
 703      * @stable ICU 3.8
 704      */
 705     private static final int OPTION_REMOVE_CONTROLS = 2;
 706 
 707     /**
 708      * Option bit for <code>setReorderingOptions</code>:
 709      * process the output as part of a stream to be continued
 710      *
 711      * <p>This option must be set or reset before calling
 712      * <code>setPara</code>.</p>
 713      *
 714      * <p>This option specifies that the caller is interested in processing
 715      * large text object in parts. The results of the successive calls are
 716      * expected to be concatenated by the caller. Only the call for the last
 717      * part will have this option bit off.</p>
 718      *
 719      * <p>When this option bit is on, <code>setPara()</code> may process
 720      * less than the full source text in order to truncate the text at a
 721      * meaningful boundary. The caller should call
 722      * <code>getProcessedLength()</code> immediately after calling
 723      * <code>setPara()</code> in order to determine how much of the source
 724      * text has been processed. Source text beyond that length should be
 725      * resubmitted in following calls to <code>setPara</code>. The
 726      * processed length may be less than the length of the source text if a
 727      * character preceding the last character of the source text constitutes a
 728      * reasonable boundary (like a block separator) for text to be continued.<br>
 729      * If the last character of the source text constitutes a reasonable
 730      * boundary, the whole text will be processed at once.<br>
 731      * If nowhere in the source text there exists
 732      * such a reasonable boundary, the processed length will be zero.<br>
 733      * The caller should check for such an occurrence and do one of the following:
 734      * <ul><li>submit a larger amount of text with a better chance to include
 735      *         a reasonable boundary.</li>
 736      *     <li>resubmit the same text after turning off option
 737      *         <code>OPTION_STREAMING</code>.</li></ul>
 738      * In all cases, this option should be turned off before processing the last
 739      * part of the text.</p>
 740      *
 741      * <p>When the <code>OPTION_STREAMING</code> option is used, it is
 742      * recommended to call <code>orderParagraphsLTR()</code> with argument
 743      * <code>orderParagraphsLTR</code> set to <code>true</code> before calling
 744      * <code>setPara()</code> so that later paragraphs may be concatenated to
 745      * previous paragraphs on the right.
 746      * </p>
 747      *
 748      * @see #setReorderingMode
 749      * @see #setReorderingOptions
 750      * @see #getProcessedLength
 751      * @see #orderParagraphsLTR
 752      * @stable ICU 3.8
 753      */
 754     private static final int OPTION_STREAMING = 4;
 755 
 756     /*
 757      *   Comparing the description of the Bidi algorithm with this implementation
 758      *   is easier with the same names for the Bidi types in the code as there.
 759      *   See UCharacterDirection
 760      */
 761     private static final byte L   = 0;
 762     private static final byte R   = 1;
 763     private static final byte EN  = 2;
 764     private static final byte ES  = 3;
 765     private static final byte ET  = 4;
 766     private static final byte AN  = 5;
 767     private static final byte CS  = 6;
 768     static final byte B   = 7;
 769     private static final byte S   = 8;
 770     private static final byte WS  = 9;
 771     private static final byte ON  = 10;
 772     private static final byte LRE = 11;
 773     private static final byte LRO = 12;
 774     private static final byte AL  = 13;
 775     private static final byte RLE = 14;
 776     private static final byte RLO = 15;
 777     private static final byte PDF = 16;
 778     private static final byte NSM = 17;
 779     private static final byte BN  = 18;
 780 
 781     private static final int MASK_R_AL = (1 << R | 1 << AL);
 782 
 783     private static final char CR = '\r';
 784     private static final char LF = '\n';
 785 
 786     static final int LRM_BEFORE = 1;
 787     static final int LRM_AFTER = 2;
 788     static final int RLM_BEFORE = 4;
 789     static final int RLM_AFTER = 8;
 790 
 791     /*
 792      * reference to parent paragraph object (reference to self if this object is
 793      * a paragraph object); set to null in a newly opened object; set to a
 794      * real value after a successful execution of setPara or setLine
 795      */
 796     BidiBase                paraBidi;
 797 
 798     final UBiDiProps    bdp;
 799 
 800     /* character array representing the current text */
 801     char[]              text;
 802 
 803     /* length of the current text */
 804     int                 originalLength;
 805 
 806     /* if the option OPTION_STREAMING is set, this is the length of
 807      * text actually processed by <code>setPara</code>, which may be shorter
 808      * than the original length. Otherwise, it is identical to the original
 809      * length.
 810      */
 811     public int                 length;
 812 
 813     /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
 814      * marks are allowed to be inserted in one of the reordering modes, the
 815      * length of the result string may be different from the processed length.
 816      */
 817     int                 resultLength;
 818 
 819     /* indicators for whether memory may be allocated after construction */
 820     boolean             mayAllocateText;
 821     boolean             mayAllocateRuns;
 822 
 823     /* arrays with one value per text-character */
 824     byte[]              dirPropsMemory = new byte[1];
 825     byte[]              levelsMemory = new byte[1];
 826     byte[]              dirProps;
 827     byte[]              levels;
 828 
 829     /* must block separators receive level 0? */
 830     boolean             orderParagraphsLTR;
 831 
 832     /* the paragraph level */
 833     byte                paraLevel;
 834 
 835     /* original paraLevel when contextual */
 836     /* must be one of DEFAULT_xxx or 0 if not contextual */
 837     byte                defaultParaLevel;
 838 
 839     /* the following is set in setPara, used in processPropertySeq */
 840 
 841     ImpTabPair          impTabPair;  /* reference to levels state table pair */
 842 
 843     /* the overall paragraph or line directionality*/
 844     byte                direction;
 845 
 846     /* flags is a bit set for which directional properties are in the text */
 847     int                 flags;
 848 
 849     /* lastArabicPos is index to the last AL in the text, -1 if none */
 850     int                 lastArabicPos;
 851 
 852     /* characters after trailingWSStart are WS and are */
 853     /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
 854     int                 trailingWSStart;
 855 
 856     /* fields for paragraph handling */
 857     int                 paraCount;       /* set in getDirProps() */
 858     int[]               parasMemory = new int[1];
 859     int[]               paras;           /* limits of paragraphs, filled in
 860                                           ResolveExplicitLevels() or CheckExplicitLevels() */
 861 
 862     /* for single paragraph text, we only need a tiny array of paras (no allocation) */
 863     int[]               simpleParas = {0};
 864 
 865     /* fields for line reordering */
 866     int                 runCount;     /* ==-1: runs not set up yet */
 867     BidiRun[]           runsMemory = new BidiRun[0];
 868     BidiRun[]           runs;
 869 
 870     /* for non-mixed text, we only need a tiny array of runs (no allocation) */
 871     BidiRun[]           simpleRuns = {new BidiRun()};
 872 
 873     /* mapping of runs in logical order to visual order */
 874     int[]               logicalToVisualRunsMap;
 875 
 876     /* flag to indicate that the map has been updated */
 877     boolean             isGoodLogicalToVisualRunsMap;
 878 
 879     /* for inverse Bidi with insertion of directional marks */
 880     InsertPoints        insertPoints = new InsertPoints();
 881 
 882     /* for option OPTION_REMOVE_CONTROLS */
 883     int                 controlCount;
 884 
 885     /*
 886      * Sometimes, bit values are more appropriate
 887      * to deal with directionality properties.
 888      * Abbreviations in these method names refer to names
 889      * used in the Bidi algorithm.
 890      */
 891     static int DirPropFlag(byte dir) {
 892         return (1 << dir);
 893     }
 894 
 895     /*
 896      * The following bit is ORed to the property of characters in paragraphs
 897      * with contextual RTL direction when paraLevel is contextual.
 898      */
 899     static final byte CONTEXT_RTL_SHIFT = 6;
 900     static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT);   // 0x40
 901     static byte NoContextRTL(byte dir)
 902     {
 903         return (byte)(dir & ~CONTEXT_RTL);
 904     }
 905 
 906     /*
 907      * The following is a variant of DirProp.DirPropFlag() which ignores the
 908      * CONTEXT_RTL bit.
 909      */
 910     static int DirPropFlagNC(byte dir) {
 911         return (1<<(dir & ~CONTEXT_RTL));
 912     }
 913 
 914     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
 915 
 916     /* to avoid some conditional statements, use tiny constant arrays */
 917     static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
 918     static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
 919     static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
 920 
 921     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
 922     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; }
 923     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; }
 924 
 925     /*
 926      *  are there any characters that are LTR?
 927      */
 928     static final int MASK_LTR =
 929         DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO);
 930 
 931     /*
 932      *  are there any characters that are RTL?
 933      */
 934     static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO);
 935 
 936     /* explicit embedding codes */
 937     private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO);
 938     private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO);
 939     private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF);
 940     private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
 941 
 942     /* paragraph and segment separators */
 943     private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
 944 
 945     /* all types that are counted as White Space or Neutral in some steps */
 946     static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT;
 947     private static final int MASK_N = DirPropFlag(ON)|MASK_WS;
 948 
 949     /* types that are neutrals or could becomes neutrals in (Wn) */
 950     private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N;
 951 
 952     /*
 953      * These types may be changed to "e",
 954      * the embedding type (L or R) of the run,
 955      * in the Bidi algorithm (N2)
 956      */
 957     static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
 958 
 959     /*
 960      *  the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
 961      */
 962     private static byte GetLRFromLevel(byte level)
 963     {
 964         return (byte)(level & 1);
 965     }
 966 
 967     private static boolean IsDefaultLevel(byte level)
 968     {
 969         return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR);
 970     }
 971 
 972     byte GetParaLevelAt(int index)
 973     {
 974         return (defaultParaLevel != 0) ?
 975                 (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel;
 976     }
 977 
 978     static boolean IsBidiControlChar(int c)
 979     {
 980         /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
 981                            0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
 982         return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
 983     }
 984 
 985     public void verifyValidPara()
 986     {
 987         if (this != this.paraBidi) {
 988             throw new IllegalStateException("");
 989         }
 990     }
 991 
 992     public void verifyValidParaOrLine()
 993     {
 994         BidiBase para = this.paraBidi;
 995         /* verify Para */
 996         if (this == para) {
 997             return;
 998         }
 999         /* verify Line */
1000         if ((para == null) || (para != para.paraBidi)) {
1001             throw new IllegalStateException();
1002         }
1003     }
1004 
1005     public void verifyRange(int index, int start, int limit)
1006     {
1007         if (index < start || index >= limit) {
1008             throw new IllegalArgumentException("Value " + index +
1009                       " is out of range " + start + " to " + limit);
1010         }
1011     }
1012 
1013     public void verifyIndex(int index, int start, int limit)
1014     {
1015         if (index < start || index >= limit) {
1016             throw new ArrayIndexOutOfBoundsException("Index " + index +
1017                       " is out of range " + start + " to " + limit);
1018         }
1019     }
1020 
1021     /**
1022      * Allocate a <code>Bidi</code> object with preallocated memory
1023      * for internal structures.
1024      * This method provides a <code>Bidi</code> object like the default constructor
1025      * but it also preallocates memory for internal structures
1026      * according to the sizings supplied by the caller.<p>
1027      * The preallocation can be limited to some of the internal memory
1028      * by setting some values to 0 here. That means that if, e.g.,
1029      * <code>maxRunCount</code> cannot be reasonably predetermined and should not
1030      * be set to <code>maxLength</code> (the only failproof value) to avoid
1031      * wasting  memory, then <code>maxRunCount</code> could be set to 0 here
1032      * and the internal structures that are associated with it will be allocated
1033      * on demand, just like with the default constructor.
1034      *
1035      * @param maxLength is the maximum text or line length that internal memory
1036      *        will be preallocated for. An attempt to associate this object with a
1037      *        longer text will fail, unless this value is 0, which leaves the allocation
1038      *        up to the implementation.
1039      *
1040      * @param maxRunCount is the maximum anticipated number of same-level runs
1041      *        that internal memory will be preallocated for. An attempt to access
1042      *        visual runs on an object that was not preallocated for as many runs
1043      *        as the text was actually resolved to will fail,
1044      *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
1045      *        The number of runs depends on the actual text and maybe anywhere between
1046      *        1 and <code>maxLength</code>. It is typically small.
1047      *
1048      * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
1049      * @stable ICU 3.8
1050      */
1051     public BidiBase(int maxLength, int maxRunCount)
1052      {
1053         /* check the argument values */
1054         if (maxLength < 0 || maxRunCount < 0) {
1055             throw new IllegalArgumentException();
1056         }
1057 
1058         /* reset the object, all reference variables null, all flags false,
1059            all sizes 0.
1060            In fact, we don't need to do anything, since class members are
1061            initialized as zero when an instance is created.
1062          */
1063         /*
1064         mayAllocateText = false;
1065         mayAllocateRuns = false;
1066         orderParagraphsLTR = false;
1067         paraCount = 0;
1068         runCount = 0;
1069         trailingWSStart = 0;
1070         flags = 0;
1071         paraLevel = 0;
1072         defaultParaLevel = 0;
1073         direction = 0;
1074         */
1075         /* get Bidi properties */
1076         try {
1077             bdp = UBiDiProps.getSingleton();
1078         }
1079         catch (IOException e) {
1080             throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
1081         }
1082 
1083         /* allocate memory for arrays as requested */
1084         if (maxLength > 0) {
1085             getInitialDirPropsMemory(maxLength);
1086             getInitialLevelsMemory(maxLength);
1087         } else {
1088             mayAllocateText = true;
1089         }
1090 
1091         if (maxRunCount > 0) {
1092             // if maxRunCount == 1, use simpleRuns[]
1093             if (maxRunCount > 1) {
1094                 getInitialRunsMemory(maxRunCount);
1095             }
1096         } else {
1097             mayAllocateRuns = true;
1098         }
1099     }
1100 
1101     /*
1102      * We are allowed to allocate memory if object==null or
1103      * mayAllocate==true for each array that we need.
1104      *
1105      * Assume sizeNeeded>0.
1106      * If object != null, then assume size > 0.
1107      */
1108     private Object getMemory(String label, Object array, Class<?> arrayClass,
1109             boolean mayAllocate, int sizeNeeded)
1110     {
1111         int len = Array.getLength(array);
1112 
1113         /* we have at least enough memory and must not allocate */
1114         if (sizeNeeded == len) {
1115             return array;
1116         }
1117         if (!mayAllocate) {
1118             /* we must not allocate */
1119             if (sizeNeeded <= len) {
1120                 return array;
1121             }
1122             throw new OutOfMemoryError("Failed to allocate memory for "
1123                                        + label);
1124         }
1125         /* we may try to grow or shrink */
1126         /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1127            the allocation altogether and rely on this.length */
1128         try {
1129             return Array.newInstance(arrayClass, sizeNeeded);
1130         } catch (Exception e) {
1131             throw new OutOfMemoryError("Failed to allocate memory for "
1132                                        + label);
1133         }
1134     }
1135 
1136     /* helper methods for each allocated array */
1137     private void getDirPropsMemory(boolean mayAllocate, int len)
1138     {
1139         Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1140         dirPropsMemory = (byte[]) array;
1141     }
1142 
1143     void getDirPropsMemory(int len)
1144     {
1145         getDirPropsMemory(mayAllocateText, len);
1146     }
1147 
1148     private void getLevelsMemory(boolean mayAllocate, int len)
1149     {
1150         Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1151         levelsMemory = (byte[]) array;
1152     }
1153 
1154     void getLevelsMemory(int len)
1155     {
1156         getLevelsMemory(mayAllocateText, len);
1157     }
1158 
1159     private void getRunsMemory(boolean mayAllocate, int len)
1160     {
1161         Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1162         runsMemory = (BidiRun[]) array;
1163     }
1164 
1165     void getRunsMemory(int len)
1166     {
1167         getRunsMemory(mayAllocateRuns, len);
1168     }
1169 
1170     /* additional methods used by constructor - always allow allocation */
1171     private void getInitialDirPropsMemory(int len)
1172     {
1173         getDirPropsMemory(true, len);
1174     }
1175 
1176     private void getInitialLevelsMemory(int len)
1177     {
1178         getLevelsMemory(true, len);
1179     }
1180 
1181     private void getInitialParasMemory(int len)
1182     {
1183         Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len);
1184         parasMemory = (int[]) array;
1185     }
1186 
1187     private void getInitialRunsMemory(int len)
1188     {
1189         getRunsMemory(true, len);
1190     }
1191 
1192 /* perform (P2)..(P3) ------------------------------------------------------- */
1193 
1194     private void getDirProps()
1195     {
1196         int i = 0, i0, i1;
1197         flags = 0;          /* collect all directionalities in the text */
1198         int uchar;
1199         byte dirProp;
1200         byte paraDirDefault = 0;   /* initialize to avoid compiler warnings */
1201         boolean isDefaultLevel = IsDefaultLevel(paraLevel);
1202         /* for inverse Bidi, the default para level is set to RTL if there is a
1203            strong R or AL character at either end of the text                */
1204         lastArabicPos = -1;
1205         controlCount = 0;
1206 
1207         final int NOT_CONTEXTUAL = 0;         /* 0: not contextual paraLevel */
1208         final int LOOKING_FOR_STRONG = 1;     /* 1: looking for first strong char */
1209         final int FOUND_STRONG_CHAR = 2;      /* 2: found first strong char       */
1210 
1211         int state;
1212         int paraStart = 0;                    /* index of first char in paragraph */
1213         byte paraDir;                         /* == CONTEXT_RTL within paragraphs
1214                                                  starting with strong R char      */
1215         byte lastStrongDir=0;                 /* for default level & inverse Bidi */
1216         int lastStrongLTR=0;                  /* for STREAMING option             */
1217 
1218         if (isDefaultLevel) {
1219             paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0;
1220             paraDir = paraDirDefault;
1221             lastStrongDir = paraDirDefault;
1222             state = LOOKING_FOR_STRONG;
1223         } else {
1224             state = NOT_CONTEXTUAL;
1225             paraDir = 0;
1226         }
1227         /* count paragraphs and determine the paragraph level (P2..P3) */
1228         /*
1229          * see comment on constant fields:
1230          * the LEVEL_DEFAULT_XXX values are designed so that
1231          * their low-order bit alone yields the intended default
1232          */
1233 
1234         for (i = 0; i < originalLength; /* i is incremented in the loop */) {
1235             i0 = i;                     /* index of first code unit */
1236             uchar = UTF16.charAt(text, 0, originalLength, i);
1237             i += Character.charCount(uchar);
1238             i1 = i - 1; /* index of last code unit, gets the directional property */
1239 
1240             dirProp = (byte)bdp.getClass(uchar);
1241 
1242             flags |= DirPropFlag(dirProp);
1243             dirProps[i1] = (byte)(dirProp | paraDir);
1244             if (i1 > i0) {     /* set previous code units' properties to BN */
1245                 flags |= DirPropFlag(BN);
1246                 do {
1247                     dirProps[--i1] = (byte)(BN | paraDir);
1248                 } while (i1 > i0);
1249             }
1250             if (state == LOOKING_FOR_STRONG) {
1251                 if (dirProp == L) {
1252                     state = FOUND_STRONG_CHAR;
1253                     if (paraDir != 0) {
1254                         paraDir = 0;
1255                         for (i1 = paraStart; i1 < i; i1++) {
1256                             dirProps[i1] &= ~CONTEXT_RTL;
1257                         }
1258                     }
1259                     continue;
1260                 }
1261                 if (dirProp == R || dirProp == AL) {
1262                     state = FOUND_STRONG_CHAR;
1263                     if (paraDir == 0) {
1264                         paraDir = CONTEXT_RTL;
1265                         for (i1 = paraStart; i1 < i; i1++) {
1266                             dirProps[i1] |= CONTEXT_RTL;
1267                         }
1268                     }
1269                     continue;
1270                 }
1271             }
1272             if (dirProp == L) {
1273                 lastStrongDir = 0;
1274                 lastStrongLTR = i;      /* i is index to next character */
1275             }
1276             else if (dirProp == R) {
1277                 lastStrongDir = CONTEXT_RTL;
1278             }
1279             else if (dirProp == AL) {
1280                 lastStrongDir = CONTEXT_RTL;
1281                 lastArabicPos = i-1;
1282             }
1283             else if (dirProp == B) {
1284                 if (i < originalLength) {   /* B not last char in text */
1285                     if (!((uchar == (int)CR) && (text[i] == (int)LF))) {
1286                         paraCount++;
1287                     }
1288                     if (isDefaultLevel) {
1289                         state=LOOKING_FOR_STRONG;
1290                         paraStart = i;        /* i is index to next character */
1291                         paraDir = paraDirDefault;
1292                         lastStrongDir = paraDirDefault;
1293                     }
1294                 }
1295             }
1296         }
1297         if (isDefaultLevel) {
1298             paraLevel = GetParaLevelAt(0);
1299         }
1300 
1301         /* The following line does nothing new for contextual paraLevel, but is
1302            needed for absolute paraLevel.                               */
1303         flags |= DirPropFlagLR(paraLevel);
1304 
1305         if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1306             flags |= DirPropFlag(L);
1307         }
1308     }
1309 
1310     /* perform (X1)..(X9) ------------------------------------------------------- */
1311 
1312     /* determine if the text is mixed-directional or single-directional */
1313     private byte directionFromFlags() {
1314         /* if the text contains AN and neutrals, then some neutrals may become RTL */
1315         if (!((flags & MASK_RTL) != 0 ||
1316               ((flags & DirPropFlag(AN)) != 0 &&
1317                (flags & MASK_POSSIBLE_N) != 0))) {
1318             return Bidi.DIRECTION_LEFT_TO_RIGHT;
1319         } else if ((flags & MASK_LTR) == 0) {
1320             return Bidi.DIRECTION_RIGHT_TO_LEFT;
1321         } else {
1322             return MIXED;
1323         }
1324     }
1325 
1326     /*
1327      * Resolve the explicit levels as specified by explicit embedding codes.
1328      * Recalculate the flags to have them reflect the real properties
1329      * after taking the explicit embeddings into account.
1330      *
1331      * The Bidi algorithm is designed to result in the same behavior whether embedding
1332      * levels are externally specified (from "styled text", supposedly the preferred
1333      * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
1334      * That is why (X9) instructs to remove all explicit codes (and BN).
1335      * However, in a real implementation, this removal of these codes and their index
1336      * positions in the plain text is undesirable since it would result in
1337      * reallocated, reindexed text.
1338      * Instead, this implementation leaves the codes in there and just ignores them
1339      * in the subsequent processing.
1340      * In order to get the same reordering behavior, positions with a BN or an
1341      * explicit embedding code just get the same level assigned as the last "real"
1342      * character.
1343      *
1344      * Some implementations, not this one, then overwrite some of these
1345      * directionality properties at "real" same-level-run boundaries by
1346      * L or R codes so that the resolution of weak types can be performed on the
1347      * entire paragraph at once instead of having to parse it once more and
1348      * perform that resolution on same-level-runs.
1349      * This limits the scope of the implicit rules in effectively
1350      * the same way as the run limits.
1351      *
1352      * Instead, this implementation does not modify these codes.
1353      * On one hand, the paragraph has to be scanned for same-level-runs, but
1354      * on the other hand, this saves another loop to reset these codes,
1355      * or saves making and modifying a copy of dirProps[].
1356      *
1357      *
1358      * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
1359      *
1360      *
1361      * Handling the stack of explicit levels (Xn):
1362      *
1363      * With the Bidi stack of explicit levels,
1364      * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
1365      * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
1366      *
1367      * In order to have a correct push-pop semantics even in the case of overflows,
1368      * there are two overflow counters:
1369      * - countOver60 is incremented with each LRx at level 60
1370      * - from level 60, one RLx increases the level to 61
1371      * - countOver61 is incremented with each LRx and RLx at level 61
1372      *
1373      * Popping levels with PDF must work in the opposite order so that level 61
1374      * is correct at the correct point. Underflows (too many PDFs) must be checked.
1375      *
1376      * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
1377      */
1378     private byte resolveExplicitLevels() {
1379         int i = 0;
1380         byte dirProp;
1381         byte level = GetParaLevelAt(0);
1382 
1383         byte dirct;
1384         int paraIndex = 0;
1385 
1386         /* determine if the text is mixed-directional or single-directional */
1387         dirct = directionFromFlags();
1388 
1389         /* we may not need to resolve any explicit levels, but for multiple
1390            paragraphs we want to loop on all chars to set the para boundaries */
1391         if ((dirct != MIXED) && (paraCount == 1)) {
1392             /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1393         } else if ((paraCount == 1) &&
1394                    ((flags & MASK_EXPLICIT) == 0)) {
1395             /* mixed, but all characters are at the same embedding level */
1396             /* or we are in "inverse Bidi" */
1397             /* and we don't have contextual multiple paragraphs with some B char */
1398             /* set all levels to the paragraph level */
1399             for (i = 0; i < length; ++i) {
1400                 levels[i] = level;
1401             }
1402         } else {
1403             /* continue to perform (Xn) */
1404 
1405             /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1406             /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
1407             byte embeddingLevel = level;
1408             byte newLevel;
1409             byte stackTop = 0;
1410 
1411             byte[] stack = new byte[MAX_EXPLICIT_LEVEL];    /* we never push anything >=MAX_EXPLICIT_LEVEL */
1412             int countOver60 = 0;
1413             int countOver61 = 0;  /* count overflows of explicit levels */
1414 
1415             /* recalculate the flags */
1416             flags = 0;
1417 
1418             for (i = 0; i < length; ++i) {
1419                 dirProp = NoContextRTL(dirProps[i]);
1420                 switch(dirProp) {
1421                 case LRE:
1422                 case LRO:
1423                     /* (X3, X5) */
1424                     newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */
1425                     if (newLevel <= MAX_EXPLICIT_LEVEL) {
1426                         stack[stackTop] = embeddingLevel;
1427                         ++stackTop;
1428                         embeddingLevel = newLevel;
1429                         if (dirProp == LRO) {
1430                             embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1431                         }
1432                         /* we don't need to set LEVEL_OVERRIDE off for LRE
1433                            since this has already been done for newLevel which is
1434                            the source for embeddingLevel.
1435                          */
1436                     } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {
1437                         ++countOver61;
1438                     } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {
1439                         ++countOver60;
1440                     }
1441                     flags |= DirPropFlag(BN);
1442                     break;
1443                 case RLE:
1444                 case RLO:
1445                     /* (X2, X4) */
1446                     newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */
1447                     if (newLevel<=MAX_EXPLICIT_LEVEL) {
1448                         stack[stackTop] = embeddingLevel;
1449                         ++stackTop;
1450                         embeddingLevel = newLevel;
1451                         if (dirProp == RLO) {
1452                             embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1453                         }
1454                         /* we don't need to set LEVEL_OVERRIDE off for RLE
1455                            since this has already been done for newLevel which is
1456                            the source for embeddingLevel.
1457                          */
1458                     } else {
1459                         ++countOver61;
1460                     }
1461                     flags |= DirPropFlag(BN);
1462                     break;
1463                 case PDF:
1464                     /* (X7) */
1465                     /* handle all the overflow cases first */
1466                     if (countOver61 > 0) {
1467                         --countOver61;
1468                     } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {
1469                         /* handle LRx overflows from level 60 */
1470                         --countOver60;
1471                     } else if (stackTop > 0) {
1472                         /* this is the pop operation; it also pops level 61 while countOver60>0 */
1473                         --stackTop;
1474                         embeddingLevel = stack[stackTop];
1475                     /* } else { (underflow) */
1476                     }
1477                     flags |= DirPropFlag(BN);
1478                     break;
1479                 case B:
1480                     stackTop = 0;
1481                     countOver60 = 0;
1482                     countOver61 = 0;
1483                     level = GetParaLevelAt(i);
1484                     if ((i + 1) < length) {
1485                         embeddingLevel = GetParaLevelAt(i+1);
1486                         if (!((text[i] == CR) && (text[i + 1] == LF))) {
1487                             paras[paraIndex++] = i+1;
1488                         }
1489                     }
1490                     flags |= DirPropFlag(B);
1491                     break;
1492                 case BN:
1493                     /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1494                     /* they will get their levels set correctly in adjustWSLevels() */
1495                     flags |= DirPropFlag(BN);
1496                     break;
1497                 default:
1498                     /* all other types get the "real" level */
1499                     if (level != embeddingLevel) {
1500                         level = embeddingLevel;
1501                         if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1502                             flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;
1503                         } else {
1504                             flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;
1505                         }
1506                     }
1507                     if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
1508                         flags |= DirPropFlag(dirProp);
1509                     }
1510                     break;
1511                 }
1512 
1513                 /*
1514                  * We need to set reasonable levels even on BN codes and
1515                  * explicit codes because we will later look at same-level runs (X10).
1516                  */
1517                 levels[i] = level;
1518             }
1519             if ((flags & MASK_EMBEDDING) != 0) {
1520                 flags |= DirPropFlagLR(paraLevel);
1521             }
1522             if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1523                 flags |= DirPropFlag(L);
1524             }
1525 
1526             /* subsequently, ignore the explicit codes and BN (X9) */
1527 
1528             /* again, determine if the text is mixed-directional or single-directional */
1529             dirct = directionFromFlags();
1530         }
1531 
1532         return dirct;
1533     }
1534 
1535     /*
1536      * Use a pre-specified embedding levels array:
1537      *
1538      * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1539      * ignore all explicit codes (X9),
1540      * and check all the preset levels.
1541      *
1542      * Recalculate the flags to have them reflect the real properties
1543      * after taking the explicit embeddings into account.
1544      */
1545     private byte checkExplicitLevels() {
1546         byte dirProp;
1547         int i;
1548         this.flags = 0;     /* collect all directionalities in the text */
1549         byte level;
1550         int paraIndex = 0;
1551 
1552         for (i = 0; i < length; ++i) {
1553             if (levels[i] == 0) {
1554                 levels[i] = paraLevel;
1555             }
1556             if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
1557                 if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) {
1558                     levels[i] =  (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE);
1559                 } else {
1560                     levels[i] = paraLevel;
1561                 }
1562             }
1563             level = levels[i];
1564             dirProp = NoContextRTL(dirProps[i]);
1565             if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1566                 /* keep the override flag in levels[i] but adjust the flags */
1567                 level &= ~INTERNAL_LEVEL_OVERRIDE;     /* make the range check below simpler */
1568                 flags |= DirPropFlagO(level);
1569             } else {
1570                 /* set the flags */
1571                 flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
1572             }
1573 
1574             if ((level < GetParaLevelAt(i) &&
1575                     !((0 == level) && (dirProp == B))) ||
1576                     (MAX_EXPLICIT_LEVEL <level)) {
1577                 /* level out of bounds */
1578                 throw new IllegalArgumentException("level " + level +
1579                                                    " out of bounds at index " + i);
1580             }
1581             if ((dirProp == B) && ((i + 1) < length)) {
1582                 if (!((text[i] == CR) && (text[i + 1] == LF))) {
1583                     paras[paraIndex++] = i + 1;
1584                 }
1585             }
1586         }
1587         if ((flags&MASK_EMBEDDING) != 0) {
1588             flags |= DirPropFlagLR(paraLevel);
1589         }
1590 
1591         /* determine if the text is mixed-directional or single-directional */
1592         return directionFromFlags();
1593     }
1594 
1595     /*********************************************************************/
1596     /* The Properties state machine table                                */
1597     /*********************************************************************/
1598     /*                                                                   */
1599     /* All table cells are 8 bits:                                       */
1600     /*      bits 0..4:  next state                                       */
1601     /*      bits 5..7:  action to perform (if > 0)                       */
1602     /*                                                                   */
1603     /* Cells may be of format "n" where n represents the next state      */
1604     /* (except for the rightmost column).                                */
1605     /* Cells may also be of format "_(x,y)" where x represents an action */
1606     /* to perform and y represents the next state.                       */
1607     /*                                                                   */
1608     /*********************************************************************/
1609     /* Definitions and type for properties state tables                  */
1610     /*********************************************************************/
1611     private static final int IMPTABPROPS_COLUMNS = 14;
1612     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
1613     private static short GetStateProps(short cell) {
1614         return (short)(cell & 0x1f);
1615     }
1616     private static short GetActionProps(short cell) {
1617         return (short)(cell >> 5);
1618     }
1619 
1620     private static final short groupProp[] =          /* dirProp regrouped */
1621     {
1622         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
1623         0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
1624     };
1625     private static final short _L  = 0;
1626     private static final short _R  = 1;
1627     private static final short _EN = 2;
1628     private static final short _AN = 3;
1629     private static final short _ON = 4;
1630     private static final short _S  = 5;
1631     private static final short _B  = 6; /* reduced dirProp */
1632 
1633     /*********************************************************************/
1634     /*                                                                   */
1635     /*      PROPERTIES  STATE  TABLE                                     */
1636     /*                                                                   */
1637     /* In table impTabProps,                                             */
1638     /*      - the ON column regroups ON and WS                           */
1639     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
1640     /*      - the Res column is the reduced property assigned to a run   */
1641     /*                                                                   */
1642     /* Action 1: process current run1, init new run1                     */
1643     /*        2: init new run2                                           */
1644     /*        3: process run1, process run2, init new run1               */
1645     /*        4: process run1, set run1=run2, init new run2              */
1646     /*                                                                   */
1647     /* Notes:                                                            */
1648     /*  1) This table is used in resolveImplicitLevels().                */
1649     /*  2) This table triggers actions when there is a change in the Bidi*/
1650     /*     property of incoming characters (action 1).                   */
1651     /*  3) Most such property sequences are processed immediately (in    */
1652     /*     fact, passed to processPropertySeq().                         */
1653     /*  4) However, numbers are assembled as one sequence. This means    */
1654     /*     that undefined situations (like CS following digits, until    */
1655     /*     it is known if the next char will be a digit) are held until  */
1656     /*     following chars define them.                                  */
1657     /*     Example: digits followed by CS, then comes another CS or ON;  */
1658     /*              the digits will be processed, then the CS assigned   */
1659     /*              as the start of an ON sequence (action 3).           */
1660     /*  5) There are cases where more than one sequence must be          */
1661     /*     processed, for instance digits followed by CS followed by L:  */
1662     /*     the digits must be processed as one sequence, and the CS      */
1663     /*     must be processed as an ON sequence, all this before starting */
1664     /*     assembling chars for the opening L sequence.                  */
1665     /*                                                                   */
1666     /*                                                                   */
1667     private static final short impTabProps[][] =
1668     {
1669 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,  Res */
1670 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,  _ON },
1671 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3,   _L },
1672 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3,   _R },
1673 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3,   _R },
1674 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,  _EN },
1675 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3,  _AN },
1676 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,  _AN },
1677 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3,  _ON },
1678 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3,  _ON },
1679 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,  _ON },
1680 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,  _EN },
1681 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,  _EN },
1682 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3,  _AN },
1683 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,  _AN },
1684 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,  _ON },
1685 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3,   _S },
1686 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3,   _S },
1687 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3,   _B }
1688     };
1689 
1690     /*********************************************************************/
1691     /* The levels state machine tables                                   */
1692     /*********************************************************************/
1693     /*                                                                   */
1694     /* All table cells are 8 bits:                                       */
1695     /*      bits 0..3:  next state                                       */
1696     /*      bits 4..7:  action to perform (if > 0)                       */
1697     /*                                                                   */
1698     /* Cells may be of format "n" where n represents the next state      */
1699     /* (except for the rightmost column).                                */
1700     /* Cells may also be of format "_(x,y)" where x represents an action */
1701     /* to perform and y represents the next state.                       */
1702     /*                                                                   */
1703     /* This format limits each table to 16 states each and to 15 actions.*/
1704     /*                                                                   */
1705     /*********************************************************************/
1706     /* Definitions and type for levels state tables                      */
1707     /*********************************************************************/
1708     private static final int IMPTABLEVELS_COLUMNS = _B + 2;
1709     private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
1710     private static short GetState(byte cell) { return (short)(cell & 0x0f); }
1711     private static short GetAction(byte cell) { return (short)(cell >> 4); }
1712 
1713     private static class ImpTabPair {
1714         byte[][][] imptab;
1715         short[][] impact;
1716 
1717         ImpTabPair(byte[][] table1, byte[][] table2,
1718                    short[] act1, short[] act2) {
1719             imptab = new byte[][][] {table1, table2};
1720             impact = new short[][] {act1, act2};
1721         }
1722     }
1723 
1724     /*********************************************************************/
1725     /*                                                                   */
1726     /*      LEVELS  STATE  TABLES                                        */
1727     /*                                                                   */
1728     /* In all levels state tables,                                       */
1729     /*      - state 0 is the initial state                               */
1730     /*      - the Res column is the increment to add to the text level   */
1731     /*        for this property sequence.                                */
1732     /*                                                                   */
1733     /* The impact arrays for each table of a pair map the local action   */
1734     /* numbers of the table to the total list of actions. For instance,  */
1735     /* action 2 in a given table corresponds to the action number which  */
1736     /* appears in entry [2] of the impact array for that table.          */
1737     /* The first entry of all impact arrays must be 0.                   */
1738     /*                                                                   */
1739     /* Action 1: init conditional sequence                               */
1740     /*        2: prepend conditional sequence to current sequence        */
1741     /*        3: set ON sequence to new level - 1                        */
1742     /*        4: init EN/AN/ON sequence                                  */
1743     /*        5: fix EN/AN/ON sequence followed by R                     */
1744     /*        6: set previous level sequence to level 2                  */
1745     /*                                                                   */
1746     /* Notes:                                                            */
1747     /*  1) These tables are used in processPropertySeq(). The input      */
1748     /*     is property sequences as determined by resolveImplicitLevels. */
1749     /*  2) Most such property sequences are processed immediately        */
1750     /*     (levels are assigned).                                        */
1751     /*  3) However, some sequences cannot be assigned a final level till */
1752     /*     one or more following sequences are received. For instance,   */
1753     /*     ON following an R sequence within an even-level paragraph.    */
1754     /*     If the following sequence is R, the ON sequence will be       */
1755     /*     assigned basic run level+1, and so will the R sequence.       */
1756     /*  4) S is generally handled like ON, since its level will be fixed */
1757     /*     to paragraph level in adjustWSLevels().                       */
1758     /*                                                                   */
1759 
1760     private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
1761         /*  In this table, conditional sequences receive the higher possible level
1762             until proven otherwise.
1763         */
1764     {
1765         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1766         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
1767         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
1768         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
1769         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
1770         /* 4 : R+ON       */ {  0x20,     1,     3,     3,     4,     4,  0x20,  1 },
1771         /* 5 : AN+ON      */ {  0x20,     1,  0x20,     2,     5,     5,  0x20,  1 }
1772     };
1773 
1774     private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
1775         /*  In this table, conditional sequences receive the lower possible level
1776             until proven otherwise.
1777         */
1778     {
1779         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1780         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
1781         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
1782         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
1783         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
1784         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
1785         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
1786     };
1787 
1788     private static final short[] impAct0 = {0,1,2,3,4,5,6};
1789 
1790     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
1791             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
1792 
1793     private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
1794         /* In this table, conditional sequences receive the higher possible
1795            level until proven otherwise.
1796         */
1797         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1798         /* 0 : init       */ {     0,     2,     1,     1,     0,     0,     0,  0 },
1799         /* 1 : L+EN/AN    */ {     0,     2,     1,     1,     0,     0,     0,  2 },
1800         /* 2 : R          */ {     0,     2,     4,     4,  0x13,     0,     0,  1 },
1801         /* 3 : R+ON       */ {  0x20,     2,     4,     4,     3,     3,  0x20,  1 },
1802         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
1803     };
1804     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
1805             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
1806 
1807     private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
1808         /* In this table, EN/AN+ON sequences receive levels as if associated with R
1809            until proven that there is L or sor/eor on both sides. AN is handled like EN.
1810         */
1811         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1812         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
1813         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
1814         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
1815         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
1816         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
1817         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
1818     };
1819     private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
1820         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1821             until proven that there is L on both sides. AN is handled like EN.
1822         */
1823         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1824         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
1825         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
1826         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
1827         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
1828         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
1829     };
1830     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
1831             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
1832                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
1833 
1834     private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
1835         /* This table is identical to the Default LTR table except that EN and AN
1836            are handled like L.
1837         */
1838         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1839         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
1840         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
1841         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
1842         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
1843         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
1844         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
1845     };
1846     private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
1847         /* This table is identical to the Default RTL table except that EN and AN
1848            are handled like L.
1849         */
1850         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1851         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
1852         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
1853         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
1854         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
1855         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
1856         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
1857     };
1858     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
1859             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
1860              impAct0, impAct0);
1861 
1862     private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
1863         /*  In this table, conditional sequences receive the lower possible level
1864             until proven otherwise.
1865         */
1866         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1867         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
1868         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
1869         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
1870         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
1871         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
1872         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
1873         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
1874     };
1875     private static final short[] impAct1 = {0,1,11,12};
1876     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
1877             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1878 
1879     private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1880         /* The case handled in this table is (visually):  R EN L
1881          */
1882         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1883         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
1884         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
1885         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
1886         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
1887         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
1888         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
1889         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
1890     };
1891     private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1892         /* The cases handled in this table are (visually):  R EN L
1893                                                             R L AN L
1894         */
1895         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1896         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
1897         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
1898         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
1899         /* 3 : L          */ {    3 ,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
1900         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
1901         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
1902         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
1903     };
1904     private static final short impAct2[] = {0,1,7,8,9,10};
1905     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
1906             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1907                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1908 
1909     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
1910             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1911 
1912     private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
1913         /*  The case handled in this table is (visually):  R EN L
1914         */
1915         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
1916         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
1917         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
1918         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
1919         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
1920         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
1921     };
1922     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
1923             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1924                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1925 
1926     private class LevState {
1927         byte[][] impTab;                /* level table pointer          */
1928         short[] impAct;                 /* action map array             */
1929         int startON;                    /* start of ON sequence         */
1930         int startL2EN;                  /* start of level 2 sequence    */
1931         int lastStrongRTL;              /* index of last found R or AL  */
1932         short state;                    /* current state                */
1933         byte runLevel;                  /* run level before implicit solving */
1934     }
1935 
1936     /*------------------------------------------------------------------------*/
1937 
1938     static final int FIRSTALLOC = 10;
1939     /*
1940      *  param pos:     position where to insert
1941      *  param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1942      */
1943     private void addPoint(int pos, int flag)
1944     {
1945         Point point = new Point();
1946 
1947         int len = insertPoints.points.length;
1948         if (len == 0) {
1949             insertPoints.points = new Point[FIRSTALLOC];
1950             len = FIRSTALLOC;
1951         }
1952         if (insertPoints.size >= len) { /* no room for new point */
1953             Point[] savePoints = insertPoints.points;
1954             insertPoints.points = new Point[len * 2];
1955             System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
1956         }
1957         point.pos = pos;
1958         point.flag = flag;
1959         insertPoints.points[insertPoints.size] = point;
1960         insertPoints.size++;
1961     }
1962 
1963     /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1964 
1965     /*
1966      * This implementation of the (Wn) rules applies all rules in one pass.
1967      * In order to do so, it needs a look-ahead of typically 1 character
1968      * (except for W5: sequences of ET) and keeps track of changes
1969      * in a rule Wp that affect a later Wq (p<q).
1970      *
1971      * The (Nn) and (In) rules are also performed in that same single loop,
1972      * but effectively one iteration behind for white space.
1973      *
1974      * Since all implicit rules are performed in one step, it is not necessary
1975      * to actually store the intermediate directional properties in dirProps[].
1976      */
1977 
1978     private void processPropertySeq(LevState levState, short _prop,
1979             int start, int limit) {
1980         byte cell;
1981         byte[][] impTab = levState.impTab;
1982         short[] impAct = levState.impAct;
1983         short oldStateSeq,actionSeq;
1984         byte level, addLevel;
1985         int start0, k;
1986 
1987         start0 = start;                 /* save original start position */
1988         oldStateSeq = levState.state;
1989         cell = impTab[oldStateSeq][_prop];
1990         levState.state = GetState(cell);        /* isolate the new state */
1991         actionSeq = impAct[GetAction(cell)];    /* isolate the action */
1992         addLevel = impTab[levState.state][IMPTABLEVELS_RES];
1993 
1994         if (actionSeq != 0) {
1995             switch (actionSeq) {
1996             case 1:                     /* init ON seq */
1997                 levState.startON = start0;
1998                 break;
1999 
2000             case 2:                     /* prepend ON seq to current seq */
2001                 start = levState.startON;
2002                 break;
2003 
2004             case 3:                     /* L or S after possible relevant EN/AN */
2005                 /* check if we had EN after R/AL */
2006                 if (levState.startL2EN >= 0) {
2007                     addPoint(levState.startL2EN, LRM_BEFORE);
2008                 }
2009                 levState.startL2EN = -1;  /* not within previous if since could also be -2 */
2010                 /* check if we had any relevant EN/AN after R/AL */
2011                 if ((insertPoints.points.length == 0) ||
2012                         (insertPoints.size <= insertPoints.confirmed)) {
2013                     /* nothing, just clean up */
2014                     levState.lastStrongRTL = -1;
2015                     /* check if we have a pending conditional segment */
2016                     level = impTab[oldStateSeq][IMPTABLEVELS_RES];
2017                     if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
2018                         start = levState.startON;   /* reset to basic run level */
2019                     }
2020                     if (_prop == _S) {              /* add LRM before S */
2021                         addPoint(start0, LRM_BEFORE);
2022                         insertPoints.confirmed = insertPoints.size;
2023                     }
2024                     break;
2025                 }
2026                 /* reset previous RTL cont to level for LTR text */
2027                 for (k = levState.lastStrongRTL + 1; k < start0; k++) {
2028                     /* reset odd level, leave runLevel+2 as is */
2029                     levels[k] = (byte)((levels[k] - 2) & ~1);
2030                 }
2031                 /* mark insert points as confirmed */
2032                 insertPoints.confirmed = insertPoints.size;
2033                 levState.lastStrongRTL = -1;
2034                 if (_prop == _S) {           /* add LRM before S */
2035                     addPoint(start0, LRM_BEFORE);
2036                     insertPoints.confirmed = insertPoints.size;
2037                 }
2038                 break;
2039 
2040             case 4:                     /* R/AL after possible relevant EN/AN */
2041                 /* just clean up */
2042                 if (insertPoints.points.length > 0)
2043                     /* remove all non confirmed insert points */
2044                     insertPoints.size = insertPoints.confirmed;
2045                 levState.startON = -1;
2046                 levState.startL2EN = -1;
2047                 levState.lastStrongRTL = limit - 1;
2048                 break;
2049 
2050             case 5:                     /* EN/AN after R/AL + possible cont */
2051                 /* check for real AN */
2052                 if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) {
2053                     /* real AN */
2054                     if (levState.startL2EN == -1) { /* if no relevant EN already found */
2055                         /* just note the righmost digit as a strong RTL */
2056                         levState.lastStrongRTL = limit - 1;
2057                         break;
2058                     }
2059                     if (levState.startL2EN >= 0)  { /* after EN, no AN */
2060                         addPoint(levState.startL2EN, LRM_BEFORE);
2061                         levState.startL2EN = -2;
2062                     }
2063                     /* note AN */
2064                     addPoint(start0, LRM_BEFORE);
2065                     break;
2066                 }
2067                 /* if first EN/AN after R/AL */
2068                 if (levState.startL2EN == -1) {
2069                     levState.startL2EN = start0;
2070                 }
2071                 break;
2072 
2073             case 6:                     /* note location of latest R/AL */
2074                 levState.lastStrongRTL = limit - 1;
2075                 levState.startON = -1;
2076                 break;
2077 
2078             case 7:                     /* L after R+ON/EN/AN */
2079                 /* include possible adjacent number on the left */
2080                 for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
2081                 }
2082                 if (k >= 0) {
2083                     addPoint(k, RLM_BEFORE);    /* add RLM before */
2084                     insertPoints.confirmed = insertPoints.size; /* confirm it */
2085                 }
2086                 levState.startON = start0;
2087                 break;
2088 
2089             case 8:                     /* AN after L */
2090                 /* AN numbers between L text on both sides may be trouble. */
2091                 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2092                 addPoint(start0, LRM_BEFORE);   /* add LRM before */
2093                 addPoint(start0, LRM_AFTER);    /* add LRM after  */
2094                 break;
2095 
2096             case 9:                     /* R after L+ON/EN/AN */
2097                 /* false alert, infirm LRMs around previous AN */
2098                 insertPoints.size=insertPoints.confirmed;
2099                 if (_prop == _S) {          /* add RLM before S */
2100                     addPoint(start0, RLM_BEFORE);
2101                     insertPoints.confirmed = insertPoints.size;
2102                 }
2103                 break;
2104 
2105             case 10:                    /* L after L+ON/AN */
2106                 level = (byte)(levState.runLevel + addLevel);
2107                 for (k=levState.startON; k < start0; k++) {
2108                     if (levels[k] < level) {
2109                         levels[k] = level;
2110                     }
2111                 }
2112                 insertPoints.confirmed = insertPoints.size;   /* confirm inserts */
2113                 levState.startON = start0;
2114                 break;
2115 
2116             case 11:                    /* L after L+ON+EN/AN/ON */
2117                 level = levState.runLevel;
2118                 for (k = start0-1; k >= levState.startON; k--) {
2119                     if (levels[k] == level+3) {
2120                         while (levels[k] == level+3) {
2121                             levels[k--] -= 2;
2122                         }
2123                         while (levels[k] == level) {
2124                             k--;
2125                         }
2126                     }
2127                     if (levels[k] == level+2) {
2128                         levels[k] = level;
2129                         continue;
2130                     }
2131                     levels[k] = (byte)(level+1);
2132                 }
2133                 break;
2134 
2135             case 12:                    /* R after L+ON+EN/AN/ON */
2136                 level = (byte)(levState.runLevel+1);
2137                 for (k = start0-1; k >= levState.startON; k--) {
2138                     if (levels[k] > level) {
2139                         levels[k] -= 2;
2140                     }
2141                 }
2142                 break;
2143 
2144             default:                        /* we should never get here */
2145                 throw new IllegalStateException("Internal ICU error in processPropertySeq");
2146             }
2147         }
2148         if ((addLevel) != 0 || (start < start0)) {
2149             level = (byte)(levState.runLevel + addLevel);
2150             for (k = start; k < limit; k++) {
2151                 levels[k] = level;
2152             }
2153         }
2154     }
2155 
2156     private void resolveImplicitLevels(int start, int limit, short sor, short eor)
2157     {
2158         LevState levState = new LevState();
2159         int i, start1, start2;
2160         short oldStateImp, stateImp, actionImp;
2161         short gprop, resProp, cell;
2162         short nextStrongProp = R;
2163         int nextStrongPos = -1;
2164 
2165 
2166         /* check for RTL inverse Bidi mode */
2167         /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
2168          * loop on the text characters from end to start.
2169          * This would need a different properties state table (at least different
2170          * actions) and different levels state tables (maybe very similar to the
2171          * LTR corresponding ones.
2172          */
2173         /* initialize for levels state table */
2174         levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2175         levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2176         levState.state = 0;
2177         levState.runLevel = levels[start];
2178         levState.impTab = impTabPair.imptab[levState.runLevel & 1];
2179         levState.impAct = impTabPair.impact[levState.runLevel & 1];
2180         processPropertySeq(levState, sor, start, start);
2181         /* initialize for property state table */
2182         if (dirProps[start] == NSM) {
2183             stateImp = (short)(1 + sor);
2184         } else {
2185             stateImp = 0;
2186         }
2187         start1 = start;
2188         start2 = 0;
2189 
2190         for (i = start; i <= limit; i++) {
2191             if (i >= limit) {
2192                 gprop = eor;
2193             } else {
2194                 short prop, prop1;
2195                 prop = NoContextRTL(dirProps[i]);
2196                 gprop = groupProp[prop];
2197             }
2198             oldStateImp = stateImp;
2199             cell = impTabProps[oldStateImp][gprop];
2200             stateImp = GetStateProps(cell);     /* isolate the new state */
2201             actionImp = GetActionProps(cell);   /* isolate the action */
2202             if ((i == limit) && (actionImp == 0)) {
2203                 /* there is an unprocessed sequence if its property == eor   */
2204                 actionImp = 1;                  /* process the last sequence */
2205             }
2206             if (actionImp != 0) {
2207                 resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
2208                 switch (actionImp) {
2209                 case 1:             /* process current seq1, init new seq1 */
2210                     processPropertySeq(levState, resProp, start1, i);
2211                     start1 = i;
2212                     break;
2213                 case 2:             /* init new seq2 */
2214                     start2 = i;
2215                     break;
2216                 case 3:             /* process seq1, process seq2, init new seq1 */
2217                     processPropertySeq(levState, resProp, start1, start2);
2218                     processPropertySeq(levState, _ON, start2, i);
2219                     start1 = i;
2220                     break;
2221                 case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2222                     processPropertySeq(levState, resProp, start1, start2);
2223                     start1 = start2;
2224                     start2 = i;
2225                     break;
2226                 default:            /* we should never get here */
2227                     throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
2228                 }
2229             }
2230         }
2231         /* flush possible pending sequence, e.g. ON */
2232         processPropertySeq(levState, eor, limit, limit);
2233     }
2234 
2235     /* perform (L1) and (X9) ---------------------------------------------------- */
2236 
2237     /*
2238      * Reset the embedding levels for some non-graphic characters (L1).
2239      * This method also sets appropriate levels for BN, and
2240      * explicit embedding types that are supposed to have been removed
2241      * from the paragraph in (X9).
2242      */
2243     private void adjustWSLevels() {
2244         int i;
2245 
2246         if ((flags & MASK_WS) != 0) {
2247             int flag;
2248             i = trailingWSStart;
2249             while (i > 0) {
2250                 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2251                 while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) {
2252                     if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2253                         levels[i] = 0;
2254                     } else {
2255                         levels[i] = GetParaLevelAt(i);
2256                     }
2257                 }
2258 
2259                 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2260                 /* here, i+1 is guaranteed to be <length */
2261                 while (i > 0) {
2262                     flag = DirPropFlagNC(dirProps[--i]);
2263                     if ((flag & MASK_BN_EXPLICIT) != 0) {
2264                         levels[i] = levels[i + 1];
2265                     } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2266                         levels[i] = 0;
2267                         break;
2268                     } else if ((flag & MASK_B_S) != 0){
2269                         levels[i] = GetParaLevelAt(i);
2270                         break;
2271                     }
2272                 }
2273             }
2274         }
2275     }
2276 
2277     private int Bidi_Min(int x, int y) {
2278         return x < y ? x : y;
2279     }
2280 
2281     private int Bidi_Abs(int x) {
2282         return x >= 0 ? x : -x;
2283     }
2284 
2285     /**
2286      * Perform the Unicode Bidi algorithm. It is defined in the
2287      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2288      * version 13,
2289      * also described in The Unicode Standard, Version 4.0 .<p>
2290      *
2291      * This method takes a piece of plain text containing one or more paragraphs,
2292      * with or without externally specified embedding levels from <i>styled</i>
2293      * text and computes the left-right-directionality of each character.<p>
2294      *
2295      * If the entire text is all of the same directionality, then
2296      * the method may not perform all the steps described by the algorithm,
2297      * i.e., some levels may not be the same as if all steps were performed.
2298      * This is not relevant for unidirectional text.<br>
2299      * For example, in pure LTR text with numbers the numbers would get
2300      * a resolved level of 2 higher than the surrounding text according to
2301      * the algorithm. This implementation may set all resolved levels to
2302      * the same value in such a case.<p>
2303      *
2304      * The text can be composed of multiple paragraphs. Occurrence of a block
2305      * separator in the text terminates a paragraph, and whatever comes next starts
2306      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
2307      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
2308      * in that case, the pair of characters is considered as terminating the
2309      * preceding paragraph, and a new paragraph will be started by a character
2310      * coming after the LF.
2311      *
2312      * Although the text is passed here as a <code>String</code>, it is
2313      * stored internally as an array of characters. Therefore the
2314      * documentation will refer to indexes of the characters in the text.
2315      *
2316      * @param text contains the text that the Bidi algorithm will be performed
2317      *        on. This text can be retrieved with <code>getText()</code> or
2318      *        <code>getTextAsString</code>.<br>
2319      *
2320      * @param paraLevel specifies the default level for the text;
2321      *        it is typically 0 (LTR) or 1 (RTL).
2322      *        If the method shall determine the paragraph level from the text,
2323      *        then <code>paraLevel</code> can be set to
2324      *        either <code>LEVEL_DEFAULT_LTR</code>
2325      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2326      *        paragraphs, the paragraph level shall be determined separately for
2327      *        each paragraph; if a paragraph does not include any strongly typed
2328      *        character, then the desired default is used (0 for LTR or 1 for RTL).
2329      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2330      *        is also valid, with odd levels indicating RTL.
2331      *
2332      * @param embeddingLevels (in) may be used to preset the embedding and override levels,
2333      *        ignoring characters like LRE and PDF in the text.
2334      *        A level overrides the directional property of its corresponding
2335      *        (same index) character if the level has the
2336      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2337      *        Except for that bit, it must be
2338      *        <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
2339      *        with one exception: a level of zero may be specified for a
2340      *        paragraph separator even if <code>paraLevel&gt;0</code> when multiple
2341      *        paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2342      *        <strong>Caution: </strong>A reference to this array, not a copy
2343      *        of the levels, will be stored in the <code>Bidi</code> object;
2344      *        the <code>embeddingLevels</code>
2345      *        should not be modified to avoid unexpected results on subsequent
2346      *        Bidi operations. However, the <code>setPara()</code> and
2347      *        <code>setLine()</code> methods may modify some or all of the
2348      *        levels.<br><br>
2349      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
2350      *        have one entry for each character in <code>text</code>.
2351      *
2352      * @throws IllegalArgumentException if the values in embeddingLevels are
2353      *         not within the allowed range
2354      *
2355      * @see #LEVEL_DEFAULT_LTR
2356      * @see #LEVEL_DEFAULT_RTL
2357      * @see #LEVEL_OVERRIDE
2358      * @see #MAX_EXPLICIT_LEVEL
2359      * @stable ICU 3.8
2360      */
2361     void setPara(String text, byte paraLevel, byte[] embeddingLevels)
2362     {
2363         if (text == null) {
2364             setPara(new char[0], paraLevel, embeddingLevels);
2365         } else {
2366             setPara(text.toCharArray(), paraLevel, embeddingLevels);
2367         }
2368     }
2369 
2370     /**
2371      * Perform the Unicode Bidi algorithm. It is defined in the
2372      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2373      * version 13,
2374      * also described in The Unicode Standard, Version 4.0 .<p>
2375      *
2376      * This method takes a piece of plain text containing one or more paragraphs,
2377      * with or without externally specified embedding levels from <i>styled</i>
2378      * text and computes the left-right-directionality of each character.<p>
2379      *
2380      * If the entire text is all of the same directionality, then
2381      * the method may not perform all the steps described by the algorithm,
2382      * i.e., some levels may not be the same as if all steps were performed.
2383      * This is not relevant for unidirectional text.<br>
2384      * For example, in pure LTR text with numbers the numbers would get
2385      * a resolved level of 2 higher than the surrounding text according to
2386      * the algorithm. This implementation may set all resolved levels to
2387      * the same value in such a case.<p>
2388      *
2389      * The text can be composed of multiple paragraphs. Occurrence of a block
2390      * separator in the text terminates a paragraph, and whatever comes next starts
2391      * a new paragraph. The exception to this rule is when a Carriage Return (CR)
2392      * is followed by a Line Feed (LF). Both CR and LF are block separators, but
2393      * in that case, the pair of characters is considered as terminating the
2394      * preceding paragraph, and a new paragraph will be started by a character
2395      * coming after the LF.
2396      *
2397      * The text is stored internally as an array of characters. Therefore the
2398      * documentation will refer to indexes of the characters in the text.
2399      *
2400      * @param chars contains the text that the Bidi algorithm will be performed
2401      *        on. This text can be retrieved with <code>getText()</code> or
2402      *        <code>getTextAsString</code>.<br>
2403      *
2404      * @param paraLevel specifies the default level for the text;
2405      *        it is typically 0 (LTR) or 1 (RTL).
2406      *        If the method shall determine the paragraph level from the text,
2407      *        then <code>paraLevel</code> can be set to
2408      *        either <code>LEVEL_DEFAULT_LTR</code>
2409      *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2410      *        paragraphs, the paragraph level shall be determined separately for
2411      *        each paragraph; if a paragraph does not include any strongly typed
2412      *        character, then the desired default is used (0 for LTR or 1 for RTL).
2413      *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2414      *        is also valid, with odd levels indicating RTL.
2415      *
2416      * @param embeddingLevels (in) may be used to preset the embedding and
2417      *        override levels, ignoring characters like LRE and PDF in the text.
2418      *        A level overrides the directional property of its corresponding
2419      *        (same index) character if the level has the
2420      *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2421      *        Except for that bit, it must be
2422      *        <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
2423      *        with one exception: a level of zero may be specified for a
2424      *        paragraph separator even if <code>paraLevel&gt;0</code> when multiple
2425      *        paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2426      *        <strong>Caution: </strong>A reference to this array, not a copy
2427      *        of the levels, will be stored in the <code>Bidi</code> object;
2428      *        the <code>embeddingLevels</code>
2429      *        should not be modified to avoid unexpected results on subsequent
2430      *        Bidi operations. However, the <code>setPara()</code> and
2431      *        <code>setLine()</code> methods may modify some or all of the
2432      *        levels.<br><br>
2433      *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
2434      *        have one entry for each character in <code>text</code>.
2435      *
2436      * @throws IllegalArgumentException if the values in embeddingLevels are
2437      *         not within the allowed range
2438      *
2439      * @see #LEVEL_DEFAULT_LTR
2440      * @see #LEVEL_DEFAULT_RTL
2441      * @see #LEVEL_OVERRIDE
2442      * @see #MAX_EXPLICIT_LEVEL
2443      * @stable ICU 3.8
2444      */
2445     public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
2446     {
2447         /* check the argument values */
2448         if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {
2449             verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
2450         }
2451         if (chars == null) {
2452             chars = new char[0];
2453         }
2454 
2455         /* initialize the Bidi object */
2456         this.paraBidi = null;          /* mark unfinished setPara */
2457         this.text = chars;
2458         this.length = this.originalLength = this.resultLength = text.length;
2459         this.paraLevel = paraLevel;
2460         this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2461         this.paraCount = 1;
2462 
2463         /* Allocate zero-length arrays instead of setting to null here; then
2464          * checks for null in various places can be eliminated.
2465          */
2466         dirProps = new byte[0];
2467         levels = new byte[0];
2468         runs = new BidiRun[0];
2469         isGoodLogicalToVisualRunsMap = false;
2470         insertPoints.size = 0;          /* clean up from last call */
2471         insertPoints.confirmed = 0;     /* clean up from last call */
2472 
2473         /*
2474          * Save the original paraLevel if contextual; otherwise, set to 0.
2475          */
2476         if (IsDefaultLevel(paraLevel)) {
2477             defaultParaLevel = paraLevel;
2478         } else {
2479             defaultParaLevel = 0;
2480         }
2481 
2482         if (length == 0) {
2483             /*
2484              * For an empty paragraph, create a Bidi object with the paraLevel and
2485              * the flags and the direction set but without allocating zero-length arrays.
2486              * There is nothing more to do.
2487              */
2488             if (IsDefaultLevel(paraLevel)) {
2489                 this.paraLevel &= 1;
2490                 defaultParaLevel = 0;
2491             }
2492             if ((this.paraLevel & 1) != 0) {
2493                 flags = DirPropFlag(R);
2494                 direction = Bidi.DIRECTION_RIGHT_TO_LEFT;
2495             } else {
2496                 flags = DirPropFlag(L);
2497                 direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2498             }
2499 
2500             runCount = 0;
2501             paraCount = 0;
2502             paraBidi = this;         /* mark successful setPara */
2503             return;
2504         }
2505 
2506         runCount = -1;
2507 
2508         /*
2509          * Get the directional properties,
2510          * the flags bit-set, and
2511          * determine the paragraph level if necessary.
2512          */
2513         getDirPropsMemory(length);
2514         dirProps = dirPropsMemory;
2515         getDirProps();
2516 
2517         /* the processed length may have changed if OPTION_STREAMING is set */
2518         trailingWSStart = length;  /* the levels[] will reflect the WS run */
2519 
2520         /* allocate paras memory */
2521         if (paraCount > 1) {
2522             getInitialParasMemory(paraCount);
2523             paras = parasMemory;
2524             paras[paraCount - 1] = length;
2525         } else {
2526             /* initialize paras for single paragraph */
2527             paras = simpleParas;
2528             simpleParas[0] = length;
2529         }
2530 
2531         /* are explicit levels specified? */
2532         if (embeddingLevels == null) {
2533             /* no: determine explicit levels according to the (Xn) rules */
2534             getLevelsMemory(length);
2535             levels = levelsMemory;
2536             direction = resolveExplicitLevels();
2537         } else {
2538             /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
2539             levels = embeddingLevels;
2540             direction = checkExplicitLevels();
2541         }
2542 
2543         /*
2544          * The steps after (X9) in the Bidi algorithm are performed only if
2545          * the paragraph text has mixed directionality!
2546          */
2547         switch (direction) {
2548         case Bidi.DIRECTION_LEFT_TO_RIGHT:
2549             /* make sure paraLevel is even */
2550             paraLevel = (byte)((paraLevel + 1) & ~1);
2551 
2552             /* all levels are implicitly at paraLevel (important for getLevels()) */
2553             trailingWSStart = 0;
2554             break;
2555         case Bidi.DIRECTION_RIGHT_TO_LEFT:
2556             /* make sure paraLevel is odd */
2557             paraLevel |= 1;
2558 
2559             /* all levels are implicitly at paraLevel (important for getLevels()) */
2560             trailingWSStart = 0;
2561             break;
2562         default:
2563             this.impTabPair = impTab_DEFAULT;
2564 
2565             /*
2566              * If there are no external levels specified and there
2567              * are no significant explicit level codes in the text,
2568              * then we can treat the entire paragraph as one run.
2569              * Otherwise, we need to perform the following rules on runs of
2570              * the text with the same embedding levels. (X10)
2571              * "Significant" explicit level codes are ones that actually
2572              * affect non-BN characters.
2573              * Examples for "insignificant" ones are empty embeddings
2574              * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2575              */
2576             if (embeddingLevels == null && paraCount <= 1 &&
2577                 (flags & DirPropFlagMultiRuns) == 0) {
2578                 resolveImplicitLevels(0, length,
2579                         GetLRFromLevel(GetParaLevelAt(0)),
2580                         GetLRFromLevel(GetParaLevelAt(length - 1)));
2581             } else {
2582                 /* sor, eor: start and end types of same-level-run */
2583                 int start, limit = 0;
2584                 byte level, nextLevel;
2585                 short sor, eor;
2586 
2587                 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2588                 level = GetParaLevelAt(0);
2589                 nextLevel = levels[0];
2590                 if (level < nextLevel) {
2591                     eor = GetLRFromLevel(nextLevel);
2592                 } else {
2593                     eor = GetLRFromLevel(level);
2594                 }
2595 
2596                 do {
2597                     /* determine start and limit of the run (end points just behind the run) */
2598 
2599                     /* the values for this run's start are the same as for the previous run's end */
2600                     start = limit;
2601                     level = nextLevel;
2602                     if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {
2603                         /* except if this is a new paragraph, then set sor = para level */
2604                         sor = GetLRFromLevel(GetParaLevelAt(start));
2605                     } else {
2606                         sor = eor;
2607                     }
2608 
2609                     /* search for the limit of this run */
2610                     while (++limit < length && levels[limit] == level) {}
2611 
2612                     /* get the correct level of the next run */
2613                     if (limit < length) {
2614                         nextLevel = levels[limit];
2615                     } else {
2616                         nextLevel = GetParaLevelAt(length - 1);
2617                     }
2618 
2619                     /* determine eor from max(level, nextLevel); sor is last run's eor */
2620                     if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {
2621                         eor = GetLRFromLevel(nextLevel);
2622                     } else {
2623                         eor = GetLRFromLevel(level);
2624                     }
2625 
2626                     /* if the run consists of overridden directional types, then there
2627                        are no implicit types to be resolved */
2628                     if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
2629                         resolveImplicitLevels(start, limit, sor, eor);
2630                     } else {
2631                         /* remove the LEVEL_OVERRIDE flags */
2632                         do {
2633                             levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;
2634                         } while (start < limit);
2635                     }
2636                 } while (limit  < length);
2637             }
2638 
2639             /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2640             adjustWSLevels();
2641 
2642             break;
2643         }
2644 
2645         resultLength += insertPoints.size;
2646         paraBidi = this;             /* mark successful setPara */
2647     }
2648 
2649     /**
2650      * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
2651      * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2652      * version 13,
2653      * also described in The Unicode Standard, Version 4.0 .<p>
2654      *
2655      * This method takes a paragraph of text and computes the
2656      * left-right-directionality of each character. The text should not
2657      * contain any Unicode block separators.<p>
2658      *
2659      * The RUN_DIRECTION attribute in the text, if present, determines the base
2660      * direction (left-to-right or right-to-left). If not present, the base
2661      * direction is computed using the Unicode Bidirectional Algorithm,
2662      * defaulting to left-to-right if there are no strong directional characters
2663      * in the text. This attribute, if present, must be applied to all the text
2664      * in the paragraph.<p>
2665      *
2666      * The BIDI_EMBEDDING attribute in the text, if present, represents
2667      * embedding level information. Negative values from -1 to -62 indicate
2668      * overrides at the absolute value of the level. Positive values from 1 to
2669      * 62 indicate embeddings. Where values are zero or not defined, the base
2670      * embedding level as determined by the base direction is assumed.<p>
2671      *
2672      * The NUMERIC_SHAPING attribute in the text, if present, converts European
2673      * digits to other decimal digits before running the bidi algorithm. This
2674      * attribute, if present, must be applied to all the text in the paragraph.
2675      *
2676      * If the entire text is all of the same directionality, then
2677      * the method may not perform all the steps described by the algorithm,
2678      * i.e., some levels may not be the same as if all steps were performed.
2679      * This is not relevant for unidirectional text.<br>
2680      * For example, in pure LTR text with numbers the numbers would get
2681      * a resolved level of 2 higher than the surrounding text according to
2682      * the algorithm. This implementation may set all resolved levels to
2683      * the same value in such a case.<p>
2684      *
2685      * @param paragraph a paragraph of text with optional character and
2686      *        paragraph attribute information
2687      * @stable ICU 3.8
2688      */
2689     public void setPara(AttributedCharacterIterator paragraph)
2690     {
2691         byte paraLvl;
2692         char ch = paragraph.first();
2693         Boolean runDirection =
2694             (Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
2695         Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
2696         if (runDirection == null) {
2697             paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
2698         } else {
2699             paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
2700                         (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT;
2701         }
2702 
2703         byte[] lvls = null;
2704         int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
2705         byte[] embeddingLevels = new byte[len];
2706         char[] txt = new char[len];
2707         int i = 0;
2708         while (ch != AttributedCharacterIterator.DONE) {
2709             txt[i] = ch;
2710             Integer embedding =
2711                 (Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING);
2712             if (embedding != null) {
2713                 byte level = embedding.byteValue();
2714                 if (level == 0) {
2715                     /* no-op */
2716                 } else if (level < 0) {
2717                     lvls = embeddingLevels;
2718                     embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE);
2719                 } else {
2720                     lvls = embeddingLevels;
2721                     embeddingLevels[i] = level;
2722                 }
2723             }
2724             ch = paragraph.next();
2725             ++i;
2726         }
2727 
2728         if (shaper != null) {
2729             NumericShapings.shape(shaper, txt, 0, len);
2730         }
2731         setPara(txt, paraLvl, lvls);
2732     }
2733 
2734     /**
2735      * Specify whether block separators must be allocated level zero,
2736      * so that successive paragraphs will progress from left to right.
2737      * This method must be called before <code>setPara()</code>.
2738      * Paragraph separators (B) may appear in the text.  Setting them to level zero
2739      * means that all paragraph separators (including one possibly appearing
2740      * in the last text position) are kept in the reordered text after the text
2741      * that they follow in the source text.
2742      * When this feature is not enabled, a paragraph separator at the last
2743      * position of the text before reordering will go to the first position
2744      * of the reordered text when the paragraph level is odd.
2745      *
2746      * @param ordarParaLTR specifies whether paragraph separators (B) must
2747      * receive level 0, so that successive paragraphs progress from left to right.
2748      *
2749      * @see #setPara
2750      * @stable ICU 3.8
2751      */
2752     private void orderParagraphsLTR(boolean ordarParaLTR) {
2753         orderParagraphsLTR = ordarParaLTR;
2754     }
2755 
2756     /**
2757      * Get the directionality of the text.
2758      *
2759      * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
2760      *         that indicates if the entire text
2761      *         represented by this object is unidirectional,
2762      *         and which direction, or if it is mixed-directional.
2763      *
2764      * @throws IllegalStateException if this call is not preceded by a successful
2765      *         call to <code>setPara</code> or <code>setLine</code>
2766      *
2767      * @see #LTR
2768      * @see #RTL
2769      * @see #MIXED
2770      * @stable ICU 3.8
2771      */
2772     private byte getDirection()
2773     {
2774         verifyValidParaOrLine();
2775         return direction;
2776     }
2777 
2778     /**
2779      * Get the length of the text.
2780      *
2781      * @return The length of the text that the <code>Bidi</code> object was
2782      *         created for.
2783      *
2784      * @throws IllegalStateException if this call is not preceded by a successful
2785      *         call to <code>setPara</code> or <code>setLine</code>
2786      * @stable ICU 3.8
2787      */
2788     public int getLength()
2789     {
2790         verifyValidParaOrLine();
2791         return originalLength;
2792     }
2793 
2794     /* paragraphs API methods ------------------------------------------------- */
2795 
2796     /**
2797      * Get the paragraph level of the text.
2798      *
2799      * @return The paragraph level. If there are multiple paragraphs, their
2800      *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
2801      *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
2802      *         is returned.
2803      *
2804      * @throws IllegalStateException if this call is not preceded by a successful
2805      *         call to <code>setPara</code> or <code>setLine</code>
2806      *
2807      * @see #LEVEL_DEFAULT_LTR
2808      * @see #LEVEL_DEFAULT_RTL
2809      * @see #getParagraph
2810      * @see #getParagraphByIndex
2811      * @stable ICU 3.8
2812      */
2813     public byte getParaLevel()
2814     {
2815         verifyValidParaOrLine();
2816         return paraLevel;
2817     }
2818 
2819     /**
2820      * Get the index of a paragraph, given a position within the text.<p>
2821      *
2822      * @param charIndex is the index of a character within the text, in the
2823      *        range <code>[0..getProcessedLength()-1]</code>.
2824      *
2825      * @return The index of the paragraph containing the specified position,
2826      *         starting from 0.
2827      *
2828      * @throws IllegalStateException if this call is not preceded by a successful
2829      *         call to <code>setPara</code> or <code>setLine</code>
2830      * @throws IllegalArgumentException if charIndex is not within the legal range
2831      *
2832      * @see com.ibm.icu.text.BidiRun
2833      * @see #getProcessedLength
2834      * @stable ICU 3.8
2835      */
2836     public int getParagraphIndex(int charIndex)
2837     {
2838         verifyValidParaOrLine();
2839         BidiBase bidi = paraBidi;             /* get Para object if Line object */
2840         verifyRange(charIndex, 0, bidi.length);
2841         int paraIndex;
2842         for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) {
2843         }
2844         return paraIndex;
2845     }
2846 
2847     /**
2848      * <code>setLine()</code> returns a <code>Bidi</code> object to
2849      * contain the reordering information, especially the resolved levels,
2850      * for all the characters in a line of text. This line of text is
2851      * specified by referring to a <code>Bidi</code> object representing
2852      * this information for a piece of text containing one or more paragraphs,
2853      * and by specifying a range of indexes in this text.<p>
2854      * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
2855      *
2856      * This is used after calling <code>setPara()</code>
2857      * for a piece of text, and after line-breaking on that text.
2858      * It is not necessary if each paragraph is treated as a single line.<p>
2859      *
2860      * After line-breaking, rules (L1) and (L2) for the treatment of
2861      * trailing WS and for reordering are performed on
2862      * a <code>Bidi</code> object that represents a line.<p>
2863      *
2864      * <strong>Important: </strong>the line <code>Bidi</code> object may
2865      * reference data within the global text <code>Bidi</code> object.
2866      * You should not alter the content of the global text object until
2867      * you are finished using the line object.
2868      *
2869      * @param start is the line's first index into the text.
2870      *
2871      * @param limit is just behind the line's last index into the text
2872      *        (its last index +1).
2873      *
2874      * @return a <code>Bidi</code> object that will now represent a line of the text.
2875      *
2876      * @throws IllegalStateException if this call is not preceded by a successful
2877      *         call to <code>setPara</code>
2878      * @throws IllegalArgumentException if start and limit are not in the range
2879      *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
2880      *         or if the specified line crosses a paragraph boundary
2881      *
2882      * @see #setPara
2883      * @see #getProcessedLength
2884      * @stable ICU 3.8
2885      */
2886     public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
2887     {
2888         verifyValidPara();
2889         verifyRange(start, 0, limit);
2890         verifyRange(limit, 0, length+1);
2891 
2892         return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
2893     }
2894 
2895     /**
2896      * Get the level for one character.
2897      *
2898      * @param charIndex the index of a character.
2899      *
2900      * @return The level for the character at <code>charIndex</code>.
2901      *
2902      * @throws IllegalStateException if this call is not preceded by a successful
2903      *         call to <code>setPara</code> or <code>setLine</code>
2904      * @throws IllegalArgumentException if charIndex is not in the range
2905      *         <code>0&lt;=charIndex&lt;getProcessedLength()</code>
2906      *
2907      * @see #getProcessedLength
2908      * @stable ICU 3.8
2909      */
2910     public byte getLevelAt(int charIndex)
2911     {
2912         if (charIndex < 0 || charIndex >= length) {
2913             return (byte)getBaseLevel();
2914         }
2915         verifyValidParaOrLine();
2916         verifyRange(charIndex, 0, length);
2917         return BidiLine.getLevelAt(this, charIndex);
2918     }
2919 
2920     /**
2921      * Get an array of levels for each character.<p>
2922      *
2923      * Note that this method may allocate memory under some
2924      * circumstances, unlike <code>getLevelAt()</code>.
2925      *
2926      * @return The levels array for the text,
2927      *         or <code>null</code> if an error occurs.
2928      *
2929      * @throws IllegalStateException if this call is not preceded by a successful
2930      *         call to <code>setPara</code> or <code>setLine</code>
2931      * @stable ICU 3.8
2932      */
2933     private byte[] getLevels()
2934     {
2935         verifyValidParaOrLine();
2936         if (length <= 0) {
2937             return new byte[0];
2938         }
2939         return BidiLine.getLevels(this);
2940     }
2941 
2942     /**
2943      * Get the number of runs.
2944      * This method may invoke the actual reordering on the
2945      * <code>Bidi</code> object, after <code>setPara()</code>
2946      * may have resolved only the levels of the text. Therefore,
2947      * <code>countRuns()</code> may have to allocate memory,
2948      * and may throw an exception if it fails to do so.
2949      *
2950      * @return The number of runs.
2951      *
2952      * @throws IllegalStateException if this call is not preceded by a successful
2953      *         call to <code>setPara</code> or <code>setLine</code>
2954      * @stable ICU 3.8
2955      */
2956     public int countRuns()
2957     {
2958         verifyValidParaOrLine();
2959         BidiLine.getRuns(this);
2960         return runCount;
2961     }
2962 
2963     /**
2964      * Get a visual-to-logical index map (array) for the characters in the
2965      * <code>Bidi</code> (paragraph or line) object.
2966      * <p>
2967      * Some values in the map may be <code>MAP_NOWHERE</code> if the
2968      * corresponding text characters are Bidi marks inserted in the visual
2969      * output by the option <code>OPTION_INSERT_MARKS</code>.
2970      * <p>
2971      * When the visual output is altered by using options of
2972      * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
2973      * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
2974      * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
2975      * be correct. It is advised to use, when possible, reordering options
2976      * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
2977      *
2978      * @return an array of <code>getResultLength()</code>
2979      *        indexes which will reflect the reordering of the characters.<br><br>
2980      *        The index map will result in
2981      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
2982      *        <code>indexMap</code> represents the returned array.
2983      *
2984      * @throws IllegalStateException if this call is not preceded by a successful
2985      *         call to <code>setPara</code> or <code>setLine</code>
2986      *
2987      * @see #getLogicalMap
2988      * @see #getLogicalIndex
2989      * @see #getResultLength
2990      * @see #MAP_NOWHERE
2991      * @see #OPTION_INSERT_MARKS
2992      * @see #writeReordered
2993      * @stable ICU 3.8
2994      */
2995     private int[] getVisualMap()
2996     {
2997         /* countRuns() checks successful call to setPara/setLine */
2998         countRuns();
2999         if (resultLength <= 0) {
3000             return new int[0];
3001         }
3002         return BidiLine.getVisualMap(this);
3003     }
3004 
3005     /**
3006      * This is a convenience method that does not use a <code>Bidi</code> object.
3007      * It is intended to be used for when an application has determined the levels
3008      * of objects (character sequences) and just needs to have them reordered (L2).
3009      * This is equivalent to using <code>getVisualMap()</code> on a
3010      * <code>Bidi</code> object.
3011      *
3012      * @param levels is an array of levels that have been determined by
3013      *        the application.
3014      *
3015      * @return an array of <code>levels.length</code>
3016      *        indexes which will reflect the reordering of the characters.<p>
3017      *        The index map will result in
3018      *        <code>indexMap[visualIndex]==logicalIndex</code>, where
3019      *        <code>indexMap</code> represents the returned array.
3020      *
3021      * @stable ICU 3.8
3022      */
3023     private static int[] reorderVisual(byte[] levels)
3024     {
3025         return BidiLine.reorderVisual(levels);
3026     }
3027 
3028     /**
3029      * Constant indicating that the base direction depends on the first strong
3030      * directional character in the text according to the Unicode Bidirectional
3031      * Algorithm. If no strong directional character is present, the base
3032      * direction is left-to-right.
3033      * @stable ICU 3.8
3034      */
3035     private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;
3036 
3037     /**
3038      * Constant indicating that the base direction depends on the first strong
3039      * directional character in the text according to the Unicode Bidirectional
3040      * Algorithm. If no strong directional character is present, the base
3041      * direction is right-to-left.
3042      * @stable ICU 3.8
3043      */
3044     private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;
3045 
3046     /**
3047      * Create Bidi from the given text, embedding, and direction information.
3048      * The embeddings array may be null. If present, the values represent
3049      * embedding level information. Negative values from -1 to -61 indicate
3050      * overrides at the absolute value of the level. Positive values from 1 to
3051      * 61 indicate embeddings. Where values are zero, the base embedding level
3052      * as determined by the base direction is assumed.<p>
3053      *
3054      * Note: this constructor calls setPara() internally.
3055      *
3056      * @param text an array containing the paragraph of text to process.
3057      * @param textStart the index into the text array of the start of the
3058      *        paragraph.
3059      * @param embeddings an array containing embedding values for each character
3060      *        in the paragraph. This can be null, in which case it is assumed
3061      *        that there is no external embedding information.
3062      * @param embStart the index into the embedding array of the start of the
3063      *        paragraph.
3064      * @param paragraphLength the length of the paragraph in the text and
3065      *        embeddings arrays.
3066      * @param flags a collection of flags that control the algorithm. The
3067      *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
3068      *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
3069      *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
3070      *
3071      * @throws IllegalArgumentException if the values in embeddings are
3072      *         not within the allowed range
3073      *
3074      * @see #DIRECTION_LEFT_TO_RIGHT
3075      * @see #DIRECTION_RIGHT_TO_LEFT
3076      * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
3077      * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
3078      * @stable ICU 3.8
3079      */
3080     public BidiBase(char[] text,
3081              int textStart,
3082              byte[] embeddings,
3083              int embStart,
3084              int paragraphLength,
3085              int flags)
3086      {
3087         this(0, 0);
3088         byte paraLvl;
3089         switch (flags) {
3090         case Bidi.DIRECTION_LEFT_TO_RIGHT:
3091         default:
3092             paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;
3093             break;
3094         case Bidi.DIRECTION_RIGHT_TO_LEFT:
3095             paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;
3096             break;
3097         case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
3098             paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
3099             break;
3100         case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
3101             paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;
3102             break;
3103         }
3104         byte[] paraEmbeddings;
3105         if (embeddings == null) {
3106             paraEmbeddings = null;
3107         } else {
3108             paraEmbeddings = new byte[paragraphLength];
3109             byte lev;
3110             for (int i = 0; i < paragraphLength; i++) {
3111                 lev = embeddings[i + embStart];
3112                 if (lev < 0) {
3113                     lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE);
3114                 } else if (lev == 0) {
3115                     lev = paraLvl;
3116                     if (paraLvl > MAX_EXPLICIT_LEVEL) {
3117                         lev &= 1;
3118                     }
3119                 }
3120                 paraEmbeddings[i] = lev;
3121             }
3122         }
3123         if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
3124             setPara(text, paraLvl, paraEmbeddings);
3125         } else {
3126             char[] paraText = new char[paragraphLength];
3127             System.arraycopy(text, textStart, paraText, 0, paragraphLength);
3128             setPara(paraText, paraLvl, paraEmbeddings);
3129         }
3130     }
3131 
3132     /**
3133      * Return true if the line is not left-to-right or right-to-left. This means
3134      * it either has mixed runs of left-to-right and right-to-left text, or the
3135      * base direction differs from the direction of the only run of text.
3136      *
3137      * @return true if the line is not left-to-right or right-to-left.
3138      *
3139      * @throws IllegalStateException if this call is not preceded by a successful
3140      *         call to <code>setPara</code>
3141      * @stable ICU 3.8
3142      */
3143     public boolean isMixed()
3144     {
3145         return (!isLeftToRight() && !isRightToLeft());
3146     }
3147 
3148     /**
3149     * Return true if the line is all left-to-right text and the base direction
3150      * is left-to-right.
3151      *
3152      * @return true if the line is all left-to-right text and the base direction
3153      *         is left-to-right.
3154      *
3155      * @throws IllegalStateException if this call is not preceded by a successful
3156      *         call to <code>setPara</code>
3157      * @stable ICU 3.8
3158      */
3159     public boolean isLeftToRight()
3160     {
3161         return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0);
3162     }
3163 
3164     /**
3165      * Return true if the line is all right-to-left text, and the base direction
3166      * is right-to-left
3167      *
3168      * @return true if the line is all right-to-left text, and the base
3169      *         direction is right-to-left
3170      *
3171      * @throws IllegalStateException if this call is not preceded by a successful
3172      *         call to <code>setPara</code>
3173      * @stable ICU 3.8
3174      */
3175     public boolean isRightToLeft()
3176     {
3177         return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1);
3178     }
3179 
3180     /**
3181      * Return true if the base direction is left-to-right
3182      *
3183      * @return true if the base direction is left-to-right
3184      *
3185      * @throws IllegalStateException if this call is not preceded by a successful
3186      *         call to <code>setPara</code> or <code>setLine</code>
3187      *
3188      * @stable ICU 3.8
3189      */
3190     public boolean baseIsLeftToRight()
3191     {
3192         return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT);
3193     }
3194 
3195     /**
3196      * Return the base level (0 if left-to-right, 1 if right-to-left).
3197      *
3198      * @return the base level
3199      *
3200      * @throws IllegalStateException if this call is not preceded by a successful
3201      *         call to <code>setPara</code> or <code>setLine</code>
3202      *
3203      * @stable ICU 3.8
3204      */
3205     public int getBaseLevel()
3206     {
3207         return getParaLevel();
3208     }
3209 
3210     /**
3211      * Compute the logical to visual run mapping
3212      */
3213     private void getLogicalToVisualRunsMap()
3214     {
3215         if (isGoodLogicalToVisualRunsMap) {
3216             return;
3217         }
3218         int count = countRuns();
3219         if ((logicalToVisualRunsMap == null) ||
3220             (logicalToVisualRunsMap.length < count)) {
3221             logicalToVisualRunsMap = new int[count];
3222         }
3223         int i;
3224         long[] keys = new long[count];
3225         for (i = 0; i < count; i++) {
3226             keys[i] = ((long)(runs[i].start)<<32) + i;
3227         }
3228         Arrays.sort(keys);
3229         for (i = 0; i < count; i++) {
3230             logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
3231         }
3232         keys = null;
3233         isGoodLogicalToVisualRunsMap = true;
3234     }
3235 
3236     /**
3237      * Return the level of the nth logical run in this line.
3238      *
3239      * @param run the index of the run, between 0 and <code>countRuns()-1</code>
3240      *
3241      * @return the level of the run
3242      *
3243      * @throws IllegalStateException if this call is not preceded by a successful
3244      *         call to <code>setPara</code> or <code>setLine</code>
3245      * @throws IllegalArgumentException if <code>run</code> is not in
3246      *         the range <code>0&lt;=run&lt;countRuns()</code>
3247      * @stable ICU 3.8
3248      */
3249     public int getRunLevel(int run)
3250     {
3251         verifyValidParaOrLine();
3252         BidiLine.getRuns(this);
3253         if (run < 0 || run >= runCount) {
3254             return getParaLevel();
3255         }
3256         getLogicalToVisualRunsMap();
3257         return runs[logicalToVisualRunsMap[run]].level;
3258     }
3259 
3260     /**
3261      * Return the index of the character at the start of the nth logical run in
3262      * this line, as an offset from the start of the line.
3263      *
3264      * @param run the index of the run, between 0 and <code>countRuns()</code>
3265      *
3266      * @return the start of the run
3267      *
3268      * @throws IllegalStateException if this call is not preceded by a successful
3269      *         call to <code>setPara</code> or <code>setLine</code>
3270      * @throws IllegalArgumentException if <code>run</code> is not in
3271      *         the range <code>0&lt;=run&lt;countRuns()</code>
3272      * @stable ICU 3.8
3273      */
3274     public int getRunStart(int run)
3275     {
3276         verifyValidParaOrLine();
3277         BidiLine.getRuns(this);
3278         if (runCount == 1) {
3279             return 0;
3280         } else if (run == runCount) {
3281             return length;
3282         }
3283         verifyIndex(run, 0, runCount);
3284         getLogicalToVisualRunsMap();
3285         return runs[logicalToVisualRunsMap[run]].start;
3286     }
3287 
3288     /**
3289      * Return the index of the character past the end of the nth logical run in
3290      * this line, as an offset from the start of the line. For example, this
3291      * will return the length of the line for the last run on the line.
3292      *
3293      * @param run the index of the run, between 0 and <code>countRuns()</code>
3294      *
3295      * @return the limit of the run
3296      *
3297      * @throws IllegalStateException if this call is not preceded by a successful
3298      *         call to <code>setPara</code> or <code>setLine</code>
3299      * @throws IllegalArgumentException if <code>run</code> is not in
3300      *         the range <code>0&lt;=run&lt;countRuns()</code>
3301      * @stable ICU 3.8
3302      */
3303     public int getRunLimit(int run)
3304     {
3305         verifyValidParaOrLine();
3306         BidiLine.getRuns(this);
3307         if (runCount == 1) {
3308             return length;
3309         }
3310         verifyIndex(run, 0, runCount);
3311         getLogicalToVisualRunsMap();
3312         int idx = logicalToVisualRunsMap[run];
3313         int len = idx == 0 ? runs[idx].limit :
3314                                 runs[idx].limit - runs[idx-1].limit;
3315         return runs[idx].start + len;
3316     }
3317 
3318     /**
3319      * Return true if the specified text requires bidi analysis. If this returns
3320      * false, the text will display left-to-right. Clients can then avoid
3321      * constructing a Bidi object. Text in the Arabic Presentation Forms area of
3322      * Unicode is presumed to already be shaped and ordered for display, and so
3323      * will not cause this method to return true.
3324      *
3325      * @param text the text containing the characters to test
3326      * @param start the start of the range of characters to test
3327      * @param limit the limit of the range of characters to test
3328      *
3329      * @return true if the range of characters requires bidi analysis
3330      *
3331      * @stable ICU 3.8
3332      */
3333     public static boolean requiresBidi(char[] text,
3334             int start,
3335             int limit)
3336     {
3337         final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT |
3338                 1 << AL |
3339                 1 << RLE |
3340                 1 << RLO |
3341                 1 << AN);
3342 
3343         if (0 > start || start > limit || limit > text.length) {
3344             throw new IllegalArgumentException("Value start " + start +
3345                       " is out of range 0 to " + limit);
3346         }
3347         for (int i = start; i < limit; ++i) {
3348             if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
3349                 Character.isLowSurrogate(text[i+1])) {
3350                 if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
3351                     return true;
3352                 }
3353             } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
3354                 return true;
3355             }
3356         }
3357         return false;
3358     }
3359 
3360     /**
3361      * Reorder the objects in the array into visual order based on their levels.
3362      * This is a utility method to use when you have a collection of objects
3363      * representing runs of text in logical order, each run containing text at a
3364      * single level. The elements at <code>index</code> from
3365      * <code>objectStart</code> up to <code>objectStart + count</code> in the
3366      * objects array will be reordered into visual order assuming
3367      * each run of text has the level indicated by the corresponding element in
3368      * the levels array (at <code>index - objectStart + levelStart</code>).
3369      *
3370      * @param levels an array representing the bidi level of each object
3371      * @param levelStart the start position in the levels array
3372      * @param objects the array of objects to be reordered into visual order
3373      * @param objectStart the start position in the objects array
3374      * @param count the number of objects to reorder
3375      * @stable ICU 3.8
3376      */
3377     public static void reorderVisually(byte[] levels,
3378             int levelStart,
3379             Object[] objects,
3380             int objectStart,
3381             int count)
3382     {
3383         if (0 > levelStart || levels.length <= levelStart) {
3384             throw new IllegalArgumentException("Value levelStart " +
3385                       levelStart + " is out of range 0 to " +
3386                       (levels.length-1));
3387         }
3388         if (0 > objectStart || objects.length <= objectStart) {
3389             throw new IllegalArgumentException("Value objectStart " +
3390                       levelStart + " is out of range 0 to " +
3391                       (objects.length-1));
3392         }
3393         if (0 > count || objects.length < (objectStart+count)) {
3394             throw new IllegalArgumentException("Value count " +
3395                       levelStart + " is out of range 0 to " +
3396                       (objects.length - objectStart));
3397         }
3398         byte[] reorderLevels = new byte[count];
3399         System.arraycopy(levels, levelStart, reorderLevels, 0, count);
3400         int[] indexMap = reorderVisual(reorderLevels);
3401         Object[] temp = new Object[count];
3402         System.arraycopy(objects, objectStart, temp, 0, count);
3403         for (int i = 0; i < count; ++i) {
3404             objects[objectStart + i] = temp[indexMap[i]];
3405         }
3406     }
3407 
3408     /**
3409      * Display the bidi internal state, used in debugging.
3410      */
3411     public String toString() {
3412         StringBuilder buf = new StringBuilder(getClass().getName());
3413 
3414         buf.append("[dir: ");
3415         buf.append(direction);
3416         buf.append(" baselevel: ");
3417         buf.append(paraLevel);
3418         buf.append(" length: ");
3419         buf.append(length);
3420         buf.append(" runs: ");
3421         if (levels == null) {
3422             buf.append("none");
3423         } else {
3424             buf.append('[');
3425             buf.append(levels[0]);
3426             for (int i = 1; i < levels.length; i++) {
3427                 buf.append(' ');
3428                 buf.append(levels[i]);
3429             }
3430             buf.append(']');
3431         }
3432         buf.append(" text: [0x");
3433         buf.append(Integer.toHexString(text[0]));
3434         for (int i = 1; i < text.length; i++) {
3435             buf.append(" 0x");
3436             buf.append(Integer.toHexString(text[i]));
3437         }
3438         buf.append("]]");
3439 
3440         return buf.toString();
3441     }
3442 
3443     /**
3444      * A class that provides access to constants defined by
3445      * java.awt.font.TextAttribute without creating a static dependency.
3446      */
3447     private static class TextAttributeConstants {
3448         // Make sure to load the AWT's TextAttribute class before using the constants, if any.
3449         static {
3450             try {
3451                 Class.forName("java.awt.font.TextAttribute", true, null);
3452             } catch (ClassNotFoundException e) {}
3453         }
3454         static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
3455 
3456         /**
3457          * TextAttribute instances (or a fake Attribute type if
3458          * java.awt.font.TextAttribute is not present)
3459          */
3460         static final AttributedCharacterIterator.Attribute RUN_DIRECTION =
3461             getTextAttribute("RUN_DIRECTION");
3462         static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING =
3463             getTextAttribute("NUMERIC_SHAPING");
3464         static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING =
3465             getTextAttribute("BIDI_EMBEDDING");
3466 
3467         /**
3468          * TextAttribute.RUN_DIRECTION_LTR
3469          */
3470         static final Boolean RUN_DIRECTION_LTR = (jafa == null) ?
3471             Boolean.FALSE : (Boolean)jafa.getTextAttributeConstant("RUN_DIRECTION_LTR");
3472 
3473         @SuppressWarnings("serial")
3474         private static AttributedCharacterIterator.Attribute
3475             getTextAttribute(String name)
3476         {
3477             if (jafa == null) {
3478                 // fake attribute
3479                 return new AttributedCharacterIterator.Attribute(name) { };
3480             } else {
3481                 return (AttributedCharacterIterator.Attribute)jafa.getTextAttributeConstant(name);
3482             }
3483         }
3484     }
3485 
3486     /**
3487      * A class that provides access to java.awt.font.NumericShaper without
3488      * creating a static dependency.
3489      */
3490     private static class NumericShapings {
3491         // Make sure to load the AWT's NumericShaper class before calling shape, if any.
3492         static {
3493             try {
3494                 Class.forName("java.awt.font.NumericShaper", true, null);
3495             } catch (ClassNotFoundException e) {}
3496         }
3497         static final JavaAWTFontAccess jafa = SharedSecrets.getJavaAWTFontAccess();
3498 
3499         /**
3500          * Invokes NumericShaping shape(text,start,count) method.
3501          */
3502         static void shape(Object shaper, char[] text, int start, int count) {
3503             if (jafa != null) {
3504                 jafa.shape(shaper, text, start, count);
3505             }
3506         }
3507     }
3508 }