< prev index next >
jdk/src/java.base/share/classes/sun/text/bidi/BidiBase.java
Print this page
*** 1,7 ****
/*
! * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
--- 1,7 ----
/*
! * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
*** 20,40 ****
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
! *******************************************************************************
! * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
! * *
! * The original version of this source code and documentation is copyrighted *
! * and owned by IBM, These materials are provided under terms of a License *
! * Agreement between IBM and Sun. This technology is protected by multiple *
! * US and International patents. This notice and attribution to IBM may not *
! * to removed. *
! *******************************************************************************
! */
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
* concept of RUNS_ONLY which is a double operation.
* It could be advantageous to divide this into 3 concepts:
--- 20,36 ----
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
+
/*
! *******************************************************************************
! * Copyright (C) 2001-2014, International Business Machines
! * Corporation and others. All Rights Reserved.
! *******************************************************************************
! */
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
* concept of RUNS_ONLY which is a double operation.
* It could be advantageous to divide this into 3 concepts:
*** 50,79 ****
* fallbacks for unsupported combinations.
*/
package sun.text.bidi;
- import java.io.IOException;
import java.lang.reflect.Array;
import java.text.AttributedCharacterIterator;
import java.text.Bidi;
import java.util.Arrays;
- import java.util.MissingResourceException;
import sun.misc.JavaAWTFontAccess;
import sun.misc.SharedSecrets;
import sun.text.normalizer.UBiDiProps;
import sun.text.normalizer.UCharacter;
import sun.text.normalizer.UTF16;
/**
*
* <h2>Bidi algorithm for ICU</h2>
*
! * This is an implementation of the Unicode Bidirectional algorithm. The
* algorithm is defined in the <a
! * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
! * version 13, also described in The Unicode Standard, Version 4.0 .
* <p>
*
* Note: Libraries that perform a bidirectional algorithm and reorder strings
* accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
* shaping (ArabicShaping) classes can be used at the core of such "Storage
--- 46,72 ----
* fallbacks for unsupported combinations.
*/
package sun.text.bidi;
import java.lang.reflect.Array;
import java.text.AttributedCharacterIterator;
import java.text.Bidi;
import java.util.Arrays;
import sun.misc.JavaAWTFontAccess;
import sun.misc.SharedSecrets;
import sun.text.normalizer.UBiDiProps;
import sun.text.normalizer.UCharacter;
import sun.text.normalizer.UTF16;
/**
*
* <h2>Bidi algorithm for ICU</h2>
*
! * This is an implementation of the Unicode Bidirectional Algorithm. The
* algorithm is defined in the <a
! * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.
* <p>
*
* Note: Libraries that perform a bidirectional algorithm and reorder strings
* accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
* shaping (ArabicShaping) classes can be used at the core of such "Storage
*** 104,113 ****
--- 97,107 ----
* The direction of a piece of text may be:
* <ul>
* <li>{@link #LTR}
* <li>{@link #RTL}
* <li>{@link #MIXED}
+ * <li>{@link #NEUTRAL}
* </ul>
*
* <h3>Basic concept: levels</h3>
*
* Levels in this API represent embedding levels according to the Unicode
*** 165,174 ****
--- 159,169 ----
* <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
* </ul>
*
* <h3>Basic concept: Reordering Options</h3>
* Reordering options can be applied during Bidi text transformations.
+ *
* <p><b>See Also:</b>
* <ul>
* <li>{@link #setReorderingOptions}
* <li>{@link #OPTION_DEFAULT}
* <li>{@link #OPTION_INSERT_MARKS}
*** 454,476 ****
* }
*
* }</pre>
*/
public class BidiBase {
! class Point {
int pos; /* position in text */
int flag; /* flag for LRM/RLM, before/after */
}
! class InsertPoints {
int size;
int confirmed;
Point[] points = new Point[0];
}
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
--- 449,586 ----
* }
*
* }</pre>
*/
+ /*
+ * General implementation notes:
+ *
+ * Throughout the implementation, there are comments like (W2) that refer to
+ * rules of the BiDi algorithm, in this example to the second rule of the
+ * resolution of weak types.
+ *
+ * For handling surrogate pairs, where two UChars form one "abstract" (or UTF-32)
+ * character according to UTF-16, the second UChar gets the directional property of
+ * the entire character assigned, while the first one gets a BN, a boundary
+ * neutral, type, which is ignored by most of the algorithm according to
+ * rule (X9) and the implementation suggestions of the BiDi algorithm.
+ *
+ * Later, adjustWSLevels() will set the level for each BN to that of the
+ * following character (UChar), which results in surrogate pairs getting the
+ * same level on each of their surrogates.
+ *
+ * In a UTF-8 implementation, the same thing could be done: the last byte of
+ * a multi-byte sequence would get the "real" property, while all previous
+ * bytes of that sequence would get BN.
+ *
+ * It is not possible to assign all those parts of a character the same real
+ * property because this would fail in the resolution of weak types with rules
+ * that look at immediately surrounding types.
+ *
+ * As a related topic, this implementation does not remove Boundary Neutral
+ * types from the input, but ignores them wherever this is relevant.
+ * For example, the loop for the resolution of the weak types reads
+ * types until it finds a non-BN.
+ * Also, explicit embedding codes are neither changed into BN nor removed.
+ * They are only treated the same way real BNs are.
+ * As stated before, adjustWSLevels() takes care of them at the end.
+ * For the purpose of conformance, the levels of all these codes
+ * do not matter.
+ *
+ * Note that this implementation modifies the dirProps
+ * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
+ * X6, N0 (replace paired brackets by L or R).
+ *
+ * In this implementation, the resolution of weak types (W1 to W6),
+ * neutrals (N1 and N2), and the assignment of the resolved level (In)
+ * are all done in one single loop, in resolveImplicitLevels().
+ * Changes of dirProp values are done on the fly, without writing
+ * them back to the dirProps array.
+ *
+ *
+ * This implementation contains code that allows bypassing steps of the
+ * algorithm that are not needed for the specific paragraph,
+ * in order to speed up the most common cases considerably,
+ * such as text that is entirely LTR, or RTL text without numbers.
+ *
+ * Most of this is done by setting a bit for each directional property
+ * in a flags variable and later checking for whether there are
+ * any LTR characters or any RTL characters, or both, whether
+ * there are any explicit embedding codes, etc.
+ *
+ * If the (Xn) steps are performed, then the flags are re-evaluated,
+ * because they will then not contain the embedding codes any more
+ * and will be adjusted for override codes, so that subsequently
+ * more bypassing may be possible than what the initial flags suggested.
+ *
+ * If the text is not mixed-directional, then the
+ * algorithm steps for the weak type resolution are not performed,
+ * and all levels are set to the paragraph level.
+ *
+ * If there are no explicit embedding codes, then the (Xn) steps
+ * are not performed.
+ *
+ * If embedding levels are supplied as a parameter, then all
+ * explicit embedding codes are ignored, and the (Xn) steps
+ * are not performed.
+ *
+ * White Space types could get the level of the run they belong to,
+ * and are checked with a test of (flags&MASK_EMBEDDING) to
+ * determine whether the paragraph direction should be considered in
+ * the flags variable.
+ *
+ * If there are no White Space types in the paragraph, then
+ * (L1) is not necessary in adjustWSLevels().
+ */
+
public class BidiBase {
! static class Point {
int pos; /* position in text */
int flag; /* flag for LRM/RLM, before/after */
}
! static class InsertPoints {
int size;
int confirmed;
Point[] points = new Point[0];
}
+ static class Opening {
+ int position; /* position of opening bracket */
+ int match; /* matching char or -position of closing bracket */
+ int contextPos; /* position of last strong char found before opening */
+ short flags; /* bits for L or R/AL found within the pair */
+ byte contextDir; /* L or R according to last strong char before opening */
+ }
+
+ static class IsoRun {
+ int contextPos; /* position of char determining context */
+ short start; /* index of first opening entry for this run */
+ short limit; /* index after last opening entry for this run */
+ byte level; /* level of this run */
+ byte lastStrong; /* bidi class of last strong char found in this run */
+ byte lastBase; /* bidi class of last base char found in this run */
+ byte contextDir; /* L or R to use as context for following openings */
+ }
+
+ static class BracketData {
+ Opening[] openings = new Opening[SIMPLE_PARAS_COUNT];
+ int isoRunLast; /* index of last used entry */
+ /* array of nested isolated sequence entries; can never exceed MAX_EXPLICIT_LEVEL
+ + 1 for index 0, + 1 for before the first isolated sequence */
+ IsoRun[] isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
+ boolean isNumbersSpecial; /*reordering mode for NUMBERS_SPECIAL */
+ }
+
+ static class Isolate {
+ int startON;
+ int start1;
+ short stateImp;
+ short state;
+ }
+
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present,
*** 480,501 ****
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
! * the righmost or leftmost strong character of the source text is RTL
* or Arabic Letter, the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e;
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
--- 590,611 ----
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
! * the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
* (that the result string, when reordered back to visual, will produce
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
/** Paragraph level setting<p>
*
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
*** 506,516 ****
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
! * the righmost or leftmost strong character of the source text is RTL
* or Arabic Letter, or if the text contains no strong character;
* the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
--- 616,626 ----
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
* is assumed to be visual LTR, and the text after reordering is required
* to be the corresponding logical string with appropriate contextual
* direction. The direction of the result string will be RTL if either
! * the rightmost or leftmost strong character of the source text is RTL
* or Arabic Letter, or if the text contains no strong character;
* the direction will be LTR otherwise.<p>
*
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
* be added at the beginning of the result string to ensure round trip
*** 518,542 ****
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f;
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 3.8
*/
! public static final byte MAX_EXPLICIT_LEVEL = 61;
/**
* Bit flag for level input.
* Overrides directional properties.
* @stable ICU 3.8
*/
! public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80;
/**
* Special value which can be returned by the mapping methods when a
* logical index has no corresponding visual index or vice-versa. This may
* happen for the logical-to-visual mapping of a Bidi control when option
--- 628,652 ----
* the original source text).
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 3.8
*/
! public static final byte MAX_EXPLICIT_LEVEL = 125;
/**
* Bit flag for level input.
* Overrides directional properties.
* @stable ICU 3.8
*/
! public static final byte LEVEL_OVERRIDE = (byte)0x80;
/**
* Special value which can be returned by the mapping methods when a
* logical index has no corresponding visual index or vice-versa. This may
* happen for the logical-to-visual mapping of a Bidi control when option
*** 553,577 ****
--- 663,771 ----
* @stable ICU 3.8
*/
public static final int MAP_NOWHERE = -1;
/**
+ * Left-to-right text.
+ * <ul>
+ * <li>As return value for <code>getDirection()</code>, it means
+ * that the source string contains no right-to-left characters, or
+ * that the source string is empty and the paragraph level is even.
+ * <li>As return value for <code>getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a left-to-right direction.
+ * </ul>
+ * @stable ICU 3.8
+ */
+ public static final byte LTR = 0;
+
+ /**
+ * Right-to-left text.
+ * <ul>
+ * <li>As return value for <code>getDirection()</code>, it means
+ * that the source string contains no left-to-right characters, or
+ * that the source string is empty and the paragraph level is odd.
+ * <li>As return value for <code>getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a right-to-left direction.
+ * </ul>
+ * @stable ICU 3.8
+ */
+ public static final byte RTL = 1;
+
+ /**
* Mixed-directional text.
+ * <p>As return value for <code>getDirection()</code>, it means
+ * that the source string contains both left-to-right and
+ * right-to-left characters.
* @stable ICU 3.8
*/
public static final byte MIXED = 2;
/**
* option bit for writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short KEEP_BASE_COMBINING = 1;
+
+ /**
+ * option bit for writeReordered():
* replace characters with the "mirrored" property in RTL runs
* by their mirror-image mappings
*
* @see #writeReordered
* @stable ICU 3.8
*/
public static final short DO_MIRRORING = 2;
+ /**
+ * option bit for writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see #setInverse
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short INSERT_LRM_FOR_NUMERIC = 4;
+
+ /**
+ * option bit for writeReordered():
+ * remove Bidi control characters
+ * (this does not affect INSERT_LRM_FOR_NUMERIC)
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see #writeReordered
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @stable ICU 3.8
+ */
+ public static final short REMOVE_BIDI_CONTROLS = 8;
+
+ /**
+ * option bit for writeReordered():
+ * write the output in reverse order
+ *
+ * <p>This has the same effect as calling <code>writeReordered()</code>
+ * first without this option, and then calling
+ * <code>writeReverse()</code> without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.</p>
+ *
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short OUTPUT_REVERSE = 16;
+
/** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
* @see #setReorderingMode
* @stable ICU 3.8
*/
private static final short REORDER_DEFAULT = 0;
*** 598,630 ****
* minimum combination which has the required display.
* @see #OPTION_INSERT_MARKS
* @see #setReorderingMode
* @stable ICU 3.8
*/
! private static final short REORDER_RUNS_ONLY = 3;
/** Reordering mode: Visual to Logical algorithm which handles numbers
* like L (same algorithm as selected by <code>setInverse(true)</code>).
* @see #setInverse
* @see #setReorderingMode
* @stable ICU 3.8
*/
! private static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
/** Reordering mode: Visual to Logical algorithm equivalent to the regular
* Logical to Visual algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
! private static final short REORDER_INVERSE_LIKE_DIRECT = 5;
/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
! private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
/* Reordering mode values must be ordered so that all the regular logical to
* visual modes come first, and all inverse Bidi modes come last.
*/
private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
--- 792,824 ----
* minimum combination which has the required display.
* @see #OPTION_INSERT_MARKS
* @see #setReorderingMode
* @stable ICU 3.8
*/
! static final short REORDER_RUNS_ONLY = 3;
/** Reordering mode: Visual to Logical algorithm which handles numbers
* like L (same algorithm as selected by <code>setInverse(true)</code>).
* @see #setInverse
* @see #setReorderingMode
* @stable ICU 3.8
*/
! static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
/** Reordering mode: Visual to Logical algorithm equivalent to the regular
* Logical to Visual algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
! static final short REORDER_INVERSE_LIKE_DIRECT = 5;
/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
* @see #setReorderingMode
* @stable ICU 3.8
*/
! static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
/* Reordering mode values must be ordered so that all the regular logical to
* visual modes come first, and all inverse Bidi modes come last.
*/
private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
*** 680,690 ****
* @see #REORDER_INVERSE_NUMBERS_AS_L
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! private static final int OPTION_INSERT_MARKS = 1;
/**
* Option bit for <code>setReorderingOptions</code>:
* remove Bidi control characters
*
--- 874,884 ----
* @see #REORDER_INVERSE_NUMBERS_AS_L
* @see #REORDER_INVERSE_LIKE_DIRECT
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
* @stable ICU 3.8
*/
! static final int OPTION_INSERT_MARKS = 1;
/**
* Option bit for <code>setReorderingOptions</code>:
* remove Bidi control characters
*
*** 702,712 ****
* @see #OPTION_INSERT_MARKS
* @see #INSERT_LRM_FOR_NUMERIC
* @see #REMOVE_BIDI_CONTROLS
* @stable ICU 3.8
*/
! private static final int OPTION_REMOVE_CONTROLS = 2;
/**
* Option bit for <code>setReorderingOptions</code>:
* process the output as part of a stream to be continued
*
--- 896,906 ----
* @see #OPTION_INSERT_MARKS
* @see #INSERT_LRM_FOR_NUMERIC
* @see #REMOVE_BIDI_CONTROLS
* @stable ICU 3.8
*/
! static final int OPTION_REMOVE_CONTROLS = 2;
/**
* Option bit for <code>setReorderingOptions</code>:
* process the output as part of a stream to be continued
*
*** 739,768 ****
* <code>OPTION_STREAMING</code>.</li></ul>
* In all cases, this option should be turned off before processing the last
* part of the text.</p>
*
* <p>When the <code>OPTION_STREAMING</code> option is used, it is
! * recommended to call <code>orderParagraphsLTR()</code> with argument
! * <code>orderParagraphsLTR</code> set to <code>true</code> before calling
* <code>setPara()</code> so that later paragraphs may be concatenated to
* previous paragraphs on the right.
* </p>
*
* @see #setReorderingMode
* @see #setReorderingOptions
* @see #getProcessedLength
- * @see #orderParagraphsLTR
* @stable ICU 3.8
*/
private static final int OPTION_STREAMING = 4;
/*
* Comparing the description of the Bidi algorithm with this implementation
* is easier with the same names for the Bidi types in the code as there.
* See UCharacterDirection
*/
! private static final byte L = 0;
private static final byte R = 1;
private static final byte EN = 2;
private static final byte ES = 3;
private static final byte ET = 4;
private static final byte AN = 5;
--- 933,960 ----
* <code>OPTION_STREAMING</code>.</li></ul>
* In all cases, this option should be turned off before processing the last
* part of the text.</p>
*
* <p>When the <code>OPTION_STREAMING</code> option is used, it is
! * recommended to call <code>orderParagraphsLTR(true)</code> before calling
* <code>setPara()</code> so that later paragraphs may be concatenated to
* previous paragraphs on the right.
* </p>
*
* @see #setReorderingMode
* @see #setReorderingOptions
* @see #getProcessedLength
* @stable ICU 3.8
*/
private static final int OPTION_STREAMING = 4;
/*
* Comparing the description of the Bidi algorithm with this implementation
* is easier with the same names for the Bidi types in the code as there.
* See UCharacterDirection
*/
! /* private */ static final byte L = 0;
private static final byte R = 1;
private static final byte EN = 2;
private static final byte ES = 3;
private static final byte ET = 4;
private static final byte AN = 5;
*** 777,797 ****
private static final byte RLE = 14;
private static final byte RLO = 15;
private static final byte PDF = 16;
private static final byte NSM = 17;
private static final byte BN = 18;
! private static final int MASK_R_AL = (1 << R | 1 << AL);
private static final char CR = '\r';
private static final char LF = '\n';
static final int LRM_BEFORE = 1;
static final int LRM_AFTER = 2;
static final int RLM_BEFORE = 4;
static final int RLM_AFTER = 8;
/*
* reference to parent paragraph object (reference to self if this object is
* a paragraph object); set to null in a newly opened object; set to a
* real value after a successful execution of setPara or setLine
*/
--- 969,1046 ----
private static final byte RLE = 14;
private static final byte RLO = 15;
private static final byte PDF = 16;
private static final byte NSM = 17;
private static final byte BN = 18;
+ private static final byte FSI = 19;
+ private static final byte LRI = 20;
+ private static final byte RLI = 21;
+ private static final byte PDI = 22;
+ private static final byte ENL = PDI + 1; /* EN after W7 */
+ private static final byte ENR = ENL + 1; /* EN not subject to W7 */
! // Number of directional types
! private static final int CHAR_DIRECTION_COUNT = 23;
!
! /**
! * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
! * Used in UAX #9: Unicode Bidirectional Algorithm
! * (http://www.unicode.org/reports/tr9/)
! * Returns UCharacter.BidiPairedBracketType values.
! * @stable ICU 52
! */
! public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
!
! /**
! * Bidi Paired Bracket Type constants.
! *
! * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
! * @stable ICU 52
! */
! public static interface BidiPairedBracketType {
! /**
! * Not a paired bracket.
! * @stable ICU 52
! */
! public static final int NONE = 0;
! /**
! * Open paired bracket.
! * @stable ICU 52
! */
! public static final int OPEN = 1;
! /**
! * Close paired bracket.
! * @stable ICU 52
! */
! public static final int CLOSE = 2;
! /**
! * @stable ICU 52
! */
! public static final int COUNT = 3;
! }
!
! /* number of paras entries allocated initially */
! static final int SIMPLE_PARAS_COUNT = 10;
private static final char CR = '\r';
private static final char LF = '\n';
static final int LRM_BEFORE = 1;
static final int LRM_AFTER = 2;
static final int RLM_BEFORE = 4;
static final int RLM_AFTER = 8;
+ /* flags for Opening.flags */
+ static final byte FOUND_L = (byte)DirPropFlag(L);
+ static final byte FOUND_R = (byte)DirPropFlag(R);
+
+ /*
+ * The following bit is used for the directional isolate status.
+ * Stack entries corresponding to isolate sequences are greater than ISOLATE.
+ */
+ static final int ISOLATE = 0x0100;
+
/*
* reference to parent paragraph object (reference to self if this object is
* a paragraph object); set to null in a newly opened object; set to a
* real value after a successful execution of setPara or setLine
*/
*** 826,835 ****
--- 1075,1093 ----
byte[] dirPropsMemory = new byte[1];
byte[] levelsMemory = new byte[1];
byte[] dirProps;
byte[] levels;
+ /* are we performing an approximation of the "inverse Bidi" algorithm? */
+ boolean isInverse;
+
+ /* are we using the basic algorithm or its variation? */
+ int reorderingMode;
+
+ /* bitmask for reordering options */
+ int reorderingOptions;
+
/* must block separators receive level 0? */
boolean orderParagraphsLTR;
/* the paragraph level */
byte paraLevel;
*** 853,882 ****
/* characters after trailingWSStart are WS and are */
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
int trailingWSStart;
! /* fields for paragraph handling */
! int paraCount; /* set in getDirProps() */
! int[] parasMemory = new int[1];
! int[] paras; /* limits of paragraphs, filled in
! ResolveExplicitLevels() or CheckExplicitLevels() */
!
! /* for single paragraph text, we only need a tiny array of paras (no allocation) */
! int[] simpleParas = {0};
/* fields for line reordering */
int runCount; /* ==-1: runs not set up yet */
BidiRun[] runsMemory = new BidiRun[0];
BidiRun[] runs;
/* for non-mixed text, we only need a tiny array of runs (no allocation) */
BidiRun[] simpleRuns = {new BidiRun()};
/* mapping of runs in logical order to visual order */
int[] logicalToVisualRunsMap;
-
/* flag to indicate that the map has been updated */
boolean isGoodLogicalToVisualRunsMap;
/* for inverse Bidi with insertion of directional marks */
InsertPoints insertPoints = new InsertPoints();
--- 1111,1145 ----
/* characters after trailingWSStart are WS and are */
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
int trailingWSStart;
! /* fields for paragraph handling, set in getDirProps() */
! int paraCount;
! int[] paras_limit = new int[SIMPLE_PARAS_COUNT];
! byte[] paras_level = new byte[SIMPLE_PARAS_COUNT];
/* fields for line reordering */
int runCount; /* ==-1: runs not set up yet */
BidiRun[] runsMemory = new BidiRun[0];
BidiRun[] runs;
/* for non-mixed text, we only need a tiny array of runs (no allocation) */
BidiRun[] simpleRuns = {new BidiRun()};
+ /* fields for managing isolate sequences */
+ Isolate[] isolates;
+
+ /* maximum or current nesting depth of isolate sequences */
+ /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
+ nesting encountered.
+ Within resolveImplicitLevels(), this is the index of the current isolates
+ stack entry. */
+ int isolateCount;
+
/* mapping of runs in logical order to visual order */
int[] logicalToVisualRunsMap;
/* flag to indicate that the map has been updated */
boolean isGoodLogicalToVisualRunsMap;
/* for inverse Bidi with insertion of directional marks */
InsertPoints insertPoints = new InsertPoints();
*** 892,918 ****
*/
static int DirPropFlag(byte dir) {
return (1 << dir);
}
! /*
! * The following bit is ORed to the property of characters in paragraphs
! * with contextual RTL direction when paraLevel is contextual.
! */
! static final byte CONTEXT_RTL_SHIFT = 6;
! static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40
! static byte NoContextRTL(byte dir)
! {
! return (byte)(dir & ~CONTEXT_RTL);
! }
!
! /*
! * The following is a variant of DirProp.DirPropFlag() which ignores the
! * CONTEXT_RTL bit.
! */
! static int DirPropFlagNC(byte dir) {
! return (1<<(dir & ~CONTEXT_RTL));
}
static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
/* to avoid some conditional statements, use tiny constant arrays */
--- 1155,1166 ----
*/
static int DirPropFlag(byte dir) {
return (1 << dir);
}
! boolean testDirPropFlagAt(int flag, int index) {
! return ((DirPropFlag(dirProps[index]) & flag) != 0);
}
static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
/* to avoid some conditional statements, use tiny constant arrays */
*** 921,964 ****
static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
! /*
! * are there any characters that are LTR?
! */
static final int MASK_LTR =
! DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO);
! /*
! * are there any characters that are RTL?
! */
! static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO);
/* explicit embedding codes */
! private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO);
! private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO);
! private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF);
private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
/* paragraph and segment separators */
private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
/* all types that are counted as White Space or Neutral in some steps */
! static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT;
! private static final int MASK_N = DirPropFlag(ON)|MASK_WS;
/* types that are neutrals or could become neutrals in (Wn) */
! private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N;
/*
* These types may be changed to "e",
* the embedding type (L or R) of the run,
* in the Bidi algorithm (N2)
*/
! static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
/*
* the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
*/
private static byte GetLRFromLevel(byte level)
--- 1169,1210 ----
static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
+ /* map a strong type to a direction: L stays L, any other value yields R */
+ static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
+ /* return level with the LEVEL_OVERRIDE bit(s) cleared */
+ static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
! /* are there any characters that are LTR or RTL? */
static final int MASK_LTR =
! DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
! static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
! static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
/* explicit embedding codes */
! private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
+ /* explicit isolate codes */
+ private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
+
/* paragraph and segment separators */
private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
/* all types that are counted as White Space or Neutral in some steps */
! static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO;
/* types that are neutrals or could become neutrals in (Wn) */
! private static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS;
/*
* These types may be changed to "e",
* the embedding type (L or R) of the run,
* in the Bidi algorithm (N2)
*/
! private static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
/*
* the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
*/
private static byte GetLRFromLevel(byte level)
*** 966,999 ****
return (byte)(level & 1);
}
private static boolean IsDefaultLevel(byte level)
{
! return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR);
! }
!
! byte GetParaLevelAt(int index)
! {
! return (defaultParaLevel != 0) ?
! (byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel;
}
static boolean IsBidiControlChar(int c)
{
/* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
! return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
}
! public void verifyValidPara()
{
! if (this != this.paraBidi) {
! throw new IllegalStateException("");
}
}
! public void verifyValidParaOrLine()
{
BidiBase para = this.paraBidi;
/* verify Para */
if (this == para) {
return;
--- 1212,1240 ----
return (byte)(level & 1);
}
/* true if every bit of LEVEL_DEFAULT_LTR is set in level */
private static boolean IsDefaultLevel(byte level)
{
! return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR);
}
static boolean IsBidiControlChar(int c)
{
/* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) or
0x2066 to 0x2069 (LRI, RLI, FSI, PDI) */
! return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))
! || ((c >= 0x2066) && (c <= 0x2069)));
}
! /* Verify that this object is its own paragraph object (this == paraBidi);
!    throws IllegalStateException otherwise. */
! void verifyValidPara()
{
! if (!(this == this.paraBidi)) {
! throw new IllegalStateException();
}
}
! void verifyValidParaOrLine()
{
BidiBase para = this.paraBidi;
/* verify Para */
if (this == para) {
return;
*** 1002,1027 ****
if ((para == null) || (para != para.paraBidi)) {
throw new IllegalStateException();
}
}
! public void verifyRange(int index, int start, int limit)
{
if (index < start || index >= limit) {
throw new IllegalArgumentException("Value " + index +
" is out of range " + start + " to " + limit);
}
}
- public void verifyIndex(int index, int start, int limit)
- {
- if (index < start || index >= limit) {
- throw new ArrayIndexOutOfBoundsException("Index " + index +
- " is out of range " + start + " to " + limit);
- }
- }
-
/**
* Allocate a <code>Bidi</code> object with preallocated memory
* for internal structures.
* This method provides a <code>Bidi</code> object like the default constructor
* but it also preallocates memory for internal structures
--- 1243,1260 ----
if ((para == null) || (para != para.paraBidi)) {
throw new IllegalStateException();
}
}
! /* Verify that start <= index < limit; throws IllegalArgumentException
!    (naming the offending values in its message) otherwise. */
! void verifyRange(int index, int start, int limit)
{
if (index < start || index >= limit) {
throw new IllegalArgumentException("Value " + index +
" is out of range " + start + " to " + limit);
}
}
/**
* Allocate a <code>Bidi</code> object with preallocated memory
* for internal structures.
* This method provides a <code>Bidi</code> object like the default constructor
* but it also preallocates memory for internal structures
*** 1073,1088 ****
paraLevel = 0;
defaultParaLevel = 0;
direction = 0;
*/
/* get Bidi properties */
! try {
! bdp = UBiDiProps.getSingleton();
! }
! catch (IOException e) {
! throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
! }
/* allocate memory for arrays as requested */
if (maxLength > 0) {
getInitialDirPropsMemory(maxLength);
getInitialLevelsMemory(maxLength);
--- 1306,1316 ----
paraLevel = 0;
defaultParaLevel = 0;
direction = 0;
*/
/* get Bidi properties */
! bdp = UBiDiProps.INSTANCE;
/* allocate memory for arrays as requested */
if (maxLength > 0) {
getInitialDirPropsMemory(maxLength);
getInitialLevelsMemory(maxLength);
*** 1178,1232 ****
private void getInitialLevelsMemory(int len)
{
getLevelsMemory(true, len);
}
- private void getInitialParasMemory(int len)
- {
- Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len);
- parasMemory = (int[]) array;
- }
-
private void getInitialRunsMemory(int len)
{
getRunsMemory(true, len);
}
! /* perform (P2)..(P3) ------------------------------------------------------- */
private void getDirProps()
{
int i = 0, i0, i1;
flags = 0; /* collect all directionalities in the text */
int uchar;
byte dirProp;
! byte paraDirDefault = 0; /* initialize to avoid compiler warnings */
boolean isDefaultLevel = IsDefaultLevel(paraLevel);
/* for inverse Bidi, the default para level is set to RTL if there is a
strong R or AL character at either end of the text */
lastArabicPos = -1;
! controlCount = 0;
! final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */
! final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */
! final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */
!
! int state;
! int paraStart = 0; /* index of first char in paragraph */
! byte paraDir; /* == CONTEXT_RTL within paragraphs
! starting with strong R char */
! byte lastStrongDir=0; /* for default level & inverse Bidi */
! int lastStrongLTR=0; /* for STREAMING option */
if (isDefaultLevel) {
! paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0;
! paraDir = paraDirDefault;
! lastStrongDir = paraDirDefault;
! state = LOOKING_FOR_STRONG;
} else {
! state = NOT_CONTEXTUAL;
! paraDir = 0;
}
/* count paragraphs and determine the paragraph level (P2..P3) */
/*
* see comment on constant fields:
* the LEVEL_DEFAULT_XXX values are designed so that
--- 1406,1522 ----
private void getInitialLevelsMemory(int len)
{
getLevelsMemory(true, len);
}
private void getInitialRunsMemory(int len)
{
getRunsMemory(true, len);
}
! /**
! * Is this <code>Bidi</code> object set to perform the inverse Bidi
! * algorithm?
! * <p>Note: calling this method after setting the reordering mode with
! * <code>setReorderingMode</code> will return <code>true</code> if the
! * reordering mode was set to
! * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code>
! * for all other values.</p>
! *
! * @return <code>true</code> if the <code>Bidi</code> object is set to
! * perform the inverse Bidi algorithm by handling numbers as L.
! *
! * @see #setInverse
! * @see #setReorderingMode
! * @see #REORDER_INVERSE_NUMBERS_AS_L
! * @stable ICU 3.8
! */
! public boolean isInverse() {
! return isInverse;
! }
!
! /* perform (P2)..(P3) ------------------------------------------------------- */
!
! /*
! * Check that there are enough entries in the arrays paras_limit and paras_level
! * to hold paraCount paragraphs. If paraCount exceeds paras_level.length, both
! * arrays are replaced by new ones with twice paraCount entries and the first
! * oldLength entries of the previous arrays are copied over.
! */
! private void checkParaCount() {
! int[] saveLimits;
! byte[] saveLevels;
! int count = paraCount;
! if (count <= paras_level.length)
! return;
! int oldLength = paras_level.length;
! saveLimits = paras_limit;
! saveLevels = paras_level;
! try {
! paras_limit = new int[count * 2];
! paras_level = new byte[count * 2];
! } catch (Exception e) {
! /* NOTE(review): a failed array allocation raises OutOfMemoryError, which is
!    an Error and therefore not caught by this clause; confirm whether this
!    handler is still wanted. */
! throw new OutOfMemoryError("Failed to allocate memory for paras");
! }
! System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength);
! System.arraycopy(saveLevels, 0, paras_level, 0, oldLength);
! }
!
! /*
! * Get the directional properties for the text, calculate the flags bit-set, and
! * determine the paragraph level if necessary (in paras_level[i]).
! * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
! * When encountering an FSI, it is initially replaced with an LRI, which is the
! * default. Only if a strong R or AL is found within its scope will the LRI be
! * replaced by an RLI.
! */
! static final int NOT_SEEKING_STRONG = 0; /* 0: not contextual paraLevel, not after FSI */
! static final int SEEKING_STRONG_FOR_PARA = 1; /* 1: looking for first strong char in para */
! static final int SEEKING_STRONG_FOR_FSI = 2; /* 2: looking for first strong after FSI */
! static final int LOOKING_FOR_PDI = 3; /* 3: found strong after FSI, looking for PDI */
private void getDirProps()
{
int i = 0, i0, i1;
flags = 0; /* collect all directionalities in the text */
int uchar;
byte dirProp;
! byte defaultParaLevel = 0; /* initialize to avoid compiler warnings */
boolean isDefaultLevel = IsDefaultLevel(paraLevel);
/* for inverse Bidi, the default para level is set to RTL if there is a
strong R or AL character at either end of the text */
+ boolean isDefaultLevelInverse=isDefaultLevel &&
+ (reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
+ reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
lastArabicPos = -1;
! int controlCount = 0;
! boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0;
! byte state;
! byte lastStrong = ON; /* for default level & inverse Bidi */
! /* The following stacks are used to manage isolate sequences. Those
! sequences may be nested, but obviously never more deeply than the
! maximum explicit embedding level.
! stackLast is the index of the last used entry in the stack. A value of -1
! means that there is no open isolate sequence.
! stackLast is reset to -1 on paragraph boundaries. */
! /* The following stack contains the position of the initiator of
! each open isolate sequence */
! int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1];
! /* The following stack contains the last known state before
! encountering the initiator of an isolate sequence */
! byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1];
! int stackLast=-1;
!
! if ((reorderingOptions & OPTION_STREAMING) != 0)
! length = 0;
! defaultParaLevel = (byte)(paraLevel & 1);
if (isDefaultLevel) {
! paras_level[0] = defaultParaLevel;
! lastStrong = defaultParaLevel;
! state = SEEKING_STRONG_FOR_PARA;
} else {
! paras_level[0] = paraLevel;
! state = NOT_SEEKING_STRONG;
}
/* count paragraphs and determine the paragraph level (P2..P3) */
/*
* see comment on constant fields:
* the LEVEL_DEFAULT_XXX values are designed so that
*** 1234,1347 ****
*/
for (i = 0; i < originalLength; /* i is incremented in the loop */) {
i0 = i; /* index of first code unit */
uchar = UTF16.charAt(text, 0, originalLength, i);
! i += Character.charCount(uchar);
i1 = i - 1; /* index of last code unit, gets the directional property */
! dirProp = (byte)bdp.getClass(uchar);
!
flags |= DirPropFlag(dirProp);
! dirProps[i1] = (byte)(dirProp | paraDir);
if (i1 > i0) { /* set previous code units' properties to BN */
flags |= DirPropFlag(BN);
do {
! dirProps[--i1] = (byte)(BN | paraDir);
} while (i1 > i0);
}
! if (state == LOOKING_FOR_STRONG) {
if (dirProp == L) {
! state = FOUND_STRONG_CHAR;
! if (paraDir != 0) {
! paraDir = 0;
! for (i1 = paraStart; i1 < i; i1++) {
! dirProps[i1] &= ~CONTEXT_RTL;
}
}
continue;
}
if (dirProp == R || dirProp == AL) {
! state = FOUND_STRONG_CHAR;
! if (paraDir == 0) {
! paraDir = CONTEXT_RTL;
! for (i1 = paraStart; i1 < i; i1++) {
! dirProps[i1] |= CONTEXT_RTL;
}
}
continue;
}
}
- if (dirProp == L) {
- lastStrongDir = 0;
- lastStrongLTR = i; /* i is index to next character */
}
! else if (dirProp == R) {
! lastStrongDir = CONTEXT_RTL;
}
! else if (dirProp == AL) {
! lastStrongDir = CONTEXT_RTL;
! lastArabicPos = i-1;
}
- else if (dirProp == B) {
if (i < originalLength) { /* B not last char in text */
- if (!((uchar == (int)CR) && (text[i] == (int)LF))) {
paraCount++;
! }
if (isDefaultLevel) {
! state=LOOKING_FOR_STRONG;
! paraStart = i; /* i is index to next character */
! paraDir = paraDirDefault;
! lastStrongDir = paraDirDefault;
}
}
}
}
if (isDefaultLevel) {
! paraLevel = GetParaLevelAt(0);
}
!
! /* The following line does nothing new for contextual paraLevel, but is
! needed for absolute paraLevel. */
! flags |= DirPropFlagLR(paraLevel);
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
flags |= DirPropFlag(L);
}
}
/* perform (X1)..(X9) ------------------------------------------------------- */
/* determine if the text is mixed-directional or single-directional */
private byte directionFromFlags() {
/* if the text contains AN and neutrals, then some neutrals may become RTL */
if (!((flags & MASK_RTL) != 0 ||
((flags & DirPropFlag(AN)) != 0 &&
(flags & MASK_POSSIBLE_N) != 0))) {
! return Bidi.DIRECTION_LEFT_TO_RIGHT;
} else if ((flags & MASK_LTR) == 0) {
! return Bidi.DIRECTION_RIGHT_TO_LEFT;
} else {
return MIXED;
}
}
/*
* Resolve the explicit levels as specified by explicit embedding codes.
* Recalculate the flags to have them reflect the real properties
* after taking the explicit embeddings into account.
*
! * The Bidi algorithm is designed to result in the same behavior whether embedding
* levels are externally specified (from "styled text", supposedly the preferred
! * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
! * That is why (X9) instructs to remove all explicit codes (and BN).
! * However, in a real implementation, this removal of these codes and their index
* positions in the plain text is undesirable since it would result in
* reallocated, reindexed text.
* Instead, this implementation leaves the codes in there and just ignores them
* in the subsequent processing.
! * In order to get the same reordering behavior, positions with a BN or an
* explicit embedding code just get the same level assigned as the last "real"
* character.
*
* Some implementations, not this one, then overwrite some of these
* directionality properties at "real" same-level-run boundaries by
--- 1524,2056 ----
*/
for (i = 0; i < originalLength; /* i is incremented in the loop */) {
i0 = i; /* index of first code unit */
uchar = UTF16.charAt(text, 0, originalLength, i);
! i += UTF16.getCharCount(uchar);
i1 = i - 1; /* index of last code unit, gets the directional property */
! dirProp = (byte)getCustomizedClass(uchar);
flags |= DirPropFlag(dirProp);
! dirProps[i1] = dirProp;
if (i1 > i0) { /* set previous code units' properties to BN */
flags |= DirPropFlag(BN);
do {
! dirProps[--i1] = BN;
} while (i1 > i0);
}
! if (removeBidiControls && IsBidiControlChar(uchar)) {
! controlCount++;
! }
if (dirProp == L) {
! if (state == SEEKING_STRONG_FOR_PARA) {
! paras_level[paraCount - 1] = 0;
! state = NOT_SEEKING_STRONG;
! }
! else if (state == SEEKING_STRONG_FOR_FSI) {
! if (stackLast <= MAX_EXPLICIT_LEVEL) {
! /* no need for next statement, already set by default */
! /* dirProps[isolateStartStack[stackLast]] = LRI; */
! flags |= DirPropFlag(LRI);
}
+ state = LOOKING_FOR_PDI;
}
+ lastStrong = L;
continue;
}
if (dirProp == R || dirProp == AL) {
! if (state == SEEKING_STRONG_FOR_PARA) {
! paras_level[paraCount - 1] = 1;
! state = NOT_SEEKING_STRONG;
! }
! else if (state == SEEKING_STRONG_FOR_FSI) {
! if (stackLast <= MAX_EXPLICIT_LEVEL) {
! dirProps[isolateStartStack[stackLast]] = RLI;
! flags |= DirPropFlag(RLI);
! }
! state = LOOKING_FOR_PDI;
! }
! lastStrong = R;
! if (dirProp == AL)
! lastArabicPos = i - 1;
! continue;
}
+ if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */
+ stackLast++;
+ if (stackLast <= MAX_EXPLICIT_LEVEL) {
+ isolateStartStack[stackLast] = i - 1;
+ previousStateStack[stackLast] = state;
+ }
+ if (dirProp == FSI) {
+ dirProps[i-1] = LRI; /* default if no strong char */
+ state = SEEKING_STRONG_FOR_FSI;
}
+ else
+ state = LOOKING_FOR_PDI;
continue;
}
+ if (dirProp == PDI) {
+ if (state == SEEKING_STRONG_FOR_FSI) {
+ if (stackLast <= MAX_EXPLICIT_LEVEL) {
+ /* no need for next statement, already set by default */
+ /* dirProps[isolateStartStack[stackLast]] = LRI; */
+ flags |= DirPropFlag(LRI);
}
}
! if (stackLast >= 0) {
! if (stackLast <= MAX_EXPLICIT_LEVEL)
! state = previousStateStack[stackLast];
! stackLast--;
}
! continue;
! }
! if (dirProp == B) {
! if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */
! continue;
! paras_limit[paraCount - 1] = i;
! if (isDefaultLevelInverse && lastStrong == R)
! paras_level[paraCount - 1] = 1;
! if ((reorderingOptions & OPTION_STREAMING) != 0) {
! /* When streaming, we only process whole paragraphs
! thus some updates are only done on paragraph boundaries */
! length = i; /* i is index to next character */
! this.controlCount = controlCount;
}
if (i < originalLength) { /* B not last char in text */
paraCount++;
! checkParaCount(); /* check that there is enough memory for a new para entry */
if (isDefaultLevel) {
! paras_level[paraCount - 1] = defaultParaLevel;
! state = SEEKING_STRONG_FOR_PARA;
! lastStrong = defaultParaLevel;
! } else {
! paras_level[paraCount - 1] = paraLevel;
! state = NOT_SEEKING_STRONG;
! }
! stackLast = -1;
! }
! continue;
! }
! }
! /* +Ignore still open isolate sequences with overflow */
! if (stackLast > MAX_EXPLICIT_LEVEL) {
! stackLast = MAX_EXPLICIT_LEVEL;
! state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
! }
! /* Resolve direction of still unresolved open FSI sequences */
! while (stackLast >= 0) {
! if (state == SEEKING_STRONG_FOR_FSI) {
! /* no need for next statement, already set by default */
! /* dirProps[isolateStartStack[stackLast]] = LRI; */
! flags |= DirPropFlag(LRI);
! break;
}
+ state = previousStateStack[stackLast];
+ stackLast--;
}
+ /* When streaming, ignore text after the last paragraph separator */
+ if ((reorderingOptions & OPTION_STREAMING) != 0) {
+ if (length < originalLength)
+ paraCount--;
+ } else {
+ paras_limit[paraCount - 1] = originalLength;
+ this.controlCount = controlCount;
}
+ /* For inverse bidi, default para direction is RTL if there is
+ a strong R or AL at either end of the paragraph */
+ if (isDefaultLevelInverse && lastStrong == R) {
+ paras_level[paraCount - 1] = 1;
}
if (isDefaultLevel) {
! paraLevel = paras_level[0];
}
! /* The following is needed to resolve the text direction for default level
! paragraphs containing no strong character */
! for (i = 0; i < paraCount; i++)
! flags |= DirPropFlagLR(paras_level[i]);
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
flags |= DirPropFlag(L);
}
}
+ /* Determine the paragraph level at position pindex.
+    Returns paraLevel when defaultParaLevel is 0 or when pindex lies before
+    paras_limit[0]. Otherwise returns the paras_level entry of the first
+    paragraph (searching from index 1) whose limit is greater than pindex,
+    or of the last paragraph if pindex is at or beyond every limit. */
+ byte GetParaLevelAt(int pindex)
+ {
+ if (defaultParaLevel == 0 || pindex < paras_limit[0])
+ return paraLevel;
+ int i;
+ for (i = 1; i < paraCount; i++)
+ if (pindex < paras_limit[i])
+ break;
+ /* clamp to the last paragraph when no limit exceeds pindex */
+ if (i >= paraCount)
+ i = paraCount - 1;
+ return paras_level[i];
+ }
+
+ /* Functions for handling paired brackets ----------------------------------- */
+
+ /* In the isoRuns array, the first entry is used for text outside of any
+ isolate sequence. Higher entries are used for each more deeply nested
+ isolate sequence. isoRunLast is the index of the last used entry. The
+ openings array is used to note the data of opening brackets not yet
+ matched by a closing bracket, or matched but still susceptible to change
+ level.
+ Each isoRun entry contains the index of the first and
+ one-after-last openings entries for pending opening brackets it
+ contains. The next openings entry to use is the one-after-last of the
+ most deeply nested isoRun entry.
+ isoRun entries also contain their current embedding level and the last
+ encountered strong character, since these will be needed to resolve
+ the level of paired brackets. */
+
+ /* Initialize bd for bracket processing: entry 0 of isoRuns is created with an
+    empty range of openings (start == limit == 0), the paragraph level at
+    position 0, and lastStrong/lastBase/contextDir all set to the direction bit
+    of that level. The openings array is allocated with SIMPLE_PARAS_COUNT
+    entries, and isNumbersSpecial records whether the reordering mode is one of
+    the two NUMBERS_SPECIAL modes. */
+ private void bracketInit(BracketData bd) {
+ bd.isoRunLast = 0;
+ bd.isoRuns[0] = new IsoRun();
+ bd.isoRuns[0].start = 0;
+ bd.isoRuns[0].limit = 0;
+ bd.isoRuns[0].level = GetParaLevelAt(0);
+ bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1);
+ bd.isoRuns[0].contextPos = 0;
+ bd.openings = new Opening[SIMPLE_PARAS_COUNT];
+ bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL ||
+ reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
+ }
+
+ /* Paragraph boundary: go back to entry 0 of isoRuns (dropping any nested
+    isolate runs), set its limit to 0, and restart it at the given level with
+    lastStrong/lastBase/contextDir set to the direction bit of that level. */
+ private void bracketProcessB(BracketData bd, byte level) {
+ bd.isoRunLast = 0;
+ bd.isoRuns[0].limit = 0;
+ bd.isoRuns[0].level = level;
+ bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1);
+ bd.isoRuns[0].contextPos = 0;
+ }
+
+ /* Embedding boundary (LRE, LRO, RLE, RLO, PDF).
+    Does nothing if the character at lastCcPos is an isolate code (MASK_ISO).
+    Otherwise discards the pending openings of the current isolate run
+    (limit = start), sets its level to embeddingLevel and sets
+    lastStrong/lastBase/contextDir to the direction bit of the context level;
+    the context level is replaced by embeddingLevel when the latter, ignoring
+    the override bit, is the higher of the two. */
+ private void bracketProcessBoundary(BracketData bd, int lastCcPos,
+ byte contextLevel, byte embeddingLevel) {
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */
+ return;
+ if (NoOverride(embeddingLevel) > NoOverride(contextLevel)) /* not a PDF */
+ contextLevel = embeddingLevel;
+ pLastIsoRun.limit = pLastIsoRun.start;
+ pLastIsoRun.level = embeddingLevel;
+ pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1);
+ pLastIsoRun.contextPos = lastCcPos;
+ }
+
+ /* Isolate initiator (LRI or RLI): mark the current isolate run's lastBase as
+    ON, then move to the next isoRuns entry (allocating it on first use).
+    The new entry's openings range starts empty at the previous run's limit;
+    its level is the given level and its lastStrong/lastBase/contextDir are
+    the direction bit of that level. */
+ private void bracketProcessLRI_RLI(BracketData bd, byte level) {
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ short lastLimit;
+ pLastIsoRun.lastBase = ON;
+ lastLimit = pLastIsoRun.limit;
+ bd.isoRunLast++;
+ pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ if (pLastIsoRun == null)
+ pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun();
+ pLastIsoRun.start = pLastIsoRun.limit = lastLimit;
+ pLastIsoRun.level = level;
+ pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1);
+ pLastIsoRun.contextPos = 0;
+ }
+
+ /* PDI: return to the enclosing isolate run and mark its lastBase as ON.
+    NOTE(review): isoRunLast is decremented without a bounds check; presumably
+    callers only invoke this for a PDI that matches an open isolate - confirm. */
+ private void bracketProcessPDI(BracketData bd) {
+ IsoRun pLastIsoRun;
+ bd.isoRunLast--;
+ pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ pLastIsoRun.lastBase = ON;
+ }
+
+ /* Newly found opening bracket: create an openings entry at the current
+    isolate run's limit and advance that limit.
+    match is the closing character to look for and position is the index of
+    the opening bracket in the text. The entry inherits contextDir and
+    contextPos from the current isolate run and starts with flags == 0.
+    The openings array is doubled in size when it has no free entry left. */
+ private void bracketAddOpening(BracketData bd, char match, int position) {
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ Opening pOpening;
+ if (pLastIsoRun.limit >= bd.openings.length) { /* no available new entry */
+ Opening[] saveOpenings = bd.openings;
+ int count;
+ try {
+ count = bd.openings.length;
+ bd.openings = new Opening[count * 2];
+ } catch (Exception e) {
+ /* NOTE(review): a failed array allocation raises OutOfMemoryError, which is
+    an Error and therefore not caught by this clause; confirm whether this
+    handler is still wanted. */
+ throw new OutOfMemoryError("Failed to allocate memory for openings");
+ }
+ System.arraycopy(saveOpenings, 0, bd.openings, 0, count);
+ }
+ /* entries are allocated lazily and reused afterwards */
+ pOpening = bd.openings[pLastIsoRun.limit];
+ if (pOpening == null)
+ pOpening = bd.openings[pLastIsoRun.limit]= new Opening();
+ pOpening.position = position;
+ pOpening.match = match;
+ pOpening.contextDir = pLastIsoRun.contextDir;
+ pOpening.contextPos = pLastIsoRun.contextPos;
+ pOpening.flags = 0;
+ pLastIsoRun.limit++;
+ }
+
+ /* Change N0c1 to N0c2 when a preceding bracket is assigned the embedding level.
+    Scans the openings of the current isolate run located after openingIndex.
+    Only entries with a negative match (pairs resolved through N0c, whose match
+    holds the negated closing position) are candidates. A candidate whose
+    context position is not after newPropPosition, whose opening position is
+    after newPropPosition, and whose context direction differs from newProp
+    has both of its brackets set to newProp and is frozen (match = 0);
+    the change is then propagated recursively from both bracket positions. */
+ private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) {
+ /* This function calls itself recursively */
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ Opening qOpening;
+ int k, openingPosition, closingPosition;
+ for (k = openingIndex+1; k < pLastIsoRun.limit; k++) {
+ qOpening = bd.openings[k];
+ if (qOpening.match >= 0) /* not an N0c match */
+ continue;
+ if (newPropPosition < qOpening.contextPos)
+ break;
+ if (newPropPosition >= qOpening.position)
+ continue;
+ if (newProp == qOpening.contextDir)
+ break;
+ openingPosition = qOpening.position;
+ dirProps[openingPosition] = newProp;
+ closingPosition = -(qOpening.match);
+ dirProps[closingPosition] = newProp;
+ qOpening.match = 0; /* prevent further changes */
+ fixN0c(bd, k, openingPosition, newProp);
+ fixN0c(bd, k, closingPosition, newProp);
+ }
+ }
+
+ /* Process a closing bracket found at position that matches the pending
+    opening bd.openings[openIdx] of the current isolate run.
+    Returns the property (L or R) written to dirProps for both brackets when
+    N0b or N0c applies, or ON when N0d applies; in the N0d case dirProps is
+    left untouched and the opening, with everything recorded after it, is
+    forgotten. */
+ private byte bracketProcessClosing(BracketData bd, int openIdx, int position) {
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ Opening pOpening, qOpening;
+ byte direction;
+ boolean stable;
+ byte newProp;
+ pOpening = bd.openings[openIdx];
+ direction = (byte)(pLastIsoRun.level & 1);
+ stable = true; /* assume stable until proved otherwise */
+
+ /* The stable flag is set when brackets are paired and their
+ level is resolved and cannot be changed by what will be
+ found later in the source string.
+ An unstable match can occur only when applying N0c, where
+ the resolved level depends on the preceding context, and
+ this context may be affected by text occurring later.
+ Example: RTL paragraph containing: abc[(latin) HEBREW]
+ When the closing parenthesis is encountered, it appears
+ that N0c1 must be applied since 'abc' sets an opposite
+ direction context and both parentheses receive level 2.
+ However, when the closing square bracket is processed,
+ N0b applies because of 'HEBREW' being included within the
+ brackets, thus the square brackets are treated like R and
+ receive level 1. However, this changes the preceding
+ context of the opening parenthesis, and it now appears
+ that N0c2 must be applied to the parentheses rather than
+ N0c1. */
+
+ if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) ||
+ (direction == 1 && (pOpening.flags & FOUND_R) > 0)) { /* N0b */
+ newProp = direction;
+ }
+ else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) { /* N0c */
+ /* it is stable if there is no preceding text or in
+ conditions too complicated and not worth checking */
+ stable = (openIdx == pLastIsoRun.start);
+ if (direction != pOpening.contextDir)
+ newProp = pOpening.contextDir; /* N0c1 */
+ else
+ newProp = direction; /* N0c2 */
+ } else {
+ /* forget this and any brackets nested within this pair */
+ pLastIsoRun.limit = (short)openIdx;
+ return ON; /* N0d */
+ }
+ dirProps[pOpening.position] = newProp;
+ dirProps[position] = newProp;
+ /* Update nested N0c pairs that may be affected */
+ fixN0c(bd, openIdx, pOpening.position, newProp);
+ if (stable) {
+ pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */
+ /* remove lower located synonyms if any */
+ while (pLastIsoRun.limit > pLastIsoRun.start &&
+ bd.openings[pLastIsoRun.limit - 1].position == pOpening.position)
+ pLastIsoRun.limit--;
+ } else {
+ int k;
+ /* keep the entry, remembering the closing position as a negative match
+ so that fixN0c() can still revise this pair */
+ pOpening.match = -position;
+ /* neutralize lower located synonyms if any */
+ k = openIdx - 1;
+ while (k >= pLastIsoRun.start &&
+ bd.openings[k].position == pOpening.position)
+ bd.openings[k--].match = 0;
+ /* neutralize any unmatched opening between the current pair;
+ this will also neutralize higher located synonyms if any */
+ for (k = openIdx + 1; k < pLastIsoRun.limit; k++) {
+ qOpening =bd.openings[k];
+ if (qOpening.position >= position)
+ break;
+ if (qOpening.match > 0)
+ qOpening.match = 0;
+ }
+ }
+ return newProp;
+ }
+
+ /* Handle the character at position: strong characters, digits and ON
+    characters that may be opening or closing brackets.
+    Updates the current isolate run's lastBase/lastStrong/contextDir/contextPos,
+    may rewrite dirProps[position] (and, through bracketProcessClosing(), the
+    dirProp of a matched opening bracket), and records a newly seen strong
+    direction in the flags of the pending openings located before position. */
+ private void bracketProcessChar(BracketData bd, int position) {
+ IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
+ byte dirProp, newProp;
+ byte level;
+ dirProp = dirProps[position];
+ if (dirProp == ON) {
+ char c, match;
+ int idx;
+ /* First see if it is a matching closing bracket. Hopefully, this is
+ more efficient than checking if it is a closing bracket at all */
+ c = text[position];
+ /* search from the most recently added opening backwards */
+ for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) {
+ if (bd.openings[idx].match != c)
+ continue;
+ /* We have a match */
+ newProp = bracketProcessClosing(bd, idx, position);
+ if(newProp == ON) { /* N0d */
+ c = 0; /* prevent handling as an opening */
+ break;
+ }
+ pLastIsoRun.lastBase = ON;
+ pLastIsoRun.contextDir = newProp;
+ pLastIsoRun.contextPos = position;
+ level = levels[position];
+ if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */
+ short flag;
+ int i;
+ newProp = (byte)(level & 1);
+ pLastIsoRun.lastStrong = newProp;
+ flag = (short)DirPropFlag(newProp);
+ for (i = pLastIsoRun.start; i < idx; i++)
+ bd.openings[i].flags |= flag;
+ /* matching brackets are not overridden by LRO/RLO */
+ levels[position] &= ~LEVEL_OVERRIDE;
+ }
+ /* matching brackets are not overridden by LRO/RLO */
+ levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE;
+ return;
+ }
+ /* We get here only if the ON character is not a matching closing
+ bracket or it is a case of N0d */
+ /* Now see if it is an opening bracket */
+ if (c != 0) {
+ match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */
+ } else {
+ match = 0;
+ }
+ if (match != c && /* has a matching char */
+ UCharacter.getIntPropertyValue(c, BIDI_PAIRED_BRACKET_TYPE) ==
+ /* opening bracket */ BidiPairedBracketType.OPEN) {
+ /* special case: process synonyms
+ create an opening entry for each synonym */
+ if (match == 0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
+ bracketAddOpening(bd, (char)0x3009, position);
+ }
+ else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */
+ bracketAddOpening(bd, (char)0x232A, position);
+ }
+ bracketAddOpening(bd, match, position);
+ }
+ }
+ level = levels[position];
+ if ((level & LEVEL_OVERRIDE) != 0) { /* X4, X5 */
+ /* under an override the character takes the direction of the level,
+ except that S, WS and ON keep their own dirProp */
+ newProp = (byte)(level & 1);
+ if (dirProp != S && dirProp != WS && dirProp != ON)
+ dirProps[position] = newProp;
+ pLastIsoRun.lastBase = newProp;
+ pLastIsoRun.lastStrong = newProp;
+ pLastIsoRun.contextDir = newProp;
+ pLastIsoRun.contextPos = position;
+ }
+ else if (dirProp <= R || dirProp == AL) {
+ /* strong character: L, R or AL */
+ newProp = DirFromStrong(dirProp);
+ pLastIsoRun.lastBase = dirProp;
+ pLastIsoRun.lastStrong = dirProp;
+ pLastIsoRun.contextDir = newProp;
+ pLastIsoRun.contextPos = position;
+ }
+ else if(dirProp == EN) {
+ pLastIsoRun.lastBase = EN;
+ if (pLastIsoRun.lastStrong == L) {
+ newProp = L; /* W7 */
+ if (!bd.isNumbersSpecial)
+ dirProps[position] = ENL;
+ pLastIsoRun.contextDir = L;
+ pLastIsoRun.contextPos = position;
+ }
+ else {
+ newProp = R; /* N0 */
+ if (pLastIsoRun.lastStrong == AL)
+ dirProps[position] = AN; /* W2 */
+ else
+ dirProps[position] = ENR;
+ pLastIsoRun.contextDir = R;
+ pLastIsoRun.contextPos = position;
+ }
+ }
+ else if (dirProp == AN) {
+ newProp = R; /* N0 */
+ pLastIsoRun.lastBase = AN;
+ pLastIsoRun.contextDir = R;
+ pLastIsoRun.contextPos = position;
+ }
+ else if (dirProp == NSM) {
+ /* if the last real char was ON, change NSM to ON so that it
+ will stay ON even if the last real char is a bracket which
+ may be changed to L or R */
+ newProp = pLastIsoRun.lastBase;
+ if (newProp == ON)
+ dirProps[position] = newProp;
+ }
+ else {
+ newProp = dirProp;
+ pLastIsoRun.lastBase = dirProp;
+ }
+ /* a strong direction was seen: record it in every pending opening of the
+ current isolate run that is located before this position */
+ if (newProp <= R || newProp == AL) {
+ int i;
+ short flag = (short)DirPropFlag(DirFromStrong(newProp));
+ for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++)
+ if (position > bd.openings[i].position)
+ bd.openings[i].flags |= flag;
+ }
+ }
+
/* perform (X1)..(X9) ------------------------------------------------------- */
/* determine if the text is mixed-directional or single-directional */
/*
 * Classify the text from the directional-property bits collected in 'flags'.
 * Returns LTR, RTL or MIXED; 'flags' is only read, not modified.
 */
private byte directionFromFlags() {
+
/* if the text contains AN and neutrals, then some neutrals may become RTL */
/* LTR: no MASK_RTL bit is set, and AN does not occur together with a MASK_POSSIBLE_N bit */
if (!((flags & MASK_RTL) != 0 ||
((flags & DirPropFlag(AN)) != 0 &&
(flags & MASK_POSSIBLE_N) != 0))) {
! return LTR;
/* RTL: the test above failed and no MASK_LTR bit is set */
} else if ((flags & MASK_LTR) == 0) {
! return RTL;
} else {
/* everything else is reported as MIXED */
return MIXED;
}
}
/*
* Resolve the explicit levels as specified by explicit embedding codes.
* Recalculate the flags to have them reflect the real properties
* after taking the explicit embeddings into account.
*
! * The BiDi algorithm is designed to result in the same behavior whether embedding
* levels are externally specified (from "styled text", supposedly the preferred
! * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
! * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
! * However, in a real implementation, the removal of these codes and their index
* positions in the plain text is undesirable since it would result in
* reallocated, reindexed text.
* Instead, this implementation leaves the codes in there and just ignores them
* in the subsequent processing.
! * In order to get the same reordering behavior, positions with a BN or a not-isolate
* explicit embedding code just get the same level assigned as the last "real"
* character.
*
* Some implementations, not this one, then overwrite some of these
* directionality properties at "real" same-level-run boundaries by
*** 1349,1537 ****
* entire paragraph at once instead of having to parse it once more and
* perform that resolution on same-level-runs.
* This limits the scope of the implicit rules in effectively
* the same way as the run limits.
*
! * Instead, this implementation does not modify these codes.
* On one hand, the paragraph has to be scanned for same-level-runs, but
* on the other hand, this saves another loop to reset these codes,
* or saves making and modifying a copy of dirProps[].
*
*
! * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
*
*
* Handling the stack of explicit levels (Xn):
*
! * With the Bidi stack of explicit levels,
! * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
! * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
*
* In order to have a correct push-pop semantics even in the case of overflows,
! * there are two overflow counters:
! * - countOver60 is incremented with each LRx at level 60
! * - from level 60, one RLx increases the level to 61
! * - countOver61 is incremented with each LRx and RLx at level 61
! *
! * Popping levels with PDF must work in the opposite order so that level 61
! * is correct at the correct point. Underflows (too many PDFs) must be checked.
*
* This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
*/
private byte resolveExplicitLevels() {
int i = 0;
byte dirProp;
byte level = GetParaLevelAt(0);
-
byte dirct;
! int paraIndex = 0;
/* determine if the text is mixed-directional or single-directional */
dirct = directionFromFlags();
! /* we may not need to resolve any explicit levels, but for multiple
! paragraphs we want to loop on all chars to set the para boundaries */
! if ((dirct != MIXED) && (paraCount == 1)) {
/* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
! } else if ((paraCount == 1) &&
! ((flags & MASK_EXPLICIT) == 0)) {
! /* mixed, but all characters are at the same embedding level */
! /* or we are in "inverse Bidi" */
! /* and we don't have contextual multiple paragraphs with some B char */
/* set all levels to the paragraph level */
! for (i = 0; i < length; ++i) {
levels[i] = level;
}
- } else {
/* continue to perform (Xn) */
/* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
/* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
! byte embeddingLevel = level;
! byte newLevel;
! byte stackTop = 0;
!
! byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */
! int countOver60 = 0;
! int countOver61 = 0; /* count overflows of explicit levels */
/* recalculate the flags */
flags = 0;
! for (i = 0; i < length; ++i) {
! dirProp = NoContextRTL(dirProps[i]);
! switch(dirProp) {
case LRE:
- case LRO:
- /* (X3, X5) */
- newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */
- if (newLevel <= MAX_EXPLICIT_LEVEL) {
- stack[stackTop] = embeddingLevel;
- ++stackTop;
- embeddingLevel = newLevel;
- if (dirProp == LRO) {
- embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
- }
- /* we don't need to set LEVEL_OVERRIDE off for LRE
- since this has already been done for newLevel which is
- the source for embeddingLevel.
- */
- } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {
- ++countOver61;
- } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {
- ++countOver60;
- }
- flags |= DirPropFlag(BN);
- break;
case RLE:
case RLO:
! /* (X2, X4) */
! newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */
! if (newLevel<=MAX_EXPLICIT_LEVEL) {
! stack[stackTop] = embeddingLevel;
! ++stackTop;
! embeddingLevel = newLevel;
! if (dirProp == RLO) {
! embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
}
! /* we don't need to set LEVEL_OVERRIDE off for RLE
since this has already been done for newLevel which is
the source for embeddingLevel.
*/
} else {
! ++countOver61;
}
- flags |= DirPropFlag(BN);
break;
case PDF:
/* (X7) */
/* handle all the overflow cases first */
! if (countOver61 > 0) {
! --countOver61;
! } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {
! /* handle LRx overflows from level 60 */
! --countOver60;
! } else if (stackTop > 0) {
! /* this is the pop operation; it also pops level 61 while countOver60>0 */
! --stackTop;
! embeddingLevel = stack[stackTop];
! /* } else { (underflow) */
}
! flags |= DirPropFlag(BN);
break;
- case B:
- stackTop = 0;
- countOver60 = 0;
- countOver61 = 0;
- level = GetParaLevelAt(i);
- if ((i + 1) < length) {
- embeddingLevel = GetParaLevelAt(i+1);
- if (!((text[i] == CR) && (text[i + 1] == LF))) {
- paras[paraIndex++] = i+1;
}
}
flags |= DirPropFlag(B);
break;
case BN:
/* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
/* they will get their levels set correctly in adjustWSLevels() */
flags |= DirPropFlag(BN);
break;
default:
! /* all other types get the "real" level */
! if (level != embeddingLevel) {
! level = embeddingLevel;
! if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
! flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;
! } else {
! flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;
! }
! }
! if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
! flags |= DirPropFlag(dirProp);
! }
break;
}
-
- /*
- * We need to set reasonable levels even on BN codes and
- * explicit codes because we will later look at same-level runs (X10).
- */
- levels[i] = level;
}
if ((flags & MASK_EMBEDDING) != 0) {
flags |= DirPropFlagLR(paraLevel);
}
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
flags |= DirPropFlag(L);
}
-
- /* subsequently, ignore the explicit codes and BN (X9) */
-
/* again, determine if the text is mixed-directional or single-directional */
dirct = directionFromFlags();
- }
return dirct;
}
/*
--- 2058,2342 ----
* entire paragraph at once instead of having to parse it once more and
* perform that resolution on same-level-runs.
* This limits the scope of the implicit rules in effectively
* the same way as the run limits.
*
! * Instead, this implementation does not modify these codes, except for
! * paired brackets whose properties (ON) may be replaced by L or R.
* On one hand, the paragraph has to be scanned for same-level-runs, but
* on the other hand, this saves another loop to reset these codes,
* or saves making and modifying a copy of dirProps[].
*
*
! * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
*
*
* Handling the stack of explicit levels (Xn):
*
! * With the BiDi stack of explicit levels, as pushed with each
! * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
! * the explicit level must never exceed MAX_EXPLICIT_LEVEL.
*
* In order to have a correct push-pop semantics even in the case of overflows,
! * overflow counters and a valid isolate counter are used as described in UAX#9
! * section 3.3.2 "Explicit Levels and Directions".
*
* This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
+ *
+ * Returns the overall direction of the text (LTR, RTL or MIXED) as computed by directionFromFlags().
+ *
*/
/*
 * Outline of the body:
 *  1. text whose direction is not MIXED returns immediately;
 *  2. reordering modes above REORDER_LAST_LOGICAL_TO_VISUAL give every
 *     character the level of its paragraph and return;
 *  3. text without any MASK_EXPLICIT or MASK_ISO flag gets paragraph levels
 *     plus bracket matching and returns;
 *  4. otherwise the main loop walks dirProps[], maintains the level stack,
 *     fills levels[] and rebuilds 'flags', and the direction is recomputed.
 */
private byte resolveExplicitLevels() {
int i = 0;
byte dirProp;
byte level = GetParaLevelAt(0);
byte dirct;
/* field: raised below to the deepest nesting of valid isolates that is reached */
! isolateCount = 0;
/* determine if the text is mixed-directional or single-directional */
dirct = directionFromFlags();
! /* we may not need to resolve any explicit levels */
! if (dirct != MIXED) {
/* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
! return dirct;
! }
!
! if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) {
! /* inverse BiDi: mixed, but all characters are at the same embedding level */
/* set all levels to the paragraph level */
! int paraIndex, start, limit;
/* paragraph paraIndex covers [paras_limit[paraIndex - 1], paras_limit[paraIndex]) */
! for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
! if (paraIndex == 0)
! start = 0;
! else
! start = paras_limit[paraIndex - 1];
! limit = paras_limit[paraIndex];
! level = paras_level[paraIndex];
! for (i = start; i < limit; i++)
! levels[i] =level;
! }
! return dirct; /* no bracket matching for inverse BiDi */
! }
! if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) {
! /* no embeddings, set all levels to the paragraph level */
! /* we still have to perform bracket matching */
! int paraIndex, start, limit;
! BracketData bracketData = new BracketData();
! bracketInit(bracketData);
! for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
! if (paraIndex == 0)
! start = 0;
! else
! start = paras_limit[paraIndex-1];
! limit = paras_limit[paraIndex];
! level = paras_level[paraIndex];
! for (i = start; i < limit; i++) {
levels[i] = level;
+ dirProp = dirProps[i];
/* BN takes no part in bracket matching */
+ if (dirProp == BN)
+ continue;
/* a B that is not the last character notifies the bracket code; B itself is never passed to bracketProcessChar() */
+ if (dirProp == B) {
+ if ((i + 1) < length) {
+ if (text[i] == CR && text[i + 1] == LF)
+ continue; /* skip CR when followed by LF */
+ bracketProcessB(bracketData, level);
+ }
+ continue;
+ }
+ bracketProcessChar(bracketData, i);
+ }
+ }
+ return dirct;
}
/* continue to perform (Xn) */
/* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
/* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
! byte embeddingLevel = level, newLevel;
! byte previousLevel = level; /* previous level for regular (not CC) characters */
! int lastCcPos = 0; /* index of last effective LRx,RLx, PDx */
!
! /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
! stackLast points to its current entry. */
! short[] stack = new short[MAX_EXPLICIT_LEVEL + 2]; /* we never push anything >= MAX_EXPLICIT_LEVEL
! but we need one more entry as base */
! int stackLast = 0;
/* isolate initiators that could not be pushed and still await their PDI */
! int overflowIsolateCount = 0;
/* embeddings/overrides that could not be pushed and still await their PDF */
! int overflowEmbeddingCount = 0;
/* isolate initiators currently on the stack */
! int validIsolateCount = 0;
! BracketData bracketData = new BracketData();
! bracketInit(bracketData);
! stack[0] = level; /* initialize base entry to para level, no override, no isolate */
/* recalculate the flags */
flags = 0;
! for (i = 0; i < length; i++) {
! dirProp = dirProps[i];
! switch (dirProp) {
case LRE:
case RLE:
+ case LRO:
case RLO:
! /* (X2, X3, X4, X5) */
! flags |= DirPropFlag(BN);
/* the control code itself keeps the level of the preceding regular character */
! levels[i] = previousLevel;
! if (dirProp == LRE || dirProp == LRO) {
! /* least greater even level */
! newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1));
! } else {
! /* least greater odd level */
! newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1);
}
/* push only if the new level is in range and no overflow is pending */
! if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 &&
! overflowEmbeddingCount == 0) {
! lastCcPos = i;
! embeddingLevel = newLevel;
! if (dirProp == LRO || dirProp == RLO)
! embeddingLevel |= LEVEL_OVERRIDE;
! stackLast++;
! stack[stackLast] = embeddingLevel;
! /* we don't need to set LEVEL_OVERRIDE off for LRE and RLE
since this has already been done for newLevel which is
the source for embeddingLevel.
*/
} else {
/* not pushed: counted as an embedding overflow unless an isolate overflow is pending */
! if (overflowIsolateCount == 0)
! overflowEmbeddingCount++;
}
break;
case PDF:
/* (X7) */
+ flags |= DirPropFlag(BN);
+ levels[i] = previousLevel;
/* handle all the overflow cases first */
/* while an isolate overflow is pending this PDF changes nothing */
! if (overflowIsolateCount > 0) {
! break;
}
/* this PDF cancels one embedding that was never pushed */
! if (overflowEmbeddingCount > 0) {
! overflowEmbeddingCount--;
break;
}
/* the base entry (index 0) is never popped, and an isolate entry is left for PDI */
+ if (stackLast > 0 && stack[stackLast] < ISOLATE) { /* not an isolate entry */
+ lastCcPos = i;
+ stackLast--;
+ embeddingLevel = (byte)stack[stackLast];
+ }
+ break;
+ case LRI:
+ case RLI:
/* the initiator is flagged as ON and gets the current embedding level without the override bit */
+ flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
+ levels[i] = NoOverride(embeddingLevel);
+ if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
+ bracketProcessBoundary(bracketData, lastCcPos,
+ previousLevel, embeddingLevel);
+ flags |= DirPropFlagMultiRuns;
+ }
+ previousLevel = embeddingLevel;
+ /* (X5a, X5b) */
+ if (dirProp == LRI)
+ /* least greater even level */
+ newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1));
+ else
+ /* least greater odd level */
+ newLevel=(byte)((NoOverride(embeddingLevel)+1)|1);
+ if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0
+ && overflowEmbeddingCount == 0) {
+ flags |= DirPropFlag(dirProp);
+ lastCcPos = i;
+ validIsolateCount++;
+ if (validIsolateCount > isolateCount)
+ isolateCount = validIsolateCount;
+ embeddingLevel = newLevel;
+ /* we can increment stackLast without checking because newLevel
+ will exceed MAX_EXPLICIT_LEVEL before stackLast overflows */
+ stackLast++;
/* adding ISOLATE marks the entry as an isolate entry; PDF and PDI test for it with "< ISOLATE" */
+ stack[stackLast] = (short)(embeddingLevel + ISOLATE);
+ bracketProcessLRI_RLI(bracketData, embeddingLevel);
+ } else {
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i] = WS;
+ overflowIsolateCount++;
}
+ break;
+ case PDI:
+ if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
+ bracketProcessBoundary(bracketData, lastCcPos,
+ previousLevel, embeddingLevel);
+ flags |= DirPropFlagMultiRuns;
+ }
+ /* (X6a) */
+ if (overflowIsolateCount > 0) {
+ overflowIsolateCount--;
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i] = WS;
+ }
+ else if (validIsolateCount > 0) {
+ flags |= DirPropFlag(PDI);
+ lastCcPos = i;
/* embedding overflows counted inside the isolate being closed are discarded */
+ overflowEmbeddingCount = 0;
+ while (stack[stackLast] < ISOLATE) /* pop embedding entries */
+ stackLast--; /* until the last isolate entry */
+ stackLast--; /* pop also the last isolate entry */
+ validIsolateCount--;
+ bracketProcessPDI(bracketData);
+ } else
+ /* make it WS so that it is handled by adjustWSLevels() */
+ dirProps[i] = WS;
/* the brace-less else above ends here: the statements below run for all three PDI cases */
/* the level comes from the current top of the stack with the ISOLATE marker masked off */
+ embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE);
+ flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
+ previousLevel = embeddingLevel;
+ levels[i] = NoOverride(embeddingLevel);
+ break;
+ case B:
flags |= DirPropFlag(B);
+ levels[i] = GetParaLevelAt(i);
+ if ((i + 1) < length) {
+ if (text[i] == CR && text[i + 1] == LF)
+ break; /* skip CR when followed by LF */
/* reset counters, stack and levels to the paragraph level at i + 1 */
+ overflowEmbeddingCount = overflowIsolateCount = 0;
+ validIsolateCount = 0;
+ stackLast = 0;
+ previousLevel = embeddingLevel = GetParaLevelAt(i + 1);
+ stack[0] = embeddingLevel; /* initialize base entry to para level, no override, no isolate */
+ bracketProcessB(bracketData, embeddingLevel);
+ }
break;
case BN:
/* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
/* they will get their levels set correctly in adjustWSLevels() */
+ levels[i] = previousLevel;
flags |= DirPropFlag(BN);
break;
default:
! /* all other types are normal characters and get the "real" level */
/* a level change (override bit ignored) since the previous regular character starts a new run */
! if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
! bracketProcessBoundary(bracketData, lastCcPos,
! previousLevel, embeddingLevel);
! flags |= DirPropFlagMultiRuns;
! if ((embeddingLevel & LEVEL_OVERRIDE) != 0)
! flags |= DirPropFlagO(embeddingLevel);
! else
! flags |= DirPropFlagE(embeddingLevel);
! }
! previousLevel = embeddingLevel;
! levels[i] = embeddingLevel;
! bracketProcessChar(bracketData, i);
! /* the dirProp may have been changed in bracketProcessChar() */
! flags |= DirPropFlag(dirProps[i]);
break;
}
}
if ((flags & MASK_EMBEDDING) != 0) {
flags |= DirPropFlagLR(paraLevel);
}
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
flags |= DirPropFlag(L);
}
/* again, determine if the text is mixed-directional or single-directional */
dirct = directionFromFlags();
return dirct;
}
/*
*** 1545,1597 ****
* after taking the explicit embeddings into account.
*/
private byte checkExplicitLevels() {
byte dirProp;
int i;
this.flags = 0; /* collect all directionalities in the text */
byte level;
! int paraIndex = 0;
for (i = 0; i < length; ++i) {
if (levels[i] == 0) {
levels[i] = paraLevel;
}
if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
! if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) {
! levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE);
} else {
levels[i] = paraLevel;
}
}
level = levels[i];
! dirProp = NoContextRTL(dirProps[i]);
! if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
/* keep the override flag in levels[i] but adjust the flags */
! level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */
flags |= DirPropFlagO(level);
} else {
/* set the flags */
flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
}
-
if ((level < GetParaLevelAt(i) &&
!((0 == level) && (dirProp == B))) ||
! (MAX_EXPLICIT_LEVEL <level)) {
/* level out of bounds */
throw new IllegalArgumentException("level " + level +
! " out of bounds at index " + i);
}
- if ((dirProp == B) && ((i + 1) < length)) {
- if (!((text[i] == CR) && (text[i + 1] == LF))) {
- paras[paraIndex++] = i + 1;
}
! }
! }
! if ((flags&MASK_EMBEDDING) != 0) {
flags |= DirPropFlagLR(paraLevel);
}
-
/* determine if the text is mixed-directional or single-directional */
return directionFromFlags();
}
/*********************************************************************/
--- 2350,2410 ----
* after taking the explicit embeddings into account.
*/
/*
 * Validate and normalize the levels already stored in levels[], rebuild
 * 'flags' from them and from dirProps[], and record the deepest isolate
 * nesting in this.isolateCount.
 * Returns the result of directionFromFlags().
 * Throws IllegalArgumentException when a level is below the paragraph level
 * at that index (except level 0 on a B) or above MAX_EXPLICIT_LEVEL.
 */
private byte checkExplicitLevels() {
byte dirProp;
int i;
/* local running count of open isolates; shadows the field, which keeps the maximum */
+ int isolateCount = 0;
+
this.flags = 0; /* collect all directionalities in the text */
byte level;
! this.isolateCount = 0;
for (i = 0; i < length; ++i) {
/* a stored level of 0 is replaced by paraLevel */
if (levels[i] == 0) {
levels[i] = paraLevel;
}
+
+ // for backward compatibility
/* a level above MAX_EXPLICIT_LEVEL (bit 0x80 masked off) is reset to paraLevel, keeping LEVEL_OVERRIDE if it was set */
/* NOTE(review): such levels are replaced rather than rejected, so the upper-bound test further down should not fire for them - confirm this is intended */
if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
! if ((levels[i] & LEVEL_OVERRIDE) != 0) {
! levels[i] = (byte)(paraLevel|LEVEL_OVERRIDE);
} else {
levels[i] = paraLevel;
}
}
+
level = levels[i];
! dirProp = dirProps[i];
/* track isolate nesting: LRI/RLI open, PDI closes, B resets */
! if (dirProp == LRI || dirProp == RLI) {
! isolateCount++;
! if (isolateCount > this.isolateCount)
! this.isolateCount = isolateCount;
! }
! else if (dirProp == PDI) {
/* NOTE(review): no lower bound here, so a PDI with no open isolate makes the local count negative until the next B - confirm this is acceptable */
! isolateCount--;
! } else if (dirProp == B) {
! isolateCount = 0;
! }
! if ((level & LEVEL_OVERRIDE) != 0) {
/* keep the override flag in levels[i] but adjust the flags */
! level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */
flags |= DirPropFlagO(level);
} else {
/* set the flags */
flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
}
/* range check on the level with the override bit already removed */
if ((level < GetParaLevelAt(i) &&
!((0 == level) && (dirProp == B))) ||
! (MAX_EXPLICIT_LEVEL < level)) {
/* level out of bounds */
throw new IllegalArgumentException("level " + level +
! " out of bounds at " + i);
}
}
! if ((flags & MASK_EMBEDDING) != 0) {
flags |= DirPropFlagLR(paraLevel);
}
/* determine if the text is mixed-directional or single-directional */
return directionFromFlags();
}
/*********************************************************************/
*** 1608,1630 ****
/* to perform and y represents the next state. */
/* */
/*********************************************************************/
/* Definitions and type for properties state tables */
/*********************************************************************/
! private static final int IMPTABPROPS_COLUMNS = 14;
private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
private static short GetStateProps(short cell) {
return (short)(cell & 0x1f);
}
private static short GetActionProps(short cell) {
return (short)(cell >> 5);
}
private static final short groupProp[] = /* dirProp regrouped */
{
! /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
! 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
};
private static final short _L = 0;
private static final short _R = 1;
private static final short _EN = 2;
private static final short _AN = 3;
--- 2421,2443 ----
/* to perform and y represents the next state. */
/* */
/*********************************************************************/
/* Definitions and type for properties state tables */
/*********************************************************************/
! private static final int IMPTABPROPS_COLUMNS = 16;
private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
/* Returns the low five bits (mask 0x1f) of a properties-table cell. */
private static short GetStateProps(short cell) {
return (short)(cell & 0x1f);
}
/* Returns a properties-table cell shifted right by five bits, i.e. the part above the bits read by GetStateProps(). */
private static short GetActionProps(short cell) {
return (short)(cell >> 5);
}
private static final short groupProp[] = /* dirProp regrouped */
{
! /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
! 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
};
private static final short _L = 0;
private static final short _R = 1;
private static final short _EN = 2;
private static final short _AN = 3;
*** 1635,1645 ****
/*********************************************************************/
/* */
/* PROPERTIES STATE TABLE */
/* */
/* In table impTabProps, */
! /* - the ON column regroups ON and WS */
/* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
/* - the Res column is the reduced property assigned to a run */
/* */
/* Action 1: process current run1, init new run1 */
/* 2: init new run2 */
--- 2448,2458 ----
/*********************************************************************/
/* */
/* PROPERTIES STATE TABLE */
/* */
/* In table impTabProps, */
! /* - the ON column regroups ON, WS, FSI, LRI, RLI and PDI */
/* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
/* - the Res column is the reduced property assigned to a run */
/* */
/* Action 1: process current run1, init new run1 */
/* 2: init new run2 */
*** 1666,1694 ****
/* assembling chars for the opening L sequence. */
/* */
/* */
private static final short impTabProps[][] =
{
! /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */
! /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON },
! /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L },
! /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R },
! /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R },
! /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN },
! /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN },
! /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN },
! /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON },
! /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON },
! /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON },
! /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN },
! /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN },
! /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, _AN },
! /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN },
! /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON },
! /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S },
! /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S },
! /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B }
};
/*********************************************************************/
/* The levels state machine tables */
/*********************************************************************/
--- 2479,2513 ----
/* assembling chars for the opening L sequence. */
/* */
/* */
private static final short impTabProps[][] =
{
! /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR, Res */
! /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, 18, 21, _ON },
! /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, 32+18, 32+21, _L },
! /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, 32+18, 32+21, _R },
! /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, 32+18, 32+21, _R },
! /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, 18, 21, _EN },
! /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, 32+18, 32+21, _AN },
! /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, 18, 21, _AN },
! /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, 32+18, 32+21, _ON },
! /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, 32+18, 32+21, _ON },
! /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, 18, 21, _ON },
! /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, 18, 21, _EN },
! /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, 18, 21, _EN },
! /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, 96+18, 96+21, _AN },
! /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, 18, 21, _AN },
! /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3,128+18,128+21, _ON },
! /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, 32+18, 32+21, _S },
! /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, 32+18, 32+21, _S },
! /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, 32+18, 32+21, _B },
! /*18 ENL */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 64+19, 20, 64+19, 18, 18, 32+3, 18, 21, _L },
! /*19 ENL+ES/CS */ { 96+1, 96+2, 18, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 19, 128+7, 96+3, 18, 21, _L },
! /*20 ENL+ET */ { 32+1, 32+2, 18, 32+5, 32+7, 32+15, 32+17, 32+7, 20, 32+7, 20, 20, 32+3, 18, 21, _L },
! /*21 ENR */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 64+22, 23, 64+22, 21, 21, 32+3, 18, 21, _AN },
! /*22 ENR+ES/CS */ { 96+1, 96+2, 21, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 22, 128+7, 96+3, 18, 21, _AN },
! /*23 ENR+ET */ { 32+1, 32+2, 21, 32+5, 32+7, 32+15, 32+17, 32+7, 23, 32+7, 23, 23, 32+3, 18, 21, _AN }
};
/*********************************************************************/
/* The levels state machine tables */
/*********************************************************************/
*** 1758,1778 ****
/* 4) S is generally handled like ON, since its level will be fixed */
/* to paragraph level in adjustWSLevels(). */
/* */
private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
! /* In this table, conditional sequences receive the higher possible level
until proven otherwise.
*/
{
/* L, R, EN, AN, ON, S, B, Res */
/* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
/* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
/* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
/* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
! /* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 },
! /* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 }
};
private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */
/* In this table, conditional sequences receive the lower possible level
until proven otherwise.
--- 2577,2597 ----
/* 4) S is generally handled like ON, since its level will be fixed */
/* to paragraph level in adjustWSLevels(). */
/* */
private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
! /* In this table, conditional sequences receive the lower possible level
until proven otherwise.
*/
{
/* L, R, EN, AN, ON, S, B, Res */
/* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
/* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
/* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
/* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
! /* 4 : R+ON */ { 0, 0x21, 0x33, 0x33, 4, 4, 0, 0 },
! /* 5 : AN+ON */ { 0, 0x21, 0, 0x32, 5, 5, 0, 0 }
};
private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */
/* In this table, conditional sequences receive the lower possible level
until proven otherwise.
*** 1785,1808 ****
/* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
/* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
/* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
};
! private static final short[] impAct0 = {0,1,2,3,4,5,6};
private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
! /* In this table, conditional sequences receive the higher possible
level until proven otherwise.
*/
/* L, R, EN, AN, ON, S, B, Res */
! /* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 },
! /* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 },
! /* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 },
! /* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 },
/* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
};
private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
--- 2604,2627 ----
/* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
/* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
/* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
};
! private static final short[] impAct0 = {0,1,2,3,4};
private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
! /* In this table, conditional sequences receive the lower possible
level until proven otherwise.
*/
/* L, R, EN, AN, ON, S, B, Res */
! /* 0 : init */ { 0, 2, 0x11, 0x11, 0, 0, 0, 0 },
! /* 1 : L+EN/AN */ { 0, 0x42, 1, 1, 0, 0, 0, 0 },
! /* 2 : R */ { 0, 2, 4, 4, 0x13, 0x13, 0, 1 },
! /* 3 : R+ON */ { 0, 0x22, 0x34, 0x34, 3, 3, 0, 0 },
/* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
};
private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
*** 1872,1882 ****
/* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
/* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
/* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
/* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
};
! private static final short[] impAct1 = {0,1,11,12};
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
/* The case handled in this table is (visually): R EN L
--- 2691,2701 ----
/* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
/* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
/* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
/* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
};
! private static final short[] impAct1 = {0,1,13,14};
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
/* The case handled in this table is (visually): R EN L
*** 1896,1914 ****
*/
/* L, R, EN, AN, ON, S, B, Res */
/* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
/* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
/* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
! /* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 },
/* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
/* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
/* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
};
! private static final short impAct2[] = {0,1,7,8,9,10};
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
! impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
--- 2715,2734 ----
*/
/* L, R, EN, AN, ON, S, B, Res */
/* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
/* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
/* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
! /* 3 : L */ { 3, 0, 3, 0x36, 0x14, 0x40, 0, 1 },
/* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
/* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
/* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
};
! private static final short[] impAct2 = {0,1,2,5,6,7,8};
! private static final short[] impAct3 = {0,1,9,10,11,12};
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
! impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
*** 1921,1938 ****
/* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
/* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
};
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
! impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
! private class LevState {
byte[][] impTab; /* level table pointer */
short[] impAct; /* action map array */
int startON; /* start of ON sequence */
int startL2EN; /* start of level 2 sequence */
int lastStrongRTL; /* index of last found R or AL */
short state; /* current state */
byte runLevel; /* run level before implicit solving */
}
/*------------------------------------------------------------------------*/
--- 2741,2759 ----
/* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
/* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
};
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
! impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
! private static class LevState {
byte[][] impTab; /* level table pointer */
short[] impAct; /* action map array */
int startON; /* start of ON sequence */
int startL2EN; /* start of level 2 sequence */
int lastStrongRTL; /* index of last found R or AL */
+ int runStart; /* start position of the run */
short state; /* current state */
byte runLevel; /* run level before implicit solving */
}
/*------------------------------------------------------------------------*/
*** 1960,1969 ****
--- 2781,2806 ----
point.flag = flag;
insertPoints.points[insertPoints.size] = point;
insertPoints.size++;
}
+ private void setLevelsOutsideIsolates(int start, int limit, byte level)
+ {
+ byte dirProp;
+ int isolateCount = 0, k;
+ for (k = start; k < limit; k++) {
+ dirProp = dirProps[k];
+ if (dirProp == PDI)
+ isolateCount--;
+ if (isolateCount == 0) {
+ levels[k] = level;
+ }
+ if (dirProp == LRI || dirProp == RLI)
+ isolateCount++;
+ }
+ }
+
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
/*
* This implementation of the (Wn) rules applies all rules in one pass.
* In order to do so, it needs a look-ahead of typically 1 character
*** 2001,2011 ****
case 2: /* prepend ON seq to current seq */
start = levState.startON;
break;
! case 3: /* L or S after possible relevant EN/AN */
/* check if we had EN after R/AL */
if (levState.startL2EN >= 0) {
addPoint(levState.startL2EN, LRM_BEFORE);
}
levState.startL2EN = -1; /* not within previous if since could also be -2 */
--- 2838,2858 ----
case 2: /* prepend ON seq to current seq */
start = levState.startON;
break;
! case 3: /* EN/AN after R+ON */
! level = (byte)(levState.runLevel + 1);
! setLevelsOutsideIsolates(levState.startON, start0, level);
! break;
!
! case 4: /* EN/AN before R for NUMBERS_SPECIAL */
! level = (byte)(levState.runLevel + 2);
! setLevelsOutsideIsolates(levState.startON, start0, level);
! break;
!
! case 5: /* L or S after possible relevant EN/AN */
/* check if we had EN after R/AL */
if (levState.startL2EN >= 0) {
addPoint(levState.startL2EN, LRM_BEFORE);
}
levState.startL2EN = -1; /* not within previous if since could also be -2 */
*** 2037,2062 ****
addPoint(start0, LRM_BEFORE);
insertPoints.confirmed = insertPoints.size;
}
break;
! case 4: /* R/AL after possible relevant EN/AN */
/* just clean up */
if (insertPoints.points.length > 0)
/* remove all non confirmed insert points */
insertPoints.size = insertPoints.confirmed;
levState.startON = -1;
levState.startL2EN = -1;
levState.lastStrongRTL = limit - 1;
break;
! case 5: /* EN/AN after R/AL + possible cont */
/* check for real AN */
! if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) {
/* real AN */
if (levState.startL2EN == -1) { /* if no relevant EN already found */
! /* just note the righmost digit as a strong RTL */
levState.lastStrongRTL = limit - 1;
break;
}
if (levState.startL2EN >= 0) { /* after EN, no AN */
addPoint(levState.startL2EN, LRM_BEFORE);
--- 2884,2912 ----
addPoint(start0, LRM_BEFORE);
insertPoints.confirmed = insertPoints.size;
}
break;
! case 6: /* R/AL after possible relevant EN/AN */
/* just clean up */
if (insertPoints.points.length > 0)
/* remove all non confirmed insert points */
insertPoints.size = insertPoints.confirmed;
levState.startON = -1;
levState.startL2EN = -1;
levState.lastStrongRTL = limit - 1;
break;
! case 7: /* EN/AN after R/AL + possible cont */
/* check for real AN */
!
! if ((_prop == _AN) && (dirProps[start0] == AN) &&
! (reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
! {
/* real AN */
if (levState.startL2EN == -1) { /* if no relevant EN already found */
! /* just note the rightmost digit as a strong RTL */
levState.lastStrongRTL = limit - 1;
break;
}
if (levState.startL2EN >= 0) { /* after EN, no AN */
addPoint(levState.startL2EN, LRM_BEFORE);
*** 2070,2123 ****
if (levState.startL2EN == -1) {
levState.startL2EN = start0;
}
break;
! case 6: /* note location of latest R/AL */
levState.lastStrongRTL = limit - 1;
levState.startON = -1;
break;
! case 7: /* L after R+ON/EN/AN */
/* include possible adjacent number on the left */
for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
}
if (k >= 0) {
addPoint(k, RLM_BEFORE); /* add RLM before */
insertPoints.confirmed = insertPoints.size; /* confirm it */
}
levState.startON = start0;
break;
! case 8: /* AN after L */
/* AN numbers between L text on both sides may be trouble. */
/* tentatively bracket with LRMs; will be confirmed if followed by L */
addPoint(start0, LRM_BEFORE); /* add LRM before */
addPoint(start0, LRM_AFTER); /* add LRM after */
break;
! case 9: /* R after L+ON/EN/AN */
/* false alert, infirm LRMs around previous AN */
insertPoints.size=insertPoints.confirmed;
if (_prop == _S) { /* add RLM before S */
addPoint(start0, RLM_BEFORE);
insertPoints.confirmed = insertPoints.size;
}
break;
! case 10: /* L after L+ON/AN */
level = (byte)(levState.runLevel + addLevel);
for (k=levState.startON; k < start0; k++) {
if (levels[k] < level) {
levels[k] = level;
}
}
insertPoints.confirmed = insertPoints.size; /* confirm inserts */
levState.startON = start0;
break;
! case 11: /* L after L+ON+EN/AN/ON */
level = levState.runLevel;
for (k = start0-1; k >= levState.startON; k--) {
if (levels[k] == level+3) {
while (levels[k] == level+3) {
levels[k--] -= 2;
--- 2920,2973 ----
if (levState.startL2EN == -1) {
levState.startL2EN = start0;
}
break;
! case 8: /* note location of latest R/AL */
levState.lastStrongRTL = limit - 1;
levState.startON = -1;
break;
! case 9: /* L after R+ON/EN/AN */
/* include possible adjacent number on the left */
for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
}
if (k >= 0) {
addPoint(k, RLM_BEFORE); /* add RLM before */
insertPoints.confirmed = insertPoints.size; /* confirm it */
}
levState.startON = start0;
break;
! case 10: /* AN after L */
/* AN numbers between L text on both sides may be trouble. */
/* tentatively bracket with LRMs; will be confirmed if followed by L */
addPoint(start0, LRM_BEFORE); /* add LRM before */
addPoint(start0, LRM_AFTER); /* add LRM after */
break;
! case 11: /* R after L+ON/EN/AN */
/* false alert, infirm LRMs around previous AN */
insertPoints.size=insertPoints.confirmed;
if (_prop == _S) { /* add RLM before S */
addPoint(start0, RLM_BEFORE);
insertPoints.confirmed = insertPoints.size;
}
break;
! case 12: /* L after L+ON/AN */
level = (byte)(levState.runLevel + addLevel);
for (k=levState.startON; k < start0; k++) {
if (levels[k] < level) {
levels[k] = level;
}
}
insertPoints.confirmed = insertPoints.size; /* confirm inserts */
levState.startON = start0;
break;
! case 13: /* L after L+ON+EN/AN/ON */
level = levState.runLevel;
for (k = start0-1; k >= levState.startON; k--) {
if (levels[k] == level+3) {
while (levels[k] == level+3) {
levels[k--] -= 2;
*** 2132,2142 ****
}
levels[k] = (byte)(level+1);
}
break;
! case 12: /* R after L+ON+EN/AN/ON */
level = (byte)(levState.runLevel+1);
for (k = start0-1; k >= levState.startON; k--) {
if (levels[k] > level) {
levels[k] -= 2;
}
--- 2982,2992 ----
}
levels[k] = (byte)(level+1);
}
break;
! case 14: /* R after L+ON+EN/AN/ON */
level = (byte)(levState.runLevel+1);
for (k = start0-1; k >= levState.startON; k--) {
if (levels[k] > level) {
levels[k] -= 2;
}
*** 2147,2202 ****
throw new IllegalStateException("Internal ICU error in processPropertySeq");
}
}
if ((addLevel) != 0 || (start < start0)) {
level = (byte)(levState.runLevel + addLevel);
for (k = start; k < limit; k++) {
levels[k] = level;
}
}
}
private void resolveImplicitLevels(int start, int limit, short sor, short eor)
{
LevState levState = new LevState();
int i, start1, start2;
short oldStateImp, stateImp, actionImp;
short gprop, resProp, cell;
short nextStrongProp = R;
int nextStrongPos = -1;
-
/* check for RTL inverse Bidi mode */
/* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
* loop on the text characters from end to start.
* This would need a different properties state table (at least different
* actions) and different levels state tables (maybe very similar to the
* LTR corresponding ones).
*/
! /* initialize for levels state table */
levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
! levState.state = 0;
levState.runLevel = levels[start];
levState.impTab = impTabPair.imptab[levState.runLevel & 1];
levState.impAct = impTabPair.impact[levState.runLevel & 1];
! processPropertySeq(levState, sor, start, start);
! /* initialize for property state table */
! if (dirProps[start] == NSM) {
! stateImp = (short)(1 + sor);
} else {
stateImp = 0;
}
! start1 = start;
! start2 = 0;
for (i = start; i <= limit; i++) {
if (i >= limit) {
gprop = eor;
} else {
! short prop, prop1;
! prop = NoContextRTL(dirProps[i]);
gprop = groupProp[prop];
}
oldStateImp = stateImp;
cell = impTabProps[oldStateImp][gprop];
stateImp = GetStateProps(cell); /* isolate the new state */
--- 2997,3106 ----
throw new IllegalStateException("Internal ICU error in processPropertySeq");
}
}
if ((addLevel) != 0 || (start < start0)) {
level = (byte)(levState.runLevel + addLevel);
+ if (start >= levState.runStart) {
for (k = start; k < limit; k++) {
levels[k] = level;
}
+ } else {
+ setLevelsOutsideIsolates(start, limit, level);
+ }
}
}
private void resolveImplicitLevels(int start, int limit, short sor, short eor)
{
+ byte dirProp;
LevState levState = new LevState();
int i, start1, start2;
short oldStateImp, stateImp, actionImp;
short gprop, resProp, cell;
+ boolean inverseRTL;
short nextStrongProp = R;
int nextStrongPos = -1;
/* check for RTL inverse Bidi mode */
/* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
* loop on the text characters from end to start.
* This would need a different properties state table (at least different
* actions) and different levels state tables (maybe very similar to the
* LTR corresponding ones).
*/
! inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) &&
! (reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
! reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
! /* initialize for property and levels state table */
levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
! levState.runStart = start;
levState.runLevel = levels[start];
levState.impTab = impTabPair.imptab[levState.runLevel & 1];
levState.impAct = impTabPair.impact[levState.runLevel & 1];
!
! /* The isolates[] entries contain enough information to
! resume the bidi algorithm in the same state as it was
! when it was interrupted by an isolate sequence. */
! if (dirProps[start] == PDI) {
! levState.startON = isolates[isolateCount].startON;
! start1 = isolates[isolateCount].start1;
! stateImp = isolates[isolateCount].stateImp;
! levState.state = isolates[isolateCount].state;
! isolateCount--;
} else {
+ levState.startON = -1;
+ start1 = start;
+ if (dirProps[start] == NSM)
+ stateImp = (short)(1 + sor);
+ else
stateImp = 0;
+ levState.state = 0;
+ processPropertySeq(levState, sor, start, start);
}
! start2 = start; /* to make the Java compiler happy */
for (i = start; i <= limit; i++) {
if (i >= limit) {
+ int k;
+ for (k = limit - 1;
+ k > start &&
+ (DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0;
+ k--);
+ dirProp = dirProps[k];
+ if (dirProp == LRI || dirProp == RLI)
+ break; /* no forced closing for sequence ending with LRI/RLI */
gprop = eor;
} else {
! byte prop, prop1;
! prop = dirProps[i];
! if (prop == B)
! isolateCount = -1; /* current isolates stack entry == none */
! if (inverseRTL) {
! if (prop == AL) {
! /* AL before EN does not make it AN */
! prop = R;
! } else if (prop == EN) {
! if (nextStrongPos <= i) {
! /* look for next strong char (L/R/AL) */
! int j;
! nextStrongProp = R; /* set default */
! nextStrongPos = limit;
! for (j = i+1; j < limit; j++) {
! prop1 = dirProps[j];
! if (prop1 == L || prop1 == R || prop1 == AL) {
! nextStrongProp = prop1;
! nextStrongPos = j;
! break;
! }
! }
! }
! if (nextStrongProp == AL) {
! prop = AN;
! }
! }
! }
gprop = groupProp[prop];
}
oldStateImp = stateImp;
cell = impTabProps[oldStateImp][gprop];
stateImp = GetStateProps(cell); /* isolate the new state */
*** 2228,2238 ****
default: /* we should never get here */
throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
}
}
}
! /* flush possible pending sequence, e.g. ON */
processPropertySeq(levState, eor, limit, limit);
}
/* perform (L1) and (X9) ---------------------------------------------------- */
--- 3132,3158 ----
default: /* we should never get here */
throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
}
}
}
!
! /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
! for (i = limit - 1;
! i > start &&
! (DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0;
! i--);
! dirProp = dirProps[i];
! if ((dirProp == LRI || dirProp == RLI) && limit < length) {
! isolateCount++;
! if (isolates[isolateCount] == null)
! isolates[isolateCount] = new Isolate();
! isolates[isolateCount].stateImp = stateImp;
! isolates[isolateCount].state = levState.state;
! isolates[isolateCount].start1 = start1;
! isolates[isolateCount].startON = levState.startON;
! }
! else
processPropertySeq(levState, eor, limit, limit);
}
/* perform (L1) and (X9) ---------------------------------------------------- */
*** 2248,2269 ****
if ((flags & MASK_WS) != 0) {
int flag;
i = trailingWSStart;
while (i > 0) {
/* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
! while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) {
if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
levels[i] = 0;
} else {
levels[i] = GetParaLevelAt(i);
}
}
/* reset BN to the next character's paraLevel until B/S, which restarts above loop */
/* here, i+1 is guaranteed to be <length */
while (i > 0) {
! flag = DirPropFlagNC(dirProps[--i]);
if ((flag & MASK_BN_EXPLICIT) != 0) {
levels[i] = levels[i + 1];
} else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
levels[i] = 0;
break;
--- 3168,3189 ----
if ((flags & MASK_WS) != 0) {
int flag;
i = trailingWSStart;
while (i > 0) {
/* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
! while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) {
if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
levels[i] = 0;
} else {
levels[i] = GetParaLevelAt(i);
}
}
/* reset BN to the next character's paraLevel until B/S, which restarts above loop */
/* here, i+1 is guaranteed to be <length */
while (i > 0) {
! flag = DirPropFlag(dirProps[--i]);
if ((flag & MASK_BN_EXPLICIT) != 0) {
levels[i] = levels[i + 1];
} else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
levels[i] = 0;
break;
*** 2274,2291 ****
--- 3194,3368 ----
}
}
}
}
+ private void setParaSuccess() {
+ paraBidi = this; /* mark successful setPara */
+ }
+
/**
 * Returns the smaller of the two given values.
 *
 * @param x the first value
 * @param y the second value
 * @return {@code x} if it is less than {@code y}, otherwise {@code y}
 */
private int Bidi_Min(int x, int y) {
    return Math.min(x, y);
}
/**
 * Returns the absolute value of the given int.
 *
 * @param x the value to examine
 * @return {@code x} when it is non-negative, otherwise {@code -x}; note that
 *         {@code Integer.MIN_VALUE} is returned unchanged because its
 *         negation overflows
 */
private int Bidi_Abs(int x) {
    return Math.abs(x);
}
+ void setParaRunsOnly(char[] parmText, byte parmParaLevel) {
+ int[] visualMap;
+ String visualText;
+ int saveLength, saveTrailingWSStart;
+ byte[] saveLevels;
+ byte saveDirection;
+ int i, j, visualStart, logicalStart,
+ oldRunCount, runLength, addedRuns, insertRemove,
+ start, limit, step, indexOddBit, logicalPos,
+ index, index1;
+ int saveOptions;
+
+ reorderingMode = REORDER_DEFAULT;
+ int parmLength = parmText.length;
+ if (parmLength == 0) {
+ setPara(parmText, parmParaLevel, null);
+ reorderingMode = REORDER_RUNS_ONLY;
+ return;
+ }
+ /* obtain memory for mapping table and visual text */
+ saveOptions = reorderingOptions;
+ if ((saveOptions & OPTION_INSERT_MARKS) > 0) {
+ reorderingOptions &= ~OPTION_INSERT_MARKS;
+ reorderingOptions |= OPTION_REMOVE_CONTROLS;
+ }
+ parmParaLevel &= 1; /* accept only 0 or 1 */
+ setPara(parmText, parmParaLevel, null);
+ /* levels cannot be accessed directly here, since the array is not yet
+ * set when the direction is not MIXED; go through getLevels() instead
+ */
+ saveLevels = new byte[this.length];
+ System.arraycopy(getLevels(), 0, saveLevels, 0, this.length);
+ saveTrailingWSStart = trailingWSStart;
+
+ /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
+ * the visual map and the dirProps array to drive the second call
+ * to setPara (but must make provision for possible removal of
+ * Bidi controls. Alternatively, only use the dirProps array via
+ * customized classifier callback.
+ */
+ visualText = writeReordered(DO_MIRRORING);
+ visualMap = getVisualMap();
+ this.reorderingOptions = saveOptions;
+ saveLength = this.length;
+ saveDirection=this.direction;
+
+ this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT;
+ parmParaLevel ^= 1;
+ setPara(visualText, parmParaLevel, null);
+ BidiLine.getRuns(this);
+ /* check if some runs must be split, count how many splits */
+ addedRuns = 0;
+ oldRunCount = this.runCount;
+ visualStart = 0;
+ for (i = 0; i < oldRunCount; i++, visualStart += runLength) {
+ runLength = runs[i].limit - visualStart;
+ if (runLength < 2) {
+ continue;
+ }
+ logicalStart = runs[i].start;
+ for (j = logicalStart+1; j < logicalStart+runLength; j++) {
+ index = visualMap[j];
+ index1 = visualMap[j-1];
+ if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
+ addedRuns++;
+ }
+ }
+ }
+ if (addedRuns > 0) {
+ getRunsMemory(oldRunCount + addedRuns);
+ if (runCount == 1) {
+ /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
+ runsMemory[0] = runs[0];
+ } else {
+ System.arraycopy(runs, 0, runsMemory, 0, runCount);
+ }
+ runs = runsMemory;
+ runCount += addedRuns;
+ for (i = oldRunCount; i < runCount; i++) {
+ if (runs[i] == null) {
+ runs[i] = new BidiRun(0, 0, (byte)0);
+ }
+ }
+ }
+ /* split runs which are not consecutive in source text */
+ int newI;
+ for (i = oldRunCount-1; i >= 0; i--) {
+ newI = i + addedRuns;
+ runLength = i==0 ? runs[0].limit :
+ runs[i].limit - runs[i-1].limit;
+ logicalStart = runs[i].start;
+ indexOddBit = runs[i].level & 1;
+ if (runLength < 2) {
+ if (addedRuns > 0) {
+ runs[newI].copyFrom(runs[i]);
+ }
+ logicalPos = visualMap[logicalStart];
+ runs[newI].start = logicalPos;
+ runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
+ continue;
+ }
+ if (indexOddBit > 0) {
+ start = logicalStart;
+ limit = logicalStart + runLength - 1;
+ step = 1;
+ } else {
+ start = logicalStart + runLength - 1;
+ limit = logicalStart;
+ step = -1;
+ }
+ for (j = start; j != limit; j += step) {
+ index = visualMap[j];
+ index1 = visualMap[j+step];
+ if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
+ logicalPos = Bidi_Min(visualMap[start], index);
+ runs[newI].start = logicalPos;
+ runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
+ runs[newI].limit = runs[i].limit;
+ runs[i].limit -= Bidi_Abs(j - start) + 1;
+ insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER);
+ runs[newI].insertRemove = insertRemove;
+ runs[i].insertRemove &= ~insertRemove;
+ start = j + step;
+ addedRuns--;
+ newI--;
+ }
+ }
+ if (addedRuns > 0) {
+ runs[newI].copyFrom(runs[i]);
+ }
+ logicalPos = Bidi_Min(visualMap[start], visualMap[limit]);
+ runs[newI].start = logicalPos;
+ runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
+ }
+
+ cleanup1:
+ /* restore initial paraLevel */
+ this.paraLevel ^= 1;
+ cleanup2:
+ /* restore real text */
+ this.text = parmText;
+ this.length = saveLength;
+ this.originalLength = parmLength;
+ this.direction=saveDirection;
+ this.levels = saveLevels;
+ this.trailingWSStart = saveTrailingWSStart;
+ if (runCount > 1) {
+ this.direction = MIXED;
+ }
+ cleanup3:
+ this.reorderingMode = REORDER_RUNS_ONLY;
+ }
+
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
* version 13,
* also described in The Unicode Standard, Version 4.0.<p>
*** 2384,2394 ****
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
! * the same value in such a case.<p>
*
* The text can be composed of multiple paragraphs. Occurrence of a block
* separator in the text terminates a paragraph, and whatever comes next starts
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
--- 3461,3471 ----
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
! * the same value in such a case.
*
* The text can be composed of multiple paragraphs. Occurrence of a block
* separator in the text terminates a paragraph, and whatever comes next starts
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
*** 2419,2431 ****
* override levels, ignoring characters like LRE and PDF in the text.
* A level overrides the directional property of its corresponding
* (same index) character if the level has the
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
* Except for that bit, it must be
! * {@code paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL},
* with one exception: a level of zero may be specified for a
! * paragraph separator even if {@code paraLevel > 0} when multiple
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
* <strong>Caution: </strong>A reference to this array, not a copy
* of the levels, will be stored in the <code>Bidi</code> object;
* the <code>embeddingLevels</code>
* should not be modified to avoid unexpected results on subsequent
--- 3496,3508 ----
* override levels, ignoring characters like LRE and PDF in the text.
* A level overrides the directional property of its corresponding
* (same index) character if the level has the
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
* Except for that bit, it must be
! * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
* with one exception: a level of zero may be specified for a
! * paragraph separator even if <code>paraLevel>0</code> when multiple
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
* <strong>Caution: </strong>A reference to this array, not a copy
* of the levels, will be stored in the <code>Bidi</code> object;
* the <code>embeddingLevels</code>
* should not be modified to avoid unexpected results on subsequent
*** 2442,2467 ****
* @see #LEVEL_DEFAULT_RTL
* @see #LEVEL_OVERRIDE
* @see #MAX_EXPLICIT_LEVEL
* @stable ICU 3.8
*/
! public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
{
/* check the argument values */
! if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {
verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
}
if (chars == null) {
chars = new char[0];
}
/* initialize the Bidi object */
this.paraBidi = null; /* mark unfinished setPara */
this.text = chars;
this.length = this.originalLength = this.resultLength = text.length;
this.paraLevel = paraLevel;
! this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
this.paraCount = 1;
/* Allocate zero-length arrays instead of setting to null here; then
* checks for null in various places can be eliminated.
*/
--- 3519,3550 ----
* @see #LEVEL_DEFAULT_RTL
* @see #LEVEL_OVERRIDE
* @see #MAX_EXPLICIT_LEVEL
* @stable ICU 3.8
*/
! void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
{
/* check the argument values */
! if (paraLevel < LEVEL_DEFAULT_LTR) {
verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
}
if (chars == null) {
chars = new char[0];
}
+ /* special treatment for RUNS_ONLY mode */
+ if (reorderingMode == REORDER_RUNS_ONLY) {
+ setParaRunsOnly(chars, paraLevel);
+ return;
+ }
+
/* initialize the Bidi object */
this.paraBidi = null; /* mark unfinished setPara */
this.text = chars;
this.length = this.originalLength = this.resultLength = text.length;
this.paraLevel = paraLevel;
! this.direction = (byte)(paraLevel & 1);
this.paraCount = 1;
/* Allocate zero-length arrays instead of setting to null here; then
* checks for null in various places can be eliminated.
*/
*** 2473,2487 ****
insertPoints.confirmed = 0; /* clean up from last call */
/*
* Save the original paraLevel if contextual; otherwise, set to 0.
*/
! if (IsDefaultLevel(paraLevel)) {
! defaultParaLevel = paraLevel;
! } else {
! defaultParaLevel = 0;
! }
if (length == 0) {
/*
* For an empty paragraph, create a Bidi object with the paraLevel and
* the flags and the direction set but without allocating zero-length arrays.
--- 3556,3566 ----
insertPoints.confirmed = 0; /* clean up from last call */
/*
* Save the original paraLevel if contextual; otherwise, set to 0.
*/
! defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0;
if (length == 0) {
/*
* For an empty paragraph, create a Bidi object with the paraLevel and
* the flags and the direction set but without allocating zero-length arrays.
*** 2489,2509 ****
*/
if (IsDefaultLevel(paraLevel)) {
this.paraLevel &= 1;
defaultParaLevel = 0;
}
! if ((this.paraLevel & 1) != 0) {
! flags = DirPropFlag(R);
! direction = Bidi.DIRECTION_RIGHT_TO_LEFT;
! } else {
! flags = DirPropFlag(L);
! direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
! }
!
runCount = 0;
paraCount = 0;
! paraBidi = this; /* mark successful setPara */
return;
}
runCount = -1;
--- 3568,3581 ----
*/
if (IsDefaultLevel(paraLevel)) {
this.paraLevel &= 1;
defaultParaLevel = 0;
}
! flags = DirPropFlagLR(paraLevel);
runCount = 0;
paraCount = 0;
! setParaSuccess();
return;
}
runCount = -1;
*** 2513,2537 ****
* determine the paragraph level if necessary.
*/
getDirPropsMemory(length);
dirProps = dirPropsMemory;
getDirProps();
-
/* the processed length may have changed if OPTION_STREAMING is set */
trailingWSStart = length; /* the levels[] will reflect the WS run */
- /* allocate paras memory */
- if (paraCount > 1) {
- getInitialParasMemory(paraCount);
- paras = parasMemory;
- paras[paraCount - 1] = length;
- } else {
- /* initialize paras for single paragraph */
- paras = simpleParas;
- simpleParas[0] = length;
- }
-
/* are explicit levels specified? */
if (embeddingLevels == null) {
/* no: determine explicit levels according to the (Xn) rules */
getLevelsMemory(length);
levels = levelsMemory;
--- 3585,3597 ----
*** 2540,2571 ****
/* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
levels = embeddingLevels;
direction = checkExplicitLevels();
}
/*
* The steps after (X9) in the Bidi algorithm are performed only if
* the paragraph text has mixed directionality!
*/
switch (direction) {
! case Bidi.DIRECTION_LEFT_TO_RIGHT:
! /* make sure paraLevel is even */
! paraLevel = (byte)((paraLevel + 1) & ~1);
!
/* all levels are implicitly at paraLevel (important for getLevels()) */
trailingWSStart = 0;
break;
! case Bidi.DIRECTION_RIGHT_TO_LEFT:
! /* make sure paraLevel is odd */
! paraLevel |= 1;
!
/* all levels are implicitly at paraLevel (important for getLevels()) */
trailingWSStart = 0;
break;
default:
this.impTabPair = impTab_DEFAULT;
!
/*
* If there are no external levels specified and there
* are no significant explicit level codes in the text,
* then we can treat the entire paragraph as one run.
* Otherwise, we need to perform the following rules on runs of
--- 3600,3665 ----
/* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
levels = embeddingLevels;
direction = checkExplicitLevels();
}
+ /* allocate isolate memory */
+ if (isolateCount > 0) {
+ if (isolates == null || isolates.length < isolateCount)
+ isolates = new Isolate[isolateCount + 3]; /* keep some reserve */
+ }
+ isolateCount = -1; /* current isolates stack entry == none */
+
/*
* The steps after (X9) in the Bidi algorithm are performed only if
* the paragraph text has mixed directionality!
*/
switch (direction) {
! case LTR:
/* all levels are implicitly at paraLevel (important for getLevels()) */
trailingWSStart = 0;
break;
! case RTL:
/* all levels are implicitly at paraLevel (important for getLevels()) */
trailingWSStart = 0;
break;
default:
+ /*
+ * Choose the right implicit state table
+ */
+ switch(reorderingMode) {
+ case REORDER_DEFAULT:
this.impTabPair = impTab_DEFAULT;
! break;
! case REORDER_NUMBERS_SPECIAL:
! this.impTabPair = impTab_NUMBERS_SPECIAL;
! break;
! case REORDER_GROUP_NUMBERS_WITH_R:
! this.impTabPair = impTab_GROUP_NUMBERS_WITH_R;
! break;
! case REORDER_RUNS_ONLY:
! /* we should never get here */
! throw new InternalError("Internal ICU error in setPara");
! /* break; */
! case REORDER_INVERSE_NUMBERS_AS_L:
! this.impTabPair = impTab_INVERSE_NUMBERS_AS_L;
! break;
! case REORDER_INVERSE_LIKE_DIRECT:
! if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
! this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
! } else {
! this.impTabPair = impTab_INVERSE_LIKE_DIRECT;
! }
! break;
! case REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
! if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
! this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
! } else {
! this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL;
! }
! break;
! }
/*
* If there are no external levels specified and there
* are no significant explicit level codes in the text,
* then we can treat the entire paragraph as one run.
* Otherwise, we need to perform the following rules on runs of
*** 2599,2640 ****
/* determine start and limit of the run (end points just behind the run) */
/* the values for this run's start are the same as for the previous run's end */
start = limit;
level = nextLevel;
! if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {
/* except if this is a new paragraph, then set sor = para level */
sor = GetLRFromLevel(GetParaLevelAt(start));
} else {
sor = eor;
}
/* search for the limit of this run */
! while (++limit < length && levels[limit] == level) {}
/* get the correct level of the next run */
if (limit < length) {
nextLevel = levels[limit];
} else {
nextLevel = GetParaLevelAt(length - 1);
}
/* determine eor from max(level, nextLevel); sor is last run's eor */
! if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {
eor = GetLRFromLevel(nextLevel);
} else {
eor = GetLRFromLevel(level);
}
/* if the run consists of overridden directional types, then there
are no implicit types to be resolved */
! if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
resolveImplicitLevels(start, limit, sor, eor);
} else {
/* remove the LEVEL_OVERRIDE flags */
do {
! levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;
} while (start < limit);
}
} while (limit < length);
}
--- 3693,3736 ----
/* determine start and limit of the run (end points just behind the run) */
/* the values for this run's start are the same as for the previous run's end */
start = limit;
level = nextLevel;
! if ((start > 0) && (dirProps[start - 1] == B)) {
/* except if this is a new paragraph, then set sor = para level */
sor = GetLRFromLevel(GetParaLevelAt(start));
} else {
sor = eor;
}
/* search for the limit of this run */
! while ((++limit < length) &&
! ((levels[limit] == level) ||
! ((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {}
/* get the correct level of the next run */
if (limit < length) {
nextLevel = levels[limit];
} else {
nextLevel = GetParaLevelAt(length - 1);
}
/* determine eor from max(level, nextLevel); sor is last run's eor */
! if (NoOverride(level) < NoOverride(nextLevel)) {
eor = GetLRFromLevel(nextLevel);
} else {
eor = GetLRFromLevel(level);
}
/* if the run consists of overridden directional types, then there
are no implicit types to be resolved */
! if ((level & LEVEL_OVERRIDE) == 0) {
resolveImplicitLevels(start, limit, sor, eor);
} else {
/* remove the LEVEL_OVERRIDE flags */
do {
! levels[start++] &= ~LEVEL_OVERRIDE;
} while (start < limit);
}
} while (limit < length);
}
*** 2642,2653 ****
adjustWSLevels();
break;
}
resultLength += insertPoints.size;
! paraBidi = this; /* mark successful setPara */
}
/**
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
--- 3738,3787 ----
adjustWSLevels();
break;
}
+ /* add RLM for inverse Bidi with contextual orientation resolving
+ * to RTL which would not round-trip otherwise
+ */
+ if ((defaultParaLevel > 0) &&
+ ((reorderingOptions & OPTION_INSERT_MARKS) != 0) &&
+ ((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) ||
+ (reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
+ int start, last;
+ byte level;
+ byte dirProp;
+ for (int i = 0; i < paraCount; i++) {
+ last = paras_limit[i] - 1;
+ level = paras_level[i];
+ if (level == 0)
+ continue; /* LTR paragraph */
+ start = i == 0 ? 0 : paras_limit[i - 1];
+ for (int j = last; j >= start; j--) {
+ dirProp = dirProps[j];
+ if (dirProp == L) {
+ if (j < last) {
+ while (dirProps[last] == B) {
+ last--;
+ }
+ }
+ addPoint(last, RLM_BEFORE);
+ break;
+ }
+ if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) {
+ break;
+ }
+ }
+ }
+ }
+
+ if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) {
+ resultLength -= controlCount;
+ } else {
resultLength += insertPoints.size;
! }
! setParaSuccess();
}
/**
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
*** 2680,2690 ****
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
! * the same value in such a case.
*
* @param paragraph a paragraph of text with optional character and
* paragraph attribute information
* @stable ICU 3.8
*/
--- 3814,3824 ----
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
! * the same value in such a case.<p>
*
* @param paragraph a paragraph of text with optional character and
* paragraph attribute information
* @stable ICU 3.8
*/
*** 2693,2707 ****
byte paraLvl;
char ch = paragraph.first();
Boolean runDirection =
(Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
if (runDirection == null) {
! paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
} else {
paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
! (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT;
}
byte[] lvls = null;
int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
byte[] embeddingLevels = new byte[len];
--- 3827,3842 ----
byte paraLvl;
char ch = paragraph.first();
Boolean runDirection =
(Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
+
if (runDirection == null) {
! paraLvl = LEVEL_DEFAULT_LTR;
} else {
paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
! LTR : RTL;
}
byte[] lvls = null;
int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
byte[] embeddingLevels = new byte[len];
*** 2715,2725 ****
byte level = embedding.byteValue();
if (level == 0) {
/* no-op */
} else if (level < 0) {
lvls = embeddingLevels;
! embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE);
} else {
lvls = embeddingLevels;
embeddingLevels[i] = level;
}
}
--- 3850,3860 ----
byte level = embedding.byteValue();
if (level == 0) {
/* no-op */
} else if (level < 0) {
lvls = embeddingLevels;
! embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE);
} else {
lvls = embeddingLevels;
embeddingLevels[i] = level;
}
}
*** 2749,2759 ****
* receive level 0, so that successive paragraphs progress from left to right.
*
* @see #setPara
* @stable ICU 3.8
*/
! private void orderParagraphsLTR(boolean ordarParaLTR) {
orderParagraphsLTR = ordarParaLTR;
}
/**
* Get the directionality of the text.
--- 3884,3894 ----
* receive level 0, so that successive paragraphs progress from left to right.
*
* @see #setPara
* @stable ICU 3.8
*/
! public void orderParagraphsLTR(boolean ordarParaLTR) {
orderParagraphsLTR = ordarParaLTR;
}
/**
* Get the directionality of the text.
*** 2769,2779 ****
* @see #LTR
* @see #RTL
* @see #MIXED
* @stable ICU 3.8
*/
! private byte getDirection()
{
verifyValidParaOrLine();
return direction;
}
--- 3904,3914 ----
* @see #LTR
* @see #RTL
* @see #MIXED
* @stable ICU 3.8
*/
! public byte getDirection()
{
verifyValidParaOrLine();
return direction;
}
*** 2817,2851 ****
verifyValidParaOrLine();
return paraLevel;
}
/**
! * Get the index of a paragraph, given a position within the text.
! *
! * @param charIndex is the index of a character within the text, in the
! * range <code>[0..getProcessedLength()-1]</code>.
*
! * @return The index of the paragraph containing the specified position,
! * starting from 0.
*
! * @throws IllegalStateException if this call is not preceded by a successful
! * call to <code>setPara</code> or <code>setLine</code>
! * @throws IllegalArgumentException if charIndex is not within the legal range
*
- * @see com.ibm.icu.text.BidiRun
- * @see #getProcessedLength
* @stable ICU 3.8
*/
! public int getParagraphIndex(int charIndex)
! {
! verifyValidParaOrLine();
! BidiBase bidi = paraBidi; /* get Para object if Line object */
! verifyRange(charIndex, 0, bidi.length);
! int paraIndex;
! for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) {
! }
! return paraIndex;
}
/**
* <code>setLine()</code> returns a <code>Bidi</code> object to
* contain the reordering information, especially the resolved levels,
--- 3952,3980 ----
verifyValidParaOrLine();
return paraLevel;
}
/**
! * Retrieves the Bidi class for a given code point.
! * <p>If a <code>BidiClassifier</code> is defined and returns a value
! * other than <code>CLASS_DEFAULT</code>, that value is used; otherwise
! * the default class determination mechanism is invoked.</p>
*
! * @param c The code point to get a Bidi class for.
*
! * @return The Bidi class for the character <code>c</code> that is in effect
! * for this <code>Bidi</code> instance.
*
* @stable ICU 3.8
*/
! public int getCustomizedClass(int c) {
! int dir;
!
! dir = bdp.getClass(c);
! if (dir >= CHAR_DIRECTION_COUNT)
! dir = ON;
! return dir;
}
/**
* <code>setLine()</code> returns a <code>Bidi</code> object to
* contain the reordering information, especially the resolved levels,
*** 2889,2899 ****
{
verifyValidPara();
verifyRange(start, 0, limit);
verifyRange(limit, 0, length+1);
! return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
}
/**
* Get the level for one character.
*
--- 4018,4028 ----
{
verifyValidPara();
verifyRange(start, 0, limit);
verifyRange(limit, 0, length+1);
! return BidiLine.setLine(this, newBidi, newBidiBase, start, limit);
}
/**
* Get the level for one character.
*
*** 2909,2921 ****
--- 4038,4052 ----
* @see #getProcessedLength
* @stable ICU 3.8
*/
public byte getLevelAt(int charIndex)
{
+ // for backward compatibility
if (charIndex < 0 || charIndex >= length) {
return (byte)getBaseLevel();
}
+
verifyValidParaOrLine();
verifyRange(charIndex, 0, length);
return BidiLine.getLevelAt(this, charIndex);
}
*** 2930,2940 ****
*
* @throws IllegalStateException if this call is not preceded by a successful
* call to <code>setPara</code> or <code>setLine</code>
* @stable ICU 3.8
*/
! private byte[] getLevels()
{
verifyValidParaOrLine();
if (length <= 0) {
return new byte[0];
}
--- 4061,4071 ----
*
* @throws IllegalStateException if this call is not preceded by a successful
* call to <code>setPara</code> or <code>setLine</code>
* @stable ICU 3.8
*/
! byte[] getLevels()
{
verifyValidParaOrLine();
if (length <= 0) {
return new byte[0];
}
*** 2961,2970 ****
--- 4092,4173 ----
BidiLine.getRuns(this);
return runCount;
}
/**
+ *
+ * Get a <code>BidiRun</code> object according to its index. BidiRun methods
+ * may be used to retrieve the run's logical start, length and level,
+ * which can be even for an LTR run or odd for an RTL run.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.<p>
+ * <code>countRuns()</code> is normally called
+ * before the runs are retrieved.
+ *
+ * <p>
+ * Example:
+ * <pre>
+ * Bidi bidi = new Bidi();
+ * String text = "abc 123 DEFG xyz";
+ * bidi.setPara(text, Bidi.RTL, null);
+ * int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
+ * BidiRun run;
+ * for (i = 0; i < count; ++i) {
+ * run = bidi.getVisualRun(i);
+ * logicalStart = run.getStart();
+ * length = run.getLength();
+ * if (Bidi.LTR == run.getEmbeddingLevel()) {
+ * do { // LTR
+ * show_char(text.charAt(logicalStart++), visualIndex++);
+ * } while (--length > 0);
+ * } else {
+ * logicalStart += length; // logicalLimit
+ * do { // RTL
+ * show_char(text.charAt(--logicalStart), visualIndex++);
+ * } while (--length > 0);
+ * }
+ * }
+ * </pre>
+ * <p>
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * <p>
+ * Use of <code>{@link #writeReordered}</code>, optionally with the
+ * <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in
+ * order to avoid these issues.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range <code>[0..countRuns()-1]</code>.
+ *
+ * @return a BidiRun object containing the details of the run. The
+ * directionality of the run is
+ * <code>LTR==0</code> or <code>RTL==1</code>,
+ * never <code>MIXED</code>.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if <code>runIndex</code> is not in
+ * the range <code>0<=runIndex<countRuns()</code>
+ *
+ * @see #countRuns()
+ * @see com.ibm.icu.text.BidiRun
+ * @see com.ibm.icu.text.BidiRun#getStart()
+ * @see com.ibm.icu.text.BidiRun#getLength()
+ * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
+ * @stable ICU 3.8
+ */
+ BidiRun getVisualRun(int runIndex)
+ {
+ verifyValidParaOrLine();
+ BidiLine.getRuns(this);
+ verifyRange(runIndex, 0, runCount);
+ return BidiLine.getVisualRun(this, runIndex);
+ }
+
+ /**
* Get a visual-to-logical index map (array) for the characters in the
* <code>Bidi</code> (paragraph or line) object.
* <p>
* Some values in the map may be <code>MAP_NOWHERE</code> if the
* corresponding text characters are Bidi marks inserted in the visual
*** 3029,3051 ****
/**
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present, the base
- * direction is left-to-right.
- * @stable ICU 3.8
- */
- private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;
-
- /**
- * Constant indicating that the base direction depends on the first strong
- * directional character in the text according to the Unicode Bidirectional
- * Algorithm. If no strong directional character is present, the base
* direction is right-to-left.
* @stable ICU 3.8
*/
! private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;
/**
* Create Bidi from the given text, embedding, and direction information.
* The embeddings array may be null. If present, the values represent
* embedding level information. Negative values from -1 to -61 indicate
--- 4232,4245 ----
/**
* Constant indicating that the base direction depends on the first strong
* directional character in the text according to the Unicode Bidirectional
* Algorithm. If no strong directional character is present, the base
* direction is right-to-left.
* @stable ICU 3.8
*/
! public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL;
/**
* Create Bidi from the given text, embedding, and direction information.
* The embeddings array may be null. If present, the values represent
* embedding level information. Negative values from -1 to -61 indicate
*** 3089,3108 ****
this(0, 0);
byte paraLvl;
switch (flags) {
case Bidi.DIRECTION_LEFT_TO_RIGHT:
default:
! paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;
break;
case Bidi.DIRECTION_RIGHT_TO_LEFT:
! paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;
break;
case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
! paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
break;
case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
! paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;
break;
}
byte[] paraEmbeddings;
if (embeddings == null) {
paraEmbeddings = null;
--- 4283,4302 ----
this(0, 0);
byte paraLvl;
switch (flags) {
case Bidi.DIRECTION_LEFT_TO_RIGHT:
default:
! paraLvl = LTR;
break;
case Bidi.DIRECTION_RIGHT_TO_LEFT:
! paraLvl = RTL;
break;
case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
! paraLvl = LEVEL_DEFAULT_LTR;
break;
case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
! paraLvl = LEVEL_DEFAULT_RTL;
break;
}
byte[] paraEmbeddings;
if (embeddings == null) {
paraEmbeddings = null;
*** 3110,3137 ****
paraEmbeddings = new byte[paragraphLength];
byte lev;
for (int i = 0; i < paragraphLength; i++) {
lev = embeddings[i + embStart];
if (lev < 0) {
! lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE);
} else if (lev == 0) {
lev = paraLvl;
if (paraLvl > MAX_EXPLICIT_LEVEL) {
lev &= 1;
}
}
paraEmbeddings[i] = lev;
}
}
! if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
! setPara(text, paraLvl, paraEmbeddings);
! } else {
char[] paraText = new char[paragraphLength];
System.arraycopy(text, textStart, paraText, 0, paragraphLength);
setPara(paraText, paraLvl, paraEmbeddings);
}
- }
/**
* Return true if the line is not left-to-right or right-to-left. This means
* it either has mixed runs of left-to-right and right-to-left text, or the
* base direction differs from the direction of the only run of text.
--- 4304,4328 ----
paraEmbeddings = new byte[paragraphLength];
byte lev;
for (int i = 0; i < paragraphLength; i++) {
lev = embeddings[i + embStart];
if (lev < 0) {
! lev = (byte)((- lev) | LEVEL_OVERRIDE);
} else if (lev == 0) {
lev = paraLvl;
if (paraLvl > MAX_EXPLICIT_LEVEL) {
lev &= 1;
}
}
paraEmbeddings[i] = lev;
}
}
!
char[] paraText = new char[paragraphLength];
System.arraycopy(text, textStart, paraText, 0, paragraphLength);
setPara(paraText, paraLvl, paraEmbeddings);
}
/**
* Return true if the line is not left-to-right or right-to-left. This means
* it either has mixed runs of left-to-right and right-to-left text, or the
* base direction differs from the direction of the only run of text.
*** 3158,3168 ****
* call to <code>setPara</code>
* @stable ICU 3.8
*/
public boolean isLeftToRight()
{
! return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0);
}
/**
* Return true if the line is all right-to-left text, and the base direction
* is right-to-left
--- 4349,4359 ----
* call to <code>setPara</code>
* @stable ICU 3.8
*/
public boolean isLeftToRight()
{
! return (getDirection() == LTR && (paraLevel & 1) == 0);
}
/**
* Return true if the line is all right-to-left text, and the base direction
* is right-to-left
*** 3174,3184 ****
* call to <code>setPara</code>
* @stable ICU 3.8
*/
public boolean isRightToLeft()
{
! return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1);
}
/**
* Return true if the base direction is left-to-right
*
--- 4365,4375 ----
* call to <code>setPara</code>
* @stable ICU 3.8
*/
public boolean isRightToLeft()
{
! return (getDirection() == RTL && (paraLevel & 1) == 1);
}
/**
* Return true if the base direction is left-to-right
*
*** 3189,3199 ****
*
* @stable ICU 3.8
*/
public boolean baseIsLeftToRight()
{
! return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT);
}
/**
* Return the base level (0 if left-to-right, 1 if right-to-left).
*
--- 4380,4390 ----
*
* @stable ICU 3.8
*/
public boolean baseIsLeftToRight()
{
! return (getParaLevel() == LTR);
}
/**
* Return the base level (0 if left-to-right, 1 if right-to-left).
*
*** 3210,3220 ****
}
/**
* Compute the logical to visual run mapping
*/
! private void getLogicalToVisualRunsMap()
{
if (isGoodLogicalToVisualRunsMap) {
return;
}
int count = countRuns();
--- 4401,4411 ----
}
/**
* Compute the logical to visual run mapping
*/
! void getLogicalToVisualRunsMap()
{
if (isGoodLogicalToVisualRunsMap) {
return;
}
int count = countRuns();
*** 3229,3239 ****
}
Arrays.sort(keys);
for (i = 0; i < count; i++) {
logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
}
- keys = null;
isGoodLogicalToVisualRunsMap = true;
}
/**
* Return the level of the nth logical run in this line.
--- 4420,4429 ----
*** 3250,3262 ****
--- 4440,4455 ----
*/
public int getRunLevel(int run)
{
verifyValidParaOrLine();
BidiLine.getRuns(this);
+
+ // for backward compatibility
if (run < 0 || run >= runCount) {
return getParaLevel();
}
+
getLogicalToVisualRunsMap();
return runs[logicalToVisualRunsMap[run]].level;
}
/**
*** 3275,3290 ****
*/
public int getRunStart(int run)
{
verifyValidParaOrLine();
BidiLine.getRuns(this);
if (runCount == 1) {
return 0;
} else if (run == runCount) {
return length;
}
! verifyIndex(run, 0, runCount);
getLogicalToVisualRunsMap();
return runs[logicalToVisualRunsMap[run]].start;
}
/**
--- 4468,4485 ----
*/
public int getRunStart(int run)
{
verifyValidParaOrLine();
BidiLine.getRuns(this);
+
+ // for backward compatibility
if (runCount == 1) {
return 0;
} else if (run == runCount) {
return length;
}
!
getLogicalToVisualRunsMap();
return runs[logicalToVisualRunsMap[run]].start;
}
/**
*** 3304,3317 ****
*/
public int getRunLimit(int run)
{
verifyValidParaOrLine();
BidiLine.getRuns(this);
if (runCount == 1) {
return length;
}
! verifyIndex(run, 0, runCount);
getLogicalToVisualRunsMap();
int idx = logicalToVisualRunsMap[run];
int len = idx == 0 ? runs[idx].limit :
runs[idx].limit - runs[idx-1].limit;
return runs[idx].start + len;
--- 4499,4514 ----
*/
public int getRunLimit(int run)
{
verifyValidParaOrLine();
BidiLine.getRuns(this);
+
+ // for backward compatibility
if (runCount == 1) {
return length;
}
!
getLogicalToVisualRunsMap();
int idx = logicalToVisualRunsMap[run];
int len = idx == 0 ? runs[idx].limit :
runs[idx].limit - runs[idx-1].limit;
return runs[idx].start + len;
*** 3334,3363 ****
*/
public static boolean requiresBidi(char[] text,
int start,
int limit)
{
! final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT |
1 << AL |
1 << RLE |
1 << RLO |
1 << AN);
if (0 > start || start > limit || limit > text.length) {
throw new IllegalArgumentException("Value start " + start +
" is out of range 0 to " + limit);
}
for (int i = start; i < limit; ++i) {
if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
Character.isLowSurrogate(text[i+1])) {
if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
return true;
}
} else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
return true;
}
}
return false;
}
/**
* Reorder the objects in the array into visual order based on their levels.
--- 4531,4562 ----
*/
public static boolean requiresBidi(char[] text,
int start,
int limit)
{
! final int RTLMask = (1 << R |
1 << AL |
1 << RLE |
1 << RLO |
1 << AN);
if (0 > start || start > limit || limit > text.length) {
throw new IllegalArgumentException("Value start " + start +
" is out of range 0 to " + limit);
}
+
for (int i = start; i < limit; ++i) {
if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
Character.isLowSurrogate(text[i+1])) {
if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
return true;
}
} else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
return true;
}
}
+
return false;
}
/**
* Reorder the objects in the array into visual order based on their levels.
*** 3380,3389 ****
--- 4579,4589 ----
int levelStart,
Object[] objects,
int objectStart,
int count)
{
+ // for backward compatibility
if (0 > levelStart || levels.length <= levelStart) {
throw new IllegalArgumentException("Value levelStart " +
levelStart + " is out of range 0 to " +
(levels.length-1));
}
*** 3395,3404 ****
--- 4595,4605 ----
if (0 > count || objects.length < (objectStart+count)) {
throw new IllegalArgumentException("Value count " +
levelStart + " is out of range 0 to " +
(objects.length - objectStart));
}
+
byte[] reorderLevels = new byte[count];
System.arraycopy(levels, levelStart, reorderLevels, 0, count);
int[] indexMap = reorderVisual(reorderLevels);
Object[] temp = new Object[count];
System.arraycopy(objects, objectStart, temp, 0, count);
*** 3406,3415 ****
--- 4607,4684 ----
objects[objectStart + i] = temp[indexMap[i]];
}
}
/**
+ * Take a <code>Bidi</code> object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * <code>setPara()</code> or for a line of text set by <code>setLine()</code>
+ * and return a string containing the reordered text.
+ *
+ * <p>The text may have been aliased (only a reference was stored
+ * without copying the contents), thus it must not have been modified
+ * since the <code>setPara()</code> call.</p>
+ *
+ * This method preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the returned string. Note that "real" mirroring has to be done in a
+ * rendering engine by glyph selection and that for many "mirrored"
+ * characters there are no Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the descriptions of the return value and the
+ * <code>options</code> parameter, and of the option bit flags.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * <code>DO_MIRRORING</code>,
+ * <code>INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>KEEP_BASE_COMBINING</code>,
+ * <code>OUTPUT_REVERSE</code>,
+ * <code>REMOVE_BIDI_CONTROLS</code>,
+ * <code>STREAMING</code>
+ *
+ * @return The reordered text.
+ * If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then
+ * the length of the returned string could be as large as
+ * <code>getLength()+2*countRuns()</code>.<br>
+ * If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the
+ * length of the returned string may be less than
+ * <code>getLength()</code>.<br>
+ * If none of these options is set, then the length of the returned
+ * string will be exactly <code>getProcessedLength()</code>.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #DO_MIRRORING
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @see #KEEP_BASE_COMBINING
+ * @see #OUTPUT_REVERSE
+ * @see #REMOVE_BIDI_CONTROLS
+ * @see #OPTION_STREAMING
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public String writeReordered(int options)
+ {
+ verifyValidParaOrLine();
+ if (length == 0) {
+ /* nothing to do */
+ return "";
+ }
+ return BidiWriter.writeReordered(this, options);
+ }
+
+ /**
* Display the bidi internal state, used in debugging.
*/
public String toString() {
StringBuilder buf = new StringBuilder(getClass().getName());
*** 3505,3510 ****
--- 4774,4780 ----
if (jafa != null) {
jafa.shape(shaper, text, start, count);
}
}
}
+
}
< prev index next >