< prev index next >

src/java.base/share/classes/sun/text/bidi/BidiBase.java

Print this page




1147     /* for option OPTION_REMOVE_CONTROLS */
1148     int                 controlCount;
1149 
1150     /*
1151      * Sometimes, bit values are more appropriate
1152      * to deal with directionality properties.
1153      * Abbreviations in these method names refer to names
1154      * used in the Bidi algorithm.
1155      */
1156     static int DirPropFlag(byte dir) {
             /* Returns a mask in which only bit number 'dir' is set, so that
              * several directional properties can be OR-ed into one int and
              * tested together. */
1157         return (1 << dir);
1158     }
1159 
1160     boolean testDirPropFlagAt(int flag, int index) {
             /* True when the property stored in dirProps[index] is one of the
              * properties whose bits are set in 'flag'. */
1161         return ((DirPropFlag(dirProps[index]) & flag) != 0);
1162     }
1163 
1164     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); /* bit 31, the top bit of an int mask */
1165 
1166     /* to avoid some conditional statements, use tiny constant arrays */
         /* each array is indexed by (level & 1): element 0 holds the flag of the
            left-to-right property (L, LRE, LRO), element 1 the flag of the
            right-to-left counterpart (R, RLE, RLO) */
1167     static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
1168     static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
1169     static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
1170 
1171     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } /* even level: L flag, odd level: R flag */
1172     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; } /* even level: LRE flag, odd level: RLE flag */
1173     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; } /* even level: LRO flag, odd level: RLO flag */
1174     static final byte DirFromStrong(byte strong) { return strong == L ? L : R; } /* L stays L, any other value becomes R */
1175     static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); } /* level with the LEVEL_OVERRIDE bit(s) cleared */
1176 
1177     /*  are there any characters that are LTR or RTL? */
         /*  Each mask below ORs together the one-bit flags of the properties it
             covers; AND a set of flags with a mask to see whether any of those
             properties is present. */
1178     static final int MASK_LTR =
1179         DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1180     static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1181 
1182     static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL); /* R or AL only */
1183 
1184     /* explicit embedding codes */
1185     private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1186     private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; /* the explicit embedding codes plus BN */
1187 
1188     /* explicit isolate codes */
1189     private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);


2415     /*      bits 0..4:  next state                                       */
2416     /*      bits 5..7:  action to perform (if > 0)                       */
2417     /*                                                                   */
2418     /* Cells may be of format "n" where n represents the next state      */
2419     /* (except for the rightmost column).                                */
2420     /* Cells may also be of format "_(x,y)" where x represents an action */
2421     /* to perform and y represents the next state.                       */
2422     /*                                                                   */
2423     /*********************************************************************/
2424     /* Definitions and type for properties state tables                  */
2425     /*********************************************************************/
2426     private static final int IMPTABPROPS_COLUMNS = 16; /* entries per impTabProps row, Res column included */
2427     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; /* index of the last (Res) column */
2428     private static short GetStateProps(short cell) {
             /* Keeps bits 0..4 of a table cell: the next state. */
2429         return (short)(cell & 0x1f);
2430     }
2431     private static short GetActionProps(short cell) {
             /* Drops the five state bits, leaving the action number stored from
              * bit 5 upward (0 when the cell carries no action). */
2432         return (short)(cell >> 5);
2433     }
2434 
2435     private static final short groupProp[] =          /* dirProp regrouped */
2436     {
             /* Indexed by dirProp value, in the order listed on the next line.
                Each entry is the column of impTabProps that handles the property:
                WS, ON, FSI, LRI, RLI and PDI share column 4, while LRE, LRO, RLE,
                RLO, PDF and BN share column 10. */
2437         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
2438             0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
2439     };
         /* Reduced properties: the values stored in the Res column of impTabProps.
            Their numbering matches the first seven columns (L, R, EN, AN, ON, S, B)
            of the impTab level tables. */
2440     private static final short _L  = 0;
2441     private static final short _R  = 1;
2442     private static final short _EN = 2;
2443     private static final short _AN = 3;
2444     private static final short _ON = 4;
2445     private static final short _S  = 5;
2446     private static final short _B  = 6; /* reduced dirProp */
2447 
2448     /*********************************************************************/
2449     /*                                                                   */
2450     /*      PROPERTIES  STATE  TABLE                                     */
2451     /*                                                                   */
2452     /* In table impTabProps,                                             */
2453     /*      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI    */
2454     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
2455     /*      - the Res column is the reduced property assigned to a run   */


2462     /* Notes:                                                            */
2463     /*  1) This table is used in resolveImplicitLevels().                */
2464     /*  2) This table triggers actions when there is a change in the Bidi*/
2465     /*     property of incoming characters (action 1).                   */
2466     /*  3) Most such property sequences are processed immediately (in    */
2467     /*     fact, passed to processPropertySeq()).                        */
2468     /*  4) However, numbers are assembled as one sequence. This means    */
2469     /*     that undefined situations (like CS following digits, until    */
2470     /*     it is known if the next char will be a digit) are held until  */
2471     /*     following chars define them.                                  */
2472     /*     Example: digits followed by CS, then comes another CS or ON;  */
2473     /*              the digits will be processed, then the CS assigned   */
2474     /*              as the start of an ON sequence (action 3).           */
2475     /*  5) There are cases where more than one sequence must be          */
2476     /*     processed, for instance digits followed by CS followed by L:  */
2477     /*     the digits must be processed as one sequence, and the CS      */
2478     /*     must be processed as an ON sequence, all this before starting */
2479     /*     assembling chars for the opening L sequence.                  */
2480     /*                                                                   */
2481     /*                                                                   */
2482     private static final short impTabProps[][] =
2483     {
2484 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,   ENL,   ENR,   Res */
2485 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,    18,    21,   _ON },
2486 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3, 32+18, 32+21,    _L },
2487 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3, 32+18, 32+21,    _R },
2488 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3, 32+18, 32+21,    _R },
2489 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,    18,    21,   _EN },
2490 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3, 32+18, 32+21,   _AN },
2491 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,    18,    21,   _AN },
2492 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3, 32+18, 32+21,   _ON },
2493 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3, 32+18, 32+21,   _ON },
2494 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,    18,    21,   _ON },
2495 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,    18,    21,   _EN },
2496 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,    18,    21,   _EN },
2497 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3, 96+18, 96+21,   _AN },
2498 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,    18,    21,   _AN },
2499 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,128+18,128+21,   _ON },
2500 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3, 32+18, 32+21,    _S },
2501 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3, 32+18, 32+21,    _S },
2502 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3, 32+18, 32+21,    _B },


2561     /*        2: prepend conditional sequence to current sequence        */
2562     /*        3: set ON sequence to new level - 1                        */
2563     /*        4: init EN/AN/ON sequence                                  */
2564     /*        5: fix EN/AN/ON sequence followed by R                     */
2565     /*        6: set previous level sequence to level 2                  */
2566     /*                                                                   */
2567     /* Notes:                                                            */
2568     /*  1) These tables are used in processPropertySeq(). The input      */
2569     /*     is property sequences as determined by resolveImplicitLevels. */
2570     /*  2) Most such property sequences are processed immediately        */
2571     /*     (levels are assigned).                                        */
2572     /*  3) However, some sequences cannot be assigned a final level till */
2573     /*     one or more following sequences are received. For instance,   */
2574     /*     ON following an R sequence within an even-level paragraph.    */
2575     /*     If the following sequence is R, the ON sequence will be       */
2576     /*     assigned basic run level+1, and so will the R sequence.       */
2577     /*  4) S is generally handled like ON, since its level will be fixed */
2578     /*     to paragraph level in adjustWSLevels().                       */
2579     /*                                                                   */
2580 
2581     private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
2582         /*  In this table, conditional sequences receive the lowest possible level
2583             until proven otherwise.
                 Each row is a state and each column except the last is the reduced
                 property of the incoming sequence.
                 NOTE(review): cells written in hex (0x14, 0x21, 0x33, ...) appear to
                 hold an action index in the high nibble and the next state in the
                 low nibble; confirm against the table-format comment that precedes
                 these tables.
2584         */
2585     {
2586         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2587         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
2588         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
2589         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
2590         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
2591         /* 4 : R+ON       */ {     0,  0x21,  0x33,  0x33,     4,     4,     0,  0 },
2592         /* 5 : AN+ON      */ {     0,  0x21,     0,  0x32,     5,     5,     0,  0 }
2593     };
2594 
2595     private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
2596         /*  In this table, conditional sequences receive the lowest possible level
2597             until proven otherwise.
                 Each row is a state and each column except the last is the reduced
                 property of the incoming sequence.
2598         */
2599     {
2600         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2601         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
2602         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
2603         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
2604         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
2605         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
2606         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
2607     };
2608 
2609     private static final short[] impAct0 = {0,1,2,3,4}; /* action map in which entry i equals i */
2610 
         /* Default pair: even-level table, odd-level table, and one action map
            for each (impAct0 for both). */
2611     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
2612             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2613 
2614     private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
2615         /* In this table, conditional sequences receive the lowest possible
2616            level until proven otherwise.
2617         */
2618         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2619         /* 0 : init       */ {     0,     2,  0x11,  0x11,     0,     0,     0,  0 },
2620         /* 1 : L+EN/AN    */ {     0,  0x42,     1,     1,     0,     0,     0,  0 },
2621         /* 2 : R          */ {     0,     2,     4,     4,  0x13,  0x13,     0,  1 },
2622         /* 3 : R+ON       */ {     0,  0x22,  0x34,  0x34,     3,     3,     0,  0 },
2623         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
2624     };
         /* Only the even-level table is specific to this pair; the odd-level side
            reuses impTabR_DEFAULT. */
2625     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
2626             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2627 
2628     private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
2629         /* In this table, EN/AN+ON sequences receive levels as if associated with R
2630            until proven that there is L or sor/eor on both sides. AN is handled like EN.
                NOTE(review): presumably the even-paragraph-level table, like the
                other impTabL_* tables; confirm.
2631         */
2632         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2633         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
2634         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
2635         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
2636         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
2637         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
2638         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
2639     };
2640     private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
2641         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
2642             until proven that there is L on both sides. AN is handled like EN.
                 NOTE(review): presumably the odd-paragraph-level table, like the
                 other impTabR_* tables; confirm.
2643         */
2644         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2645         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
2646         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
2647         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
2648         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
2649         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
2650     };
2651     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
2652             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2653                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2654 
2655     private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
2656         /* This table is identical to the Default LTR table except that EN and AN
2657            are handled like L.
                NOTE(review): rows 4 and 5 also differ from impTabL_DEFAULT in
                their L, R, B and Res columns, so "identical except for EN and AN"
                is only approximate.
2658         */
2659         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2660         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
2661         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
2662         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
2663         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
2664         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
2665         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
2666     };
2667     private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
2668         /* This table is identical to the Default RTL table except that EN and AN
2669            are handled like L.
                (Compared with impTabR_DEFAULT, every EN and AN cell here repeats
                the L cell of its row.)
2670         */
2671         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2672         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
2673         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
2674         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
2675         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
2676         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
2677         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
2678     };
2679     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
2680             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2681              impAct0, impAct0);
2682 
2683     private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
2684         /*  In this table, conditional sequences receive the lowest possible level
2685             until proven otherwise.
2686         */
2687         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2688         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
2689         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
2690         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
2691         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
2692         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
2693         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
2694         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
2695     };
         /* Action map passed alongside impTabR_INVERSE_LIKE_DIRECT below.
            NOTE(review): presumably indexed by the action number found in a table
            cell; confirm in processPropertySeq(). */
2696     private static final short[] impAct1 = {0,1,13,14};
         /* The even-level side reuses impTabL_DEFAULT with impAct0. */
2697     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
2698             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2699 
2700     private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2701         /* The case handled in this table is (visually):  R EN L
2702          */
2703         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2704         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
2705         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
2706         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
2707         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
2708         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
2709         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
2710         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
2711     };
2712     private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2713         /* The cases handled in this table are (visually):  R EN L
2714                                                             R L AN L
2715         */
2716         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2717         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
2718         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
2719         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
2720         /* 3 : L          */ {     3,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
2721         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
2722         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
2723         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
2724     };
         /* Action maps for the two tables above: impAct2 is passed with the L
            table and impAct3 with the R table. */
2725     private static final short[] impAct2 = {0,1,2,5,6,7,8};
2726     private static final short[] impAct3 = {0,1,9,10,11,12};
2727     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
2728             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
2729                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2730 
         /* Built entirely from tables defined for other pairs:
            impTabL_NUMBERS_SPECIAL and impTabR_INVERSE_LIKE_DIRECT. */
2731     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
2732             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2733 
2734     private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
2735         /*  The case handled in this table is (visually):  R EN L
2736         */
2737         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2738         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
2739         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
2740         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
2741         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
2742         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
2743     };
         /* The odd-level side reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS and the
            same impAct2/impAct3 action maps as that pair. */
2744     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
2745             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
2746                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2747 
2748     private static class LevState {
2749         byte[][] impTab;                /* level table pointer          */
2750         short[] impAct;                 /* action map array             */
2751         int startON;                    /* start of ON sequence         */
2752         int startL2EN;                  /* start of level 2 sequence    */
2753         int lastStrongRTL;              /* index of last found R or AL  */
2754         int runStart;                   /* start position of the run    */




1147     /* for option OPTION_REMOVE_CONTROLS */
1148     int                 controlCount;
1149 
1150     /*
1151      * Sometimes, bit values are more appropriate
1152      * to deal with directionality properties.
1153      * Abbreviations in these method names refer to names
1154      * used in the Bidi algorithm.
1155      */
1156     static int DirPropFlag(byte dir) {
             /* Returns a mask in which only bit number 'dir' is set, so that
              * several directional properties can be OR-ed into one int and
              * tested together. */
1157         return (1 << dir);
1158     }
1159 
1160     boolean testDirPropFlagAt(int flag, int index) {
             /* True when the property stored in dirProps[index] is one of the
              * properties whose bits are set in 'flag'. */
1161         return ((DirPropFlag(dirProps[index]) & flag) != 0);
1162     }
1163 
1164     static final int DirPropFlagMultiRuns = DirPropFlag((byte)31); /* bit 31, the top bit of an int mask */
1165 
1166     /* to avoid some conditional statements, use tiny constant arrays */
         /* each array is indexed by (level & 1): element 0 holds the flag of the
            left-to-right property (L, LRE, LRO), element 1 the flag of the
            right-to-left counterpart (R, RLE, RLO) */
1167     static final int[] DirPropFlagLR = { DirPropFlag(L), DirPropFlag(R) };
1168     static final int[] DirPropFlagE = { DirPropFlag(LRE), DirPropFlag(RLE) };
1169     static final int[] DirPropFlagO = { DirPropFlag(LRO), DirPropFlag(RLO) };
1170 
1171     static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; } /* even level: L flag, odd level: R flag */
1172     static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; } /* even level: LRE flag, odd level: RLE flag */
1173     static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; } /* even level: LRO flag, odd level: RLO flag */
1174     static final byte DirFromStrong(byte strong) { return strong == L ? L : R; } /* L stays L, any other value becomes R */
1175     static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); } /* level with the LEVEL_OVERRIDE bit(s) cleared */
1176 
1177     /*  are there any characters that are LTR or RTL? */
         /*  Each mask below ORs together the one-bit flags of the properties it
             covers; AND a set of flags with a mask to see whether any of those
             properties is present. */
1178     static final int MASK_LTR =
1179         DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1180     static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1181 
1182     static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL); /* R or AL only */
1183 
1184     /* explicit embedding codes */
1185     private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1186     private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT; /* the explicit embedding codes plus BN */
1187 
1188     /* explicit isolate codes */
1189     private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);


2415     /*      bits 0..4:  next state                                       */
2416     /*      bits 5..7:  action to perform (if > 0)                       */
2417     /*                                                                   */
2418     /* Cells may be of format "n" where n represents the next state      */
2419     /* (except for the rightmost column).                                */
2420     /* Cells may also be of format "_(x,y)" where x represents an action */
2421     /* to perform and y represents the next state.                       */
2422     /*                                                                   */
2423     /*********************************************************************/
2424     /* Definitions and type for properties state tables                  */
2425     /*********************************************************************/
2426     private static final int IMPTABPROPS_COLUMNS = 16; /* entries per impTabProps row, Res column included */
2427     private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1; /* index of the last (Res) column */
2428     private static short GetStateProps(short cell) {
             /* Keeps bits 0..4 of a table cell: the next state. */
2429         return (short)(cell & 0x1f);
2430     }
2431     private static short GetActionProps(short cell) {
             /* Drops the five state bits, leaving the action number stored from
              * bit 5 upward (0 when the cell carries no action). */
2432         return (short)(cell >> 5);
2433     }
2434 
2435     private static final short[] groupProp =          /* dirProp regrouped */
2436     {
             /* Indexed by dirProp value, in the order listed on the next line.
                Each entry is the column of impTabProps that handles the property:
                WS, ON, FSI, LRI, RLI and PDI share column 4, while LRE, LRO, RLE,
                RLO, PDF and BN share column 10. */
2437         /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
2438             0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
2439     };
         /* Reduced properties: the values stored in the Res column of impTabProps.
            Their numbering matches the first seven columns (L, R, EN, AN, ON, S, B)
            of the impTab level tables. */
2440     private static final short _L  = 0;
2441     private static final short _R  = 1;
2442     private static final short _EN = 2;
2443     private static final short _AN = 3;
2444     private static final short _ON = 4;
2445     private static final short _S  = 5;
2446     private static final short _B  = 6; /* reduced dirProp */
2447 
2448     /*********************************************************************/
2449     /*                                                                   */
2450     /*      PROPERTIES  STATE  TABLE                                     */
2451     /*                                                                   */
2452     /* In table impTabProps,                                             */
2453     /*      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI    */
2454     /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
2455     /*      - the Res column is the reduced property assigned to a run   */


2462     /* Notes:                                                            */
2463     /*  1) This table is used in resolveImplicitLevels().                */
2464     /*  2) This table triggers actions when there is a change in the Bidi*/
2465     /*     property of incoming characters (action 1).                   */
2466     /*  3) Most such property sequences are processed immediately (in    */
2467     /*     fact, passed to processPropertySeq()).                        */
2468     /*  4) However, numbers are assembled as one sequence. This means    */
2469     /*     that undefined situations (like CS following digits, until    */
2470     /*     it is known if the next char will be a digit) are held until  */
2471     /*     following chars define them.                                  */
2472     /*     Example: digits followed by CS, then comes another CS or ON;  */
2473     /*              the digits will be processed, then the CS assigned   */
2474     /*              as the start of an ON sequence (action 3).           */
2475     /*  5) There are cases where more than one sequence must be          */
2476     /*     processed, for instance digits followed by CS followed by L:  */
2477     /*     the digits must be processed as one sequence, and the CS      */
2478     /*     must be processed as an ON sequence, all this before starting */
2479     /*     assembling chars for the opening L sequence.                  */
2480     /*                                                                   */
2481     /*                                                                   */
2482     private static final short[][] impTabProps =
2483     {
2484 /*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,   ENL,   ENR,   Res */
2485 /* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,    18,    21,   _ON },
2486 /* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3, 32+18, 32+21,    _L },
2487 /* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3, 32+18, 32+21,    _R },
2488 /* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3, 32+18, 32+21,    _R },
2489 /* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,    18,    21,   _EN },
2490 /* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3, 32+18, 32+21,   _AN },
2491 /* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,    18,    21,   _AN },
2492 /* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3, 32+18, 32+21,   _ON },
2493 /* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3, 32+18, 32+21,   _ON },
2494 /* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,    18,    21,   _ON },
2495 /*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,    18,    21,   _EN },
2496 /*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,    18,    21,   _EN },
2497 /*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3, 96+18, 96+21,   _AN },
2498 /*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,    18,    21,   _AN },
2499 /*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,128+18,128+21,   _ON },
2500 /*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3, 32+18, 32+21,    _S },
2501 /*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3, 32+18, 32+21,    _S },
2502 /*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3, 32+18, 32+21,    _B },


2561     /*        2: prepend conditional sequence to current sequence        */
2562     /*        3: set ON sequence to new level - 1                        */
2563     /*        4: init EN/AN/ON sequence                                  */
2564     /*        5: fix EN/AN/ON sequence followed by R                     */
2565     /*        6: set previous level sequence to level 2                  */
2566     /*                                                                   */
2567     /* Notes:                                                            */
2568     /*  1) These tables are used in processPropertySeq(). The input      */
2569     /*     is property sequences as determined by resolveImplicitLevels. */
2570     /*  2) Most such property sequences are processed immediately        */
2571     /*     (levels are assigned).                                        */
2572     /*  3) However, some sequences cannot be assigned a final level till */
2573     /*     one or more following sequences are received. For instance,   */
2574     /*     ON following an R sequence within an even-level paragraph.    */
2575     /*     If the following sequence is R, the ON sequence will be       */
2576     /*     assigned basic run level+1, and so will the R sequence.       */
2577     /*  4) S is generally handled like ON, since its level will be fixed */
2578     /*     to paragraph level in adjustWSLevels().                       */
2579     /*                                                                   */
2580 
2581     private static final byte[][] impTabL_DEFAULT = /* Even paragraph level */
2582         /*  In this table, conditional sequences receive the lower possible level
2583             until proven otherwise.
2584         */
             /*  Layout: one row per state (6 states, 0..5) and one column per
                 input property in the order L, R, EN, AN, ON, S, B, followed by
                 a final "Res" column.
                 This table is passed as the first table of impTab_DEFAULT and of
                 impTab_INVERSE_LIKE_DIRECT, both times together with impAct0.
                 NOTE(review): cells written as 0xsn appear to pack an action
                 selector s (high hex digit) with the next state n (low hex
                 digit), and plain cells appear to be just the next state;
                 confirm against the table-format description that precedes
                 these tables.
             */
2585     {
2586         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2587         /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
2588         /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
2589         /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
2590         /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
2591         /* 4 : R+ON       */ {     0,  0x21,  0x33,  0x33,     4,     4,     0,  0 },
2592         /* 5 : AN+ON      */ {     0,  0x21,     0,  0x32,     5,     5,     0,  0 }
2593     };
2594 
2595     private static final byte[][] impTabR_DEFAULT = /* Odd  paragraph level */
2596         /*  In this table, conditional sequences receive the lower possible level
2597             until proven otherwise.
2598         */
             /*  Layout: one row per state (6 states, 0..5) and one column per
                 input property in the order L, R, EN, AN, ON, S, B, followed by
                 a final "Res" column.
                 This table is passed as the second table of impTab_DEFAULT and
                 of impTab_NUMBERS_SPECIAL, both times together with impAct0.
                 NOTE(review): cells written as 0xsn appear to pack an action
                 selector s (high hex digit) with the next state n (low hex
                 digit); confirm against the table-format description that
                 precedes these tables.
             */
2599     {
2600         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2601         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
2602         /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
2603         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
2604         /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
2605         /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
2606         /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
2607     };
2608 
2609     private static final short[] impAct0 = {0,1,2,3,4}; /* action map whose entry i is simply i;
                 it is the map handed to ImpTabPair for every table pair that
                 does not use impAct1, impAct2 or impAct3.
                 NOTE(review): presumably indexed by the action selector taken
                 from a table cell, the entry being the action number from the
                 "Actions" list above -- confirm in processPropertySeq(). */
2610 
2611     private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
                 /* Default pairing: the even-level (L) table, the odd-level (R)
                    table, and impAct0 as the action map given for each of them.
                    NOTE(review): argument order assumed to be
                    (table L, table R, action map L, action map R) -- confirm
                    against the ImpTabPair constructor. */
2612             impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2613 
2614     private static final byte[][] impTabL_NUMBERS_SPECIAL = { /* Even paragraph level */
2615         /* In this table, conditional sequences receive the lower possible
2616            level until proven otherwise.
2617         */
             /* Layout: one row per state (5 states, 0..4) and one column per
                input property in the order L, R, EN, AN, ON, S, B, followed by
                a final "Res" column. EN and AN have identical entries in every
                row of this table.
                This table is passed as the first table of
                impTab_NUMBERS_SPECIAL and impTab_INVERSE_FOR_NUMBERS_SPECIAL.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); confirm against the table-format description that
                precedes these tables.
             */
2618         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2619         /* 0 : init       */ {     0,     2,  0x11,  0x11,     0,     0,     0,  0 },
2620         /* 1 : L+EN/AN    */ {     0,  0x42,     1,     1,     0,     0,     0,  0 },
2621         /* 2 : R          */ {     0,     2,     4,     4,  0x13,  0x13,     0,  1 },
2622         /* 3 : R+ON       */ {     0,  0x22,  0x34,  0x34,     3,     3,     0,  0 },
2623         /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
2624     };
2625     private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
                 /* Only the even-level (L) table is specific to this pair; the
                    odd-level side reuses impTabR_DEFAULT. impAct0 is given as
                    the action map for both tables. */
2626             impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2627 
2628     private static final byte[][] impTabL_GROUP_NUMBERS_WITH_R = {
2629         /* In this table, EN/AN+ON sequences receive levels as if associated with R
2630            until proven that there is L or sor/eor on both sides. AN is handled like EN.
2631         */
             /* Layout: one row per state (6 states, 0..5) and one column per
                input property in the order L, R, EN, AN, ON, S, B, followed by
                a final "Res" column. The EN and AN columns are identical in
                every row, matching "AN is handled like EN" above.
                This is the first table of impTab_GROUP_NUMBERS_WITH_R.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); confirm against the table-format description that
                precedes these tables.
             */
2632         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2633         /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
2634         /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
2635         /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
2636         /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
2637         /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
2638         /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
2639     };
2640     private static final byte[][] impTabR_GROUP_NUMBERS_WITH_R = {
2641         /*  In this table, EN/AN+ON sequences receive levels as if associated with R
2642             until proven that there is L on both sides. AN is handled like EN.
2643         */
             /*  Layout: one row per state (5 states, 0..4) and one column per
                 input property in the order L, R, EN, AN, ON, S, B, followed by
                 a final "Res" column. The EN and AN columns are identical in
                 every row, matching "AN is handled like EN" above.
                 This is the second table of impTab_GROUP_NUMBERS_WITH_R.
                 NOTE(review): cells written as 0xsn appear to pack an action
                 selector s (high hex digit) with the next state n (low hex
                 digit); confirm against the table-format description that
                 precedes these tables.
             */
2644         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2645         /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
2646         /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
2647         /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
2648         /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
2649         /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
2650     };
2651     private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
                 /* Both tables are specific to this pair; impAct0 is given as
                    the action map for each of them. */
2652             ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2653                        impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2654 
2655     private static final byte[][] impTabL_INVERSE_NUMBERS_AS_L = {
2656         /* This table is identical to the Default LTR table except that EN and AN
2657            are handled like L.
2658         */
             /* Layout: one row per state (6 states, 0..5, same state names as
                impTabL_DEFAULT) and one column per input property in the order
                L, R, EN, AN, ON, S, B, followed by a final "Res" column. In
                every row the EN and AN entries equal the L entry.
                This is the first table of impTab_INVERSE_NUMBERS_AS_L.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); confirm against the table-format description that
                precedes these tables. If that reading is right, no cell of
                rows 0, 1 and 4 leads to state 2, 3 or 5, so those rows look
                unreachable from "init" and seem to be kept only to mirror the
                default table -- verify before relying on this.
             */
2659         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2660         /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
2661         /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
2662         /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
2663         /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
2664         /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
2665         /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
2666     };
2667     private static final byte[][] impTabR_INVERSE_NUMBERS_AS_L = {
2668         /* This table is identical to the Default RTL table except that EN and AN
2669            are handled like L.
2670         */
             /* Layout: one row per state (6 states, 0..5, same state names as
                impTabR_DEFAULT) and one column per input property in the order
                L, R, EN, AN, ON, S, B, followed by a final "Res" column. In
                every row the EN and AN entries equal the L entry.
                This is the second table of impTab_INVERSE_NUMBERS_AS_L.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); confirm against the table-format description that
                precedes these tables.
             */
2671         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2672         /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
2673         /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
2674         /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
2675         /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
2676         /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
2677         /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
2678     };
2679     private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
                 /* Both tables are specific to this pair; impAct0 is given as
                    the action map for each of them. */
2680             (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2681              impAct0, impAct0);
2682 
2683     private static final byte[][] impTabR_INVERSE_LIKE_DIRECT = {  /* Odd  paragraph level */
2684         /*  In this table, conditional sequences receive the lower possible level
2685             until proven otherwise.
2686         */
             /*  Layout: one row per state (7 states, 0..6) and one column per
                 input property in the order L, R, EN, AN, ON, S, B, followed by
                 a final "Res" column.
                 This table is passed as the second table of
                 impTab_INVERSE_LIKE_DIRECT and of
                 impTab_INVERSE_FOR_NUMBERS_SPECIAL, both times with impAct1 as
                 the last argument.
                 NOTE(review): cells written as 0xsn appear to pack an action
                 selector s (high hex digit) with the next state n (low hex
                 digit); the selectors used here (1..3) would then index
                 impAct1 -- confirm against the table-format description that
                 precedes these tables.
             */
2687         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2688         /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
2689         /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
2690         /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
2691         /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
2692         /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
2693         /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
2694         /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
2695     };
2696     private static final short[] impAct1 = {0,1,13,14}; /* action map given together with
                 impTabR_INVERSE_LIKE_DIRECT; unlike impAct0, entries 2 and 3
                 hold 13 and 14.
                 NOTE(review): presumably indexed by the action selector taken
                 from a table cell, the entry being the action number executed
                 in processPropertySeq() -- confirm there. */
2697     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
                 /* The even-level side reuses impTabL_DEFAULT with impAct0;
                    only the odd-level table and its action map (impAct1) are
                    specific to this pair.
                    NOTE(review): argument order assumed to be
                    (table L, table R, action map L, action map R) -- confirm
                    against the ImpTabPair constructor. */
2698             impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2699 
2700     private static final byte[][] impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = {
2701         /* The case handled in this table is (visually):  R EN L
2702          */
             /* Layout: one row per state (7 states, 0..6) and one column per
                input property in the order L, R, EN, AN, ON, S, B, followed by
                a final "Res" column.
                This is the first table of
                impTab_INVERSE_LIKE_DIRECT_WITH_MARKS, where it is given
                together with impAct2.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); the selectors used here (1..6) would then index
                impAct2 -- confirm against the table-format description that
                precedes these tables.
              */
2703         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2704         /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
2705         /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
2706         /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
2707         /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
2708         /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
2709         /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
2710         /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
2711     };
2712     private static final byte[][] impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = {
2713         /* The cases handled in this table are (visually):  R EN L
2714                                                             R L AN L
2715         */
             /* Layout: one row per state (7 states, 0..6) and one column per
                input property in the order L, R, EN, AN, ON, S, B, followed by
                a final "Res" column.
                This table is passed as the second table of
                impTab_INVERSE_LIKE_DIRECT_WITH_MARKS and of
                impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, both times with
                impAct3 as the last argument.
                NOTE(review): cells written as 0xsn appear to pack an action
                selector s (high hex digit) with the next state n (low hex
                digit); the selectors used here (1..5) would then index
                impAct3 -- confirm against the table-format description that
                precedes these tables.
             */
2716         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2717         /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
2718         /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
2719         /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
2720         /* 3 : L          */ {     3,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
2721         /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
2722         /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
2723         /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
2724     };
2725     private static final short[] impAct2 = {0,1,2,5,6,7,8}; /* action map given with the two
                 "L ... WITH_MARKS" tables (impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
                 and impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS) */
2726     private static final short[] impAct3 = {0,1,9,10,11,12}; /* action map given with
                 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS.
                 NOTE(review): both maps are presumably indexed by the action
                 selector taken from a table cell, the entry being the action
                 number executed in processPropertySeq() -- confirm there. */
2727     private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
                 /* Both tables are "WITH_MARKS" variants; impAct2 is given after
                    them as the third argument and impAct3 as the fourth.
                    NOTE(review): argument order assumed to be
                    (table L, table R, action map L, action map R) -- confirm
                    against the ImpTabPair constructor. */
2728             new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
2729                            impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2730 
2731     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
                 /* No table of its own: combines impTabL_NUMBERS_SPECIAL (with
                    impAct0) and impTabR_INVERSE_LIKE_DIRECT (with impAct1). */
2732             impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2733 
2734     private static final byte[][] impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
2735         /*  The case handled in this table is (visually):  R EN L
2736         */
             /*  Layout: one row per state (5 states, 0..4, same state names as
                 impTabL_NUMBERS_SPECIAL) and one column per input property in
                 the order L, R, EN, AN, ON, S, B, followed by a final "Res"
                 column. EN and AN have identical entries in every row.
                 This is the first table of
                 impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, where it is
                 given together with impAct2.
                 NOTE(review): cells written as 0xsn appear to pack an action
                 selector s (high hex digit) with the next state n (low hex
                 digit); the selectors used here (1..6) would then index
                 impAct2 -- confirm against the table-format description that
                 precedes these tables.
             */
2737         /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
2738         /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
2739         /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
2740         /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
2741         /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
2742         /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
2743     };
2744     private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
                 /* Only the first table is specific to this pair; the second is
                    impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, also used by
                    impTab_INVERSE_LIKE_DIRECT_WITH_MARKS. The action maps given
                    are impAct2 and impAct3, in that order. */
2745             ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
2746                        impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2747 
2748     private static class LevState {
2749         byte[][] impTab;                /* level table pointer          */
2750         short[] impAct;                 /* action map array             */
2751         int startON;                    /* start of ON sequence         */
2752         int startL2EN;                  /* start of level 2 sequence    */
2753         int lastStrongRTL;              /* index of last found R or AL  */
2754         int runStart;                   /* start position of the run    */


< prev index next >