1147 /* for option OPTION_REMOVE_CONTROLS */
/* NOTE(review): presumably a count of control characters found in the text;
   the code that updates it is outside this excerpt - confirm. */
1148 int controlCount;
1149
1150 /*
1151 * Sometimes, bit values are more appropriate
1152 * to deal with directionality properties.
1153 * Abbreviations in these method names refer to names
1154 * used in the Bidi algorithm.
1155 */
1156 static int DirPropFlag(byte dir) {
1157 return (1 << dir);
1158 }
1159
1160 boolean testDirPropFlagAt(int flag, int index) {
1161 return ((DirPropFlag(dirProps[index]) & flag) != 0);
1162 }
1163
1164 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
1165
1166 /* to avoid some conditional statements, use tiny constant arrays */
1167 static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
1168 static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
1169 static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
1170
1171 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
1172 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
1173 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
1174 static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
1175 static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
1176
1177 /* are there any characters that are LTR or RTL? */
1178 static final int MASK_LTR =
1179 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1180 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1181
1182 static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
1183
1184 /* explicit embedding codes */
1185 private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1186 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
1187
1188 /* explicit isolate codes */
1189 private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
2415 /* bits 0..4: next state */
2416 /* bits 5..7: action to perform (if > 0) */
2417 /* */
2418 /* Cells may be of format "n" where n represents the next state */
2419 /* (except for the rightmost column). */
2420 /* Cells may also be of format "_(x,y)" where x represents an action */
2421 /* to perform and y represents the next state. */
2422 /* */
2423 /*********************************************************************/
2424 /* Definitions and type for properties state tables */
2425 /*********************************************************************/
/* number of cells in one row of impTabProps: 15 input columns plus the trailing Res column */
2426 private static final int IMPTABPROPS_COLUMNS = 16;
/* index of the last cell of a row, i.e. the Res column */
2427 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
2428 private static short GetStateProps(short cell) {
2429 return (short)(cell & 0x1f);
2430 }
2431 private static short GetActionProps(short cell) {
2432 return (short)(cell >> 5);
2433 }
2434
2435 private static final short groupProp[] = /* dirProp regrouped */
2436 {
2437 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
2438 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
2439 };
2440 private static final short _L = 0;
2441 private static final short _R = 1;
2442 private static final short _EN = 2;
2443 private static final short _AN = 3;
2444 private static final short _ON = 4;
2445 private static final short _S = 5;
2446 private static final short _B = 6; /* reduced dirProp */
2447
2448 /*********************************************************************/
2449 /* */
2450 /* PROPERTIES STATE TABLE */
2451 /* */
2452 /* In table impTabProps, */
2453 /* - the ON column regroups ON and WS, FSI, RLI, LRI and PDI */
2454 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
2455 /* - the Res column is the reduced property assigned to a run */
2462 /* Notes: */
2463 /* 1) This table is used in resolveImplicitLevels(). */
2464 /* 2) This table triggers actions when there is a change in the Bidi*/
2465 /* property of incoming characters (action 1). */
2466 /* 3) Most such property sequences are processed immediately (in */
2467 /* fact, passed to processPropertySeq()). */
2468 /* 4) However, numbers are assembled as one sequence. This means */
2469 /* that undefined situations (like CS following digits, until */
2470 /* it is known if the next char will be a digit) are held until */
2471 /* following chars define them. */
2472 /* Example: digits followed by CS, then comes another CS or ON; */
2473 /* the digits will be processed, then the CS assigned */
2474 /* as the start of an ON sequence (action 3). */
2475 /* 5) There are cases where more than one sequence must be */
2476 /* processed, for instance digits followed by CS followed by L: */
2477 /* the digits must be processed as one sequence, and the CS */
2478 /* must be processed as an ON sequence, all this before starting */
2479 /* assembling chars for the opening L sequence. */
2480 /* */
2481 /* */
2482 private static final short impTabProps[][] =
2483 {
2484 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR, Res */
2485 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, 18, 21, _ON },
2486 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, 32+18, 32+21, _L },
2487 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, 32+18, 32+21, _R },
2488 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, 32+18, 32+21, _R },
2489 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, 18, 21, _EN },
2490 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, 32+18, 32+21, _AN },
2491 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, 18, 21, _AN },
2492 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, 32+18, 32+21, _ON },
2493 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, 32+18, 32+21, _ON },
2494 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, 18, 21, _ON },
2495 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, 18, 21, _EN },
2496 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, 18, 21, _EN },
2497 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, 96+18, 96+21, _AN },
2498 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, 18, 21, _AN },
2499 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3,128+18,128+21, _ON },
2500 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, 32+18, 32+21, _S },
2501 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, 32+18, 32+21, _S },
2502 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, 32+18, 32+21, _B },
2561 /* 2: prepend conditional sequence to current sequence */
2562 /* 3: set ON sequence to new level - 1 */
2563 /* 4: init EN/AN/ON sequence */
2564 /* 5: fix EN/AN/ON sequence followed by R */
2565 /* 6: set previous level sequence to level 2 */
2566 /* */
2567 /* Notes: */
2568 /* 1) These tables are used in processPropertySeq(). The input */
2569 /* is property sequences as determined by resolveImplicitLevels. */
2570 /* 2) Most such property sequences are processed immediately */
2571 /* (levels are assigned). */
2572 /* 3) However, some sequences cannot be assigned a final level till */
2573 /* one or more following sequences are received. For instance, */
2574 /* ON following an R sequence within an even-level paragraph. */
2575 /* If the following sequence is R, the ON sequence will be */
2576 /* assigned basic run level+1, and so will the R sequence. */
2577 /* 4) S is generally handled like ON, since its level will be fixed */
2578 /* to paragraph level in adjustWSLevels(). */
2579 /* */
2580
2581 private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
2582 /* In this table, conditional sequences receive the lower possible level
2583 until proven otherwise.
2584 */
2585 {
2586 /* L, R, EN, AN, ON, S, B, Res */
2587 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
2588 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
2589 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
2590 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
2591 /* 4 : R+ON */ { 0, 0x21, 0x33, 0x33, 4, 4, 0, 0 },
2592 /* 5 : AN+ON */ { 0, 0x21, 0, 0x32, 5, 5, 0, 0 }
2593 };
2594
2595 private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */
2596 /* In this table, conditional sequences receive the lower possible level
2597 until proven otherwise.
2598 */
2599 {
2600 /* L, R, EN, AN, ON, S, B, Res */
2601 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2602 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 },
2603 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2604 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
2605 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
2606 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
2607 };
2608
2609 private static final short[] impAct0 = {0,1,2,3,4};
2610
2611 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
2612 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2613
2614 private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
2615 /* In this table, conditional sequences receive the lower possible
2616 level until proven otherwise.
2617 */
2618 /* L, R, EN, AN, ON, S, B, Res */
2619 /* 0 : init */ { 0, 2, 0x11, 0x11, 0, 0, 0, 0 },
2620 /* 1 : L+EN/AN */ { 0, 0x42, 1, 1, 0, 0, 0, 0 },
2621 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0x13, 0, 1 },
2622 /* 3 : R+ON */ { 0, 0x22, 0x34, 0x34, 3, 3, 0, 0 },
2623 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
2624 };
2625 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
2626 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2627
2628 private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
2629 /* In this table, EN/AN+ON sequences receive levels as if associated with R
2630 until proven that there is L or sor/eor on both sides. AN is handled like EN.
2631 */
2632 /* L, R, EN, AN, ON, S, B, Res */
2633 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 },
2634 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 },
2635 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 },
2636 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 },
2637 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 },
2638 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 }
2639 };
2640 private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
2641 /* In this table, EN/AN+ON sequences receive levels as if associated with R
2642 until proven that there is L on both sides. AN is handled like EN.
2643 */
2644 /* L, R, EN, AN, ON, S, B, Res */
2645 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 },
2646 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 },
2647 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 },
2648 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 },
2649 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 }
2650 };
2651 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
2652 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2653 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2654
2655 private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
2656 /* This table is identical to the Default LTR table except that EN and AN
2657 are handled like L.
2658 */
2659 /* L, R, EN, AN, ON, S, B, Res */
2660 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 },
2661 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 },
2662 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 },
2663 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 },
2664 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 },
2665 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 }
2666 };
2667 private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
2668 /* This table is identical to the Default RTL table except that EN and AN
2669 are handled like L.
2670 */
2671 /* L, R, EN, AN, ON, S, B, Res */
2672 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 },
2673 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 },
2674 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 },
2675 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 },
2676 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 },
2677 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 }
2678 };
2679 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
2680 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2681 impAct0, impAct0);
2682
2683 private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */
2684 /* In this table, conditional sequences receive the lower possible level
2685 until proven otherwise.
2686 */
2687 /* L, R, EN, AN, ON, S, B, Res */
2688 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2689 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 },
2690 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2691 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
2692 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
2693 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
2694 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
2695 };
2696 private static final short[] impAct1 = {0,1,13,14};
2697 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
2698 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2699
2700 private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2701 /* The case handled in this table is (visually): R EN L
2702 */
2703 /* L, R, EN, AN, ON, S, B, Res */
2704 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 },
2705 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 },
2706 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 },
2707 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 },
2708 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 },
2709 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 },
2710 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 }
2711 };
2712 private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
2713 /* The cases handled in this table are (visually): R EN L
2714 R L AN L
2715 */
2716 /* L, R, EN, AN, ON, S, B, Res */
2717 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
2718 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
2719 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
2720 /* 3 : L */ { 3, 0, 3, 0x36, 0x14, 0x40, 0, 1 },
2721 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
2722 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
2723 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
2724 };
2725 private static final short[] impAct2 = {0,1,2,5,6,7,8};
2726 private static final short[] impAct3 = {0,1,9,10,11,12};
2727 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
2728 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
2729 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2730
2731 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
2732 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2733
2734 private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
2735 /* The case handled in this table is (visually): R EN L
2736 */
2737 /* L, R, EN, AN, ON, S, B, Res */
2738 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 },
2739 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 },
2740 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 },
2741 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
2742 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
2743 };
2744 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
2745 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
2746 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2747
2748 private static class LevState {
2749 byte[][] impTab; /* level table pointer */
2750 short[] impAct; /* action map array */
2751 int startON; /* start of ON sequence */
2752 int startL2EN; /* start of level 2 sequence */
2753 int lastStrongRTL; /* index of last found R or AL */
2754 int runStart; /* start position of the run */
|
1147 /* for option OPTION_REMOVE_CONTROLS */
/* NOTE(review): presumably a count of control characters found in the text;
   the code that updates it is outside this excerpt - confirm. */
1148 int controlCount;
1149
1150 /*
1151 * Sometimes, bit values are more appropriate
1152 * to deal with directionality properties.
1153 * Abbreviations in these method names refer to names
1154 * used in the Bidi algorithm.
1155 */
1156 static int DirPropFlag(byte dir) {
1157 return (1 << dir);
1158 }
1159
1160 boolean testDirPropFlagAt(int flag, int index) {
1161 return ((DirPropFlag(dirProps[index]) & flag) != 0);
1162 }
1163
1164 static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
1165
1166 /* to avoid some conditional statements, use tiny constant arrays */
1167 static final int[] DirPropFlagLR = { DirPropFlag(L), DirPropFlag(R) };
1168 static final int[] DirPropFlagE = { DirPropFlag(LRE), DirPropFlag(RLE) };
1169 static final int[] DirPropFlagO = { DirPropFlag(LRO), DirPropFlag(RLO) };
1170
1171 static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
1172 static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
1173 static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
1174 static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
1175 static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
1176
1177 /* are there any characters that are LTR or RTL? */
1178 static final int MASK_LTR =
1179 DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1180 static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1181
1182 static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
1183
1184 /* explicit embedding codes */
1185 private static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1186 private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
1187
1188 /* explicit isolate codes */
1189 private static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
2415 /* bits 0..4: next state */
2416 /* bits 5..7: action to perform (if > 0) */
2417 /* */
2418 /* Cells may be of format "n" where n represents the next state */
2419 /* (except for the rightmost column). */
2420 /* Cells may also be of format "_(x,y)" where x represents an action */
2421 /* to perform and y represents the next state. */
2422 /* */
2423 /*********************************************************************/
2424 /* Definitions and type for properties state tables */
2425 /*********************************************************************/
/* number of cells in one row of impTabProps: 15 input columns plus the trailing Res column */
2426 private static final int IMPTABPROPS_COLUMNS = 16;
/* index of the last cell of a row, i.e. the Res column */
2427 private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
2428 private static short GetStateProps(short cell) {
2429 return (short)(cell & 0x1f);
2430 }
2431 private static short GetActionProps(short cell) {
2432 return (short)(cell >> 5);
2433 }
2434
2435 private static final short[] groupProp = /* dirProp regrouped */
2436 {
2437 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */
2438 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14
2439 };
2440 private static final short _L = 0;
2441 private static final short _R = 1;
2442 private static final short _EN = 2;
2443 private static final short _AN = 3;
2444 private static final short _ON = 4;
2445 private static final short _S = 5;
2446 private static final short _B = 6; /* reduced dirProp */
2447
2448 /*********************************************************************/
2449 /* */
2450 /* PROPERTIES STATE TABLE */
2451 /* */
2452 /* In table impTabProps, */
2453 /* - the ON column regroups ON and WS, FSI, RLI, LRI and PDI */
2454 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
2455 /* - the Res column is the reduced property assigned to a run */
2462 /* Notes: */
2463 /* 1) This table is used in resolveImplicitLevels(). */
2464 /* 2) This table triggers actions when there is a change in the Bidi*/
2465 /* property of incoming characters (action 1). */
2466 /* 3) Most such property sequences are processed immediately (in */
2467 /* fact, passed to processPropertySeq()). */
2468 /* 4) However, numbers are assembled as one sequence. This means */
2469 /* that undefined situations (like CS following digits, until */
2470 /* it is known if the next char will be a digit) are held until */
2471 /* following chars define them. */
2472 /* Example: digits followed by CS, then comes another CS or ON; */
2473 /* the digits will be processed, then the CS assigned */
2474 /* as the start of an ON sequence (action 3). */
2475 /* 5) There are cases where more than one sequence must be */
2476 /* processed, for instance digits followed by CS followed by L: */
2477 /* the digits must be processed as one sequence, and the CS */
2478 /* must be processed as an ON sequence, all this before starting */
2479 /* assembling chars for the opening L sequence. */
2480 /* */
2481 /* */
2482 private static final short[][] impTabProps =
2483 {
2484 /* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR, Res */
2485 /* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, 18, 21, _ON },
2486 /* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, 32+18, 32+21, _L },
2487 /* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, 32+18, 32+21, _R },
2488 /* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, 32+18, 32+21, _R },
2489 /* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, 18, 21, _EN },
2490 /* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, 32+18, 32+21, _AN },
2491 /* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, 18, 21, _AN },
2492 /* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, 32+18, 32+21, _ON },
2493 /* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, 32+18, 32+21, _ON },
2494 /* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, 18, 21, _ON },
2495 /*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, 18, 21, _EN },
2496 /*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, 18, 21, _EN },
2497 /*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, 96+18, 96+21, _AN },
2498 /*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, 18, 21, _AN },
2499 /*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3,128+18,128+21, _ON },
2500 /*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, 32+18, 32+21, _S },
2501 /*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, 32+18, 32+21, _S },
2502 /*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, 32+18, 32+21, _B },
2561 /* 2: prepend conditional sequence to current sequence */
2562 /* 3: set ON sequence to new level - 1 */
2563 /* 4: init EN/AN/ON sequence */
2564 /* 5: fix EN/AN/ON sequence followed by R */
2565 /* 6: set previous level sequence to level 2 */
2566 /* */
2567 /* Notes: */
2568 /* 1) These tables are used in processPropertySeq(). The input */
2569 /* is property sequences as determined by resolveImplicitLevels. */
2570 /* 2) Most such property sequences are processed immediately */
2571 /* (levels are assigned). */
2572 /* 3) However, some sequences cannot be assigned a final level till */
2573 /* one or more following sequences are received. For instance, */
2574 /* ON following an R sequence within an even-level paragraph. */
2575 /* If the following sequence is R, the ON sequence will be */
2576 /* assigned basic run level+1, and so will the R sequence. */
2577 /* 4) S is generally handled like ON, since its level will be fixed */
2578 /* to paragraph level in adjustWSLevels(). */
2579 /* */
2580
2581 private static final byte[][] impTabL_DEFAULT = /* Even paragraph level */
2582 /* In this table, conditional sequences receive the lower possible level
2583 until proven otherwise.
2584 */
2585 {
2586 /* L, R, EN, AN, ON, S, B, Res */
2587 /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
2588 /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
2589 /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
2590 /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
2591 /* 4 : R+ON */ { 0, 0x21, 0x33, 0x33, 4, 4, 0, 0 },
2592 /* 5 : AN+ON */ { 0, 0x21, 0, 0x32, 5, 5, 0, 0 }
2593 };
2594
2595 private static final byte[][] impTabR_DEFAULT = /* Odd paragraph level */
2596 /* In this table, conditional sequences receive the lower possible level
2597 until proven otherwise.
2598 */
2599 {
2600 /* L, R, EN, AN, ON, S, B, Res */
2601 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2602 /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 },
2603 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2604 /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
2605 /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
2606 /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
2607 };
2608
2609 private static final short[] impAct0 = {0,1,2,3,4};
2610
2611 private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
2612 impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2613
2614 private static final byte[][] impTabL_NUMBERS_SPECIAL = { /* Even paragraph level */
2615 /* In this table, conditional sequences receive the lower possible
2616 level until proven otherwise.
2617 */
2618 /* L, R, EN, AN, ON, S, B, Res */
2619 /* 0 : init */ { 0, 2, 0x11, 0x11, 0, 0, 0, 0 },
2620 /* 1 : L+EN/AN */ { 0, 0x42, 1, 1, 0, 0, 0, 0 },
2621 /* 2 : R */ { 0, 2, 4, 4, 0x13, 0x13, 0, 1 },
2622 /* 3 : R+ON */ { 0, 0x22, 0x34, 0x34, 3, 3, 0, 0 },
2623 /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
2624 };
2625 private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
2626 impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2627
2628 private static final byte[][] impTabL_GROUP_NUMBERS_WITH_R = {
2629 /* In this table, EN/AN+ON sequences receive levels as if associated with R
2630 until proven that there is L or sor/eor on both sides. AN is handled like EN.
2631 */
2632 /* L, R, EN, AN, ON, S, B, Res */
2633 /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 },
2634 /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 },
2635 /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 },
2636 /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 },
2637 /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 },
2638 /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 }
2639 };
2640 private static final byte[][] impTabR_GROUP_NUMBERS_WITH_R = {
2641 /* In this table, EN/AN+ON sequences receive levels as if associated with R
2642 until proven that there is L on both sides. AN is handled like EN.
2643 */
2644 /* L, R, EN, AN, ON, S, B, Res */
2645 /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 },
2646 /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 },
2647 /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 },
2648 /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 },
2649 /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 }
2650 };
2651 private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
2652 ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
2653 impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2654
2655 private static final byte[][] impTabL_INVERSE_NUMBERS_AS_L = {
2656 /* This table is identical to the Default LTR table except that EN and AN
2657 are handled like L.
2658 */
2659 /* L, R, EN, AN, ON, S, B, Res */
2660 /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 },
2661 /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 },
2662 /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 },
2663 /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 },
2664 /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 },
2665 /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 }
2666 };
2667 private static final byte[][] impTabR_INVERSE_NUMBERS_AS_L = {
2668 /* This table is identical to the Default RTL table except that EN and AN
2669 are handled like L.
2670 */
2671 /* L, R, EN, AN, ON, S, B, Res */
2672 /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 },
2673 /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 },
2674 /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 },
2675 /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 },
2676 /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 },
2677 /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 }
2678 };
2679 private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
2680 (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
2681 impAct0, impAct0);
2682
2683 private static final byte[][] impTabR_INVERSE_LIKE_DIRECT = { /* Odd paragraph level */
2684 /* In this table, conditional sequences receive the lower possible level
2685 until proven otherwise.
2686 */
2687 /* L, R, EN, AN, ON, S, B, Res */
2688 /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
2689 /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 },
2690 /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
2691 /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
2692 /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
2693 /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
2694 /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
2695 };
2696 private static final short[] impAct1 = {0,1,13,14};
2697 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
2698 impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2699
2700 private static final byte[][] impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = {
2701 /* The case handled in this table is (visually): R EN L
2702 */
2703 /* L, R, EN, AN, ON, S, B, Res */
2704 /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 },
2705 /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 },
2706 /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 },
2707 /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 },
2708 /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 },
2709 /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 },
2710 /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 }
2711 };
2712 private static final byte[][] impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = {
2713 /* The cases handled in this table are (visually): R EN L
2714 R L AN L
2715 */
2716 /* L, R, EN, AN, ON, S, B, Res */
2717 /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
2718 /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
2719 /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
2720 /* 3 : L */ { 3, 0, 3, 0x36, 0x14, 0x40, 0, 1 },
2721 /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
2722 /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
2723 /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
2724 };
2725 private static final short[] impAct2 = {0,1,2,5,6,7,8};
2726 private static final short[] impAct3 = {0,1,9,10,11,12};
2727 private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
2728 new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
2729 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2730
2731 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
2732 impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
2733
2734 private static final byte[][] impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
2735 /* The case handled in this table is (visually): R EN L
2736 */
2737 /* L, R, EN, AN, ON, S, B, Res */
2738 /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 },
2739 /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 },
2740 /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 },
2741 /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
2742 /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
2743 };
2744 private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
2745 ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
2746 impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
2747
2748 private static class LevState {
2749 byte[][] impTab; /* level table pointer */
2750 short[] impAct; /* action map array */
2751 int startON; /* start of ON sequence */
2752 int startL2EN; /* start of level 2 sequence */
2753 int lastStrongRTL; /* index of last found R or AL */
2754 int runStart; /* start position of the run */
|