1 /*
   2  * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27 *******************************************************************************
  28 *   Copyright (C) 2001-2010, International Business Machines
  29 *   Corporation and others.  All Rights Reserved.
  30 *******************************************************************************
  31 */
  32 /* Written by Simon Montagu, Matitiahu Allouche
  33  * (ported from C code written by Markus W. Scherer)
  34  */
  35 
  36 package jdk.internal.icu.text;
  37 
  38 import jdk.internal.icu.lang.UCharacter;
  39 
  40 final class BidiWriter {
  41 
  42     /** Bidi control code points */
  43     static final char LRM_CHAR = 0x200e;
  44     static final char RLM_CHAR = 0x200f;
  45     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
  46                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
  47 
  48     private static boolean IsCombining(int type) {
  49         return ((1<<type &
  50                 (1<<UCharacter.NON_SPACING_MARK |
  51                  1<<UCharacter.COMBINING_SPACING_MARK |
  52                  1<<UCharacter.ENCLOSING_MARK)) != 0);
  53     }
  54 
  55     /*
  56      * When we have OUTPUT_REVERSE set on writeReordered(), then we
  57      * semantically write RTL runs in reverse and later reverse them again.
  58      * Instead, we actually write them in forward order to begin with.
  59      * However, if the RTL run was to be mirrored, we need to mirror here now
  60      * since the implicit second reversal must not do it.
  61      * It looks strange to do mirroring in LTR output, but it is only because
  62      * we are writing RTL output in reverse.
  63      */
  64     private static String doWriteForward(String src, int options) {
  65         /* optimize for several combinations of options */
  66         switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) {
  67         case 0: {
  68             /* simply return the LTR run */
  69             return src;
  70         }
  71         case BidiBase.DO_MIRRORING: {
  72             StringBuffer dest = new StringBuffer(src.length());
  73 
  74             /* do mirroring */
  75             int i=0;
  76             int c;
  77 
  78             do {
  79                 c = UTF16.charAt(src, i);
  80                 i += UTF16.getCharCount(c);
  81                 UTF16.append(dest, UCharacter.getMirror(c));
  82             } while(i < src.length());
  83             return dest.toString();
  84         }
  85         case BidiBase.REMOVE_BIDI_CONTROLS: {
  86             StringBuilder dest = new StringBuilder(src.length());
  87 
  88             /* copy the LTR run and remove any Bidi control characters */
  89             int i = 0;
  90             char c;
  91             do {
  92                 c = src.charAt(i++);
  93                 if(!BidiBase.IsBidiControlChar(c)) {
  94                     dest.append(c);
  95                 }
  96             } while(i < src.length());
  97             return dest.toString();
  98         }
  99         default: {
 100             StringBuffer dest = new StringBuffer(src.length());
 101 
 102             /* remove Bidi control characters and do mirroring */
 103             int i = 0;
 104             int c;
 105             do {
 106                 c = UTF16.charAt(src, i);
 107                 i += UTF16.getCharCount(c);
 108                 if(!BidiBase.IsBidiControlChar(c)) {
 109                     UTF16.append(dest, UCharacter.getMirror(c));
 110                 }
 111             } while(i < src.length());
 112                 return dest.toString();
 113             }
 114         } /* end of switch */
 115     }
 116 
 117     private static String doWriteForward(char[] text, int start, int limit,
 118                                          int options) {
 119         return doWriteForward(new String(text, start, limit - start), options);
 120     }
 121 
 122     static String writeReverse(String src, int options) {
 123         /*
 124          * RTL run -
 125          *
 126          * RTL runs need to be copied to the destination in reverse order
 127          * of code points, not code units, to keep Unicode characters intact.
 128          *
 129          * The general strategy for this is to read the source text
 130          * in backward order, collect all code units for a code point
 131          * (and optionally following combining characters, see below),
 132          * and copy all these code units in ascending order
 133          * to the destination for this run.
 134          *
 135          * Several options request whether combining characters
 136          * should be kept after their base characters,
 137          * whether Bidi control characters should be removed, and
 138          * whether characters should be replaced by their mirror-image
 139          * equivalent Unicode characters.
 140          */
 141         StringBuffer dest = new StringBuffer(src.length());
 142 
 143         /* optimize for several combinations of options */
 144         switch (options &
 145                 (BidiBase.REMOVE_BIDI_CONTROLS |
 146                  BidiBase.DO_MIRRORING |
 147                  BidiBase.KEEP_BASE_COMBINING)) {
 148 
 149         case 0:
 150             /*
 151              * With none of the "complicated" options set, the destination
 152              * run will have the same length as the source run,
 153              * and there is no mirroring and no keeping combining characters
 154              * with their base characters.
 155              *
 156              * XXX: or dest = UTF16.reverse(new StringBuffer(src));
 157              */
 158 
 159             int srcLength = src.length();
 160 
 161             /* preserve character integrity */
 162             do {
 163                 /* i is always after the last code unit known to need to be kept
 164                  *  in this segment */
 165                 int i = srcLength;
 166 
 167                 /* collect code units for one base character */
 168                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
 169                                                              srcLength - 1));
 170 
 171                 /* copy this base character */
 172                 dest.append(src.substring(srcLength, i));
 173             } while(srcLength > 0);
 174             break;
 175 
 176         case BidiBase.KEEP_BASE_COMBINING:
 177             /*
 178              * Here, too, the destination
 179              * run will have the same length as the source run,
 180              * and there is no mirroring.
 181              * We do need to keep combining characters with their base
 182              * characters.
 183              */
 184             srcLength = src.length();
 185 
 186             /* preserve character integrity */
 187             do {
 188                 /* i is always after the last code unit known to need to be kept
 189                  *  in this segment */
 190                 int c;
 191                 int i = srcLength;
 192 
 193                 /* collect code units and modifier letters for one base
 194                  * character */
 195                 do {
 196                     c = UTF16.charAt(src, srcLength - 1);
 197                     srcLength -= UTF16.getCharCount(c);
 198                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
 199 
 200                 /* copy this "user character" */
 201                 dest.append(src.substring(srcLength, i));
 202             } while(srcLength > 0);
 203             break;
 204 
 205         default:
 206             /*
 207              * With several "complicated" options set, this is the most
 208              * general and the slowest copying of an RTL run.
 209              * We will do mirroring, remove Bidi controls, and
 210              * keep combining characters with their base characters
 211              * as requested.
 212              */
 213             srcLength = src.length();
 214 
 215             /* preserve character integrity */
 216             do {
 217                 /* i is always after the last code unit known to need to be kept
 218                  *  in this segment */
 219                 int i = srcLength;
 220 
 221                 /* collect code units for one base character */
 222                 int c = UTF16.charAt(src, srcLength - 1);
 223                 srcLength -= UTF16.getCharCount(c);
 224                 if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) {
 225                     /* collect modifier letters for this base character */
 226                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
 227                         c = UTF16.charAt(src, srcLength - 1);
 228                         srcLength -= UTF16.getCharCount(c);
 229                     }
 230                 }
 231 
 232                 if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 &&
 233                     BidiBase.IsBidiControlChar(c)) {
 234                     /* do not copy this Bidi control character */
 235                     continue;
 236                 }
 237 
 238                 /* copy this "user character" */
 239                 int j = srcLength;
 240                 if((options & BidiBase.DO_MIRRORING) != 0) {
 241                     /* mirror only the base character */
 242                     c = UCharacter.getMirror(c);
 243                     UTF16.append(dest, c);
 244                     j += UTF16.getCharCount(c);
 245                 }
 246                 dest.append(src.substring(j, i));
 247             } while(srcLength > 0);
 248             break;
 249         } /* end of switch */
 250 
 251         return dest.toString();
 252     }
 253 
 254     static String doWriteReverse(char[] text, int start, int limit, int options) {
 255         return writeReverse(new String(text, start, limit - start), options);
 256     }
 257 
 258     static String writeReordered(BidiBase bidi, int options) {
 259         int run, runCount;
 260         StringBuilder dest;
 261         char[] text = bidi.text;
 262         runCount = bidi.countRuns();
 263 
 264         /*
 265          * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the
 266          * reordering mode (checked below) is appropriate.
 267          */
 268         if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) {
 269             options |= BidiBase.INSERT_LRM_FOR_NUMERIC;
 270             options &= ~BidiBase.REMOVE_BIDI_CONTROLS;
 271         }
 272         /*
 273          * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS
 274          * and cancels BidiBase.INSERT_LRM_FOR_NUMERIC.
 275          */
 276         if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) {
 277             options |= BidiBase.REMOVE_BIDI_CONTROLS;
 278             options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
 279         }
 280         /*
 281          * If we do not perform the "inverse Bidi" algorithm, then we
 282          * don't need to insert any LRMs, and don't need to test for it.
 283          */
 284         if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) &&
 285             (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT)  &&
 286             (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
 287             (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) {
 288             options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
 289         }
 290         dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ?
 291                                  bidi.length * 2 : bidi.length);
 292         /*
 293          * Iterate through all visual runs and copy the run text segments to
 294          * the destination, according to the options.
 295          *
 296          * The tests for where to insert LRMs ignore the fact that there may be
 297          * BN codes or non-BMP code points at the beginning and end of a run;
 298          * they may insert LRMs unnecessarily but the tests are faster this way
 299          * (this would have to be improved for UTF-8).
 300          */
 301         if ((options & BidiBase.OUTPUT_REVERSE) == 0) {
 302             /* forward output */
 303             if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
 304                 /* do not insert Bidi controls */
 305                 for (run = 0; run < runCount; ++run) {
 306                     BidiRun bidiRun = bidi.getVisualRun(run);
 307                     if (bidiRun.isEvenRun()) {
 308                         dest.append(doWriteForward(text, bidiRun.start,
 309                                                    bidiRun.limit,
 310                                                    options & ~BidiBase.DO_MIRRORING));
 311                      } else {
 312                         dest.append(doWriteReverse(text, bidiRun.start,
 313                                                    bidiRun.limit, options));
 314                      }
 315                 }
 316             } else {
 317                 /* insert Bidi controls for "inverse Bidi" */
 318                 byte[] dirProps = bidi.dirProps;
 319                 char uc;
 320                 int markFlag;
 321 
 322                 for (run = 0; run < runCount; ++run) {
 323                     BidiRun bidiRun = bidi.getVisualRun(run);
 324                     markFlag=0;
 325                     /* check if something relevant in insertPoints */
 326                     markFlag = bidi.runs[run].insertRemove;
 327                     if (markFlag < 0) { /* bidi controls count */
 328                         markFlag = 0;
 329                     }
 330                     if (bidiRun.isEvenRun()) {
 331                         if (bidi.isInverse() &&
 332                                 dirProps[bidiRun.start] != BidiBase.L) {
 333                             markFlag |= BidiBase.LRM_BEFORE;
 334                         }
 335                         if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
 336                             uc = LRM_CHAR;
 337                         } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
 338                             uc = RLM_CHAR;
 339                         } else {
 340                             uc = 0;
 341                         }
 342                         if (uc != 0) {
 343                             dest.append(uc);
 344                         }
 345                         dest.append(doWriteForward(text,
 346                                                    bidiRun.start, bidiRun.limit,
 347                                                    options & ~BidiBase.DO_MIRRORING));
 348 
 349                         if (bidi.isInverse() &&
 350                              dirProps[bidiRun.limit - 1] != BidiBase.L) {
 351                             markFlag |= BidiBase.LRM_AFTER;
 352                         }
 353                         if ((markFlag & BidiBase.LRM_AFTER) != 0) {
 354                             uc = LRM_CHAR;
 355                         } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
 356                             uc = RLM_CHAR;
 357                         } else {
 358                             uc = 0;
 359                         }
 360                         if (uc != 0) {
 361                             dest.append(uc);
 362                         }
 363                     } else { /* RTL run */
 364                         if (bidi.isInverse() &&
 365                             !bidi.testDirPropFlagAt(MASK_R_AL,
 366                                                     bidiRun.limit - 1)) {
 367                             markFlag |= BidiBase.RLM_BEFORE;
 368                         }
 369                         if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
 370                             uc = LRM_CHAR;
 371                         } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
 372                             uc = RLM_CHAR;
 373                         } else {
 374                             uc = 0;
 375                         }
 376                         if (uc != 0) {
 377                             dest.append(uc);
 378                         }
 379                         dest.append(doWriteReverse(text, bidiRun.start,
 380                                                    bidiRun.limit, options));
 381 
 382                         if(bidi.isInverse() &&
 383                                 (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
 384                             markFlag |= BidiBase.RLM_AFTER;
 385                         }
 386                         if ((markFlag & BidiBase.LRM_AFTER) != 0) {
 387                             uc = LRM_CHAR;
 388                         } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
 389                             uc = RLM_CHAR;
 390                         } else {
 391                             uc = 0;
 392                         }
 393                         if (uc != 0) {
 394                             dest.append(uc);
 395                         }
 396                     }
 397                 }
 398             }
 399         } else {
 400             /* reverse output */
 401             if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
 402                 /* do not insert Bidi controls */
 403                 for(run = runCount; --run >= 0; ) {
 404                     BidiRun bidiRun = bidi.getVisualRun(run);
 405                     if (bidiRun.isEvenRun()) {
 406                         dest.append(doWriteReverse(text,
 407                                                    bidiRun.start, bidiRun.limit,
 408                                                    options & ~BidiBase.DO_MIRRORING));
 409                     } else {
 410                         dest.append(doWriteForward(text, bidiRun.start,
 411                                                    bidiRun.limit, options));
 412                     }
 413                 }
 414             } else {
 415                 /* insert Bidi controls for "inverse Bidi" */
 416 
 417                 byte[] dirProps = bidi.dirProps;
 418 
 419                 for (run = runCount; --run >= 0; ) {
 420                     /* reverse output */
 421                     BidiRun bidiRun = bidi.getVisualRun(run);
 422                     if (bidiRun.isEvenRun()) {
 423                         if (dirProps[bidiRun.limit - 1] != BidiBase.L) {
 424                             dest.append(LRM_CHAR);
 425                         }
 426 
 427                         dest.append(doWriteReverse(text, bidiRun.start,
 428                                 bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
 429 
 430                         if (dirProps[bidiRun.start] != BidiBase.L) {
 431                             dest.append(LRM_CHAR);
 432                         }
 433                     } else {
 434                         if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
 435                             dest.append(RLM_CHAR);
 436                         }
 437 
 438                         dest.append(doWriteForward(text, bidiRun.start,
 439                                                    bidiRun.limit, options));
 440 
 441                         if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
 442                             dest.append(RLM_CHAR);
 443                         }
 444                     }
 445                 }
 446             }
 447         }
 448 
 449         return dest.toString();
 450     }
 451 }