/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  25 
/*
*******************************************************************************
*   Copyright (C) 2001-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */
  35 
  36 package sun.text.bidi;
  37 
  38 import sun.text.normalizer.UCharacter;
  39 import sun.text.normalizer.UTF16;
  40 
  41 final class BidiWriter {
  42 
  43     /** Bidi control code points */
  44     static final char LRM_CHAR = 0x200e;
  45     static final char RLM_CHAR = 0x200f;
  46     static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT |
  47                                   1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
  48 
  49     private static boolean IsCombining(int type) {
  50         return ((1<<type &
  51                 (1<<UCharacter.NON_SPACING_MARK |
  52                  1<<UCharacter.COMBINING_SPACING_MARK |
  53                  1<<UCharacter.ENCLOSING_MARK)) != 0);
  54     }
  55 
  56     /*
  57      * When we have OUTPUT_REVERSE set on writeReordered(), then we
  58      * semantically write RTL runs in reverse and later reverse them again.
  59      * Instead, we actually write them in forward order to begin with.
  60      * However, if the RTL run was to be mirrored, we need to mirror here now
  61      * since the implicit second reversal must not do it.
  62      * It looks strange to do mirroring in LTR output, but it is only because
  63      * we are writing RTL output in reverse.
  64      */
  65     private static String doWriteForward(String src, int options) {
  66         /* optimize for several combinations of options */
  67         switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) {
  68         case 0: {
  69             /* simply return the LTR run */
  70             return src;
  71         }
  72         case BidiBase.DO_MIRRORING: {
  73             StringBuffer dest = new StringBuffer(src.length());
  74 
  75             /* do mirroring */
  76             int i=0;
  77             int c;
  78 
  79             do {
  80                 c = UTF16.charAt(src, i);
  81                 i += UTF16.getCharCount(c);
  82                 UTF16.append(dest, UCharacter.getMirror(c));
  83             } while(i < src.length());
  84             return dest.toString();
  85         }
  86         case BidiBase.REMOVE_BIDI_CONTROLS: {
  87             StringBuilder dest = new StringBuilder(src.length());
  88 
  89             /* copy the LTR run and remove any Bidi control characters */
  90             int i = 0;
  91             char c;
  92             do {
  93                 c = src.charAt(i++);
  94                 if(!BidiBase.IsBidiControlChar(c)) {
  95                     dest.append(c);
  96                 }
  97             } while(i < src.length());
  98             return dest.toString();
  99         }
 100         default: {
 101             StringBuffer dest = new StringBuffer(src.length());
 102 
 103             /* remove Bidi control characters and do mirroring */
 104             int i = 0;
 105             int c;
 106             do {
 107                 c = UTF16.charAt(src, i);
 108                 i += UTF16.getCharCount(c);
 109                 if(!BidiBase.IsBidiControlChar(c)) {
 110                     UTF16.append(dest, UCharacter.getMirror(c));
 111                 }
 112             } while(i < src.length());
 113                 return dest.toString();
 114             }
 115         } /* end of switch */
 116     }
 117 
 118     private static String doWriteForward(char[] text, int start, int limit,
 119                                          int options) {
 120         return doWriteForward(new String(text, start, limit - start), options);
 121     }
 122 
 123     static String writeReverse(String src, int options) {
 124         /*
 125          * RTL run -
 126          *
 127          * RTL runs need to be copied to the destination in reverse order
 128          * of code points, not code units, to keep Unicode characters intact.
 129          *
 130          * The general strategy for this is to read the source text
 131          * in backward order, collect all code units for a code point
 132          * (and optionally following combining characters, see below),
 133          * and copy all these code units in ascending order
 134          * to the destination for this run.
 135          *
 136          * Several options request whether combining characters
 137          * should be kept after their base characters,
 138          * whether Bidi control characters should be removed, and
 139          * whether characters should be replaced by their mirror-image
 140          * equivalent Unicode characters.
 141          */
 142         StringBuffer dest = new StringBuffer(src.length());
 143 
 144         /* optimize for several combinations of options */
 145         switch (options &
 146                 (BidiBase.REMOVE_BIDI_CONTROLS |
 147                  BidiBase.DO_MIRRORING |
 148                  BidiBase.KEEP_BASE_COMBINING)) {
 149 
 150         case 0:
 151             /*
 152              * With none of the "complicated" options set, the destination
 153              * run will have the same length as the source run,
 154              * and there is no mirroring and no keeping combining characters
 155              * with their base characters.
 156              *
 157              * XXX: or dest = UTF16.reverse(new StringBuffer(src));
 158              */
 159 
 160             int srcLength = src.length();
 161 
 162             /* preserve character integrity */
 163             do {
 164                 /* i is always after the last code unit known to need to be kept
 165                  *  in this segment */
 166                 int i = srcLength;
 167 
 168                 /* collect code units for one base character */
 169                 srcLength -= UTF16.getCharCount(UTF16.charAt(src,
 170                                                              srcLength - 1));
 171 
 172                 /* copy this base character */
 173                 dest.append(src.substring(srcLength, i));
 174             } while(srcLength > 0);
 175             break;
 176 
 177         case BidiBase.KEEP_BASE_COMBINING:
 178             /*
 179              * Here, too, the destination
 180              * run will have the same length as the source run,
 181              * and there is no mirroring.
 182              * We do need to keep combining characters with their base
 183              * characters.
 184              */
 185             srcLength = src.length();
 186 
 187             /* preserve character integrity */
 188             do {
 189                 /* i is always after the last code unit known to need to be kept
 190                  *  in this segment */
 191                 int c;
 192                 int i = srcLength;
 193 
 194                 /* collect code units and modifier letters for one base
 195                  * character */
 196                 do {
 197                     c = UTF16.charAt(src, srcLength - 1);
 198                     srcLength -= UTF16.getCharCount(c);
 199                 } while(srcLength > 0 && IsCombining(UCharacter.getType(c)));
 200 
 201                 /* copy this "user character" */
 202                 dest.append(src.substring(srcLength, i));
 203             } while(srcLength > 0);
 204             break;
 205 
 206         default:
 207             /*
 208              * With several "complicated" options set, this is the most
 209              * general and the slowest copying of an RTL run.
 210              * We will do mirroring, remove Bidi controls, and
 211              * keep combining characters with their base characters
 212              * as requested.
 213              */
 214             srcLength = src.length();
 215 
 216             /* preserve character integrity */
 217             do {
 218                 /* i is always after the last code unit known to need to be kept
 219                  *  in this segment */
 220                 int i = srcLength;
 221 
 222                 /* collect code units for one base character */
 223                 int c = UTF16.charAt(src, srcLength - 1);
 224                 srcLength -= UTF16.getCharCount(c);
 225                 if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) {
 226                     /* collect modifier letters for this base character */
 227                     while(srcLength > 0 && IsCombining(UCharacter.getType(c))) {
 228                         c = UTF16.charAt(src, srcLength - 1);
 229                         srcLength -= UTF16.getCharCount(c);
 230                     }
 231                 }
 232 
 233                 if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 &&
 234                     BidiBase.IsBidiControlChar(c)) {
 235                     /* do not copy this Bidi control character */
 236                     continue;
 237                 }
 238 
 239                 /* copy this "user character" */
 240                 int j = srcLength;
 241                 if((options & BidiBase.DO_MIRRORING) != 0) {
 242                     /* mirror only the base character */
 243                     c = UCharacter.getMirror(c);
 244                     UTF16.append(dest, c);
 245                     j += UTF16.getCharCount(c);
 246                 }
 247                 dest.append(src.substring(j, i));
 248             } while(srcLength > 0);
 249             break;
 250         } /* end of switch */
 251 
 252         return dest.toString();
 253     }
 254 
 255     static String doWriteReverse(char[] text, int start, int limit, int options) {
 256         return writeReverse(new String(text, start, limit - start), options);
 257     }
 258 
 259     static String writeReordered(BidiBase bidi, int options) {
 260         int run, runCount;
 261         StringBuilder dest;
 262         char[] text = bidi.text;
 263         runCount = bidi.countRuns();
 264 
 265         /*
 266          * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the
 267          * reordering mode (checked below) is appropriate.
 268          */
 269         if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) {
 270             options |= BidiBase.INSERT_LRM_FOR_NUMERIC;
 271             options &= ~BidiBase.REMOVE_BIDI_CONTROLS;
 272         }
 273         /*
 274          * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS
 275          * and cancels BidiBase.INSERT_LRM_FOR_NUMERIC.
 276          */
 277         if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) {
 278             options |= BidiBase.REMOVE_BIDI_CONTROLS;
 279             options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
 280         }
 281         /*
 282          * If we do not perform the "inverse Bidi" algorithm, then we
 283          * don't need to insert any LRMs, and don't need to test for it.
 284          */
 285         if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) &&
 286             (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT)  &&
 287             (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
 288             (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) {
 289             options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
 290         }
 291         dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ?
 292                                  bidi.length * 2 : bidi.length);
 293         /*
 294          * Iterate through all visual runs and copy the run text segments to
 295          * the destination, according to the options.
 296          *
 297          * The tests for where to insert LRMs ignore the fact that there may be
 298          * BN codes or non-BMP code points at the beginning and end of a run;
 299          * they may insert LRMs unnecessarily but the tests are faster this way
 300          * (this would have to be improved for UTF-8).
 301          */
 302         if ((options & BidiBase.OUTPUT_REVERSE) == 0) {
 303             /* forward output */
 304             if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
 305                 /* do not insert Bidi controls */
 306                 for (run = 0; run < runCount; ++run) {
 307                     BidiRun bidiRun = bidi.getVisualRun(run);
 308                     if (bidiRun.isEvenRun()) {
 309                         dest.append(doWriteForward(text, bidiRun.start,
 310                                                    bidiRun.limit,
 311                                                    options & ~BidiBase.DO_MIRRORING));
 312                      } else {
 313                         dest.append(doWriteReverse(text, bidiRun.start,
 314                                                    bidiRun.limit, options));
 315                      }
 316                 }
 317             } else {
 318                 /* insert Bidi controls for "inverse Bidi" */
 319                 byte[] dirProps = bidi.dirProps;
 320                 char uc;
 321                 int markFlag;
 322 
 323                 for (run = 0; run < runCount; ++run) {
 324                     BidiRun bidiRun = bidi.getVisualRun(run);
 325                     markFlag=0;
 326                     /* check if something relevant in insertPoints */
 327                     markFlag = bidi.runs[run].insertRemove;
 328                     if (markFlag < 0) { /* bidi controls count */
 329                         markFlag = 0;
 330                     }
 331                     if (bidiRun.isEvenRun()) {
 332                         if (bidi.isInverse() &&
 333                                 dirProps[bidiRun.start] != BidiBase.L) {
 334                             markFlag |= BidiBase.LRM_BEFORE;
 335                         }
 336                         if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
 337                             uc = LRM_CHAR;
 338                         } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
 339                             uc = RLM_CHAR;
 340                         } else {
 341                             uc = 0;
 342                         }
 343                         if (uc != 0) {
 344                             dest.append(uc);
 345                         }
 346                         dest.append(doWriteForward(text,
 347                                                    bidiRun.start, bidiRun.limit,
 348                                                    options & ~BidiBase.DO_MIRRORING));
 349 
 350                         if (bidi.isInverse() &&
 351                              dirProps[bidiRun.limit - 1] != BidiBase.L) {
 352                             markFlag |= BidiBase.LRM_AFTER;
 353                         }
 354                         if ((markFlag & BidiBase.LRM_AFTER) != 0) {
 355                             uc = LRM_CHAR;
 356                         } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
 357                             uc = RLM_CHAR;
 358                         } else {
 359                             uc = 0;
 360                         }
 361                         if (uc != 0) {
 362                             dest.append(uc);
 363                         }
 364                     } else { /* RTL run */
 365                         if (bidi.isInverse() &&
 366                             !bidi.testDirPropFlagAt(MASK_R_AL,
 367                                                     bidiRun.limit - 1)) {
 368                             markFlag |= BidiBase.RLM_BEFORE;
 369                         }
 370                         if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
 371                             uc = LRM_CHAR;
 372                         } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
 373                             uc = RLM_CHAR;
 374                         } else {
 375                             uc = 0;
 376                         }
 377                         if (uc != 0) {
 378                             dest.append(uc);
 379                         }
 380                         dest.append(doWriteReverse(text, bidiRun.start,
 381                                                    bidiRun.limit, options));
 382 
 383                         if(bidi.isInverse() &&
 384                                 (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
 385                             markFlag |= BidiBase.RLM_AFTER;
 386                         }
 387                         if ((markFlag & BidiBase.LRM_AFTER) != 0) {
 388                             uc = LRM_CHAR;
 389                         } else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
 390                             uc = RLM_CHAR;
 391                         } else {
 392                             uc = 0;
 393                         }
 394                         if (uc != 0) {
 395                             dest.append(uc);
 396                         }
 397                     }
 398                 }
 399             }
 400         } else {
 401             /* reverse output */
 402             if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
 403                 /* do not insert Bidi controls */
 404                 for(run = runCount; --run >= 0; ) {
 405                     BidiRun bidiRun = bidi.getVisualRun(run);
 406                     if (bidiRun.isEvenRun()) {
 407                         dest.append(doWriteReverse(text,
 408                                                    bidiRun.start, bidiRun.limit,
 409                                                    options & ~BidiBase.DO_MIRRORING));
 410                     } else {
 411                         dest.append(doWriteForward(text, bidiRun.start,
 412                                                    bidiRun.limit, options));
 413                     }
 414                 }
 415             } else {
 416                 /* insert Bidi controls for "inverse Bidi" */
 417 
 418                 byte[] dirProps = bidi.dirProps;
 419 
 420                 for (run = runCount; --run >= 0; ) {
 421                     /* reverse output */
 422                     BidiRun bidiRun = bidi.getVisualRun(run);
 423                     if (bidiRun.isEvenRun()) {
 424                         if (dirProps[bidiRun.limit - 1] != BidiBase.L) {
 425                             dest.append(LRM_CHAR);
 426                         }
 427 
 428                         dest.append(doWriteReverse(text, bidiRun.start,
 429                                 bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
 430 
 431                         if (dirProps[bidiRun.start] != BidiBase.L) {
 432                             dest.append(LRM_CHAR);
 433                         }
 434                     } else {
 435                         if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
 436                             dest.append(RLM_CHAR);
 437                         }
 438 
 439                         dest.append(doWriteForward(text, bidiRun.start,
 440                                                    bidiRun.limit, options));
 441 
 442                         if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
 443                             dest.append(RLM_CHAR);
 444                         }
 445                     }
 446                 }
 447             }
 448         }
 449 
 450         return dest.toString();
 451     }
 452 }