1 /* 2 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 2001-2010, International Business Machines 29 * Corporation and others. All Rights Reserved. 30 ******************************************************************************* 31 */ 32 /* Written by Simon Montagu, Matitiahu Allouche 33 * (ported from C code written by Markus W. Scherer) 34 */ 35 36 package jdk.internal.icu.text; 37 38 import jdk.internal.icu.lang.UCharacter; 39 40 final class BidiWriter { 41 42 /** Bidi control code points */ 43 static final char LRM_CHAR = 0x200e; 44 static final char RLM_CHAR = 0x200f; 45 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT | 46 1 << UCharacter.RIGHT_TO_LEFT_ARABIC); 47 48 private static boolean IsCombining(int type) { 49 return ((1<<type & 50 (1<<UCharacter.NON_SPACING_MARK | 51 1<<UCharacter.COMBINING_SPACING_MARK | 52 1<<UCharacter.ENCLOSING_MARK)) != 0); 53 } 54 55 /* 56 * When we have OUTPUT_REVERSE set on writeReordered(), then we 57 * semantically write RTL runs in reverse and later reverse them again. 58 * Instead, we actually write them in forward order to begin with. 59 * However, if the RTL run was to be mirrored, we need to mirror here now 60 * since the implicit second reversal must not do it. 61 * It looks strange to do mirroring in LTR output, but it is only because 62 * we are writing RTL output in reverse. 63 */ 64 private static String doWriteForward(String src, int options) { 65 /* optimize for several combinations of options */ 66 switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) { 67 case 0: { 68 /* simply return the LTR run */ 69 return src; 70 } 71 case BidiBase.DO_MIRRORING: { 72 StringBuffer dest = new StringBuffer(src.length()); 73 74 /* do mirroring */ 75 int i=0; 76 int c; 77 78 do { 79 c = UTF16.charAt(src, i); 80 i += UTF16.getCharCount(c); 81 UTF16.append(dest, UCharacter.getMirror(c)); 82 } while(i < src.length()); 83 return dest.toString(); 84 } 85 case BidiBase.REMOVE_BIDI_CONTROLS: { 86 StringBuilder dest = new StringBuilder(src.length()); 87 88 /* copy the LTR run and remove any Bidi control characters */ 89 int i = 0; 90 char c; 91 do { 92 c = src.charAt(i++); 93 if(!BidiBase.IsBidiControlChar(c)) { 94 dest.append(c); 95 } 96 } while(i < src.length()); 97 return dest.toString(); 98 } 99 default: { 100 StringBuffer dest = new StringBuffer(src.length()); 101 102 /* remove Bidi control characters and do mirroring */ 103 int i = 0; 104 int c; 105 do { 106 c = UTF16.charAt(src, i); 107 i += UTF16.getCharCount(c); 108 if(!BidiBase.IsBidiControlChar(c)) { 109 UTF16.append(dest, UCharacter.getMirror(c)); 110 } 111 } while(i < src.length()); 112 return dest.toString(); 113 } 114 } /* end of switch */ 115 } 116 117 private static String doWriteForward(char[] text, int start, int limit, 118 int options) { 119 return doWriteForward(new String(text, start, limit - start), options); 120 } 121 122 static String writeReverse(String src, int options) { 123 /* 124 * RTL run - 125 * 126 * RTL runs need to be copied to the destination in reverse order 127 * of code points, not code units, to keep Unicode characters intact. 128 * 129 * The general strategy for this is to read the source text 130 * in backward order, collect all code units for a code point 131 * (and optionally following combining characters, see below), 132 * and copy all these code units in ascending order 133 * to the destination for this run. 134 * 135 * Several options request whether combining characters 136 * should be kept after their base characters, 137 * whether Bidi control characters should be removed, and 138 * whether characters should be replaced by their mirror-image 139 * equivalent Unicode characters. 140 */ 141 StringBuffer dest = new StringBuffer(src.length()); 142 143 /* optimize for several combinations of options */ 144 switch (options & 145 (BidiBase.REMOVE_BIDI_CONTROLS | 146 BidiBase.DO_MIRRORING | 147 BidiBase.KEEP_BASE_COMBINING)) { 148 149 case 0: 150 /* 151 * With none of the "complicated" options set, the destination 152 * run will have the same length as the source run, 153 * and there is no mirroring and no keeping combining characters 154 * with their base characters. 155 * 156 * XXX: or dest = UTF16.reverse(new StringBuffer(src)); 157 */ 158 159 int srcLength = src.length(); 160 161 /* preserve character integrity */ 162 do { 163 /* i is always after the last code unit known to need to be kept 164 * in this segment */ 165 int i = srcLength; 166 167 /* collect code units for one base character */ 168 srcLength -= UTF16.getCharCount(UTF16.charAt(src, 169 srcLength - 1)); 170 171 /* copy this base character */ 172 dest.append(src.substring(srcLength, i)); 173 } while(srcLength > 0); 174 break; 175 176 case BidiBase.KEEP_BASE_COMBINING: 177 /* 178 * Here, too, the destination 179 * run will have the same length as the source run, 180 * and there is no mirroring. 181 * We do need to keep combining characters with their base 182 * characters. 183 */ 184 srcLength = src.length(); 185 186 /* preserve character integrity */ 187 do { 188 /* i is always after the last code unit known to need to be kept 189 * in this segment */ 190 int c; 191 int i = srcLength; 192 193 /* collect code units and modifier letters for one base 194 * character */ 195 do { 196 c = UTF16.charAt(src, srcLength - 1); 197 srcLength -= UTF16.getCharCount(c); 198 } while(srcLength > 0 && IsCombining(UCharacter.getType(c))); 199 200 /* copy this "user character" */ 201 dest.append(src.substring(srcLength, i)); 202 } while(srcLength > 0); 203 break; 204 205 default: 206 /* 207 * With several "complicated" options set, this is the most 208 * general and the slowest copying of an RTL run. 209 * We will do mirroring, remove Bidi controls, and 210 * keep combining characters with their base characters 211 * as requested. 212 */ 213 srcLength = src.length(); 214 215 /* preserve character integrity */ 216 do { 217 /* i is always after the last code unit known to need to be kept 218 * in this segment */ 219 int i = srcLength; 220 221 /* collect code units for one base character */ 222 int c = UTF16.charAt(src, srcLength - 1); 223 srcLength -= UTF16.getCharCount(c); 224 if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) { 225 /* collect modifier letters for this base character */ 226 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) { 227 c = UTF16.charAt(src, srcLength - 1); 228 srcLength -= UTF16.getCharCount(c); 229 } 230 } 231 232 if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 && 233 BidiBase.IsBidiControlChar(c)) { 234 /* do not copy this Bidi control character */ 235 continue; 236 } 237 238 /* copy this "user character" */ 239 int j = srcLength; 240 if((options & BidiBase.DO_MIRRORING) != 0) { 241 /* mirror only the base character */ 242 c = UCharacter.getMirror(c); 243 UTF16.append(dest, c); 244 j += UTF16.getCharCount(c); 245 } 246 dest.append(src.substring(j, i)); 247 } while(srcLength > 0); 248 break; 249 } /* end of switch */ 250 251 return dest.toString(); 252 } 253 254 static String doWriteReverse(char[] text, int start, int limit, int options) { 255 return writeReverse(new String(text, start, limit - start), options); 256 } 257 258 static String writeReordered(BidiBase bidi, int options) { 259 int run, runCount; 260 StringBuilder dest; 261 char[] text = bidi.text; 262 runCount = bidi.countRuns(); 263 264 /* 265 * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the 266 * reordering mode (checked below) is appropriate. 267 */ 268 if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) { 269 options |= BidiBase.INSERT_LRM_FOR_NUMERIC; 270 options &= ~BidiBase.REMOVE_BIDI_CONTROLS; 271 } 272 /* 273 * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS 274 * and cancels BidiBase.INSERT_LRM_FOR_NUMERIC. 275 */ 276 if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) { 277 options |= BidiBase.REMOVE_BIDI_CONTROLS; 278 options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC; 279 } 280 /* 281 * If we do not perform the "inverse Bidi" algorithm, then we 282 * don't need to insert any LRMs, and don't need to test for it. 283 */ 284 if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) && 285 (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT) && 286 (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && 287 (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) { 288 options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC; 289 } 290 dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ? 291 bidi.length * 2 : bidi.length); 292 /* 293 * Iterate through all visual runs and copy the run text segments to 294 * the destination, according to the options. 295 * 296 * The tests for where to insert LRMs ignore the fact that there may be 297 * BN codes or non-BMP code points at the beginning and end of a run; 298 * they may insert LRMs unnecessarily but the tests are faster this way 299 * (this would have to be improved for UTF-8). 300 */ 301 if ((options & BidiBase.OUTPUT_REVERSE) == 0) { 302 /* forward output */ 303 if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) { 304 /* do not insert Bidi controls */ 305 for (run = 0; run < runCount; ++run) { 306 BidiRun bidiRun = bidi.getVisualRun(run); 307 if (bidiRun.isEvenRun()) { 308 dest.append(doWriteForward(text, bidiRun.start, 309 bidiRun.limit, 310 options & ~BidiBase.DO_MIRRORING)); 311 } else { 312 dest.append(doWriteReverse(text, bidiRun.start, 313 bidiRun.limit, options)); 314 } 315 } 316 } else { 317 /* insert Bidi controls for "inverse Bidi" */ 318 byte[] dirProps = bidi.dirProps; 319 char uc; 320 int markFlag; 321 322 for (run = 0; run < runCount; ++run) { 323 BidiRun bidiRun = bidi.getVisualRun(run); 324 markFlag=0; 325 /* check if something relevant in insertPoints */ 326 markFlag = bidi.runs[run].insertRemove; 327 if (markFlag < 0) { /* bidi controls count */ 328 markFlag = 0; 329 } 330 if (bidiRun.isEvenRun()) { 331 if (bidi.isInverse() && 332 dirProps[bidiRun.start] != BidiBase.L) { 333 markFlag |= BidiBase.LRM_BEFORE; 334 } 335 if ((markFlag & BidiBase.LRM_BEFORE) != 0) { 336 uc = LRM_CHAR; 337 } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) { 338 uc = RLM_CHAR; 339 } else { 340 uc = 0; 341 } 342 if (uc != 0) { 343 dest.append(uc); 344 } 345 dest.append(doWriteForward(text, 346 bidiRun.start, bidiRun.limit, 347 options & ~BidiBase.DO_MIRRORING)); 348 349 if (bidi.isInverse() && 350 dirProps[bidiRun.limit - 1] != BidiBase.L) { 351 markFlag |= BidiBase.LRM_AFTER; 352 } 353 if ((markFlag & BidiBase.LRM_AFTER) != 0) { 354 uc = LRM_CHAR; 355 } else if ((markFlag & BidiBase.RLM_AFTER) != 0) { 356 uc = RLM_CHAR; 357 } else { 358 uc = 0; 359 } 360 if (uc != 0) { 361 dest.append(uc); 362 } 363 } else { /* RTL run */ 364 if (bidi.isInverse() && 365 !bidi.testDirPropFlagAt(MASK_R_AL, 366 bidiRun.limit - 1)) { 367 markFlag |= BidiBase.RLM_BEFORE; 368 } 369 if ((markFlag & BidiBase.LRM_BEFORE) != 0) { 370 uc = LRM_CHAR; 371 } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) { 372 uc = RLM_CHAR; 373 } else { 374 uc = 0; 375 } 376 if (uc != 0) { 377 dest.append(uc); 378 } 379 dest.append(doWriteReverse(text, bidiRun.start, 380 bidiRun.limit, options)); 381 382 if(bidi.isInverse() && 383 (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) { 384 markFlag |= BidiBase.RLM_AFTER; 385 } 386 if ((markFlag & BidiBase.LRM_AFTER) != 0) { 387 uc = LRM_CHAR; 388 } else if ((markFlag & BidiBase.RLM_AFTER) != 0) { 389 uc = RLM_CHAR; 390 } else { 391 uc = 0; 392 } 393 if (uc != 0) { 394 dest.append(uc); 395 } 396 } 397 } 398 } 399 } else { 400 /* reverse output */ 401 if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) { 402 /* do not insert Bidi controls */ 403 for(run = runCount; --run >= 0; ) { 404 BidiRun bidiRun = bidi.getVisualRun(run); 405 if (bidiRun.isEvenRun()) { 406 dest.append(doWriteReverse(text, 407 bidiRun.start, bidiRun.limit, 408 options & ~BidiBase.DO_MIRRORING)); 409 } else { 410 dest.append(doWriteForward(text, bidiRun.start, 411 bidiRun.limit, options)); 412 } 413 } 414 } else { 415 /* insert Bidi controls for "inverse Bidi" */ 416 417 byte[] dirProps = bidi.dirProps; 418 419 for (run = runCount; --run >= 0; ) { 420 /* reverse output */ 421 BidiRun bidiRun = bidi.getVisualRun(run); 422 if (bidiRun.isEvenRun()) { 423 if (dirProps[bidiRun.limit - 1] != BidiBase.L) { 424 dest.append(LRM_CHAR); 425 } 426 427 dest.append(doWriteReverse(text, bidiRun.start, 428 bidiRun.limit, options & ~BidiBase.DO_MIRRORING)); 429 430 if (dirProps[bidiRun.start] != BidiBase.L) { 431 dest.append(LRM_CHAR); 432 } 433 } else { 434 if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) { 435 dest.append(RLM_CHAR); 436 } 437 438 dest.append(doWriteForward(text, bidiRun.start, 439 bidiRun.limit, options)); 440 441 if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) { 442 dest.append(RLM_CHAR); 443 } 444 } 445 } 446 } 447 } 448 449 return dest.toString(); 450 } 451 }