1 /* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 2001-2010, International Business Machines 29 * Corporation and others. All Rights Reserved. 30 ******************************************************************************* 31 */ 32 /* Written by Simon Montagu, Matitiahu Allouche 33 * (ported from C code written by Markus W. Scherer) 34 */ 35 36 package sun.text.bidi; 37 38 import sun.text.normalizer.UCharacter; 39 import sun.text.normalizer.UTF16; 40 41 final class BidiWriter { 42 43 /** Bidi control code points */ 44 static final char LRM_CHAR = 0x200e; 45 static final char RLM_CHAR = 0x200f; 46 static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT | 47 1 << UCharacter.RIGHT_TO_LEFT_ARABIC); 48 49 private static boolean IsCombining(int type) { 50 return ((1<<type & 51 (1<<UCharacter.NON_SPACING_MARK | 52 1<<UCharacter.COMBINING_SPACING_MARK | 53 1<<UCharacter.ENCLOSING_MARK)) != 0); 54 } 55 56 /* 57 * When we have OUTPUT_REVERSE set on writeReordered(), then we 58 * semantically write RTL runs in reverse and later reverse them again. 59 * Instead, we actually write them in forward order to begin with. 60 * However, if the RTL run was to be mirrored, we need to mirror here now 61 * since the implicit second reversal must not do it. 62 * It looks strange to do mirroring in LTR output, but it is only because 63 * we are writing RTL output in reverse. 64 */ 65 private static String doWriteForward(String src, int options) { 66 /* optimize for several combinations of options */ 67 switch(options&(BidiBase.REMOVE_BIDI_CONTROLS|BidiBase.DO_MIRRORING)) { 68 case 0: { 69 /* simply return the LTR run */ 70 return src; 71 } 72 case BidiBase.DO_MIRRORING: { 73 StringBuffer dest = new StringBuffer(src.length()); 74 75 /* do mirroring */ 76 int i=0; 77 int c; 78 79 do { 80 c = UTF16.charAt(src, i); 81 i += UTF16.getCharCount(c); 82 UTF16.append(dest, UCharacter.getMirror(c)); 83 } while(i < src.length()); 84 return dest.toString(); 85 } 86 case BidiBase.REMOVE_BIDI_CONTROLS: { 87 StringBuilder dest = new StringBuilder(src.length()); 88 89 /* copy the LTR run and remove any Bidi control characters */ 90 int i = 0; 91 char c; 92 do { 93 c = src.charAt(i++); 94 if(!BidiBase.IsBidiControlChar(c)) { 95 dest.append(c); 96 } 97 } while(i < src.length()); 98 return dest.toString(); 99 } 100 default: { 101 StringBuffer dest = new StringBuffer(src.length()); 102 103 /* remove Bidi control characters and do mirroring */ 104 int i = 0; 105 int c; 106 do { 107 c = UTF16.charAt(src, i); 108 i += UTF16.getCharCount(c); 109 if(!BidiBase.IsBidiControlChar(c)) { 110 UTF16.append(dest, UCharacter.getMirror(c)); 111 } 112 } while(i < src.length()); 113 return dest.toString(); 114 } 115 } /* end of switch */ 116 } 117 118 private static String doWriteForward(char[] text, int start, int limit, 119 int options) { 120 return doWriteForward(new String(text, start, limit - start), options); 121 } 122 123 static String writeReverse(String src, int options) { 124 /* 125 * RTL run - 126 * 127 * RTL runs need to be copied to the destination in reverse order 128 * of code points, not code units, to keep Unicode characters intact. 129 * 130 * The general strategy for this is to read the source text 131 * in backward order, collect all code units for a code point 132 * (and optionally following combining characters, see below), 133 * and copy all these code units in ascending order 134 * to the destination for this run. 135 * 136 * Several options request whether combining characters 137 * should be kept after their base characters, 138 * whether Bidi control characters should be removed, and 139 * whether characters should be replaced by their mirror-image 140 * equivalent Unicode characters. 141 */ 142 StringBuffer dest = new StringBuffer(src.length()); 143 144 /* optimize for several combinations of options */ 145 switch (options & 146 (BidiBase.REMOVE_BIDI_CONTROLS | 147 BidiBase.DO_MIRRORING | 148 BidiBase.KEEP_BASE_COMBINING)) { 149 150 case 0: 151 /* 152 * With none of the "complicated" options set, the destination 153 * run will have the same length as the source run, 154 * and there is no mirroring and no keeping combining characters 155 * with their base characters. 156 * 157 * XXX: or dest = UTF16.reverse(new StringBuffer(src)); 158 */ 159 160 int srcLength = src.length(); 161 162 /* preserve character integrity */ 163 do { 164 /* i is always after the last code unit known to need to be kept 165 * in this segment */ 166 int i = srcLength; 167 168 /* collect code units for one base character */ 169 srcLength -= UTF16.getCharCount(UTF16.charAt(src, 170 srcLength - 1)); 171 172 /* copy this base character */ 173 dest.append(src.substring(srcLength, i)); 174 } while(srcLength > 0); 175 break; 176 177 case BidiBase.KEEP_BASE_COMBINING: 178 /* 179 * Here, too, the destination 180 * run will have the same length as the source run, 181 * and there is no mirroring. 182 * We do need to keep combining characters with their base 183 * characters. 184 */ 185 srcLength = src.length(); 186 187 /* preserve character integrity */ 188 do { 189 /* i is always after the last code unit known to need to be kept 190 * in this segment */ 191 int c; 192 int i = srcLength; 193 194 /* collect code units and modifier letters for one base 195 * character */ 196 do { 197 c = UTF16.charAt(src, srcLength - 1); 198 srcLength -= UTF16.getCharCount(c); 199 } while(srcLength > 0 && IsCombining(UCharacter.getType(c))); 200 201 /* copy this "user character" */ 202 dest.append(src.substring(srcLength, i)); 203 } while(srcLength > 0); 204 break; 205 206 default: 207 /* 208 * With several "complicated" options set, this is the most 209 * general and the slowest copying of an RTL run. 210 * We will do mirroring, remove Bidi controls, and 211 * keep combining characters with their base characters 212 * as requested. 213 */ 214 srcLength = src.length(); 215 216 /* preserve character integrity */ 217 do { 218 /* i is always after the last code unit known to need to be kept 219 * in this segment */ 220 int i = srcLength; 221 222 /* collect code units for one base character */ 223 int c = UTF16.charAt(src, srcLength - 1); 224 srcLength -= UTF16.getCharCount(c); 225 if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) { 226 /* collect modifier letters for this base character */ 227 while(srcLength > 0 && IsCombining(UCharacter.getType(c))) { 228 c = UTF16.charAt(src, srcLength - 1); 229 srcLength -= UTF16.getCharCount(c); 230 } 231 } 232 233 if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 && 234 BidiBase.IsBidiControlChar(c)) { 235 /* do not copy this Bidi control character */ 236 continue; 237 } 238 239 /* copy this "user character" */ 240 int j = srcLength; 241 if((options & BidiBase.DO_MIRRORING) != 0) { 242 /* mirror only the base character */ 243 c = UCharacter.getMirror(c); 244 UTF16.append(dest, c); 245 j += UTF16.getCharCount(c); 246 } 247 dest.append(src.substring(j, i)); 248 } while(srcLength > 0); 249 break; 250 } /* end of switch */ 251 252 return dest.toString(); 253 } 254 255 static String doWriteReverse(char[] text, int start, int limit, int options) { 256 return writeReverse(new String(text, start, limit - start), options); 257 } 258 259 static String writeReordered(BidiBase bidi, int options) { 260 int run, runCount; 261 StringBuilder dest; 262 char[] text = bidi.text; 263 runCount = bidi.countRuns(); 264 265 /* 266 * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the 267 * reordering mode (checked below) is appropriate. 268 */ 269 if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) { 270 options |= BidiBase.INSERT_LRM_FOR_NUMERIC; 271 options &= ~BidiBase.REMOVE_BIDI_CONTROLS; 272 } 273 /* 274 * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS 275 * and cancels BidiBase.INSERT_LRM_FOR_NUMERIC. 276 */ 277 if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) { 278 options |= BidiBase.REMOVE_BIDI_CONTROLS; 279 options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC; 280 } 281 /* 282 * If we do not perform the "inverse Bidi" algorithm, then we 283 * don't need to insert any LRMs, and don't need to test for it. 284 */ 285 if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L) && 286 (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT) && 287 (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && 288 (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) { 289 options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC; 290 } 291 dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ? 292 bidi.length * 2 : bidi.length); 293 /* 294 * Iterate through all visual runs and copy the run text segments to 295 * the destination, according to the options. 296 * 297 * The tests for where to insert LRMs ignore the fact that there may be 298 * BN codes or non-BMP code points at the beginning and end of a run; 299 * they may insert LRMs unnecessarily but the tests are faster this way 300 * (this would have to be improved for UTF-8). 301 */ 302 if ((options & BidiBase.OUTPUT_REVERSE) == 0) { 303 /* forward output */ 304 if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) { 305 /* do not insert Bidi controls */ 306 for (run = 0; run < runCount; ++run) { 307 BidiRun bidiRun = bidi.getVisualRun(run); 308 if (bidiRun.isEvenRun()) { 309 dest.append(doWriteForward(text, bidiRun.start, 310 bidiRun.limit, 311 options & ~BidiBase.DO_MIRRORING)); 312 } else { 313 dest.append(doWriteReverse(text, bidiRun.start, 314 bidiRun.limit, options)); 315 } 316 } 317 } else { 318 /* insert Bidi controls for "inverse Bidi" */ 319 byte[] dirProps = bidi.dirProps; 320 char uc; 321 int markFlag; 322 323 for (run = 0; run < runCount; ++run) { 324 BidiRun bidiRun = bidi.getVisualRun(run); 325 markFlag=0; 326 /* check if something relevant in insertPoints */ 327 markFlag = bidi.runs[run].insertRemove; 328 if (markFlag < 0) { /* bidi controls count */ 329 markFlag = 0; 330 } 331 if (bidiRun.isEvenRun()) { 332 if (bidi.isInverse() && 333 dirProps[bidiRun.start] != BidiBase.L) { 334 markFlag |= BidiBase.LRM_BEFORE; 335 } 336 if ((markFlag & BidiBase.LRM_BEFORE) != 0) { 337 uc = LRM_CHAR; 338 } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) { 339 uc = RLM_CHAR; 340 } else { 341 uc = 0; 342 } 343 if (uc != 0) { 344 dest.append(uc); 345 } 346 dest.append(doWriteForward(text, 347 bidiRun.start, bidiRun.limit, 348 options & ~BidiBase.DO_MIRRORING)); 349 350 if (bidi.isInverse() && 351 dirProps[bidiRun.limit - 1] != BidiBase.L) { 352 markFlag |= BidiBase.LRM_AFTER; 353 } 354 if ((markFlag & BidiBase.LRM_AFTER) != 0) { 355 uc = LRM_CHAR; 356 } else if ((markFlag & BidiBase.RLM_AFTER) != 0) { 357 uc = RLM_CHAR; 358 } else { 359 uc = 0; 360 } 361 if (uc != 0) { 362 dest.append(uc); 363 } 364 } else { /* RTL run */ 365 if (bidi.isInverse() && 366 !bidi.testDirPropFlagAt(MASK_R_AL, 367 bidiRun.limit - 1)) { 368 markFlag |= BidiBase.RLM_BEFORE; 369 } 370 if ((markFlag & BidiBase.LRM_BEFORE) != 0) { 371 uc = LRM_CHAR; 372 } else if ((markFlag & BidiBase.RLM_BEFORE) != 0) { 373 uc = RLM_CHAR; 374 } else { 375 uc = 0; 376 } 377 if (uc != 0) { 378 dest.append(uc); 379 } 380 dest.append(doWriteReverse(text, bidiRun.start, 381 bidiRun.limit, options)); 382 383 if(bidi.isInverse() && 384 (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) { 385 markFlag |= BidiBase.RLM_AFTER; 386 } 387 if ((markFlag & BidiBase.LRM_AFTER) != 0) { 388 uc = LRM_CHAR; 389 } else if ((markFlag & BidiBase.RLM_AFTER) != 0) { 390 uc = RLM_CHAR; 391 } else { 392 uc = 0; 393 } 394 if (uc != 0) { 395 dest.append(uc); 396 } 397 } 398 } 399 } 400 } else { 401 /* reverse output */ 402 if((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) { 403 /* do not insert Bidi controls */ 404 for(run = runCount; --run >= 0; ) { 405 BidiRun bidiRun = bidi.getVisualRun(run); 406 if (bidiRun.isEvenRun()) { 407 dest.append(doWriteReverse(text, 408 bidiRun.start, bidiRun.limit, 409 options & ~BidiBase.DO_MIRRORING)); 410 } else { 411 dest.append(doWriteForward(text, bidiRun.start, 412 bidiRun.limit, options)); 413 } 414 } 415 } else { 416 /* insert Bidi controls for "inverse Bidi" */ 417 418 byte[] dirProps = bidi.dirProps; 419 420 for (run = runCount; --run >= 0; ) { 421 /* reverse output */ 422 BidiRun bidiRun = bidi.getVisualRun(run); 423 if (bidiRun.isEvenRun()) { 424 if (dirProps[bidiRun.limit - 1] != BidiBase.L) { 425 dest.append(LRM_CHAR); 426 } 427 428 dest.append(doWriteReverse(text, bidiRun.start, 429 bidiRun.limit, options & ~BidiBase.DO_MIRRORING)); 430 431 if (dirProps[bidiRun.start] != BidiBase.L) { 432 dest.append(LRM_CHAR); 433 } 434 } else { 435 if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) { 436 dest.append(RLM_CHAR); 437 } 438 439 dest.append(doWriteForward(text, bidiRun.start, 440 bidiRun.limit, options)); 441 442 if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) { 443 dest.append(RLM_CHAR); 444 } 445 } 446 } 447 } 448 } 449 450 return dest.toString(); 451 } 452 }