1 /* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 package jdk.nashorn.internal.joni.ast; 21 22 import jdk.nashorn.internal.joni.*; 23 import jdk.nashorn.internal.joni.constants.CCSTATE; 24 import jdk.nashorn.internal.joni.constants.CCVALTYPE; 25 import jdk.nashorn.internal.joni.encoding.AsciiTables; 26 import jdk.nashorn.internal.joni.encoding.CharacterType; 27 import jdk.nashorn.internal.joni.encoding.IntHolder; 28 import jdk.nashorn.internal.joni.exception.ErrorMessages; 29 import jdk.nashorn.internal.joni.exception.InternalException; 30 import jdk.nashorn.internal.joni.exception.SyntaxException; 31 import jdk.nashorn.internal.joni.exception.ValueException; 32 33 public final class CClassNode extends Node { 34 private static final int FLAG_NCCLASS_NOT = 1<<0; 35 private static final int FLAG_NCCLASS_SHARE = 1<<1; 36 37 int flags; 38 public final BitSet bs = new BitSet(); // conditional creation ? 39 public CodeRangeBuffer mbuf; /* multi-byte info or NULL */ 40 41 private int ctype; // for hashing purposes 42 43 // node_new_cclass 44 public CClassNode() {} 45 46 public CClassNode(int ctype, boolean not, int sbOut, int[]ranges) { 47 this(not, sbOut, ranges); 48 this.ctype = ctype; 49 } 50 51 public void clear() { 52 bs.clear(); 53 flags = 0; 54 mbuf = null; 55 } 56 57 // node_new_cclass_by_codepoint_range, only used by shared Char Classes 58 public CClassNode(boolean not, int sbOut, int[]ranges) { 59 if (not) setNot(); 60 // bs.clear(); 61 62 if (sbOut > 0 && ranges != null) { 63 int n = ranges[0]; 64 for (int i=0; i<n; i++) { 65 int from = ranges[i * 2 + 1]; 66 int to = ranges[i * 2 + 2]; 67 for (int j=from; j<=to; j++) { 68 if (j >= sbOut) { 69 setupBuffer(ranges); 70 return; 71 } 72 bs.set(j); 73 } 74 } 75 } 76 setupBuffer(ranges); 77 } 78 79 @Override 80 public int getType() { 81 return CCLASS; 82 } 83 84 @Override 85 public String getName() { 86 return "Character Class"; 87 } 88 89 @Override 90 public boolean equals(Object other) { 91 if (!(other instanceof CClassNode)) return false; 92 CClassNode cc = (CClassNode)other; 93 return ctype == cc.ctype && isNot() == cc.isNot(); 94 } 95 96 @Override 97 public int hashCode() { 98 if (Config.USE_SHARED_CCLASS_TABLE) { 99 int hash = 0; 100 hash += ctype; 101 if (isNot()) hash++; 102 return hash + (hash >> 5); 103 } else { 104 return super.hashCode(); 105 } 106 } 107 108 @Override 109 public String toString(int level) { 110 StringBuilder value = new StringBuilder(); 111 value.append("\n flags: " + flagsToString()); 112 value.append("\n bs: " + pad(bs, level + 1)); 113 value.append("\n mbuf: " + pad(mbuf, level + 1)); 114 115 return value.toString(); 116 } 117 118 public String flagsToString() { 119 StringBuilder flags = new StringBuilder(); 120 if (isNot()) flags.append("NOT "); 121 if (isShare()) flags.append("SHARE "); 122 return flags.toString(); 123 } 124 125 private void setupBuffer(int[]ranges) { 126 if (ranges != null) { 127 if (ranges[0] == 0) return; 128 mbuf = new CodeRangeBuffer(ranges); 129 } 130 } 131 132 public boolean isEmpty() { 133 return mbuf == null && bs.isEmpty(); 134 } 135 136 public void addCodeRangeToBuf(int from, int to) { 137 mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to); 138 } 139 140 public void addCodeRange(ScanEnvironment env, int from, int to) { 141 mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to); 142 } 143 144 public void addAllMultiByteRange() { 145 mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf); 146 } 147 148 public void clearNotFlag() { 149 if (isNot()) { 150 bs.invert(); 151 152 mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf); 153 clearNot(); 154 } 155 } 156 157 // and_cclass 158 public void and(CClassNode other) { 159 boolean not1 = isNot(); 160 BitSet bsr1 = bs; 161 CodeRangeBuffer buf1 = mbuf; 162 boolean not2 = other.isNot(); 163 BitSet bsr2 = other.bs; 164 CodeRangeBuffer buf2 = other.mbuf; 165 166 if (not1) { 167 BitSet bs1 = new BitSet(); 168 bsr1.invertTo(bs1); 169 bsr1 = bs1; 170 } 171 172 if (not2) { 173 BitSet bs2 = new BitSet(); 174 bsr2.invertTo(bs2); 175 bsr2 = bs2; 176 } 177 178 bsr1.and(bsr2); 179 180 if (bsr1 != bs) { 181 bs.copy(bsr1); 182 bsr1 = bs; 183 } 184 185 if (not1) { 186 bs.invert(); 187 } 188 189 CodeRangeBuffer pbuf = null; 190 191 if (not1 && not2) { 192 pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false); 193 } else { 194 pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2); 195 196 if (not1) { 197 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); 198 } 199 } 200 mbuf = pbuf; 201 202 } 203 204 // or_cclass 205 public void or(CClassNode other) { 206 boolean not1 = isNot(); 207 BitSet bsr1 = bs; 208 CodeRangeBuffer buf1 = mbuf; 209 boolean not2 = other.isNot(); 210 BitSet bsr2 = other.bs; 211 CodeRangeBuffer buf2 = other.mbuf; 212 213 if (not1) { 214 BitSet bs1 = new BitSet(); 215 bsr1.invertTo(bs1); 216 bsr1 = bs1; 217 } 218 219 if (not2) { 220 BitSet bs2 = new BitSet(); 221 bsr2.invertTo(bs2); 222 bsr2 = bs2; 223 } 224 225 bsr1.or(bsr2); 226 227 if (bsr1 != bs) { 228 bs.copy(bsr1); 229 bsr1 = bs; 230 } 231 232 if (not1) { 233 bs.invert(); 234 } 235 236 CodeRangeBuffer pbuf = null; 237 if (not1 && not2) { 238 pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false); 239 } else { 240 pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2); 241 if (not1) { 242 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); 243 } 244 } 245 mbuf = pbuf; 246 } 247 248 // add_ctype_to_cc_by_range // Encoding out! 249 public void addCTypeByRange(int ctype, boolean not, int sbOut, int mbr[]) { 250 int n = mbr[0]; 251 252 if (!not) { 253 for (int i=0; i<n; i++) { 254 for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) { 255 if (j >= sbOut) { 256 if (Config.VANILLA) { 257 if (j == mbr[i * 2 + 2]) { 258 i++; 259 } else if (j > mbr[i * 2 + 1]) { 260 addCodeRangeToBuf(j, mbr[i * 2 + 2]); 261 i++; 262 } 263 } else { 264 if (j >= mbr[i * 2 + 1]) { 265 addCodeRangeToBuf(j, mbr[i * 2 + 2]); 266 i++; 267 } 268 } 269 // !goto sb_end!, remove duplication! 270 for (; i<n; i++) { 271 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); 272 } 273 return; 274 } 275 bs.set(j); 276 } 277 } 278 // !sb_end:! 279 for (int i=0; i<n; i++) { 280 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); 281 } 282 283 } else { 284 int prev = 0; 285 286 for (int i=0; i<n; i++) { 287 for (int j=prev; j < mbr[2 * i + 1]; j++) { 288 if (j >= sbOut) { 289 // !goto sb_end2!, remove duplication 290 prev = sbOut; 291 for (i=0; i<n; i++) { 292 if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); 293 prev = mbr[i * 2 + 2] + 1; 294 } 295 if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff); 296 return; 297 } 298 bs.set(j); 299 } 300 prev = mbr[2 * i + 2] + 1; 301 } 302 303 for (int j=prev; j<sbOut; j++) { 304 bs.set(j); 305 } 306 307 // !sb_end2:! 308 prev = sbOut; 309 for (int i=0; i<n; i++) { 310 if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); 311 prev = mbr[i * 2 + 2] + 1; 312 } 313 if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff); 314 } 315 } 316 317 public void addCType(int ctype, boolean not, ScanEnvironment env, IntHolder sbOut) { 318 if (Config.NON_UNICODE_SDW) { 319 switch(ctype) { 320 case CharacterType.D: 321 case CharacterType.S: 322 case CharacterType.W: 323 ctype ^= CharacterType.SPECIAL_MASK; 324 325 if (env.syntax == Syntax.JAVASCRIPT && ctype == CharacterType.SPACE) { 326 // \s in JavaScript includes unicode characters. 327 break; 328 } 329 330 if (not) { 331 for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { 332 // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); 333 if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) == 0) bs.set(c); 334 } 335 addAllMultiByteRange(); 336 } else { 337 for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { 338 // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); 339 if ((AsciiTables.AsciiCtypeTable[c] & (1 << ctype)) != 0) bs.set(c); 340 } 341 } 342 return; 343 } 344 } 345 346 int[] ranges = EncodingHelper.ctypeCodeRange(ctype, sbOut); 347 if (ranges != null) { 348 addCTypeByRange(ctype, not, sbOut.value, ranges); 349 return; 350 } 351 352 switch(ctype) { 353 case CharacterType.ALPHA: 354 case CharacterType.BLANK: 355 case CharacterType.CNTRL: 356 case CharacterType.DIGIT: 357 case CharacterType.LOWER: 358 case CharacterType.PUNCT: 359 case CharacterType.SPACE: 360 case CharacterType.UPPER: 361 case CharacterType.XDIGIT: 362 case CharacterType.ASCII: 363 case CharacterType.ALNUM: 364 if (not) { 365 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 366 if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c); 367 } 368 addAllMultiByteRange(); 369 } else { 370 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 371 if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c); 372 } 373 } 374 break; 375 376 case CharacterType.GRAPH: 377 case CharacterType.PRINT: 378 if (not) { 379 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 380 if (!EncodingHelper.isCodeCType(c, ctype)) bs.set(c); 381 } 382 } else { 383 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 384 if (EncodingHelper.isCodeCType(c, ctype)) bs.set(c); 385 } 386 addAllMultiByteRange(); 387 } 388 break; 389 390 case CharacterType.WORD: 391 if (!not) { 392 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 393 if (EncodingHelper.isWord(c)) bs.set(c); 394 } 395 396 addAllMultiByteRange(); 397 } else { 398 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 399 if (!EncodingHelper.isWord(c)) bs.set(c); 400 } 401 } 402 break; 403 404 default: 405 throw new InternalException(ErrorMessages.ERR_PARSER_BUG); 406 } // switch 407 } 408 409 public static final class CCStateArg { 410 public int v; 411 public int vs; 412 public boolean vsIsRaw; 413 public boolean vIsRaw; 414 public CCVALTYPE inType; 415 public CCVALTYPE type; 416 public CCSTATE state; 417 } 418 419 public void nextStateClass(CCStateArg arg, ScanEnvironment env) { 420 if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE); 421 422 if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) { 423 if (arg.type == CCVALTYPE.SB) { 424 bs.set(arg.vs); 425 } else if (arg.type == CCVALTYPE.CODE_POINT) { 426 addCodeRange(env, arg.vs, arg.vs); 427 } 428 } 429 arg.state = CCSTATE.VALUE; 430 arg.type = CCVALTYPE.CLASS; 431 } 432 433 public void nextStateValue(CCStateArg arg, ScanEnvironment env) { 434 435 switch(arg.state) { 436 case VALUE: 437 if (arg.type == CCVALTYPE.SB) { 438 if (arg.vs > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); 439 bs.set(arg.vs); 440 } else if (arg.type == CCVALTYPE.CODE_POINT) { 441 addCodeRange(env, arg.vs, arg.vs); 442 } 443 break; 444 445 case RANGE: 446 if (arg.inType == arg.type) { 447 if (arg.inType == CCVALTYPE.SB) { 448 if (arg.vs > 0xff || arg.v > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); 449 450 if (arg.vs > arg.v) { 451 if (env.syntax.allowEmptyRangeInCC()) { 452 // goto ccs_range_end 453 arg.state = CCSTATE.COMPLETE; 454 break; 455 } else { 456 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 457 } 458 } 459 bs.setRange(arg.vs, arg.v); 460 } else { 461 addCodeRange(env, arg.vs, arg.v); 462 } 463 } else { 464 if (arg.vs > arg.v) { 465 if (env.syntax.allowEmptyRangeInCC()) { 466 // goto ccs_range_end 467 arg.state = CCSTATE.COMPLETE; 468 break; 469 } else { 470 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 471 } 472 } 473 bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff); 474 addCodeRange(env, arg.vs, arg.v); 475 } 476 // ccs_range_end: 477 arg.state = CCSTATE.COMPLETE; 478 break; 479 480 case COMPLETE: 481 case START: 482 arg.state = CCSTATE.VALUE; 483 break; 484 485 default: 486 break; 487 488 } // switch 489 490 arg.vsIsRaw = arg.vIsRaw; 491 arg.vs = arg.v; 492 arg.type = arg.inType; 493 } 494 495 // onig_is_code_in_cc_len 496 public boolean isCodeInCCLength(int code) { 497 boolean found; 498 499 if (code > 0xff) { 500 if (mbuf == null) { 501 found = false; 502 } else { 503 found = EncodingHelper.isInCodeRange(mbuf.getCodeRange(), code); 504 } 505 } else { 506 found = bs.at(code); 507 } 508 509 if (isNot()) { 510 return !found; 511 } else { 512 return found; 513 } 514 } 515 516 // onig_is_code_in_cc 517 public boolean isCodeInCC(int code) { 518 return isCodeInCCLength(code); 519 } 520 521 public void setNot() { 522 flags |= FLAG_NCCLASS_NOT; 523 } 524 525 public void clearNot() { 526 flags &= ~FLAG_NCCLASS_NOT; 527 } 528 529 public boolean isNot() { 530 return (flags & FLAG_NCCLASS_NOT) != 0; 531 } 532 533 public void setShare() { 534 flags |= FLAG_NCCLASS_SHARE; 535 } 536 537 public void clearShare() { 538 flags &= ~FLAG_NCCLASS_SHARE; 539 } 540 541 public boolean isShare() { 542 return (flags & FLAG_NCCLASS_SHARE) != 0; 543 } 544 545 }